Accepting request 250289 from home:jones_tony:branches:devel:tools
OBS-URL: https://build.opensuse.org/request/show/250289 OBS-URL: https://build.opensuse.org/package/show/devel:tools/oprofile?expand=0&rev=36
This commit is contained in:
parent
465476d433
commit
7da81abab5
@ -1,3 +0,0 @@
|
|||||||
version https://git-lfs.github.com/spec/v1
|
|
||||||
oid sha256:9bd94bbe3a6e8e9e92a8dbf68c753de588d829407ef829f0d022cbb5c632d07c
|
|
||||||
size 1132314
|
|
3
oprofile-1.0.0.tar.bz2
Normal file
3
oprofile-1.0.0.tar.bz2
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:9db8799b52073f1fa5be7e33b8730eba1a97ea5e3f084081068eac57d80edb9f
|
||||||
|
size 1027216
|
@ -1,227 +0,0 @@
|
|||||||
From: Andi Kleen <ak@linux.intel.com>
|
|
||||||
Subject: Add support for Intel Silvermont processor
|
|
||||||
Date: Thu Oct 10 13:12:28 2013 -0500
|
|
||||||
Git-commit: 4b1497d8befcc4c8b26dc4e4866c3422ae8787c3
|
|
||||||
References: bnc#891892
|
|
||||||
Signed-off-by: Tony Jones <tonyj@suse.de>
|
|
||||||
|
|
||||||
[adjust for context, no request to take e500/e6500 ppc changes]
|
|
||||||
|
|
||||||
Add support for Intel Silvermont processor
|
|
||||||
|
|
||||||
Just add the event list for Intel Silvermont based systems
|
|
||||||
(Avoton, BayTrail) and the usual changes for a new CPU.
|
|
||||||
No new code otherwise.
|
|
||||||
|
|
||||||
The model number list is incomplete at this point, more will
|
|
||||||
be added in the future.
|
|
||||||
|
|
||||||
I also finally removed the top level event list descriptions.
|
|
||||||
All the events are only described in the unit masks now
|
|
||||||
(Intel doesn't really have a top level event, and I had
|
|
||||||
to invent descriptions, which was error prone and
|
|
||||||
often wrong)
|
|
||||||
|
|
||||||
I also removed some outdated document number references.
|
|
||||||
|
|
||||||
Signed-off-by: Andi Kleen <ak@linux.intel.com>
|
|
||||||
|
|
||||||
---
|
|
||||||
events/Makefile.am | 1
|
|
||||||
events/i386/silvermont/events | 26 +++++++++++++
|
|
||||||
events/i386/silvermont/unit_masks | 71 ++++++++++++++++++++++++++++++++++++++
|
|
||||||
libop/op_cpu_type.c | 2 +
|
|
||||||
libop/op_cpu_type.h | 1
|
|
||||||
libop/op_events.c | 1
|
|
||||||
libop/op_hw_specific.h | 3 +
|
|
||||||
utils/ophelp.c | 5 +-
|
|
||||||
8 files changed, 108 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
--- a/events/Makefile.am
|
|
||||||
+++ b/events/Makefile.am
|
|
||||||
@@ -21,6 +21,7 @@ event_files = \
|
|
||||||
i386/sandybridge/events i386/sandybridge/unit_masks \
|
|
||||||
i386/ivybridge/events i386/ivybridge/unit_masks \
|
|
||||||
i386/haswell/events i386/haswell/unit_masks \
|
|
||||||
+ i386/silvermont/events i386/silvermont/unit_masks \
|
|
||||||
ia64/ia64/events ia64/ia64/unit_masks \
|
|
||||||
ia64/itanium2/events ia64/itanium2/unit_masks \
|
|
||||||
ia64/itanium/events ia64/itanium/unit_masks \
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/events/i386/silvermont/events
|
|
||||||
@@ -0,0 +1,26 @@
|
|
||||||
+#
|
|
||||||
+# Intel "Silvermont" microarchitecture core events.
|
|
||||||
+#
|
|
||||||
+# See http://ark.intel.com/ for help in identifying Silvermont based CPUs
|
|
||||||
+#
|
|
||||||
+# Note the minimum counts are not discovered experimentally and could likely be
|
|
||||||
+# lowered in many cases without ill effect.
|
|
||||||
+#
|
|
||||||
+include:i386/arch_perfmon
|
|
||||||
+event:0x32 counters:0,1 um:l2_prefetcher_throttle minimum:200003 name:l2_prefetcher_throttle :
|
|
||||||
+event:0x3e counters:0,1 um:one minimum:200003 name:l2_prefetcher_pref_stream_alloc :
|
|
||||||
+event:0x50 counters:0,1 um:zero minimum:200003 name:l2_prefetch_pend_streams_pref_stream_pend_set :
|
|
||||||
+event:0x86 counters:0,1 um:nip_stall minimum:200003 name:nip_stall :
|
|
||||||
+event:0x87 counters:0,1 um:decode_stall minimum:200003 name:decode_stall :
|
|
||||||
+event:0x96 counters:0,1 um:uip_match minimum:200003 name:uip_match :
|
|
||||||
+event:0xc2 counters:0,1 um:uops_retired minimum:2000003 name:uops_retired :
|
|
||||||
+event:0xc3 counters:0,1 um:x10 minimum:200003 name:machine_clears_live_lock_breaker :
|
|
||||||
+event:0xc4 counters:0,1 um:br_inst_retired minimum:2000003 name:br_inst_retired :
|
|
||||||
+event:0xc5 counters:0,1 um:br_misp_retired minimum:200003 name:br_misp_retired :
|
|
||||||
+event:0xca counters:0,1 um:no_alloc_cycles minimum:200003 name:no_alloc_cycles :
|
|
||||||
+event:0xcb counters:0,1 um:rs_full_stall minimum:200003 name:rs_full_stall :
|
|
||||||
+event:0xcc counters:0,1 um:rs_dispatch_stall minimum:200003 name:rs_dispatch_stall :
|
|
||||||
+event:0xe6 counters:0,1 um:baclears minimum:2000003 name:baclears :
|
|
||||||
+event:0xe7 counters:0,1 um:x02 minimum:200003 name:ms_decoded_early_exit :
|
|
||||||
+event:0xe8 counters:0,1 um:one minimum:200003 name:btclears_all :
|
|
||||||
+event:0xe9 counters:0,1 um:decode_restriction minimum:200003 name:decode_restriction :
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/events/i386/silvermont/unit_masks
|
|
||||||
@@ -0,0 +1,71 @@
|
|
||||||
+#
|
|
||||||
+# Unit masks for the Intel "Silvermont" micro architecture
|
|
||||||
+#
|
|
||||||
+# See http://ark.intel.com/ for help in identifying Silvermont based CPUs
|
|
||||||
+#
|
|
||||||
+include:i386/arch_perfmon
|
|
||||||
+name:x02 type:mandatory default:0x2
|
|
||||||
+ 0x2 No unit mask
|
|
||||||
+name:x10 type:mandatory default:0x10
|
|
||||||
+ 0x10 No unit mask
|
|
||||||
+name:l2_prefetcher_throttle type:exclusive default:0x2
|
|
||||||
+ 0x2 extra:edge conservative Counts the number of cycles the L2 prefetcher spends in throttling mode
|
|
||||||
+ 0x1 extra:edge aggressive Counts the number of cycles the L2 prefetcher spends in throttling mode
|
|
||||||
+name:nip_stall type:exclusive default:0x3f
|
|
||||||
+ 0x3f extra: all Counts the number of cycles the NIP stalls.
|
|
||||||
+ 0x1 extra: pfb_full Counts the number of cycles the NIP stalls and the PFBs are full. This DOES NOT inlude PFB throttler cases.
|
|
||||||
+ 0x2 extra: itlb_miss Counts the number of cycles the NIP stalls and there is an outstanding ITLB miss. This is a cummulative count of cycles the NIP stalled for all ITLB misses.
|
|
||||||
+ 0x8 extra: pfb_throttler Counts the number of cycles the NIP stalls, the throttler is engaged, and the PFBs appear full.
|
|
||||||
+ 0x10 extra: do_snoop Counts the number of cycles the NIP stalls because of a SMC compliance snoop to the MEC is required.
|
|
||||||
+ 0x20 extra: misc_other Counts the number of cycles the NIP stalls due to NUKE, Stop Front End, Inserted flows.
|
|
||||||
+ 0x1e extra: pfb_ready Counts the number of cycles the NIP stalls when the PFBs are not full and the decoders are able to process bytes. Does not count PFB_FULL nor MISC_OTHER stall cycles.
|
|
||||||
+name:decode_stall type:exclusive default:0x1
|
|
||||||
+ 0x1 extra: pfb_empty Counts the number of cycles decoder is stalled because the PFB is empty, this count is useful to see if the decoder is receiving the bytes from the front end. This event together with the DECODE_STALL.IQ_FULL may be used to narrow down on the bottleneck.
|
|
||||||
+ 0x2 extra: iq_full Counts the number of cycles decoder is stalled because the IQ is full, this count is useful to see if the decoder is delivering the decoded uops. This event together with the DECODE_STALL.PFB_EMPTY may be used to narrow down on the bottleneck.
|
|
||||||
+name:uip_match type:exclusive default:0x1
|
|
||||||
+ 0x1 extra: first_uip This event is used for counting the number of times a specific micro IP address was decoded
|
|
||||||
+ 0x2 extra: second_uip This event is used for counting the number of times a specific micro IP address was decoded
|
|
||||||
+name:uops_retired type:exclusive default:0x2
|
|
||||||
+ 0x2 extra: x87 This event counts the number of micro-ops retired that used X87 hardware.
|
|
||||||
+ 0x4 extra: mul This event counts the number of micro-ops retired that used MUL hardware.
|
|
||||||
+ 0x8 extra: div This event counts the number of micro-ops retired that used DIV hardware.
|
|
||||||
+ 0x1 extra: ms_cyles Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to faults, assists, and inserted flows.
|
|
||||||
+name:br_inst_retired type:exclusive default:0x1
|
|
||||||
+ 0x1 extra: remove_jcc REMOVE_JCC counts the number of branch instructions retired but removes taken and not taken conditional branches (JCC). Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
|
|
||||||
+ 0x2 extra: remove_rel_call REMOVE_REL_CALL counts the number of branch instructions retired but removes near relative CALL. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
|
|
||||||
+ 0x4 extra: remove_ind_call REMOVE_IND_CALL counts the number of branch instructions retired but removes near indirect CALL. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
|
|
||||||
+ 0x8 extra: remove_ret REMOVE_RET counts the number of branch instructions retired but removes near RET. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
|
|
||||||
+ 0x10 extra: remove_ind_jmp REMOVE_IND_JMP counts the number of branch instructions retired but removes near indirect JMP. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
|
|
||||||
+ 0x20 extra: remove_rel_jmp REMOVE_REL_JMP counts the number of branch instructions retired but removes near relative JMP. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
|
|
||||||
+ 0x40 extra: remove_far REMOVE_FAR counts the number of branch instructions retired but removes all far branches. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
|
|
||||||
+ 0x80 extra: remove_not_taken_jcc REMOVE_NOT_TAKEN_JCC counts the number of branch instructions retired but removes taken conditional branches (JCC). Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.
|
|
||||||
+name:br_misp_retired type:exclusive default:0x1
|
|
||||||
+ 0x1 extra: remove_jcc REMOVE_JCC counts the number of mispredicted branch instructions retired but removes taken and not taken conditional branches (JCC). This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa. When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
|
|
||||||
+ 0x4 extra: remove_ind_call REMOVE_IND_CALL Counts the number of mispredicted branch instructions retired but removes near indirect CALL. This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa. When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
|
|
||||||
+ 0x8 extra: remove_ret REMOVE_RET Counts the number of mispredicted branch instructions retired but removes near RET. This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa. When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
|
|
||||||
+ 0x10 extra: remove_ind_jmp REMOVE_IND_JMP counts the number of mispredicted branch instructions retired but removes near indirect JMP. This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa. When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
|
|
||||||
+ 0x80 extra: remove_not_taken_jcc REMOVE_NOT_TAKEN_JCC counts the number of mispredicted branch instructions retired but removes taken conditional branches (JCC). This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa. When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.
|
|
||||||
+name:no_alloc_cycles type:exclusive default:0x3f
|
|
||||||
+ 0x3f extra:inv all Counts the number of cycles that uops are allocated (inverse of NO_ALLOC_CYCLES.ALL)
|
|
||||||
+ 0x2 extra: sd_buffer_full Counts the number of cycles when no uops are allocated and the store data buffer is full.
|
|
||||||
+ 0x4 extra: mispredicts Counts the number of cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted jump to retire. After the misprediction is detected, the front end will start immediately but the allocate pipe stalls until the mispredicted
|
|
||||||
+ 0x8 extra: scoreboard Counts the number of cycles when no uops are allocated and a microcode IQ-based scoreboard stall is active. This includes stalls due to both the retirement scoreboard (at-ret) and micro-Jcc execution scoreboard (at-jeu). Does not count cycles when the MS
|
|
||||||
+ 0x10 extra: iq_empty Counts the number of cycles when no uops are allocated and the IQ is empty. Will assert immediately after a mispredict and partially overlap with MISPREDICTS sub event.
|
|
||||||
+name:rs_full_stall type:exclusive default:0x2
|
|
||||||
+ 0x2 extra: iec_port0 Counts the number of cycles the Alloc pipeline is stalled because IEC RS for port 0 is full.
|
|
||||||
+ 0x4 extra: iec_port1 Counts the number of cycles the Alloc pipeline is stalled because IEC RS for port 1 is full.
|
|
||||||
+ 0x8 extra: fpc_port0 Counts the number of cycles the Alloc pipeline is stalled because FPC RS for port 0 is full.
|
|
||||||
+ 0x10 extra: fpc_port1 Counts the number of cycles the Alloc pipeline is stalled because FPC RS for port 1 is full.
|
|
||||||
+name:rs_dispatch_stall type:exclusive default:0x1
|
|
||||||
+ 0x1 extra: iec0_rs *COUNTER BROKEN - NO FIX* Counts cycles when no uops were disptached from port 0 of IEC RS while the RS had valid ops left to dispatch
|
|
||||||
+ 0x2 extra: iec1_rs *COUNTER BROKEN - NO FIX* Counts cycles when no uops were disptached from port 1 of IEC RS while the RS had valid ops left to dispatch
|
|
||||||
+ 0x4 extra: fpc0_rs Counts cycles when no uops were disptached from port 0 of FPC RS while the RS had valid ops left to dispatch
|
|
||||||
+ 0x8 extra: fpc1_rs Counts cycles when no uops were disptached from port 1 of FPC RS while the RS had valid ops left to dispatch
|
|
||||||
+ 0x10 extra: mec_rs Counts cycles when no uops were dispatched from the MEC RS or rehab queue while valid ops were left to dispatch
|
|
||||||
+name:baclears type:exclusive default:0x2
|
|
||||||
+ 0x2 extra: indirect Counts the number indirect branch baclears
|
|
||||||
+ 0x4 extra: uncond Counts the number unconditional branch baclears
|
|
||||||
+ 0x1e extra: no_corner_case sum of submasks [4:1]. Does not count special case baclears due to things like parity errors, bogus branches, and pd$ issues.
|
|
||||||
+name:decode_restriction type:exclusive default:0x1
|
|
||||||
+ 0x1 extra: pdcache_wrong Counts the number of times a decode restriction reduced the decode throughput due to wrong instruction length prediction
|
|
||||||
+ 0x2 extra: all_3cycle_resteers Counts the number of times a decode restriction reduced the decode throughput because of all 3 cycle resteer conditions. Mainly PDCACHE_WRONG and MS_ENTRY cases.
|
|
||||||
--- a/libop/op_cpu_type.c
|
|
||||||
+++ b/libop/op_cpu_type.c
|
|
||||||
@@ -118,6 +118,7 @@ static struct cpu_descr const cpu_descrs
|
|
||||||
{ "AMD64 generic", "x86-64/generic", CPU_AMD64_GENERIC, 4 },
|
|
||||||
{ "IBM Power Architected Events V1", "ppc64/architected_events_v1", CPU_PPC64_ARCH_V1, 6 },
|
|
||||||
{ "ppc64 POWER8", "ppc64/power8", CPU_PPC64_POWER8, 6 },
|
|
||||||
+ { "Intel Silvermont microarchitecture", "i386/silvermont", CPU_SILVERMONT, 2 },
|
|
||||||
};
|
|
||||||
|
|
||||||
static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr);
|
|
||||||
@@ -633,6 +634,7 @@ op_cpu op_cpu_base_type(op_cpu cpu_type)
|
|
||||||
case CPU_ATOM:
|
|
||||||
case CPU_NEHALEM:
|
|
||||||
case CPU_HASWELL:
|
|
||||||
+ case CPU_SILVERMONT:
|
|
||||||
case CPU_WESTMERE:
|
|
||||||
case CPU_SANDYBRIDGE:
|
|
||||||
case CPU_IVYBRIDGE:
|
|
||||||
--- a/libop/op_cpu_type.h
|
|
||||||
+++ b/libop/op_cpu_type.h
|
|
||||||
@@ -105,6 +105,7 @@ typedef enum {
|
|
||||||
CPU_AMD64_GENERIC, /**< AMD64 Generic */
|
|
||||||
CPU_PPC64_ARCH_V1, /** < IBM Power architected events version 1 */
|
|
||||||
CPU_PPC64_POWER8, /**< ppc64 POWER8 family */
|
|
||||||
+ CPU_SILVERMONT, /** < Intel Silvermont microarchitecture */
|
|
||||||
MAX_CPU_TYPE
|
|
||||||
} op_cpu;
|
|
||||||
|
|
||||||
--- a/libop/op_events.c
|
|
||||||
+++ b/libop/op_events.c
|
|
||||||
@@ -1201,6 +1201,7 @@ void op_default_event(op_cpu cpu_type, s
|
|
||||||
case CPU_CORE_I7:
|
|
||||||
case CPU_NEHALEM:
|
|
||||||
case CPU_HASWELL:
|
|
||||||
+ case CPU_SILVERMONT:
|
|
||||||
case CPU_WESTMERE:
|
|
||||||
case CPU_SANDYBRIDGE:
|
|
||||||
case CPU_IVYBRIDGE:
|
|
||||||
--- a/libop/op_hw_specific.h
|
|
||||||
+++ b/libop/op_hw_specific.h
|
|
||||||
@@ -150,6 +150,9 @@ static inline op_cpu op_cpu_specific_typ
|
|
||||||
case 0x46:
|
|
||||||
case 0x47:
|
|
||||||
return CPU_HASWELL;
|
|
||||||
+ case 0x37:
|
|
||||||
+ case 0x4d:
|
|
||||||
+ return CPU_SILVERMONT;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return cpu_type;
|
|
||||||
--- a/utils/ophelp.c
|
|
||||||
+++ b/utils/ophelp.c
|
|
||||||
@@ -551,19 +551,20 @@ int main(int argc, char const * argv[])
|
|
||||||
case CPU_CORE_I7:
|
|
||||||
case CPU_NEHALEM:
|
|
||||||
case CPU_HASWELL:
|
|
||||||
+ case CPU_SILVERMONT:
|
|
||||||
case CPU_WESTMERE:
|
|
||||||
case CPU_SANDYBRIDGE:
|
|
||||||
case CPU_IVYBRIDGE:
|
|
||||||
case CPU_ATOM:
|
|
||||||
event_doc =
|
|
||||||
"See Intel Architecture Developer's Manual Volume 3B, Appendix A and\n"
|
|
||||||
- "Intel Architecture Optimization Reference Manual (730795-001)\n\n";
|
|
||||||
+ "Intel Architecture Optimization Reference Manual\n\n";
|
|
||||||
break;
|
|
||||||
|
|
||||||
case CPU_ARCH_PERFMON:
|
|
||||||
event_doc =
|
|
||||||
"See Intel 64 and IA-32 Architectures Software Developer's Manual\n"
|
|
||||||
- "Volume 3B (Document 253669) Chapter 18 for architectural perfmon events\n"
|
|
||||||
+ "Volume 3B Chapter 18 for architectural perfmon events\n"
|
|
||||||
"This is a limited set of fallback events because oprofile doesn't know your CPU\n";
|
|
||||||
break;
|
|
||||||
|
|
@ -1,33 +0,0 @@
|
|||||||
From: Tony Jones
|
|
||||||
Subject: allow compressed kernel when determining range
|
|
||||||
Upstream: no
|
|
||||||
|
|
||||||
Extracted from 0.9.7 patch 'oprofile-0.9.5-buildfixes.diff'.
|
|
||||||
|
|
||||||
This needs further investigation as purpose is unclear, since compressed image
|
|
||||||
isn't supported by underlying sampling code.
|
|
||||||
|
|
||||||
--- a/utils/opcontrol
|
|
||||||
+++ b/utils/opcontrol
|
|
||||||
@@ -482,10 +482,19 @@ get_image_range()
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
+ kernel_file=$FILE_IMAGE
|
|
||||||
+ case $(file -L $kernel_file) in
|
|
||||||
+ *"gzip compressed"*)
|
|
||||||
+ kernel_file=$(mktemp /tmp/opcXXXXXX) || exit 1
|
|
||||||
+ trap "rm -f $kernel_file" EXIT
|
|
||||||
+ gzip -cd $FILE_IMAGE > $kernel_file
|
|
||||||
+ ;;
|
|
||||||
+ esac
|
|
||||||
+
|
|
||||||
# start at the start of .text, and end at _etext
|
|
||||||
- range_info=`objdump -h $FILE_IMAGE 2>/dev/null | grep " .text "`
|
|
||||||
+ range_info=`objdump -h $kernel_file 2>/dev/null | grep " .text "`
|
|
||||||
tmp1=`echo $range_info | awk '{print $4}'`
|
|
||||||
- tmp2=`objdump -t $FILE_IMAGE 2>/dev/null | grep "_etext$" | awk '{ print $1 }'`
|
|
||||||
+ tmp2=`objdump -t $kernel_file 2>/dev/null | grep "_etext$" | awk '{ print $1 }'`
|
|
||||||
|
|
||||||
if test -z "$tmp1" -o -z "$tmp2"; then
|
|
||||||
echo "The specified file $FILE_IMAGE does not seem to be valid" >&2
|
|
@ -1,39 +0,0 @@
|
|||||||
From: Maynard Johnson <maynardj@us.ibm.com>
|
|
||||||
Git-commit: a2811baceccff810e055f166792acdf7a920bf8c
|
|
||||||
Subject: configure error message for missing libpfm is not informative enough
|
|
||||||
Date: Wed Oct 9 14:27:54 2013 -0500
|
|
||||||
|
|
||||||
configure error message for missing libpfm is not informative enough
|
|
||||||
|
|
||||||
On the ppc64 architecture, the libpfm library is used to get perf_events
|
|
||||||
encodings for events, so the configure script checks for the availability
|
|
||||||
of that library when building for ppc64. If the library is missing, the
|
|
||||||
configure error message is:
|
|
||||||
|
|
||||||
checking for perfmon/pfmlib.h... no
|
|
||||||
configure: error: pfmlib.h not found; usually provided in papi devel package
|
|
||||||
|
|
||||||
However, some newer distros (like Fedora 19) are now delivering separate
|
|
||||||
packages for libpfm and papi, instead of bundling them together. The patch
|
|
||||||
provided herein changes the configure message to reflect that change in
|
|
||||||
packaging.
|
|
||||||
|
|
||||||
Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>
|
|
||||||
|
|
||||||
diff --git a/configure.ac b/configure.ac
|
|
||||||
index 758b676..457145a 100644
|
|
||||||
--- a/configure.ac
|
|
||||||
+++ b/configure.ac
|
|
||||||
@@ -185,10 +185,10 @@ AC_CANONICAL_HOST
|
|
||||||
if test "$HAVE_PERF_EVENTS" = "1"; then
|
|
||||||
PFM_LIB=
|
|
||||||
if test "$host_cpu" = "powerpc64"; then
|
|
||||||
- AC_CHECK_HEADER(perfmon/pfmlib.h,,[AC_MSG_ERROR([pfmlib.h not found; usually provided in papi devel package])])
|
|
||||||
+ AC_CHECK_HEADER(perfmon/pfmlib.h,,[AC_MSG_ERROR([pfmlib.h not found; may be provided by libpfm devel or papi devel package])])
|
|
||||||
AC_CHECK_LIB(pfm,pfm_get_os_event_encoding, HAVE_LIBPFM3='0'; HAVE_LIBPFM='1', [
|
|
||||||
AC_CHECK_LIB(pfm, pfm_get_event_name, HAVE_LIBPFM3='1'; HAVE_LIBPFM='1',
|
|
||||||
- [AC_MSG_ERROR([libpfm not found; usually provided in papi devel package])])])
|
|
||||||
+ [AC_MSG_ERROR([libpfm not found; may be provided by libpfm devel or papi devel package])])])
|
|
||||||
PFM_LIB="-lpfm"
|
|
||||||
AC_DEFINE_UNQUOTED(HAVE_LIBPFM3, $HAVE_LIBPFM3, [Define to 1 if using libpfm3; 0 if using newer libpfm])
|
|
||||||
AC_DEFINE_UNQUOTED(HAVE_LIBPFM, $HAVE_LIBPFM, [Define to 1 if libpfm is available])
|
|
@ -1,51 +0,0 @@
|
|||||||
From: Maynard Johnson <maynardj@us.ibm.com>
|
|
||||||
Subject: Enable oprofile for new ppc64le architecture
|
|
||||||
Git-commit: a265c549bff149f5e9064dca7d06b6689fb3d64e
|
|
||||||
Date: Thu Jan 9 15:47:09 2014 -0600
|
|
||||||
|
|
||||||
Enable oprofile for new ppc64le architecture
|
|
||||||
|
|
||||||
Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>
|
|
||||||
|
|
||||||
diff --git a/configure.ac b/configure.ac
|
|
||||||
index 457145a..1e3a65f 100644
|
|
||||||
--- a/configure.ac
|
|
||||||
+++ b/configure.ac
|
|
||||||
@@ -184,7 +184,7 @@ AC_DEFINE_UNQUOTED(HAVE_PERF_EVENTS, $HAVE_PERF_EVENTS, [Kernel support for perf
|
|
||||||
AC_CANONICAL_HOST
|
|
||||||
if test "$HAVE_PERF_EVENTS" = "1"; then
|
|
||||||
PFM_LIB=
|
|
||||||
- if test "$host_cpu" = "powerpc64"; then
|
|
||||||
+ if test "$host_cpu" = "powerpc64le" -o "$host_cpu" = "powerpc64"; then
|
|
||||||
AC_CHECK_HEADER(perfmon/pfmlib.h,,[AC_MSG_ERROR([pfmlib.h not found; may be provided by libpfm devel or papi devel package])])
|
|
||||||
AC_CHECK_LIB(pfm,pfm_get_os_event_encoding, HAVE_LIBPFM3='0'; HAVE_LIBPFM='1', [
|
|
||||||
AC_CHECK_LIB(pfm, pfm_get_event_name, HAVE_LIBPFM3='1'; HAVE_LIBPFM='1',
|
|
||||||
diff --git a/libop/op_cpu_type.c b/libop/op_cpu_type.c
|
|
||||||
index 7d5262c..15c71ab 100644
|
|
||||||
--- a/libop/op_cpu_type.c
|
|
||||||
+++ b/libop/op_cpu_type.c
|
|
||||||
@@ -621,7 +621,8 @@ static op_cpu __get_cpu_type_alt_method(void)
|
|
||||||
fnmatch("i?86", uname_info.machine, 0) == 0) {
|
|
||||||
return _get_x86_64_cpu_type();
|
|
||||||
}
|
|
||||||
- if (strncmp(uname_info.machine, "ppc64", 5) == 0) {
|
|
||||||
+ if ((strncmp(uname_info.machine, "ppc64", 5) == 0) ||
|
|
||||||
+ (strncmp(uname_info.machine, "ppc64le", 7) == 0)) {
|
|
||||||
return _get_ppc64_cpu_type();
|
|
||||||
}
|
|
||||||
if (strncmp(uname_info.machine, "arm", 3) == 0) {
|
|
||||||
diff --git a/libutil++/bfd_support.cpp b/libutil++/bfd_support.cpp
|
|
||||||
index 67edd09..4b744f8 100644
|
|
||||||
--- a/libutil++/bfd_support.cpp
|
|
||||||
+++ b/libutil++/bfd_support.cpp
|
|
||||||
@@ -634,9 +634,7 @@ void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
|
|
||||||
bool bfd_info::get_synth_symbols()
|
|
||||||
{
|
|
||||||
extern const bfd_target bfd_elf64_powerpc_vec;
|
|
||||||
- extern const bfd_target bfd_elf64_powerpcle_vec;
|
|
||||||
- bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec)
|
|
||||||
- || (abfd->xvec == &bfd_elf64_powerpcle_vec);
|
|
||||||
+ bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec);
|
|
||||||
|
|
||||||
if (!is_elf64_powerpc_target)
|
|
||||||
return false;
|
|
@ -1,30 +0,0 @@
|
|||||||
From: Maynard Johnson <maynardj@us.ibm.com>
|
|
||||||
Subject: Fix "Unable to open cpu_type file for reading" for IBM POWER7+
|
|
||||||
Git-commit: 88ed74bade0096042d643a6d7e68c2cbc4b6e34d
|
|
||||||
Date: Thu Jan 9 15:07:21 2014 -0600
|
|
||||||
|
|
||||||
Fix "Unable to open cpu_type file for reading" for IBM POWER7+
|
|
||||||
|
|
||||||
Using operf to do profiling on an IBM POWER7+ may result in
|
|
||||||
the following error message:
|
|
||||||
|
|
||||||
Unable to open cpu_type file for reading
|
|
||||||
|
|
||||||
This patch fixes the problem. There is also a simple workaround of
|
|
||||||
running 'opcontrol --init'.
|
|
||||||
|
|
||||||
Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>
|
|
||||||
|
|
||||||
diff --git a/libop/op_cpu_type.c b/libop/op_cpu_type.c
|
|
||||||
index cd75ad4..7d5262c 100644
|
|
||||||
--- a/libop/op_cpu_type.c
|
|
||||||
+++ b/libop/op_cpu_type.c
|
|
||||||
@@ -326,6 +326,8 @@ static op_cpu _get_ppc64_cpu_type(void)
|
|
||||||
for (i = 0; i < (int)len ; i++)
|
|
||||||
cpu_name_lowercase[i] = tolower(cpu_name[i]);
|
|
||||||
|
|
||||||
+ if (strncmp(cpu_name_lowercase, "power7+", 7) == 0)
|
|
||||||
+ cpu_name_lowercase[6] = '\0';
|
|
||||||
cpu_type_str[0] = '\0';
|
|
||||||
strcat(cpu_type_str, "ppc64/");
|
|
||||||
strncat(cpu_type_str, cpu_name_lowercase, len);
|
|
@ -1,29 +0,0 @@
|
|||||||
From: Maynard Johnson <maynardj@us.ibm.com>
|
|
||||||
Subject: Make cpu type POWER8E equivalent to POWER8
|
|
||||||
Git-commit: 7243fa4ed8a25c6e59225a863fd263ce70989087
|
|
||||||
Date: Tue Feb 4 08:27:10 2014 -0600
|
|
||||||
|
|
||||||
Make cpu type POWER8E equivalent to POWER8
|
|
||||||
|
|
||||||
Recent mainline kernel changes resulted in a cpu type of
|
|
||||||
"POWER8E" being displayed in /proc/cpuinfo for certain revisions
|
|
||||||
of the IBM POWER8 processor model. But for profiling and
|
|
||||||
counting of native events, we can ignore the differences between
|
|
||||||
POWER8 and POWER8E. This patch addresses that issue.
|
|
||||||
|
|
||||||
Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>
|
|
||||||
|
|
||||||
diff --git a/libop/op_cpu_type.c b/libop/op_cpu_type.c
|
|
||||||
index 2907f36..1ae2913 100644
|
|
||||||
--- a/libop/op_cpu_type.c
|
|
||||||
+++ b/libop/op_cpu_type.c
|
|
||||||
@@ -331,6 +331,9 @@ static op_cpu _get_ppc64_cpu_type(void)
|
|
||||||
|
|
||||||
if (strncmp(cpu_name_lowercase, "power7+", 7) == 0)
|
|
||||||
cpu_name_lowercase[6] = '\0';
|
|
||||||
+ if (strncmp(cpu_name_lowercase, "power8e", 7) == 0)
|
|
||||||
+ cpu_name_lowercase[6] = '\0';
|
|
||||||
+
|
|
||||||
cpu_type_str[0] = '\0';
|
|
||||||
strcat(cpu_type_str, "ppc64/");
|
|
||||||
strncat(cpu_type_str, cpu_name_lowercase, len);
|
|
@ -12,7 +12,7 @@ Upstream: no
|
|||||||
AC_CANONICAL_HOST
|
AC_CANONICAL_HOST
|
||||||
if test "$HAVE_PERF_EVENTS" = "1"; then
|
if test "$HAVE_PERF_EVENTS" = "1"; then
|
||||||
PFM_LIB=
|
PFM_LIB=
|
||||||
- if test "$host_cpu" = "powerpc64le" -o "$host_cpu" = "powerpc64"; then
|
- if test "$host_cpu" = "powerpc64le" -o "$host_cpu" = "powerpc64"; then
|
||||||
+ if test "$host_cpu" = "powerpc64le" -o "$host_cpu" = "powerpc64" -o "$host_cpu" = "powerpc32"; then
|
+ if test "$host_cpu" = "powerpc64le" -o "$host_cpu" = "powerpc64" -o "$host_cpu" = "powerpc32"; then
|
||||||
AC_CHECK_HEADER(perfmon/pfmlib.h,,[AC_MSG_ERROR([pfmlib.h not found; may be provided by libpfm devel or papi devel package])])
|
AC_CHECK_HEADER(perfmon/pfmlib.h,,[AC_MSG_ERROR([pfmlib.h not found; may be provided by libpfm devel or papi devel package])])
|
||||||
AC_CHECK_LIB(pfm,pfm_get_os_event_encoding, HAVE_LIBPFM3='0'; HAVE_LIBPFM='1', [
|
AC_CHECK_LIB(pfm,pfm_get_os_event_encoding, HAVE_LIBPFM3='0'; HAVE_LIBPFM='1', [
|
||||||
|
@ -1,3 +1,80 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Sep 19 16:05:07 UTC 2014 - tonyj@suse.com
|
||||||
|
|
||||||
|
- Update to version 1.0.0. THIS IS A MAJOR OPROFILE RELEASE WITH SIGNIFICANT
|
||||||
|
CHANGES FROM THE PREVIOUS 0.9.9 RELEASE. See changelog below.
|
||||||
|
|
||||||
|
Drop patch: oprofile-add-support-for-intel-silvermont-processor.patch
|
||||||
|
Drop patch: oprofile-compressed-kernel.patch
|
||||||
|
Drop patch: oprofile-configure-error-message-for-missing-libpfm-is-not-informative-enough.patch
|
||||||
|
Drop patch: oprofile-enable-for-new-ppc64le-architecture.patch
|
||||||
|
Drop patch: oprofile-fix-unable-to-open-cpu_type-file-for-reading-for-ibm-power7.patch
|
||||||
|
Drop patch: oprofile-make-cpu-type-power8e-equivalent-to-power8.patch
|
||||||
|
|
||||||
|
|
||||||
|
Changelog:
|
||||||
|
Major changes:
|
||||||
|
- The legacy opcontrol-based profiler has been removed. operf is now the
|
||||||
|
only supported interface
|
||||||
|
- GUI component (i.e., oprof_start) has been removed.
|
||||||
|
- IBS events removed from AMD processors
|
||||||
|
- Following architectures have been removed (Alpha [except for EV67 which
|
||||||
|
is supported by operf/ocount], avr32, ia64, IBM Cell, P.A. Semi PA6T)
|
||||||
|
- RTC (real time clock) mode has been removed
|
||||||
|
|
||||||
|
Other incompatibilities:
|
||||||
|
- Sample data collected with previous releases of OProfile are incompatible
|
||||||
|
with release 1.0.
|
||||||
|
- ophelp schema: Major version changed for removal of unit mask 'extra'
|
||||||
|
attribute and addition of unit mask 'name'.
|
||||||
|
|
||||||
|
New features:
|
||||||
|
- Enhance ocount to support millisecond time intervals
|
||||||
|
- Obtain kernel symbols from /proc/kallsyms if no vmlinux file specified
|
||||||
|
- New Processor Support (Freescale e6500, Freescale e500mc,
|
||||||
|
Intel Silvermont, ARMv7 Krait, APM X-Gene (ARMv8),
|
||||||
|
Intel Broadwell, ARMv8 Cortex A57, ARMv8 Cortex A53)
|
||||||
|
- Added little endian support for IBM POWER8
|
||||||
|
- Update events for IBM POWER8
|
||||||
|
- Added edge-detect events for IBM POWER7
|
||||||
|
- Update events for Intel Haswell
|
||||||
|
|
||||||
|
Bug Fixes:
|
||||||
|
- opreport schema: Fix count field maxOccurs (changed to 'unbounded')
|
||||||
|
- Fix compile error on ppc/uClibc platform: 'AT_BASE_PLATFORM' undeclared
|
||||||
|
- Duplicate event specs passed to ocount show up twice in output
|
||||||
|
- Fix operf/ocount default unit mask selection
|
||||||
|
- ocount: print the unit mask, kernel and user modes if specified for the
|
||||||
|
event
|
||||||
|
- ophelp schema is not included in installed files
|
||||||
|
- Remove unused 'extra' attribute from ophelp schema
|
||||||
|
- opreport from 'operf --callgraph' profile shows false recursive calls
|
||||||
|
- Fix handling of default named unit masks longer than 11 chars
|
||||||
|
- Print unit mask name where applicable in ophelp XML output
|
||||||
|
- Fix profiling of multi-threaded apps when using "--pid" option
|
||||||
|
- Fix operf/opreport kernel throttling detection
|
||||||
|
- Fix sample attribution problem when using multiple events
|
||||||
|
- exclude/include files option doesn't work for opannotate -a
|
||||||
|
|
||||||
|
- Fix behavior and documentation for '--threshold' option
|
||||||
|
- Remove hard-coded timeout for JIT dump conversion
|
||||||
|
- Update Alpha EV67 CPU support and remove all other Alpha CPU support
|
||||||
|
- operf main process improperly killing conversion process
|
||||||
|
- Fix up S390 support to work with operf/ocount
|
||||||
|
- Link ocount with librt for clock_gettime only when needed
|
||||||
|
- Fix 'Invalid argument' running 'opcontrol --start --callgraph=<n>' in
|
||||||
|
Timer mode
|
||||||
|
- Allow root to remove old jitdump files from /tmp/.oprofile/jitdump
|
||||||
|
- Remove opreport warnings for /no-vmlinux, [vdso], [hypervisor_bucket]
|
||||||
|
not found
|
||||||
|
- Fix event codes for marked architected events (IBM ppc64)
|
||||||
|
- Make operf/ocount detect invalid timer mode from opcontrol
|
||||||
|
- Reduce overhead of operf waiting for profiled app to end
|
||||||
|
- Fix "Unable to open cpu_type file for reading" for IBM POWER7+
|
||||||
|
- Allow all native events for IBM POWER8 in POWER7 compat mode
|
||||||
|
- Fix spurious "backtraces skipped due to no file mapping" log entries
|
||||||
|
- Fix the units for the reported CPU frequency
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Thu Aug 14 17:45:58 UTC 2014 - tonyj@suse.com
|
Thu Aug 14 17:45:58 UTC 2014 - tonyj@suse.com
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ BuildRequires: libpfm-devel >= 4.3.0
|
|||||||
%endif
|
%endif
|
||||||
Url: http://oprofile.sourceforge.net/
|
Url: http://oprofile.sourceforge.net/
|
||||||
PreReq: /usr/sbin/groupadd /usr/sbin/useradd
|
PreReq: /usr/sbin/groupadd /usr/sbin/useradd
|
||||||
Version: 0.9.9
|
Version: 1.0.0
|
||||||
Release: 0
|
Release: 0
|
||||||
Summary: System-Wide Profiler for Linux Systems
|
Summary: System-Wide Profiler for Linux Systems
|
||||||
License: GPL-2.0+ and LGPL-2.1+
|
License: GPL-2.0+ and LGPL-2.1+
|
||||||
@ -42,14 +42,8 @@ Source2: %{name}.rpmlintrc
|
|||||||
Source3: baselibs.conf
|
Source3: baselibs.conf
|
||||||
Source4: jvmpi.h
|
Source4: jvmpi.h
|
||||||
Source5: README-BEFORE-ADDING-PATCHES
|
Source5: README-BEFORE-ADDING-PATCHES
|
||||||
Patch1: %{name}-compressed-kernel.patch
|
Patch1: %{name}-no-libjvm-version.patch
|
||||||
Patch2: %{name}-no-libjvm-version.patch
|
Patch2: %{name}-pfm-ppc.patch
|
||||||
Patch3: %{name}-configure-error-message-for-missing-libpfm-is-not-informative-enough.patch
|
|
||||||
Patch4: %{name}-enable-for-new-ppc64le-architecture.patch
|
|
||||||
Patch5: %{name}-fix-unable-to-open-cpu_type-file-for-reading-for-ibm-power7.patch
|
|
||||||
Patch6: %{name}-make-cpu-type-power8e-equivalent-to-power8.patch
|
|
||||||
Patch7: %{name}-pfm-ppc.patch
|
|
||||||
Patch8: %{name}-add-support-for-intel-silvermont-processor.patch
|
|
||||||
|
|
||||||
%description
|
%description
|
||||||
OProfile is a system-wide profiler for Linux systems, capable of
|
OProfile is a system-wide profiler for Linux systems, capable of
|
||||||
@ -102,12 +96,6 @@ from supported virtual machines.
|
|||||||
%setup -q
|
%setup -q
|
||||||
%patch1 -p1
|
%patch1 -p1
|
||||||
%patch2 -p1
|
%patch2 -p1
|
||||||
%patch3 -p1
|
|
||||||
%patch4 -p1
|
|
||||||
%patch5 -p1
|
|
||||||
%patch6 -p1
|
|
||||||
%patch7 -p1
|
|
||||||
%patch8 -p1
|
|
||||||
|
|
||||||
mkdir -p java/include
|
mkdir -p java/include
|
||||||
# copy files necessary to build Java agent libraries
|
# copy files necessary to build Java agent libraries
|
||||||
|
Loading…
Reference in New Issue
Block a user