Path: blob/master/arch/x86/kernel/cpu/perf_event_amd.c
#ifdef CONFIG_CPU_SUP_AMD

static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
		[ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]			= 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS]			= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]		= 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES]			= 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]		= 0x00c2,
  [PERF_COUNT_HW_BRANCH_MISSES]			= 0x00c3,
  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
  [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
};

static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}

static int amd_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);

	if (ret)
		return ret;

	if (event->attr.type != PERF_TYPE_RAW)
		return 0;

	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;

	return 0;
}

/*
 * AMD64 events are detected based on their event codes.
 */
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
{
	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
}

static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
	return (hwc->config & 0xe0) == 0xe0;
}

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
	struct amd_nb *nb = cpuc->amd_nb;

	return nb && nb->nb_id != -1;
}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	int i;

	/*
	 * only care about NB events
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return;

	/*
	 * need to scan the whole list because the event may not have
	 * been assigned during scheduling
	 *
	 * no race condition possible because an event can only
	 * be removed on one CPU at a time AND the PMU is disabled
	 * when we come here
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (nb->owners[i] == event) {
			cmpxchg(nb->owners+i, event, NULL);
			break;
		}
	}
}

/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12
 *
 * NB events are events measuring L3 cache and HyperTransport
 * traffic. They are identified by an event code >= 0xe00.
 * They measure events on the NorthBridge, which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When an NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the
 * same counters to host NB events; this is why we use atomic
 * ops. Some multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling amd_put_event_constraints().
 *
 * Non-NB events are not impacted by this restriction.
 */
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old = NULL;
	int max = x86_pmu.num_counters;
	int i, j, k = -1;

	/*
	 * if not NB event or no NB, then no constraints
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return &unconstrained;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for (i = 0; i < max; i++) {
		/*
		 * keep track of first free slot
		 */
		if (k == -1 && !nb->owners[i])
			k = i;

		/* already present, reuse */
		if (nb->owners[i] == event)
			goto done;
	}
	/*
	 * not present, so grab a new slot
	 * starting either at:
	 */
	if (hwc->idx != -1) {
		/* previous assignment */
		i = hwc->idx;
	} else if (k != -1) {
		/* start from free slot found */
		i = k;
	} else {
		/*
		 * event not found, no slot found in
		 * first pass, try again from the
		 * beginning
		 */
		i = 0;
	}
	j = i;
	do {
		old = cmpxchg(nb->owners+i, NULL, event);
		if (!old)
			break;
		if (++i == max)
			i = 0;
	} while (i != j);
done:
	if (!old)
		return &nb->event_constraints[i];

	return &emptyconstraint;
}

static struct amd_nb *amd_alloc_nb(int cpu)
{
	struct amd_nb *nb;
	int i;

	nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
			  cpu_to_node(cpu));
	if (!nb)
		return NULL;

	nb->nb_id = -1;

	/*
	 * initialize all possible NB constraints
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		__set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
	return nb;
}

static int amd_pmu_cpu_prepare(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	WARN_ON_ONCE(cpuc->amd_nb);

	if (boot_cpu_data.x86_max_cores < 2)
		return NOTIFY_OK;

	cpuc->amd_nb = amd_alloc_nb(cpu);
	if (!cpuc->amd_nb)
		return NOTIFY_BAD;

	return NOTIFY_OK;
}

static void amd_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	struct amd_nb *nb;
	int i, nb_id;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	nb_id = amd_get_nb_id(cpu);
	WARN_ON_ONCE(nb_id == BAD_APICID);

	for_each_online_cpu(i) {
		nb = per_cpu(cpu_hw_events, i).amd_nb;
		if (WARN_ON_ONCE(!nb))
			continue;

		if (nb->nb_id == nb_id) {
			kfree(cpuc->amd_nb);
			cpuc->amd_nb = nb;
			break;
		}
	}

	cpuc->amd_nb->nb_id = nb_id;
	cpuc->amd_nb->refcnt++;
}

static void amd_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuhw;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	cpuhw = &per_cpu(cpu_hw_events, cpu);

	if (cpuhw->amd_nb) {
		struct amd_nb *nb = cpuhw->amd_nb;

		if (nb->nb_id == -1 || --nb->refcnt == 0)
			kfree(nb);

		cpuhw->amd_nb = NULL;
	}
}

static __initconst const struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= amd_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= 4,
	.cntval_bits		= 48,
	.cntval_mask		= (1ULL << 48) - 1,
	.apic			= 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= amd_get_event_constraints,
	.put_event_constraints	= amd_put_event_constraints,

	.cpu_prepare		= amd_pmu_cpu_prepare,
	.cpu_starting		= amd_pmu_cpu_starting,
	.cpu_dead		= amd_pmu_cpu_dead,
};

/* AMD Family 15h */

#define AMD_EVENT_TYPE_MASK	0x000000F0ULL

#define AMD_EVENT_FP		0x00000000ULL ... 0x00000010ULL
#define AMD_EVENT_LS		0x00000020ULL ... 0x00000030ULL
#define AMD_EVENT_DC		0x00000040ULL ... 0x00000050ULL
#define AMD_EVENT_CU		0x00000060ULL ... 0x00000070ULL
#define AMD_EVENT_IC_DE		0x00000080ULL ... 0x00000090ULL
#define AMD_EVENT_EX_LS		0x000000C0ULL
#define AMD_EVENT_DE		0x000000D0ULL
#define AMD_EVENT_NB		0x000000E0ULL ... 0x000000F0ULL

/*
 * AMD family 15h event code/PMC mappings:
 *
 * type = event_code & 0x0F0:
 *
 * 0x000	FP	PERF_CTL[5:3]
 * 0x010	FP	PERF_CTL[5:3]
 * 0x020	LS	PERF_CTL[5:0]
 * 0x030	LS	PERF_CTL[5:0]
 * 0x040	DC	PERF_CTL[5:0]
 * 0x050	DC	PERF_CTL[5:0]
 * 0x060	CU	PERF_CTL[2:0]
 * 0x070	CU	PERF_CTL[2:0]
 * 0x080	IC/DE	PERF_CTL[2:0]
 * 0x090	IC/DE	PERF_CTL[2:0]
 * 0x0A0	---
 * 0x0B0	---
 * 0x0C0	EX/LS	PERF_CTL[5:0]
 * 0x0D0	DE	PERF_CTL[2:0]
 * 0x0E0	NB	NB_PERF_CTL[3:0]
 * 0x0F0	NB	NB_PERF_CTL[3:0]
 *
 * Exceptions:
 *
 * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x003	FP	PERF_CTL[3]
 * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x00B	FP	PERF_CTL[3]
 * 0x00D	FP	PERF_CTL[3]
 * 0x023	DE	PERF_CTL[2:0]
 * 0x02D	LS	PERF_CTL[3]
 * 0x02E	LS	PERF_CTL[3,0]
 * 0x043	CU	PERF_CTL[2:0]
 * 0x045	CU	PERF_CTL[2:0]
 * 0x046	CU	PERF_CTL[2:0]
 * 0x054	CU	PERF_CTL[2:0]
 * 0x055	CU	PERF_CTL[2:0]
 * 0x08F	IC	PERF_CTL[0]
 * 0x187	DE	PERF_CTL[0]
 * 0x188	DE	PERF_CTL[0]
 * 0x0DB	EX	PERF_CTL[5:0]
 * 0x0DC	LS	PERF_CTL[5:0]
 * 0x0DD	LS	PERF_CTL[5:0]
 * 0x0DE	LS	PERF_CTL[5:0]
 * 0x0DF	LS	PERF_CTL[5:0]
 * 0x1D6	EX	PERF_CTL[5:0]
 * 0x1D8	EX	PERF_CTL[5:0]
 *
 * (*) depending on the umask all FPU counters may be used
 */

static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);

static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int event_code = amd_get_event_code(hwc);

	switch (event_code & AMD_EVENT_TYPE_MASK) {
	case AMD_EVENT_FP:
		switch (event_code) {
		case 0x000:
			if (!(hwc->config & 0x0000F000ULL))
				break;
			if (!(hwc->config & 0x00000F00ULL))
				break;
			return &amd_f15_PMC3;
		case 0x004:
			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
				break;
			return &amd_f15_PMC3;
		case 0x003:
		case 0x00B:
		case 0x00D:
			return &amd_f15_PMC3;
		}
		return &amd_f15_PMC53;
	case AMD_EVENT_LS:
	case AMD_EVENT_DC:
	case AMD_EVENT_EX_LS:
		switch (event_code) {
		case 0x023:
		case 0x043:
		case 0x045:
		case 0x046:
		case 0x054:
		case 0x055:
			return &amd_f15_PMC20;
		case 0x02D:
			return &amd_f15_PMC3;
		case 0x02E:
			return &amd_f15_PMC30;
		default:
			return &amd_f15_PMC50;
		}
	case AMD_EVENT_CU:
	case AMD_EVENT_IC_DE:
	case AMD_EVENT_DE:
		switch (event_code) {
		case 0x08F:
		case 0x187:
		case 0x188:
			return &amd_f15_PMC0;
		case 0x0DB ... 0x0DF:
		case 0x1D6:
		case 0x1D8:
			return &amd_f15_PMC50;
		default:
			return &amd_f15_PMC20;
		}
	case AMD_EVENT_NB:
		/* not yet implemented */
		return &emptyconstraint;
	default:
		return &emptyconstraint;
	}
}

static __initconst const struct x86_pmu amd_pmu_f15h = {
	.name			= "AMD Family 15h",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= amd_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_F15H_PERF_CTL,
	.perfctr		= MSR_F15H_PERF_CTR,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= 6,
	.cntval_bits		= 48,
	.cntval_mask		= (1ULL << 48) - 1,
	.apic			= 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= amd_get_event_constraints_f15h,
	/* northbridge counters not yet implemented: */
#if 0
	.put_event_constraints	= amd_put_event_constraints,

	.cpu_prepare		= amd_pmu_cpu_prepare,
	.cpu_starting		= amd_pmu_cpu_starting,
	.cpu_dead		= amd_pmu_cpu_dead,
#endif
};

static __init int amd_pmu_init(void)
{
	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	/*
	 * If the core performance counter extensions exist, it must be
	 * family 15h, otherwise fail. See x86_pmu_addr_offset().
	 */
	switch (boot_cpu_data.x86) {
	case 0x15:
		if (!cpu_has_perfctr_core)
			return -ENODEV;
		x86_pmu = amd_pmu_f15h;
		break;
	default:
		if (cpu_has_perfctr_core)
			return -ENODEV;
		x86_pmu = amd_pmu;
		break;
	}

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}

#else /* CONFIG_CPU_SUP_AMD */

static int amd_pmu_init(void)
{
	return 0;
}

#endif
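
For context, the amd_hw_cache_event_ids table above is reached through the generic
PERF_TYPE_HW_CACHE interface: user space encodes (cache, op, result) into attr.config,
and the core perf code looks the event up in hw_cache_event_ids, which amd_pmu_init()
populated from this table. Below is a minimal, hypothetical user-space sketch (not part
of the kernel tree; the file name and busy loop are illustrative) that counts L1D read
misses, i.e. event 0x0141 per the table, on an AMD CPU:

/* count_l1d_misses.c - hypothetical example, not part of this file.
 * Uses the generic cache-event encoding, which the kernel translates
 * via hw_cache_event_ids (0x0141 on AMD for L1D read misses).
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	/* no glibc wrapper exists for this system call */
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	volatile long sink = 0;
	long i;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_HW_CACHE;
	/* config = cache_id | (op_id << 8) | (result_id << 16) */
	attr.config = PERF_COUNT_HW_CACHE_L1D |
		      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	for (i = 0; i < 1000000; i++)	/* some work to generate cache traffic */
		sink += i;
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("L1D read misses: %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}

A raw AMD event code could be requested instead with attr.type = PERF_TYPE_RAW;
that path goes through amd_pmu_hw_config(), which masks attr.config with
AMD64_RAW_EVENT_MASK before programming the event select register.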