Path: arch/x86/kernel/cpu/perf_event_intel.c

#ifdef CONFIG_CPU_SUP_INTEL

#define MAX_EXTRA_REGS 2

/*
 * Per register state.
 */
struct er_account {
	int			ref;		/* reference count */
	unsigned int		extra_reg;	/* extra MSR number */
	u64			extra_config;	/* extra MSR config */
};

/*
 * Per core state
 * This is used to coordinate shared registers for HT threads.
 */
struct intel_percore {
	raw_spinlock_t		lock;		/* protect structure */
	struct er_account	regs[MAX_EXTRA_REGS];
	int			refcnt;		/* number of threads */
	unsigned		core_id;
};

/*
 * Intel PerfMon, used on Core and later.
 */
static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
};

static struct event_constraint intel_core_event_constraints[] __read_mostly =
{
	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_core2_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/*
	 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
	 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
	 * ratio between these counters.
	 */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2),  CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
	INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
	EVENT_CONSTRAINT_END
};

static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{
	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
	EVENT_EXTRA_END
};

static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
{
	INTEL_EVENT_CONSTRAINT(0xb7, 0),
	EVENT_CONSTRAINT_END
};
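
/*
 * For reference (not part of the original file): in the constraint tables
 * above and below, the second argument of INTEL_EVENT_CONSTRAINT() is a
 * bitmask of the generic counters the event may be scheduled on.  For
 * example, INTEL_EVENT_CONSTRAINT(0x51, 0x3) restricts event 0x51 (L1D)
 * to counters 0 and 1, while FIXED_EVENT_CONSTRAINT(0x00c0, 0) binds
 * INST_RETIRED.ANY to fixed counter 0.
 */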

static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
	INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_snb_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
	INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
	INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
	EVENT_CONSTRAINT_END
};

static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
{
	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
	EVENT_EXTRA_END
};

static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
{
	INTEL_EVENT_CONSTRAINT(0xb7, 0),
	INTEL_EVENT_CONSTRAINT(0xbb, 0),
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_gen_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	EVENT_CONSTRAINT_END
};

static u64 intel_pmu_event_map(int hw_event)
{
	return intel_perfmon_event_map[hw_event];
}

static __initconst const u64 snb_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
		[ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
		[ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

static __initconst const u64 westmere_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

/*
 * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
 * See IA32 SDM Vol 3B 30.6.1.3
 */

#define NHM_DMND_DATA_RD	(1 << 0)
#define NHM_DMND_RFO		(1 << 1)
#define NHM_DMND_IFETCH		(1 << 2)
#define NHM_DMND_WB		(1 << 3)
#define NHM_PF_DATA_RD		(1 << 4)
#define NHM_PF_DATA_RFO		(1 << 5)
#define NHM_PF_IFETCH		(1 << 6)
#define NHM_OFFCORE_OTHER	(1 << 7)
#define NHM_UNCORE_HIT		(1 << 8)
#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
#define NHM_OTHER_CORE_HITM	(1 << 10)
				/* reserved */
#define NHM_REMOTE_CACHE_FWD	(1 << 12)
#define NHM_REMOTE_DRAM		(1 << 13)
#define NHM_LOCAL_DRAM		(1 << 14)
#define NHM_NON_DRAM		(1 << 15)

#define NHM_ALL_DRAM		(NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)

#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)

#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)

static __initconst const u64 nehalem_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
	},
 }
};

static __initconst const u64 nehalem_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

static __initconst const u64 core2_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

static __initconst const u64 atom_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

static void intel_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		intel_pmu_disable_bts();

	intel_pmu_pebs_disable_all();
	intel_pmu_lbr_disable_all();
}

static void intel_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	intel_pmu_pebs_enable_all();
	intel_pmu_lbr_enable_all();
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);

	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
		struct perf_event *event =
			cpuc->events[X86_PMC_IDX_FIXED_BTS];

		if (WARN_ON_ONCE(!event))
			return;

		intel_pmu_enable_bts(event->hw.config);
	}
}
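
/*
 * For reference (added note): in MSR_CORE_PERF_GLOBAL_CTRL, bit i enables
 * general-purpose counter PMCi and bit 32+i enables fixed counter i, so
 * the x86_pmu.intel_ctrl mask written above is e.g. 0x70000000f on a PMU
 * with 4 generic and 3 fixed counters, and writing 0 in
 * intel_pmu_disable_all() stops them all at once.
 */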

/*
 * Workaround for:
 *   Intel Errata AAK100 (model 26)
 *   Intel Errata AAP53  (model 30)
 *   Intel Errata BD53   (model 44)
 *
 * The official story:
 *   These chips need to be 'reset' when adding counters by programming the
 *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
 *   in sequence on the same PMC or on different PMCs.
 *
 * In practice it appears some of these events do in fact count, and
 * we need to program all 4 events.
 */
static void intel_pmu_nhm_workaround(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	static const unsigned long nhm_magic[4] = {
		0x4300B5,
		0x4300D2,
		0x4300B1,
		0x4300B1
	};
	struct perf_event *event;
	int i;

	/*
	 * The errata requires the following steps:
	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
	 *    the corresponding PMCx;
	 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
	 * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
	 */

	/*
	 * The real steps we choose are a little different from above.
	 * A) To reduce MSR operations, we don't run step 1) as they
	 *    are already cleared before this function is called;
	 * B) Call x86_perf_event_update to save PMCx before configuring
	 *    PERFEVTSELx with magic number;
	 * C) With step 5), we do clear only when the PERFEVTSELx is
	 *    not used currently.
	 * D) Call x86_perf_event_set_period to restore PMCx;
	 */

	/* We always operate 4 pairs of PERF Counters */
	for (i = 0; i < 4; i++) {
		event = cpuc->events[i];
		if (event)
			x86_perf_event_update(event);
	}

	for (i = 0; i < 4; i++) {
		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
	}

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);

	for (i = 0; i < 4; i++) {
		event = cpuc->events[i];

		if (event) {
			x86_perf_event_set_period(event);
			__x86_pmu_enable_event(&event->hw,
					ARCH_PERFMON_EVENTSEL_ENABLE);
		} else
			wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
	}
}

static void intel_pmu_nhm_enable_all(int added)
{
	if (added)
		intel_pmu_nhm_workaround();
	intel_pmu_enable_all(added);
}

static inline u64 intel_pmu_get_status(void)
{
	u64 status;

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

	return status;
}

static inline void intel_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
{
	int idx = hwc->idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, mask;

	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	wrmsrl(hwc->config_base, ctrl_val);
}

static void intel_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
		intel_pmu_disable_bts();
		intel_pmu_drain_bts_buffer();
		return;
	}

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_disable_fixed(hwc);
		return;
	}

	x86_pmu_disable_event(event);

	if (unlikely(event->attr.precise_ip))
		intel_pmu_pebs_disable(event);
}

static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
{
	int idx = hwc->idx - X86_PMC_IDX_FIXED;
	u64 ctrl_val, bits, mask;

	/*
	 * Enable IRQ generation (0x8),
	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
	 * if requested:
	 */
	bits = 0x8ULL;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
		bits |= 0x2;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		bits |= 0x1;

	/*
	 * ANY bit is supported in v3 and up
	 */
	if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
		bits |= 0x4;

	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
	wrmsrl(hwc->config_base, ctrl_val);
}

static void intel_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
		if (!__this_cpu_read(cpu_hw_events.enabled))
			return;

		intel_pmu_enable_bts(hwc->config);
		return;
	}

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_enable_fixed(hwc);
		return;
	}

	if (unlikely(event->attr.precise_ip))
		intel_pmu_pebs_enable(event);

	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
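
/*
 * Worked example for intel_pmu_enable_fixed() above (added note): each
 * fixed counter owns a 4-bit field in MSR_ARCH_PERFMON_FIXED_CTR_CTRL
 * (bit 0 = ring-0 counting, bit 1 = ring-3 counting, bit 2 = any-thread,
 * bit 3 = PMI on overflow).  A user+kernel event with interrupt delivery
 * on fixed counter 1 therefore programs (0x8 | 0x2 | 0x1) << (1 * 4) =
 * 0xb0 into that field; intel_pmu_disable_fixed() simply clears the same
 * 4-bit field.
 */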

/*
 * Save and restart an expired event. Called by NMI contexts,
 * so it has to be careful about preempting normal event ops:
 */
static int intel_pmu_save_and_restart(struct perf_event *event)
{
	x86_perf_event_update(event);
	return x86_perf_event_set_period(event);
}

static void intel_pmu_reset(void)
{
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
	unsigned long flags;
	int idx;

	if (!x86_pmu.num_counters)
		return;

	local_irq_save(flags);

	printk("clearing PMU state on CPU#%d\n", smp_processor_id());

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
		checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);

	if (ds)
		ds->bts_index = ds->bts_buffer_base;

	local_irq_restore(flags);
}

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static int intel_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	int bit, loops;
	u64 status;
	int handled;

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);

	/*
	 * Some chipsets need to unmask the LVTPC in a particular spot
	 * inside the nmi handler.  As a result, the unmasking was pushed
	 * into all the nmi handlers.
	 *
	 * This handler doesn't seem to have any issues with the unmasking
	 * so it was left at the top.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	intel_pmu_disable_all();
	handled = intel_pmu_drain_bts_buffer();
	status = intel_pmu_get_status();
	if (!status) {
		intel_pmu_enable_all(0);
		return handled;
	}

	loops = 0;
again:
	intel_pmu_ack_status(status);
	if (++loops > 100) {
		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
		perf_event_print_debug();
		intel_pmu_reset();
		goto done;
	}

	inc_irq_stat(apic_perf_irqs);

	intel_pmu_lbr_read();

	/*
	 * PEBS overflow sets bit 62 in the global status register
	 */
	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
		handled++;
		x86_pmu.drain_pebs(regs);
	}

	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[bit];

		handled++;

		if (!test_bit(bit, cpuc->active_mask))
			continue;

		if (!intel_pmu_save_and_restart(event))
			continue;

		data.period = event->hw.last_period;

		if (perf_event_overflow(event, 1, &data, regs))
			x86_pmu_stop(event, 0);
	}

	/*
	 * Repeat if there is more work to be done:
	 */
	status = intel_pmu_get_status();
	if (status)
		goto again;

done:
	intel_pmu_enable_all(0);
	return handled;
}

static struct event_constraint *
intel_bts_constraints(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int hw_event, bts_event;

	if (event->attr.freq)
		return NULL;

	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);

	if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
		return &bts_constraint;

	return NULL;
}
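
/*
 * Added note on the per-core sharing protocol implemented below:
 * intel_percore_constraints() takes the per-core lock and claims (or
 * references) an er_account slot for the event's extra MSR.  If a sibling
 * thread already programmed the same MSR with an identical extra_config,
 * the reference count is bumped and the event is allowed; if the config
 * differs, the empty constraint is returned so the event cannot be
 * scheduled; otherwise a free slot is claimed.  The matching
 * intel_put_event_constraints() further down drops the reference again.
 */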

static struct event_constraint *
intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
	struct event_constraint *c;
	struct intel_percore *pc;
	struct er_account *era;
	int i;
	int free_slot;
	int found;

	if (!x86_pmu.percore_constraints || hwc->extra_alloc)
		return NULL;

	for (c = x86_pmu.percore_constraints; c->cmask; c++) {
		if (e != c->code)
			continue;

		/*
		 * Allocate resource per core.
		 */
		pc = cpuc->per_core;
		if (!pc)
			break;
		c = &emptyconstraint;
		raw_spin_lock(&pc->lock);
		free_slot = -1;
		found = 0;
		for (i = 0; i < MAX_EXTRA_REGS; i++) {
			era = &pc->regs[i];
			if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
				/* Allow sharing same config */
				if (hwc->extra_config == era->extra_config) {
					era->ref++;
					cpuc->percore_used = 1;
					hwc->extra_alloc = 1;
					c = NULL;
				}
				/* else conflict */
				found = 1;
				break;
			} else if (era->ref == 0 && free_slot == -1)
				free_slot = i;
		}
		if (!found && free_slot != -1) {
			era = &pc->regs[free_slot];
			era->ref = 1;
			era->extra_reg = hwc->extra_reg;
			era->extra_config = hwc->extra_config;
			cpuc->percore_used = 1;
			hwc->extra_alloc = 1;
			c = NULL;
		}
		raw_spin_unlock(&pc->lock);
		return c;
	}

	return NULL;
}

static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct event_constraint *c;

	c = intel_bts_constraints(event);
	if (c)
		return c;

	c = intel_pebs_constraints(event);
	if (c)
		return c;

	c = intel_percore_constraints(cpuc, event);
	if (c)
		return c;

	return x86_get_event_constraints(cpuc, event);
}

static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
					struct perf_event *event)
{
	struct extra_reg *er;
	struct intel_percore *pc;
	struct er_account *era;
	struct hw_perf_event *hwc = &event->hw;
	int i, allref;

	if (!cpuc->percore_used)
		return;

	for (er = x86_pmu.extra_regs; er->msr; er++) {
		if (er->event != (hwc->config & er->config_mask))
			continue;

		pc = cpuc->per_core;
		raw_spin_lock(&pc->lock);
		for (i = 0; i < MAX_EXTRA_REGS; i++) {
			era = &pc->regs[i];
			if (era->ref > 0 &&
			    era->extra_config == hwc->extra_config &&
			    era->extra_reg == er->msr) {
				era->ref--;
				hwc->extra_alloc = 0;
				break;
			}
		}
		allref = 0;
		for (i = 0; i < MAX_EXTRA_REGS; i++)
			allref += pc->regs[i].ref;
		if (allref == 0)
			cpuc->percore_used = 0;
		raw_spin_unlock(&pc->lock);
		break;
	}
}
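
/*
 * For reference (added note), the alternative PEBS cycle encoding used in
 * intel_pmu_hw_config() below decomposes as:
 *
 *	0x108000c0 = (16 << 24)		cmask = 16
 *		   | (1  << 23)		invert counter mask
 *		   |  0x00c0		event = INST_RETIRED.ANY_P, umask = 0
 */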

static int intel_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);

	if (ret)
		return ret;

	if (event->attr.precise_ip &&
	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
		/*
		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
		 * (0x003c) so that we can use it with PEBS.
		 *
		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
		 * PEBS capable. However we can use INST_RETIRED.ANY_P
		 * (0x00c0), which is a PEBS capable event, to get the same
		 * count.
		 *
		 * INST_RETIRED.ANY_P counts the number of cycles that retire
		 * CNTMASK instructions. By setting CNTMASK to a value (16)
		 * larger than the maximum number of instructions that can be
		 * retired per cycle (4) and then inverting the condition, we
		 * count all cycles that retire 16 or less instructions, which
		 * is every cycle.
		 *
		 * Thereby we gain a PEBS capable cycle counter.
		 */
		u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */

		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
		event->hw.config = alt_config;
	}

	if (event->attr.type != PERF_TYPE_RAW)
		return 0;

	if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
		return 0;

	if (x86_pmu.version < 3)
		return -EINVAL;

	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
		return -EACCES;

	event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;

	return 0;
}

static __initconst const struct x86_pmu core_pmu = {
	.name			= "core",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= x86_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	.apic			= 1,
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic event period:
	 */
	.max_period		= (1ULL << 31) - 1,
	.get_event_constraints	= intel_get_event_constraints,
	.put_event_constraints	= intel_put_event_constraints,
	.event_constraints	= intel_core_event_constraints,
};

static int intel_pmu_cpu_prepare(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	if (!cpu_has_ht_siblings())
		return NOTIFY_OK;

	cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
				      GFP_KERNEL, cpu_to_node(cpu));
	if (!cpuc->per_core)
		return NOTIFY_BAD;

	raw_spin_lock_init(&cpuc->per_core->lock);
	cpuc->per_core->core_id = -1;
	return NOTIFY_OK;
}

static void intel_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	int core_id = topology_core_id(cpu);
	int i;

	init_debug_store_on_cpu(cpu);
	/*
	 * Deal with CPUs that don't clear their LBRs on power-up.
	 */
	intel_pmu_lbr_reset();

	if (!cpu_has_ht_siblings())
		return;

	for_each_cpu(i, topology_thread_cpumask(cpu)) {
		struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;

		if (pc && pc->core_id == core_id) {
			kfree(cpuc->per_core);
			cpuc->per_core = pc;
			break;
		}
	}

	cpuc->per_core->core_id = core_id;
	cpuc->per_core->refcnt++;
}

static void intel_pmu_cpu_dying(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	struct intel_percore *pc = cpuc->per_core;

	if (pc) {
		if (pc->core_id == -1 || --pc->refcnt == 0)
			kfree(pc);
		cpuc->per_core = NULL;
	}

	fini_debug_store_on_cpu(cpu);
}

static __initconst const struct x86_pmu intel_pmu = {
	.name			= "Intel",
	.handle_irq		= intel_pmu_handle_irq,
	.disable_all		= intel_pmu_disable_all,
	.enable_all		= intel_pmu_enable_all,
	.enable			= intel_pmu_enable_event,
	.disable		= intel_pmu_disable_event,
	.hw_config		= intel_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	.apic			= 1,
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic event period:
	 */
	.max_period		= (1ULL << 31) - 1,
	.get_event_constraints	= intel_get_event_constraints,
	.put_event_constraints	= intel_put_event_constraints,

	.cpu_prepare		= intel_pmu_cpu_prepare,
	.cpu_starting		= intel_pmu_cpu_starting,
	.cpu_dying		= intel_pmu_cpu_dying,
};

static void intel_clovertown_quirks(void)
{
	/*
	 * PEBS is unreliable due to:
	 *
	 *   AJ67  - PEBS may experience CPL leaks
	 *   AJ68  - PEBS PMI may be delayed by one event
	 *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
	 *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
	 *
	 * AJ67  could be worked around by restricting the OS/USR flags.
	 * AJ69  could be worked around by setting PMU_FREEZE_ON_PMI.
	 *
	 * AJ106 could possibly be worked around by not allowing LBR
	 *       usage from PEBS, including the fixup.
	 * AJ68  could possibly be worked around by always programming
	 *	 a pebs_event_reset[0] value and coping with the lost events.
	 *
	 * But taken together it might just make sense to not enable PEBS on
	 * these chips.
	 */
	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
	x86_pmu.pebs = 0;
	x86_pmu.pebs_constraints = NULL;
}

static __init int intel_pmu_init(void)
{
	union cpuid10_edx edx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int ebx;
	int version;

	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
		switch (boot_cpu_data.x86) {
		case 0x6:
			return p6_pmu_init();
		case 0xf:
			return p4_pmu_init();
		}
		return -ENODEV;
	}

	/*
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired hw_event or not.
	 */
	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
		return -ENODEV;

	version = eax.split.version_id;
	if (version < 2)
		x86_pmu = core_pmu;
	else
		x86_pmu = intel_pmu;

	x86_pmu.version			= version;
	x86_pmu.num_counters		= eax.split.num_counters;
	x86_pmu.cntval_bits		= eax.split.bit_width;
	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;

	/*
	 * Quirk: v2 perfmon does not report fixed-purpose events, so
	 * assume at least 3 events:
	 */
	if (version > 1)
		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);

	/*
	 * v2 and above have a perf capabilities MSR
	 */
	if (version > 1) {
		u64 capabilities;

		rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
		x86_pmu.intel_cap.capabilities = capabilities;
	}

	intel_ds_init();

	/*
	 * Install the hw-cache-events table:
	 */
	switch (boot_cpu_data.x86_model) {
	case 14: /* 65 nm core solo/duo, "Yonah" */
		pr_cont("Core events, ");
		break;

	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
		x86_pmu.quirks = intel_clovertown_quirks;
	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
	case 29: /* six-core 45 nm xeon "Dunnington" */
		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_core();

		x86_pmu.event_constraints = intel_core2_event_constraints;
		x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
		pr_cont("Core2 events, ");
		break;

	case 26: /* 45 nm nehalem, "Bloomfield" */
	case 30: /* 45 nm nehalem, "Lynnfield" */
	case 46: /* 45 nm nehalem-ex, "Beckton" */
		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_nhm();

		x86_pmu.event_constraints = intel_nehalem_event_constraints;
		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
		x86_pmu.extra_regs = intel_nehalem_extra_regs;

		/* UOPS_ISSUED.STALLED_CYCLES */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;

		if (ebx & 0x40) {
			/*
			 * Erratum AAJ80 detected, we work it around by using
			 * the BR_MISP_EXEC.ANY event. This will over-count
			 * branch-misses, but it's still much better than the
			 * architectural event which is often completely bogus:
			 */
			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;

			pr_cont("erratum AAJ80 worked around, ");
		}
		pr_cont("Nehalem events, ");
		break;

	case 28: /* Atom */
		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_atom();

		x86_pmu.event_constraints = intel_gen_event_constraints;
		x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
		pr_cont("Atom events, ");
		break;

	case 37: /* 32 nm nehalem, "Clarkdale" */
	case 44: /* 32 nm nehalem, "Gulftown" */
	case 47: /* 32 nm Xeon E7 */
		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_nhm();

		x86_pmu.event_constraints = intel_westmere_event_constraints;
		x86_pmu.percore_constraints = intel_westmere_percore_constraints;
		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
		x86_pmu.extra_regs = intel_westmere_extra_regs;

		/* UOPS_ISSUED.STALLED_CYCLES */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;

		pr_cont("Westmere events, ");
		break;

	case 42: /* SandyBridge */
		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_nhm();

		x86_pmu.event_constraints = intel_snb_event_constraints;
		x86_pmu.pebs_constraints = intel_snb_pebs_events;

		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;

		pr_cont("SandyBridge events, ");
		break;

	default:
		/*
		 * default constraints for v2 and up
		 */
		x86_pmu.event_constraints = intel_gen_event_constraints;
		pr_cont("generic architected perfmon, ");
	}
	return 0;
}

#else /* CONFIG_CPU_SUP_INTEL */

static int intel_pmu_init(void)
{
	return 0;
}

#endif /* CONFIG_CPU_SUP_INTEL */