Path: arch/x86/kernel/cpu/perf_event_p4.c
/*
 * Netburst Performance Events (P4, old Xeon)
 *
 *  Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <[email protected]>
 *  Copyright (C) 2010 Intel Corporation, Lin Ming <[email protected]>
 *
 *  For licensing details see kernel-base/COPYING
 */

#ifdef CONFIG_CPU_SUP_INTEL

#include <asm/perf_event_p4.h>

#define P4_CNTR_LIMIT 3
/*
 * array indices: 0,1 - HT threads, used with HT enabled cpu
 */
struct p4_event_bind {
	unsigned int opcode;			/* Event code and ESCR selector */
	unsigned int escr_msr[2];		/* ESCR MSR for this event */
	unsigned int escr_emask;		/* valid ESCR EventMask bits */
	unsigned int shared;			/* event is shared across threads */
	char cntr[2][P4_CNTR_LIMIT];		/* counter index (offset), -1 on absence */
};

struct p4_pebs_bind {
	unsigned int metric_pebs;
	unsigned int metric_vert;
};

/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
#define P4_GEN_PEBS_BIND(name, pebs, vert)			\
	[P4_PEBS_METRIC__##name] = {				\
		.metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,	\
		.metric_vert = vert,				\
	}

/*
 * note we have P4_PEBS_ENABLE_UOP_TAG always set here
 *
 * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
 * event configuration to find out which values are to be
 * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
 * registers
 */
static struct p4_pebs_bind p4_pebs_bind_map[] = {
	P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,	0x0000001, 0x0000001),
	P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,	0x0000002, 0x0000001),
	P4_GEN_PEBS_BIND(dtlb_load_miss_retired,	0x0000004, 0x0000001),
	P4_GEN_PEBS_BIND(dtlb_store_miss_retired,	0x0000004, 0x0000002),
	P4_GEN_PEBS_BIND(dtlb_all_miss_retired,		0x0000004, 0x0000003),
	P4_GEN_PEBS_BIND(tagged_mispred_branch,		0x0018000, 0x0000010),
	P4_GEN_PEBS_BIND(mob_load_replay_retired,	0x0000200, 0x0000001),
	P4_GEN_PEBS_BIND(split_load_retired,		0x0000400, 0x0000001),
	P4_GEN_PEBS_BIND(split_store_retired,		0x0000400, 0x0000002),
};
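/*
 * for example: selecting the P4_PEBS_METRIC__dtlb_store_miss_retired
 * metric leads to 0x0000004 (plus the always-set P4_PEBS_ENABLE_UOP_TAG)
 * being written into MSR_IA32_PEBS_ENABLE and 0x0000002 into
 * MSR_P4_PEBS_MATRIX_VERT, see p4_pmu_enable_pebs() below
 */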
/*
 * Note that we don't use CCCR1 here, there is an
 * exception for P4_BSQ_ALLOCATION but we just have
 * no workaround
 *
 * consider this binding as resources which particular
 * event may borrow, it doesn't contain EventMask,
 * Tags and friends -- they are left to a caller
 */
static struct p4_event_bind p4_event_bind_map[] = {
	[P4_EVENT_TC_DELIVER_MODE] = {
		.opcode		= P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
		.escr_msr	= { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
		.shared		= 1,
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_BPU_FETCH_REQUEST] = {
		.opcode		= P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
		.escr_msr	= { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_ITLB_REFERENCE] = {
		.opcode		= P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
		.escr_msr	= { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
			P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_MEMORY_CANCEL] = {
		.opcode		= P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
		.escr_msr	= { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MEMORY_COMPLETE] = {
		.opcode		= P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
		.escr_msr	= { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_LOAD_PORT_REPLAY] = {
		.opcode		= P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
		.escr_msr	= { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_STORE_PORT_REPLAY] = {
		.opcode		= P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
		.escr_msr	= { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MOB_LOAD_REPLAY] = {
		.opcode		= P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
		.escr_msr	= { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_PAGE_WALK_TYPE] = {
		.opcode		= P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
		.escr_msr	= { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
		.shared		= 1,
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_CACHE_REFERENCE] = {
		.opcode		= P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
		.escr_msr	= { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_IOQ_ALLOCATION] = {
		.opcode		= P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
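	/*
	 * entries marked "shared ESCR" below bind both HT threads to the
	 * very same ESCR MSR (escr_msr[0] == escr_msr[1]), so with HT
	 * enabled the two threads compete for a single ESCR instead of
	 * owning one each
	 */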
	[P4_EVENT_IOQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode		= P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
		.escr_msr	= { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
		.cntr		= { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_FSB_DATA_ACTIVITY] = {
		.opcode		= P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
		.shared		= 1,
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_ALLOCATION] = {		/* shared ESCR, broken CCCR1 */
		.opcode		= P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
		.escr_msr	= { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
		.cntr		= { {0, -1, -1}, {1, -1, -1} },
	},
	[P4_EVENT_BSQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode		= P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
		.escr_msr	= { MSR_P4_BSU_ESCR1, MSR_P4_BSU_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
		.cntr		= { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_SSE_INPUT_ASSIST] = {
		.opcode		= P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
		.shared		= 1,
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_SP_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
		.shared		= 1,
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_DP_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
		.shared		= 1,
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_SP_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
		.shared		= 1,
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_DP_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
		.shared		= 1,
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_64BIT_MMX_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
		.shared		= 1,
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_128BIT_MMX_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
		.shared		= 1,
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_X87_FP_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_X87_FP_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
		.shared		= 1,
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_TC_MISC] = {
		.opcode		= P4_OPCODE(P4_EVENT_TC_MISC),
		.escr_msr	= { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_GLOBAL_POWER_EVENTS] = {
		.opcode		= P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_TC_MS_XFER] = {
		.opcode		= P4_OPCODE(P4_EVENT_TC_MS_XFER),
		.escr_msr	= { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_UOP_QUEUE_WRITES] = {
		.opcode		= P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
		.escr_msr	= { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
		.opcode		= P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
		.escr_msr	= { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR0 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_BRANCH_TYPE] = {
		.opcode		= P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
		.escr_msr	= { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RESOURCE_STALL] = {
		.opcode		= P4_OPCODE(P4_EVENT_RESOURCE_STALL),
		.escr_msr	= { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_WC_BUFFER] = {
		.opcode		= P4_OPCODE(P4_EVENT_WC_BUFFER),
		.escr_msr	= { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
		.shared		= 1,
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_B2B_CYCLES] = {
		.opcode		= P4_OPCODE(P4_EVENT_B2B_CYCLES),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask	= 0,
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BNR] = {
		.opcode		= P4_OPCODE(P4_EVENT_BNR),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask	= 0,
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_SNOOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_SNOOP),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask	= 0,
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_RESPONSE] = {
		.opcode		= P4_OPCODE(P4_EVENT_RESPONSE),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask	= 0,
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_FRONT_END_EVENT] = {
		.opcode		= P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_EXECUTION_EVENT] = {
		.opcode		= P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_REPLAY_EVENT] = {
		.opcode		= P4_OPCODE(P4_EVENT_REPLAY_EVENT),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_RETIRED] = {
		.opcode		= P4_OPCODE(P4_EVENT_INSTR_RETIRED),
		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOPS_RETIRED] = {
		.opcode		= P4_OPCODE(P4_EVENT_UOPS_RETIRED),
		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOP_TYPE] = {
		.opcode		= P4_OPCODE(P4_EVENT_UOP_TYPE),
		.escr_msr	= { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_BRANCH_RETIRED] = {
		.opcode		= P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MISPRED_BRANCH_RETIRED] = {
		.opcode		= P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_X87_ASSIST] = {
		.opcode		= P4_OPCODE(P4_EVENT_X87_ASSIST),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MACHINE_CLEAR] = {
		.opcode		= P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_COMPLETED] = {
		.opcode		= P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.escr_emask	=
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
};

#define P4_GEN_CACHE_EVENT(event, bit, metric)				  \
	p4_config_pack_escr(P4_ESCR_EVENT(event)			| \
			    P4_ESCR_EMASK_BIT(event, bit))		| \
	p4_config_pack_cccr(metric					| \
			    P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))

static __initconst const u64 p4_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__1stl_cache_load_miss_retired),
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__dtlb_load_miss_retired),
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__dtlb_store_miss_retired),
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
						P4_PEBS_METRIC__none),
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
						P4_PEBS_METRIC__none),
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};
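/*
 * note that almost all cache events above are implemented on top of
 * P4_EVENT_REPLAY_EVENT plus a PEBS metric from p4_pebs_bind_map;
 * only the ITLB events are counted directly via P4_EVENT_ITLB_REFERENCE
 */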
static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
	/* non-halted CPU clocks */
	[PERF_COUNT_HW_CPU_CYCLES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),

	/*
	 * retired instructions
	 * for the sake of simplicity we don't use the FSB tagging
	 */
	[PERF_COUNT_HW_INSTRUCTIONS] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),

	/* cache hits */
	[PERF_COUNT_HW_CACHE_REFERENCES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),

	/* cache misses */
	[PERF_COUNT_HW_CACHE_MISSES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),

	/* branch instructions retired */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),

	/* mispredicted branches retired */
	[PERF_COUNT_HW_BRANCH_MISSES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),

	/* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
	[PERF_COUNT_HW_BUS_CYCLES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN))	|
	p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
};

static struct p4_event_bind *p4_config_get_bind(u64 config)
{
	unsigned int evnt = p4_config_unpack_event(config);
	struct p4_event_bind *bind = NULL;

	if (evnt < ARRAY_SIZE(p4_event_bind_map))
		bind = &p4_event_bind_map[evnt];

	return bind;
}

static u64 p4_pmu_event_map(int hw_event)
{
	struct p4_event_bind *bind;
	unsigned int esel;
	u64 config;

	config = p4_general_events[hw_event];
	bind = p4_config_get_bind(config);
	esel = P4_OPCODE_ESEL(bind->opcode);
	config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));

	return config;
}
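/*
 * for example, p4_pmu_event_map(PERF_COUNT_HW_CPU_CYCLES) takes the
 * GLOBAL_POWER_EVENTS template above and merges in the ESCR select
 * (ESEL) value derived from the event opcode, producing a complete
 * config ready to be unpacked into ESCR/CCCR bits at enable time
 */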
/* check cpu model specifics */
static bool p4_event_match_cpu_model(unsigned int event_idx)
{
	/* the INSTR_COMPLETED event only exists on models 3, 4 and 6 (Prescott) */
	if (event_idx == P4_EVENT_INSTR_COMPLETED) {
		if (boot_cpu_data.x86_model != 3 &&
			boot_cpu_data.x86_model != 4 &&
			boot_cpu_data.x86_model != 6)
			return false;
	}

	/*
	 * For info
	 * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
	 */

	return true;
}

static int p4_validate_raw_event(struct perf_event *event)
{
	unsigned int v, emask;

	/* User data may have an out-of-bound event index */
	v = p4_config_unpack_event(event->attr.config);
	if (v >= ARRAY_SIZE(p4_event_bind_map))
		return -EINVAL;

	/* It may be unsupported: */
	if (!p4_event_match_cpu_model(v))
		return -EINVAL;

	/*
	 * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as
	 * in Architectural Performance Monitoring: it selects not
	 * _which_ logical cpu to count on but rather _when_ to count,
	 * i.e. it depends on logical cpu state -- count the event when
	 * one cpu is active, none, both or any -- so we simply allow the
	 * user to pass any value desired.
	 *
	 * In turn we always set the Tx_OS/Tx_USR bits bound to the
	 * logical cpu, without propagating them to the other cpu
	 */

	/*
	 * if an event is shared across the logical threads
	 * the user needs special permissions to be able to use it
	 */
	if (p4_ht_active() && p4_event_bind_map[v].shared) {
		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
			return -EACCES;
	}

	/* ESCR EventMask bits may be invalid */
	emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
	if (emask & ~p4_event_bind_map[v].escr_emask)
		return -EINVAL;

	/*
	 * it may have some invalid PEBS bits
	 */
	if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
		return -EINVAL;

	v = p4_config_unpack_metric(event->attr.config);
	if (v >= ARRAY_SIZE(p4_pebs_bind_map))
		return -EINVAL;

	return 0;
}

static int p4_hw_config(struct perf_event *event)
{
	int cpu = get_cpu();
	int rc = 0;
	u32 escr, cccr;

	/*
	 * the reason we fetch the cpu this early is that if the event
	 * first gets scheduled on the same cpu, we will not need to swap
	 * the thread-specific flags in the config (and will save some
	 * cpu cycles)
	 */

	cccr = p4_default_cccr_conf(cpu);
	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
					event->attr.exclude_user);
	event->hw.config = p4_config_pack_escr(escr) |
			   p4_config_pack_cccr(cccr);

	if (p4_ht_active() && p4_ht_thread(cpu))
		event->hw.config = p4_set_ht_bit(event->hw.config);

	if (event->attr.type == PERF_TYPE_RAW) {
		struct p4_event_bind *bind;
		unsigned int esel;
		/*
		 * Clear bits we reserve to be managed by the kernel itself
		 * and never allowed from user space
		 */
		event->attr.config &= P4_CONFIG_MASK;

		rc = p4_validate_raw_event(event);
		if (rc)
			goto out;

		/*
		 * Note that for RAW events we allow the user to use
		 * P4_CCCR_RESERVED bits since we keep additional info
		 * there (for cache events etc.)
		 */
		event->hw.config |= event->attr.config;
		bind = p4_config_get_bind(event->attr.config);
		if (!bind) {
			rc = -EINVAL;
			goto out;
		}
		esel = P4_OPCODE_ESEL(bind->opcode);
		event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
	}

	rc = x86_setup_perfctr(event);
out:
	put_cpu();
	return rc;
}
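/*
 * to summarize the RAW path above: the user-supplied config is masked
 * down to the allowed P4_CONFIG_MASK bits, validated, merged with the
 * kernel-chosen default ESCR/CCCR bits and finally completed with the
 * ESCR select (ESEL) taken from the bind table -- the same step
 * p4_pmu_event_map() does for generic events
 */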
static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
{
	u64 v;

	/* an official way for overflow indication */
	rdmsrl(hwc->config_base, v);
	if (v & P4_CCCR_OVF) {
		wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
		return 1;
	}

	/*
	 * In some circumstances the overflow might issue an NMI without
	 * setting the P4_CCCR_OVF bit. Because a counter holds a negative
	 * value we simply check whether the high bit is set: if it is
	 * clear, the counter has passed zero and continued counting
	 * before the real NMI signal was received:
	 */
	rdmsrl(hwc->event_base, v);
	if (!(v & ARCH_P4_UNFLAGGED_BIT))
		return 1;

	return 0;
}

static void p4_pmu_disable_pebs(void)
{
	/*
	 * FIXME
	 *
	 * Two threads are still allowed to set up the same cache events,
	 * so we can't simply clear the metrics until we know no one is
	 * depending on us; we would need some kind of reference counter
	 * for "ReplayEvent" users.
	 *
	 * RAW events are more complex: if the user (for some reason)
	 * passes a cache event metric with an improper event opcode,
	 * it's fine from the hardware point of view but complete nonsense
	 * as far as the "meaning" of such an action goes.
	 *
	 * So for the moment let's leave the metrics turned on forever --
	 * it's ok for now but needs to be revisited!
	 *
	 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
	 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
	 */
}

static inline void p4_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * If the event gets disabled while the counter is in an
	 * overflowed state we need to clear P4_CCCR_OVF, otherwise the
	 * interrupt gets asserted again and again
	 */
	(void)checking_wrmsrl(hwc->config_base,
		(u64)(p4_config_unpack_cccr(hwc->config)) &
			~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}

static void p4_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_disable_event(event);
	}

	p4_pmu_disable_pebs();
}

/* configuration must be valid */
static void p4_pmu_enable_pebs(u64 config)
{
	struct p4_pebs_bind *bind;
	unsigned int idx;

	BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);

	idx = p4_config_unpack_metric(config);
	if (idx == P4_PEBS_METRIC__none)
		return;

	bind = &p4_pebs_bind_map[idx];

	(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
	(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
}

static void p4_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int thread = p4_ht_config_thread(hwc->config);
	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
	unsigned int idx = p4_config_unpack_event(hwc->config);
	struct p4_event_bind *bind;
	u64 escr_addr, cccr;

	bind = &p4_event_bind_map[idx];
	escr_addr = (u64)bind->escr_msr[thread];

	/*
	 * - we don't support cascaded counters yet
	 * - and counter 1 is broken (erratum)
	 */
	WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
	WARN_ON_ONCE(hwc->idx == 1);

	/* we need a real Event value */
	escr_conf &= ~P4_ESCR_EVENT_MASK;
	escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));

	cccr = p4_config_unpack_cccr(hwc->config);

	/*
	 * it could be a cache event, so we need to write the metrics
	 * into the additional MSRs
	 */
	p4_pmu_enable_pebs(hwc->config);

	(void)checking_wrmsrl(escr_addr, escr_conf);
	(void)checking_wrmsrl(hwc->config_base,
				(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
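/*
 * note the MSR write order in p4_pmu_enable_event() above: the ESCR is
 * programmed first and the CCCR is written last with P4_CCCR_ENABLE set,
 * so the counter starts only once its event selection is in place
 */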
static void p4_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_enable_event(event);
	}
}

static int p4_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int idx, handled = 0;
	u64 val;

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		int overflow;

		if (!test_bit(idx, cpuc->active_mask)) {
			/* catch in-flight IRQs */
			if (__test_and_clear_bit(idx, cpuc->running))
				handled++;
			continue;
		}

		event = cpuc->events[idx];
		hwc = &event->hw;

		WARN_ON_ONCE(hwc->idx != idx);

		/* it might be an unflagged overflow */
		overflow = p4_pmu_clear_cccr_ovf(hwc);

		val = x86_perf_event_update(event);
		if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
			continue;

		handled += overflow;

		/* event overflow for sure */
		data.period = event->hw.last_period;

		if (!x86_perf_event_set_period(event))
			continue;
		if (perf_event_overflow(event, 1, &data, regs))
			x86_pmu_stop(event, 0);
	}

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	/*
	 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
	 * been observed that the OVF bit flag has to be cleared first _before_
	 * the LVTPC can be unmasked.
	 *
	 * The reason is the NMI line will continue to be asserted while the OVF
	 * bit is set. This causes a second NMI to be generated if the LVTPC is
	 * unmasked before the OVF bit is cleared, leading to unknown NMI
	 * messages.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	return handled;
}

/*
 * swap thread specific fields according to the thread
 * we are going to run on
 */
static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
{
	u32 escr, cccr;

	/*
	 * either we are lucky and continue on the same cpu, or there is
	 * no HT support
	 */
	if (!p4_should_swap_ts(hwc->config, cpu))
		return;

	/*
	 * the event is migrated from another logical
	 * cpu, so we need to swap the thread-specific flags
	 */

	escr = p4_config_unpack_escr(hwc->config);
	cccr = p4_config_unpack_cccr(hwc->config);

	if (p4_ht_thread(cpu)) {
		cccr &= ~P4_CCCR_OVF_PMI_T0;
		cccr |= P4_CCCR_OVF_PMI_T1;
		if (escr & P4_ESCR_T0_OS) {
			escr &= ~P4_ESCR_T0_OS;
			escr |= P4_ESCR_T1_OS;
		}
		if (escr & P4_ESCR_T0_USR) {
			escr &= ~P4_ESCR_T0_USR;
			escr |= P4_ESCR_T1_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config |= P4_CONFIG_HT;
	} else {
		cccr &= ~P4_CCCR_OVF_PMI_T1;
		cccr |= P4_CCCR_OVF_PMI_T0;
		if (escr & P4_ESCR_T1_OS) {
			escr &= ~P4_ESCR_T1_OS;
			escr |= P4_ESCR_T0_OS;
		}
		if (escr & P4_ESCR_T1_USR) {
			escr &= ~P4_ESCR_T1_USR;
			escr |= P4_ESCR_T0_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config &= ~P4_CONFIG_HT;
	}
}

/*
 * ESCR address hashing is tricky: ESCRs are not sequential in memory,
 * but they all start from MSR_P4_BSU_ESCR0 (0x3a0) and the low byte of
 * any ESCR address lies in the range [0xa0, 0xe1],
 *
 * so we end up with a ~70% filled hash table
 */

#define P4_ESCR_MSR_BASE		0x000003a0
#define P4_ESCR_MSR_MAX			0x000003e1
#define P4_ESCR_MSR_TABLE_SIZE		(P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
#define P4_ESCR_MSR_IDX(msr)		(msr - P4_ESCR_MSR_BASE)
#define P4_ESCR_MSR_TABLE_ENTRY(msr)	[P4_ESCR_MSR_IDX(msr)] = msr
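/*
 * for example, MSR_P4_BSU_ESCR0 (0x3a0) hashes to index 0; slots not
 * populated below remain zero, which p4_get_escr_idx() treats as an
 * unknown ESCR address
 */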
static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
};

static int p4_get_escr_idx(unsigned int addr)
{
	unsigned int idx = P4_ESCR_MSR_IDX(addr);

	if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE	||
			!p4_escr_table[idx]		||
			p4_escr_table[idx] != addr)) {
		WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
		return -1;
	}

	return idx;
}

static int p4_next_cntr(int thread, unsigned long *used_mask,
			struct p4_event_bind *bind)
{
	int i, j;

	for (i = 0; i < P4_CNTR_LIMIT; i++) {
		j = bind->cntr[thread][i];
		if (j != -1 && !test_bit(j, used_mask))
			return j;
	}

	return -1;
}
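/*
 * event scheduling below is a greedy first-fit pass: each event needs
 * both a free counter (from the cntr[] row of its bind for the current
 * thread) and a free ESCR; if either resource is unavailable, the whole
 * assignment fails with -ENOSPC
 */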
static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
	int cpu = smp_processor_id();
	struct hw_perf_event *hwc;
	struct p4_event_bind *bind;
	unsigned int i, thread, num;
	int cntr_idx, escr_idx;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
	bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);

	for (i = 0, num = n; i < n; i++, num--) {

		hwc = &cpuc->event_list[i]->hw;
		thread = p4_ht_thread(cpu);
		bind = p4_config_get_bind(hwc->config);
		escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
		if (unlikely(escr_idx == -1))
			goto done;

		if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
			cntr_idx = hwc->idx;
			if (assign)
				assign[i] = hwc->idx;
			goto reserve;
		}

		cntr_idx = p4_next_cntr(thread, used_mask, bind);
		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
			goto done;

		p4_pmu_swap_config_ts(hwc, cpu);
		if (assign)
			assign[i] = cntr_idx;
reserve:
		set_bit(cntr_idx, used_mask);
		set_bit(escr_idx, escr_mask);
	}

done:
	return num ? -ENOSPC : 0;
}

static __initconst const struct x86_pmu p4_pmu = {
	.name			= "Netburst P4/Xeon",
	.handle_irq		= p4_pmu_handle_irq,
	.disable_all		= p4_pmu_disable_all,
	.enable_all		= p4_pmu_enable_all,
	.enable			= p4_pmu_enable_event,
	.disable		= p4_pmu_disable_event,
	.eventsel		= MSR_P4_BPU_CCCR0,
	.perfctr		= MSR_P4_BPU_PERFCTR0,
	.event_map		= p4_pmu_event_map,
	.max_events		= ARRAY_SIZE(p4_general_events),
	.get_event_constraints	= x86_get_event_constraints,
	/*
	 * If HT is disabled we may need to use all
	 * ARCH_P4_MAX_CCCR counters simultaneously,
	 * though we leave it restricted for the moment,
	 * assuming HT is on
	 */
	.num_counters		= ARCH_P4_MAX_CCCR,
	.apic			= 1,
	.cntval_bits		= ARCH_P4_CNTRVAL_BITS,
	.cntval_mask		= ARCH_P4_CNTRVAL_MASK,
	.max_period		= (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
	.hw_config		= p4_hw_config,
	.schedule_events	= p4_pmu_schedule_events,
	/*
	 * This handles erratum N15 in intel doc 249199-029:
	 * the counter may not be updated correctly on write,
	 * so we need a second write operation to do the trick
	 * (the official workaround didn't work)
	 *
	 * the former idea is taken from OProfile code
	 */
	.perfctr_second_write	= 1,
};

static __init int p4_pmu_init(void)
{
	unsigned int low, high;

	/* If we get stripped -- indexing fails */
	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!(low & (1 << 7))) {
		pr_cont("unsupported Netburst CPU model %d ",
			boot_cpu_data.x86_model);
		return -ENODEV;
	}

	memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
		sizeof(hw_cache_event_ids));

	pr_cont("Netburst events, ");

	x86_pmu = p4_pmu;

	return 0;
}

#endif /* CONFIG_CPU_SUP_INTEL */