GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/kernel/cpu/perf_event_p4.c
/*
* Netburst Performance Events (P4, old Xeon)
*
* Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <[email protected]>
* Copyright (C) 2010 Intel Corporation, Lin Ming <[email protected]>
*
* For licensing details see kernel-base/COPYING
*/
#ifdef CONFIG_CPU_SUP_INTEL

#include <asm/perf_event_p4.h>

#define P4_CNTR_LIMIT 3
/*
* array indices: 0,1 - HT threads, used with HT enabled cpu
*/
struct p4_event_bind {
unsigned int opcode; /* Event code and ESCR selector */
unsigned int escr_msr[2]; /* ESCR MSR for this event */
unsigned int escr_emask; /* valid ESCR EventMask bits */
unsigned int shared; /* event is shared across threads */
char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */
};
25
26
struct p4_pebs_bind {
27
unsigned int metric_pebs;
28
unsigned int metric_vert;
29
};
30
31
/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
32
#define P4_GEN_PEBS_BIND(name, pebs, vert) \
33
[P4_PEBS_METRIC__##name] = { \
34
.metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \
35
.metric_vert = vert, \
36
}
37
/*
* note we have P4_PEBS_ENABLE_UOP_TAG always set here
*
* it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
* event configuration to find out which values are to be
* written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
* registers
*/
46
static struct p4_pebs_bind p4_pebs_bind_map[] = {
47
P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
48
P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001),
49
P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001),
50
P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002),
51
P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003),
52
P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010),
53
P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001),
54
P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001),
55
P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002),
56
};
57
/*
* Note that we don't use CCCR1 here, there is an
* exception for P4_BSQ_ALLOCATION but we just have
* no workaround
*
* consider this binding as resources which a particular
* event may borrow, it doesn't contain EventMask,
* Tags and friends -- they are left to the caller
*/
67
static struct p4_event_bind p4_event_bind_map[] = {
68
[P4_EVENT_TC_DELIVER_MODE] = {
69
.opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
70
.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
71
.escr_emask =
72
P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
73
P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
74
P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
75
P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
76
P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
77
P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
78
P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
79
.shared = 1,
80
.cntr = { {4, 5, -1}, {6, 7, -1} },
81
},
82
[P4_EVENT_BPU_FETCH_REQUEST] = {
83
.opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
84
.escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
85
.escr_emask =
86
P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
87
.cntr = { {0, -1, -1}, {2, -1, -1} },
88
},
89
[P4_EVENT_ITLB_REFERENCE] = {
90
.opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
91
.escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
92
.escr_emask =
93
P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
94
P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
95
P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
96
.cntr = { {0, -1, -1}, {2, -1, -1} },
97
},
98
[P4_EVENT_MEMORY_CANCEL] = {
99
.opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
100
.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
101
.escr_emask =
102
P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
103
P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
104
.cntr = { {8, 9, -1}, {10, 11, -1} },
105
},
106
[P4_EVENT_MEMORY_COMPLETE] = {
107
.opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
108
.escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
109
.escr_emask =
110
P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
111
P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
112
.cntr = { {8, 9, -1}, {10, 11, -1} },
113
},
114
[P4_EVENT_LOAD_PORT_REPLAY] = {
115
.opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
116
.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
117
.escr_emask =
118
P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
119
.cntr = { {8, 9, -1}, {10, 11, -1} },
120
},
121
[P4_EVENT_STORE_PORT_REPLAY] = {
122
.opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
123
.escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
124
.escr_emask =
125
P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
126
.cntr = { {8, 9, -1}, {10, 11, -1} },
127
},
128
[P4_EVENT_MOB_LOAD_REPLAY] = {
129
.opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
130
.escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
131
.escr_emask =
132
P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
133
P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
134
P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
135
P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
136
.cntr = { {0, -1, -1}, {2, -1, -1} },
137
},
138
[P4_EVENT_PAGE_WALK_TYPE] = {
139
.opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
140
.escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
141
.escr_emask =
142
P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
143
P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
144
.shared = 1,
145
.cntr = { {0, -1, -1}, {2, -1, -1} },
146
},
147
[P4_EVENT_BSQ_CACHE_REFERENCE] = {
148
.opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
149
.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
150
.escr_emask =
151
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
152
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
153
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
154
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
155
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
156
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
157
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
158
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
159
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
160
.cntr = { {0, -1, -1}, {2, -1, -1} },
161
},
162
[P4_EVENT_IOQ_ALLOCATION] = {
163
.opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
164
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
165
.escr_emask =
166
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
167
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
168
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
169
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
170
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
171
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
172
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
173
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
174
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
175
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
176
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
177
.cntr = { {0, -1, -1}, {2, -1, -1} },
178
},
179
[P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
180
.opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
181
.escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
182
.escr_emask =
183
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
184
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
185
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
186
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
187
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
188
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
189
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
190
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
191
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
192
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
193
P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
194
.cntr = { {2, -1, -1}, {3, -1, -1} },
195
},
196
[P4_EVENT_FSB_DATA_ACTIVITY] = {
197
.opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
198
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
199
.escr_emask =
200
P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
201
P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
202
P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
203
P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
204
P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
205
P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
206
.shared = 1,
207
.cntr = { {0, -1, -1}, {2, -1, -1} },
208
},
209
[P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
210
.opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
211
.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
212
.escr_emask =
213
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
214
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
215
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
216
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
217
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
218
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
219
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
220
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
221
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
222
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
223
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
224
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
225
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
226
.cntr = { {0, -1, -1}, {1, -1, -1} },
227
},
228
[P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
229
.opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
230
.escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
231
.escr_emask =
232
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
233
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
234
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
235
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
236
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
237
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
238
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
239
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
240
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
241
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
242
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
243
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
244
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
245
.cntr = { {2, -1, -1}, {3, -1, -1} },
246
},
247
[P4_EVENT_SSE_INPUT_ASSIST] = {
248
.opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
249
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
250
.escr_emask =
251
P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
252
.shared = 1,
253
.cntr = { {8, 9, -1}, {10, 11, -1} },
254
},
255
[P4_EVENT_PACKED_SP_UOP] = {
256
.opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
257
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
258
.escr_emask =
259
P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
260
.shared = 1,
261
.cntr = { {8, 9, -1}, {10, 11, -1} },
262
},
263
[P4_EVENT_PACKED_DP_UOP] = {
264
.opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
265
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
266
.escr_emask =
267
P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
268
.shared = 1,
269
.cntr = { {8, 9, -1}, {10, 11, -1} },
270
},
271
[P4_EVENT_SCALAR_SP_UOP] = {
272
.opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
273
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
274
.escr_emask =
275
P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
276
.shared = 1,
277
.cntr = { {8, 9, -1}, {10, 11, -1} },
278
},
279
[P4_EVENT_SCALAR_DP_UOP] = {
280
.opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
281
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
282
.escr_emask =
283
P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
284
.shared = 1,
285
.cntr = { {8, 9, -1}, {10, 11, -1} },
286
},
287
[P4_EVENT_64BIT_MMX_UOP] = {
288
.opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
289
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
290
.escr_emask =
291
P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
292
.shared = 1,
293
.cntr = { {8, 9, -1}, {10, 11, -1} },
294
},
295
[P4_EVENT_128BIT_MMX_UOP] = {
296
.opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
297
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
298
.escr_emask =
299
P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
300
.shared = 1,
301
.cntr = { {8, 9, -1}, {10, 11, -1} },
302
},
303
[P4_EVENT_X87_FP_UOP] = {
304
.opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
305
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
306
.escr_emask =
307
P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
308
.shared = 1,
309
.cntr = { {8, 9, -1}, {10, 11, -1} },
310
},
311
[P4_EVENT_TC_MISC] = {
312
.opcode = P4_OPCODE(P4_EVENT_TC_MISC),
313
.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
314
.escr_emask =
315
P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
316
.cntr = { {4, 5, -1}, {6, 7, -1} },
317
},
318
[P4_EVENT_GLOBAL_POWER_EVENTS] = {
319
.opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
320
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
321
.escr_emask =
322
P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
323
.cntr = { {0, -1, -1}, {2, -1, -1} },
324
},
325
[P4_EVENT_TC_MS_XFER] = {
326
.opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
327
.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
328
.escr_emask =
329
P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
330
.cntr = { {4, 5, -1}, {6, 7, -1} },
331
},
332
[P4_EVENT_UOP_QUEUE_WRITES] = {
333
.opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
334
.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
335
.escr_emask =
336
P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
337
P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
338
P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
339
.cntr = { {4, 5, -1}, {6, 7, -1} },
340
},
341
[P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
342
.opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
343
.escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
344
.escr_emask =
345
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
346
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
347
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
348
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
349
.cntr = { {4, 5, -1}, {6, 7, -1} },
350
},
351
[P4_EVENT_RETIRED_BRANCH_TYPE] = {
352
.opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
353
.escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
354
.escr_emask =
355
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
356
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
357
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
358
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
359
.cntr = { {4, 5, -1}, {6, 7, -1} },
360
},
361
[P4_EVENT_RESOURCE_STALL] = {
362
.opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
363
.escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
364
.escr_emask =
365
P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
366
.cntr = { {12, 13, 16}, {14, 15, 17} },
367
},
368
[P4_EVENT_WC_BUFFER] = {
369
.opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
370
.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
371
.escr_emask =
372
P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
373
P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
374
.shared = 1,
375
.cntr = { {8, 9, -1}, {10, 11, -1} },
376
},
377
[P4_EVENT_B2B_CYCLES] = {
378
.opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
379
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
380
.escr_emask = 0,
381
.cntr = { {0, -1, -1}, {2, -1, -1} },
382
},
383
[P4_EVENT_BNR] = {
384
.opcode = P4_OPCODE(P4_EVENT_BNR),
385
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
386
.escr_emask = 0,
387
.cntr = { {0, -1, -1}, {2, -1, -1} },
388
},
389
[P4_EVENT_SNOOP] = {
390
.opcode = P4_OPCODE(P4_EVENT_SNOOP),
391
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
392
.escr_emask = 0,
393
.cntr = { {0, -1, -1}, {2, -1, -1} },
394
},
395
[P4_EVENT_RESPONSE] = {
396
.opcode = P4_OPCODE(P4_EVENT_RESPONSE),
397
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
398
.escr_emask = 0,
399
.cntr = { {0, -1, -1}, {2, -1, -1} },
400
},
401
[P4_EVENT_FRONT_END_EVENT] = {
402
.opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
403
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
404
.escr_emask =
405
P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
406
P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
407
.cntr = { {12, 13, 16}, {14, 15, 17} },
408
},
409
[P4_EVENT_EXECUTION_EVENT] = {
410
.opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
411
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
412
.escr_emask =
413
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
414
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
415
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
416
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
417
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
418
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
419
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
420
P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
421
.cntr = { {12, 13, 16}, {14, 15, 17} },
422
},
423
[P4_EVENT_REPLAY_EVENT] = {
424
.opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
425
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
426
.escr_emask =
427
P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
428
P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
429
.cntr = { {12, 13, 16}, {14, 15, 17} },
430
},
431
[P4_EVENT_INSTR_RETIRED] = {
432
.opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
433
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
434
.escr_emask =
435
P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
436
P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
437
P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
438
P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
439
.cntr = { {12, 13, 16}, {14, 15, 17} },
440
},
441
[P4_EVENT_UOPS_RETIRED] = {
442
.opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
443
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
444
.escr_emask =
445
P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
446
P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
447
.cntr = { {12, 13, 16}, {14, 15, 17} },
448
},
449
[P4_EVENT_UOP_TYPE] = {
450
.opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
451
.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
452
.escr_emask =
453
P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
454
P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
455
.cntr = { {12, 13, 16}, {14, 15, 17} },
456
},
457
[P4_EVENT_BRANCH_RETIRED] = {
458
.opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
459
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
460
.escr_emask =
461
P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
462
P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
463
P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
464
P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
465
.cntr = { {12, 13, 16}, {14, 15, 17} },
466
},
467
[P4_EVENT_MISPRED_BRANCH_RETIRED] = {
468
.opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
469
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
470
.escr_emask =
471
P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
472
.cntr = { {12, 13, 16}, {14, 15, 17} },
473
},
474
[P4_EVENT_X87_ASSIST] = {
475
.opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
476
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
477
.escr_emask =
478
P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
479
P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
480
P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
481
P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
482
P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
483
.cntr = { {12, 13, 16}, {14, 15, 17} },
484
},
485
[P4_EVENT_MACHINE_CLEAR] = {
486
.opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
487
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
488
.escr_emask =
489
P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
490
P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
491
P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
492
.cntr = { {12, 13, 16}, {14, 15, 17} },
493
},
494
[P4_EVENT_INSTR_COMPLETED] = {
495
.opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
496
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
497
.escr_emask =
498
P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
499
P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
500
.cntr = { {12, 13, 16}, {14, 15, 17} },
501
},
502
};
503
504
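/*
* Builds a complete config word for a cache event: the ESCR part carries
* the event code and the EventMask bit, the CCCR part carries the PEBS
* metric index plus the ESCR select (ESEL) derived from the opcode.
*/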
#define P4_GEN_CACHE_EVENT(event, bit, metric) \
505
p4_config_pack_escr(P4_ESCR_EVENT(event) | \
506
P4_ESCR_EMASK_BIT(event, bit)) | \
507
p4_config_pack_cccr(metric | \
508
P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
509
510
static __initconst const u64 p4_hw_cache_event_ids
511
[PERF_COUNT_HW_CACHE_MAX]
512
[PERF_COUNT_HW_CACHE_OP_MAX]
513
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
514
{
515
[ C(L1D ) ] = {
516
[ C(OP_READ) ] = {
517
[ C(RESULT_ACCESS) ] = 0x0,
518
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
519
P4_PEBS_METRIC__1stl_cache_load_miss_retired),
520
},
521
},
522
[ C(LL ) ] = {
523
[ C(OP_READ) ] = {
524
[ C(RESULT_ACCESS) ] = 0x0,
525
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
526
P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
527
},
528
},
529
[ C(DTLB) ] = {
530
[ C(OP_READ) ] = {
531
[ C(RESULT_ACCESS) ] = 0x0,
532
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
533
P4_PEBS_METRIC__dtlb_load_miss_retired),
534
},
535
[ C(OP_WRITE) ] = {
536
[ C(RESULT_ACCESS) ] = 0x0,
537
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
538
P4_PEBS_METRIC__dtlb_store_miss_retired),
539
},
540
},
541
[ C(ITLB) ] = {
542
[ C(OP_READ) ] = {
543
[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
544
P4_PEBS_METRIC__none),
545
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
546
P4_PEBS_METRIC__none),
547
},
548
[ C(OP_WRITE) ] = {
549
[ C(RESULT_ACCESS) ] = -1,
550
[ C(RESULT_MISS) ] = -1,
551
},
552
[ C(OP_PREFETCH) ] = {
553
[ C(RESULT_ACCESS) ] = -1,
554
[ C(RESULT_MISS) ] = -1,
555
},
556
},
557
};
558
559
static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
560
/* non-halted CPU clocks */
561
[PERF_COUNT_HW_CPU_CYCLES] =
562
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
563
P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
564
565
/*
* retired instructions
* for the sake of simplicity we don't use the FSB tagging
*/
569
[PERF_COUNT_HW_INSTRUCTIONS] =
570
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) |
571
P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
572
P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
573
574
/* cache hits */
575
[PERF_COUNT_HW_CACHE_REFERENCES] =
576
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
577
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
578
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
579
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
580
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
581
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
582
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
583
584
/* cache misses */
585
[PERF_COUNT_HW_CACHE_MISSES] =
586
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
587
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
588
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
589
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
590
591
/* branch instructions retired */
592
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
593
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) |
594
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
595
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
596
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
597
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
598
599
/* mispredicted branches retired */
600
[PERF_COUNT_HW_BRANCH_MISSES] =
601
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) |
602
P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
603
604
/* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
605
[PERF_COUNT_HW_BUS_CYCLES] =
606
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) |
607
P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
608
P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) |
609
p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
610
};
611
612
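/*
* Look up the static binding for the event encoded in @config;
* returns NULL if the event index is out of range.
*/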
static struct p4_event_bind *p4_config_get_bind(u64 config)
613
{
614
unsigned int evnt = p4_config_unpack_event(config);
615
struct p4_event_bind *bind = NULL;
616
617
if (evnt < ARRAY_SIZE(p4_event_bind_map))
618
bind = &p4_event_bind_map[evnt];
619
620
return bind;
621
}
622
623
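/*
* Map a generic PERF_COUNT_HW_* index onto a complete P4 config,
* adding the ESCR select (ESEL) bits to the CCCR part.
*/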
static u64 p4_pmu_event_map(int hw_event)
624
{
625
struct p4_event_bind *bind;
626
unsigned int esel;
627
u64 config;
628
629
config = p4_general_events[hw_event];
630
bind = p4_config_get_bind(config);
631
esel = P4_OPCODE_ESEL(bind->opcode);
632
config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
633
634
return config;
635
}
636
637
/* check cpu model specifics */
638
static bool p4_event_match_cpu_model(unsigned int event_idx)
639
{
640
/* INSTR_COMPLETED event only exists for models 3, 4, 6 (Prescott) */
641
if (event_idx == P4_EVENT_INSTR_COMPLETED) {
642
if (boot_cpu_data.x86_model != 3 &&
643
boot_cpu_data.x86_model != 4 &&
644
boot_cpu_data.x86_model != 6)
645
return false;
646
}
647
648
/*
* For info:
* - IQ_ESCR0, IQ_ESCR1 are available only on models 1 and 2
*/
652
653
return true;
654
}
655
656
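/*
* Sanity-check a user supplied raw config: the event index, the cpu
* model, the ESCR EventMask bits and the PEBS metric all have to be
* valid, and PEBS itself may not be enabled from user space.
*/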
static int p4_validate_raw_event(struct perf_event *event)
657
{
658
unsigned int v, emask;
659
660
/* User data may have an out-of-bounds event index */
661
v = p4_config_unpack_event(event->attr.config);
662
if (v >= ARRAY_SIZE(p4_event_bind_map))
663
return -EINVAL;
664
665
/* It may be unsupported: */
666
if (!p4_event_match_cpu_model(v))
667
return -EINVAL;
668
669
/*
* NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as
* in Architectural Performance Monitoring: it selects not
* _which_ logical cpu to count on but rather _when_, ie it
* depends on the logical cpu state -- count the event if one cpu is
* active, none, both or any, so we just allow the user to pass any
* value desired.
*
* In turn we always set the Tx_OS/Tx_USR bits bound to the logical
* cpu without propagating them to the other cpu
*/
680
681
/*
682
* if an event is shared across the logical threads
683
* the user needs special permissions to be able to use it
684
*/
685
if (p4_ht_active() && p4_event_bind_map[v].shared) {
686
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
687
return -EACCES;
688
}
689
690
/* ESCR EventMask bits may be invalid */
691
emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
692
if (emask & ~p4_event_bind_map[v].escr_emask)
693
return -EINVAL;
694
695
/*
696
* it may have some invalid PEBS bits
697
*/
698
if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
699
return -EINVAL;
700
701
v = p4_config_unpack_metric(event->attr.config);
702
if (v >= ARRAY_SIZE(p4_pebs_bind_map))
703
return -EINVAL;
704
705
return 0;
706
}
707
708
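/*
* Build the initial hardware config for an event on the current cpu:
* default ESCR/CCCR bits, the HT bit if needed and, for RAW events,
* the validated user supplied config plus the matching ESEL bits.
*/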
static int p4_hw_config(struct perf_event *event)
709
{
710
int cpu = get_cpu();
711
int rc = 0;
712
u32 escr, cccr;
713
714
/*
* the reason we grab the cpu this early is that if we get scheduled
* for the first time on the same cpu -- we will not need to swap
* thread specific flags in the config (and will save some cpu cycles)
*/
719
720
cccr = p4_default_cccr_conf(cpu);
721
escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
722
event->attr.exclude_user);
723
event->hw.config = p4_config_pack_escr(escr) |
724
p4_config_pack_cccr(cccr);
725
726
if (p4_ht_active() && p4_ht_thread(cpu))
727
event->hw.config = p4_set_ht_bit(event->hw.config);
728
729
if (event->attr.type == PERF_TYPE_RAW) {
730
struct p4_event_bind *bind;
731
unsigned int esel;
732
/*
* Clear bits we reserve to be managed by the kernel itself
* and never allowed from user space
*/
736
event->attr.config &= P4_CONFIG_MASK;
737
738
rc = p4_validate_raw_event(event);
739
if (rc)
740
goto out;
741
742
/*
* Note that for RAW events we allow the user to use P4_CCCR_RESERVED
* bits since we keep additional info here (for cache events etc.)
*/
746
event->hw.config |= event->attr.config;
747
bind = p4_config_get_bind(event->attr.config);
748
if (!bind) {
749
rc = -EINVAL;
750
goto out;
751
}
752
esel = P4_OPCODE_ESEL(bind->opcode);
753
event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
754
}
755
756
rc = x86_setup_perfctr(event);
757
out:
758
put_cpu();
759
return rc;
760
}
761
762
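/*
* Check whether the counter behind @hwc overflowed: clear P4_CCCR_OVF
* if the hardware set it, otherwise fall back to testing the counter's
* high bit (an unflagged overflow). Returns 1 on overflow, 0 otherwise.
*/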
static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
763
{
764
u64 v;
765
766
/* the official way to indicate an overflow */
767
rdmsrl(hwc->config_base, v);
768
if (v & P4_CCCR_OVF) {
769
wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
770
return 1;
771
}
772
773
/*
* In some circumstances the overflow might issue an NMI but did
* not set the P4_CCCR_OVF bit. Because a counter holds a negative
* value we simply check whether the high bit is set; if it's clear,
* the counter has reached zero and continued counting before the
* real NMI signal was received:
*/
780
rdmsrl(hwc->event_base, v);
781
if (!(v & ARCH_P4_UNFLAGGED_BIT))
782
return 1;
783
784
return 0;
785
}
786
787
static void p4_pmu_disable_pebs(void)
788
{
789
/*
* FIXME
*
* It's still allowed that two threads set up the same cache
* events, so we can't simply clear the metrics until we know
* no one is depending on us; we need some kind of counter
* for "ReplayEvent" users.
*
* What is more complex -- RAW events: if the user (for some
* reason) passes a cache event metric with an improper
* event opcode -- it's fine from the hardware point of view
* but complete nonsense as far as the "meaning" of such an
* action goes.
*
* So for the moment leave the metrics turned on forever -- it's
* ok for now but needs to be revisited!
*
* (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
* (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
*/
808
}
809
810
static inline void p4_pmu_disable_event(struct perf_event *event)
811
{
812
struct hw_perf_event *hwc = &event->hw;
813
814
/*
* If the event gets disabled while the counter is in an overflowed
* state we need to clear P4_CCCR_OVF, otherwise the interrupt gets
* asserted again and again
*/
819
(void)checking_wrmsrl(hwc->config_base,
820
(u64)(p4_config_unpack_cccr(hwc->config)) &
821
~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
822
}
823
824
static void p4_pmu_disable_all(void)
825
{
826
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
827
int idx;
828
829
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
830
struct perf_event *event = cpuc->events[idx];
831
if (!test_bit(idx, cpuc->active_mask))
832
continue;
833
p4_pmu_disable_event(event);
834
}
835
836
p4_pmu_disable_pebs();
837
}
838
839
/* configuration must be valid */
840
static void p4_pmu_enable_pebs(u64 config)
841
{
842
struct p4_pebs_bind *bind;
843
unsigned int idx;
844
845
BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
846
847
idx = p4_config_unpack_metric(config);
848
if (idx == P4_PEBS_METRIC__none)
849
return;
850
851
bind = &p4_pebs_bind_map[idx];
852
853
(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
854
(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
855
}
856
857
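/*
* Program the ESCR and CCCR for @event on the current thread (plus the
* PEBS metric MSRs for cache events) and finally set P4_CCCR_ENABLE.
*/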
static void p4_pmu_enable_event(struct perf_event *event)
858
{
859
struct hw_perf_event *hwc = &event->hw;
860
int thread = p4_ht_config_thread(hwc->config);
861
u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
862
unsigned int idx = p4_config_unpack_event(hwc->config);
863
struct p4_event_bind *bind;
864
u64 escr_addr, cccr;
865
866
bind = &p4_event_bind_map[idx];
867
escr_addr = (u64)bind->escr_msr[thread];
868
869
/*
* - we don't support cascaded counters yet
* - and counter 1 is broken (erratum)
*/
873
WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
874
WARN_ON_ONCE(hwc->idx == 1);
875
876
/* we need a real Event value */
877
escr_conf &= ~P4_ESCR_EVENT_MASK;
878
escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
879
880
cccr = p4_config_unpack_cccr(hwc->config);
881
882
/*
* it could be a cache event, so we may need to write the metrics
* into additional MSRs
*/
886
p4_pmu_enable_pebs(hwc->config);
887
888
(void)checking_wrmsrl(escr_addr, escr_conf);
889
(void)checking_wrmsrl(hwc->config_base,
890
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
891
}
892
893
static void p4_pmu_enable_all(int added)
894
{
895
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
896
int idx;
897
898
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
899
struct perf_event *event = cpuc->events[idx];
900
if (!test_bit(idx, cpuc->active_mask))
901
continue;
902
p4_pmu_enable_event(event);
903
}
904
}
905
906
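/*
* PMI handler: walk the active counters, figure out which of them
* overflowed, re-program the overflowed events and re-arm the LVTPC.
*/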
static int p4_pmu_handle_irq(struct pt_regs *regs)
907
{
908
struct perf_sample_data data;
909
struct cpu_hw_events *cpuc;
910
struct perf_event *event;
911
struct hw_perf_event *hwc;
912
int idx, handled = 0;
913
u64 val;
914
915
perf_sample_data_init(&data, 0);
916
917
cpuc = &__get_cpu_var(cpu_hw_events);
918
919
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
920
int overflow;
921
922
if (!test_bit(idx, cpuc->active_mask)) {
923
/* catch in-flight IRQs */
924
if (__test_and_clear_bit(idx, cpuc->running))
925
handled++;
926
continue;
927
}
928
929
event = cpuc->events[idx];
930
hwc = &event->hw;
931
932
WARN_ON_ONCE(hwc->idx != idx);
933
934
/* it might be unflagged overflow */
935
overflow = p4_pmu_clear_cccr_ovf(hwc);
936
937
val = x86_perf_event_update(event);
938
if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
939
continue;
940
941
handled += overflow;
942
943
/* event overflow for sure */
944
data.period = event->hw.last_period;
945
946
if (!x86_perf_event_set_period(event))
947
continue;
948
if (perf_event_overflow(event, 1, &data, regs))
949
x86_pmu_stop(event, 0);
950
}
951
952
if (handled)
953
inc_irq_stat(apic_perf_irqs);
954
955
/*
* When dealing with the unmasking of the LVTPC on P4 perf hw, it has
* been observed that the OVF bit flag has to be cleared first _before_
* the LVTPC can be unmasked.
*
* The reason is that the NMI line will continue to be asserted while
* the OVF bit is set. This causes a second NMI to be generated if the
* LVTPC is unmasked before the OVF bit is cleared, leading to unknown
* NMI messages.
*/
965
apic_write(APIC_LVTPC, APIC_DM_NMI);
966
967
return handled;
968
}
969
970
/*
* swap thread specific fields according to the thread
* we are going to run on
*/
974
static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
975
{
976
u32 escr, cccr;
977
978
/*
* either we are lucky and continue on the same cpu, or there is no HT support
*/
981
if (!p4_should_swap_ts(hwc->config, cpu))
982
return;
983
984
/*
* the event is migrated from another logical
* cpu, so we need to swap thread specific flags
*/
988
989
escr = p4_config_unpack_escr(hwc->config);
990
cccr = p4_config_unpack_cccr(hwc->config);
991
992
if (p4_ht_thread(cpu)) {
993
cccr &= ~P4_CCCR_OVF_PMI_T0;
994
cccr |= P4_CCCR_OVF_PMI_T1;
995
if (escr & P4_ESCR_T0_OS) {
996
escr &= ~P4_ESCR_T0_OS;
997
escr |= P4_ESCR_T1_OS;
998
}
999
if (escr & P4_ESCR_T0_USR) {
1000
escr &= ~P4_ESCR_T0_USR;
1001
escr |= P4_ESCR_T1_USR;
1002
}
1003
hwc->config = p4_config_pack_escr(escr);
1004
hwc->config |= p4_config_pack_cccr(cccr);
1005
hwc->config |= P4_CONFIG_HT;
1006
} else {
1007
cccr &= ~P4_CCCR_OVF_PMI_T1;
1008
cccr |= P4_CCCR_OVF_PMI_T0;
1009
if (escr & P4_ESCR_T1_OS) {
1010
escr &= ~P4_ESCR_T1_OS;
1011
escr |= P4_ESCR_T0_OS;
1012
}
1013
if (escr & P4_ESCR_T1_USR) {
1014
escr &= ~P4_ESCR_T1_USR;
1015
escr |= P4_ESCR_T0_USR;
1016
}
1017
hwc->config = p4_config_pack_escr(escr);
1018
hwc->config |= p4_config_pack_cccr(cccr);
1019
hwc->config &= ~P4_CONFIG_HT;
1020
}
1021
}
1022
1023
/*
* ESCR address hashing is tricky: the ESCRs are not sequential
* in memory but all start from MSR_P4_BSU_ESCR0 (0x03a0), and
* the low byte of any ESCR address lies in the range [0xa0,0xe1]
*
* so we end up with a ~70% filled hashtable
*/
1030
1031
#define P4_ESCR_MSR_BASE 0x000003a0
1032
#define P4_ESCR_MSR_MAX 0x000003e1
1033
#define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
1034
#define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE)
1035
#define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr
1036
1037
static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
1038
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
1039
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
1040
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
1041
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
1042
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
1043
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
1044
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
1045
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
1046
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
1047
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
1048
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
1049
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
1050
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
1051
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
1052
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
1053
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
1054
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
1055
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
1056
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
1057
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
1058
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
1059
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
1060
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
1061
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
1062
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
1063
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
1064
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
1065
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
1066
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
1067
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
1068
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
1069
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
1070
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
1071
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
1072
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
1073
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
1074
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
1075
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
1076
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
1077
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
1078
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
1079
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
1080
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
1081
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
1082
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
1083
P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
1084
};
1085
1086
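/*
* Map an ESCR MSR address onto an index of p4_escr_table above;
* unknown addresses trigger a warning and return -1.
*/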
static int p4_get_escr_idx(unsigned int addr)
1087
{
1088
unsigned int idx = P4_ESCR_MSR_IDX(addr);
1089
1090
if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE ||
1091
!p4_escr_table[idx] ||
1092
p4_escr_table[idx] != addr)) {
1093
WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
1094
return -1;
1095
}
1096
1097
return idx;
1098
}
1099
1100
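/*
* Return the first counter from the binding's per-thread list which is
* not yet claimed in @used_mask, or -1 if all of them are taken.
*/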
static int p4_next_cntr(int thread, unsigned long *used_mask,
1101
struct p4_event_bind *bind)
1102
{
1103
int i, j;
1104
1105
for (i = 0; i < P4_CNTR_LIMIT; i++) {
1106
j = bind->cntr[thread][i];
1107
if (j != -1 && !test_bit(j, used_mask))
1108
return j;
1109
}
1110
1111
return -1;
1112
}
1113
1114
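/*
* Assign counters to the first @n events of cpuc->event_list: every
* event needs a free counter from its binding and an unclaimed ESCR;
* events already sitting on a suitable counter keep their slot.
* Returns -ENOSPC when not all events could be placed.
*/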
static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
1115
{
1116
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
1117
unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
1118
int cpu = smp_processor_id();
1119
struct hw_perf_event *hwc;
1120
struct p4_event_bind *bind;
1121
unsigned int i, thread, num;
1122
int cntr_idx, escr_idx;
1123
1124
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1125
bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
1126
1127
for (i = 0, num = n; i < n; i++, num--) {
1128
1129
hwc = &cpuc->event_list[i]->hw;
1130
thread = p4_ht_thread(cpu);
1131
bind = p4_config_get_bind(hwc->config);
1132
escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
1133
if (unlikely(escr_idx == -1))
1134
goto done;
1135
1136
if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
1137
cntr_idx = hwc->idx;
1138
if (assign)
1139
assign[i] = hwc->idx;
1140
goto reserve;
1141
}
1142
1143
cntr_idx = p4_next_cntr(thread, used_mask, bind);
1144
if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
1145
goto done;
1146
1147
p4_pmu_swap_config_ts(hwc, cpu);
1148
if (assign)
1149
assign[i] = cntr_idx;
1150
reserve:
1151
set_bit(cntr_idx, used_mask);
1152
set_bit(escr_idx, escr_mask);
1153
}
1154
1155
done:
1156
return num ? -ENOSPC : 0;
1157
}
1158
1159
static __initconst const struct x86_pmu p4_pmu = {
1160
.name = "Netburst P4/Xeon",
1161
.handle_irq = p4_pmu_handle_irq,
1162
.disable_all = p4_pmu_disable_all,
1163
.enable_all = p4_pmu_enable_all,
1164
.enable = p4_pmu_enable_event,
1165
.disable = p4_pmu_disable_event,
1166
.eventsel = MSR_P4_BPU_CCCR0,
1167
.perfctr = MSR_P4_BPU_PERFCTR0,
1168
.event_map = p4_pmu_event_map,
1169
.max_events = ARRAY_SIZE(p4_general_events),
1170
.get_event_constraints = x86_get_event_constraints,
1171
/*
* If HT is disabled we may need to use all
* ARCH_P4_MAX_CCCR counters simultaneously,
* though for the moment we leave it restricted
* assuming HT is on
*/
1177
.num_counters = ARCH_P4_MAX_CCCR,
1178
.apic = 1,
1179
.cntval_bits = ARCH_P4_CNTRVAL_BITS,
1180
.cntval_mask = ARCH_P4_CNTRVAL_MASK,
1181
.max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
1182
.hw_config = p4_hw_config,
1183
.schedule_events = p4_pmu_schedule_events,
1184
/*
1185
* This handles erratum N15 in intel doc 249199-029,
1186
* the counter may not be updated correctly on write
1187
* so we need a second write operation to do the trick
1188
* (the official workaround didn't work)
1189
*
1190
* the former idea is taken from OProfile code
1191
*/
1192
.perfctr_second_write = 1,
1193
};
1194
1195
static __init int p4_pmu_init(void)
1196
{
1197
unsigned int low, high;
1198
1199
/* If we get stripped -- indexing fails */
1200
BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
1201
1202
rdmsr(MSR_IA32_MISC_ENABLE, low, high);
1203
if (!(low & (1 << 7))) {
1204
pr_cont("unsupported Netburst CPU model %d ",
1205
boot_cpu_data.x86_model);
1206
return -ENODEV;
1207
}
1208
1209
memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
1210
sizeof(hw_cache_event_ids));
1211
1212
pr_cont("Netburst events, ");
1213
1214
x86_pmu = p4_pmu;
1215
1216
return 0;
1217
}
1218
1219
#endif /* CONFIG_CPU_SUP_INTEL */
1220
1221