GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/events/intel/p4.c

/*
 * Netburst Performance Events (P4, old Xeon)
 *
 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <[email protected]>
 * Copyright (C) 2010 Intel Corporation, Lin Ming <[email protected]>
 *
 * For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>

#include <asm/perf_event_p4.h>
#include <asm/cpu_device_id.h>
#include <asm/hardirq.h>
#include <asm/apic.h>
#include <asm/msr.h>

#include "../perf_event.h"

#define P4_CNTR_LIMIT 3
/*
 * array indices: 0,1 - HT threads, used with HT enabled cpu
 */
struct p4_event_bind {
	unsigned int opcode;			/* Event code and ESCR selector */
	unsigned int escr_msr[2];		/* ESCR MSR for this event */
	unsigned int escr_emask;		/* valid ESCR EventMask bits */
	unsigned int shared;			/* event is shared across threads */
	signed char cntr[2][P4_CNTR_LIMIT];	/* counter index (offset), -1 on absence */
};

struct p4_pebs_bind {
	unsigned int metric_pebs;
	unsigned int metric_vert;
};

/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
#define P4_GEN_PEBS_BIND(name, pebs, vert) \
	[P4_PEBS_METRIC__##name] = { \
		.metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \
		.metric_vert = vert, \
	}

/*
 * note we have P4_PEBS_ENABLE_UOP_TAG always set here
 *
 * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
 * event configuration to find out which values are to be
 * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
 * registers
 */
static struct p4_pebs_bind p4_pebs_bind_map[] = {
	P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
	P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001),
	P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001),
	P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002),
	P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003),
	P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010),
	P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001),
	P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001),
	P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002),
};
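
/*
 * Worked example of how this table is consumed (see p4_pmu_enable_pebs()
 * further below): the P4_PEBS_METRIC__dtlb_all_miss_retired entry expands to
 * .metric_pebs = 0x0000004 | P4_PEBS_ENABLE_UOP_TAG and .metric_vert = 0x3,
 * and at enable time the metric index carried in the event config simply
 * selects that row:
 *
 *	bind = &p4_pebs_bind_map[P4_PEBS_METRIC__dtlb_all_miss_retired];
 *	wrmsrq_safe(MSR_IA32_PEBS_ENABLE, bind->metric_pebs);
 *	wrmsrq_safe(MSR_P4_PEBS_MATRIX_VERT, bind->metric_vert);
 *
 * i.e. one row of this table supplies both MSR values for a cache metric.
 */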

/*
 * Note that we don't use CCCR1 here, there is an
 * exception for P4_BSQ_ALLOCATION but we just have
 * no workaround
 *
 * consider this binding as resources which particular
 * event may borrow, it doesn't contain EventMask,
 * Tags and friends -- they are left to a caller
 */
static struct p4_event_bind p4_event_bind_map[] = {
	[P4_EVENT_TC_DELIVER_MODE] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
		.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
		.shared = 1,
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_BPU_FETCH_REQUEST] = {
		.opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
		.escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_ITLB_REFERENCE] = {
		.opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
		.escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
			P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_MEMORY_CANCEL] = {
		.opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
		.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MEMORY_COMPLETE] = {
		.opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_LOAD_PORT_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_STORE_PORT_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MOB_LOAD_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
		.escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_PAGE_WALK_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
		.escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
		.shared = 1,
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_CACHE_REFERENCE] = {
		.opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_IOQ_ALLOCATION] = {
		.opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_IOQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
		.escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
			P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
		.cntr = { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_FSB_DATA_ACTIVITY] = {
		.opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
		.shared = 1,
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_ALLOCATION] = {		/* shared ESCR, broken CCCR1 */
		.opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
		.cntr = { {0, -1, -1}, {1, -1, -1} },
	},
	[P4_EVENT_BSQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
		.escr_msr = { MSR_P4_BSU_ESCR1, MSR_P4_BSU_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
		.cntr = { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_SSE_INPUT_ASSIST] = {
		.opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
		.shared = 1,
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_SP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
		.shared = 1,
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_DP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
		.shared = 1,
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_SP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
		.shared = 1,
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_DP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
		.shared = 1,
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_64BIT_MMX_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
		.shared = 1,
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_128BIT_MMX_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
		.shared = 1,
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_X87_FP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
		.shared = 1,
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_TC_MISC] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_MISC),
		.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_GLOBAL_POWER_EVENTS] = {
		.opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_TC_MS_XFER] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
		.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_UOP_QUEUE_WRITES] = {
		.opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
		.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
		.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR0 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_BRANCH_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
		.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
			P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RESOURCE_STALL] = {
		.opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
		.escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_WC_BUFFER] = {
		.opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
		.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
		.shared = 1,
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_B2B_CYCLES] = {
		.opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask = 0,
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BNR] = {
		.opcode = P4_OPCODE(P4_EVENT_BNR),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask = 0,
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_SNOOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SNOOP),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask = 0,
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_RESPONSE] = {
		.opcode = P4_OPCODE(P4_EVENT_RESPONSE),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.escr_emask = 0,
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_FRONT_END_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_EXECUTION_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
			P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_REPLAY_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOPS_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOP_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
		.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_BRANCH_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
			P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MISPRED_BRANCH_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_X87_ASSIST] = {
		.opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
			P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MACHINE_CLEAR] = {
		.opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
			P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_COMPLETED] = {
		.opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.escr_emask =
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
			P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
};
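
/*
 * Reading the table above, taking P4_EVENT_BPU_FETCH_REQUEST as an example:
 * the event can be counted on thread 0 only via counter 0 and on thread 1
 * only via counter 2 (the -1 slots mean "no further choice"), and the ESCR
 * to program is MSR_P4_BPU_ESCR0 for thread 0 or MSR_P4_BPU_ESCR1 for
 * thread 1.  A minimal sketch of the lookup done in p4_pmu_schedule_events()
 * further below:
 *
 *	bind = p4_config_get_bind(hwc->config);
 *	cntr = p4_next_cntr(p4_ht_thread(cpu), used_mask, bind);
 *
 * i.e. the per-thread row of ->cntr[] is walked until a free counter is found.
 */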

#define P4_GEN_CACHE_EVENT(event, bit, metric) \
	p4_config_pack_escr(P4_ESCR_EVENT(event) | \
			    P4_ESCR_EMASK_BIT(event, bit)) | \
	p4_config_pack_cccr(metric | \
			    P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))

static __initconst const u64 p4_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__1stl_cache_load_miss_retired),
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__dtlb_load_miss_retired),
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__dtlb_store_miss_retired),
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
						P4_PEBS_METRIC__none),
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
						P4_PEBS_METRIC__none),
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};
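
/*
 * How one of these entries expands, using the L1D read-miss slot as an
 * example (a sketch of what P4_GEN_CACHE_EVENT() produces, based solely on
 * the macro above): the ESCR half of the config selects P4_EVENT_REPLAY_EVENT
 * with the NBOGUS mask, while the CCCR half carries both the ESCR select for
 * that opcode and the P4_PEBS_METRIC__1stl_cache_load_miss_retired index.
 * The metric index is unpacked again at enable time by p4_pmu_enable_pebs(),
 * which looks it up in p4_pebs_bind_map[] and programs MSR_IA32_PEBS_ENABLE
 * and MSR_P4_PEBS_MATRIX_VERT accordingly.
 */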

/*
 * Because Netburst is quite restricted in how many
 * identical events may run simultaneously, we introduce event aliases,
 * i.e. different events which have the same functionality but
 * use non-intersecting resources (ESCR/CCCR/counter registers).
 *
 * This allows us to relax the restrictions a bit and run two or more
 * identical events together.
 *
 * Never set any custom internal bits such as P4_CONFIG_HT,
 * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are
 * either up to date automatically or not applicable at all.
 */
static struct p4_event_alias {
	u64 original;
	u64 alternative;
} p4_event_aliases[] = {
	{
		/*
		 * Non-halted cycles can be substituted with non-sleeping cycles (see
		 * Intel SDM Vol3b for details). We need this alias to be able
		 * to run nmi-watchdog and 'perf top' (or any other user space tool
		 * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
		 * simultaneously.
		 */
		.original =
			p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
					    P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
		.alternative =
			p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
					    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
					    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
					    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
					    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
					    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
					    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
					    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
					    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
			p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
					    P4_CCCR_COMPARE),
	},
};

static u64 p4_get_alias_event(u64 config)
{
	u64 config_match;
	int i;

	/*
	 * Only an event carrying the special mark is allowed;
	 * this makes sure it didn't come in as a malformed
	 * RAW event.
	 */
	if (!(config & P4_CONFIG_ALIASABLE))
		return 0;

	config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;

	for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
		if (config_match == p4_event_aliases[i].original) {
			config_match = p4_event_aliases[i].alternative;
			break;
		} else if (config_match == p4_event_aliases[i].alternative) {
			config_match = p4_event_aliases[i].original;
			break;
		}
	}

	if (i >= ARRAY_SIZE(p4_event_aliases))
		return 0;

	return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
}
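
/*
 * Example of the alias round trip (derived from p4_event_aliases[] above):
 * a PERF_COUNT_HW_CPU_CYCLES config carries GLOBAL_POWER_EVENTS/RUNNING plus
 * P4_CONFIG_ALIASABLE, so
 *
 *	alt = p4_get_alias_event(config);
 *
 * hands back the EXECUTION_EVENT encoding (all NBOGUSx/BOGUSx mask bits plus
 * THRESHOLD(15) | COMPLEMENT | COMPARE in the CCCR), with the immutable bits
 * of the original config preserved.  Feeding that result back in returns the
 * GLOBAL_POWER_EVENTS form again, which is what lets the scheduler further
 * below flip between the two encodings when one set of resources is taken.
 */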

static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
	/* non-halted CPU clocks */
	[PERF_COUNT_HW_CPU_CYCLES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
		P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) |
		P4_CONFIG_ALIASABLE,

	/*
	 * retired instructions
	 * for the sake of simplicity we don't use the FSB tagging
	 */
	[PERF_COUNT_HW_INSTRUCTIONS] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) |
		P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
		P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),

	/* cache hits */
	[PERF_COUNT_HW_CACHE_REFERENCES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),

	/* cache misses */
	[PERF_COUNT_HW_CACHE_MISSES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),

	/* branch instructions retired */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) |
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),

	/* mispredicted branches retired */
	[PERF_COUNT_HW_BRANCH_MISSES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) |
		P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),

	/* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
	[PERF_COUNT_HW_BUS_CYCLES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) |
		P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
		P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) |
	p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
};

static struct p4_event_bind *p4_config_get_bind(u64 config)
{
	unsigned int evnt = p4_config_unpack_event(config);
	struct p4_event_bind *bind = NULL;

	if (evnt < ARRAY_SIZE(p4_event_bind_map))
		bind = &p4_event_bind_map[evnt];

	return bind;
}

static u64 p4_pmu_event_map(int hw_event)
{
	struct p4_event_bind *bind;
	unsigned int esel;
	u64 config;

	config = p4_general_events[hw_event];
	bind = p4_config_get_bind(config);
	esel = P4_OPCODE_ESEL(bind->opcode);
	config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));

	return config;
}
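
/*
 * Example of the mapping for a generic event (walked through from the code
 * above): for PERF_COUNT_HW_CPU_CYCLES the config from p4_general_events[]
 * selects P4_EVENT_GLOBAL_POWER_EVENTS with the RUNNING mask; its bind entry
 * supplies the opcode, from which P4_OPCODE_ESEL() extracts the ESCR select,
 * and that select is packed into the CCCR half of the returned config so the
 * counter knows which ESCR to listen to.
 */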

/* check cpu model specifics */
static bool p4_event_match_cpu_model(unsigned int event_idx)
{
	/* The INSTR_COMPLETED event only exists for model 3, 4, 6 (Prescott) */
	if (event_idx == P4_EVENT_INSTR_COMPLETED) {
		if (boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT &&
		    boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT_2M &&
		    boot_cpu_data.x86_vfm != INTEL_P4_CEDARMILL)
			return false;
	}

	/*
	 * For info
	 * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
	 */

	return true;
}

static int p4_validate_raw_event(struct perf_event *event)
{
	unsigned int v, emask;

	/* User data may have an out-of-bound event index */
	v = p4_config_unpack_event(event->attr.config);
	if (v >= ARRAY_SIZE(p4_event_bind_map))
		return -EINVAL;

	/* It may be unsupported: */
	if (!p4_event_match_cpu_model(v))
		return -EINVAL;

	/*
	 * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as
	 * in Architectural Performance Monitoring: it selects not
	 * on _which_ logical cpu to count but rather _when_, i.e. it
	 * depends on the logical cpu state -- count the event if one cpu
	 * is active, none, both or any, so we just allow the user to pass
	 * any value desired.
	 *
	 * In turn we always set the Tx_OS/Tx_USR bits bound to the logical
	 * cpu without propagating them to the other cpu.
	 */

	/*
	 * if an event is shared across the logical threads
	 * the user needs special permissions to be able to use it
	 */
	if (p4_ht_active() && p4_event_bind_map[v].shared) {
		v = perf_allow_cpu();
		if (v)
			return v;
	}

	/* ESCR EventMask bits may be invalid */
	emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
	if (emask & ~p4_event_bind_map[v].escr_emask)
		return -EINVAL;

	/*
	 * it may have some invalid PEBS bits
	 */
	if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
		return -EINVAL;

	v = p4_config_unpack_metric(event->attr.config);
	if (v >= ARRAY_SIZE(p4_pebs_bind_map))
		return -EINVAL;

	return 0;
}
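
/*
 * A couple of concrete rejects the routine above produces (worked through
 * from its checks, not an exhaustive list): a RAW config whose ESCR half
 * sets an EventMask bit that is not listed in the event's escr_emask in
 * p4_event_bind_map[] fails the emask check, and a config with
 * P4_PEBS_CONFIG_ENABLE set by user space is refused outright -- the PEBS
 * machinery here is driven purely by the metric index, never enabled
 * directly from a raw event.
 */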

static int p4_hw_config(struct perf_event *event)
{
	int cpu = get_cpu();
	int rc = 0;
	u32 escr, cccr;

	/*
	 * The reason we grab the cpu this early is that if we get scheduled
	 * for the first time on the same cpu we will not need to swap the
	 * thread-specific flags in the config (and will save some cpu cycles).
	 */

	cccr = p4_default_cccr_conf(cpu);
	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
					event->attr.exclude_user);
	event->hw.config = p4_config_pack_escr(escr) |
			   p4_config_pack_cccr(cccr);

	if (p4_ht_active() && p4_ht_thread(cpu))
		event->hw.config = p4_set_ht_bit(event->hw.config);

	if (event->attr.type == PERF_TYPE_RAW) {
		struct p4_event_bind *bind;
		unsigned int esel;
		/*
		 * Clear bits we reserve to be managed by the kernel itself
		 * and never allow from user space.
		 */
		event->attr.config &= P4_CONFIG_MASK;

		rc = p4_validate_raw_event(event);
		if (rc)
			goto out;

		/*
		 * Note that for RAW events we allow user space to use
		 * P4_CCCR_RESERVED bits since we keep additional info there
		 * (for cache events etc.)
		 */
		event->hw.config |= event->attr.config;
		bind = p4_config_get_bind(event->attr.config);
		if (!bind) {
			rc = -EINVAL;
			goto out;
		}
		esel = P4_OPCODE_ESEL(bind->opcode);
		event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
	}

	rc = x86_setup_perfctr(event);
out:
	put_cpu();
	return rc;
}

static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
{
	u64 v;

	/* an official way for overflow indication */
	rdmsrq(hwc->config_base, v);
	if (v & P4_CCCR_OVF) {
		wrmsrq(hwc->config_base, v & ~P4_CCCR_OVF);
		return 1;
	}

	/*
	 * In some circumstances the overflow might issue an NMI without
	 * setting the P4_CCCR_OVF bit. Because a counter holds a negative
	 * value we simply check whether the high bit is set; if it's cleared
	 * it means the counter reached zero and continued counting before
	 * the real NMI signal was received:
	 */
	rdmsrq(hwc->event_base, v);
	if (!(v & ARCH_P4_UNFLAGGED_BIT))
		return 1;

	return 0;
}
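
/*
 * Why the high-bit test above works (a worked example following the
 * programming done in p4_pmu_set_period() further below): with a sample
 * period of, say, 100000 the counter is written with -100000 truncated to
 * the counter width, so a live, still-counting counter keeps its top bit
 * (ARCH_P4_UNFLAGGED_BIT) set.  Once it counts up through zero that bit
 * goes clear, which is exactly the "overflowed but unflagged" case that is
 * treated as an overflow here.
 */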

static void p4_pmu_disable_pebs(void)
{
	/*
	 * FIXME
	 *
	 * Two threads are still allowed to set up the same cache
	 * events, so we can't simply clear the metrics until we know
	 * nobody depends on us anymore; we would need some kind of
	 * reference counter for "ReplayEvent" users.
	 *
	 * What is more complex are RAW events: if the user (for some
	 * reason) passes a cache event metric with an improper event
	 * opcode, it's fine from the hardware point of view but complete
	 * nonsense as far as the "meaning" of such an action goes.
	 *
	 * So for the moment leave the metrics turned on forever -- it's
	 * ok for now but needs to be revisited!
	 *
	 * (void)wrmsrq_safe(MSR_IA32_PEBS_ENABLE, 0);
	 * (void)wrmsrq_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
	 */
}

static inline void p4_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * If the event gets disabled while the counter is in an overflowed
	 * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets
	 * asserted again and again.
	 */
	(void)wrmsrq_safe(hwc->config_base,
		p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}

static void p4_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_disable_event(event);
	}

	p4_pmu_disable_pebs();
}

/* configuration must be valid */
static void p4_pmu_enable_pebs(u64 config)
{
	struct p4_pebs_bind *bind;
	unsigned int idx;

	BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);

	idx = p4_config_unpack_metric(config);
	if (idx == P4_PEBS_METRIC__none)
		return;

	bind = &p4_pebs_bind_map[idx];

	(void)wrmsrq_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
	(void)wrmsrq_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
}

static void __p4_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int thread = p4_ht_config_thread(hwc->config);
	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
	unsigned int idx = p4_config_unpack_event(hwc->config);
	struct p4_event_bind *bind;
	u64 escr_addr, cccr;

	bind = &p4_event_bind_map[idx];
	escr_addr = bind->escr_msr[thread];

	/*
	 * - we don't support cascaded counters yet
	 * - and counter 1 is broken (erratum)
	 */
	WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
	WARN_ON_ONCE(hwc->idx == 1);

	/* we need a real Event value */
	escr_conf &= ~P4_ESCR_EVENT_MASK;
	escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));

	cccr = p4_config_unpack_cccr(hwc->config);

	/*
	 * it could be a cache event so we need to write the metrics
	 * into the additional MSRs
	 */
	p4_pmu_enable_pebs(hwc->config);

	(void)wrmsrq_safe(escr_addr, escr_conf);
	(void)wrmsrq_safe(hwc->config_base,
				(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}

static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(X86_PMC_IDX_MAX)], p4_running);

static void p4_pmu_enable_event(struct perf_event *event)
{
	int idx = event->hw.idx;

	__set_bit(idx, per_cpu(p4_running, smp_processor_id()));
	__p4_pmu_enable_event(event);
}

static void p4_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		__p4_pmu_enable_event(event);
	}
}

static int p4_pmu_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = this_cpu_read(pmc_prev_left[hwc->idx]);
	int ret;

	ret = x86_perf_event_set_period(event);

	if (hwc->event_base) {
		/*
		 * This handles erratum N15 in intel doc 249199-029:
		 * the counter may not be updated correctly on write,
		 * so we need a second write operation to do the trick
		 * (the official workaround didn't work).
		 *
		 * The idea is taken from the OProfile code.
		 */
		wrmsrq(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
	}

	return ret;
}

static int p4_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int idx, handled = 0;
	u64 val;

	cpuc = this_cpu_ptr(&cpu_hw_events);

	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		int overflow;

		if (!test_bit(idx, cpuc->active_mask)) {
			/* catch in-flight IRQs */
			if (__test_and_clear_bit(idx, per_cpu(p4_running, smp_processor_id())))
				handled++;
			continue;
		}

		event = cpuc->events[idx];
		hwc = &event->hw;

		WARN_ON_ONCE(hwc->idx != idx);

		/* it might be an unflagged overflow */
		overflow = p4_pmu_clear_cccr_ovf(hwc);

		val = x86_perf_event_update(event);
		if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
			continue;

		handled += overflow;

		/* event overflow for sure */
		perf_sample_data_init(&data, 0, hwc->last_period);

		if (!static_call(x86_pmu_set_period)(event))
			continue;

		perf_event_overflow(event, &data, regs);
	}

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	/*
	 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
	 * been observed that the OVF bit flag has to be cleared first _before_
	 * the LVTPC can be unmasked.
	 *
	 * The reason is that the NMI line will continue to be asserted while
	 * the OVF bit is set. This causes a second NMI to be generated if the
	 * LVTPC is unmasked before the OVF bit is cleared, leading to unknown
	 * NMI messages.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	return handled;
}

/*
 * swap thread specific fields according to a thread
 * we are going to run on
 */
static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
{
	u32 escr, cccr;

	/*
	 * either we are lucky and continue on the same cpu, or there
	 * is no HT support
	 */
	if (!p4_should_swap_ts(hwc->config, cpu))
		return;

	/*
	 * the event was migrated from another logical
	 * cpu, so we need to swap the thread-specific flags
	 */

	escr = p4_config_unpack_escr(hwc->config);
	cccr = p4_config_unpack_cccr(hwc->config);

	if (p4_ht_thread(cpu)) {
		cccr &= ~P4_CCCR_OVF_PMI_T0;
		cccr |= P4_CCCR_OVF_PMI_T1;
		if (escr & P4_ESCR_T0_OS) {
			escr &= ~P4_ESCR_T0_OS;
			escr |= P4_ESCR_T1_OS;
		}
		if (escr & P4_ESCR_T0_USR) {
			escr &= ~P4_ESCR_T0_USR;
			escr |= P4_ESCR_T1_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config |= P4_CONFIG_HT;
	} else {
		cccr &= ~P4_CCCR_OVF_PMI_T1;
		cccr |= P4_CCCR_OVF_PMI_T0;
		if (escr & P4_ESCR_T1_OS) {
			escr &= ~P4_ESCR_T1_OS;
			escr |= P4_ESCR_T0_OS;
		}
		if (escr & P4_ESCR_T1_USR) {
			escr &= ~P4_ESCR_T1_USR;
			escr |= P4_ESCR_T0_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config &= ~P4_CONFIG_HT;
	}
}

/*
 * ESCR address hashing is tricky: the ESCRs are not sequential
 * in memory, but they all start from MSR_P4_BSU_ESCR0 (0x03a0) and
 * their addresses lie in the range [0x3a0, 0x3e1], so we end up
 * with a ~70% filled hashtable.
 */

#define P4_ESCR_MSR_BASE		0x000003a0
#define P4_ESCR_MSR_MAX			0x000003e1
#define P4_ESCR_MSR_TABLE_SIZE		(P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
#define P4_ESCR_MSR_IDX(msr)		(msr - P4_ESCR_MSR_BASE)
#define P4_ESCR_MSR_TABLE_ENTRY(msr)	[P4_ESCR_MSR_IDX(msr)] = msr

static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
};

static int p4_get_escr_idx(unsigned int addr)
{
	unsigned int idx = P4_ESCR_MSR_IDX(addr);

	if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE ||
			!p4_escr_table[idx] ||
			p4_escr_table[idx] != addr)) {
		WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
		return -1;
	}

	return idx;
}
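
/*
 * Worked example of the direct-mapped lookup above (all numbers taken from
 * the P4_ESCR_MSR_* defines): the table spans 0x3e1 - 0x3a0 + 1 = 66 slots,
 * MSR_P4_BSU_ESCR0 (0x3a0) hashes to index 0 and the last address (0x3e1)
 * to index 65.  An address inside the range that was never listed in
 * p4_escr_table[] lands on a zero-filled slot, so the
 * p4_escr_table[idx] != addr check fires and the function warns and
 * returns -1.
 */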

static int p4_next_cntr(int thread, unsigned long *used_mask,
			struct p4_event_bind *bind)
{
	int i, j;

	for (i = 0; i < P4_CNTR_LIMIT; i++) {
		j = bind->cntr[thread][i];
		if (j != -1 && !test_bit(j, used_mask))
			return j;
	}

	return -1;
}

static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
	int cpu = smp_processor_id();
	struct hw_perf_event *hwc;
	struct p4_event_bind *bind;
	unsigned int i, thread, num;
	int cntr_idx, escr_idx;
	u64 config_alias;
	int pass;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
	bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);

	for (i = 0, num = n; i < n; i++, num--) {

		hwc = &cpuc->event_list[i]->hw;
		thread = p4_ht_thread(cpu);
		pass = 0;

again:
		/*
		 * It's possible to hit a circular lock
		 * between original and alternative events
		 * if both are scheduled already.
		 */
		if (pass > 2)
			goto done;

		bind = p4_config_get_bind(hwc->config);
		escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
		if (unlikely(escr_idx == -1))
			goto done;

		if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
			cntr_idx = hwc->idx;
			if (assign)
				assign[i] = hwc->idx;
			goto reserve;
		}

		cntr_idx = p4_next_cntr(thread, used_mask, bind);
		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
			/*
			 * Check whether an event alias is still available.
			 */
			config_alias = p4_get_alias_event(hwc->config);
			if (!config_alias)
				goto done;
			hwc->config = config_alias;
			pass++;
			goto again;
		}
		/*
		 * Perf does test runs to see if a whole group can be assigned
		 * together successfully. There can be multiple rounds of this.
		 * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config
		 * bits, such that the next round of group assignments will
		 * cause the above p4_should_swap_ts to pass instead of fail.
		 * This leads to counters exclusive to thread0 being used by
		 * thread1.
		 *
		 * Solve this with a cheap hack, reset the idx back to -1 to
		 * force a new lookup (p4_next_cntr) to get the right counter
		 * for the right thread.
		 *
		 * This probably doesn't comply with the general spirit of how
		 * perf wants to work, but P4 is special. :-(
		 */
		if (p4_should_swap_ts(hwc->config, cpu))
			hwc->idx = -1;
		p4_pmu_swap_config_ts(hwc, cpu);
		if (assign)
			assign[i] = cntr_idx;
reserve:
		set_bit(cntr_idx, used_mask);
		set_bit(escr_idx, escr_mask);
	}

done:
	return num ? -EINVAL : 0;
}
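
/*
 * Sketch of how a scheduling pass plays out for two identical
 * PERF_COUNT_HW_CPU_CYCLES events (derived from the code above): the first
 * event grabs a free counter and marks its ESCR busy in escr_mask; the
 * second one finds the ESCR already reserved, swaps its config to the
 * EXECUTION_EVENT alias via p4_get_alias_event() and retries, now landing
 * on a different ESCR/counter pair.  The pass > 2 guard stops the loop
 * from bouncing forever between the original and the alternative encoding
 * when both are taken.
 */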

PMU_FORMAT_ATTR(cccr, "config:0-31" );
PMU_FORMAT_ATTR(escr, "config:32-62");
PMU_FORMAT_ATTR(ht, "config:63" );

static struct attribute *intel_p4_formats_attr[] = {
	&format_attr_cccr.attr,
	&format_attr_escr.attr,
	&format_attr_ht.attr,
	NULL,
};
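
/*
 * The three format attributes above describe the raw config layout used
 * throughout this file: bits 0-31 hold the CCCR image, bits 32-62 the ESCR
 * image, and bit 63 the HT flag.  An illustrative sketch of assembling such
 * a value by hand, assuming nothing beyond that bit layout:
 *
 *	config  = cccr_bits;			// config:0-31
 *	config |= (u64)escr_bits << 32;		// config:32-62
 *	config |= 1ULL << 63;			// config:63, thread 1
 *
 * which is the same packing the p4_config_pack_escr()/p4_config_pack_cccr()
 * and p4_set_ht_bit() helpers used in this file produce on the kernel side.
 */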

static __initconst const struct x86_pmu p4_pmu = {
	.name			= "Netburst P4/Xeon",
	.handle_irq		= p4_pmu_handle_irq,
	.disable_all		= p4_pmu_disable_all,
	.enable_all		= p4_pmu_enable_all,
	.enable			= p4_pmu_enable_event,
	.disable		= p4_pmu_disable_event,

	.set_period		= p4_pmu_set_period,

	.eventsel		= MSR_P4_BPU_CCCR0,
	.perfctr		= MSR_P4_BPU_PERFCTR0,
	.event_map		= p4_pmu_event_map,
	.max_events		= ARRAY_SIZE(p4_general_events),
	.get_event_constraints	= x86_get_event_constraints,
	/*
	 * If HT is disabled we may need to use all
	 * ARCH_P4_MAX_CCCR counters simultaneously;
	 * for the moment leave it restricted, assuming
	 * HT is on.
	 */
	.cntr_mask64		= GENMASK_ULL(ARCH_P4_MAX_CCCR - 1, 0),
	.apic			= 1,
	.cntval_bits		= ARCH_P4_CNTRVAL_BITS,
	.cntval_mask		= ARCH_P4_CNTRVAL_MASK,
	.max_period		= (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
	.hw_config		= p4_hw_config,
	.schedule_events	= p4_pmu_schedule_events,

	.format_attrs		= intel_p4_formats_attr,
};

__init int p4_pmu_init(void)
{
	unsigned int low, high;
	int i, reg;

	/* If we get stripped -- indexing fails */
	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!(low & (1 << 7))) {
		pr_cont("unsupported Netburst CPU model %d ",
			boot_cpu_data.x86_model);
		return -ENODEV;
	}

	memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
		sizeof(hw_cache_event_ids));

	pr_cont("Netburst events, ");

	x86_pmu = p4_pmu;

	/*
	 * Even though the counters are configured to interrupt a particular
	 * logical processor when an overflow happens, testing has shown that
	 * on kdump kernels (which use a single cpu), thread1's counter
	 * continues to run and will report an NMI on thread0. Due to the
	 * overflow bug, this leads to a stream of unknown NMIs.
	 *
	 * Solve this by zero'ing out the registers to mimic a reset.
	 */
	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		reg = x86_pmu_config_addr(i);
		wrmsrq_safe(reg, 0ULL);
	}

	return 0;
}