GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/oprofile/op_model_p4.c
/**
 * @file op_model_p4.c
 * P4 model-specific MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Graydon Hoare
 */

#include <linux/oprofile.h>
#include <linux/smp.h>
#include <linux/ptrace.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/fixmap.h>
#include <asm/apic.h>


#include "op_x86_model.h"
#include "op_counter.h"

#define NUM_EVENTS 39

#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

#define OP_CTR_OVERFLOW (1ULL<<31)

static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;
/* this has to be checked dynamically, since whether a chip
   is hyper-threaded is only discovered at kernel boot time. */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings == 2) {
		num_counters = NUM_COUNTERS_HT2;
		num_controls = NUM_CONTROLS_HT2;
	}
#endif
}

static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
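
/*
 * nb (explanatory sketch, inferred from the reservation loops in
 * p4_fill_in_addresses() below rather than from a manual): those loops
 * start at (base + stag) and step by addr_increment(), so they assume
 * the ESCR MSRs are contiguously numbered, with the even thread (stag 0)
 * visiting base, base+2, ... and the odd thread (stag 1) visiting
 * base+1, base+3, ...
 */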

/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
	int virt_counter;
	int counter_address;
	int cccr_address;
};

struct p4_event_binding {
	int escr_select;  /* value to put in CCCR */
	int event_select; /* value to put in ESCR */
	struct {
		int virt_counter; /* for this counter... */
		int escr_address; /* use this ESCR       */
	} bindings[2];
};
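
/*
 * example reading of the event table further down: for BRANCH_RETIRED,
 * escr_select 0x05 is what goes into the CCCR, event_select 0x06 goes
 * into the ESCR, and the event can be counted either on CTR_IQ_4 via
 * MSR_P4_CRU_ESCR2 or on CTR_IQ_5 via MSR_P4_CRU_ESCR3.
 */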

/* nb: these CTR_* defines are a duplicate of defines in
   event/i386.p4*events. */

#define CTR_BPU_0      (1 << 0)
#define CTR_MS_0       (1 << 1)
#define CTR_FLAME_0    (1 << 2)
#define CTR_IQ_4       (1 << 3)
#define CTR_BPU_2      (1 << 4)
#define CTR_MS_2       (1 << 5)
#define CTR_FLAME_2    (1 << 6)
#define CTR_IQ_5       (1 << 7)

static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};

#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)

/* p4 event codes in libop/op_event.h are indices into this table. */

static struct p4_event_binding p4_events[NUM_EVENTS] = {

	{ /* BRANCH_RETIRED */
		0x05, 0x06,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* MISPRED_BRANCH_RETIRED */
		0x04, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
	},

	{ /* TC_DELIVER_MODE */
		0x01, 0x01,
		{ { CTR_MS_0, MSR_P4_TC_ESCR0 },
		  { CTR_MS_2, MSR_P4_TC_ESCR1 } }
	},

	{ /* BPU_FETCH_REQUEST */
		0x00, 0x03,
		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0 },
		  { CTR_BPU_2, MSR_P4_BPU_ESCR1 } }
	},

	{ /* ITLB_REFERENCE */
		0x03, 0x18,
		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1 } }
	},

	{ /* MEMORY_CANCEL */
		0x05, 0x02,
		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1 } }
	},

	{ /* MEMORY_COMPLETE */
		0x02, 0x08,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
	},

	{ /* LOAD_PORT_REPLAY */
		0x02, 0x04,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
	},

	{ /* STORE_PORT_REPLAY */
		0x02, 0x05,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
	},

	{ /* MOB_LOAD_REPLAY */
		0x02, 0x03,
		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_MOB_ESCR1 } }
	},

	{ /* PAGE_WALK_TYPE */
		0x04, 0x01,
		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0 },
		  { CTR_BPU_2, MSR_P4_PMH_ESCR1 } }
	},

	{ /* BSQ_CACHE_REFERENCE */
		0x07, 0x0c,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0 },
		  { CTR_BPU_2, MSR_P4_BSU_ESCR1 } }
	},

	{ /* IOQ_ALLOCATION */
		0x06, 0x03,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
		  { 0, 0 } }
	},

	{ /* IOQ_ACTIVE_ENTRIES */
		0x06, 0x1a,
		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1 },
		  { 0, 0 } }
	},

	{ /* FSB_DATA_ACTIVITY */
		0x06, 0x17,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1 } }
	},

	{ /* BSQ_ALLOCATION */
		0x07, 0x05,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0 },
		  { 0, 0 } }
	},

	{ /* BSQ_ACTIVE_ENTRIES */
		0x07, 0x06,
		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */ },
		  { 0, 0 } }
	},

	{ /* X87_ASSIST */
		0x05, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* SSE_INPUT_ASSIST */
		0x01, 0x34,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* PACKED_SP_UOP */
		0x01, 0x08,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* PACKED_DP_UOP */
		0x01, 0x0c,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* SCALAR_SP_UOP */
		0x01, 0x0a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* SCALAR_DP_UOP */
		0x01, 0x0e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* 64BIT_MMX_UOP */
		0x01, 0x02,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* 128BIT_MMX_UOP */
		0x01, 0x1a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* X87_FP_UOP */
		0x01, 0x04,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* X87_SIMD_MOVES_UOP */
		0x01, 0x2e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* MACHINE_CLEAR */
		0x05, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* GLOBAL_POWER_EVENTS */
		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1 } }
	},

	{ /* TC_MS_XFER */
		0x00, 0x05,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0 },
		  { CTR_MS_2, MSR_P4_MS_ESCR1 } }
	},

	{ /* UOP_QUEUE_WRITES */
		0x00, 0x09,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0 },
		  { CTR_MS_2, MSR_P4_MS_ESCR1 } }
	},

	{ /* FRONT_END_EVENT */
		0x05, 0x08,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* EXECUTION_EVENT */
		0x05, 0x0c,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* REPLAY_EVENT */
		0x05, 0x09,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* INSTR_RETIRED */
		0x04, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
	},

	{ /* UOPS_RETIRED */
		0x04, 0x01,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
	},

	{ /* UOP_TYPE */
		0x02, 0x02,
		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0 },
		  { CTR_IQ_5, MSR_P4_RAT_ESCR1 } }
	},

	{ /* RETIRED_MISPRED_BRANCH_TYPE */
		0x02, 0x05,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0 },
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1 } }
	},

	{ /* RETIRED_BRANCH_TYPE */
		0x02, 0x04,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0 },
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1 } }
	}
};


#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)

#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
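
/*
 * ESCR layout as implied by the setters above (read off the shifts and
 * masks, not quoted from a manual): bits 30:25 event select, 24:9 event
 * mask, bit 3 OS and bit 2 USR for thread 0, bit 1 OS and bit 0 USR for
 * thread 1.  ESCR_CLEAR() preserves only ESCR_RESERVED_BITS, i.e. bit 31
 * and the thread-1 bits 1:0.
 */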

#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
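
/*
 * likewise for the CCCR setters (again inferred from the shifts): bit 31
 * OVF, bits 27/26 PMI-on-overflow for thread 1/0, bits 17:16 the required
 * must-be-set pair, 15:13 ESCR select, bit 12 enable; CCCR_CLEAR() keeps
 * only CCCR_RESERVED_BITS and the setters rebuild the rest.
 */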


/* this assigns a "stagger" to the current CPU, which is used throughout
   the code in this module as an extra array offset, to select the "even"
   or "odd" part of all the divided resources. */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
	return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
#endif
	return 0;
}
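
/* e.g. on an HT chip where logical CPUs 0 and 1 are siblings, CPU 0 is
   cpumask_first() of its sibling map and so gets stagger 0, while CPU 1
   gets stagger 1; without CONFIG_SMP the stagger is always 0. */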


/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
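
/* worked example: with HT active (num_counters == 4) and stagger 1,
   VIRT_CTR(1, 0) == 4, so "virtual" counter 0 on the odd thread selects
   the CTR_BPU_2 / MSR_P4_BPU_PERFCTR2 entry of p4_counters[] instead of
   CTR_BPU_0 / MSR_P4_BPU_PERFCTR0. */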

static unsigned long reset_value[NUM_COUNTERS_NON_HT];

static void p4_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (msrs->counters[i].addr)
			release_perfctr_nmi(msrs->counters[i].addr);
	}
	/*
	 * some of the control registers are specially reserved in
	 * conjunction with the counter registers (hence the starting offset).
	 * This saves a few bits.
	 */
	for (i = num_counters; i < num_controls; ++i) {
		if (msrs->controls[i].addr)
			release_evntsel_nmi(msrs->controls[i].addr);
	}
}

static int p4_fill_in_addresses(struct op_msrs * const msrs)
{
	unsigned int i;
	unsigned int addr, cccraddr, stag;

	setup_num_counters();
	stag = get_stagger();

	/* the counter & cccr registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
		if (reserve_perfctr_nmi(addr)) {
			msrs->counters[i].addr = addr;
			msrs->controls[i].addr = cccraddr;
		}
	}

	/* 43 ESCR registers in three or four discontiguous groups */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* no IQ_ESCR0/1 on some models, so we reserve BSU_ESCR0/1 a second
	 * time to avoid a special case in nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread,
		   for the 22nd and 23rd control registers */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		}
	}

	for (i = 0; i < num_counters; ++i) {
		if (!counter_config[i].enabled)
			continue;
		if (msrs->controls[i].addr)
			continue;
		op_x86_warn_reserved(i);
		p4_shutdown(msrs);
		return -EBUSY;
	}

	return 0;
}
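
/* net effect on success: msrs->controls[0 .. num_counters-1] hold the
   CCCR addresses (reserved alongside their counters above), and
   msrs->controls[num_counters .. num_controls-1] hold the ESCRs --
   the layout that p4_setup_ctrs() and p4_shutdown() rely on. */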


static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;
	unsigned int cccr = 0;
	unsigned int escr = 0;
	unsigned int high = 0;
	unsigned int counter_bit;
	struct p4_event_binding *ev = NULL;
	unsigned int stag;

	stag = get_stagger();

	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);

	/* find our event binding structure. */
	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
		printk(KERN_ERR
		       "oprofile: P4 event code 0x%lx out of range\n",
		       counter_config[ctr].event);
		return;
	}

	ev = &(p4_events[counter_config[ctr].event - 1]);

	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR */
			rdmsr(ev->bindings[i].escr_address, escr, high);
			ESCR_CLEAR(escr);
			if (stag == 0) {
				ESCR_SET_USR_0(escr, counter_config[ctr].user);
				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
			} else {
				ESCR_SET_USR_1(escr, counter_config[ctr].user);
				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
			wrmsr(ev->bindings[i].escr_address, escr, high);

			/* modify CCCR */
			rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
			      cccr, high);
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
			if (stag == 0)
				CCCR_SET_PMI_OVF_0(cccr);
			else
				CCCR_SET_PMI_OVF_1(cccr);
			wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
			      cccr, high);
			return;
		}
	}

	printk(KERN_ERR
	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
	       counter_config[ctr].event, stag, ctr);
}
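
/* worked example of the above: counter 3 configured for BRANCH_RETIRED
   (event code 1) with stag == 0 yields counter_bit == CTR_IQ_4, which
   matches bindings[0] of that event, so MSR_P4_CRU_ESCR2 is programmed
   with event_select 0x06 and MSR_P4_IQ_CCCR4 gets escr_select 0x05 plus
   the thread-0 PMI-on-overflow bit. */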


static void p4_setup_ctrs(struct op_x86_model_spec const *model,
			  struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int stag;

	stag = get_stagger();

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!MISC_PMC_ENABLED_P(low)) {
		printk(KERN_ERR "oprofile: P4 PMC not available\n");
		return;
	}

	/* clear the cccrs we will use */
	for (i = 0; i < num_counters; i++) {
		if (unlikely(!msrs->controls[i].addr))
			continue;
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear all escrs (including those outside our concern) */
	for (i = num_counters; i < num_controls; i++) {
		if (unlikely(!msrs->controls[i].addr))
			continue;
		wrmsr(msrs->controls[i].addr, 0, 0);
	}

	/* setup all counters */
	for (i = 0; i < num_counters; ++i) {
		if (counter_config[i].enabled && msrs->controls[i].addr) {
			reset_value[i] = counter_config[i].count;
			pmc_setup_one_p4_counter(i);
			wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
			       -(u64)counter_config[i].count);
		} else {
			reset_value[i] = 0;
		}
	}
}
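
/* nb: seeding a counter with -(count) makes it count up and overflow
   after "count" events, at which point the PMI bit set in the CCCR
   raises the NMI that is handled in p4_check_ctrs() below. */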


static int p4_check_ctrs(struct pt_regs * const regs,
			 struct op_msrs const * const msrs)
{
	unsigned long ctr, low, high, stag, real;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		if (!reset_value[i])
			continue;

		/*
		 * there is some eccentricity in the hardware which
		 * requires that we perform 2 extra corrections:
		 *
		 * - check both the CCCR:OVF flag for overflow and the
		 *   counter high bit for un-flagged overflows.
		 *
		 * - write the counter back twice to ensure it gets
		 *   updated properly.
		 *
		 * the former seems to be related to extra NMIs happening
		 * during the current NMI; the latter is reported as errata
		 * N15 in intel doc 249199-029, pentium 4 specification
		 * update, though their suggested work-around does not
		 * appear to solve the problem.
		 */

		real = VIRT_CTR(stag, i);

		rdmsr(p4_counters[real].cccr_address, low, high);
		rdmsr(p4_counters[real].counter_address, ctr, high);
		if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
			oprofile_add_sample(regs, i);
			wrmsrl(p4_counters[real].counter_address,
			       -(u64)reset_value[i]);
			CCCR_CLEAR_OVF(low);
			wrmsr(p4_counters[real].cccr_address, low, high);
			wrmsrl(p4_counters[real].counter_address,
			       -(u64)reset_value[i]);
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* See op_model_ppro.c */
	return 1;
}


static void p4_start(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_SET_ENABLE(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}
}


static void p4_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_SET_DISABLE(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}
}

#ifdef CONFIG_SMP
struct op_x86_model_spec op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.fill_in_addresses	= &p4_fill_in_addresses,
	.setup_ctrs		= &p4_setup_ctrs,
	.check_ctrs		= &p4_check_ctrs,
	.start			= &p4_start,
	.stop			= &p4_stop,
	.shutdown		= &p4_shutdown
};
#endif

struct op_x86_model_spec op_p4_spec = {
	.num_counters		= NUM_COUNTERS_NON_HT,
	.num_controls		= NUM_CONTROLS_NON_HT,
	.fill_in_addresses	= &p4_fill_in_addresses,
	.setup_ctrs		= &p4_setup_ctrs,
	.check_ctrs		= &p4_check_ctrs,
	.start			= &p4_start,
	.stop			= &p4_stop,
	.shutdown		= &p4_shutdown
};
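
/* the HT2 variant (half the counters and controls, with per-thread
   staggering) is intended to be selected when the boot code finds two
   siblings per core, mirroring setup_num_counters() above; otherwise
   the full non-HT spec applies. */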