Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/parisc/kernel/perf.c
26289 views
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
* Parisc performance counters
4
* Copyright (C) 2001 Randolph Chung <[email protected]>
5
*
6
* This code is derived, with permission, from HP/UX sources.
7
*/
8
9
/*
10
* Edited comment from original sources:
11
*
12
* This driver programs the PCX-U/PCX-W performance counters
13
* on the PA-RISC 2.0 chips. The driver keeps all images now
14
* internally to the kernel to hopefully eliminate the possibility
15
* of a bad image halting the CPU. Also, there are different
16
* images for the PCX-W and later chips vs the PCX-U chips.
17
*
18
* Only 1 process is allowed to access the driver at any time,
19
* so the only protection that is needed is at open and close.
20
* A variable "perf_enabled" is used to hold the state of the
21
* driver. The spinlock "perf_lock" is used to protect the
22
* modification of the state during open/close operations so
23
* multiple processes don't get into the driver simultaneously.
24
*
25
* This driver accesses the processor directly vs going through
26
* the PDC INTRIGUE calls. This is done to eliminate bugs introduced
27
* in various PDC revisions. The code is much more maintainable
28
* and reliable this way vs having to debug on every version of PDC
29
* on every box.
30
*/
31
32
#include <linux/capability.h>
33
#include <linux/init.h>
34
#include <linux/proc_fs.h>
35
#include <linux/miscdevice.h>
36
#include <linux/spinlock.h>
37
38
#include <linux/uaccess.h>
39
#include <asm/perf.h>
40
#include <asm/parisc-device.h>
41
#include <asm/processor.h>
42
#include <asm/runway.h>
43
#include <asm/io.h> /* for __raw_read() */
44
45
#include "perf_images.h"
46
47
#define MAX_RDR_WORDS 24
48
#define PERF_VERSION 2 /* derived from hpux's PI v2 interface */
49
50
/*
 * Description of a single RDR register as used by the accessors below.
 */
struct rdr_tbl_ent {
	uint16_t	width;		/* width in bits; 0 makes the accessors skip the RDR */
	uint8_t		num_words;	/* number of 64-bit words shifted in or out */
	uint8_t		write_control;	/* non-zero: byte offset into the bitmask array */
};
56
57
/* RDR access flavour for this CPU (ONYX_INTF or CUDA_INTF); set in perf_init() */
static int perf_processor_interface __read_mostly = UNKNOWN_INTF;

/* Non-zero while the device is held open; see perf_open()/perf_release() */
static int perf_enabled __read_mostly;

/* Serialises updates of perf_enabled */
static DEFINE_SPINLOCK(perf_lock);

/* Device of CPU 0, assigned in perf_init(); its HPA is mapped in perf_write_image() */
static struct parisc_device *cpu_device __read_mostly;
61
62
/*
 * RDR numbers written by perf_write_image(), in order, for PCX-W.
 * The list is terminated by -1.
 */
static const int perf_rdrs_W[] = {
	0, 1, 4, 5, 6,
	15, 16, 17, 18,
	20, 21, 22, 23, 24, 25,
	-1
};

/*
 * RDR numbers written by perf_write_image(), in order, for PCX-U.
 * The list is terminated by -1.
 */
static const int perf_rdrs_U[] = {
	0, 1, 4, 5, 6,
	7, 16, 17, 18,
	20, 21, 22, 23, 24, 25,
	-1
};
69
70
/* RDR register descriptions for PCX-W */
71
static const struct rdr_tbl_ent perf_rdr_tbl_W[] = {
72
{ 19, 1, 8 }, /* RDR 0 */
73
{ 16, 1, 16 }, /* RDR 1 */
74
{ 72, 2, 0 }, /* RDR 2 */
75
{ 81, 2, 0 }, /* RDR 3 */
76
{ 328, 6, 0 }, /* RDR 4 */
77
{ 160, 3, 0 }, /* RDR 5 */
78
{ 336, 6, 0 }, /* RDR 6 */
79
{ 164, 3, 0 }, /* RDR 7 */
80
{ 0, 0, 0 }, /* RDR 8 */
81
{ 35, 1, 0 }, /* RDR 9 */
82
{ 6, 1, 0 }, /* RDR 10 */
83
{ 18, 1, 0 }, /* RDR 11 */
84
{ 13, 1, 0 }, /* RDR 12 */
85
{ 8, 1, 0 }, /* RDR 13 */
86
{ 8, 1, 0 }, /* RDR 14 */
87
{ 8, 1, 0 }, /* RDR 15 */
88
{ 1530, 24, 0 }, /* RDR 16 */
89
{ 16, 1, 0 }, /* RDR 17 */
90
{ 4, 1, 0 }, /* RDR 18 */
91
{ 0, 0, 0 }, /* RDR 19 */
92
{ 152, 3, 24 }, /* RDR 20 */
93
{ 152, 3, 24 }, /* RDR 21 */
94
{ 233, 4, 48 }, /* RDR 22 */
95
{ 233, 4, 48 }, /* RDR 23 */
96
{ 71, 2, 0 }, /* RDR 24 */
97
{ 71, 2, 0 }, /* RDR 25 */
98
{ 11, 1, 0 }, /* RDR 26 */
99
{ 18, 1, 0 }, /* RDR 27 */
100
{ 128, 2, 0 }, /* RDR 28 */
101
{ 0, 0, 0 }, /* RDR 29 */
102
{ 16, 1, 0 }, /* RDR 30 */
103
{ 16, 1, 0 }, /* RDR 31 */
104
};
105
106
/* RDR register descriptions for PCX-U */
107
static const struct rdr_tbl_ent perf_rdr_tbl_U[] = {
108
{ 19, 1, 8 }, /* RDR 0 */
109
{ 32, 1, 16 }, /* RDR 1 */
110
{ 20, 1, 0 }, /* RDR 2 */
111
{ 0, 0, 0 }, /* RDR 3 */
112
{ 344, 6, 0 }, /* RDR 4 */
113
{ 176, 3, 0 }, /* RDR 5 */
114
{ 336, 6, 0 }, /* RDR 6 */
115
{ 0, 0, 0 }, /* RDR 7 */
116
{ 0, 0, 0 }, /* RDR 8 */
117
{ 0, 0, 0 }, /* RDR 9 */
118
{ 28, 1, 0 }, /* RDR 10 */
119
{ 33, 1, 0 }, /* RDR 11 */
120
{ 0, 0, 0 }, /* RDR 12 */
121
{ 230, 4, 0 }, /* RDR 13 */
122
{ 32, 1, 0 }, /* RDR 14 */
123
{ 128, 2, 0 }, /* RDR 15 */
124
{ 1494, 24, 0 }, /* RDR 16 */
125
{ 18, 1, 0 }, /* RDR 17 */
126
{ 4, 1, 0 }, /* RDR 18 */
127
{ 0, 0, 0 }, /* RDR 19 */
128
{ 158, 3, 24 }, /* RDR 20 */
129
{ 158, 3, 24 }, /* RDR 21 */
130
{ 194, 4, 48 }, /* RDR 22 */
131
{ 194, 4, 48 }, /* RDR 23 */
132
{ 71, 2, 0 }, /* RDR 24 */
133
{ 71, 2, 0 }, /* RDR 25 */
134
{ 28, 1, 0 }, /* RDR 26 */
135
{ 33, 1, 0 }, /* RDR 27 */
136
{ 88, 2, 0 }, /* RDR 28 */
137
{ 32, 1, 0 }, /* RDR 29 */
138
{ 24, 1, 0 }, /* RDR 30 */
139
{ 16, 1, 0 }, /* RDR 31 */
140
};
141
142
/*
 * Write-control bitmasks.  A non-zero write_control value in the RDR
 * tables is a byte offset into this array, so offset 8 selects element 1,
 * offset 16 element 2, offset 24 element 3 and offset 48 element 6.
 */
static const uint64_t perf_bitmasks[] = {
	/* offset 0: first dbl word must be zero */
	[0] = 0x0000000000000000ul,

	/* offset 8: RDR0 bitmask */
	[1] = 0xfdffe00000000000ul,

	/* offset 16: RDR1 bitmask */
	[2] = 0x003f000000000000ul,

	/* offset 24: RDR20-RDR21 bitmask (152 bits) */
	[3] = 0x00fffffffffffffful,
	[4] = 0xfffffffffffffffful,
	[5] = 0xfffffffc00000000ul,

	/* offset 48: RDR22-RDR23 bitmask (233 bits) */
	[6] = 0xfffffffffffffffful,
	[7] = 0xfffffffffffffffful,
	[8] = 0xfffffffffffffffcul,
	[9] = 0xff00000000000000ul,
};
158
159
/*
 * Write-control bitmasks for the Pa-8700 processor, where some things
 * have changed slightly.  Indexed the same way as perf_bitmasks
 * (byte offset / 8).
 */
static const uint64_t perf_bitmasks_piranha[] = {
	/* offset 0: first dbl word must be zero */
	[0] = 0x0000000000000000ul,

	/* offset 8: RDR0 bitmask */
	[1] = 0xfdffe00000000000ul,

	/* offset 16: RDR1 bitmask */
	[2] = 0x003f000000000000ul,

	/* offset 24: RDR20-RDR21 bitmask (158 bits) */
	[3] = 0x00fffffffffffffful,
	[4] = 0xfffffffffffffffful,
	[5] = 0xfffffffc00000000ul,

	/* offset 48: RDR22-RDR23 bitmask (210 bits) */
	[6] = 0xfffffffffffffffful,
	[7] = 0xfffffffffffffffful,
	[8] = 0xfffffffffffffffful,
	[9] = 0xfffc000000000000ul,
};

/* Bitmask table in effect for this CPU; selected in perf_init() */
static const uint64_t *bitmask_array;
177
178
/******************************************************************************
179
* Function Prototypes
180
*****************************************************************************/
181
static int perf_config(uint32_t *image_ptr);
182
static int perf_release(struct inode *inode, struct file *file);
183
static int perf_open(struct inode *inode, struct file *file);
184
static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
185
static ssize_t perf_write(struct file *file, const char __user *buf,
186
size_t count, loff_t *ppos);
187
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
188
static void perf_start_counters(void);
189
static int perf_stop_counters(uint32_t *raddr);
190
static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num);
191
static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer);
192
static int perf_rdr_clear(uint32_t rdr_num);
193
static int perf_write_image(uint64_t *memaddr);
194
static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer);
195
196
/* External Assembly Routines */
197
extern uint64_t perf_rdr_shift_in_W (uint32_t rdr_num, uint16_t width);
198
extern uint64_t perf_rdr_shift_in_U (uint32_t rdr_num, uint16_t width);
199
extern void perf_rdr_shift_out_W (uint32_t rdr_num, uint64_t buffer);
200
extern void perf_rdr_shift_out_U (uint32_t rdr_num, uint64_t buffer);
201
extern void perf_intrigue_enable_perf_counters (void);
202
extern void perf_intrigue_disable_perf_counters (void);
203
204
/******************************************************************************
205
* Function Definitions
206
*****************************************************************************/
207
208
209
/*
210
* configure:
211
*
212
* Configure the cpu with a given data image. First turn off the counters,
213
* then download the image, then turn the counters back on.
214
*/
215
static int perf_config(uint32_t *image_ptr)
216
{
217
long error;
218
uint32_t raddr[4];
219
220
/* Stop the counters*/
221
error = perf_stop_counters(raddr);
222
if (error != 0) {
223
printk("perf_config: perf_stop_counters = %ld\n", error);
224
return -EINVAL;
225
}
226
227
printk("Preparing to write image\n");
228
/* Write the image to the chip */
229
error = perf_write_image((uint64_t *)image_ptr);
230
if (error != 0) {
231
printk("perf_config: DOWNLOAD = %ld\n", error);
232
return -EINVAL;
233
}
234
235
printk("Preparing to start counters\n");
236
237
/* Start the counters */
238
perf_start_counters();
239
240
return sizeof(uint32_t);
241
}
242
243
/*
244
* Open the device and initialize all of its memory. The device is only
245
* opened once, but can be "queried" by multiple processes that know its
246
* file descriptor.
247
*/
248
static int perf_open(struct inode *inode, struct file *file)
249
{
250
spin_lock(&perf_lock);
251
if (perf_enabled) {
252
spin_unlock(&perf_lock);
253
return -EBUSY;
254
}
255
perf_enabled = 1;
256
spin_unlock(&perf_lock);
257
258
return 0;
259
}
260
261
/*
262
* Close the device.
263
*/
264
static int perf_release(struct inode *inode, struct file *file)
265
{
266
spin_lock(&perf_lock);
267
perf_enabled = 0;
268
spin_unlock(&perf_lock);
269
270
return 0;
271
}
272
273
/*
274
* Read does nothing for this driver
275
*/
276
static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos)
277
{
278
return 0;
279
}
280
281
/*
282
* write:
283
*
284
* This routine downloads the image to the chip. It must be
285
* called on the processor that the download should happen
286
* on.
287
*/
288
static ssize_t perf_write(struct file *file, const char __user *buf,
289
size_t count, loff_t *ppos)
290
{
291
size_t image_size __maybe_unused;
292
uint32_t image_type;
293
uint32_t interface_type;
294
uint32_t test;
295
296
if (perf_processor_interface == ONYX_INTF)
297
image_size = PCXU_IMAGE_SIZE;
298
else if (perf_processor_interface == CUDA_INTF)
299
image_size = PCXW_IMAGE_SIZE;
300
else
301
return -EFAULT;
302
303
if (!perfmon_capable())
304
return -EACCES;
305
306
if (count != sizeof(uint32_t))
307
return -EIO;
308
309
if (copy_from_user(&image_type, buf, sizeof(uint32_t)))
310
return -EFAULT;
311
312
/* Get the interface type and test type */
313
interface_type = (image_type >> 16) & 0xffff;
314
test = (image_type & 0xffff);
315
316
/* Make sure everything makes sense */
317
318
/* First check the machine type is correct for
319
the requested image */
320
if (((perf_processor_interface == CUDA_INTF) &&
321
(interface_type != CUDA_INTF)) ||
322
((perf_processor_interface == ONYX_INTF) &&
323
(interface_type != ONYX_INTF)))
324
return -EINVAL;
325
326
/* Next check to make sure the requested image
327
is valid */
328
if (((interface_type == CUDA_INTF) &&
329
(test >= MAX_CUDA_IMAGES)) ||
330
((interface_type == ONYX_INTF) &&
331
(test >= MAX_ONYX_IMAGES)))
332
return -EINVAL;
333
334
/* Copy the image into the processor */
335
if (interface_type == CUDA_INTF)
336
return perf_config(cuda_images[test]);
337
else
338
return perf_config(onyx_images[test]);
339
340
return count;
341
}
342
343
/*
 * perf_patch_images
 *
 * Patch the images that need to know the IVA addresses.
 *
 * The whole body is currently compiled out (#if 0, FIXME), so calling
 * this function has no effect.
 */
static void perf_patch_images(void)
{
#if 0 /* FIXME!! */
/*
 * NOTE:  this routine is VERY specific to the current TLB image.
 * If the image is changed, this routine might also need to be changed.
 */
	extern void $i_itlb_miss_2_0();
	extern void $i_dtlb_miss_2_0();
	extern void PA2_0_iva();

	/*
	 * We can only use the lower 32-bits, the upper 32-bits should be 0
	 * anyway given this is in the kernel
	 */
	uint32_t itlb_addr  = (uint32_t)&($i_itlb_miss_2_0);
	uint32_t dtlb_addr  = (uint32_t)&($i_dtlb_miss_2_0);
	uint32_t IVAaddress = (uint32_t)&PA2_0_iva;

	if (perf_processor_interface == ONYX_INTF) {
		/* clear the low 8 bits of word 15 ... */
		onyx_images[TLBMISS][15] &= 0xffffff00;
		/* ... and replace them with the top byte of dtlb_addr */
		onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
		onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00;
		onyx_images[TLBMISS][17] = itlb_addr;

		/* same patch for the TLBHANDMISS image */
		onyx_images[TLBHANDMISS][15] &= 0xffffff00;
		onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
		onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00;
		onyx_images[TLBHANDMISS][17] = itlb_addr;

		/* same patch for the BIG_CPI image */
		onyx_images[BIG_CPI][15] &= 0xffffff00;
		onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24));
		onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00;
		onyx_images[BIG_CPI][17] = itlb_addr;

		/* the PANIC image is patched with the IVA address instead */
		onyx_images[PANIC][15] &= 0xffffff00;
		onyx_images[PANIC][15] |= (0x000000ff&((IVAaddress) >> 24));
		onyx_images[PANIC][16] = (IVAaddress << 8)&0xffffff00;

	} else if (perf_processor_interface == CUDA_INTF) {
		/* Cuda interface */
		cuda_images[TLBMISS][16] =
			(cuda_images[TLBMISS][16]&0xffff0000) |
			((dtlb_addr >> 8)&0x0000ffff);
		cuda_images[TLBMISS][17] =
			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
		cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000;

		cuda_images[TLBHANDMISS][16] =
			(cuda_images[TLBHANDMISS][16]&0xffff0000) |
			((dtlb_addr >> 8)&0x0000ffff);
		cuda_images[TLBHANDMISS][17] =
			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
		cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000;

		cuda_images[BIG_CPI][16] =
			(cuda_images[BIG_CPI][16]&0xffff0000) |
			((dtlb_addr >> 8)&0x0000ffff);
		cuda_images[BIG_CPI][17] =
			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
		cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000;
	} else {
		/* Unknown type */
	}
#endif
}
419
420
421
/*
422
* ioctl routine
423
* All routines effect the processor that they are executed on. Thus you
424
* must be running on the processor that you wish to change.
425
*/
426
427
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
428
{
429
long error_start;
430
uint32_t raddr[4];
431
int error = 0;
432
433
switch (cmd) {
434
435
case PA_PERF_ON:
436
/* Start the counters */
437
perf_start_counters();
438
break;
439
440
case PA_PERF_OFF:
441
error_start = perf_stop_counters(raddr);
442
if (error_start != 0) {
443
printk(KERN_ERR "perf_off: perf_stop_counters = %ld\n", error_start);
444
error = -EFAULT;
445
break;
446
}
447
448
/* copy out the Counters */
449
if (copy_to_user((void __user *)arg, raddr,
450
sizeof (raddr)) != 0) {
451
error = -EFAULT;
452
break;
453
}
454
break;
455
456
case PA_PERF_VERSION:
457
/* Return the version # */
458
error = put_user(PERF_VERSION, (int *)arg);
459
break;
460
461
default:
462
error = -ENOTTY;
463
}
464
465
return error;
466
}
467
468
static const struct file_operations perf_fops = {
469
.read = perf_read,
470
.write = perf_write,
471
.unlocked_ioctl = perf_ioctl,
472
.compat_ioctl = perf_ioctl,
473
.open = perf_open,
474
.release = perf_release
475
};
476
477
static struct miscdevice perf_dev = {
478
.minor = MISC_DYNAMIC_MINOR,
479
.name = PA_PERF_DEV,
480
.fops = &perf_fops,
481
};
482
483
/*
484
* Initialize the module
485
*/
486
static int __init perf_init(void)
487
{
488
int ret;
489
490
/* Determine correct processor interface to use */
491
bitmask_array = perf_bitmasks;
492
493
if (boot_cpu_data.cpu_type == pcxu ||
494
boot_cpu_data.cpu_type == pcxu_) {
495
perf_processor_interface = ONYX_INTF;
496
} else if (boot_cpu_data.cpu_type == pcxw ||
497
boot_cpu_data.cpu_type == pcxw_ ||
498
boot_cpu_data.cpu_type == pcxw2 ||
499
boot_cpu_data.cpu_type == mako ||
500
boot_cpu_data.cpu_type == mako2) {
501
perf_processor_interface = CUDA_INTF;
502
if (boot_cpu_data.cpu_type == pcxw2 ||
503
boot_cpu_data.cpu_type == mako ||
504
boot_cpu_data.cpu_type == mako2)
505
bitmask_array = perf_bitmasks_piranha;
506
} else {
507
perf_processor_interface = UNKNOWN_INTF;
508
printk("Performance monitoring counters not supported on this processor\n");
509
return -ENODEV;
510
}
511
512
ret = misc_register(&perf_dev);
513
if (ret) {
514
printk(KERN_ERR "Performance monitoring counters: "
515
"cannot register misc device.\n");
516
return ret;
517
}
518
519
/* Patch the images to match the system */
520
perf_patch_images();
521
522
/* TODO: this only lets us access the first cpu.. what to do for SMP? */
523
cpu_device = per_cpu(cpu_data, 0).dev;
524
printk("Performance monitoring counters enabled for %s\n",
525
per_cpu(cpu_data, 0).dev->name);
526
527
return 0;
528
}
529
device_initcall(perf_init);
530
531
/*
 * perf_start_counters
 *
 * Start the counters by calling the assembly enable routine.
 */
static void perf_start_counters(void)
{
	perf_intrigue_enable_perf_counters();
}
541
542
/*
543
* perf_stop_counters
544
*
545
* Stop the performance counters and save counts
546
* in a per_processor array.
547
*/
548
static int perf_stop_counters(uint32_t *raddr)
549
{
550
uint64_t userbuf[MAX_RDR_WORDS];
551
552
/* Disable performance counters */
553
perf_intrigue_disable_perf_counters();
554
555
if (perf_processor_interface == ONYX_INTF) {
556
uint64_t tmp64;
557
/*
558
* Read the counters
559
*/
560
if (!perf_rdr_read_ubuf(16, userbuf))
561
return -13;
562
563
/* Counter0 is bits 1398 to 1429 */
564
tmp64 = (userbuf[21] << 22) & 0x00000000ffc00000;
565
tmp64 |= (userbuf[22] >> 42) & 0x00000000003fffff;
566
/* OR sticky0 (bit 1430) to counter0 bit 32 */
567
tmp64 |= (userbuf[22] >> 10) & 0x0000000080000000;
568
raddr[0] = (uint32_t)tmp64;
569
570
/* Counter1 is bits 1431 to 1462 */
571
tmp64 = (userbuf[22] >> 9) & 0x00000000ffffffff;
572
/* OR sticky1 (bit 1463) to counter1 bit 32 */
573
tmp64 |= (userbuf[22] << 23) & 0x0000000080000000;
574
raddr[1] = (uint32_t)tmp64;
575
576
/* Counter2 is bits 1464 to 1495 */
577
tmp64 = (userbuf[22] << 24) & 0x00000000ff000000;
578
tmp64 |= (userbuf[23] >> 40) & 0x0000000000ffffff;
579
/* OR sticky2 (bit 1496) to counter2 bit 32 */
580
tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
581
raddr[2] = (uint32_t)tmp64;
582
583
/* Counter3 is bits 1497 to 1528 */
584
tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff;
585
/* OR sticky3 (bit 1529) to counter3 bit 32 */
586
tmp64 |= (userbuf[23] << 25) & 0x0000000080000000;
587
raddr[3] = (uint32_t)tmp64;
588
589
/*
590
* Zero out the counters
591
*/
592
593
/*
594
* The counters and sticky-bits comprise the last 132 bits
595
* (1398 - 1529) of RDR16 on a U chip. We'll zero these
596
* out the easy way: zero out last 10 bits of dword 21,
597
* all of dword 22 and 58 bits (plus 6 don't care bits) of
598
* dword 23.
599
*/
600
userbuf[21] &= 0xfffffffffffffc00ul; /* 0 to last 10 bits */
601
userbuf[22] = 0;
602
userbuf[23] = 0;
603
604
/*
605
* Write back the zeroed bytes + the image given
606
* the read was destructive.
607
*/
608
perf_rdr_write(16, userbuf);
609
} else {
610
611
/*
612
* Read RDR-15 which contains the counters and sticky bits
613
*/
614
if (!perf_rdr_read_ubuf(15, userbuf)) {
615
return -13;
616
}
617
618
/*
619
* Clear out the counters
620
*/
621
perf_rdr_clear(15);
622
623
/*
624
* Copy the counters
625
*/
626
raddr[0] = (uint32_t)((userbuf[0] >> 32) & 0x00000000ffffffffUL);
627
raddr[1] = (uint32_t)(userbuf[0] & 0x00000000ffffffffUL);
628
raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
629
raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
630
}
631
632
return 0;
633
}
634
635
/*
636
* perf_rdr_get_entry
637
*
638
* Retrieve a pointer to the description of what this
639
* RDR contains.
640
*/
641
static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num)
642
{
643
if (perf_processor_interface == ONYX_INTF) {
644
return &perf_rdr_tbl_U[rdr_num];
645
} else {
646
return &perf_rdr_tbl_W[rdr_num];
647
}
648
}
649
650
/*
651
* perf_rdr_read_ubuf
652
*
653
* Read the RDR value into the buffer specified.
654
*/
655
static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer)
656
{
657
uint64_t data, data_mask = 0;
658
uint32_t width, xbits, i;
659
const struct rdr_tbl_ent *tentry;
660
661
tentry = perf_rdr_get_entry(rdr_num);
662
if ((width = tentry->width) == 0)
663
return 0;
664
665
/* Clear out buffer */
666
i = tentry->num_words;
667
while (i--) {
668
buffer[i] = 0;
669
}
670
671
/* Check for bits an even number of 64 */
672
if ((xbits = width & 0x03f) != 0) {
673
data_mask = 1;
674
data_mask <<= (64 - xbits);
675
data_mask--;
676
}
677
678
/* Grab all of the data */
679
i = tentry->num_words;
680
while (i--) {
681
682
if (perf_processor_interface == ONYX_INTF) {
683
data = perf_rdr_shift_in_U(rdr_num, width);
684
} else {
685
data = perf_rdr_shift_in_W(rdr_num, width);
686
}
687
if (xbits) {
688
buffer[i] |= (data << (64 - xbits));
689
if (i) {
690
buffer[i-1] |= ((data >> xbits) & data_mask);
691
}
692
} else {
693
buffer[i] = data;
694
}
695
}
696
697
return 1;
698
}
699
700
/*
701
* perf_rdr_clear
702
*
703
* Zero out the given RDR register
704
*/
705
static int perf_rdr_clear(uint32_t rdr_num)
706
{
707
const struct rdr_tbl_ent *tentry;
708
int32_t i;
709
710
tentry = perf_rdr_get_entry(rdr_num);
711
712
if (tentry->width == 0) {
713
return -1;
714
}
715
716
i = tentry->num_words;
717
while (i--) {
718
if (perf_processor_interface == ONYX_INTF) {
719
perf_rdr_shift_out_U(rdr_num, 0UL);
720
} else {
721
perf_rdr_shift_out_W(rdr_num, 0UL);
722
}
723
}
724
725
return 0;
726
}
727
728
729
/*
730
* perf_write_image
731
*
732
* Write the given image out to the processor
733
*/
734
static int perf_write_image(uint64_t *memaddr)
735
{
736
uint64_t buffer[MAX_RDR_WORDS];
737
uint64_t *bptr;
738
uint32_t dwords;
739
const uint32_t *intrigue_rdr;
740
const uint64_t *intrigue_bitmask;
741
uint64_t tmp64;
742
void __iomem *runway;
743
const struct rdr_tbl_ent *tentry;
744
int i;
745
746
/* Clear out counters */
747
if (perf_processor_interface == ONYX_INTF) {
748
749
perf_rdr_clear(16);
750
751
/* Toggle performance monitor */
752
perf_intrigue_enable_perf_counters();
753
perf_intrigue_disable_perf_counters();
754
755
intrigue_rdr = perf_rdrs_U;
756
} else {
757
perf_rdr_clear(15);
758
intrigue_rdr = perf_rdrs_W;
759
}
760
761
/* Write all RDRs */
762
while (*intrigue_rdr != -1) {
763
tentry = perf_rdr_get_entry(*intrigue_rdr);
764
perf_rdr_read_ubuf(*intrigue_rdr, buffer);
765
bptr = &buffer[0];
766
dwords = tentry->num_words;
767
if (tentry->write_control) {
768
intrigue_bitmask = &bitmask_array[tentry->write_control >> 3];
769
while (dwords--) {
770
tmp64 = *intrigue_bitmask & *memaddr++;
771
tmp64 |= (~(*intrigue_bitmask++)) & *bptr;
772
*bptr++ = tmp64;
773
}
774
} else {
775
while (dwords--) {
776
*bptr++ = *memaddr++;
777
}
778
}
779
780
perf_rdr_write(*intrigue_rdr, buffer);
781
intrigue_rdr++;
782
}
783
784
/*
785
* Now copy out the Runway stuff which is not in RDRs
786
*/
787
788
if (cpu_device == NULL)
789
{
790
printk(KERN_ERR "write_image: cpu_device not yet initialized!\n");
791
return -1;
792
}
793
794
runway = ioremap(cpu_device->hpa.start, 4096);
795
if (!runway) {
796
pr_err("perf_write_image: ioremap failed!\n");
797
return -ENOMEM;
798
}
799
800
/* Merge intrigue bits into Runway STATUS 0 */
801
tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
802
__raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
803
runway + RUNWAY_STATUS);
804
805
/* Write RUNWAY DEBUG registers */
806
for (i = 0; i < 8; i++) {
807
__raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
808
}
809
810
return 0;
811
}
812
813
/*
814
* perf_rdr_write
815
*
816
* Write the given RDR register with the contents
817
* of the given buffer.
818
*/
819
static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer)
820
{
821
const struct rdr_tbl_ent *tentry;
822
int32_t i;
823
824
printk("perf_rdr_write\n");
825
tentry = perf_rdr_get_entry(rdr_num);
826
if (tentry->width == 0) { return; }
827
828
i = tentry->num_words;
829
while (i--) {
830
if (perf_processor_interface == ONYX_INTF) {
831
perf_rdr_shift_out_U(rdr_num, buffer[i]);
832
} else {
833
perf_rdr_shift_out_W(rdr_num, buffer[i]);
834
}
835
}
836
printk("perf_rdr_write done\n");
837
}
838
839