GitHub Repository: torvalds/linux
Path: blob/master/tools/lib/perf/mmap.c
// SPDX-License-Identifier: GPL-2.0
#include <sys/mman.h>
#include <inttypes.h>
#include <asm/bug.h>
#include <errno.h>
#include <string.h>
#include <linux/ring_buffer.h>
#include <linux/perf_event.h>
#include <perf/mmap.h>
#include <perf/event.h>
#include <perf/evsel.h>
#include <internal/mmap.h>
#include <internal/lib.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/stringify.h>
#include "internal.h"

void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
                     bool overwrite, libperf_unmap_cb_t unmap_cb)
{
        /* Assume fields were zero initialized. */
        map->fd = -1;
        map->overwrite = overwrite;
        map->unmap_cb = unmap_cb;
        refcount_set(&map->refcnt, 0);
        if (prev)
                prev->next = map;
}

size_t perf_mmap__mmap_len(struct perf_mmap *map)
{
        return map->mask + 1 + page_size;
}
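
/*
 * Note on the length above: the mapping consists of one control page
 * (struct perf_event_mmap_page) followed by a power-of-two data area of
 * map->mask + 1 bytes, hence the extra page_size in perf_mmap__mmap_len().
 */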

int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
                    int fd, struct perf_cpu cpu)
{
        map->prev = 0;
        map->mask = mp->mask;
        map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
                         MAP_SHARED, fd, 0);
        if (map->base == MAP_FAILED) {
                map->base = NULL;
                return -1;
        }

        map->fd = fd;
        map->cpu = cpu;
        return 0;
}

void perf_mmap__munmap(struct perf_mmap *map)
{
        if (!map)
                return;

        zfree(&map->event_copy);
        map->event_copy_sz = 0;
        if (map->base) {
                munmap(map->base, perf_mmap__mmap_len(map));
                map->base = NULL;
                map->fd = -1;
                refcount_set(&map->refcnt, 0);
        }
        if (map->unmap_cb)
                map->unmap_cb(map);
}

void perf_mmap__get(struct perf_mmap *map)
{
        refcount_inc(&map->refcnt);
}

void perf_mmap__put(struct perf_mmap *map)
{
        BUG_ON(map->base && refcount_read(&map->refcnt) == 0);

        if (refcount_dec_and_test(&map->refcnt))
                perf_mmap__munmap(map);
}

static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
        ring_buffer_write_tail(md->base, tail);
}

u64 perf_mmap__read_head(struct perf_mmap *map)
{
        return ring_buffer_read_head(map->base);
}

static bool perf_mmap__empty(struct perf_mmap *map)
{
        struct perf_event_mmap_page *pc = map->base;

        return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
}

void perf_mmap__consume(struct perf_mmap *map)
{
        if (!map->overwrite) {
                u64 old = map->prev;

                perf_mmap__write_tail(map, old);
        }

        if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
                perf_mmap__put(map);
}
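
/*
 * For the backward (overwrite) ring buffer: starting from *start, walk
 * forward over record headers until either the whole buffer has been covered
 * (then rewind to the last complete record) or a zero-sized header is hit,
 * and report where the readable data ends in *end.
 */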
static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
{
        struct perf_event_header *pheader;
        u64 evt_head = *start;
        int size = mask + 1;

        pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
        pheader = (struct perf_event_header *)(buf + (*start & mask));
        while (true) {
                if (evt_head - *start >= (unsigned int)size) {
                        pr_debug("Finished reading overwrite ring buffer: rewind\n");
                        if (evt_head - *start > (unsigned int)size)
                                evt_head -= pheader->size;
                        *end = evt_head;
                        return 0;
                }

                pheader = (struct perf_event_header *)(buf + (evt_head & mask));

                if (pheader->size == 0) {
                        pr_debug("Finished reading overwrite ring buffer: get start\n");
                        *end = evt_head;
                        return 0;
                }

                evt_head += pheader->size;
                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
        }
        WARN_ONCE(1, "Shouldn't get here\n");
        return -1;
}

/*
 * Report the start and end of the available data in the ring buffer.
 */
static int __perf_mmap__read_init(struct perf_mmap *md)
{
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
        unsigned char *data = md->base + page_size;
        unsigned long size;

        md->start = md->overwrite ? head : old;
        md->end = md->overwrite ? old : head;

        if ((md->end - md->start) < md->flush)
                return -EAGAIN;

        size = md->end - md->start;
        if (size > (unsigned long)(md->mask) + 1) {
                if (!md->overwrite) {
                        WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

                        md->prev = head;
                        perf_mmap__consume(md);
                        return -EAGAIN;
                }

                /*
                 * Backward ring buffer is full. We still have a chance to read
                 * most of the data from it.
                 */
                if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
                        return -EINVAL;
        }

        return 0;
}

int perf_mmap__read_init(struct perf_mmap *map)
{
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
        if (!refcount_read(&map->refcnt))
                return -ENOENT;

        return __perf_mmap__read_init(map);
}

/*
 * Mandatory for overwrite mode.
 * The direction of overwrite mode is backward.
 * The last perf_mmap__read() will set the tail to map->prev.
 * Need to correct map->prev to head, which is the end of the next read.
 */
void perf_mmap__read_done(struct perf_mmap *map)
{
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
        if (!refcount_read(&map->refcnt))
                return;

        map->prev = perf_mmap__read_head(map);
}

/* When check_messup is true, 'end' must point to a good entry. */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
                                         u64 *startp, u64 end)
{
        unsigned char *data = map->base + page_size;
        union perf_event *event = NULL;
        int diff = end - *startp;

        if (diff >= (int)sizeof(event->header)) {
                size_t size;

                event = (union perf_event *)&data[*startp & map->mask];
                size = event->header.size;

                if (size < sizeof(event->header) || diff < (int)size)
                        return NULL;

                /*
                 * Event straddles the mmap boundary -- header should always
                 * be inside due to u64 alignment of output.
                 */
                if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
                        unsigned int offset = *startp;
                        unsigned int len = size, cpy;
                        void *dst = map->event_copy;

                        if (size > map->event_copy_sz) {
                                dst = realloc(map->event_copy, size);
                                if (!dst)
                                        return NULL;
                                map->event_copy = dst;
                                map->event_copy_sz = size;
                        }

                        do {
                                cpy = min(map->mask + 1 - (offset & map->mask), len);
                                memcpy(dst, &data[offset & map->mask], cpy);
                                offset += cpy;
                                dst += cpy;
                                len -= cpy;
                        } while (len);

                        event = (union perf_event *)map->event_copy;
                }

                *startp += size;
        }

        return event;
}

/*
 * Read events from the ring buffer one by one.
 * Return one event for each call.
 *
 * Usage:
 * perf_mmap__read_init()
 * while(event = perf_mmap__read_event()) {
 *      //process the event
 *      perf_mmap__consume()
 * }
 * perf_mmap__read_done()
 */
union perf_event *perf_mmap__read_event(struct perf_mmap *map)
{
        union perf_event *event;

        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
        if (!refcount_read(&map->refcnt))
                return NULL;

        /* non-overwrite doesn't pause the ringbuffer */
        if (!map->overwrite)
                map->end = perf_mmap__read_head(map);

        event = perf_mmap__read(map, &map->start, map->end);

        if (!map->overwrite)
                map->prev = map->start;

        return event;
}
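
/*
 * Example (illustrative sketch only): draining one mmap'ed ring with the
 * calls documented above. 'map' is assumed to come from a perf_evlist that
 * was mmap'ed elsewhere; error handling is reduced to the bare minimum.
 *
 *      union perf_event *event;
 *
 *      if (perf_mmap__read_init(map) < 0)
 *              return;                 // nothing to read yet
 *
 *      while ((event = perf_mmap__read_event(map)) != NULL) {
 *              // inspect event->header.type / event->header.size here
 *              perf_mmap__consume(map);
 *      }
 *
 *      perf_mmap__read_done(map);
 */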

#if defined(__i386__) || defined(__x86_64__)
static u64 read_perf_counter(unsigned int counter)
{
        unsigned int low, high;

        asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

        return low | ((u64)high) << 32;
}
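
/*
 * Note: 'counter' is the 0-based hardware PMC index; perf_mmap__read_self()
 * below passes pc->index - 1 taken from the event's perf_event_mmap_page.
 */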

static u64 read_timestamp(void)
{
        unsigned int low, high;

        asm volatile("rdtsc" : "=a" (low), "=d" (high));

        return low | ((u64)high) << 32;
}
#elif defined(__aarch64__)
#define read_sysreg(r) ({ \
        u64 __val; \
        asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
        __val; \
})

static u64 read_pmccntr(void)
{
        return read_sysreg(pmccntr_el0);
}

#define PMEVCNTR_READ(idx) \
        static u64 read_pmevcntr_##idx(void) { \
                return read_sysreg(pmevcntr##idx##_el0); \
        }

PMEVCNTR_READ(0);
PMEVCNTR_READ(1);
PMEVCNTR_READ(2);
PMEVCNTR_READ(3);
PMEVCNTR_READ(4);
PMEVCNTR_READ(5);
PMEVCNTR_READ(6);
PMEVCNTR_READ(7);
PMEVCNTR_READ(8);
PMEVCNTR_READ(9);
PMEVCNTR_READ(10);
PMEVCNTR_READ(11);
PMEVCNTR_READ(12);
PMEVCNTR_READ(13);
PMEVCNTR_READ(14);
PMEVCNTR_READ(15);
PMEVCNTR_READ(16);
PMEVCNTR_READ(17);
PMEVCNTR_READ(18);
PMEVCNTR_READ(19);
PMEVCNTR_READ(20);
PMEVCNTR_READ(21);
PMEVCNTR_READ(22);
PMEVCNTR_READ(23);
PMEVCNTR_READ(24);
PMEVCNTR_READ(25);
PMEVCNTR_READ(26);
PMEVCNTR_READ(27);
PMEVCNTR_READ(28);
PMEVCNTR_READ(29);
PMEVCNTR_READ(30);

/*
 * Read a value direct from PMEVCNTR<idx>
 */
static u64 read_perf_counter(unsigned int counter)
{
        static u64 (* const read_f[])(void) = {
                read_pmevcntr_0,
                read_pmevcntr_1,
                read_pmevcntr_2,
                read_pmevcntr_3,
                read_pmevcntr_4,
                read_pmevcntr_5,
                read_pmevcntr_6,
                read_pmevcntr_7,
                read_pmevcntr_8,
                read_pmevcntr_9,
                read_pmevcntr_10,
                read_pmevcntr_11,
                read_pmevcntr_12,
                read_pmevcntr_13,
                read_pmevcntr_14,
                read_pmevcntr_15,
                read_pmevcntr_16,
                read_pmevcntr_17,
                read_pmevcntr_18,
                read_pmevcntr_19,
                read_pmevcntr_20,
                read_pmevcntr_21,
                read_pmevcntr_22,
                read_pmevcntr_23,
                read_pmevcntr_24,
                read_pmevcntr_25,
                read_pmevcntr_26,
                read_pmevcntr_27,
                read_pmevcntr_28,
                read_pmevcntr_29,
                read_pmevcntr_30,
                read_pmccntr
        };

        if (counter < ARRAY_SIZE(read_f))
                return (read_f[counter])();

        return 0;
}
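
/*
 * The table above is indexed by the 0-based counter number; the last slot
 * (index 31) reads the dedicated cycle counter via pmccntr_el0.
 */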

static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }

/* __riscv_xlen contains the width of the native base integer, here 64-bit */
#elif defined(__riscv) && __riscv_xlen == 64

/* TODO: implement rv32 support */

#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01

#define csr_read(csr) \
({ \
        register unsigned long __v; \
        __asm__ __volatile__ ("csrr %0, %1" \
                              : "=r" (__v) \
                              : "i" (csr) : ); \
        __v; \
})
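
/*
 * csrr only takes its CSR number as an immediate, so csr_read_num() below
 * maps a runtime CSR number onto it by expanding a 32-way switch covering
 * CSR_CYCLE .. CSR_CYCLE + 31 (cycle, time, instret and the hpmcounters).
 */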
static unsigned long csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val) {\
        case __csr_num: \
                __val = csr_read(__csr_num); \
                break; }
#define switchcase_csr_read_2(__csr_num, __val) {\
        switchcase_csr_read(__csr_num + 0, __val) \
        switchcase_csr_read(__csr_num + 1, __val)}
#define switchcase_csr_read_4(__csr_num, __val) {\
        switchcase_csr_read_2(__csr_num + 0, __val) \
        switchcase_csr_read_2(__csr_num + 2, __val)}
#define switchcase_csr_read_8(__csr_num, __val) {\
        switchcase_csr_read_4(__csr_num + 0, __val) \
        switchcase_csr_read_4(__csr_num + 4, __val)}
#define switchcase_csr_read_16(__csr_num, __val) {\
        switchcase_csr_read_8(__csr_num + 0, __val) \
        switchcase_csr_read_8(__csr_num + 8, __val)}
#define switchcase_csr_read_32(__csr_num, __val) {\
        switchcase_csr_read_16(__csr_num + 0, __val) \
        switchcase_csr_read_16(__csr_num + 16, __val)}

        unsigned long ret = 0;

        switch (csr_num) {
        switchcase_csr_read_32(CSR_CYCLE, ret)
        default:
                break;
        }

        return ret;
#undef switchcase_csr_read_32
#undef switchcase_csr_read_16
#undef switchcase_csr_read_8
#undef switchcase_csr_read_4
#undef switchcase_csr_read_2
#undef switchcase_csr_read
}

static u64 read_perf_counter(unsigned int counter)
{
        return csr_read_num(CSR_CYCLE + counter);
}

static u64 read_timestamp(void)
{
        return csr_read_num(CSR_TIME);
}

#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }
#endif

int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
{
        struct perf_event_mmap_page *pc = map->base;
        u32 seq, idx, time_mult = 0, time_shift = 0;
        u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;

        if (!pc || !pc->cap_user_rdpmc)
                return -1;

        do {
                seq = READ_ONCE(pc->lock);
                barrier();

                count->ena = READ_ONCE(pc->time_enabled);
                count->run = READ_ONCE(pc->time_running);

                if (pc->cap_user_time && count->ena != count->run) {
                        cyc = read_timestamp();
                        time_mult = READ_ONCE(pc->time_mult);
                        time_shift = READ_ONCE(pc->time_shift);
                        time_offset = READ_ONCE(pc->time_offset);

                        if (pc->cap_user_time_short) {
                                time_cycles = READ_ONCE(pc->time_cycles);
                                time_mask = READ_ONCE(pc->time_mask);
                        }
                }

                idx = READ_ONCE(pc->index);
                cnt = READ_ONCE(pc->offset);
                if (pc->cap_user_rdpmc && idx) {
                        s64 evcnt = read_perf_counter(idx - 1);
                        u16 width = READ_ONCE(pc->pmc_width);

                        evcnt <<= 64 - width;
                        evcnt >>= 64 - width;
                        cnt += evcnt;
                } else
                        return -1;

                barrier();
        } while (READ_ONCE(pc->lock) != seq);

        if (count->ena != count->run) {
                u64 delta;

                /* Adjust for cap_user_time_short, a nop if not */
                cyc = time_cycles + ((cyc - time_cycles) & time_mask);

                delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);

                count->ena += delta;
                if (idx)
                        count->run += delta;
        }

        count->val = cnt;

        return 0;
}
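
/*
 * Example (illustrative sketch only): reading a self-monitored counter from
 * user space, assuming 'map' wraps the mmap'ed page of an event opened for
 * the calling thread with rdpmc access enabled:
 *
 *      struct perf_counts_values counts = { 0 };
 *
 *      if (perf_mmap__read_self(map, &counts) == 0) {
 *              // counts.val holds the raw count; counts.ena and counts.run
 *              // allow scaling when the event was multiplexed
 *      }
 */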