Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/riscv/kvm/vcpu_pmu.c
26442 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Copyright (c) 2023 Rivos Inc
4
*
5
* Authors:
6
* Atish Patra <[email protected]>
7
*/
8
9
#define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
10
#include <linux/errno.h>
11
#include <linux/err.h>
12
#include <linux/kvm_host.h>
13
#include <linux/perf/riscv_pmu.h>
14
#include <asm/csr.h>
15
#include <asm/kvm_vcpu_sbi.h>
16
#include <asm/kvm_vcpu_pmu.h>
17
#include <asm/sbi.h>
18
#include <linux/bitops.h>
19
20
/* Total number of virtual counters of @pmu: hardware plus firmware counters. */
#define kvm_pmu_num_counters(pmu)	((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)

/* Event type field of an SBI event index: masked, then shifted down by 16 bits. */
#define get_event_type(x)		(((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)

/* Event code field of an SBI event index (masked, not shifted). */
#define get_event_code(x)		((x) & SBI_PMU_EVENT_IDX_CODE_MASK)
23
24
/*
 * Translation table from SBI PMU general hardware event codes to the generic
 * perf hardware event ids. It is indexed by the SBI event code; slots without
 * an explicit initializer are zero-initialized.
 *
 * The table is only ever read in this file, so it is declared const.
 */
static const enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
	[SBI_PMU_HW_CPU_CYCLES]			= PERF_COUNT_HW_CPU_CYCLES,
	[SBI_PMU_HW_INSTRUCTIONS]		= PERF_COUNT_HW_INSTRUCTIONS,
	[SBI_PMU_HW_CACHE_REFERENCES]		= PERF_COUNT_HW_CACHE_REFERENCES,
	[SBI_PMU_HW_CACHE_MISSES]		= PERF_COUNT_HW_CACHE_MISSES,
	[SBI_PMU_HW_BRANCH_INSTRUCTIONS]	= PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	[SBI_PMU_HW_BRANCH_MISSES]		= PERF_COUNT_HW_BRANCH_MISSES,
	[SBI_PMU_HW_BUS_CYCLES]			= PERF_COUNT_HW_BUS_CYCLES,
	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND]	= PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	[SBI_PMU_HW_STALLED_CYCLES_BACKEND]	= PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	[SBI_PMU_HW_REF_CPU_CYCLES]		= PERF_COUNT_HW_REF_CPU_CYCLES,
};
36
37
static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
38
{
39
u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
40
u64 sample_period;
41
42
if (!pmc->counter_val)
43
sample_period = counter_val_mask;
44
else
45
sample_period = (-pmc->counter_val) & counter_val_mask;
46
47
return sample_period;
48
}
49
50
static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
51
{
52
enum sbi_pmu_event_type etype = get_event_type(eidx);
53
u32 type = PERF_TYPE_MAX;
54
55
switch (etype) {
56
case SBI_PMU_EVENT_TYPE_HW:
57
type = PERF_TYPE_HARDWARE;
58
break;
59
case SBI_PMU_EVENT_TYPE_CACHE:
60
type = PERF_TYPE_HW_CACHE;
61
break;
62
case SBI_PMU_EVENT_TYPE_RAW:
63
case SBI_PMU_EVENT_TYPE_FW:
64
type = PERF_TYPE_RAW;
65
break;
66
default:
67
break;
68
}
69
70
return type;
71
}
72
73
static bool kvm_pmu_is_fw_event(unsigned long eidx)
74
{
75
return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
76
}
77
78
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
79
{
80
if (pmc->perf_event) {
81
perf_event_disable(pmc->perf_event);
82
perf_event_release_kernel(pmc->perf_event);
83
pmc->perf_event = NULL;
84
}
85
}
86
87
/*
 * Look up the perf hardware event id for an SBI general hardware event code.
 * No range check is done here; the caller in this file validates the code
 * against SBI_PMU_HW_GENERAL_MAX before calling.
 */
static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
{
	const enum perf_hw_id perf_id = hw_event_perf_map[sbi_event_code];

	return perf_id;
}
91
92
/*
 * Build a perf cache event config from an SBI cache event code.
 *
 * The SBI code is split into cache id, operation id and result id. When any
 * of them is out of the perf range, U64_MAX is returned. Otherwise the
 * config is: cache id | (operation id << 8) | (result id << 16).
 */
static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
{
	unsigned int type_id, op_id, result_id;

	/* All the cache event masks lie within 0xFF, so no extra masking is needed. */
	type_id = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
		  SBI_PMU_EVENT_CACHE_ID_SHIFT;
	op_id = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
		SBI_PMU_EVENT_CACHE_OP_SHIFT;
	result_id = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

	if (type_id >= PERF_COUNT_HW_CACHE_MAX ||
	    op_id >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    result_id >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return U64_MAX;

	return type_id | (op_id << 8) | (result_id << 16);
}
113
114
/*
 * Compute the perf_event_attr config value for the SBI event @eidx.
 *
 * @evt_data is only used for raw events, where it is masked with
 * RISCV_PMU_RAW_EVENT_MASK. Firmware events are encoded as bit 63 plus the
 * event code. U64_MAX is returned for an unknown event type or an
 * out-of-range hardware/firmware event code.
 */
static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
{
	const enum sbi_pmu_event_type sbi_type = get_event_type(eidx);
	const u32 code = get_event_code(eidx);

	switch (sbi_type) {
	case SBI_PMU_EVENT_TYPE_HW:
		if (code >= SBI_PMU_HW_GENERAL_MAX)
			return U64_MAX;
		return kvm_pmu_get_perf_event_hw_config(code);
	case SBI_PMU_EVENT_TYPE_CACHE:
		return kvm_pmu_get_perf_event_cache_config(code);
	case SBI_PMU_EVENT_TYPE_RAW:
		return evt_data & RISCV_PMU_RAW_EVENT_MASK;
	case SBI_PMU_EVENT_TYPE_FW:
		if (code >= SBI_PMU_FW_MAX)
			return U64_MAX;
		return (1ULL << 63) | code;
	default:
		return U64_MAX;
	}
}
141
142
static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
143
{
144
u32 etype = kvm_pmu_get_perf_event_type(eidx);
145
u32 ecode = get_event_code(eidx);
146
147
if (etype != SBI_PMU_EVENT_TYPE_HW)
148
return -EINVAL;
149
150
if (ecode == SBI_PMU_HW_CPU_CYCLES)
151
return 0;
152
else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
153
return 2;
154
else
155
return -EINVAL;
156
}
157
158
static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
159
unsigned long cbase, unsigned long cmask)
160
{
161
int ctr_idx = -1;
162
int i, pmc_idx;
163
int min, max;
164
165
if (kvm_pmu_is_fw_event(eidx)) {
166
/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
167
min = kvpmu->num_hw_ctrs;
168
max = min + kvpmu->num_fw_ctrs;
169
} else {
170
/* First 3 counters are reserved for fixed counters */
171
min = 3;
172
max = kvpmu->num_hw_ctrs;
173
}
174
175
for_each_set_bit(i, &cmask, BITS_PER_LONG) {
176
pmc_idx = i + cbase;
177
if ((pmc_idx >= min && pmc_idx < max) &&
178
!test_bit(pmc_idx, kvpmu->pmc_in_use)) {
179
ctr_idx = pmc_idx;
180
break;
181
}
182
}
183
184
return ctr_idx;
185
}
186
187
/*
 * Resolve the counter index for @eidx: a fixed counter when the event has
 * one, otherwise a free programmable counter from @cbase/@cmask.
 * Returns a negative value when no counter can be assigned.
 */
static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	/* Fixed counters need a fixed mapping because they have a different width */
	int fixed_idx = kvm_pmu_get_fixed_pmc_index(eidx);

	if (fixed_idx < 0)
		return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);

	return fixed_idx;
}
199
200
static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
201
unsigned long *out_val)
202
{
203
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
204
struct kvm_pmc *pmc;
205
int fevent_code;
206
207
if (!IS_ENABLED(CONFIG_32BIT)) {
208
pr_warn("%s: should be invoked for only RV32\n", __func__);
209
return -EINVAL;
210
}
211
212
if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
213
pr_warn("Invalid counter id [%ld]during read\n", cidx);
214
return -EINVAL;
215
}
216
217
pmc = &kvpmu->pmc[cidx];
218
219
if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
220
return -EINVAL;
221
222
fevent_code = get_event_code(pmc->event_idx);
223
pmc->counter_val = kvpmu->fw_event[fevent_code].value;
224
225
*out_val = pmc->counter_val >> 32;
226
227
return 0;
228
}
229
230
static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
231
unsigned long *out_val)
232
{
233
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
234
struct kvm_pmc *pmc;
235
u64 enabled, running;
236
int fevent_code;
237
238
if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
239
pr_warn("Invalid counter id [%ld] during read\n", cidx);
240
return -EINVAL;
241
}
242
243
pmc = &kvpmu->pmc[cidx];
244
245
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
246
fevent_code = get_event_code(pmc->event_idx);
247
pmc->counter_val = kvpmu->fw_event[fevent_code].value;
248
} else if (pmc->perf_event) {
249
pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
250
} else {
251
return -EINVAL;
252
}
253
*out_val = pmc->counter_val;
254
255
return 0;
256
}
257
258
static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
259
unsigned long ctr_mask)
260
{
261
/* Make sure the we have a valid counter mask requested from the caller */
262
if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
263
return -EINVAL;
264
265
return 0;
266
}
267
268
static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
269
struct perf_sample_data *data,
270
struct pt_regs *regs)
271
{
272
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
273
struct kvm_vcpu *vcpu = pmc->vcpu;
274
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
275
struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
276
u64 period;
277
278
/*
279
* Stop the event counting by directly accessing the perf_event.
280
* Otherwise, this needs to deferred via a workqueue.
281
* That will introduce skew in the counter value because the actual
282
* physical counter would start after returning from this function.
283
* It will be stopped again once the workqueue is scheduled
284
*/
285
rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
286
287
/*
288
* The hw counter would start automatically when this function returns.
289
* Thus, the host may continue to interrupt and inject it to the guest
290
* even without the guest configuring the next event. Depending on the hardware
291
* the host may have some sluggishness only if privilege mode filtering is not
292
* available. In an ideal world, where qemu is not the only capable hardware,
293
* this can be removed.
294
* FYI: ARM64 does this way while x86 doesn't do anything as such.
295
* TODO: Should we keep it for RISC-V ?
296
*/
297
period = -(local64_read(&perf_event->count));
298
299
local64_set(&perf_event->hw.period_left, 0);
300
perf_event->attr.sample_period = period;
301
perf_event->hw.sample_period = period;
302
303
set_bit(pmc->idx, kvpmu->pmc_overflown);
304
kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
305
306
rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
307
}
308
309
static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
310
unsigned long flags, unsigned long eidx,
311
unsigned long evtdata)
312
{
313
struct perf_event *event;
314
315
kvm_pmu_release_perf_event(pmc);
316
attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
317
if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
318
//TODO: Do we really want to clear the value in hardware counter
319
pmc->counter_val = 0;
320
}
321
322
/*
323
* Set the default sample_period for now. The guest specified value
324
* will be updated in the start call.
325
*/
326
attr->sample_period = kvm_pmu_get_sample_period(pmc);
327
328
event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
329
if (IS_ERR(event)) {
330
pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
331
return PTR_ERR(event);
332
}
333
334
pmc->perf_event = event;
335
if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
336
perf_event_enable(pmc->perf_event);
337
338
return 0;
339
}
340
341
int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
342
{
343
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
344
struct kvm_fw_event *fevent;
345
346
if (!kvpmu || fid >= SBI_PMU_FW_MAX)
347
return -EINVAL;
348
349
fevent = &kvpmu->fw_event[fid];
350
if (fevent->started)
351
fevent->value++;
352
353
return 0;
354
}
355
356
int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
357
unsigned long *val, unsigned long new_val,
358
unsigned long wr_mask)
359
{
360
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
361
int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;
362
363
if (!kvpmu || !kvpmu->init_done) {
364
/*
365
* In absence of sscofpmf in the platform, the guest OS may use
366
* the legacy PMU driver to read cycle/instret. In that case,
367
* just return 0 to avoid any illegal trap. However, any other
368
* hpmcounter access should result in illegal trap as they must
369
* be access through SBI PMU only.
370
*/
371
if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
372
*val = 0;
373
return ret;
374
} else {
375
return KVM_INSN_ILLEGAL_TRAP;
376
}
377
}
378
379
/* The counter CSR are read only. Thus, any write should result in illegal traps */
380
if (wr_mask)
381
return KVM_INSN_ILLEGAL_TRAP;
382
383
cidx = csr_num - CSR_CYCLE;
384
385
if (pmu_ctr_read(vcpu, cidx, val) < 0)
386
return KVM_INSN_ILLEGAL_TRAP;
387
388
return ret;
389
}
390
391
static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
392
{
393
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
394
395
kfree(kvpmu->sdata);
396
kvpmu->sdata = NULL;
397
kvpmu->snapshot_addr = INVALID_GPA;
398
}
399
400
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
401
unsigned long saddr_high, unsigned long flags,
402
struct kvm_vcpu_sbi_return *retdata)
403
{
404
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
405
int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
406
int sbiret = 0;
407
gpa_t saddr;
408
unsigned long hva;
409
bool writable;
410
411
if (!kvpmu || flags) {
412
sbiret = SBI_ERR_INVALID_PARAM;
413
goto out;
414
}
415
416
if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
417
kvm_pmu_clear_snapshot_area(vcpu);
418
return 0;
419
}
420
421
saddr = saddr_low;
422
423
if (saddr_high != 0) {
424
if (IS_ENABLED(CONFIG_32BIT))
425
saddr |= ((gpa_t)saddr_high << 32);
426
else
427
sbiret = SBI_ERR_INVALID_ADDRESS;
428
goto out;
429
}
430
431
hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
432
if (kvm_is_error_hva(hva) || !writable) {
433
sbiret = SBI_ERR_INVALID_ADDRESS;
434
goto out;
435
}
436
437
kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
438
if (!kvpmu->sdata)
439
return -ENOMEM;
440
441
if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
442
kfree(kvpmu->sdata);
443
sbiret = SBI_ERR_FAILURE;
444
goto out;
445
}
446
447
kvpmu->snapshot_addr = saddr;
448
449
out:
450
retdata->err_val = sbiret;
451
452
return 0;
453
}
454
455
int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
456
struct kvm_vcpu_sbi_return *retdata)
457
{
458
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
459
460
retdata->out_val = kvm_pmu_num_counters(kvpmu);
461
462
return 0;
463
}
464
465
int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
466
struct kvm_vcpu_sbi_return *retdata)
467
{
468
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
469
470
if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
471
retdata->err_val = SBI_ERR_INVALID_PARAM;
472
return 0;
473
}
474
475
retdata->out_val = kvpmu->pmc[cidx].cinfo.value;
476
477
return 0;
478
}
479
480
/*
 * Start the configured counters selected by @ctr_base/@ctr_mask.
 *
 * @flags: SBI_PMU_START_FLAG_SET_INIT_VALUE loads @ival into each counter;
 *	otherwise SBI_PMU_START_FLAG_INIT_SNAPSHOT loads the value from the
 *	snapshot shared memory (read from the guest first).
 * @retdata: receives the SBI status in err_val.
 *
 * Counters that are not marked in use are skipped. The status reported is
 * the one set by the last counter that reported a problem. Always returns 0.
 */
int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				 unsigned long ctr_mask, unsigned long flags, u64 ival,
				 struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	const bool use_snapshot = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;
	struct kvm_fw_event *fw_ev;
	struct kvm_pmc *ctr;
	int bit, ctr_idx, fw_code;
	int status = 0;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		status = SBI_ERR_INVALID_PARAM;
		goto done;
	}

	if (use_snapshot) {
		if (kvpmu->snapshot_addr == INVALID_GPA) {
			status = SBI_ERR_NO_SHMEM;
			goto done;
		}
		if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
					sizeof(struct riscv_pmu_snapshot_data))) {
			pr_warn("Unable to read snapshot shared memory while starting counters\n");
			status = SBI_ERR_FAILURE;
			goto done;
		}
	}

	/* Start the counters that have been configured and requested by the guest */
	for_each_set_bit(bit, &ctr_mask, RISCV_MAX_COUNTERS) {
		ctr_idx = bit + ctr_base;
		if (!test_bit(ctr_idx, kvpmu->pmc_in_use))
			continue;

		/* The guest started the counter again, so reset the overflow status */
		clear_bit(ctr_idx, kvpmu->pmc_overflown);
		ctr = &kvpmu->pmc[ctr_idx];

		/* Snapshot entries are indexed relative to the counter base */
		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE)
			ctr->counter_val = ival;
		else if (use_snapshot)
			ctr->counter_val = kvpmu->sdata->ctr_values[bit];

		if (ctr->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
			fw_code = get_event_code(ctr->event_idx);
			if (fw_code >= SBI_PMU_FW_MAX) {
				status = SBI_ERR_INVALID_PARAM;
				goto done;
			}

			fw_ev = &kvpmu->fw_event[fw_code];

			/* Check if the counter was already started for some reason */
			if (fw_ev->started) {
				status = SBI_ERR_ALREADY_STARTED;
				continue;
			}

			fw_ev->started = true;
			fw_ev->value = ctr->counter_val;
			continue;
		}

		if (!ctr->perf_event) {
			status = SBI_ERR_INVALID_PARAM;
			continue;
		}

		if (unlikely(ctr->started)) {
			status = SBI_ERR_ALREADY_STARTED;
			continue;
		}

		perf_event_period(ctr->perf_event, kvm_pmu_get_sample_period(ctr));
		perf_event_enable(ctr->perf_event);
		ctr->started = true;
	}

done:
	retdata->err_val = status;

	return 0;
}
555
556
int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
557
unsigned long ctr_mask, unsigned long flags,
558
struct kvm_vcpu_sbi_return *retdata)
559
{
560
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
561
int i, pmc_index, sbiret = 0;
562
u64 enabled, running;
563
struct kvm_pmc *pmc;
564
int fevent_code;
565
bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
566
bool shmem_needs_update = false;
567
568
if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
569
sbiret = SBI_ERR_INVALID_PARAM;
570
goto out;
571
}
572
573
if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
574
sbiret = SBI_ERR_NO_SHMEM;
575
goto out;
576
}
577
578
/* Stop the counters that have been configured and requested by the guest */
579
for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
580
pmc_index = i + ctr_base;
581
if (!test_bit(pmc_index, kvpmu->pmc_in_use))
582
continue;
583
pmc = &kvpmu->pmc[pmc_index];
584
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
585
fevent_code = get_event_code(pmc->event_idx);
586
if (fevent_code >= SBI_PMU_FW_MAX) {
587
sbiret = SBI_ERR_INVALID_PARAM;
588
goto out;
589
}
590
591
if (!kvpmu->fw_event[fevent_code].started)
592
sbiret = SBI_ERR_ALREADY_STOPPED;
593
594
kvpmu->fw_event[fevent_code].started = false;
595
} else if (pmc->perf_event) {
596
if (pmc->started) {
597
/* Stop counting the counter */
598
perf_event_disable(pmc->perf_event);
599
pmc->started = false;
600
} else {
601
sbiret = SBI_ERR_ALREADY_STOPPED;
602
}
603
604
if (flags & SBI_PMU_STOP_FLAG_RESET)
605
/* Release the counter if this is a reset request */
606
kvm_pmu_release_perf_event(pmc);
607
} else {
608
sbiret = SBI_ERR_INVALID_PARAM;
609
}
610
611
if (snap_flag_set && !sbiret) {
612
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
613
pmc->counter_val = kvpmu->fw_event[fevent_code].value;
614
else if (pmc->perf_event)
615
pmc->counter_val += perf_event_read_value(pmc->perf_event,
616
&enabled, &running);
617
/*
618
* The counter and overflow indicies in the snapshot region are w.r.to
619
* cbase. Modify the set bit in the counter mask instead of the pmc_index
620
* which indicates the absolute counter index.
621
*/
622
if (test_bit(pmc_index, kvpmu->pmc_overflown))
623
kvpmu->sdata->ctr_overflow_mask |= BIT(i);
624
kvpmu->sdata->ctr_values[i] = pmc->counter_val;
625
shmem_needs_update = true;
626
}
627
628
if (flags & SBI_PMU_STOP_FLAG_RESET) {
629
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
630
clear_bit(pmc_index, kvpmu->pmc_in_use);
631
clear_bit(pmc_index, kvpmu->pmc_overflown);
632
if (snap_flag_set) {
633
/*
634
* Only clear the given counter as the caller is responsible to
635
* validate both the overflow mask and configured counters.
636
*/
637
kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
638
shmem_needs_update = true;
639
}
640
}
641
}
642
643
if (shmem_needs_update)
644
kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
645
sizeof(struct riscv_pmu_snapshot_data));
646
647
out:
648
retdata->err_val = sbiret;
649
650
return 0;
651
}
652
653
/*
 * Find a counter for the SBI event @eidx among @ctr_base/@ctr_mask and
 * configure it.
 *
 * With SBI_PMU_CFG_FLAG_SKIP_MATCH the first counter of the mask is used and
 * must already be marked in use. Otherwise a fixed or free programmable
 * counter is chosen. Firmware events need no perf event; all other events
 * get a perf event created from the attributes built here.
 *
 * On success the counter is marked in use and its index is returned in
 * @retdata->out_val. @retdata->err_val receives the SBI status.
 * Always returns 0.
 */
int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
				     unsigned long ctr_mask, unsigned long flags,
				     unsigned long eidx, u64 evtdata,
				     struct kvm_vcpu_sbi_return *retdata)
{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *target;
	unsigned long fw_code;
	bool fw_event;
	int chosen, status = 0;
	struct perf_event_attr attr = {
		.type = kvm_pmu_get_perf_event_type(eidx),
		.size = sizeof(struct perf_event_attr),
		.pinned = true,
		.disabled = true,
		/*
		 * It should never reach here if the platform doesn't support
		 * the sscofpmf extension as mode filtering won't work without it.
		 */
		.exclude_host = true,
		.exclude_hv = true,
		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
		status = SBI_ERR_INVALID_PARAM;
		goto done;
	}

	fw_code = get_event_code(eidx);
	fw_event = kvm_pmu_is_fw_event(eidx);
	if (fw_event && fw_code >= SBI_PMU_FW_MAX) {
		status = SBI_ERR_NOT_SUPPORTED;
		goto done;
	}

	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
		/*
		 * SKIP_MATCH means the caller already knows which counter is
		 * assigned to this event; only sanity check that it is
		 * marked as used.
		 */
		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
			status = SBI_ERR_FAILURE;
			goto done;
		}
		chosen = ctr_base + __ffs(ctr_mask);
	} else {
		chosen = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
		if (chosen < 0) {
			status = SBI_ERR_NOT_SUPPORTED;
			goto done;
		}
	}

	target = &kvpmu->pmc[chosen];
	target->idx = chosen;

	if (fw_event) {
		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
			kvpmu->fw_event[fw_code].started = true;
	} else if (kvm_pmu_create_perf_event(target, &attr, flags, eidx, evtdata)) {
		status = SBI_ERR_NOT_SUPPORTED;
		goto done;
	}

	set_bit(chosen, kvpmu->pmc_in_use);
	target->event_idx = eidx;
	retdata->out_val = chosen;

done:
	retdata->err_val = status;

	return 0;
}
733
734
int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
735
struct kvm_vcpu_sbi_return *retdata)
736
{
737
int ret;
738
739
ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
740
if (ret == -EINVAL)
741
retdata->err_val = SBI_ERR_INVALID_PARAM;
742
743
return 0;
744
}
745
746
int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
747
struct kvm_vcpu_sbi_return *retdata)
748
{
749
int ret;
750
751
ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
752
if (ret == -EINVAL)
753
retdata->err_val = SBI_ERR_INVALID_PARAM;
754
755
return 0;
756
}
757
758
void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
759
{
760
int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
761
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
762
struct kvm_pmc *pmc;
763
764
/*
765
* PMU functionality should be only available to guests if privilege mode
766
* filtering is available in the host. Otherwise, guest will always count
767
* events while the execution is in hypervisor mode.
768
*/
769
if (!riscv_isa_extension_available(NULL, SSCOFPMF))
770
return;
771
772
ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
773
if (ret < 0 || !hpm_width || !num_hw_ctrs)
774
return;
775
776
/*
777
* Increase the number of hardware counters to offset the time counter.
778
*/
779
kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
780
kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
781
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
782
kvpmu->snapshot_addr = INVALID_GPA;
783
784
if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
785
pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
786
kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
787
}
788
789
/*
790
* There is no correlation between the logical hardware counter and virtual counters.
791
* However, we need to encode a hpmcounter CSR in the counter info field so that
792
* KVM can trap n emulate the read. This works well in the migration use case as
793
* KVM doesn't care if the actual hpmcounter is available in the hardware or not.
794
*/
795
for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
796
/* TIME CSR shouldn't be read from perf interface */
797
if (i == 1)
798
continue;
799
pmc = &kvpmu->pmc[i];
800
pmc->idx = i;
801
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
802
pmc->vcpu = vcpu;
803
if (i < kvpmu->num_hw_ctrs) {
804
pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
805
if (i < 3)
806
/* CY, IR counters */
807
pmc->cinfo.width = 63;
808
else
809
pmc->cinfo.width = hpm_width;
810
/*
811
* The CSR number doesn't have any relation with the logical
812
* hardware counters. The CSR numbers are encoded sequentially
813
* to avoid maintaining a map between the virtual counter
814
* and CSR number.
815
*/
816
pmc->cinfo.csr = CSR_CYCLE + i;
817
} else {
818
pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
819
pmc->cinfo.width = 63;
820
}
821
}
822
823
kvpmu->init_done = true;
824
}
825
826
void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
827
{
828
struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
829
struct kvm_pmc *pmc;
830
int i;
831
832
if (!kvpmu)
833
return;
834
835
for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
836
pmc = &kvpmu->pmc[i];
837
pmc->counter_val = 0;
838
kvm_pmu_release_perf_event(pmc);
839
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
840
}
841
bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
842
bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
843
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
844
kvm_pmu_clear_snapshot_area(vcpu);
845
}
846
847
/* Reset the PMU of @vcpu; this performs the same teardown as a deinit. */
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	/* All per-counter and snapshot state is dropped by the deinit path. */
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}
851
852