GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/kvm/arch_timer.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <[email protected]>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static unsigned int host_ptimer_irq;
static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;

static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);

static const u8 default_ppi[] = {
	[TIMER_PTIMER]  = 30,
	[TIMER_VTIMER]  = 27,
	[TIMER_HPTIMER] = 26,
	[TIMER_HVTIMER] = 28,
};

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val);
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);

static struct irq_ops arch_timer_irq_ops = {
	.get_input_level = kvm_arch_timer_get_input_level,
};

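/*
 * Without nested virt, only the two EL0 timers (virtual and physical)
 * are visible to the guest. With NV, the EL2 timers are exposed as well.
 */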
static int nr_timers(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_nv(vcpu))
		return NR_KVM_EL0_TIMERS;

	return NR_KVM_TIMERS;
}

u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

u64 timer_get_cval(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

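/*
 * The CTL/CVAL accessors map a timer context onto the vcpu sysreg file,
 * picking the EL0 or EL2 view of the register based on the context index.
 */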
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl);
		break;
	case TIMER_PTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl);
		break;
	case TIMER_HVTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl);
		break;
	case TIMER_HPTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl);
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval);
		break;
	case TIMER_PTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval);
		break;
	case TIMER_HVTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval);
		break;
	case TIMER_HPTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval);
		break;
	default:
		WARN_ON(1);
	}
}

u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

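/*
 * Establish which timers are emulated and which are backed by hardware
 * for the current vcpu context: with NV, the EL2 timers are the "direct"
 * pair while running the guest hypervisor and the EL1 timers are direct
 * otherwise; without VHE, the physical timer is always emulated.
 */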
void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	if (vcpu_has_nv(vcpu)) {
		if (is_hyp_ctxt(vcpu)) {
			map->direct_vtimer = vcpu_hvtimer(vcpu);
			map->direct_ptimer = vcpu_hptimer(vcpu);
			map->emul_vtimer = vcpu_vtimer(vcpu);
			map->emul_ptimer = vcpu_ptimer(vcpu);
		} else {
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = vcpu_ptimer(vcpu);
			map->emul_vtimer = vcpu_hvtimer(vcpu);
			map->emul_ptimer = vcpu_hptimer(vcpu);
		}
	} else if (has_vhe()) {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = vcpu_ptimer(vcpu);
		map->emul_vtimer = NULL;
		map->emul_ptimer = NULL;
	} else {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = NULL;
		map->emul_vtimer = NULL;
		map->emul_ptimer = vcpu_ptimer(vcpu);
	}

	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
}

static inline bool userspace_irqchip(struct kvm *kvm)
{
	return unlikely(!irqchip_in_kernel(kvm));
}

static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS_HARD);
}

static void soft_timer_cancel(struct hrtimer *hrt)
{
	hrtimer_cancel(hrt);
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
	struct arch_timer_context *ctx;
	struct timer_map map;

	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
	 * has been disabled in timer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
	 */
	if (!vcpu)
		return IRQ_HANDLED;

	get_timer_map(vcpu, &map);

	if (irq == host_vtimer_irq)
		ctx = map.direct_vtimer;
	else
		ctx = map.direct_ptimer;

	if (kvm_timer_should_fire(ctx))
		kvm_timer_update_irq(vcpu, true, ctx);

	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
		disable_percpu_irq(host_vtimer_irq);

	return IRQ_HANDLED;
}

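/*
 * Number of nanoseconds until the guest view of the counter reaches
 * @val, or 0 if that point has already passed.
 */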
static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (now < val) {
		u64 ns;

		ns = cyclecounter_cyc2ns(timecounter->cc,
					 val - now,
					 timecounter->mask,
					 &timer_ctx->ns_frac);
		return ns;
	}

	return 0;
}

static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
	WARN_ON(timer_ctx && timer_ctx->loaded);
	return timer_ctx &&
		((timer_get_ctl(timer_ctx) &
		  (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}

static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
		vcpu_get_flag(vcpu, IN_WFIT));
}

static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
	struct arch_timer_context *ctx;

	ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);

	return kvm_counter_compute_delta(ctx, val);
}

/*
 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of the timers can fire.
 */
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
{
	u64 min_delta = ULLONG_MAX;
	int i;

	for (i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];

		WARN(ctx->loaded, "timer %d loaded\n", i);
		if (kvm_timer_irq_can_fire(ctx))
			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
	}

	if (vcpu_has_wfit_active(vcpu))
		min_delta = min(min_delta, wfit_delay_ns(vcpu));

	/* If none of the timers can fire, then return 0 */
	if (min_delta == ULLONG_MAX)
		return 0;

	return min_delta;
}

static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If we should have slept longer, restart it.
	 */
	ns = kvm_timer_earliest_exp(vcpu);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
{
	struct arch_timer_context *ctx;
	struct kvm_vcpu *vcpu;
	u64 ns;

	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
	vcpu = timer_context_to_vcpu(ctx);

	trace_kvm_timer_hrtimer_expire(ctx);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
	ns = kvm_timer_compute_delta(ctx);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_timer_update_irq(vcpu, true, ctx);
	return HRTIMER_NORESTART;
}

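/*
 * Compute the timer output level: when the context is loaded on the CPU,
 * read the hardware CTL register; otherwise compare the saved CVAL with
 * the current guest view of the counter.
 */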
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
	enum kvm_arch_timers index;
	u64 cval, now;

	if (!timer_ctx)
		return false;

	index = arch_timer_ctx_index(timer_ctx);

	if (timer_ctx->loaded) {
		u32 cnt_ctl = 0;

		switch (index) {
		case TIMER_VTIMER:
		case TIMER_HVTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
			break;
		case TIMER_PTIMER:
		case TIMER_HPTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
			break;
		case NR_KVM_TIMERS:
			/* GCC is braindead */
			cnt_ctl = 0;
			break;
		}

		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
			(cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
	}

	if (!kvm_timer_irq_can_fire(timer_ctx))
		return false;

	cval = timer_get_cval(timer_ctx);
	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	return cval <= now;
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}

/*
 * Reflect the timer output level into the kvm_run structure
 */
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the device bitmap with the timer states */
	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
				    KVM_ARM_DEV_EL1_PTIMER);
	if (kvm_timer_should_fire(vtimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
	if (kvm_timer_should_fire(ptimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}

static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
{
	/*
	 * Paper over NV2 brokenness by publishing the interrupt status
	 * bit. This still results in a poor quality of emulation (guest
	 * writes will have no effect until the next exit).
	 *
	 * But hey, it's fast, right?
	 */
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
	if (is_hyp_ctxt(vcpu) &&
	    (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) {
		unsigned long val = timer_get_ctl(ctx);
		__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
		timer_set_ctl(ctx, val);
	}
}

static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	kvm_timer_update_status(timer_ctx, new_level);

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (userspace_irqchip(vcpu->kvm))
		return;

	kvm_vgic_inject_irq(vcpu->kvm, vcpu,
			    timer_irq(timer_ctx),
			    timer_ctx->irq.level,
			    timer_ctx);
}

/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
	bool should_fire = kvm_timer_should_fire(ctx);

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level)
		kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx);

	kvm_timer_update_status(ctx, should_fire);

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future. If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (should_fire || !kvm_timer_irq_can_fire(ctx))
		return;

	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}

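/*
 * Program the host's view of the guest counter offsets: CNTVOFF_EL2 via a
 * hypervisor call, and CNTPOFF_EL2 directly when the CPU provides it.
 */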
static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}

static void set_cntpoff(u64 cntpoff)
{
	if (has_cntpoff())
		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}

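/*
 * Sync the in-memory timer state from the hardware registers and disable
 * the hardware timer, so that a loaded context can safely be put.
 */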
static void timer_save_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (!ctx->loaded)
		goto out;

	switch (index) {
		u64 cval;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
		cval = read_sysreg_el0(SYS_CNTV_CVAL);

		if (has_broken_cntvoff())
			cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTV_CTL);
		isb();

		/*
		 * The kernel may decide to run userspace after
		 * calling vcpu_put, so we reset cntvoff to 0 to
		 * ensure a consistent read between user accesses to
		 * the virtual counter and kernel access to the
		 * physical counter in the non-VHE case.
		 *
		 * For VHE, the virtual counter uses a fixed virtual
		 * offset of zero, so no need to zero CNTVOFF_EL2
		 * register, but this is actually useful when switching
		 * between EL1/vEL2 with NV.
		 *
		 * Do it unconditionally, as this is either unavoidable
		 * or dirt cheap.
		 */
		set_cntvoff(0);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
		cval = read_sysreg_el0(SYS_CNTP_CVAL);

		cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTP_CTL);
		isb();

		set_cntpoff(0);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_save_state(ctx);

	ctx->loaded = false;
out:
	local_irq_restore(flags);
}

/*
 * Schedule the background timer before calling kvm_vcpu_halt, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * If no timers are capable of raising interrupts (disabled or
	 * masked), then there's no more work for us to do.
	 */
	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
	    !vcpu_has_wfit_active(vcpu))
		return;

	/*
	 * At least one guest timer will expire. Schedule a background timer.
	 * Set the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}

static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

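/*
 * Load the saved timer state back into the hardware registers, applying
 * the relevant counter offset, and mark the context as loaded.
 */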
static void timer_restore_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (ctx->loaded)
		goto out;

	switch (index) {
		u64 cval, offset;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		if (has_broken_cntvoff()) {
			set_cntvoff(0);
			cval += offset;
		} else {
			set_cntvoff(offset);
		}
		write_sysreg_el0(cval, SYS_CNTV_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		set_cntpoff(offset);
		cval += offset;
		write_sysreg_el0(cval, SYS_CNTP_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_restore_state(ctx);

	ctx->loaded = true;
out:
	local_irq_restore(flags);
}

static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int r;
	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
}

static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
	bool phys_active = false;

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx);

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));

	phys_active |= ctx->irq.level;

	set_timer_irq_phys_active(ctx, phys_active);
}

static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);

	/*
	 * When using a userspace irqchip with the architected timers and a
	 * host interrupt controller that doesn't support an active state, we
	 * must still prevent continuously exiting from the guest, and
	 * therefore mask the physical interrupt by disabling it on the host
	 * interrupt controller when the virtual level is high, such that the
	 * guest can make forward progress. Once we detect the output level
	 * being de-asserted, we unmask the interrupt again so that we exit
	 * from the guest when the timer fires.
	 */
	if (vtimer->irq.level)
		disable_percpu_irq(host_vtimer_irq);
	else
		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

/* If _pred is true, set bit in _set, otherwise set it in _clr */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)

static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					      struct timer_map *map)
{
	int hw, ret;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	/*
	 * We only ever unmap the vtimer irq on a VHE system that runs nested
	 * virtualization, in which case we have both a valid emul_vtimer,
	 * emul_ptimer, direct_vtimer, and direct_ptimer.
	 *
	 * Since this is called from kvm_timer_vcpu_load(), a change between
	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
	 * represent this, and therefore we switch the emul/direct mappings
	 * below.
	 */
	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
	if (hw < 0) {
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));

		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_vtimer->host_timer_irq,
					    timer_irq(map->direct_vtimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_ptimer->host_timer_irq,
					    timer_irq(map->direct_ptimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
	}
}

static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	bool tvt, tpt, tvc, tpc, tvt02, tpt02;
	u64 clr, set;

	/*
	 * No trapping gets configured here with nVHE. See
	 * __timer_enable_traps(), which is where the stuff happens.
	 */
	if (!has_vhe())
		return;

	/*
	 * Our default policy is not to trap anything. As we progress
	 * within this function, reality kicks in and we start adding
	 * traps based on emulation requirements.
	 */
	tvt = tpt = tvc = tpc = false;
	tvt02 = tpt02 = false;

	/*
	 * NV2 badly breaks the timer semantics by redirecting accesses to
	 * the EL1 timer state to memory, so let's call ECV to the rescue if
	 * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
	 *
	 * The treatment slightly varies depending on whether we run a nVHE
	 * or VHE guest: nVHE will use the _EL0 registers directly, while VHE
	 * will use the _EL02 accessors. This translates into different trap
	 * bits.
	 *
	 * None of the trapping is required when running in non-HYP context,
	 * unless required by the L1 hypervisor settings once we advertise
	 * ECV+NV in the guest, or when we need trapping for other reasons.
	 */
	if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
		if (vcpu_el2_e2h_is_set(vcpu))
			tvt02 = tpt02 = true;
		else
			tvt = tpt = true;
	}

	/*
	 * We have two possibilities to deal with a physical offset:
	 *
	 * - Either we have CNTPOFF (yay!) or the offset is 0:
	 *   we let the guest freely access the HW
	 *
	 * - or neither of these conditions applies:
	 *   we trap accesses to the HW, but still use it
	 *   after correcting the physical offset
	 */
	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
		tpt = tpc = true;

	/*
	 * For the poor sods that could not correctly subtract one value
	 * from another, trap the full virtual timer and counter.
	 */
	if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
		tvt = tvc = true;

	/*
	 * Apply the enable bits that the guest hypervisor has requested for
	 * its own guest. We can only add traps that wouldn't have been set
	 * above.
	 * Implementation choices: we do not support NV when E2H=0 in the
	 * guest, and we don't support a configuration where E2H is writable
	 * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
	 * not both). This simplifies the handling of the EL1NV* bits.
	 */
	if (is_nested_ctxt(vcpu)) {
		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);

		/* Use the VHE format for mental sanity */
		if (!vcpu_el2_e2h_is_set(vcpu))
			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;

		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));

		tpt02 |= (val & CNTHCTL_EL1NVPCT);
		tvt02 |= (val & CNTHCTL_EL1NVVCT);
	}

	/*
	 * Now that we have collected our requirements, compute the
	 * trap and enable bits.
	 */
	set = 0;
	clr = 0;

	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
	assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
	assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
	assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
	assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);

	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
	sysreg_clear_set(cnthctl_el2, clr, set);
}

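/*
 * Called on vcpu_load(): restore the direct timers onto the CPU, resync
 * the emulated ones, and configure the EL2 timer traps for this context.
 */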
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	if (static_branch_likely(&has_gic_active_state)) {
		if (vcpu_has_nv(vcpu))
			kvm_timer_vcpu_load_nested_switch(vcpu, &map);

		kvm_timer_vcpu_load_gic(map.direct_vtimer);
		if (map.direct_ptimer)
			kvm_timer_vcpu_load_gic(map.direct_ptimer);
	} else {
		kvm_timer_vcpu_load_nogic(vcpu);
	}

	kvm_timer_unblocking(vcpu);

	timer_restore_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_restore_state(map.direct_ptimer);
	if (map.emul_vtimer)
		timer_emulate(map.emul_vtimer);
	if (map.emul_ptimer)
		timer_emulate(map.emul_ptimer);

	timer_set_traps(vcpu, &map);
}

bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool vlevel, plevel;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;

	return kvm_timer_should_fire(vtimer) != vlevel ||
	       kvm_timer_should_fire(ptimer) != plevel;
}

void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	timer_save_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_save_state(map.direct_ptimer);

	/*
	 * Cancel soft timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
	 *
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	if (kvm_vcpu_is_blocking(vcpu))
		kvm_timer_blocking(vcpu);
}

void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
{
	/*
	 * When NV2 is on, guest hypervisors have their EL1 timer register
	 * accesses redirected to the VNCR page. Any guest action taken on
	 * the timer is postponed until the next exit, leading to a very
	 * poor quality of emulation.
	 *
	 * This is an unmitigated disaster, only papered over by FEAT_ECV,
	 * which allows trapping of the timer registers even with NV2.
	 * Still, this is worse than FEAT_NV on its own. Meh.
	 */
	if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
		/*
		 * For a VHE guest hypervisor, the EL2 state is directly
		 * stored in the host EL1 timers, while the emulated EL1
		 * state is stored in the VNCR page. The latter could have
		 * been updated behind our back, and we must reset the
		 * emulation of the timers.
		 *
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap despite being
		 * notionally direct (we use the EL1 HW, as for VHE), while
		 * the EL1 registers access memory.
		 *
		 * In both cases, process the emulated timers on each guest
		 * exit. Boo.
		 */
		struct timer_map map;
		get_timer_map(vcpu, &map);

		soft_timer_cancel(&map.emul_vtimer->hrtimer);
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
		timer_emulate(map.emul_vtimer);
		timer_emulate(map.emul_ptimer);
	}
}

/*
 * With a userspace irqchip we have to check if the guest de-asserted the
 * timer and if so, unmask the timer irq signal on the host interrupt
 * controller to ensure that we see future timer signals.
 */
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
			set_timer_irq_phys_active(vtimer, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
}

void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	if (unlikely(!timer->enabled))
		return;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		unmask_vtimer_irq_user(vcpu);
}

void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
	 * and to 0 for ARMv7. We provide an implementation that always
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
	for (int i = 0; i < nr_timers(vcpu); i++)
		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);

	/*
	 * A vcpu running at EL2 is in charge of the offset applied to
	 * the virtual timer, so use the physical VM offset, and point
	 * the vcpu offset to CNTVOFF_EL2.
	 */
	if (vcpu_has_nv(vcpu)) {
		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;

		offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2);
		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
	}

	if (timer->enabled) {
		for (int i = 0; i < nr_timers(vcpu); i++)
			kvm_timer_update_irq(vcpu, false,
					     vcpu_get_timer(vcpu, i));

		if (irqchip_in_kernel(vcpu->kvm)) {
			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
			if (map.direct_ptimer)
				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
		}
	}

	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
}

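/*
 * Set up a single per-vcpu timer context: associate it with the VM-wide
 * counter offset and the host PPI that backs it, and initialize its
 * emulation hrtimer.
 */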
static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
{
	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
	struct kvm *kvm = vcpu->kvm;

	ctxt->timer_id = timerid;

	if (timerid == TIMER_VTIMER)
		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
	else
		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;

	hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);

	switch (timerid) {
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		ctxt->host_timer_irq = host_ptimer_irq;
		break;
	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		ctxt->host_timer_irq = host_vtimer_irq;
		break;
	}
}

void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	for (int i = 0; i < NR_KVM_TIMERS; i++)
		timer_context_init(vcpu, i);

	/* Synchronize offsets across timers of a VM if not already provided */
	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
		timer_set_offset(vcpu_ptimer(vcpu), 0);
	}

	hrtimer_setup(&timer->bg_timer, kvm_bg_timer_expire, CLOCK_MONOTONIC,
		      HRTIMER_MODE_ABS_HARD);
}

void kvm_timer_init_vm(struct kvm *kvm)
{
	for (int i = 0; i < NR_KVM_TIMERS; i++)
		kvm->arch.timer_data.ppi[i] = default_ppi[i];
}

void kvm_timer_cpu_up(void)
{
	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	if (host_ptimer_irq)
		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
}

void kvm_timer_cpu_down(void)
{
	disable_percpu_irq(host_vtimer_irq);
	if (host_ptimer_irq)
		disable_percpu_irq(host_ptimer_irq);
}

static u64 read_timer_ctl(struct arch_timer_context *timer)
{
	/*
	 * Set ISTATUS bit if it's expired.
	 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
	 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
	 * regardless of ENABLE bit for our implementation convenience.
	 */
	u32 ctl = timer_get_ctl(timer);

	if (!kvm_timer_compute_delta(timer))
		ctl |= ARCH_TIMER_CTRL_IT_STAT;

	return ctl;
}

static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg)
{
	u64 val;

	switch (treg) {
	case TIMER_REG_TVAL:
		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
		val = lower_32_bits(val);
		break;

	case TIMER_REG_CTL:
		val = read_timer_ctl(timer);
		break;

	case TIMER_REG_CVAL:
		val = timer_get_cval(timer);
		break;

	case TIMER_REG_CNT:
		val = kvm_phys_timer_read() - timer_get_offset(timer);
		break;

	case TIMER_REG_VOFF:
		val = *timer->offset.vcpu_offset;
		break;

	default:
		BUG();
	}

	return val;
}

u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
			      enum kvm_arch_timers tmr,
			      enum kvm_arch_timer_regs treg)
{
	struct arch_timer_context *timer;
	struct timer_map map;
	u64 val;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);

	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
		return kvm_arm_timer_read(vcpu, timer, treg);

	preempt_disable();
	timer_save_state(timer);

	val = kvm_arm_timer_read(vcpu, timer, treg);

	timer_restore_state(timer);
	preempt_enable();

	return val;
}

static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	switch (treg) {
	case TIMER_REG_TVAL:
		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
		break;

	case TIMER_REG_CTL:
		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
		break;

	case TIMER_REG_CVAL:
		timer_set_cval(timer, val);
		break;

	case TIMER_REG_VOFF:
		*timer->offset.vcpu_offset = val;
		break;

	default:
		BUG();
	}
}

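/*
 * Sysreg writes from the guest: emulated timers are updated in place and
 * re-emulated, while hardware-backed timers are saved, updated and
 * reloaded so that the new value takes effect immediately.
 */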
void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
				enum kvm_arch_timers tmr,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	struct arch_timer_context *timer;
	struct timer_map map;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);
	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
		soft_timer_cancel(&timer->hrtimer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_emulate(timer);
	} else {
		preempt_disable();
		timer_save_state(timer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_restore_state(timer);
		preempt_enable();
	}
}

static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);

	return 0;
}

static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
		return irq_chip_set_parent_state(d, which, val);

	if (val)
		irq_chip_mask_parent(d);
	else
		irq_chip_unmask_parent(d);

	return 0;
}

static void timer_irq_eoi(struct irq_data *d)
{
	if (!irqd_is_forwarded_to_vcpu(d))
		irq_chip_eoi_parent(d);
}

static void timer_irq_ack(struct irq_data *d)
{
	d = d->parent_data;
	if (d->chip->irq_ack)
		d->chip->irq_ack(d);
}

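/*
 * irq_chip and domain stacked on top of the GIC when the host cannot
 * deactivate interrupts in hardware, so that the timer PPIs can be
 * resampled in software (see kvm_irq_init()).
 */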
static struct irq_chip timer_chip = {
	.name			= "KVM",
	.irq_ack		= timer_irq_ack,
	.irq_mask		= irq_chip_mask_parent,
	.irq_unmask		= irq_chip_unmask_parent,
	.irq_eoi		= timer_irq_eoi,
	.irq_set_type		= irq_chip_set_type_parent,
	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
};

static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t hwirq = (uintptr_t)arg;

	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
					     &timer_chip, NULL);
}

static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
}

static const struct irq_domain_ops timer_domain_ops = {
	.alloc	= timer_irq_domain_alloc,
	.free	= timer_irq_domain_free,
};

static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	*flags = irq_get_trigger_type(virq);
	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		*flags = IRQF_TRIGGER_LOW;
	}
}

static int kvm_irq_init(struct arch_timer_kvm_info *info)
{
	struct irq_domain *domain = NULL;

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}

	host_vtimer_irq = info->virtual_irq;
	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

	if (kvm_vgic_global_state.no_hw_deactivation) {
		struct fwnode_handle *fwnode;
		struct irq_data *data;

		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
		if (!fwnode)
			return -ENOMEM;

		/* Assume both vtimer and ptimer in the same parent */
		data = irq_get_irq_data(host_vtimer_irq);
		domain = irq_domain_create_hierarchy(data->domain, 0,
						     NR_KVM_TIMERS, fwnode,
						     &timer_domain_ops, NULL);
		if (!domain) {
			irq_domain_free_fwnode(fwnode);
			return -ENOMEM;
		}

		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
					    (void *)TIMER_VTIMER));
	}

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);

		if (domain)
			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
						    (void *)TIMER_PTIMER));
	}

	return 0;
}

static void kvm_timer_handle_errata(void)
{
	u64 mmfr0, mmfr1, mmfr4;

	/*
	 * CNTVOFF_EL2 is broken on some implementations. For those, we trap
	 * all virtual timer/counter accesses, requiring FEAT_ECV.
	 *
	 * However, a hypervisor supporting nesting is likely to mitigate the
	 * erratum at L0, and not require other levels to mitigate it (which
	 * would otherwise be a terrible performance sink due to trap
	 * amplification).
	 *
	 * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
	 * and that NV is likely not to (because of limitations of the
	 * architecture), only enable the workaround when FEAT_VHE and
	 * FEAT_E2H0 are both detected. Time will tell if this actually holds.
	 */
	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
	if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1)		&&
	    !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4)	&&
	    SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0)		&&
	    (has_vhe() || has_hvhe())				&&
	    cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
		static_branch_enable(&broken_cntvoff_key);
		kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
	}
}

int __init kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	err = kvm_irq_init(info);
	if (err)
		return err;

	/* First, do the virtual EL1 timer irq */

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}

	if (has_gic) {
		err = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	/* Now let's do the physical EL1 timer irq */

	if (info->physical_irq > 0) {
		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
					 "kvm guest ptimer", kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
				host_ptimer_irq, err);
			goto out_free_vtimer_irq;
		}

		if (has_gic) {
			err = irq_set_vcpu_affinity(host_ptimer_irq,
						    kvm_get_running_vcpus());
			if (err) {
				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
				goto out_free_ptimer_irq;
			}
		}

		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
	} else if (has_vhe()) {
		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
			info->physical_irq);
		err = -ENODEV;
		goto out_free_vtimer_irq;
	}

	kvm_timer_handle_errata();
	return 0;

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

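/*
 * Claim ownership of each timer PPI in the vgic and check that the
 * configured interrupt numbers are all distinct. Once they pass this
 * check, the PPIs become immutable for the lifetime of the VM.
 */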
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
	u32 ppis = 0;
	bool valid;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;
		int irq;

		ctx = vcpu_get_timer(vcpu, i);
		irq = timer_irq(ctx);
		if (kvm_vgic_set_owner(vcpu, irq, ctx))
			break;

		/*
		 * We know by construction that we only have PPIs, so
		 * all values are less than 32.
		 */
		ppis |= BIT(irq);
	}

	valid = hweight32(ppis) == nr_timers(vcpu);

	if (valid)
		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);

	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return valid;
}

static bool kvm_arch_timer_get_input_level(int vintid)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	if (WARN(!vcpu, "No vcpu context!\n"))
		return false;

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;

		ctx = vcpu_get_timer(vcpu, i);
		if (timer_irq(ctx) == vintid)
			return kvm_timer_should_fire(ctx);
	}

	/* A timer IRQ has fired, but no matching timer was found? */
	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);

	return false;
}

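/*
 * Finalize the timer setup for this vcpu: validate the PPI assignment and
 * map the direct timers' host interrupts through the vgic before the
 * first run. With no in-kernel irqchip, the timers are simply marked
 * enabled.
 */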
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;
	int ret;

	if (timer->enabled)
		return 0;

	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
	if (!irqchip_in_kernel(vcpu->kvm))
		goto no_vgic;

	/*
	 * At this stage, we have the guarantee that the vgic is both
	 * available and initialized.
	 */
	if (!timer_irqs_are_valid(vcpu)) {
		kvm_debug("incorrectly configured timer irqs\n");
		return -EINVAL;
	}

	get_timer_map(vcpu, &map);

	ret = kvm_vgic_map_phys_irq(vcpu,
				    map.direct_vtimer->host_timer_irq,
				    timer_irq(map.direct_vtimer),
				    &arch_timer_irq_ops);
	if (ret)
		return ret;

	if (map.direct_ptimer) {
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map.direct_ptimer->host_timer_irq,
					    timer_irq(map.direct_ptimer),
					    &arch_timer_irq_ops);
	}

	if (ret)
		return ret;

no_vgic:
	timer->enabled = 1;
	return 0;
}

/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
}

int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	int irq, idx, ret = 0;

	if (!irqchip_in_kernel(vcpu->kvm))
		return -EINVAL;

	if (get_user(irq, uaddr))
		return -EFAULT;

	if (!(irq_is_ppi(irq)))
		return -EINVAL;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
		     &vcpu->kvm->arch.flags)) {
		ret = -EBUSY;
		goto out;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		idx = TIMER_VTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		idx = TIMER_PTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		idx = TIMER_HVTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		idx = TIMER_HPTIMER;
		break;
	default:
		ret = -ENXIO;
		goto out;
	}

	/*
	 * We cannot validate the IRQ unicity before we run, so take it at
	 * face value. The verdict will be given on first vcpu run, for each
	 * vcpu. Yes this is late. Blame it on the stupid API.
	 */
	vcpu->kvm->arch.timer_data.ppi[idx] = irq;

out:
	mutex_unlock(&vcpu->kvm->arch.config_lock);
	return ret;
}

int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	struct arch_timer_context *timer;
	int irq;

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		timer = vcpu_vtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		timer = vcpu_ptimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		timer = vcpu_hvtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		timer = vcpu_hptimer(vcpu);
		break;
	default:
		return -ENXIO;
	}

	irq = timer_irq(timer);
	return put_user(irq, uaddr);
}

int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		return 0;
	}

	return -ENXIO;
}

int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
				    struct kvm_arm_counter_offset *offset)
{
	int ret = 0;

	if (offset->reserved)
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (!kvm_trylock_all_vcpus(kvm)) {
		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);

		/*
		 * If userspace decides to set the offset using this
		 * API rather than merely restoring the counter
		 * values, the offset applies to both the virtual and
		 * physical views.
		 */
		kvm->arch.timer_data.voffset = offset->counter_offset;
		kvm->arch.timer_data.poffset = offset->counter_offset;

		kvm_unlock_all_vcpus(kvm);
	} else {
		ret = -EBUSY;
	}

	mutex_unlock(&kvm->lock);

	return ret;
}