Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/kvm/arch_timer.c
26424 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* Copyright (C) 2012 ARM Ltd.
4
* Author: Marc Zyngier <[email protected]>
5
*/
6
7
#include <linux/cpu.h>
8
#include <linux/kvm.h>
9
#include <linux/kvm_host.h>
10
#include <linux/interrupt.h>
11
#include <linux/irq.h>
12
#include <linux/irqdomain.h>
13
#include <linux/uaccess.h>
14
15
#include <clocksource/arm_arch_timer.h>
16
#include <asm/arch_timer.h>
17
#include <asm/kvm_emulate.h>
18
#include <asm/kvm_hyp.h>
19
#include <asm/kvm_nested.h>
20
21
#include <kvm/arm_vgic.h>
22
#include <kvm/arm_arch_timer.h>
23
24
#include "trace.h"
25
26
static struct timecounter *timecounter;
27
static unsigned int host_vtimer_irq;
28
static unsigned int host_ptimer_irq;
29
static u32 host_vtimer_irq_flags;
30
static u32 host_ptimer_irq_flags;
31
32
static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
33
DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);
34
35
static const u8 default_ppi[] = {
36
[TIMER_PTIMER] = 30,
37
[TIMER_VTIMER] = 27,
38
[TIMER_HPTIMER] = 26,
39
[TIMER_HVTIMER] = 28,
40
};
41
42
static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
43
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
44
struct arch_timer_context *timer_ctx);
45
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
46
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
47
struct arch_timer_context *timer,
48
enum kvm_arch_timer_regs treg,
49
u64 val);
50
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
51
struct arch_timer_context *timer,
52
enum kvm_arch_timer_regs treg);
53
static bool kvm_arch_timer_get_input_level(int vintid);
54
55
static struct irq_ops arch_timer_irq_ops = {
56
.get_input_level = kvm_arch_timer_get_input_level,
57
};
58
59
static int nr_timers(struct kvm_vcpu *vcpu)
60
{
61
if (!vcpu_has_nv(vcpu))
62
return NR_KVM_EL0_TIMERS;
63
64
return NR_KVM_TIMERS;
65
}
66
67
u32 timer_get_ctl(struct arch_timer_context *ctxt)
68
{
69
struct kvm_vcpu *vcpu = ctxt->vcpu;
70
71
switch(arch_timer_ctx_index(ctxt)) {
72
case TIMER_VTIMER:
73
return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
74
case TIMER_PTIMER:
75
return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
76
case TIMER_HVTIMER:
77
return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
78
case TIMER_HPTIMER:
79
return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
80
default:
81
WARN_ON(1);
82
return 0;
83
}
84
}
85
86
u64 timer_get_cval(struct arch_timer_context *ctxt)
87
{
88
struct kvm_vcpu *vcpu = ctxt->vcpu;
89
90
switch(arch_timer_ctx_index(ctxt)) {
91
case TIMER_VTIMER:
92
return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
93
case TIMER_PTIMER:
94
return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
95
case TIMER_HVTIMER:
96
return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
97
case TIMER_HPTIMER:
98
return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
99
default:
100
WARN_ON(1);
101
return 0;
102
}
103
}
104
105
/*
 * Store @ctl as the control register value of the timer @ctxt, using the
 * vcpu system register that matches the timer's index.
 *
 * An unknown index only triggers a warning; nothing is written.
 */
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl);
		return;
	case TIMER_PTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl);
		return;
	case TIMER_HVTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl);
		return;
	case TIMER_HPTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl);
		return;
	default:
		WARN_ON(1);
		return;
	}
}
126
127
/*
 * Store @cval as the compare value of the timer @ctxt, using the vcpu
 * system register that matches the timer's index.
 *
 * An unknown index only triggers a warning; nothing is written.
 */
static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval);
		return;
	case TIMER_PTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval);
		return;
	case TIMER_HVTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval);
		return;
	case TIMER_HPTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval);
		return;
	default:
		WARN_ON(1);
		return;
	}
}
148
149
/*
 * Publish @offset through the context's vm_offset pointer.
 *
 * A context without a vm_offset pointer is left untouched; in that case a
 * warning naming the timer index is emitted when @offset is non-zero.
 */
static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
{
	if (ctxt->offset.vm_offset) {
		WRITE_ONCE(*ctxt->offset.vm_offset, offset);
		return;
	}

	WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
}
158
159
u64 kvm_phys_timer_read(void)
160
{
161
return timecounter->cc->read(timecounter->cc);
162
}
163
164
void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
165
{
166
if (vcpu_has_nv(vcpu)) {
167
if (is_hyp_ctxt(vcpu)) {
168
map->direct_vtimer = vcpu_hvtimer(vcpu);
169
map->direct_ptimer = vcpu_hptimer(vcpu);
170
map->emul_vtimer = vcpu_vtimer(vcpu);
171
map->emul_ptimer = vcpu_ptimer(vcpu);
172
} else {
173
map->direct_vtimer = vcpu_vtimer(vcpu);
174
map->direct_ptimer = vcpu_ptimer(vcpu);
175
map->emul_vtimer = vcpu_hvtimer(vcpu);
176
map->emul_ptimer = vcpu_hptimer(vcpu);
177
}
178
} else if (has_vhe()) {
179
map->direct_vtimer = vcpu_vtimer(vcpu);
180
map->direct_ptimer = vcpu_ptimer(vcpu);
181
map->emul_vtimer = NULL;
182
map->emul_ptimer = NULL;
183
} else {
184
map->direct_vtimer = vcpu_vtimer(vcpu);
185
map->direct_ptimer = NULL;
186
map->emul_vtimer = NULL;
187
map->emul_ptimer = vcpu_ptimer(vcpu);
188
}
189
190
trace_kvm_get_timer_map(vcpu->vcpu_id, map);
191
}
192
193
static inline bool userspace_irqchip(struct kvm *kvm)
194
{
195
return unlikely(!irqchip_in_kernel(kvm));
196
}
197
198
/* Arm @hrt to expire @ns nanoseconds from now (absolute, hard mode). */
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	ktime_t expires = ktime_add_ns(ktime_get(), ns);

	hrtimer_start(hrt, expires, HRTIMER_MODE_ABS_HARD);
}
203
204
/* Cancel @hrt; the return value of hrtimer_cancel() is not used. */
static void soft_timer_cancel(struct hrtimer *hrt)
{
	hrtimer_cancel(hrt);
}
208
209
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
210
{
211
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
212
struct arch_timer_context *ctx;
213
struct timer_map map;
214
215
/*
216
* We may see a timer interrupt after vcpu_put() has been called which
217
* sets the CPU's vcpu pointer to NULL, because even though the timer
218
* has been disabled in timer_save_state(), the hardware interrupt
219
* signal may not have been retired from the interrupt controller yet.
220
*/
221
if (!vcpu)
222
return IRQ_HANDLED;
223
224
get_timer_map(vcpu, &map);
225
226
if (irq == host_vtimer_irq)
227
ctx = map.direct_vtimer;
228
else
229
ctx = map.direct_ptimer;
230
231
if (kvm_timer_should_fire(ctx))
232
kvm_timer_update_irq(vcpu, true, ctx);
233
234
if (userspace_irqchip(vcpu->kvm) &&
235
!static_branch_unlikely(&has_gic_active_state))
236
disable_percpu_irq(host_vtimer_irq);
237
238
return IRQ_HANDLED;
239
}
240
241
/*
 * Time in nanoseconds until the counter seen through @timer_ctx's offset
 * reaches @val. Returns 0 when the counter is already at or past @val.
 */
static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (val <= now)
		return 0;

	return cyclecounter_cyc2ns(timecounter->cc, val - now,
				   timecounter->mask, &timer_ctx->ns_frac);
}
258
259
/* Nanoseconds until @timer_ctx reaches its compare value, or 0 if already there. */
static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	u64 cval = timer_get_cval(timer_ctx);

	return kvm_counter_compute_delta(timer_ctx, cval);
}
263
264
static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
265
{
266
WARN_ON(timer_ctx && timer_ctx->loaded);
267
return timer_ctx &&
268
((timer_get_ctl(timer_ctx) &
269
(ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
270
}
271
272
static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
273
{
274
return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
275
vcpu_get_flag(vcpu, IN_WFIT));
276
}
277
278
/*
 * Nanoseconds until the counter reaches the deadline held in the register
 * selected by kvm_vcpu_sys_get_rt(). The deadline is measured against the
 * hvtimer in hyp context and against the vtimer otherwise.
 */
static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
	u64 deadline = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
	struct arch_timer_context *ctx;

	if (is_hyp_ctxt(vcpu))
		ctx = vcpu_hvtimer(vcpu);
	else
		ctx = vcpu_vtimer(vcpu);

	return kvm_counter_compute_delta(ctx, deadline);
}
287
288
/*
289
* Returns the earliest expiration time in ns among guest timers.
290
* Note that it will return 0 if none of timers can fire.
291
*/
292
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
293
{
294
u64 min_delta = ULLONG_MAX;
295
int i;
296
297
for (i = 0; i < nr_timers(vcpu); i++) {
298
struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
299
300
WARN(ctx->loaded, "timer %d loaded\n", i);
301
if (kvm_timer_irq_can_fire(ctx))
302
min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
303
}
304
305
if (vcpu_has_wfit_active(vcpu))
306
min_delta = min(min_delta, wfit_delay_ns(vcpu));
307
308
/* If none of timers can fire, then return 0 */
309
if (min_delta == ULLONG_MAX)
310
return 0;
311
312
return min_delta;
313
}
314
315
static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
316
{
317
struct arch_timer_cpu *timer;
318
struct kvm_vcpu *vcpu;
319
u64 ns;
320
321
timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
322
vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
323
324
/*
325
* Check that the timer has really expired from the guest's
326
* PoV (NTP on the host may have forced it to expire
327
* early). If we should have slept longer, restart it.
328
*/
329
ns = kvm_timer_earliest_exp(vcpu);
330
if (unlikely(ns)) {
331
hrtimer_forward_now(hrt, ns_to_ktime(ns));
332
return HRTIMER_RESTART;
333
}
334
335
kvm_vcpu_wake_up(vcpu);
336
return HRTIMER_NORESTART;
337
}
338
339
static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
340
{
341
struct arch_timer_context *ctx;
342
struct kvm_vcpu *vcpu;
343
u64 ns;
344
345
ctx = container_of(hrt, struct arch_timer_context, hrtimer);
346
vcpu = ctx->vcpu;
347
348
trace_kvm_timer_hrtimer_expire(ctx);
349
350
/*
351
* Check that the timer has really expired from the guest's
352
* PoV (NTP on the host may have forced it to expire
353
* early). If not ready, schedule for a later time.
354
*/
355
ns = kvm_timer_compute_delta(ctx);
356
if (unlikely(ns)) {
357
hrtimer_forward_now(hrt, ns_to_ktime(ns));
358
return HRTIMER_RESTART;
359
}
360
361
kvm_timer_update_irq(vcpu, true, ctx);
362
return HRTIMER_NORESTART;
363
}
364
365
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
366
{
367
enum kvm_arch_timers index;
368
u64 cval, now;
369
370
if (!timer_ctx)
371
return false;
372
373
index = arch_timer_ctx_index(timer_ctx);
374
375
if (timer_ctx->loaded) {
376
u32 cnt_ctl = 0;
377
378
switch (index) {
379
case TIMER_VTIMER:
380
case TIMER_HVTIMER:
381
cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
382
break;
383
case TIMER_PTIMER:
384
case TIMER_HPTIMER:
385
cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
386
break;
387
case NR_KVM_TIMERS:
388
/* GCC is braindead */
389
cnt_ctl = 0;
390
break;
391
}
392
393
return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
394
(cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
395
!(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
396
}
397
398
if (!kvm_timer_irq_can_fire(timer_ctx))
399
return false;
400
401
cval = timer_get_cval(timer_ctx);
402
now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
403
404
return cval <= now;
405
}
406
407
/* Returns 1 when a WFIT is active on @vcpu and its deadline has passed, else 0. */
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_wfit_active(vcpu))
		return 0;

	return wfit_delay_ns(vcpu) == 0;
}
411
412
/*
413
* Reflect the timer output level into the kvm_run structure
414
*/
415
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
416
{
417
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
418
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
419
struct kvm_sync_regs *regs = &vcpu->run->s.regs;
420
421
/* Populate the device bitmap with the timer states */
422
regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
423
KVM_ARM_DEV_EL1_PTIMER);
424
if (kvm_timer_should_fire(vtimer))
425
regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
426
if (kvm_timer_should_fire(ptimer))
427
regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
428
}
429
430
/*
 * Publish @level in the interrupt status bit of the stored control value
 * of @ctx. Only done when the vcpu is in hyp context and @ctx is its
 * vtimer or ptimer; otherwise this is a no-op.
 *
 * This papers over NV2 brokenness. The quality of emulation stays poor:
 * guest writes have no effect until the next exit.
 */
static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
{
	unsigned long ctl;

	if (!is_hyp_ctxt(ctx->vcpu))
		return;

	if (ctx != vcpu_vtimer(ctx->vcpu) && ctx != vcpu_ptimer(ctx->vcpu))
		return;

	ctl = timer_get_ctl(ctx);
	__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &ctl, level);
	timer_set_ctl(ctx, ctl);
}
446
447
/*
 * Record @new_level as the output level of @timer_ctx, trace the change
 * and, when the VM has an in-kernel irqchip, forward the level to the
 * vgic. With a userspace irqchip nothing is injected here.
 */
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	kvm_timer_update_status(timer_ctx, new_level);

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (!userspace_irqchip(vcpu->kvm))
		kvm_vgic_inject_irq(vcpu->kvm, vcpu,
				    timer_irq(timer_ctx),
				    timer_ctx->irq.level,
				    timer_ctx);
}
464
465
/* Only called for a fully emulated timer */
466
static void timer_emulate(struct arch_timer_context *ctx)
467
{
468
bool should_fire = kvm_timer_should_fire(ctx);
469
470
trace_kvm_timer_emulate(ctx, should_fire);
471
472
if (should_fire != ctx->irq.level)
473
kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
474
475
kvm_timer_update_status(ctx, should_fire);
476
477
/*
478
* If the timer can fire now, we don't need to have a soft timer
479
* scheduled for the future. If the timer cannot fire at all,
480
* then we also don't need a soft timer.
481
*/
482
if (should_fire || !kvm_timer_irq_can_fire(ctx))
483
return;
484
485
soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
486
}
487
488
/* Hand @cntvoff to __kvm_timer_set_cntvoff through kvm_call_hyp(). */
static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}
492
493
/* Write @cntpoff to CNTPOFF_EL2; does nothing when has_cntpoff() is false. */
static void set_cntpoff(u64 cntpoff)
{
	if (!has_cntpoff())
		return;

	write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}
498
499
static void timer_save_state(struct arch_timer_context *ctx)
500
{
501
struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
502
enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
503
unsigned long flags;
504
505
if (!timer->enabled)
506
return;
507
508
local_irq_save(flags);
509
510
if (!ctx->loaded)
511
goto out;
512
513
switch (index) {
514
u64 cval;
515
516
case TIMER_VTIMER:
517
case TIMER_HVTIMER:
518
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
519
cval = read_sysreg_el0(SYS_CNTV_CVAL);
520
521
if (has_broken_cntvoff())
522
cval -= timer_get_offset(ctx);
523
524
timer_set_cval(ctx, cval);
525
526
/* Disable the timer */
527
write_sysreg_el0(0, SYS_CNTV_CTL);
528
isb();
529
530
/*
531
* The kernel may decide to run userspace after
532
* calling vcpu_put, so we reset cntvoff to 0 to
533
* ensure a consistent read between user accesses to
534
* the virtual counter and kernel access to the
535
* physical counter of non-VHE case.
536
*
537
* For VHE, the virtual counter uses a fixed virtual
538
* offset of zero, so no need to zero CNTVOFF_EL2
539
* register, but this is actually useful when switching
540
* between EL1/vEL2 with NV.
541
*
542
* Do it unconditionally, as this is either unavoidable
543
* or dirt cheap.
544
*/
545
set_cntvoff(0);
546
break;
547
case TIMER_PTIMER:
548
case TIMER_HPTIMER:
549
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
550
cval = read_sysreg_el0(SYS_CNTP_CVAL);
551
552
cval -= timer_get_offset(ctx);
553
554
timer_set_cval(ctx, cval);
555
556
/* Disable the timer */
557
write_sysreg_el0(0, SYS_CNTP_CTL);
558
isb();
559
560
set_cntpoff(0);
561
break;
562
case NR_KVM_TIMERS:
563
BUG();
564
}
565
566
trace_kvm_timer_save_state(ctx);
567
568
ctx->loaded = false;
569
out:
570
local_irq_restore(flags);
571
}
572
573
/*
574
* Schedule the background timer before calling kvm_vcpu_halt, so that this
575
* thread is removed from its waitqueue and made runnable when there's a timer
576
* interrupt to handle.
577
*/
578
static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
579
{
580
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
581
struct timer_map map;
582
583
get_timer_map(vcpu, &map);
584
585
/*
586
* If no timers are capable of raising interrupts (disabled or
587
* masked), then there's no more work for us to do.
588
*/
589
if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
590
!kvm_timer_irq_can_fire(map.direct_ptimer) &&
591
!kvm_timer_irq_can_fire(map.emul_vtimer) &&
592
!kvm_timer_irq_can_fire(map.emul_ptimer) &&
593
!vcpu_has_wfit_active(vcpu))
594
return;
595
596
/*
597
* At least one guest time will expire. Schedule a background timer.
598
* Set the earliest expiration time among the guest timers.
599
*/
600
soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
601
}
602
603
static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
604
{
605
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
606
607
soft_timer_cancel(&timer->bg_timer);
608
}
609
610
static void timer_restore_state(struct arch_timer_context *ctx)
611
{
612
struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
613
enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
614
unsigned long flags;
615
616
if (!timer->enabled)
617
return;
618
619
local_irq_save(flags);
620
621
if (ctx->loaded)
622
goto out;
623
624
switch (index) {
625
u64 cval, offset;
626
627
case TIMER_VTIMER:
628
case TIMER_HVTIMER:
629
cval = timer_get_cval(ctx);
630
offset = timer_get_offset(ctx);
631
if (has_broken_cntvoff()) {
632
set_cntvoff(0);
633
cval += offset;
634
} else {
635
set_cntvoff(offset);
636
}
637
write_sysreg_el0(cval, SYS_CNTV_CVAL);
638
isb();
639
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
640
break;
641
case TIMER_PTIMER:
642
case TIMER_HPTIMER:
643
cval = timer_get_cval(ctx);
644
offset = timer_get_offset(ctx);
645
set_cntpoff(offset);
646
cval += offset;
647
write_sysreg_el0(cval, SYS_CNTP_CVAL);
648
isb();
649
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
650
break;
651
case NR_KVM_TIMERS:
652
BUG();
653
}
654
655
trace_kvm_timer_restore_state(ctx);
656
657
ctx->loaded = true;
658
out:
659
local_irq_restore(flags);
660
}
661
662
/*
 * Set the active state of the host interrupt backing @ctx to @active.
 * A failure of irq_set_irqchip_state() is reported with a warning.
 */
static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int err = irq_set_irqchip_state(ctx->host_timer_irq,
					IRQCHIP_STATE_ACTIVE, active);

	WARN_ON(err);
}
668
669
static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
670
{
671
struct kvm_vcpu *vcpu = ctx->vcpu;
672
bool phys_active = false;
673
674
/*
675
* Update the timer output so that it is likely to match the
676
* state we're about to restore. If the timer expires between
677
* this point and the register restoration, we'll take the
678
* interrupt anyway.
679
*/
680
kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
681
682
if (irqchip_in_kernel(vcpu->kvm))
683
phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
684
685
phys_active |= ctx->irq.level;
686
687
set_timer_irq_phys_active(ctx, phys_active);
688
}
689
690
static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
691
{
692
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
693
694
/*
695
* Update the timer output so that it is likely to match the
696
* state we're about to restore. If the timer expires between
697
* this point and the register restoration, we'll take the
698
* interrupt anyway.
699
*/
700
kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);
701
702
/*
703
* When using a userspace irqchip with the architected timers and a
704
* host interrupt controller that doesn't support an active state, we
705
* must still prevent continuously exiting from the guest, and
706
* therefore mask the physical interrupt by disabling it on the host
707
* interrupt controller when the virtual level is high, such that the
708
* guest can make forward progress. Once we detect the output level
709
* being de-asserted, we unmask the interrupt again so that we exit
710
* from the guest when the timer fires.
711
*/
712
if (vtimer->irq.level)
713
disable_percpu_irq(host_vtimer_irq);
714
else
715
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
716
}
717
718
/*
 * OR @_bit into @_set when @_pred is true, into @_clr otherwise.
 *
 * @_set and @_clr must be modifiable lvalues. Exactly one of them is
 * updated per invocation. The do/while wrapper keeps the macro safe to
 * use as the sole body of an unbraced if/else.
 */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)
726
727
static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
728
struct timer_map *map)
729
{
730
int hw, ret;
731
732
if (!irqchip_in_kernel(vcpu->kvm))
733
return;
734
735
/*
736
* We only ever unmap the vtimer irq on a VHE system that runs nested
737
* virtualization, in which case we have both a valid emul_vtimer,
738
* emul_ptimer, direct_vtimer, and direct_ptimer.
739
*
740
* Since this is called from kvm_timer_vcpu_load(), a change between
741
* vEL2 and vEL1/0 will have just happened, and the timer_map will
742
* represent this, and therefore we switch the emul/direct mappings
743
* below.
744
*/
745
hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
746
if (hw < 0) {
747
kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
748
kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));
749
750
ret = kvm_vgic_map_phys_irq(vcpu,
751
map->direct_vtimer->host_timer_irq,
752
timer_irq(map->direct_vtimer),
753
&arch_timer_irq_ops);
754
WARN_ON_ONCE(ret);
755
ret = kvm_vgic_map_phys_irq(vcpu,
756
map->direct_ptimer->host_timer_irq,
757
timer_irq(map->direct_ptimer),
758
&arch_timer_irq_ops);
759
WARN_ON_ONCE(ret);
760
}
761
}
762
763
static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
764
{
765
bool tvt, tpt, tvc, tpc, tvt02, tpt02;
766
u64 clr, set;
767
768
/*
769
* No trapping gets configured here with nVHE. See
770
* __timer_enable_traps(), which is where the stuff happens.
771
*/
772
if (!has_vhe())
773
return;
774
775
/*
776
* Our default policy is not to trap anything. As we progress
777
* within this function, reality kicks in and we start adding
778
* traps based on emulation requirements.
779
*/
780
tvt = tpt = tvc = tpc = false;
781
tvt02 = tpt02 = false;
782
783
/*
784
* NV2 badly breaks the timer semantics by redirecting accesses to
785
* the EL1 timer state to memory, so let's call ECV to the rescue if
786
* available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
787
*
788
* The treatment slightly varies depending whether we run a nVHE or
789
* VHE guest: nVHE will use the _EL0 registers directly, while VHE
790
* will use the _EL02 accessors. This translates in different trap
791
* bits.
792
*
793
* None of the trapping is required when running in non-HYP context,
794
* unless required by the L1 hypervisor settings once we advertise
795
* ECV+NV in the guest, or that we need trapping for other reasons.
796
*/
797
if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
798
if (vcpu_el2_e2h_is_set(vcpu))
799
tvt02 = tpt02 = true;
800
else
801
tvt = tpt = true;
802
}
803
804
/*
805
* We have two possibility to deal with a physical offset:
806
*
807
* - Either we have CNTPOFF (yay!) or the offset is 0:
808
* we let the guest freely access the HW
809
*
810
* - or neither of these condition apply:
811
* we trap accesses to the HW, but still use it
812
* after correcting the physical offset
813
*/
814
if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
815
tpt = tpc = true;
816
817
/*
818
* For the poor sods that could not correctly substract one value
819
* from another, trap the full virtual timer and counter.
820
*/
821
if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
822
tvt = tvc = true;
823
824
/*
825
* Apply the enable bits that the guest hypervisor has requested for
826
* its own guest. We can only add traps that wouldn't have been set
827
* above.
828
* Implementation choices: we do not support NV when E2H=0 in the
829
* guest, and we don't support configuration where E2H is writable
830
* by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
831
* not both). This simplifies the handling of the EL1NV* bits.
832
*/
833
if (is_nested_ctxt(vcpu)) {
834
u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
835
836
/* Use the VHE format for mental sanity */
837
if (!vcpu_el2_e2h_is_set(vcpu))
838
val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
839
840
tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
841
tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
842
843
tpt02 |= (val & CNTHCTL_EL1NVPCT);
844
tvt02 |= (val & CNTHCTL_EL1NVVCT);
845
}
846
847
/*
848
* Now that we have collected our requirements, compute the
849
* trap and enable bits.
850
*/
851
set = 0;
852
clr = 0;
853
854
assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
855
assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
856
assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
857
assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
858
assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
859
assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);
860
861
/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
862
sysreg_clear_set(cnthctl_el2, clr, set);
863
}
864
865
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
866
{
867
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
868
struct timer_map map;
869
870
if (unlikely(!timer->enabled))
871
return;
872
873
get_timer_map(vcpu, &map);
874
875
if (static_branch_likely(&has_gic_active_state)) {
876
if (vcpu_has_nv(vcpu))
877
kvm_timer_vcpu_load_nested_switch(vcpu, &map);
878
879
kvm_timer_vcpu_load_gic(map.direct_vtimer);
880
if (map.direct_ptimer)
881
kvm_timer_vcpu_load_gic(map.direct_ptimer);
882
} else {
883
kvm_timer_vcpu_load_nogic(vcpu);
884
}
885
886
kvm_timer_unblocking(vcpu);
887
888
timer_restore_state(map.direct_vtimer);
889
if (map.direct_ptimer)
890
timer_restore_state(map.direct_ptimer);
891
if (map.emul_vtimer)
892
timer_emulate(map.emul_vtimer);
893
if (map.emul_ptimer)
894
timer_emulate(map.emul_ptimer);
895
896
timer_set_traps(vcpu, &map);
897
}
898
899
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
900
{
901
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
902
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
903
struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
904
bool vlevel, plevel;
905
906
if (likely(irqchip_in_kernel(vcpu->kvm)))
907
return false;
908
909
vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
910
plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
911
912
return kvm_timer_should_fire(vtimer) != vlevel ||
913
kvm_timer_should_fire(ptimer) != plevel;
914
}
915
916
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
917
{
918
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
919
struct timer_map map;
920
921
if (unlikely(!timer->enabled))
922
return;
923
924
get_timer_map(vcpu, &map);
925
926
timer_save_state(map.direct_vtimer);
927
if (map.direct_ptimer)
928
timer_save_state(map.direct_ptimer);
929
930
/*
931
* Cancel soft timer emulation, because the only case where we
932
* need it after a vcpu_put is in the context of a sleeping VCPU, and
933
* in that case we already factor in the deadline for the physical
934
* timer when scheduling the bg_timer.
935
*
936
* In any case, we re-schedule the hrtimer for the physical timer when
937
* coming back to the VCPU thread in kvm_timer_vcpu_load().
938
*/
939
if (map.emul_vtimer)
940
soft_timer_cancel(&map.emul_vtimer->hrtimer);
941
if (map.emul_ptimer)
942
soft_timer_cancel(&map.emul_ptimer->hrtimer);
943
944
if (kvm_vcpu_is_blocking(vcpu))
945
kvm_timer_blocking(vcpu);
946
}
947
948
void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
949
{
950
/*
951
* When NV2 is on, guest hypervisors have their EL1 timer register
952
* accesses redirected to the VNCR page. Any guest action taken on
953
* the timer is postponed until the next exit, leading to a very
954
* poor quality of emulation.
955
*
956
* This is an unmitigated disaster, only papered over by FEAT_ECV,
957
* which allows trapping of the timer registers even with NV2.
958
* Still, this is still worse than FEAT_NV on its own. Meh.
959
*/
960
if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
961
/*
962
* For a VHE guest hypervisor, the EL2 state is directly
963
* stored in the host EL1 timers, while the emulated EL1
964
* state is stored in the VNCR page. The latter could have
965
* been updated behind our back, and we must reset the
966
* emulation of the timers.
967
*
968
* A non-VHE guest hypervisor doesn't have any direct access
969
* to its timers: the EL2 registers trap despite being
970
* notionally direct (we use the EL1 HW, as for VHE), while
971
* the EL1 registers access memory.
972
*
973
* In both cases, process the emulated timers on each guest
974
* exit. Boo.
975
*/
976
struct timer_map map;
977
get_timer_map(vcpu, &map);
978
979
soft_timer_cancel(&map.emul_vtimer->hrtimer);
980
soft_timer_cancel(&map.emul_ptimer->hrtimer);
981
timer_emulate(map.emul_vtimer);
982
timer_emulate(map.emul_ptimer);
983
}
984
}
985
986
/*
987
* With a userspace irqchip we have to check if the guest de-asserted the
988
* timer and if so, unmask the timer irq signal on the host interrupt
989
* controller to ensure that we see future timer signals.
990
*/
991
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
992
{
993
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
994
995
if (!kvm_timer_should_fire(vtimer)) {
996
kvm_timer_update_irq(vcpu, false, vtimer);
997
if (static_branch_likely(&has_gic_active_state))
998
set_timer_irq_phys_active(vtimer, false);
999
else
1000
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
1001
}
1002
}
1003
1004
void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
1005
{
1006
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1007
1008
if (unlikely(!timer->enabled))
1009
return;
1010
1011
if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
1012
unmask_vtimer_irq_user(vcpu);
1013
}
1014
1015
void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
1016
{
1017
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1018
struct timer_map map;
1019
1020
get_timer_map(vcpu, &map);
1021
1022
/*
1023
* The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
1024
* and to 0 for ARMv7. We provide an implementation that always
1025
* resets the timer to be disabled and unmasked and is compliant with
1026
* the ARMv7 architecture.
1027
*/
1028
for (int i = 0; i < nr_timers(vcpu); i++)
1029
timer_set_ctl(vcpu_get_timer(vcpu, i), 0);
1030
1031
/*
1032
* A vcpu running at EL2 is in charge of the offset applied to
1033
* the virtual timer, so use the physical VM offset, and point
1034
* the vcpu offset to CNTVOFF_EL2.
1035
*/
1036
if (vcpu_has_nv(vcpu)) {
1037
struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;
1038
1039
offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2);
1040
offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
1041
}
1042
1043
if (timer->enabled) {
1044
for (int i = 0; i < nr_timers(vcpu); i++)
1045
kvm_timer_update_irq(vcpu, false,
1046
vcpu_get_timer(vcpu, i));
1047
1048
if (irqchip_in_kernel(vcpu->kvm)) {
1049
kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
1050
if (map.direct_ptimer)
1051
kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
1052
}
1053
}
1054
1055
if (map.emul_vtimer)
1056
soft_timer_cancel(&map.emul_vtimer->hrtimer);
1057
if (map.emul_ptimer)
1058
soft_timer_cancel(&map.emul_ptimer->hrtimer);
1059
}
1060
1061
static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
1062
{
1063
struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
1064
struct kvm *kvm = vcpu->kvm;
1065
1066
ctxt->vcpu = vcpu;
1067
1068
if (timerid == TIMER_VTIMER)
1069
ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
1070
else
1071
ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
1072
1073
hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1074
1075
switch (timerid) {
1076
case TIMER_PTIMER:
1077
case TIMER_HPTIMER:
1078
ctxt->host_timer_irq = host_ptimer_irq;
1079
break;
1080
case TIMER_VTIMER:
1081
case TIMER_HVTIMER:
1082
ctxt->host_timer_irq = host_vtimer_irq;
1083
break;
1084
}
1085
}
1086
1087
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
1088
{
1089
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1090
1091
for (int i = 0; i < NR_KVM_TIMERS; i++)
1092
timer_context_init(vcpu, i);
1093
1094
/* Synchronize offsets across timers of a VM if not already provided */
1095
if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
1096
timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
1097
timer_set_offset(vcpu_ptimer(vcpu), 0);
1098
}
1099
1100
hrtimer_setup(&timer->bg_timer, kvm_bg_timer_expire, CLOCK_MONOTONIC,
1101
HRTIMER_MODE_ABS_HARD);
1102
}
1103
1104
void kvm_timer_init_vm(struct kvm *kvm)
1105
{
1106
for (int i = 0; i < NR_KVM_TIMERS; i++)
1107
kvm->arch.timer_data.ppi[i] = default_ppi[i];
1108
}
1109
1110
void kvm_timer_cpu_up(void)
1111
{
1112
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
1113
if (host_ptimer_irq)
1114
enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
1115
}
1116
1117
void kvm_timer_cpu_down(void)
1118
{
1119
disable_percpu_irq(host_vtimer_irq);
1120
if (host_ptimer_irq)
1121
disable_percpu_irq(host_ptimer_irq);
1122
}
1123
1124
/*
 * Write one of the KVM_REG_ARM_{TIMER,PTIMER}_{CTL,CNT,CVAL} registers.
 *
 * @vcpu:  vcpu whose timer is written
 * @regid: register identifier
 * @value: value to write
 *
 * A counter (CNT) write is turned into an offset update, and is silently
 * ignored once a VM-wide counter offset has been set
 * (KVM_ARCH_FLAG_VM_COUNTER_OFFSET).
 *
 * Returns 0 on success, -1 if @regid is not a known timer register.
 */
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
	struct arch_timer_context *ctx;

	/* First pick the timer the register belongs to... */
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
	case KVM_REG_ARM_TIMER_CNT:
	case KVM_REG_ARM_TIMER_CVAL:
		ctx = vcpu_vtimer(vcpu);
		break;
	case KVM_REG_ARM_PTIMER_CTL:
	case KVM_REG_ARM_PTIMER_CNT:
	case KVM_REG_ARM_PTIMER_CVAL:
		ctx = vcpu_ptimer(vcpu);
		break;
	default:
		return -1;
	}

	/* ...then perform the access itself. */
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
	case KVM_REG_ARM_PTIMER_CTL:
		kvm_arm_timer_write(vcpu, ctx, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_TIMER_CVAL:
	case KVM_REG_ARM_PTIMER_CVAL:
		kvm_arm_timer_write(vcpu, ctx, TIMER_REG_CVAL, value);
		break;
	default:
		/* Only the two CNT registers can reach this point. */
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags))
			timer_set_offset(ctx, kvm_phys_timer_read() - value);
		break;
	}

	return 0;
}
1166
1167
static u64 read_timer_ctl(struct arch_timer_context *timer)
1168
{
1169
/*
1170
* Set ISTATUS bit if it's expired.
1171
* Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
1172
* UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
1173
* regardless of ENABLE bit for our implementation convenience.
1174
*/
1175
u32 ctl = timer_get_ctl(timer);
1176
1177
if (!kvm_timer_compute_delta(timer))
1178
ctl |= ARCH_TIMER_CTRL_IT_STAT;
1179
1180
return ctl;
1181
}
1182
1183
/*
 * Read one of the KVM_REG_ARM_{TIMER,PTIMER}_{CTL,CNT,CVAL} registers.
 *
 * @vcpu:  vcpu whose timer is read
 * @regid: register identifier
 *
 * Returns the register value, or (u64)-1 if @regid is not a known timer
 * register.
 */
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
	struct arch_timer_context *ctx;
	enum kvm_arch_timer_regs treg;

	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		ctx = vcpu_vtimer(vcpu);
		treg = TIMER_REG_CTL;
		break;
	case KVM_REG_ARM_TIMER_CNT:
		ctx = vcpu_vtimer(vcpu);
		treg = TIMER_REG_CNT;
		break;
	case KVM_REG_ARM_TIMER_CVAL:
		ctx = vcpu_vtimer(vcpu);
		treg = TIMER_REG_CVAL;
		break;
	case KVM_REG_ARM_PTIMER_CTL:
		ctx = vcpu_ptimer(vcpu);
		treg = TIMER_REG_CTL;
		break;
	case KVM_REG_ARM_PTIMER_CNT:
		ctx = vcpu_ptimer(vcpu);
		treg = TIMER_REG_CNT;
		break;
	case KVM_REG_ARM_PTIMER_CVAL:
		ctx = vcpu_ptimer(vcpu);
		treg = TIMER_REG_CVAL;
		break;
	default:
		return (u64)-1;
	}

	return kvm_arm_timer_read(vcpu, ctx, treg);
}
1207
1208
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
1209
struct arch_timer_context *timer,
1210
enum kvm_arch_timer_regs treg)
1211
{
1212
u64 val;
1213
1214
switch (treg) {
1215
case TIMER_REG_TVAL:
1216
val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
1217
val = lower_32_bits(val);
1218
break;
1219
1220
case TIMER_REG_CTL:
1221
val = read_timer_ctl(timer);
1222
break;
1223
1224
case TIMER_REG_CVAL:
1225
val = timer_get_cval(timer);
1226
break;
1227
1228
case TIMER_REG_CNT:
1229
val = kvm_phys_timer_read() - timer_get_offset(timer);
1230
break;
1231
1232
case TIMER_REG_VOFF:
1233
val = *timer->offset.vcpu_offset;
1234
break;
1235
1236
default:
1237
BUG();
1238
}
1239
1240
return val;
1241
}
1242
1243
u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
1244
enum kvm_arch_timers tmr,
1245
enum kvm_arch_timer_regs treg)
1246
{
1247
struct arch_timer_context *timer;
1248
struct timer_map map;
1249
u64 val;
1250
1251
get_timer_map(vcpu, &map);
1252
timer = vcpu_get_timer(vcpu, tmr);
1253
1254
if (timer == map.emul_vtimer || timer == map.emul_ptimer)
1255
return kvm_arm_timer_read(vcpu, timer, treg);
1256
1257
preempt_disable();
1258
timer_save_state(timer);
1259
1260
val = kvm_arm_timer_read(vcpu, timer, treg);
1261
1262
timer_restore_state(timer);
1263
preempt_enable();
1264
1265
return val;
1266
}
1267
1268
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
1269
struct arch_timer_context *timer,
1270
enum kvm_arch_timer_regs treg,
1271
u64 val)
1272
{
1273
switch (treg) {
1274
case TIMER_REG_TVAL:
1275
timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
1276
break;
1277
1278
case TIMER_REG_CTL:
1279
timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
1280
break;
1281
1282
case TIMER_REG_CVAL:
1283
timer_set_cval(timer, val);
1284
break;
1285
1286
case TIMER_REG_VOFF:
1287
*timer->offset.vcpu_offset = val;
1288
break;
1289
1290
default:
1291
BUG();
1292
}
1293
}
1294
1295
void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
1296
enum kvm_arch_timers tmr,
1297
enum kvm_arch_timer_regs treg,
1298
u64 val)
1299
{
1300
struct arch_timer_context *timer;
1301
struct timer_map map;
1302
1303
get_timer_map(vcpu, &map);
1304
timer = vcpu_get_timer(vcpu, tmr);
1305
if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
1306
soft_timer_cancel(&timer->hrtimer);
1307
kvm_arm_timer_write(vcpu, timer, treg, val);
1308
timer_emulate(timer);
1309
} else {
1310
preempt_disable();
1311
timer_save_state(timer);
1312
kvm_arm_timer_write(vcpu, timer, treg, val);
1313
timer_restore_state(timer);
1314
preempt_enable();
1315
}
1316
}
1317
1318
/*
 * irq_chip callback: mark the interrupt as forwarded to a vcpu when
 * @vcpu is non-NULL, and clear that marking otherwise. Always returns 0.
 */
static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (!vcpu) {
		irqd_clr_forwarded_to_vcpu(d);
		return 0;
	}

	irqd_set_forwarded_to_vcpu(d);
	return 0;
}
1327
1328
/*
 * irq_chip callback: set an irqchip state.
 *
 * For an interrupt forwarded to a vcpu, the ACTIVE state is not passed to
 * the parent chip: setting it masks the interrupt at the parent and
 * clearing it unmasks it. Every other request is handed to the parent
 * unchanged.
 */
static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	bool fake_active = which == IRQCHIP_STATE_ACTIVE &&
			   irqd_is_forwarded_to_vcpu(d);

	if (!fake_active)
		return irq_chip_set_parent_state(d, which, val);

	if (!val)
		irq_chip_unmask_parent(d);
	else
		irq_chip_mask_parent(d);

	return 0;
}
1341
1342
/*
 * irq_chip callback: EOI is only propagated to the parent chip when the
 * interrupt is not forwarded to a vcpu.
 */
static void timer_irq_eoi(struct irq_data *d)
{
	if (irqd_is_forwarded_to_vcpu(d))
		return;

	irq_chip_eoi_parent(d);
}
1347
1348
static void timer_irq_ack(struct irq_data *d)
1349
{
1350
d = d->parent_data;
1351
if (d->chip->irq_ack)
1352
d->chip->irq_ack(d);
1353
}
1354
1355
static struct irq_chip timer_chip = {
1356
.name = "KVM",
1357
.irq_ack = timer_irq_ack,
1358
.irq_mask = irq_chip_mask_parent,
1359
.irq_unmask = irq_chip_unmask_parent,
1360
.irq_eoi = timer_irq_eoi,
1361
.irq_set_type = irq_chip_set_type_parent,
1362
.irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity,
1363
.irq_set_irqchip_state = timer_irq_set_irqchip_state,
1364
};
1365
1366
static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
1367
unsigned int nr_irqs, void *arg)
1368
{
1369
irq_hw_number_t hwirq = (uintptr_t)arg;
1370
1371
return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
1372
&timer_chip, NULL);
1373
}
1374
1375
/* irq_domain .free callback: intentionally a no-op. */
static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
	/* Nothing to do. */
}
1379
1380
static const struct irq_domain_ops timer_domain_ops = {
1381
.alloc = timer_irq_domain_alloc,
1382
.free = timer_irq_domain_free,
1383
};
1384
1385
/*
 * Store the trigger type of @virq in @flags.
 * Anything other than level-high or level-low is reported and replaced
 * with level-low.
 */
static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	u32 trigger = irq_get_trigger_type(virq);

	switch (trigger) {
	case IRQF_TRIGGER_HIGH:
	case IRQF_TRIGGER_LOW:
		break;
	default:
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		trigger = IRQF_TRIGGER_LOW;
		break;
	}

	*flags = trigger;
}
1394
1395
static int kvm_irq_init(struct arch_timer_kvm_info *info)
1396
{
1397
struct irq_domain *domain = NULL;
1398
1399
if (info->virtual_irq <= 0) {
1400
kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
1401
info->virtual_irq);
1402
return -ENODEV;
1403
}
1404
1405
host_vtimer_irq = info->virtual_irq;
1406
kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
1407
1408
if (kvm_vgic_global_state.no_hw_deactivation) {
1409
struct fwnode_handle *fwnode;
1410
struct irq_data *data;
1411
1412
fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
1413
if (!fwnode)
1414
return -ENOMEM;
1415
1416
/* Assume both vtimer and ptimer in the same parent */
1417
data = irq_get_irq_data(host_vtimer_irq);
1418
domain = irq_domain_create_hierarchy(data->domain, 0,
1419
NR_KVM_TIMERS, fwnode,
1420
&timer_domain_ops, NULL);
1421
if (!domain) {
1422
irq_domain_free_fwnode(fwnode);
1423
return -ENOMEM;
1424
}
1425
1426
arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
1427
WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
1428
(void *)TIMER_VTIMER));
1429
}
1430
1431
if (info->physical_irq > 0) {
1432
host_ptimer_irq = info->physical_irq;
1433
kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
1434
1435
if (domain)
1436
WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
1437
(void *)TIMER_PTIMER));
1438
}
1439
1440
return 0;
1441
}
1442
1443
static void kvm_timer_handle_errata(void)
1444
{
1445
u64 mmfr0, mmfr1, mmfr4;
1446
1447
/*
1448
* CNTVOFF_EL2 is broken on some implementations. For those, we trap
1449
* all virtual timer/counter accesses, requiring FEAT_ECV.
1450
*
1451
* However, a hypervisor supporting nesting is likely to mitigate the
1452
* erratum at L0, and not require other levels to mitigate it (which
1453
* would otherwise be a terrible performance sink due to trap
1454
* amplification).
1455
*
1456
* Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
1457
* and that NV is likely not to (because of limitations of the
1458
* architecture), only enable the workaround when FEAT_VHE and
1459
* FEAT_E2H0 are both detected. Time will tell if this actually holds.
1460
*/
1461
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
1462
mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
1463
mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
1464
if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
1465
!SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
1466
SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
1467
(has_vhe() || has_hvhe()) &&
1468
cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
1469
static_branch_enable(&broken_cntvoff_key);
1470
kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
1471
}
1472
}
1473
1474
/*
 * One-time timer setup: fetch the host timer description, register the
 * per-CPU handlers for the virtual and (if present) physical timer
 * interrupts and apply errata handling.
 *
 * @has_gic: when true, the timer interrupts are also given a vcpu
 *           affinity and has_gic_active_state is enabled.
 *
 * Returns 0 on success or a negative error code. A missing physical timer
 * interrupt is only an error when has_vhe() is true. Interrupts requested
 * before a failure are freed again.
 */
int __init kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info = arch_timer_get_kvm_info();
	int ret;

	timecounter = &info->timecounter;
	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	ret = kvm_irq_init(info);
	if (ret)
		return ret;

	/* Virtual EL1 timer interrupt comes first */
	ret = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (ret) {
		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, ret);
		return ret;
	}

	if (has_gic) {
		ret = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (ret) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	/* Then the physical EL1 timer interrupt */
	if (info->physical_irq <= 0) {
		if (has_vhe()) {
			kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
				info->physical_irq);
			ret = -ENODEV;
			goto out_free_vtimer_irq;
		}
	} else {
		ret = request_percpu_irq(host_ptimer_irq,
					 kvm_arch_timer_handler,
					 "kvm guest ptimer",
					 kvm_get_running_vcpus());
		if (ret) {
			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
				host_ptimer_irq, ret);
			goto out_free_vtimer_irq;
		}

		if (has_gic) {
			ret = irq_set_vcpu_affinity(host_ptimer_irq,
						    kvm_get_running_vcpus());
			if (ret) {
				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
				goto out_free_ptimer_irq;
			}
		}

		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
	}

	kvm_timer_handle_errata();
	return 0;

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return ret;
}
1552
1553
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
1554
{
1555
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1556
1557
soft_timer_cancel(&timer->bg_timer);
1558
}
1559
1560
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
1561
{
1562
u32 ppis = 0;
1563
bool valid;
1564
1565
mutex_lock(&vcpu->kvm->arch.config_lock);
1566
1567
for (int i = 0; i < nr_timers(vcpu); i++) {
1568
struct arch_timer_context *ctx;
1569
int irq;
1570
1571
ctx = vcpu_get_timer(vcpu, i);
1572
irq = timer_irq(ctx);
1573
if (kvm_vgic_set_owner(vcpu, irq, ctx))
1574
break;
1575
1576
/*
1577
* We know by construction that we only have PPIs, so
1578
* all values are less than 32.
1579
*/
1580
ppis |= BIT(irq);
1581
}
1582
1583
valid = hweight32(ppis) == nr_timers(vcpu);
1584
1585
if (valid)
1586
set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);
1587
1588
mutex_unlock(&vcpu->kvm->arch.config_lock);
1589
1590
return valid;
1591
}
1592
1593
static bool kvm_arch_timer_get_input_level(int vintid)
1594
{
1595
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
1596
1597
if (WARN(!vcpu, "No vcpu context!\n"))
1598
return false;
1599
1600
for (int i = 0; i < nr_timers(vcpu); i++) {
1601
struct arch_timer_context *ctx;
1602
1603
ctx = vcpu_get_timer(vcpu, i);
1604
if (timer_irq(ctx) == vintid)
1605
return kvm_timer_should_fire(ctx);
1606
}
1607
1608
/* A timer IRQ has fired, but no matching timer was found? */
1609
WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);
1610
1611
return false;
1612
}
1613
1614
int kvm_timer_enable(struct kvm_vcpu *vcpu)
1615
{
1616
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1617
struct timer_map map;
1618
int ret;
1619
1620
if (timer->enabled)
1621
return 0;
1622
1623
/* Without a VGIC we do not map virtual IRQs to physical IRQs */
1624
if (!irqchip_in_kernel(vcpu->kvm))
1625
goto no_vgic;
1626
1627
/*
1628
* At this stage, we have the guarantee that the vgic is both
1629
* available and initialized.
1630
*/
1631
if (!timer_irqs_are_valid(vcpu)) {
1632
kvm_debug("incorrectly configured timer irqs\n");
1633
return -EINVAL;
1634
}
1635
1636
get_timer_map(vcpu, &map);
1637
1638
ret = kvm_vgic_map_phys_irq(vcpu,
1639
map.direct_vtimer->host_timer_irq,
1640
timer_irq(map.direct_vtimer),
1641
&arch_timer_irq_ops);
1642
if (ret)
1643
return ret;
1644
1645
if (map.direct_ptimer) {
1646
ret = kvm_vgic_map_phys_irq(vcpu,
1647
map.direct_ptimer->host_timer_irq,
1648
timer_irq(map.direct_ptimer),
1649
&arch_timer_irq_ops);
1650
}
1651
1652
if (ret)
1653
return ret;
1654
1655
no_vgic:
1656
timer->enabled = 1;
1657
return 0;
1658
}
1659
1660
/* If we have CNTPOFF, permanently set ECV to enable it */
1661
void kvm_timer_init_vhe(void)
1662
{
1663
if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
1664
sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
1665
}
1666
1667
int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1668
{
1669
int __user *uaddr = (int __user *)(long)attr->addr;
1670
int irq, idx, ret = 0;
1671
1672
if (!irqchip_in_kernel(vcpu->kvm))
1673
return -EINVAL;
1674
1675
if (get_user(irq, uaddr))
1676
return -EFAULT;
1677
1678
if (!(irq_is_ppi(irq)))
1679
return -EINVAL;
1680
1681
mutex_lock(&vcpu->kvm->arch.config_lock);
1682
1683
if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
1684
&vcpu->kvm->arch.flags)) {
1685
ret = -EBUSY;
1686
goto out;
1687
}
1688
1689
switch (attr->attr) {
1690
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1691
idx = TIMER_VTIMER;
1692
break;
1693
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1694
idx = TIMER_PTIMER;
1695
break;
1696
case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1697
idx = TIMER_HVTIMER;
1698
break;
1699
case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1700
idx = TIMER_HPTIMER;
1701
break;
1702
default:
1703
ret = -ENXIO;
1704
goto out;
1705
}
1706
1707
/*
1708
* We cannot validate the IRQ unicity before we run, so take it at
1709
* face value. The verdict will be given on first vcpu run, for each
1710
* vcpu. Yes this is late. Blame it on the stupid API.
1711
*/
1712
vcpu->kvm->arch.timer_data.ppi[idx] = irq;
1713
1714
out:
1715
mutex_unlock(&vcpu->kvm->arch.config_lock);
1716
return ret;
1717
}
1718
1719
int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1720
{
1721
int __user *uaddr = (int __user *)(long)attr->addr;
1722
struct arch_timer_context *timer;
1723
int irq;
1724
1725
switch (attr->attr) {
1726
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1727
timer = vcpu_vtimer(vcpu);
1728
break;
1729
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1730
timer = vcpu_ptimer(vcpu);
1731
break;
1732
case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1733
timer = vcpu_hvtimer(vcpu);
1734
break;
1735
case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1736
timer = vcpu_hptimer(vcpu);
1737
break;
1738
default:
1739
return -ENXIO;
1740
}
1741
1742
irq = timer_irq(timer);
1743
return put_user(irq, uaddr);
1744
}
1745
1746
int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1747
{
1748
switch (attr->attr) {
1749
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1750
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1751
case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1752
case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1753
return 0;
1754
}
1755
1756
return -ENXIO;
1757
}
1758
1759
int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
1760
struct kvm_arm_counter_offset *offset)
1761
{
1762
int ret = 0;
1763
1764
if (offset->reserved)
1765
return -EINVAL;
1766
1767
mutex_lock(&kvm->lock);
1768
1769
if (!kvm_trylock_all_vcpus(kvm)) {
1770
set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);
1771
1772
/*
1773
* If userspace decides to set the offset using this
1774
* API rather than merely restoring the counter
1775
* values, the offset applies to both the virtual and
1776
* physical views.
1777
*/
1778
kvm->arch.timer_data.voffset = offset->counter_offset;
1779
kvm->arch.timer_data.poffset = offset->counter_offset;
1780
1781
kvm_unlock_all_vcpus(kvm);
1782
} else {
1783
ret = -EBUSY;
1784
}
1785
1786
mutex_unlock(&kvm->lock);
1787
1788
return ret;
1789
}
1790
1791