GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/xen/time.c
/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <[email protected]>, XenSource Inc, 2007
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/kernel_stat.h>
#include <linux/math64.h>
#include <linux/gfp.h>

#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"

/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP	100000
#define NS_PER_TICK	(1000000000LL / HZ)

/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);

/* snapshots of runstate info */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);

/* unused ns of stolen and blocked time */
static DEFINE_PER_CPU(u64, xen_residual_stolen);
static DEFINE_PER_CPU(u64, xen_residual_blocked);

/* return a consistent snapshot of 64-bit time/counter value */
static u64 get64(const u64 *p)
{
	u64 ret;

	if (BITS_PER_LONG < 64) {
		u32 *p32 = (u32 *)p;
		u32 h, l;

		/*
		 * Read high then low, and then make sure high is
		 * still the same; this will only loop if low wraps
		 * and carries into high.
		 * XXX some clean way to make this endian-proof?
		 */
		do {
			h = p32[1];
			barrier();
			l = p32[0];
			barrier();
		} while (p32[1] != h);

		ret = (((u64)h) << 32) | l;
	} else
		ret = *p;

	return ret;
}

/*
 * Runstate accounting
 */
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
	u64 state_time;
	struct vcpu_runstate_info *state;

	BUG_ON(preemptible());

	state = &__get_cpu_var(xen_runstate);

	/*
	 * The runstate info is always updated by the hypervisor on
	 * the current CPU, so there's no need to use anything
	 * stronger than a compiler barrier when fetching it.
	 */
	do {
		state_time = get64(&state->state_entry_time);
		barrier();
		*res = *state;
		barrier();
	} while (get64(&state->state_entry_time) != state_time);
}

/* return true when a vcpu could run but has no real cpu to run on */
bool xen_vcpu_stolen(int vcpu)
{
	return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
}

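/*
 * Register this CPU's runstate area with the hypervisor so that Xen
 * updates it directly whenever the vcpu changes state.
 */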
void xen_setup_runstate_info(int cpu)
{
	struct vcpu_register_runstate_memory_area area;

	area.addr.v = &per_cpu(xen_runstate, cpu);

	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
			       cpu, &area))
		BUG();
}

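/*
 * Account the time since the last runstate snapshot: nanoseconds spent
 * runnable or offline are charged as stolen ticks, nanoseconds spent
 * blocked as idle ticks.  Sub-tick remainders are carried over in the
 * per-cpu residual counters.
 */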
static void do_stolen_accounting(void)
{
	struct vcpu_runstate_info state;
	struct vcpu_runstate_info *snap;
	s64 blocked, runnable, offline, stolen;
	cputime_t ticks;

	get_runstate_snapshot(&state);

	WARN_ON(state.state != RUNSTATE_running);

	snap = &__get_cpu_var(xen_runstate_snapshot);

	/* work out how much time the VCPU has not been runn*ing* */
	blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];

	*snap = state;

	/* Add the appropriate number of ticks of stolen time,
	   including any left-overs from last time. */
	stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);

	if (stolen < 0)
		stolen = 0;

	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
	__this_cpu_write(xen_residual_stolen, stolen);
	account_steal_ticks(ticks);

	/* Add the appropriate number of ticks of blocked time,
	   including any left-overs from last time. */
	blocked += __this_cpu_read(xen_residual_blocked);

	if (blocked < 0)
		blocked = 0;

	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
	__this_cpu_write(xen_residual_blocked, blocked);
	account_idle_ticks(ticks);
}

/* Get the TSC speed from Xen */
static unsigned long xen_tsc_khz(void)
{
	struct pvclock_vcpu_time_info *info =
		&HYPERVISOR_shared_info->vcpu_info[0].time;

	return pvclock_tsc_khz(info);
}

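/* Read the current time, in nanoseconds, from this CPU's pvclock area. */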
cycle_t xen_clocksource_read(void)
{
	struct pvclock_vcpu_time_info *src;
	cycle_t ret;

	src = &get_cpu_var(xen_vcpu)->time;
	ret = pvclock_clocksource_read(src);
	put_cpu_var(xen_vcpu);
	return ret;
}

static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
{
	return xen_clocksource_read();
}

static void xen_read_wallclock(struct timespec *ts)
{
	struct shared_info *s = HYPERVISOR_shared_info;
	struct pvclock_wall_clock *wall_clock = &(s->wc);
	struct pvclock_vcpu_time_info *vcpu_time;

	vcpu_time = &get_cpu_var(xen_vcpu)->time;
	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
	put_cpu_var(xen_vcpu);
}

static unsigned long xen_get_wallclock(void)
{
	struct timespec ts;

	xen_read_wallclock(&ts);
	return ts.tv_sec;
}

static int xen_set_wallclock(unsigned long now)
{
	/* do nothing for domU */
	return -1;
}

static struct clocksource xen_clocksource __read_mostly = {
	.name = "xen",
	.rating = 400,
	.read = xen_clocksource_get_cycles,
	.mask = ~0,
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};

/*
   Xen clockevent implementation

   Xen has two clockevent implementations:

   The old timer_op one works with all released versions of Xen prior
   to version 3.0.4.  This version of the hypervisor provides a
   single-shot timer with nanosecond resolution.  However, a 100Hz
   tick sharing the same event channel is delivered while the vcpu is
   running.  We don't care about or use this tick, but it will cause
   the core time code to think the timer fired too soon, and will end
   up resetting it each time.  It could be filtered, but doing so has
   complications when the ktime clocksource is not yet the xen
   clocksource (i.e., at boot time).

   The new vcpu_op-based timer interface allows the tick timer period
   to be changed or turned off.  The tick timer is not useful as a
   periodic timer because events are only delivered to running vcpus.
   The one-shot timer can report when a timeout is in the past, so
   set_next_event is capable of returning -ETIME when appropriate.
   This interface is used when available.
*/

/*
   Get a hypervisor absolute time.  In theory we could maintain an
   offset between the kernel's time and the hypervisor's time, and
   apply that to a kernel's absolute timeout.  Unfortunately the
   hypervisor and kernel times can drift even if the kernel is using
   the Xen clocksource, because ntp can warp the kernel's clocksource.
*/
static s64 get_abs_timeout(unsigned long delta)
{
	return xen_clocksource_read() + delta;
}

static void xen_timerop_set_mode(enum clock_event_mode mode,
				 struct clock_event_device *evt)
{
	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		/* unsupported */
		WARN_ON(1);
		break;

	case CLOCK_EVT_MODE_ONESHOT:
	case CLOCK_EVT_MODE_RESUME:
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		HYPERVISOR_set_timer_op(0);  /* cancel timeout */
		break;
	}
}

static int xen_timerop_set_next_event(unsigned long delta,
				      struct clock_event_device *evt)
{
	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
		BUG();

	/* We may have missed the deadline, but there's no real way of
	   knowing for sure.  If the event was in the past, then we'll
	   get an immediate interrupt. */

	return 0;
}

static const struct clock_event_device xen_timerop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_timerop_set_mode,
	.set_next_event = xen_timerop_set_next_event,
};

static void xen_vcpuop_set_mode(enum clock_event_mode mode,
				struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		WARN_ON(1);	/* unsupported */
		break;

	case CLOCK_EVT_MODE_ONESHOT:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
		    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;
	case CLOCK_EVT_MODE_RESUME:
		break;
	}
}

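/*
 * Program the one-shot timer via VCPUOP_set_singleshot_timer.
 * VCPU_SSHOTTMR_future makes the hypercall fail with -ETIME if the
 * requested timeout is already in the past instead of delivering an
 * immediate event; in that case -ETIME is passed back to the
 * clockevent core.
 */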
static int xen_vcpuop_set_next_event(unsigned long delta,
				     struct clock_event_device *evt)
{
	int cpu = smp_processor_id();
	struct vcpu_set_singleshot_timer single;
	int ret;

	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	single.timeout_abs_ns = get_abs_timeout(delta);
	single.flags = VCPU_SSHOTTMR_future;

	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);

	BUG_ON(ret != 0 && ret != -ETIME);

	return ret;
}

static const struct clock_event_device xen_vcpuop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_vcpuop_set_mode,
	.set_next_event = xen_vcpuop_set_next_event,
};

static const struct clock_event_device *xen_clockevent =
	&xen_timerop_clockevent;
static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);

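/*
 * Per-cpu VIRQ_TIMER handler: run the registered clockevent handler
 * (if any), then update this cpu's stolen/blocked time accounting.
 */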
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
	irqreturn_t ret;

	ret = IRQ_NONE;
	if (evt->event_handler) {
		evt->event_handler(evt);
		ret = IRQ_HANDLED;
	}

	do_stolen_accounting();

	return ret;
}

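/*
 * Bind VIRQ_TIMER for this cpu to xen_timer_interrupt and initialise
 * the cpu's clockevent device from the currently selected template
 * (timer_op- or vcpu_op-based).
 */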
void xen_setup_timer(int cpu)
{
	const char *name;
	struct clock_event_device *evt;
	int irq;

	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

	name = kasprintf(GFP_KERNEL, "timer%d", cpu);
	if (!name)
		name = "<timer kasprintf failed>";

	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
				      IRQF_DISABLED|IRQF_PERCPU|
				      IRQF_NOBALANCING|IRQF_TIMER|
				      IRQF_FORCE_RESUME,
				      name, NULL);

	evt = &per_cpu(xen_clock_events, cpu);
	memcpy(evt, xen_clockevent, sizeof(*evt));

	evt->cpumask = cpumask_of(cpu);
	evt->irq = irq;
}

void xen_teardown_timer(int cpu)
{
	struct clock_event_device *evt;
	BUG_ON(cpu == 0);
	evt = &per_cpu(xen_clock_events, cpu);
	unbind_from_irqhandler(evt->irq, NULL);
}

void xen_setup_cpu_clockevents(void)
{
	BUG_ON(preemptible());

	clockevents_register_device(&__get_cpu_var(xen_clock_events));
}

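/*
 * On resume, reset the pvclock state and, when the vcpu_op timer
 * interface is in use, stop the hypervisor's periodic tick again on
 * every online cpu.
 */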
void xen_timer_resume(void)
{
	int cpu;

	pvclock_resume();

	if (xen_clockevent != &xen_vcpuop_clockevent)
		return;

	for_each_online_cpu(cpu) {
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
	}
}

static const struct pv_time_ops xen_time_ops __initconst = {
	.sched_clock = xen_clocksource_read,
};

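/*
 * Register the Xen clocksource, probe for the vcpu_op timer interface,
 * set the initial system time from the Xen wallclock and set up the
 * boot cpu's timer.
 */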
static void __init xen_time_init(void)
{
	int cpu = smp_processor_id();
	struct timespec tp;

	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
		/* Successfully turned off 100Hz tick, so we have the
		   vcpuop-based timer interface */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Set initial system time with full resolution */
	xen_read_wallclock(&tp);
	do_settimeofday(&tp);

	setup_force_cpu_cap(X86_FEATURE_TSC);

	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();
}

void __init xen_init_time_ops(void)
{
	pv_time_ops = xen_time_ops;

	x86_init.timers.timer_init = xen_time_init;
	x86_init.timers.setup_percpu_clockev = x86_init_noop;
	x86_cpuinit.setup_percpu_clockev = x86_init_noop;

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
	x86_platform.set_wallclock = xen_set_wallclock;
}

#ifdef CONFIG_XEN_PVHVM
static void xen_hvm_setup_cpu_clockevents(void)
{
	int cpu = smp_processor_id();
	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();
}

void __init xen_hvm_init_time_ops(void)
{
	/* vector callback is needed otherwise we cannot receive interrupts
	 * on cpu > 0 and at this point we don't know how many cpus are
	 * available */
	if (!xen_have_vector_callback)
		return;
	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
		printk(KERN_INFO "Xen doesn't support pvclock on HVM, "
		       "disabling pv timer\n");
499
return;
500
}
501
502
pv_time_ops = xen_time_ops;
503
x86_init.timers.setup_percpu_clockev = xen_time_init;
504
x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
505
506
x86_platform.calibrate_tsc = xen_tsc_khz;
507
x86_platform.get_wallclock = xen_get_wallclock;
508
x86_platform.set_wallclock = xen_set_wallclock;
509
}
510
#endif
511
512