GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/xen/time.c
/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <[email protected]>, XenSource Inc, 2007
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/kernel_stat.h>
#include <linux/math64.h>
#include <linux/gfp.h>

#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"

/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP	100000
#define NS_PER_TICK	(1000000000LL / HZ)

/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);

/* snapshots of runstate info */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);

/* unused ns of stolen and blocked time */
static DEFINE_PER_CPU(u64, xen_residual_stolen);
static DEFINE_PER_CPU(u64, xen_residual_blocked);

/* return a consistent snapshot of 64-bit time/counter value */
static u64 get64(const u64 *p)
{
	u64 ret;

	if (BITS_PER_LONG < 64) {
		u32 *p32 = (u32 *)p;
		u32 h, l;

		/*
		 * Read high then low, and then make sure high is
		 * still the same; this will only loop if low wraps
		 * and carries into high.
		 * XXX some clean way to make this endian-proof?
		 */
		do {
			h = p32[1];
			barrier();
			l = p32[0];
			barrier();
		} while (p32[1] != h);

		ret = (((u64)h) << 32) | l;
	} else
		ret = *p;

	return ret;
}

/*
 * Runstate accounting
 */
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
	u64 state_time;
	struct vcpu_runstate_info *state;

	BUG_ON(preemptible());

	state = &__get_cpu_var(xen_runstate);

	/*
	 * The runstate info is always updated by the hypervisor on
	 * the current CPU, so there's no need to use anything
	 * stronger than a compiler barrier when fetching it.
	 */
	do {
		state_time = get64(&state->state_entry_time);
		barrier();
		*res = *state;
		barrier();
	} while (get64(&state->state_entry_time) != state_time);
}

/* return true when a vcpu could run but has no real cpu to run on */
bool xen_vcpu_stolen(int vcpu)
{
	return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
}

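/*
 * Register this CPU's runstate area with the hypervisor so that Xen
 * updates it directly whenever the vcpu changes state.
 */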
void xen_setup_runstate_info(int cpu)
{
	struct vcpu_register_runstate_memory_area area;

	area.addr.v = &per_cpu(xen_runstate, cpu);

	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
			       cpu, &area))
		BUG();
}

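/*
 * Account the time since the last runstate snapshot: nanoseconds spent
 * runnable or offline are charged as stolen ticks, nanoseconds spent
 * blocked as idle ticks.  Sub-tick remainders are carried over in the
 * per-cpu residual counters.
 */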
static void do_stolen_accounting(void)
{
	struct vcpu_runstate_info state;
	struct vcpu_runstate_info *snap;
	s64 blocked, runnable, offline, stolen;
	cputime_t ticks;

	get_runstate_snapshot(&state);

	WARN_ON(state.state != RUNSTATE_running);

	snap = &__get_cpu_var(xen_runstate_snapshot);

	/* work out how much time the VCPU has not been runn*ing* */
	blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];

	*snap = state;

	/* Add the appropriate number of ticks of stolen time,
	   including any left-overs from last time. */
	stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);

	if (stolen < 0)
		stolen = 0;

	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
	__this_cpu_write(xen_residual_stolen, stolen);
	account_steal_ticks(ticks);

	/* Add the appropriate number of ticks of blocked time,
	   including any left-overs from last time. */
	blocked += __this_cpu_read(xen_residual_blocked);

	if (blocked < 0)
		blocked = 0;

	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
	__this_cpu_write(xen_residual_blocked, blocked);
	account_idle_ticks(ticks);
}

/* Get the TSC speed from Xen */
static unsigned long xen_tsc_khz(void)
{
	struct pvclock_vcpu_time_info *info =
		&HYPERVISOR_shared_info->vcpu_info[0].time;

	return pvclock_tsc_khz(info);
}

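/* Read the current time, in nanoseconds, from this CPU's pvclock area. */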
cycle_t xen_clocksource_read(void)
{
	struct pvclock_vcpu_time_info *src;
	cycle_t ret;

	src = &get_cpu_var(xen_vcpu)->time;
	ret = pvclock_clocksource_read(src);
	put_cpu_var(xen_vcpu);
	return ret;
}

static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
{
	return xen_clocksource_read();
}

static void xen_read_wallclock(struct timespec *ts)
{
	struct shared_info *s = HYPERVISOR_shared_info;
	struct pvclock_wall_clock *wall_clock = &(s->wc);
	struct pvclock_vcpu_time_info *vcpu_time;

	vcpu_time = &get_cpu_var(xen_vcpu)->time;
	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
	put_cpu_var(xen_vcpu);
}

static unsigned long xen_get_wallclock(void)
{
	struct timespec ts;

	xen_read_wallclock(&ts);
	return ts.tv_sec;
}

static int xen_set_wallclock(unsigned long now)
{
	/* do nothing for domU */
	return -1;
}

static struct clocksource xen_clocksource __read_mostly = {
	.name = "xen",
	.rating = 400,
	.read = xen_clocksource_get_cycles,
	.mask = ~0,
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};

/*
   Xen clockevent implementation

   Xen has two clockevent implementations:

   The old timer_op one works with all released versions of Xen prior
   to version 3.0.4.  This version of the hypervisor provides a
   single-shot timer with nanosecond resolution.  However, a 100Hz
   tick sharing the same event channel is delivered while the vcpu is
   running.  We don't care about or use this tick, but it will cause
   the core time code to think the timer fired too soon, and will end
   up resetting it each time.  It could be filtered, but doing so has
   complications when the ktime clocksource is not yet the xen
   clocksource (i.e., at boot time).

   The new vcpu_op-based timer interface allows the tick timer period
   to be changed or turned off.  The tick timer is not useful as a
   periodic timer because events are only delivered to running vcpus.
   The one-shot timer can report when a timeout is in the past, so
   set_next_event is capable of returning -ETIME when appropriate.
   This interface is used when available.
*/

/*
   Get a hypervisor absolute time.  In theory we could maintain an
   offset between the kernel's time and the hypervisor's time, and
   apply that to a kernel's absolute timeout.  Unfortunately the
   hypervisor and kernel times can drift even if the kernel is using
   the Xen clocksource, because ntp can warp the kernel's clocksource.
*/
static s64 get_abs_timeout(unsigned long delta)
{
	return xen_clocksource_read() + delta;
}

static void xen_timerop_set_mode(enum clock_event_mode mode,
				 struct clock_event_device *evt)
{
	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		/* unsupported */
		WARN_ON(1);
		break;

	case CLOCK_EVT_MODE_ONESHOT:
	case CLOCK_EVT_MODE_RESUME:
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		HYPERVISOR_set_timer_op(0);  /* cancel timeout */
		break;
	}
}

static int xen_timerop_set_next_event(unsigned long delta,
				      struct clock_event_device *evt)
{
	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
		BUG();

	/* We may have missed the deadline, but there's no real way of
	   knowing for sure.  If the event was in the past, then we'll
	   get an immediate interrupt. */

	return 0;
}

static const struct clock_event_device xen_timerop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_timerop_set_mode,
	.set_next_event = xen_timerop_set_next_event,
};

static void xen_vcpuop_set_mode(enum clock_event_mode mode,
				struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		WARN_ON(1);	/* unsupported */
		break;

	case CLOCK_EVT_MODE_ONESHOT:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
		    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;
	case CLOCK_EVT_MODE_RESUME:
		break;
	}
}

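/*
 * Program the one-shot timer via VCPUOP_set_singleshot_timer.
 * VCPU_SSHOTTMR_future makes the hypercall fail with -ETIME if the
 * requested timeout is already in the past instead of delivering an
 * immediate event; in that case -ETIME is passed back to the
 * clockevent core.
 */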
static int xen_vcpuop_set_next_event(unsigned long delta,
				     struct clock_event_device *evt)
{
	int cpu = smp_processor_id();
	struct vcpu_set_singleshot_timer single;
	int ret;

	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	single.timeout_abs_ns = get_abs_timeout(delta);
	single.flags = VCPU_SSHOTTMR_future;

	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);

	BUG_ON(ret != 0 && ret != -ETIME);

	return ret;
}

static const struct clock_event_device xen_vcpuop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_vcpuop_set_mode,
	.set_next_event = xen_vcpuop_set_next_event,
};

static const struct clock_event_device *xen_clockevent =
	&xen_timerop_clockevent;
static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);

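/*
 * Per-cpu VIRQ_TIMER handler: run the registered clockevent handler
 * (if any), then update this cpu's stolen/blocked time accounting.
 */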
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
	irqreturn_t ret;

	ret = IRQ_NONE;
	if (evt->event_handler) {
		evt->event_handler(evt);
		ret = IRQ_HANDLED;
	}

	do_stolen_accounting();

	return ret;
}

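/*
 * Bind VIRQ_TIMER for this cpu to xen_timer_interrupt and initialise
 * the cpu's clockevent device from the currently selected template
 * (timer_op- or vcpu_op-based).
 */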
void xen_setup_timer(int cpu)
{
	const char *name;
	struct clock_event_device *evt;
	int irq;

	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

	name = kasprintf(GFP_KERNEL, "timer%d", cpu);
	if (!name)
		name = "<timer kasprintf failed>";

	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
				      IRQF_DISABLED|IRQF_PERCPU|
				      IRQF_NOBALANCING|IRQF_TIMER|
				      IRQF_FORCE_RESUME,
				      name, NULL);

	evt = &per_cpu(xen_clock_events, cpu);
	memcpy(evt, xen_clockevent, sizeof(*evt));

	evt->cpumask = cpumask_of(cpu);
	evt->irq = irq;
}

void xen_teardown_timer(int cpu)
{
	struct clock_event_device *evt;
	BUG_ON(cpu == 0);
	evt = &per_cpu(xen_clock_events, cpu);
	unbind_from_irqhandler(evt->irq, NULL);
}

void xen_setup_cpu_clockevents(void)
{
	BUG_ON(preemptible());

	clockevents_register_device(&__get_cpu_var(xen_clock_events));
}

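/*
 * On resume, reset the pvclock state and, when the vcpu_op timer
 * interface is in use, stop the hypervisor's periodic tick again on
 * every online cpu.
 */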
void xen_timer_resume(void)
{
	int cpu;

	pvclock_resume();

	if (xen_clockevent != &xen_vcpuop_clockevent)
		return;

	for_each_online_cpu(cpu) {
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
	}
}

static const struct pv_time_ops xen_time_ops __initconst = {
	.sched_clock = xen_clocksource_read,
};

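/*
 * Register the Xen clocksource, probe for the vcpu_op timer interface,
 * set the initial system time from the Xen wallclock and set up the
 * boot cpu's timer.
 */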
static void __init xen_time_init(void)
{
	int cpu = smp_processor_id();
	struct timespec tp;

	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
		/* Successfully turned off 100Hz tick, so we have the
		   vcpuop-based timer interface */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Set initial system time with full resolution */
	xen_read_wallclock(&tp);
	do_settimeofday(&tp);

	setup_force_cpu_cap(X86_FEATURE_TSC);

	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();
}

void __init xen_init_time_ops(void)
{
	pv_time_ops = xen_time_ops;

	x86_init.timers.timer_init = xen_time_init;
	x86_init.timers.setup_percpu_clockev = x86_init_noop;
	x86_cpuinit.setup_percpu_clockev = x86_init_noop;

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
	x86_platform.set_wallclock = xen_set_wallclock;
}

#ifdef CONFIG_XEN_PVHVM
static void xen_hvm_setup_cpu_clockevents(void)
{
	int cpu = smp_processor_id();
	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();
}

void __init xen_hvm_init_time_ops(void)
{
	/* vector callback is needed otherwise we cannot receive interrupts
	 * on cpu > 0 and at this point we don't know how many cpus are
	 * available */
	if (!xen_have_vector_callback)
		return;
	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
		printk(KERN_INFO "Xen doesn't support pvclock on HVM, "
		       "disabling pv timer\n");
499
return;
500
}
501
502
pv_time_ops = xen_time_ops;
503
x86_init.timers.setup_percpu_clockev = xen_time_init;
504
x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
505
506
x86_platform.calibrate_tsc = xen_tsc_khz;
507
x86_platform.get_wallclock = xen_get_wallclock;
508
x86_platform.set_wallclock = xen_set_wallclock;
509
}
510
#endif
511
512