#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/kernel_stat.h>
#include <linux/math64.h>
#include <linux/gfp.h>
#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <xen/events.h>
#include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include "xen-ops.h"
#define TIMER_SLOP 100000
#define NS_PER_TICK (1000000000LL / HZ)
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
static DEFINE_PER_CPU(u64, xen_residual_stolen);
static DEFINE_PER_CPU(u64, xen_residual_blocked);
static u64 get64(const u64 *p)
{
u64 ret;
if (BITS_PER_LONG < 64) {
u32 *p32 = (u32 *)p;
u32 h, l;
do {
h = p32[1];
barrier();
l = p32[0];
barrier();
} while (p32[1] != h);
ret = (((u64)h) << 32) | l;
} else
ret = *p;
return ret;
}
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
u64 state_time;
struct vcpu_runstate_info *state;
BUG_ON(preemptible());
state = &__get_cpu_var(xen_runstate);
do {
state_time = get64(&state->state_entry_time);
barrier();
*res = *state;
barrier();
} while (get64(&state->state_entry_time) != state_time);
}
bool xen_vcpu_stolen(int vcpu)
{
return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
}
void xen_setup_runstate_info(int cpu)
{
struct vcpu_register_runstate_memory_area area;
area.addr.v = &per_cpu(xen_runstate, cpu);
if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
cpu, &area))
BUG();
}
static void do_stolen_accounting(void)
{
struct vcpu_runstate_info state;
struct vcpu_runstate_info *snap;
s64 blocked, runnable, offline, stolen;
cputime_t ticks;
get_runstate_snapshot(&state);
WARN_ON(state.state != RUNSTATE_running);
snap = &__get_cpu_var(xen_runstate_snapshot);
blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
*snap = state;
stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
if (stolen < 0)
stolen = 0;
ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
__this_cpu_write(xen_residual_stolen, stolen);
account_steal_ticks(ticks);
blocked += __this_cpu_read(xen_residual_blocked);
if (blocked < 0)
blocked = 0;
ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
__this_cpu_write(xen_residual_blocked, blocked);
account_idle_ticks(ticks);
}
static unsigned long xen_tsc_khz(void)
{
struct pvclock_vcpu_time_info *info =
&HYPERVISOR_shared_info->vcpu_info[0].time;
return pvclock_tsc_khz(info);
}
cycle_t xen_clocksource_read(void)
{
struct pvclock_vcpu_time_info *src;
cycle_t ret;
src = &get_cpu_var(xen_vcpu)->time;
ret = pvclock_clocksource_read(src);
put_cpu_var(xen_vcpu);
return ret;
}
static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
{
return xen_clocksource_read();
}
static void xen_read_wallclock(struct timespec *ts)
{
struct shared_info *s = HYPERVISOR_shared_info;
struct pvclock_wall_clock *wall_clock = &(s->wc);
struct pvclock_vcpu_time_info *vcpu_time;
vcpu_time = &get_cpu_var(xen_vcpu)->time;
pvclock_read_wallclock(wall_clock, vcpu_time, ts);
put_cpu_var(xen_vcpu);
}
static unsigned long xen_get_wallclock(void)
{
struct timespec ts;
xen_read_wallclock(&ts);
return ts.tv_sec;
}
static int xen_set_wallclock(unsigned long now)
{
return -1;
}
static struct clocksource xen_clocksource __read_mostly = {
.name = "xen",
.rating = 400,
.read = xen_clocksource_get_cycles,
.mask = ~0,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static s64 get_abs_timeout(unsigned long delta)
{
return xen_clocksource_read() + delta;
}
static void xen_timerop_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt)
{
switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
WARN_ON(1);
break;
case CLOCK_EVT_MODE_ONESHOT:
case CLOCK_EVT_MODE_RESUME:
break;
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
HYPERVISOR_set_timer_op(0);
break;
}
}
static int xen_timerop_set_next_event(unsigned long delta,
struct clock_event_device *evt)
{
WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
BUG();
return 0;
}
static const struct clock_event_device xen_timerop_clockevent = {
.name = "xen",
.features = CLOCK_EVT_FEAT_ONESHOT,
.max_delta_ns = 0xffffffff,
.min_delta_ns = TIMER_SLOP,
.mult = 1,
.shift = 0,
.rating = 500,
.set_mode = xen_timerop_set_mode,
.set_next_event = xen_timerop_set_next_event,
};
static void xen_vcpuop_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt)
{
int cpu = smp_processor_id();
switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
WARN_ON(1);
break;
case CLOCK_EVT_MODE_ONESHOT:
if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
BUG();
break;
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
BUG();
break;
case CLOCK_EVT_MODE_RESUME:
break;
}
}
static int xen_vcpuop_set_next_event(unsigned long delta,
struct clock_event_device *evt)
{
int cpu = smp_processor_id();
struct vcpu_set_singleshot_timer single;
int ret;
WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
single.timeout_abs_ns = get_abs_timeout(delta);
single.flags = VCPU_SSHOTTMR_future;
ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);
BUG_ON(ret != 0 && ret != -ETIME);
return ret;
}
static const struct clock_event_device xen_vcpuop_clockevent = {
.name = "xen",
.features = CLOCK_EVT_FEAT_ONESHOT,
.max_delta_ns = 0xffffffff,
.min_delta_ns = TIMER_SLOP,
.mult = 1,
.shift = 0,
.rating = 500,
.set_mode = xen_vcpuop_set_mode,
.set_next_event = xen_vcpuop_set_next_event,
};
static const struct clock_event_device *xen_clockevent =
&xen_timerop_clockevent;
static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
irqreturn_t ret;
ret = IRQ_NONE;
if (evt->event_handler) {
evt->event_handler(evt);
ret = IRQ_HANDLED;
}
do_stolen_accounting();
return ret;
}
void xen_setup_timer(int cpu)
{
const char *name;
struct clock_event_device *evt;
int irq;
printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
name = kasprintf(GFP_KERNEL, "timer%d", cpu);
if (!name)
name = "<timer kasprintf failed>";
irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
IRQF_DISABLED|IRQF_PERCPU|
IRQF_NOBALANCING|IRQF_TIMER|
IRQF_FORCE_RESUME,
name, NULL);
evt = &per_cpu(xen_clock_events, cpu);
memcpy(evt, xen_clockevent, sizeof(*evt));
evt->cpumask = cpumask_of(cpu);
evt->irq = irq;
}
void xen_teardown_timer(int cpu)
{
struct clock_event_device *evt;
BUG_ON(cpu == 0);
evt = &per_cpu(xen_clock_events, cpu);
unbind_from_irqhandler(evt->irq, NULL);
}
void xen_setup_cpu_clockevents(void)
{
BUG_ON(preemptible());
clockevents_register_device(&__get_cpu_var(xen_clock_events));
}
void xen_timer_resume(void)
{
int cpu;
pvclock_resume();
if (xen_clockevent != &xen_vcpuop_clockevent)
return;
for_each_online_cpu(cpu) {
if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
BUG();
}
}
static const struct pv_time_ops xen_time_ops __initconst = {
.sched_clock = xen_clocksource_read,
};
static void __init xen_time_init(void)
{
int cpu = smp_processor_id();
struct timespec tp;
clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
xen_clockevent = &xen_vcpuop_clockevent;
}
xen_read_wallclock(&tp);
do_settimeofday(&tp);
setup_force_cpu_cap(X86_FEATURE_TSC);
xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_setup_cpu_clockevents();
}
void __init xen_init_time_ops(void)
{
pv_time_ops = xen_time_ops;
x86_init.timers.timer_init = xen_time_init;
x86_init.timers.setup_percpu_clockev = x86_init_noop;
x86_cpuinit.setup_percpu_clockev = x86_init_noop;
x86_platform.calibrate_tsc = xen_tsc_khz;
x86_platform.get_wallclock = xen_get_wallclock;
x86_platform.set_wallclock = xen_set_wallclock;
}
#ifdef CONFIG_XEN_PVHVM
static void xen_hvm_setup_cpu_clockevents(void)
{
int cpu = smp_processor_id();
xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_setup_cpu_clockevents();
}
void __init xen_hvm_init_time_ops(void)
{
if (!xen_have_vector_callback)
return;
if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
"disable pv timer\n");
return;
}
pv_time_ops = xen_time_ops;
x86_init.timers.setup_percpu_clockev = xen_time_init;
x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
x86_platform.calibrate_tsc = xen_tsc_khz;
x86_platform.get_wallclock = xen_get_wallclock;
x86_platform.set_wallclock = xen_set_wallclock;
}
#endif