#include <sys/cdefs.h>
#include "opt_device_polling.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/timeet.h>
#include <sys/timetc.h>
#include <machine/atomic.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/smp.h>
/*
 * Sleep-state inhibit counters with external linkage.
 * cpu_disable_c3_sleep is incremented in this file whenever the selected
 * event timer carries ET_FLAGS_C3STOP and decremented again when such a
 * timer is replaced through the sysctl handler.  cpu_disable_c2_sleep is
 * only defined here; nothing in the visible part of this file modifies it.
 * NOTE(review): presumably both are consumed by the CPU idle code -- confirm.
 */
int cpu_disable_c2_sleep = 0;
int cpu_disable_c3_sleep = 0;
/* Forward declarations of the file-local helpers defined below. */
static void setuptimer(void);
static void loadtimer(sbintime_t now, int first);
static int doconfigtimer(void);
static void configtimer(int start);
static int round_freq(struct eventtimer *et, int freq);
struct pcpu_state;
static sbintime_t getnextcpuevent(struct pcpu_state *state, int idle);
static sbintime_t getnextevent(struct pcpu_state *state);
static int handleevents(sbintime_t now, int fake);
/*
 * Spin mutex taken by ET_HW_LOCK()/ET_HW_UNLOCK() when the active timer is
 * global (ET_FLAGS_PERCPU not set); initialized in cpu_initclocks_bsp().
 */
static struct mtx et_hw_mtx;
/*
 * Acquire/release the spin mutex that this file holds around event timer
 * programming: the mutex embedded in the per-CPU 'state' when the active
 * timer is per-CPU (ET_FLAGS_PERCPU), the single global et_hw_mtx otherwise.
 *
 * Both macros read the file-scope 'timer' pointer, so they may only be used
 * once a timer has been selected.
 *
 * The bodies are wrapped in do { } while (0) so that each macro expands to
 * exactly one statement and remains correct as the body of an unbraced
 * if/else.
 */
#define	ET_HW_LOCK(state)						\
	do {								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_lock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_lock_spin(&et_hw_mtx);			\
	} while (0)
#define	ET_HW_UNLOCK(state)						\
	do {								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_unlock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_unlock_spin(&et_hw_mtx);			\
	} while (0)
/* Event timer currently driving the clocks; NULL until one is selected. */
static struct eventtimer *timer = NULL;
/* Timer period used in periodic mode; computed by setuptimer(). */
static sbintime_t timerperiod;
/* statclock() interval, SBT_1S / stathz (set in cpu_initclocks_bsp()). */
static sbintime_t statperiod;
/* profclock() interval, SBT_1S / profhz (set in cpu_initclocks_bsp()). */
static sbintime_t profperiod;
/*
 * Next tick time of a global (non per-CPU) timer.  In one-shot mode it is
 * set to -1 whenever no tick is currently scheduled.
 */
static sbintime_t nexttick;
/*
 * Non-zero while the timer is stopped or being reconfigured: starts at 1,
 * cleared by configtimer(1) and set again by configtimer(0).  Most entry
 * points bail out early while it is set.
 */
static u_int busy = 1;
/* Count of outstanding cpu_startprofclock() calls; non-zero enables profclock(). */
static int profiling;
/* Name of the timer requested through the kern.eventtimer.timer tunable. */
static char timername[32];
TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername));
/* Timer frequency multiplier relative to hz; kept within 1..20. */
static int singlemul;
SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RWTUN, &singlemul,
0, "Multiplier for periodic mode");
/* When non-zero, cpu_idleclock() refuses to skip ticks on idle CPUs. */
static u_int idletick;
SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RWTUN, &idletick,
0, "Run periodic events when idle");
/* Non-zero when the timer is run in periodic mode, zero for one-shot mode. */
static int periodic;
/*
 * Requested mode; 'periodic' is reloaded from it when a timer is selected
 * and then adjusted to what that timer supports.
 */
static int want_periodic;
TUNABLE_INT("kern.eventtimer.periodic", &want_periodic);
/*
 * Per-CPU clock scheduling state.  One instance exists for every CPU (see
 * the timerstate DPCPU definition); all times are sbintime_t uptime values.
 */
struct pcpu_state {
/* Lock used by ET_HW_LOCK() when the active timer is per-CPU. */
struct mtx et_hw_mtx;
/* Pending reconfiguration request: 0 none, 1 start timer, 2 stop timer. */
u_int action;
/* Set by cpu_new_callout() to make this CPU run handleevents() on IPI. */
u_int handle;
/* Time of the last timer interrupt as recorded by timercb(). */
sbintime_t now;
/* Earliest event of any kind scheduled on this CPU. */
sbintime_t nextevent;
/* Next tick time of a per-CPU timer; -1 when none is scheduled. */
sbintime_t nexttick;
/* Deadline of the next hardclock() run. */
sbintime_t nexthard;
/* Deadline of the next statclock() run. */
sbintime_t nextstat;
/* Deadline of the next profclock() run. */
sbintime_t nextprof;
/* Deadline of the next callout (bt argument of cpu_new_callout()). */
sbintime_t nextcall;
/* Alternative callout deadline (bt_opt argument of cpu_new_callout()). */
sbintime_t nextcallopt;
/* Marked by timercb() when this CPU has to be sent IPI_HARDCLOCK. */
int ipi;
/* Non-zero between cpu_idleclock() and the next handleevents(). */
int idle;
};
/* Per-CPU instance of the clock scheduling state, private to this file. */
DPCPU_DEFINE_STATIC(struct pcpu_state, timerstate);
/*
 * Per-CPU time of the most recent hardclock tick boundary; handleevents()
 * stores (nexthard - tick_sbt) here each time hardclock deadlines advance.
 */
DPCPU_DEFINE(sbintime_t, hardclocktime);
/*
 * IPI_HARDCLOCK handler.
 *
 * First lets doconfigtimer() service any pending reconfiguration or
 * "handle now" request; if it did something, or the timer is currently being
 * reconfigured, the interrupt is reported as handled.  Otherwise the events
 * due at the time recorded in this CPU's state are processed.
 *
 * Returns FILTER_HANDLED when some work was done, FILTER_STRAY otherwise.
 */
int
hardclockintr(void)
{
	struct pcpu_state *st;
	sbintime_t stamp;

	if (doconfigtimer() || busy)
		return (FILTER_HANDLED);

	st = DPCPU_PTR(timerstate);
	stamp = st->now;
	CTR2(KTR_SPARE2, "ipi: now %d.%08x",
	    (int)(stamp >> 32), (u_int)(stamp & 0xffffffff));
	if (handleevents(stamp, 0))
		return (FILTER_HANDLED);
	return (FILTER_STRAY);
}
/*
 * Run every clock event that is due at time 'now' on the current CPU and
 * reprogram the event timer for the next one.
 *
 * 'fake' tells how much of a real timer interrupt this call is:
 *   0 - real interrupt: curthread's interrupt trap frame supplies the
 *       user/kernel mode flag and the PC handed to profclock();
 *   1 - no trap frame: usermode is reported as 0 and profclock() is skipped;
 *   2 - as 1, and hardclock()/statclock() are skipped too (their deadlines
 *       are still advanced); loadtimer() is asked to start the timer when
 *       it is a per-CPU one.
 *
 * Returns non-zero if hardclock(), statclock() or profclock() was called.
 */
static int
handleevents(sbintime_t now, int fake)
{
sbintime_t t, *hct;
struct trapframe *frame;
struct pcpu_state *state;
int usermode;
int done, runs;
CTR2(KTR_SPARE2, "handle: now %d.%08x",
(int)(now >> 32), (u_int)(now & 0xffffffff));
done = 0;
if (fake) {
frame = NULL;
usermode = 0;
} else {
frame = curthread->td_intr_frame;
usermode = TRAPF_USERMODE(frame);
}
state = DPCPU_PTR(timerstate);
/* Count how many hardclock ticks have elapsed and advance the deadline. */
runs = 0;
while (now >= state->nexthard) {
state->nexthard += tick_sbt;
runs++;
}
if (runs) {
/* Publish the boundary of the last elapsed tick for this CPU. */
hct = DPCPU_PTR(hardclocktime);
*hct = state->nexthard - tick_sbt;
if (fake < 2) {
hardclock(runs, usermode);
done = 1;
}
}
/* Same for statclock. */
runs = 0;
while (now >= state->nextstat) {
state->nextstat += statperiod;
runs++;
}
if (runs && fake < 2) {
statclock(runs, usermode);
done = 1;
}
if (profiling) {
runs = 0;
while (now >= state->nextprof) {
state->nextprof += profperiod;
runs++;
}
/* profclock() needs the interrupted PC, so only on a real interrupt. */
if (runs && !fake) {
profclock(runs, usermode, TRAPF_PC(frame));
done = 1;
}
} else
/* Not profiling: keep the profclock deadline tracking statclock's. */
state->nextprof = state->nextstat;
/*
 * Callouts are due: reset both callout deadlines before processing;
 * they are set again through cpu_new_callout().
 */
if (now >= state->nextcallopt || now >= state->nextcall) {
state->nextcall = state->nextcallopt = SBT_MAX;
callout_process(now);
}
/* Record the next event for this (now active) CPU and program the timer. */
ET_HW_LOCK(state);
t = getnextcpuevent(state, 0);
/* Leave state and hardware alone while a reconfiguration is in progress. */
if (!busy) {
state->idle = 0;
state->nextevent = t;
loadtimer(now, (fake == 2) &&
(timer->et_flags & ET_FLAGS_PERCPU));
}
ET_HW_UNLOCK(state);
return (done);
}
/*
 * Compute the time of the next event this CPU has to wake up for.
 *
 * state - per-CPU scheduling state to inspect.
 * idle  - non-zero when the CPU is going idle.  In that case hardclock
 *         ticks may be skipped: the hardclock deadline is pushed out by
 *         (hz - 1) ticks, or by (hz / tc_min_ticktock_freq - 1) ticks when
 *         tc_min_ticktock_freq > 1 (on SMP kernels only for the first CPU),
 *         and statclock/profclock deadlines are ignored altogether.
 *
 * The callout deadline is always honoured.
 */
static sbintime_t
getnextcpuevent(struct pcpu_state *state, int idle)
{
	sbintime_t earliest;
	u_int skipfreq;

	earliest = state->nexthard;
	if (idle) {
		skipfreq = hz;
		if (tc_min_ticktock_freq > 1
#ifdef SMP
		    && curcpu == CPU_FIRST()
#endif
		    )
			skipfreq = hz / tc_min_ticktock_freq;
		if (skipfreq > 1)
			earliest += tick_sbt * (skipfreq - 1);
	}

	if (state->nextcall < earliest)
		earliest = state->nextcall;

	/* An idle CPU does not care about statistics or profiling ticks. */
	if (idle)
		return (earliest);

	if (state->nextstat < earliest)
		earliest = state->nextstat;
	if (profiling && state->nextprof < earliest)
		earliest = state->nextprof;
	return (earliest);
}
static sbintime_t
getnextevent(struct pcpu_state *state)
{
sbintime_t event;
#ifdef SMP
int cpu;
#endif
#ifdef KTR
int c;
c = -1;
#endif
event = state->nextevent;
#ifdef SMP
if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
CPU_FOREACH(cpu) {
state = DPCPU_ID_PTR(cpu, timerstate);
if (event > state->nextevent) {
event = state->nextevent;
#ifdef KTR
c = cpu;
#endif
}
}
}
#endif
CTR3(KTR_SPARE2, "next: next %d.%08x by %d",
(int)(event >> 32), (u_int)(event & 0xffffffff), c);
return (event);
}
/*
 * Event timer interrupt callback, registered through et_init().
 *
 * Records the current uptime, updates the next-tick bookkeeping, runs the
 * events due on this CPU and, for a global timer on SMP, forwards the tick
 * by IPI to every other CPU that has an event due.
 *
 * et  - timer that fired.
 * arg - callback argument; unused.
 */
static void
timercb(struct eventtimer *et, void *arg)
{
sbintime_t now;
sbintime_t *next;
struct pcpu_state *state;
#ifdef SMP
int cpu, bcast;
#endif
/* Do not touch anything while the timer is being reconfigured. */
if (busy)
return;
/* Pick the next-tick slot matching the timer type. */
state = DPCPU_PTR(timerstate);
if (et->et_flags & ET_FLAGS_PERCPU) {
next = &state->nexttick;
} else
next = &nexttick;
now = sbinuptime();
if (periodic)
*next = now + timerperiod;
else
/* One-shot: nothing is scheduled until loadtimer() programs it. */
*next = -1;
state->now = now;
CTR2(KTR_SPARE2, "intr: now %d.%08x",
(int)(now >> 32), (u_int)(now & 0xffffffff));
#ifdef SMP
#ifdef EARLY_AP_STARTUP
MPASS(mp_ncpus == 1 || smp_started);
#endif
/*
 * Global timer: stamp every CPU with the current time and note which
 * other CPUs have an event due and therefore need an IPI.
 */
bcast = 0;
#ifdef EARLY_AP_STARTUP
if ((et->et_flags & ET_FLAGS_PERCPU) == 0) {
#else
if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) {
#endif
CPU_FOREACH(cpu) {
state = DPCPU_ID_PTR(cpu, timerstate);
ET_HW_LOCK(state);
state->now = now;
if (now >= state->nextevent) {
/*
 * Push the event one second out; handleevents() on that CPU
 * stores the real next event time.
 */
state->nextevent += SBT_1S;
if (curcpu != cpu) {
state->ipi = 1;
bcast = 1;
}
}
ET_HW_UNLOCK(state);
}
}
#endif
/* Handle the events due on this CPU. */
handleevents(now, 0);
#ifdef SMP
/* Deliver the deferred IPIs to the CPUs marked above. */
if (bcast) {
CPU_FOREACH(cpu) {
if (curcpu == cpu)
continue;
state = DPCPU_ID_PTR(cpu, timerstate);
if (state->ipi) {
state->ipi = 0;
ipi_cpu(cpu, IPI_HARDCLOCK);
}
}
}
#endif
}
/*
 * Program the event timer of the current CPU (or the global one).
 *
 * now   - current uptime.
 * start - non-zero to (re)start a periodic timer; ignored in one-shot mode.
 *
 * Periodic mode: the timer is only touched when 'start' is set.  The first
 * tick is aligned to a multiple of timerperiod; when less than half a period
 * is left until the next boundary, one more full period is added.
 *
 * One-shot mode: the timer is reloaded for the next event returned by
 * getnextevent(), unless it is already programmed for exactly that time.
 */
static void
loadtimer(sbintime_t now, int start)
{
	struct pcpu_state *st;
	sbintime_t *tickp;
	sbintime_t delta, target;
	uint64_t phase;
	int same;

	st = DPCPU_PTR(timerstate);
	tickp = (timer->et_flags & ET_FLAGS_PERCPU) ?
	    &st->nexttick : &nexttick;

	if (!periodic) {
		target = getnextevent(st);
		same = (target == *tickp);
		CTR3(KTR_SPARE2, "load: next %d.%08x eq %d",
		    (int)(target >> 32), (u_int)(target & 0xffffffff), same);
		if (same)
			return;
		*tickp = target;
		et_start(timer, target - now, 0);
		return;
	}

	if (!start)
		return;

	/* Time already elapsed within the current period. */
	phase = now % timerperiod;
	delta = timerperiod - phase;
	if (delta < phase)
		delta += timerperiod;
	CTR4(KTR_SPARE2, "load p: now %d.%08x first in %d.%08x",
	    (int)(now >> 32), (u_int)(now & 0xffffffff),
	    (int)(delta >> 32), (u_int)(delta & 0xffffffff));
	*tickp = delta + now;
	et_start(timer, delta, timerperiod);
}
/*
 * Derive the operating mode and period of the selected timer.
 *
 * 'periodic' is flipped when the timer lacks the requested mode.  singlemul
 * is clamped to 1..20.  The timer frequency starts at hz * singlemul and is
 * raised in steps of hz until it reaches profhz (when profiling) or stathz;
 * the result is rounded by round_freq() and stored as timerperiod.
 */
static void
setuptimer(void)
{
	int rate, wanted;

	/* Fall back to the mode the hardware actually supports. */
	if (periodic) {
		if ((timer->et_flags & ET_FLAGS_PERIODIC) == 0)
			periodic = 0;
	} else if ((timer->et_flags & ET_FLAGS_ONESHOT) == 0)
		periodic = 1;

	if (singlemul < 1)
		singlemul = 1;
	else if (singlemul > 20)
		singlemul = 20;

	wanted = profiling ? profhz : stathz;
	for (rate = hz * singlemul; rate < wanted; rate += hz)
		continue;
	rate = round_freq(timer, rate);
	timerperiod = SBT_1S / rate;
}
/*
 * Service a request posted to the current CPU by another one.
 *
 * state->action == 1: start this CPU's timer; == 2: stop it.  Either way
 * the pending 'handle' flag is dropped and action is cleared with release
 * semantics, which is what configtimer() on the requesting CPU spins on.
 *
 * With no action pending, a set 'handle' flag (posted by cpu_new_callout())
 * makes this CPU run handleevents() at the current time, unless the timer
 * is being reconfigured.
 *
 * Returns 1 if a request was serviced, 0 otherwise.
 */
static int
doconfigtimer(void)
{
sbintime_t now;
struct pcpu_state *state;
state = DPCPU_PTR(timerstate);
switch (atomic_load_acq_int(&state->action)) {
case 1:
/* Start request. */
now = sbinuptime();
ET_HW_LOCK(state);
loadtimer(now, 1);
ET_HW_UNLOCK(state);
state->handle = 0;
atomic_store_rel_int(&state->action, 0);
return (1);
case 2:
/* Stop request. */
ET_HW_LOCK(state);
et_stop(timer);
ET_HW_UNLOCK(state);
state->handle = 0;
atomic_store_rel_int(&state->action, 0);
return (1);
}
/* 'handle' is consumed even when busy prevents acting on it. */
if (atomic_readandclear_int(&state->handle) && !busy) {
now = sbinuptime();
handleevents(now, 0);
return (1);
}
return (0);
}
/*
 * Start (start != 0) or stop (start == 0) the event timer on all CPUs.
 *
 * Starting recomputes the timer parameters with setuptimer(), resets the
 * scheduling state of every CPU so that all deadlines fall one timer period
 * from now, clears 'busy' and programs the timer.  Stopping sets 'busy' and
 * stops the timer.  For a per-CPU timer on SMP the other CPUs are then told
 * by IPI to start/stop their own timers, and this function spins until all
 * of them have acknowledged.
 *
 * Every caller in this file holds ET_LOCK() around the call.
 */
static void
configtimer(int start)
{
sbintime_t now, next;
struct pcpu_state *state;
int cpu;
if (start) {
setuptimer();
now = sbinuptime();
} else
now = 0;
critical_enter();
ET_HW_LOCK(DPCPU_PTR(timerstate));
if (start) {
/* Every deadline starts one timer period from now. */
next = now + timerperiod;
if (periodic)
nexttick = next;
else
nexttick = -1;
#ifdef EARLY_AP_STARTUP
MPASS(mp_ncpus == 1 || smp_started);
#endif
CPU_FOREACH(cpu) {
state = DPCPU_ID_PTR(cpu, timerstate);
state->now = now;
#ifndef EARLY_AP_STARTUP
/* CPUs other than the first have no events until SMP is started. */
if (!smp_started && cpu != CPU_FIRST())
state->nextevent = SBT_MAX;
else
#endif
state->nextevent = next;
if (periodic)
state->nexttick = next;
else
state->nexttick = -1;
state->nexthard = next;
state->nextstat = next;
state->nextprof = next;
state->nextcall = next;
state->nextcallopt = next;
hardclock_sync(cpu);
}
busy = 0;
/* Start the global timer or the per-CPU timer of this CPU. */
loadtimer(now, 1);
} else {
busy = 1;
/* Stop the global timer or the per-CPU timer of this CPU. */
et_stop(timer);
}
ET_HW_UNLOCK(DPCPU_PTR(timerstate));
#ifdef SMP
/*
 * A global timer needs no further work; without EARLY_AP_STARTUP the
 * same holds while the other CPUs are not started yet.
 */
#ifdef EARLY_AP_STARTUP
if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
#else
if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
#endif
critical_exit();
return;
}
/* Post the request to every other CPU (1 = start, 2 = stop). */
CPU_FOREACH(cpu) {
state = DPCPU_ID_PTR(cpu, timerstate);
atomic_store_rel_int(&state->action,
(cpu == curcpu) ? 0 : ( start ? 1 : 2));
}
/* Kick the other CPUs; doconfigtimer() runs there from hardclockintr(). */
ipi_all_but_self(IPI_HARDCLOCK);
/* Spin until every other CPU has cleared its action field. */
restart:
cpu_spinwait();
CPU_FOREACH(cpu) {
if (cpu == curcpu)
continue;
state = DPCPU_ID_PTR(cpu, timerstate);
if (atomic_load_acq_int(&state->action))
goto restart;
}
#endif
critical_exit();
}
/*
 * Round a requested frequency to one the given event timer can generate.
 *
 * et   - timer whose limits apply.
 * freq - requested frequency in Hz; must be non-zero.
 *
 * When the timer reports its base frequency, the request is rounded to the
 * nearest integer divisor of it (to a power-of-two divisor for timers with
 * ET_FLAGS_POW2DIV).  The result is then limited by the timer's minimal and
 * maximal supported periods, when those are set.
 *
 * Panics if the timer's minimal period is longer than one second.
 * Returns the adjusted frequency in Hz.
 */
static int
round_freq(struct eventtimer *et, int freq)
{
	uint64_t div;

	if (et->et_frequency != 0) {
		div = lmax((et->et_frequency + freq / 2) / freq, 1);
		if (et->et_flags & ET_FLAGS_POW2DIV) {
			/*
			 * Shift a 64-bit one: the exponent can reach 31 or
			 * more, which overflows a plain int constant.
			 */
			div = (uint64_t)1 << (flsl(div + div / 2) - 1);
		}
		freq = (et->et_frequency + div / 2) / div;
	}
	if (et->et_min_period > SBT_1S)
		panic("Event timer \"%s\" doesn't support sub-second periods!",
		    et->et_name);
	else if (et->et_min_period != 0)
		freq = min(freq, SBT2FREQ(et->et_min_period));
	if (et->et_max_period < SBT_1S && et->et_max_period != 0)
		freq = max(freq, SBT2FREQ(et->et_max_period));
	return (freq);
}
/*
 * One-time clock initialization: set up the locks and per-CPU state, pick
 * an event timer, derive hz/stathz/profhz and the related periods from its
 * capabilities, and start it.
 *
 * Panics if no usable event timer is found.
 * NOTE(review): the name suggests this runs on the boot processor only --
 * confirm against the caller.
 */
void
cpu_initclocks_bsp(void)
{
struct pcpu_state *state;
int base, div, cpu;
mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
CPU_FOREACH(cpu) {
state = DPCPU_ID_PTR(cpu, timerstate);
mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
/* No callouts are scheduled yet. */
state->nextcall = SBT_MAX;
state->nextcallopt = SBT_MAX;
}
periodic = want_periodic;
/*
 * Grab the timer requested by name; otherwise prefer one that supports
 * the requested mode, then any one-shot timer, then any periodic timer.
 */
if (timername[0])
timer = et_find(timername, 0, 0);
if (timer == NULL && periodic) {
timer = et_find(NULL,
ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
}
if (timer == NULL) {
timer = et_find(NULL,
ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT);
}
if (timer == NULL && !periodic) {
timer = et_find(NULL,
ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
}
if (timer == NULL)
panic("No usable event timer found!");
et_init(timer, timercb, NULL, NULL);
/* Adapt the mode to what the chosen timer supports. */
if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
periodic = 0;
else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
periodic = 1;
if (timer->et_flags & ET_FLAGS_C3STOP)
cpu_disable_c3_sleep++;
/* Pick a default multiplier from hz when the tunable is out of range. */
if (singlemul <= 0 || singlemul > 20) {
if (hz >= 1500 || (hz % 128) == 0)
singlemul = 1;
else if (hz >= 750)
singlemul = 2;
else
singlemul = 4;
}
if (periodic) {
/*
 * Periodic mode: all clocks are derived from one base frequency.
 * hz is re-derived from the rounded base, stathz is kept at or
 * below 128 Hz, and profhz is the largest multiple of stathz not
 * exceeding 128 * 64 Hz, rounded to what the timer can do.
 */
base = round_freq(timer, hz * singlemul);
singlemul = max((base + hz / 2) / hz, 1);
hz = (base + singlemul / 2) / singlemul;
if (base <= 128)
stathz = base;
else {
div = base / 128;
/*
 * Skip divisors that are multiples of singlemul.
 * NOTE(review): presumably to keep statclock from coinciding
 * with hardclock on every tick -- confirm.
 */
if (div >= singlemul && (div % singlemul) == 0)
div++;
stathz = base / div;
}
profhz = stathz;
while ((profhz + stathz) <= 128 * 64)
profhz += stathz;
profhz = round_freq(timer, profhz);
} else {
/* One-shot mode: each clock is rounded independently. */
hz = round_freq(timer, hz);
stathz = round_freq(timer, 127);
profhz = round_freq(timer, stathz * 64);
}
/* Derived tick lengths: microseconds, sbintime and bintime. */
tick = 1000000 / hz;
tick_sbt = SBT_1S / hz;
tick_bt = sbttobt(tick_sbt);
statperiod = SBT_1S / stathz;
profperiod = SBT_1S / profhz;
ET_LOCK();
configtimer(1);
ET_UNLOCK();
}
/*
 * Bring the clocks of the current CPU in line with the running system.
 *
 * Stamps this CPU's state with the current uptime, synchronizes hardclock
 * and then calls handleevents() with fake == 2, which advances the
 * deadlines without running hardclock()/statclock() and starts the timer
 * when it is a per-CPU one.
 * NOTE(review): the name suggests this runs on each application processor
 * during its startup -- confirm against the caller.
 */
void
cpu_initclocks_ap(void)
{
struct pcpu_state *state;
struct thread *td;
state = DPCPU_PTR(timerstate);
ET_HW_LOCK(state);
state->now = sbinuptime();
hardclock_sync(curcpu);
/*
 * Enter the spinlock section before dropping the mutex.
 * NOTE(review): presumably so interrupts stay disabled between the
 * unlock and handleevents() -- confirm.
 */
spinlock_enter();
ET_HW_UNLOCK(state);
td = curthread;
/* Run handleevents() with the interrupt nesting level raised. */
td->td_intr_nesting_level++;
handleevents(state->now, 2);
td->td_intr_nesting_level--;
spinlock_exit();
}
/*
 * Stop the event timer on all CPUs by calling configtimer(0) under
 * ET_LOCK().  This leaves 'busy' set until resumeclock() is called.
 */
void
suspendclock(void)
{
ET_LOCK();
configtimer(0);
ET_UNLOCK();
}
/*
 * Restart the event timer on all CPUs by calling configtimer(1) under
 * ET_LOCK(); all per-CPU deadlines are re-initialized relative to the
 * current uptime.
 */
void
resumeclock(void)
{
ET_LOCK();
configtimer(1);
ET_UNLOCK();
}
/*
 * Take a reference on the profiling clock.
 *
 * The first reference switches profiling on.  In periodic mode the timer is
 * stopped and restarted around that switch so that setuptimer() can pick a
 * frequency that covers profhz.  Further references only bump the counter.
 */
void
cpu_startprofclock(void)
{

	ET_LOCK();
	if (profiling != 0) {
		profiling++;
	} else if (periodic) {
		configtimer(0);
		profiling = 1;
		configtimer(1);
	} else {
		profiling = 1;
	}
	ET_UNLOCK();
}
/*
 * Drop a reference on the profiling clock.
 *
 * Releasing the last reference switches profiling off.  In periodic mode the
 * timer is stopped and restarted around that switch so that setuptimer() can
 * go back to a frequency that only has to cover stathz.  Any other count is
 * simply decremented.
 */
void
cpu_stopprofclock(void)
{

	ET_LOCK();
	if (profiling != 1) {
		profiling--;
	} else if (periodic) {
		configtimer(0);
		profiling = 0;
		configtimer(1);
	} else {
		profiling = 0;
	}
	ET_UNLOCK();
}
/*
 * Switch the current CPU to idle mode, allowing it to skip clock ticks.
 *
 * Returns -1 without changing anything when tick skipping is not possible:
 * idletick is set, the timer is being reconfigured, the timer is a per-CPU
 * periodic one, or (with DEVICE_POLLING) this is the first CPU.
 *
 * Otherwise the next event is computed with hardclock skipping allowed, the
 * CPU is marked idle, a one-shot timer is reprogrammed for that event, and
 * the time left until the event is returned (never negative).
 */
sbintime_t
cpu_idleclock(void)
{
sbintime_t now, t;
struct pcpu_state *state;
if (idletick || busy ||
(periodic && (timer->et_flags & ET_FLAGS_PERCPU))
#ifdef DEVICE_POLLING
|| curcpu == CPU_FIRST()
#endif
)
return (-1);
state = DPCPU_PTR(timerstate);
ET_HW_LOCK(state);
/* In periodic mode the time of the last tick is used as 'now'. */
if (periodic)
now = state->now;
else
now = sbinuptime();
CTR2(KTR_SPARE2, "idle: now %d.%08x",
(int)(now >> 32), (u_int)(now & 0xffffffff));
t = getnextcpuevent(state, 1);
state->idle = 1;
state->nextevent = t;
/* A periodic timer keeps ticking; only a one-shot one is reloaded. */
if (!periodic)
loadtimer(now, 0);
ET_HW_UNLOCK(state);
return (MAX(t - now, 0));
}
/*
 * Switch the current CPU back from idle to active mode.
 *
 * Does nothing unless the CPU was marked idle by cpu_idleclock() and the
 * timer is not being reconfigured.  Otherwise the events skipped while idle
 * are processed through handleevents() with fake == 1 (no trap frame), with
 * the interrupt nesting level raised and inside a spinlock section.
 */
void
cpu_activeclock(void)
{
	struct pcpu_state *st;
	struct thread *self;
	sbintime_t stamp;

	st = DPCPU_PTR(timerstate);
	if (atomic_load_int(&st->idle) == 0 || busy)
		return;

	spinlock_enter();
	/* In periodic mode the time of the last tick stands in for "now". */
	stamp = periodic ? st->now : sbinuptime();
	CTR2(KTR_SPARE2, "active: now %d.%08x",
	    (int)(stamp >> 32), (u_int)(stamp & 0xffffffff));
	self = curthread;
	self->td_intr_nesting_level++;
	handleevents(stamp, 1);
	self->td_intr_nesting_level--;
	spinlock_exit();
}
/*
 * Change the base frequency of an event timer.
 *
 * et      - timer whose frequency changed.
 * newfreq - new value for et->et_frequency.
 *
 * When 'et' is the timer currently in use, it is stopped before the update
 * and restarted afterwards so that the new frequency takes effect; any other
 * timer just gets the field updated.  All of it happens under ET_LOCK().
 */
void
cpu_et_frequency(struct eventtimer *et, uint64_t newfreq)
{
	int active;

	ET_LOCK();
	active = (et == timer);
	if (active)
		configtimer(0);
	et->et_frequency = newfreq;
	if (active)
		configtimer(1);
	ET_UNLOCK();
}
/*
 * Report a new callout deadline for the given CPU.
 *
 * cpu    - CPU the callout is scheduled on; must not be absent.
 * bt     - time of the callout; recorded as nextcall when it is earlier
 *          than the one already known.
 * bt_opt - always recorded as nextcallopt.
 *
 * If the new deadline becomes the earliest event of that CPU and the timer
 * runs in one-shot mode, the timer is reprogrammed: directly when it is
 * global or belongs to the current CPU, otherwise by flagging the target
 * CPU and sending it IPI_HARDCLOCK.  Nothing is done while the timer is
 * being reconfigured.
 */
void
cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt)
{
struct pcpu_state *state;
if (busy)
return;
CTR5(KTR_SPARE2, "new co: on %d at %d.%08x - %d.%08x",
cpu, (int)(bt_opt >> 32), (u_int)(bt_opt & 0xffffffff),
(int)(bt >> 32), (u_int)(bt & 0xffffffff));
KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu));
state = DPCPU_ID_PTR(cpu, timerstate);
ET_HW_LOCK(state);
state->nextcallopt = bt_opt;
/* An earlier (or equal) callout is already scheduled: nothing to do. */
if (bt >= state->nextcall)
goto done;
state->nextcall = bt;
/* Some other event fires first anyway: nothing to reprogram. */
if (bt >= state->nextevent)
goto done;
state->nextevent = bt;
/* A periodic timer is never reprogrammed for individual events. */
if (periodic)
goto done;
/* Global timer or our own per-CPU timer: reprogram it right here. */
if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) {
loadtimer(sbinuptime(), 0);
done:
ET_HW_UNLOCK(state);
return;
}
/* Another CPU's timer: ask that CPU to run handleevents() itself. */
state->handle = 1;
ET_HW_UNLOCK(state);
#ifdef SMP
ipi_cpu(cpu, IPI_HARDCLOCK);
#endif
}
/*
 * Handler of the kern.eventtimer.timer sysctl: report the name of the event
 * timer in use and, on write, switch to the timer with the given name.
 *
 * Returns the error from sysctl_handle_string(), ENOENT when no timer with
 * the requested name is found, or 0.  Writing the name of the current timer
 * (compared case-insensitively) is a no-op.
 */
static int
sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS)
{
char buf[32];
struct eventtimer *et;
int error;
/* Snapshot the current name; the lock is not held across the copy-in/out. */
ET_LOCK();
et = timer;
snprintf(buf, sizeof(buf), "%s", et->et_name);
ET_UNLOCK();
error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
/* Re-read 'timer' under the lock: it may have changed in the meantime. */
ET_LOCK();
et = timer;
if (error != 0 || req->newptr == NULL ||
strcasecmp(buf, et->et_name) == 0) {
ET_UNLOCK();
return (error);
}
et = et_find(buf, 0, 0);
if (et == NULL) {
ET_UNLOCK();
return (ENOENT);
}
/* Stop and release the old timer. */
configtimer(0);
et_free(timer);
/* Move the C3 inhibit reference from the old timer to the new one. */
if (et->et_flags & ET_FLAGS_C3STOP)
cpu_disable_c3_sleep++;
if (timer->et_flags & ET_FLAGS_C3STOP)
cpu_disable_c3_sleep--;
/* Start over from the requested mode; setuptimer() adapts it. */
periodic = want_periodic;
timer = et;
et_init(timer, timercb, NULL, NULL);
configtimer(1);
ET_UNLOCK();
return (error);
}
SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
0, 0, sysctl_kern_eventtimer_timer, "A", "Chosen event timer");
/*
 * Handler of the kern.eventtimer.periodic sysctl: report the current mode
 * and, on write, restart the timer in the requested mode.
 *
 * The written value is stored in both 'periodic' and 'want_periodic';
 * configtimer(1) then adjusts 'periodic' through setuptimer() if the timer
 * does not support the requested mode.
 *
 * Returns the error from sysctl_handle_int(), 0 on success.
 */
static int
sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS)
{
int error, val;
/* Read-only access returns here without touching the timer. */
val = periodic;
error = sysctl_handle_int(oidp, &val, 0, req);
if (error != 0 || req->newptr == NULL)
return (error);
ET_LOCK();
configtimer(0);
periodic = want_periodic = val;
configtimer(1);
ET_UNLOCK();
return (error);
}
SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode");
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
/*
 * DDB "show clocksource" command: dump the clock scheduling state of every
 * CPU.
 *
 * Each deadline is printed as a raw sbintime value in hex followed, in
 * parentheses, by its distance from that CPU's 'now' in hardclock ticks.
 * The distance is computed and printed as a signed value, so deadlines in
 * the past and the -1 "not scheduled" marker show up as negative numbers.
 */
DB_SHOW_COMMAND(clocksource, db_show_clocksource)
{
	struct pcpu_state *st;
	int c;

	CPU_FOREACH(c) {
		st = DPCPU_ID_PTR(c, timerstate);
		db_printf(
		    "CPU %2d: action %d handle %d ipi %d idle %d\n"
		    " now %#jx nevent %#jx (%jd)\n"
		    " ntick %#jx (%jd) nhard %#jx (%jd)\n"
		    " nstat %#jx (%jd) nprof %#jx (%jd)\n"
		    " ncall %#jx (%jd) ncallopt %#jx (%jd)\n",
		    c, st->action, st->handle, st->ipi, st->idle,
		    (uintmax_t)st->now,
		    (uintmax_t)st->nextevent,
		    (intmax_t)(st->nextevent - st->now) / tick_sbt,
		    (uintmax_t)st->nexttick,
		    (intmax_t)(st->nexttick - st->now) / tick_sbt,
		    (uintmax_t)st->nexthard,
		    (intmax_t)(st->nexthard - st->now) / tick_sbt,
		    (uintmax_t)st->nextstat,
		    (intmax_t)(st->nextstat - st->now) / tick_sbt,
		    (uintmax_t)st->nextprof,
		    (intmax_t)(st->nextprof - st->now) / tick_sbt,
		    (uintmax_t)st->nextcall,
		    (intmax_t)(st->nextcall - st->now) / tick_sbt,
		    (uintmax_t)st->nextcallopt,
		    (intmax_t)(st->nextcallopt - st->now) / tick_sbt);
	}
}
#endif