Path: blob/main/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
48375 views
/*1* CDDL HEADER START2*3* The contents of this file are subject to the terms of the4* Common Development and Distribution License, Version 1.0 only5* (the "License"). You may not use this file except in compliance6* with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or http://www.opensolaris.org/os/licensing.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*21*/22/*23* Copyright 2005 Sun Microsystems, Inc. All rights reserved.24* Use is subject to license terms.25*/2627/*28* Copyright (c) 2011, Joyent, Inc. All rights reserved.29*/3031#include <sys/param.h>32#include <sys/systm.h>33#include <sys/kernel.h>34#include <sys/malloc.h>35#include <sys/msan.h>36#include <sys/proc.h>37#include <sys/smp.h>38#include <sys/dtrace_impl.h>39#include <sys/dtrace_bsd.h>40#include <cddl/dev/dtrace/dtrace_cddl.h>41#include <machine/clock.h>42#include <machine/cpufunc.h>43#include <machine/frame.h>44#include <machine/md_var.h>45#include <machine/psl.h>46#include <machine/trap.h>47#include <vm/pmap.h>4849extern void dtrace_getnanotime(struct timespec *tsp);50extern int (*dtrace_invop_jump_addr)(struct trapframe *);5152int dtrace_invop(uintptr_t, struct trapframe *, void **);53int dtrace_invop_start(struct trapframe *frame);54void dtrace_invop_init(void);55void dtrace_invop_uninit(void);5657typedef struct dtrace_invop_hdlr {58int (*dtih_func)(uintptr_t, struct trapframe *, uintptr_t);59struct dtrace_invop_hdlr *dtih_next;60} dtrace_invop_hdlr_t;6162dtrace_invop_hdlr_t *dtrace_invop_hdlr;6364int65dtrace_invop(uintptr_t addr, struct trapframe *frame, void **scratch)66{67struct thread *td;68dtrace_invop_hdlr_t *hdlr;69int rval;7071kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED);7273td = curthread;74td->t_dtrace_trapframe = frame;75rval = 0;76for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) {77rval = hdlr->dtih_func(addr, frame, (uintptr_t)scratch);78if (rval != 0)79break;80}81td->t_dtrace_trapframe = NULL;82return (rval);83}8485void86dtrace_invop_add(int (*func)(uintptr_t, struct trapframe *, uintptr_t))87{88dtrace_invop_hdlr_t *hdlr;8990hdlr = kmem_alloc(sizeof (dtrace_invop_hdlr_t), KM_SLEEP);91hdlr->dtih_func = func;92hdlr->dtih_next = dtrace_invop_hdlr;93dtrace_invop_hdlr = hdlr;94}9596void97dtrace_invop_remove(int (*func)(uintptr_t, struct trapframe *, uintptr_t))98{99dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlr, *prev = NULL;100101for (;;) {102if (hdlr == NULL)103panic("attempt to remove non-existent invop handler");104105if (hdlr->dtih_func == func)106break;107108prev = hdlr;109hdlr = hdlr->dtih_next;110}111112if (prev == NULL) {113ASSERT(dtrace_invop_hdlr == hdlr);114dtrace_invop_hdlr = hdlr->dtih_next;115} else {116ASSERT(dtrace_invop_hdlr != hdlr);117prev->dtih_next = hdlr->dtih_next;118}119120kmem_free(hdlr, 0);121}122123void124dtrace_invop_init(void)125{126127dtrace_invop_jump_addr = dtrace_invop_start;128}129130void131dtrace_invop_uninit(void)132{133134dtrace_invop_jump_addr = NULL;135}136137/*ARGSUSED*/138void139dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))140{141(*func)(0, la57 ? (uintptr_t)addr_P5Tmap : (uintptr_t)addr_P4Tmap);142}143144#ifdef notyet145void146dtrace_safe_synchronous_signal(void)147{148kthread_t *t = curthread;149struct regs *rp = lwptoregs(ttolwp(t));150size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;151152ASSERT(t->t_dtrace_on);153154/*155* If we're not in the range of scratch addresses, we're not actually156* tracing user instructions so turn off the flags. If the instruction157* we copied out caused a synchonous trap, reset the pc back to its158* original value and turn off the flags.159*/160if (rp->r_pc < t->t_dtrace_scrpc ||161rp->r_pc > t->t_dtrace_astpc + isz) {162t->t_dtrace_ft = 0;163} else if (rp->r_pc == t->t_dtrace_scrpc ||164rp->r_pc == t->t_dtrace_astpc) {165rp->r_pc = t->t_dtrace_pc;166t->t_dtrace_ft = 0;167}168}169170int171dtrace_safe_defer_signal(void)172{173kthread_t *t = curthread;174struct regs *rp = lwptoregs(ttolwp(t));175size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;176177ASSERT(t->t_dtrace_on);178179/*180* If we're not in the range of scratch addresses, we're not actually181* tracing user instructions so turn off the flags.182*/183if (rp->r_pc < t->t_dtrace_scrpc ||184rp->r_pc > t->t_dtrace_astpc + isz) {185t->t_dtrace_ft = 0;186return (0);187}188189/*190* If we have executed the original instruction, but we have performed191* neither the jmp back to t->t_dtrace_npc nor the clean up of any192* registers used to emulate %rip-relative instructions in 64-bit mode,193* we'll save ourselves some effort by doing that here and taking the194* signal right away. We detect this condition by seeing if the program195* counter is the range [scrpc + isz, astpc).196*/197if (rp->r_pc >= t->t_dtrace_scrpc + isz &&198rp->r_pc < t->t_dtrace_astpc) {199#ifdef __amd64200/*201* If there is a scratch register and we're on the202* instruction immediately after the modified instruction,203* restore the value of that scratch register.204*/205if (t->t_dtrace_reg != 0 &&206rp->r_pc == t->t_dtrace_scrpc + isz) {207switch (t->t_dtrace_reg) {208case REG_RAX:209rp->r_rax = t->t_dtrace_regv;210break;211case REG_RCX:212rp->r_rcx = t->t_dtrace_regv;213break;214case REG_R8:215rp->r_r8 = t->t_dtrace_regv;216break;217case REG_R9:218rp->r_r9 = t->t_dtrace_regv;219break;220}221}222#endif223rp->r_pc = t->t_dtrace_npc;224t->t_dtrace_ft = 0;225return (0);226}227228/*229* Otherwise, make sure we'll return to the kernel after executing230* the copied out instruction and defer the signal.231*/232if (!t->t_dtrace_step) {233ASSERT(rp->r_pc < t->t_dtrace_astpc);234rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc;235t->t_dtrace_step = 1;236}237238t->t_dtrace_ast = 1;239240return (1);241}242#endif243244static int64_t tgt_cpu_tsc;245static int64_t hst_cpu_tsc;246static int64_t tsc_skew[MAXCPU];247static uint64_t nsec_scale;248249/* See below for the explanation of this macro. */250#define SCALE_SHIFT 28251252static void253dtrace_gethrtime_init_cpu(void *arg)254{255uintptr_t cpu = (uintptr_t) arg;256257if (cpu == curcpu)258tgt_cpu_tsc = rdtsc();259else260hst_cpu_tsc = rdtsc();261}262263static void264dtrace_gethrtime_init(void *arg)265{266struct pcpu *pc;267uint64_t tsc_f;268cpuset_t map;269int i;270271/*272* Get TSC frequency known at this moment.273* This should be constant if TSC is invariant.274* Otherwise tick->time conversion will be inaccurate, but275* will preserve monotonic property of TSC.276*/277tsc_f = atomic_load_acq_64(&tsc_freq);278279/*280* The following line checks that nsec_scale calculated below281* doesn't overflow 32-bit unsigned integer, so that it can multiply282* another 32-bit integer without overflowing 64-bit.283* Thus minimum supported TSC frequency is 62.5MHz.284*/285KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)),286("TSC frequency is too low"));287288/*289* We scale up NANOSEC/tsc_f ratio to preserve as much precision290* as possible.291* 2^28 factor was chosen quite arbitrarily from practical292* considerations:293* - it supports TSC frequencies as low as 62.5MHz (see above);294* - it provides quite good precision (e < 0.01%) up to THz295* (terahertz) values;296*/297nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f;298299if (vm_guest != VM_GUEST_NO)300return;301302/* The current CPU is the reference one. */303sched_pin();304tsc_skew[curcpu] = 0;305CPU_FOREACH(i) {306if (i == curcpu)307continue;308309pc = pcpu_find(i);310CPU_SETOF(PCPU_GET(cpuid), &map);311CPU_SET(pc->pc_cpuid, &map);312313smp_rendezvous_cpus(map, NULL,314dtrace_gethrtime_init_cpu,315smp_no_rendezvous_barrier, (void *)(uintptr_t) i);316317tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc;318}319sched_unpin();320}321SYSINIT(dtrace_gethrtime_init, SI_SUB_DTRACE, SI_ORDER_ANY,322dtrace_gethrtime_init, NULL);323324/*325* DTrace needs a high resolution time function which can326* be called from a probe context and guaranteed not to have327* instrumented with probes itself.328*329* Returns nanoseconds since boot.330*/331uint64_t332dtrace_gethrtime(void)333{334uint64_t tsc;335uint32_t lo, hi;336register_t rflags;337338/*339* We split TSC value into lower and higher 32-bit halves and separately340* scale them with nsec_scale, then we scale them down by 2^28341* (see nsec_scale calculations) taking into account 32-bit shift of342* the higher half and finally add.343*/344rflags = intr_disable();345tsc = rdtsc() - tsc_skew[curcpu];346intr_restore(rflags);347348lo = tsc;349hi = tsc >> 32;350return (((lo * nsec_scale) >> SCALE_SHIFT) +351((hi * nsec_scale) << (32 - SCALE_SHIFT)));352}353354uint64_t355dtrace_gethrestime(void)356{357struct timespec current_time;358359dtrace_getnanotime(¤t_time);360361return (current_time.tv_sec * 1000000000ULL + current_time.tv_nsec);362}363364/* Function to handle DTrace traps during probes. See amd64/amd64/trap.c. */365int366dtrace_trap(struct trapframe *frame, u_int type)367{368uint16_t nofault;369370/*371* A trap can occur while DTrace executes a probe. Before372* executing the probe, DTrace blocks re-scheduling and sets373* a flag in its per-cpu flags to indicate that it doesn't374* want to fault. On returning from the probe, the no-fault375* flag is cleared and finally re-scheduling is enabled.376*377* Check if DTrace has enabled 'no-fault' mode:378*/379sched_pin();380nofault = cpu_core[curcpu].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT;381sched_unpin();382if (nofault) {383KASSERT((read_rflags() & PSL_I) == 0, ("interrupts enabled"));384385/*386* There are only a couple of trap types that are expected.387* All the rest will be handled in the usual way.388*/389switch (type) {390/* General protection fault. */391case T_PROTFLT:392/* Flag an illegal operation. */393cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;394395/*396* Offset the instruction pointer to the instruction397* following the one causing the fault.398*/399frame->tf_rip += dtrace_instr_size((uint8_t *) frame->tf_rip);400return (1);401/* Page fault. */402case T_PAGEFLT:403/* Flag a bad address. */404cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR;405cpu_core[curcpu].cpuc_dtrace_illval = frame->tf_addr;406407/*408* Offset the instruction pointer to the instruction409* following the one causing the fault.410*/411frame->tf_rip += dtrace_instr_size((uint8_t *) frame->tf_rip);412return (1);413default:414/* Handle all other traps in the usual way. */415break;416}417}418419/* Handle the trap in the usual way. */420return (0);421}422423424