Path: blob/main/sys/cddl/dev/dtrace/i386/dtrace_subr.c
48375 views
/*1* CDDL HEADER START2*3* The contents of this file are subject to the terms of the4* Common Development and Distribution License, Version 1.0 only5* (the "License"). You may not use this file except in compliance6* with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or http://www.opensolaris.org/os/licensing.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*21*/22/*23* Copyright 2005 Sun Microsystems, Inc. All rights reserved.24* Use is subject to license terms.25*/2627/*28* Copyright (c) 2011, Joyent, Inc. All rights reserved.29*/3031#include <sys/param.h>32#include <sys/systm.h>33#include <sys/cpuset.h>34#include <sys/kernel.h>35#include <sys/malloc.h>36#include <sys/kmem.h>37#include <sys/proc.h>38#include <sys/smp.h>39#include <sys/dtrace_impl.h>40#include <sys/dtrace_bsd.h>41#include <cddl/dev/dtrace/dtrace_cddl.h>42#include <machine/clock.h>43#include <machine/cpufunc.h>44#include <machine/frame.h>45#include <machine/psl.h>46#include <machine/trap.h>47#include <vm/pmap.h>4849extern uintptr_t kernelbase;5051extern void dtrace_getnanotime(struct timespec *tsp);52extern int (*dtrace_invop_jump_addr)(struct trapframe *);5354int dtrace_invop(uintptr_t, struct trapframe *, uintptr_t);55int dtrace_invop_start(struct trapframe *frame);56void dtrace_invop_init(void);57void dtrace_invop_uninit(void);5859typedef struct dtrace_invop_hdlr {60int (*dtih_func)(uintptr_t, struct trapframe *, uintptr_t);61struct dtrace_invop_hdlr *dtih_next;62} dtrace_invop_hdlr_t;6364dtrace_invop_hdlr_t *dtrace_invop_hdlr;6566int67dtrace_invop(uintptr_t addr, struct trapframe *frame, uintptr_t eax)68{69struct thread *td;70dtrace_invop_hdlr_t *hdlr;71int rval;7273rval = 0;74td = curthread;75td->t_dtrace_trapframe = frame;76for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next)77if ((rval = hdlr->dtih_func(addr, frame, eax)) != 0)78break;79td->t_dtrace_trapframe = NULL;80return (rval);81}8283void84dtrace_invop_add(int (*func)(uintptr_t, struct trapframe *, uintptr_t))85{86dtrace_invop_hdlr_t *hdlr;8788hdlr = kmem_alloc(sizeof (dtrace_invop_hdlr_t), KM_SLEEP);89hdlr->dtih_func = func;90hdlr->dtih_next = dtrace_invop_hdlr;91dtrace_invop_hdlr = hdlr;92}9394void95dtrace_invop_remove(int (*func)(uintptr_t, struct trapframe *, uintptr_t))96{97dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlr, *prev = NULL;9899for (;;) {100if (hdlr == NULL)101panic("attempt to remove non-existent invop handler");102103if (hdlr->dtih_func == func)104break;105106prev = hdlr;107hdlr = hdlr->dtih_next;108}109110if (prev == NULL) {111ASSERT(dtrace_invop_hdlr == hdlr);112dtrace_invop_hdlr = hdlr->dtih_next;113} else {114ASSERT(dtrace_invop_hdlr != hdlr);115prev->dtih_next = hdlr->dtih_next;116}117118kmem_free(hdlr, 0);119}120121void122dtrace_invop_init(void)123{124125dtrace_invop_jump_addr = dtrace_invop_start;126}127128void129dtrace_invop_uninit(void)130{131132dtrace_invop_jump_addr = NULL;133}134135void136dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))137{138(*func)(0, kernelbase);139}140141#ifdef notyet142void143dtrace_safe_synchronous_signal(void)144{145kthread_t *t = curthread;146struct regs *rp = lwptoregs(ttolwp(t));147size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;148149ASSERT(t->t_dtrace_on);150151/*152* If we're not in the range of scratch addresses, we're not actually153* tracing user instructions so turn off the flags. If the instruction154* we copied out caused a synchonous trap, reset the pc back to its155* original value and turn off the flags.156*/157if (rp->r_pc < t->t_dtrace_scrpc ||158rp->r_pc > t->t_dtrace_astpc + isz) {159t->t_dtrace_ft = 0;160} else if (rp->r_pc == t->t_dtrace_scrpc ||161rp->r_pc == t->t_dtrace_astpc) {162rp->r_pc = t->t_dtrace_pc;163t->t_dtrace_ft = 0;164}165}166167int168dtrace_safe_defer_signal(void)169{170kthread_t *t = curthread;171struct regs *rp = lwptoregs(ttolwp(t));172size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;173174ASSERT(t->t_dtrace_on);175176/*177* If we're not in the range of scratch addresses, we're not actually178* tracing user instructions so turn off the flags.179*/180if (rp->r_pc < t->t_dtrace_scrpc ||181rp->r_pc > t->t_dtrace_astpc + isz) {182t->t_dtrace_ft = 0;183return (0);184}185186/*187* If we have executed the original instruction, but we have performed188* neither the jmp back to t->t_dtrace_npc nor the clean up of any189* registers used to emulate %rip-relative instructions in 64-bit mode,190* we'll save ourselves some effort by doing that here and taking the191* signal right away. We detect this condition by seeing if the program192* counter is the range [scrpc + isz, astpc).193*/194if (rp->r_pc >= t->t_dtrace_scrpc + isz &&195rp->r_pc < t->t_dtrace_astpc) {196#ifdef __amd64197/*198* If there is a scratch register and we're on the199* instruction immediately after the modified instruction,200* restore the value of that scratch register.201*/202if (t->t_dtrace_reg != 0 &&203rp->r_pc == t->t_dtrace_scrpc + isz) {204switch (t->t_dtrace_reg) {205case REG_RAX:206rp->r_rax = t->t_dtrace_regv;207break;208case REG_RCX:209rp->r_rcx = t->t_dtrace_regv;210break;211case REG_R8:212rp->r_r8 = t->t_dtrace_regv;213break;214case REG_R9:215rp->r_r9 = t->t_dtrace_regv;216break;217}218}219#endif220rp->r_pc = t->t_dtrace_npc;221t->t_dtrace_ft = 0;222return (0);223}224225/*226* Otherwise, make sure we'll return to the kernel after executing227* the copied out instruction and defer the signal.228*/229if (!t->t_dtrace_step) {230ASSERT(rp->r_pc < t->t_dtrace_astpc);231rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc;232t->t_dtrace_step = 1;233}234235t->t_dtrace_ast = 1;236237return (1);238}239#endif240241static int64_t tgt_cpu_tsc;242static int64_t hst_cpu_tsc;243static int64_t tsc_skew[MAXCPU];244static uint64_t nsec_scale;245246/* See below for the explanation of this macro. */247#define SCALE_SHIFT 28248249static void250dtrace_gethrtime_init_cpu(void *arg)251{252uintptr_t cpu = (uintptr_t) arg;253254if (cpu == curcpu)255tgt_cpu_tsc = rdtsc();256else257hst_cpu_tsc = rdtsc();258}259260static void261dtrace_gethrtime_init(void *arg)262{263struct pcpu *pc;264uint64_t tsc_f;265cpuset_t map;266int i;267268/*269* Get TSC frequency known at this moment.270* This should be constant if TSC is invariant.271* Otherwise tick->time conversion will be inaccurate, but272* will preserve monotonic property of TSC.273*/274tsc_f = atomic_load_acq_64(&tsc_freq);275276/*277* The following line checks that nsec_scale calculated below278* doesn't overflow 32-bit unsigned integer, so that it can multiply279* another 32-bit integer without overflowing 64-bit.280* Thus minimum supported TSC frequency is 62.5MHz.281*/282KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)),283("TSC frequency is too low"));284285/*286* We scale up NANOSEC/tsc_f ratio to preserve as much precision287* as possible.288* 2^28 factor was chosen quite arbitrarily from practical289* considerations:290* - it supports TSC frequencies as low as 62.5MHz (see above);291* - it provides quite good precision (e < 0.01%) up to THz292* (terahertz) values;293*/294nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f;295296if (vm_guest != VM_GUEST_NO)297return;298299/* The current CPU is the reference one. */300sched_pin();301tsc_skew[curcpu] = 0;302CPU_FOREACH(i) {303if (i == curcpu)304continue;305306pc = pcpu_find(i);307CPU_SETOF(PCPU_GET(cpuid), &map);308CPU_SET(pc->pc_cpuid, &map);309310smp_rendezvous_cpus(map, NULL,311dtrace_gethrtime_init_cpu,312smp_no_rendezvous_barrier, (void *)(uintptr_t) i);313314tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc;315}316sched_unpin();317}318SYSINIT(dtrace_gethrtime_init, SI_SUB_DTRACE, SI_ORDER_ANY,319dtrace_gethrtime_init, NULL);320321/*322* DTrace needs a high resolution time function which can323* be called from a probe context and guaranteed not to have324* instrumented with probes itself.325*326* Returns nanoseconds since boot.327*/328uint64_t329dtrace_gethrtime(void)330{331uint64_t tsc;332uint32_t lo, hi;333register_t eflags;334335/*336* We split TSC value into lower and higher 32-bit halves and separately337* scale them with nsec_scale, then we scale them down by 2^28338* (see nsec_scale calculations) taking into account 32-bit shift of339* the higher half and finally add.340*/341eflags = intr_disable();342tsc = rdtsc() - tsc_skew[curcpu];343intr_restore(eflags);344345lo = tsc;346hi = tsc >> 32;347return (((lo * nsec_scale) >> SCALE_SHIFT) +348((hi * nsec_scale) << (32 - SCALE_SHIFT)));349}350351uint64_t352dtrace_gethrestime(void)353{354struct timespec current_time;355356dtrace_getnanotime(¤t_time);357358return (current_time.tv_sec * 1000000000ULL + current_time.tv_nsec);359}360361/* Function to handle DTrace traps during probes. See i386/i386/trap.c */362int363dtrace_trap(struct trapframe *frame, u_int type)364{365uint16_t nofault;366367/*368* A trap can occur while DTrace executes a probe. Before369* executing the probe, DTrace blocks re-scheduling and sets370* a flag in its per-cpu flags to indicate that it doesn't371* want to fault. On returning from the probe, the no-fault372* flag is cleared and finally re-scheduling is enabled.373*374* Check if DTrace has enabled 'no-fault' mode:375*/376sched_pin();377nofault = cpu_core[curcpu].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT;378sched_unpin();379if (nofault) {380KASSERT((read_eflags() & PSL_I) == 0, ("interrupts enabled"));381382/*383* There are only a couple of trap types that are expected.384* All the rest will be handled in the usual way.385*/386switch (type) {387/* General protection fault. */388case T_PROTFLT:389/* Flag an illegal operation. */390cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;391392/*393* Offset the instruction pointer to the instruction394* following the one causing the fault.395*/396frame->tf_eip += dtrace_instr_size((uint8_t *) frame->tf_eip);397return (1);398/* Page fault. */399case T_PAGEFLT:400/* Flag a bad address. */401cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR;402cpu_core[curcpu].cpuc_dtrace_illval = rcr2();403404/*405* Offset the instruction pointer to the instruction406* following the one causing the fault.407*/408frame->tf_eip += dtrace_instr_size((uint8_t *) frame->tf_eip);409return (1);410default:411/* Handle all other traps in the usual way. */412break;413}414}415416/* Handle the trap in the usual way. */417return (0);418}419420421