Path: blob/main/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c
48546 views
/*1* CDDL HEADER START2*3* The contents of this file are subject to the terms of the4* Common Development and Distribution License (the "License").5* You may not use this file except in compliance with the License.6*7* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE8* or http://www.opensolaris.org/os/licensing.9* See the License for the specific language governing permissions10* and limitations under the License.11*12* When distributing Covered Code, include this CDDL HEADER in each13* file and include the License file at usr/src/OPENSOLARIS.LICENSE.14* If applicable, add the following below this CDDL HEADER, with the15* fields enclosed by brackets "[]" replaced with your own identifying16* information: Portions Copyright [yyyy] [name of copyright owner]17*18* CDDL HEADER END19*20* Portions Copyright 2010 The FreeBSD Foundation21*/2223/*24* Copyright 2008 Sun Microsystems, Inc. All rights reserved.25* Use is subject to license terms.26*/2728#include <sys/fasttrap_isa.h>29#include <sys/fasttrap_impl.h>30#include <sys/dtrace.h>31#include <sys/dtrace_impl.h>32#include <sys/cmn_err.h>33#include <sys/types.h>34#include <sys/dtrace_bsd.h>35#include <sys/proc.h>36#include <sys/reg.h>37#include <sys/rmlock.h>38#include <cddl/dev/dtrace/dtrace_cddl.h>39#include <cddl/dev/dtrace/x86/regset.h>40#include <machine/segments.h>41#include <machine/pcb.h>42#include <machine/trap.h>43#include <sys/sysmacros.h>44#include <sys/ptrace.h>4546#ifdef __i386__47#define r_rax r_eax48#define r_rbx r_ebx49#define r_rip r_eip50#define r_rflags r_eflags51#define r_rsp r_esp52#define r_rbp r_ebp53#endif5455/*56* Lossless User-Land Tracing on x8657* ---------------------------------58*59* The execution of most instructions is not dependent on the address; for60* these instructions it is sufficient to copy them into the user process's61* address space and execute them. 
To effectively single-step an instruction
 * in user-land, we copy out the following sequence of instructions to scratch
 * space in the user thread's ulwp_t structure.
 *
 * We then set the program counter (%eip or %rip) to point to this scratch
 * space. Once execution resumes, the original instruction is executed and
 * then control flow is redirected to what was originally the subsequent
 * instruction. If the kernel attempts to deliver a signal while single-
 * stepping, the signal is deferred and the program counter is moved into the
 * second sequence of instructions. The second sequence ends in a trap into
 * the kernel where the deferred signal is then properly handled and delivered.
 *
 * For instructions whose execution is position dependent, we perform simple
 * emulation. These instructions are limited to control transfer
 * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
 * of %rip-relative addressing that means that almost any instruction can be
 * position dependent.
For all the details on how we emulate generic78* instructions included %rip-relative instructions, see the code in79* fasttrap_pid_probe() below where we handle instructions of type80* FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).81*/8283#define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3)84#define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7)85#define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7)86#define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm))8788#define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3)89#define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7)90#define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7)9192#define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1)93#define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1)94#define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1)95#define FASTTRAP_REX_B(rex) ((rex) & 1)96#define FASTTRAP_REX(w, r, x, b) \97(0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))9899/*100* Single-byte op-codes.101*/102#define FASTTRAP_PUSHL_EBP 0x55103104#define FASTTRAP_JO 0x70105#define FASTTRAP_JNO 0x71106#define FASTTRAP_JB 0x72107#define FASTTRAP_JAE 0x73108#define FASTTRAP_JE 0x74109#define FASTTRAP_JNE 0x75110#define FASTTRAP_JBE 0x76111#define FASTTRAP_JA 0x77112#define FASTTRAP_JS 0x78113#define FASTTRAP_JNS 0x79114#define FASTTRAP_JP 0x7a115#define FASTTRAP_JNP 0x7b116#define FASTTRAP_JL 0x7c117#define FASTTRAP_JGE 0x7d118#define FASTTRAP_JLE 0x7e119#define FASTTRAP_JG 0x7f120121#define FASTTRAP_NOP 0x90122123#define FASTTRAP_MOV_EAX 0xb8124#define FASTTRAP_MOV_ECX 0xb9125126#define FASTTRAP_RET16 0xc2127#define FASTTRAP_RET 0xc3128129#define FASTTRAP_LOOPNZ 0xe0130#define FASTTRAP_LOOPZ 0xe1131#define FASTTRAP_LOOP 0xe2132#define FASTTRAP_JCXZ 0xe3133134#define FASTTRAP_CALL 0xe8135#define FASTTRAP_JMP32 0xe9136#define FASTTRAP_JMP8 0xeb137138#define FASTTRAP_INT3 0xcc139#define FASTTRAP_INT 0xcd140141#define FASTTRAP_2_BYTE_OP 0x0f142#define FASTTRAP_GROUP5_OP 0xff143144/*145* Two-byte op-codes (second byte 
only).146*/147#define FASTTRAP_0F_JO 0x80148#define FASTTRAP_0F_JNO 0x81149#define FASTTRAP_0F_JB 0x82150#define FASTTRAP_0F_JAE 0x83151#define FASTTRAP_0F_JE 0x84152#define FASTTRAP_0F_JNE 0x85153#define FASTTRAP_0F_JBE 0x86154#define FASTTRAP_0F_JA 0x87155#define FASTTRAP_0F_JS 0x88156#define FASTTRAP_0F_JNS 0x89157#define FASTTRAP_0F_JP 0x8a158#define FASTTRAP_0F_JNP 0x8b159#define FASTTRAP_0F_JL 0x8c160#define FASTTRAP_0F_JGE 0x8d161#define FASTTRAP_0F_JLE 0x8e162#define FASTTRAP_0F_JG 0x8f163164#define FASTTRAP_EFLAGS_OF 0x800165#define FASTTRAP_EFLAGS_DF 0x400166#define FASTTRAP_EFLAGS_SF 0x080167#define FASTTRAP_EFLAGS_ZF 0x040168#define FASTTRAP_EFLAGS_AF 0x010169#define FASTTRAP_EFLAGS_PF 0x004170#define FASTTRAP_EFLAGS_CF 0x001171172/*173* Instruction prefixes.174*/175#define FASTTRAP_PREFIX_OPERAND 0x66176#define FASTTRAP_PREFIX_ADDRESS 0x67177#define FASTTRAP_PREFIX_CS 0x2E178#define FASTTRAP_PREFIX_DS 0x3E179#define FASTTRAP_PREFIX_ES 0x26180#define FASTTRAP_PREFIX_FS 0x64181#define FASTTRAP_PREFIX_GS 0x65182#define FASTTRAP_PREFIX_SS 0x36183#define FASTTRAP_PREFIX_LOCK 0xF0184#define FASTTRAP_PREFIX_REP 0xF3185#define FASTTRAP_PREFIX_REPNE 0xF2186187#define FASTTRAP_NOREG 0xff188189/*190* Map between instruction register encodings and the kernel constants which191* correspond to indicies into struct regs.192*/193#ifdef __amd64194static const uint8_t regmap[16] = {195REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,196REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,197};198#else199static const uint8_t regmap[8] = {200EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI201};202#endif203204static ulong_t fasttrap_getreg(struct reg *, uint_t);205206static uint64_t207fasttrap_anarg(struct reg *rp, int function_entry, int argno)208{209uint64_t value = 0;210int shift = function_entry ? 
1 : 0;211212#ifdef __amd64213if (curproc->p_model == DATAMODEL_LP64) {214uintptr_t *stack;215216/*217* In 64-bit mode, the first six arguments are stored in218* registers.219*/220if (argno < 6)221switch (argno) {222case 0:223return (rp->r_rdi);224case 1:225return (rp->r_rsi);226case 2:227return (rp->r_rdx);228case 3:229return (rp->r_rcx);230case 4:231return (rp->r_r8);232case 5:233return (rp->r_r9);234}235236stack = (uintptr_t *)rp->r_rsp;237DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);238value = dtrace_fulword(&stack[argno - 6 + shift]);239DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);240} else {241#endif242uint32_t *stack = (uint32_t *)rp->r_rsp;243DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);244value = dtrace_fuword32(&stack[argno + shift]);245DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);246#ifdef __amd64247}248#endif249250return (value);251}252253/*ARGSUSED*/254int255fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,256fasttrap_probe_type_t type)257{258uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10];259size_t len = FASTTRAP_MAX_INSTR_SIZE;260size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET));261uint_t start = 0;262int rmindex, size;263uint8_t seg, rex = 0;264265/*266* Read the instruction at the given address out of the process's267* address space. We don't have to worry about a debugger268* changing this instruction before we overwrite it with our trap269* instruction since P_PR_LOCK is set. Since instructions can span270* pages, we potentially read the instruction in two parts. 
If the271* second part fails, we just zero out that part of the instruction.272*/273if (uread(p, &instr[0], first, pc) != 0)274return (-1);275if (len > first &&276uread(p, &instr[first], len - first, pc + first) != 0) {277bzero(&instr[first], len - first);278len = first;279}280281/*282* If the disassembly fails, then we have a malformed instruction.283*/284if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0)285return (-1);286287/*288* Make sure the disassembler isn't completely broken.289*/290ASSERT(-1 <= rmindex && rmindex < size);291292/*293* If the computed size is greater than the number of bytes read,294* then it was a malformed instruction possibly because it fell on a295* page boundary and the subsequent page was missing or because of296* some malicious user.297*/298if (size > len)299return (-1);300301tp->ftt_size = (uint8_t)size;302tp->ftt_segment = FASTTRAP_SEG_NONE;303304/*305* Find the start of the instruction's opcode by processing any306* legacy prefixes.307*/308for (;;) {309seg = 0;310switch (instr[start]) {311case FASTTRAP_PREFIX_SS:312seg++;313/*FALLTHRU*/314case FASTTRAP_PREFIX_GS:315seg++;316/*FALLTHRU*/317case FASTTRAP_PREFIX_FS:318seg++;319/*FALLTHRU*/320case FASTTRAP_PREFIX_ES:321seg++;322/*FALLTHRU*/323case FASTTRAP_PREFIX_DS:324seg++;325/*FALLTHRU*/326case FASTTRAP_PREFIX_CS:327seg++;328/*FALLTHRU*/329case FASTTRAP_PREFIX_OPERAND:330case FASTTRAP_PREFIX_ADDRESS:331case FASTTRAP_PREFIX_LOCK:332case FASTTRAP_PREFIX_REP:333case FASTTRAP_PREFIX_REPNE:334if (seg != 0) {335/*336* It's illegal for an instruction to specify337* two segment prefixes -- give up on this338* illegal instruction.339*/340if (tp->ftt_segment != FASTTRAP_SEG_NONE)341return (-1);342343tp->ftt_segment = seg;344}345start++;346continue;347}348break;349}350351#ifdef __amd64352/*353* Identify the REX prefix on 64-bit processes.354*/355if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40)356rex = instr[start++];357#endif358359/*360* Now that we're 
pretty sure that the instruction is okay, copy the361* valid part to the tracepoint.362*/363bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE);364365tp->ftt_type = FASTTRAP_T_COMMON;366if (instr[start] == FASTTRAP_2_BYTE_OP) {367switch (instr[start + 1]) {368case FASTTRAP_0F_JO:369case FASTTRAP_0F_JNO:370case FASTTRAP_0F_JB:371case FASTTRAP_0F_JAE:372case FASTTRAP_0F_JE:373case FASTTRAP_0F_JNE:374case FASTTRAP_0F_JBE:375case FASTTRAP_0F_JA:376case FASTTRAP_0F_JS:377case FASTTRAP_0F_JNS:378case FASTTRAP_0F_JP:379case FASTTRAP_0F_JNP:380case FASTTRAP_0F_JL:381case FASTTRAP_0F_JGE:382case FASTTRAP_0F_JLE:383case FASTTRAP_0F_JG:384tp->ftt_type = FASTTRAP_T_JCC;385tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO;386tp->ftt_dest = pc + tp->ftt_size +387/* LINTED - alignment */388*(int32_t *)&instr[start + 2];389break;390}391} else if (instr[start] == FASTTRAP_GROUP5_OP) {392uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]);393uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]);394uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]);395396if (reg == 2 || reg == 4) {397uint_t i, sz;398399if (reg == 2)400tp->ftt_type = FASTTRAP_T_CALL;401else402tp->ftt_type = FASTTRAP_T_JMP;403404if (mod == 3)405tp->ftt_code = 2;406else407tp->ftt_code = 1;408409ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);410411/*412* See AMD x86-64 Architecture Programmer's Manual413* Volume 3, Section 1.2.7, Table 1-12, and414* Appendix A.3.1, Table A-15.415*/416if (mod != 3 && rm == 4) {417uint8_t sib = instr[start + 2];418uint_t index = FASTTRAP_SIB_INDEX(sib);419uint_t base = FASTTRAP_SIB_BASE(sib);420421tp->ftt_scale = FASTTRAP_SIB_SCALE(sib);422423tp->ftt_index = (index == 4) ?424FASTTRAP_NOREG :425regmap[index | (FASTTRAP_REX_X(rex) << 3)];426tp->ftt_base = (mod == 0 && base == 5) ?427FASTTRAP_NOREG :428regmap[base | (FASTTRAP_REX_B(rex) << 3)];429430i = 3;431sz = mod == 1 ? 
1 : 4;432} else {433/*434* In 64-bit mode, mod == 0 and r/m == 5435* denotes %rip-relative addressing; in 32-bit436* mode, the base register isn't used. In both437* modes, there is a 32-bit operand.438*/439if (mod == 0 && rm == 5) {440#ifdef __amd64441if (p->p_model == DATAMODEL_LP64)442tp->ftt_base = REG_RIP;443else444#endif445tp->ftt_base = FASTTRAP_NOREG;446sz = 4;447} else {448uint8_t base = rm |449(FASTTRAP_REX_B(rex) << 3);450451tp->ftt_base = regmap[base];452sz = mod == 1 ? 1 : mod == 2 ? 4 : 0;453}454tp->ftt_index = FASTTRAP_NOREG;455i = 2;456}457458if (sz == 1) {459tp->ftt_dest = *(int8_t *)&instr[start + i];460} else if (sz == 4) {461/* LINTED - alignment */462tp->ftt_dest = *(int32_t *)&instr[start + i];463} else {464tp->ftt_dest = 0;465}466}467} else {468switch (instr[start]) {469case FASTTRAP_RET:470tp->ftt_type = FASTTRAP_T_RET;471break;472473case FASTTRAP_RET16:474tp->ftt_type = FASTTRAP_T_RET16;475/* LINTED - alignment */476tp->ftt_dest = *(uint16_t *)&instr[start + 1];477break;478479case FASTTRAP_JO:480case FASTTRAP_JNO:481case FASTTRAP_JB:482case FASTTRAP_JAE:483case FASTTRAP_JE:484case FASTTRAP_JNE:485case FASTTRAP_JBE:486case FASTTRAP_JA:487case FASTTRAP_JS:488case FASTTRAP_JNS:489case FASTTRAP_JP:490case FASTTRAP_JNP:491case FASTTRAP_JL:492case FASTTRAP_JGE:493case FASTTRAP_JLE:494case FASTTRAP_JG:495tp->ftt_type = FASTTRAP_T_JCC;496tp->ftt_code = instr[start];497tp->ftt_dest = pc + tp->ftt_size +498(int8_t)instr[start + 1];499break;500501case FASTTRAP_LOOPNZ:502case FASTTRAP_LOOPZ:503case FASTTRAP_LOOP:504tp->ftt_type = FASTTRAP_T_LOOP;505tp->ftt_code = instr[start];506tp->ftt_dest = pc + tp->ftt_size +507(int8_t)instr[start + 1];508break;509510case FASTTRAP_JCXZ:511tp->ftt_type = FASTTRAP_T_JCXZ;512tp->ftt_dest = pc + tp->ftt_size +513(int8_t)instr[start + 1];514break;515516case FASTTRAP_CALL:517tp->ftt_type = FASTTRAP_T_CALL;518tp->ftt_dest = pc + tp->ftt_size +519/* LINTED - alignment */520*(int32_t *)&instr[start + 1];521tp->ftt_code = 
0;522break;523524case FASTTRAP_JMP32:525tp->ftt_type = FASTTRAP_T_JMP;526tp->ftt_dest = pc + tp->ftt_size +527/* LINTED - alignment */528*(int32_t *)&instr[start + 1];529break;530case FASTTRAP_JMP8:531tp->ftt_type = FASTTRAP_T_JMP;532tp->ftt_dest = pc + tp->ftt_size +533(int8_t)instr[start + 1];534break;535536case FASTTRAP_PUSHL_EBP:537if (start == 0)538tp->ftt_type = FASTTRAP_T_PUSHL_EBP;539break;540541case FASTTRAP_NOP:542#ifdef __amd64543ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);544545/*546* On amd64 we have to be careful not to confuse a nop547* (actually xchgl %eax, %eax) with an instruction using548* the same opcode, but that does something different549* (e.g. xchgl %r8d, %eax or xcghq %r8, %rax).550*/551if (FASTTRAP_REX_B(rex) == 0)552#endif553tp->ftt_type = FASTTRAP_T_NOP;554break;555556case FASTTRAP_INT3:557/*558* The pid provider shares the int3 trap with debugger559* breakpoints so we can't instrument them.560*/561ASSERT(instr[start] == FASTTRAP_INSTR);562return (-1);563564case FASTTRAP_INT:565/*566* Interrupts seem like they could be traced with567* no negative implications, but it's possible that568* a thread could be redirected by the trap handling569* code which would eventually return to the570* instruction after the interrupt. If the interrupt571* were in our scratch space, the subsequent572* instruction might be overwritten before we return.573* Accordingly we refuse to instrument any interrupt.574*/575return (-1);576}577}578579#ifdef __amd64580if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) {581/*582* If the process is 64-bit and the instruction type is still583* FASTTRAP_T_COMMON -- meaning we're going to copy it out an584* execute it -- we need to watch for %rip-relative585* addressing mode. 
See the portion of fasttrap_pid_probe()586* below where we handle tracepoints with type587* FASTTRAP_T_COMMON for how we emulate instructions that588* employ %rip-relative addressing.589*/590if (rmindex != -1) {591uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]);592uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]);593uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]);594595ASSERT(rmindex > start);596597if (mod == 0 && rm == 5) {598/*599* We need to be sure to avoid other600* registers used by this instruction. While601* the reg field may determine the op code602* rather than denoting a register, assuming603* that it denotes a register is always safe.604* We leave the REX field intact and use605* whatever value's there for simplicity.606*/607if (reg != 0) {608tp->ftt_ripmode = FASTTRAP_RIP_1 |609(FASTTRAP_RIP_X *610FASTTRAP_REX_B(rex));611rm = 0;612} else {613tp->ftt_ripmode = FASTTRAP_RIP_2 |614(FASTTRAP_RIP_X *615FASTTRAP_REX_B(rex));616rm = 1;617}618619tp->ftt_modrm = tp->ftt_instr[rmindex];620tp->ftt_instr[rmindex] =621FASTTRAP_MODRM(2, reg, rm);622}623}624}625#endif626627return (0);628}629630int631fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)632{633fasttrap_instr_t instr = FASTTRAP_INSTR;634635if (uwrite(p, &instr, 1, tp->ftt_pc) != 0)636return (-1);637638return (0);639}640641int642fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)643{644uint8_t instr;645646/*647* Distinguish between read or write failures and a changed648* instruction.649*/650if (uread(p, &instr, 1, tp->ftt_pc) != 0)651return (0);652if (instr != FASTTRAP_INSTR)653return (0);654if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0)655return (-1);656657return (0);658}659660#ifdef __amd64661static uintptr_t662fasttrap_fulword_noerr(const void *uaddr)663{664uintptr_t ret;665666if ((ret = fasttrap_fulword(uaddr)) != -1)667return (ret);668669return (0);670}671#endif672673static uint32_t674fasttrap_fuword32_noerr(const void *uaddr)675{676uint32_t ret;677678if ((ret = 
fasttrap_fuword32(uaddr)) != -1)679return (ret);680681return (0);682}683684static void685fasttrap_return_common(struct reg *rp, uintptr_t pc, pid_t pid,686uintptr_t new_pc)687{688fasttrap_tracepoint_t *tp;689fasttrap_bucket_t *bucket;690fasttrap_id_t *id;691struct rm_priotracker tracker;692693rm_rlock(&fasttrap_tp_lock, &tracker);694bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];695696for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {697if (pid == tp->ftt_pid && pc == tp->ftt_pc &&698tp->ftt_proc->ftpc_acount != 0)699break;700}701702/*703* Don't sweat it if we can't find the tracepoint again; unlike704* when we're in fasttrap_pid_probe(), finding the tracepoint here705* is not essential to the correct execution of the process.706*/707if (tp == NULL) {708rm_runlock(&fasttrap_tp_lock, &tracker);709return;710}711712for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {713/*714* If there's a branch that could act as a return site, we715* need to trace it, and check here if the program counter is716* external to the function.717*/718if (tp->ftt_type != FASTTRAP_T_RET &&719tp->ftt_type != FASTTRAP_T_RET16 &&720new_pc - id->fti_probe->ftp_faddr <721id->fti_probe->ftp_fsize)722continue;723724dtrace_probe(id->fti_probe->ftp_id,725pc - id->fti_probe->ftp_faddr,726rp->r_rax, rp->r_rbx, 0, 0);727}728729rm_runlock(&fasttrap_tp_lock, &tracker);730}731732static void733fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr)734{735ksiginfo_t ksi;736737ksiginfo_init(&ksi);738ksi.ksi_signo = SIGSEGV;739ksi.ksi_code = SEGV_MAPERR;740ksi.ksi_addr = (caddr_t)addr;741PROC_LOCK(p);742(void)tdksignal(t, SIGSEGV, &ksi);743PROC_UNLOCK(p);744}745746#ifdef __amd64747static void748fasttrap_usdt_args64(fasttrap_probe_t *probe, struct reg *rp, int argc,749uintptr_t *argv)750{751int i, x, cap = MIN(argc, probe->ftp_nargs);752uintptr_t *stack = (uintptr_t *)rp->r_rsp;753754for (i = 0; i < cap; i++) {755x = probe->ftp_argmap[i];756757if (x < 6)758argv[i] = 
(&rp->r_rdi)[x];759else760argv[i] = fasttrap_fulword_noerr(&stack[x]);761}762763for (; i < argc; i++) {764argv[i] = 0;765}766}767#endif768769static void770fasttrap_usdt_args32(fasttrap_probe_t *probe, struct reg *rp, int argc,771uint32_t *argv)772{773int i, x, cap = MIN(argc, probe->ftp_nargs);774uint32_t *stack = (uint32_t *)rp->r_rsp;775776for (i = 0; i < cap; i++) {777x = probe->ftp_argmap[i];778779argv[i] = fasttrap_fuword32_noerr(&stack[x]);780}781782for (; i < argc; i++) {783argv[i] = 0;784}785}786787static int788fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct reg *rp, uintptr_t *addr)789{790proc_t *p = curproc;791#ifdef __i386__792struct segment_descriptor *desc;793#else794struct user_segment_descriptor *desc;795#endif796uint16_t sel = 0, ndx, type;797uintptr_t limit;798799switch (tp->ftt_segment) {800case FASTTRAP_SEG_CS:801sel = rp->r_cs;802break;803case FASTTRAP_SEG_DS:804sel = rp->r_ds;805break;806case FASTTRAP_SEG_ES:807sel = rp->r_es;808break;809case FASTTRAP_SEG_FS:810sel = rp->r_fs;811break;812case FASTTRAP_SEG_GS:813sel = rp->r_gs;814break;815case FASTTRAP_SEG_SS:816sel = rp->r_ss;817break;818}819820/*821* Make sure the given segment register specifies a user priority822* selector rather than a kernel selector.823*/824if (ISPL(sel) != SEL_UPL)825return (-1);826827ndx = IDXSEL(sel);828829/*830* Check the bounds and grab the descriptor out of the specified831* descriptor table.832*/833if (ISLDT(sel)) {834#ifdef __i386__835if (ndx > p->p_md.md_ldt->ldt_len)836return (-1);837838desc = (struct segment_descriptor *)839p->p_md.md_ldt[ndx].ldt_base;840#else841if (ndx > max_ldt_segment)842return (-1);843844desc = (struct user_segment_descriptor *)845p->p_md.md_ldt[ndx].ldt_base;846#endif847848} else {849if (ndx >= NGDT)850return (-1);851852#ifdef __i386__853desc = &gdt[ndx].sd;854#else855desc = PCPU_PTR(gdt)[ndx];856#endif857}858859/*860* The descriptor must have user privilege level and it must be861* present in memory.862*/863if (desc->sd_dpl != SEL_UPL 
|| desc->sd_p != 1)864return (-1);865866type = desc->sd_type;867868/*869* If the S bit in the type field is not set, this descriptor can870* only be used in system context.871*/872if ((type & 0x10) != 0x10)873return (-1);874875limit = USD_GETLIMIT(desc) * (desc->sd_gran ? PAGESIZE : 1);876877if (tp->ftt_segment == FASTTRAP_SEG_CS) {878/*879* The code/data bit and readable bit must both be set.880*/881if ((type & 0xa) != 0xa)882return (-1);883884if (*addr > limit)885return (-1);886} else {887/*888* The code/data bit must be clear.889*/890if ((type & 0x8) != 0)891return (-1);892893/*894* If the expand-down bit is clear, we just check the limit as895* it would naturally be applied. Otherwise, we need to check896* that the address is the range [limit + 1 .. 0xffff] or897* [limit + 1 ... 0xffffffff] depending on if the default898* operand size bit is set.899*/900if ((type & 0x4) == 0) {901if (*addr > limit)902return (-1);903} else if (desc->sd_def32) {904if (*addr < limit + 1 || 0xffff < *addr)905return (-1);906} else {907if (*addr < limit + 1 || 0xffffffff < *addr)908return (-1);909}910}911912*addr += USD_GETBASE(desc);913914return (0);915}916917int918fasttrap_pid_probe(struct trapframe *tf)919{920struct reg reg, *rp;921proc_t *p = curproc, *pp;922struct rm_priotracker tracker;923uint64_t gen;924uintptr_t pc;925uintptr_t new_pc = 0;926fasttrap_bucket_t *bucket;927fasttrap_tracepoint_t *tp, tp_local;928pid_t pid;929dtrace_icookie_t cookie;930uint_t is_enabled = 0;931932fill_frame_regs(tf, ®);933rp = ®934935pc = rp->r_rip - 1;936937/*938* It's possible that a user (in a veritable orgy of bad planning)939* could redirect this thread's flow of control before it reached the940* return probe fasttrap. 
In this case we need to kill the process941* since it's in a unrecoverable state.942*/943if (curthread->t_dtrace_step) {944ASSERT(curthread->t_dtrace_on);945fasttrap_sigtrap(p, curthread, pc);946return (0);947}948949/*950* Clear all user tracing flags.951*/952curthread->t_dtrace_ft = 0;953curthread->t_dtrace_pc = 0;954curthread->t_dtrace_npc = 0;955curthread->t_dtrace_scrpc = 0;956curthread->t_dtrace_astpc = 0;957#ifdef __amd64958curthread->t_dtrace_regv = 0;959#endif960961/*962* Treat a child created by a call to vfork(2) as if it were its963* parent. We know that there's only one thread of control in such a964* process: this one.965*/966pp = p;967sx_slock(&proctree_lock);968while (pp->p_vmspace == pp->p_pptr->p_vmspace)969pp = pp->p_pptr;970pid = pp->p_pid;971if (pp != p) {972PROC_LOCK(pp);973if ((pp->p_flag & P_WEXIT) != 0) {974/*975* This can happen if the child was created with976* rfork(2). Userspace tracing cannot work reliably in977* such a scenario, but we can at least try.978*/979PROC_UNLOCK(pp);980sx_sunlock(&proctree_lock);981return (-1);982}983_PHOLD(pp);984PROC_UNLOCK(pp);985}986sx_sunlock(&proctree_lock);987988rm_rlock(&fasttrap_tp_lock, &tracker);989990bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];991992/*993* Lookup the tracepoint that the process just hit.994*/995for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {996if (pid == tp->ftt_pid && pc == tp->ftt_pc &&997tp->ftt_proc->ftpc_acount != 0)998break;999}10001001/*1002* If we couldn't find a matching tracepoint, either a tracepoint has1003* been inserted without using the pid<pid> ioctl interface (see1004* fasttrap_ioctl), or somehow we have mislaid this tracepoint.1005*/1006if (tp == NULL) {1007rm_runlock(&fasttrap_tp_lock, &tracker);1008gen = atomic_load_acq_64(&pp->p_fasttrap_tp_gen);1009if (pp != p)1010PRELE(pp);1011if (curthread->t_fasttrap_tp_gen != gen) {1012/*1013* At least one tracepoint associated with this PID has1014* been removed from the table since 
#BP was raised.1015* Speculate that we hit a tracepoint that has since1016* been removed, and retry the instruction.1017*/1018curthread->t_fasttrap_tp_gen = gen;1019#ifdef __amd641020tf->tf_rip = pc;1021#else1022tf->tf_eip = pc;1023#endif1024return (0);1025}1026return (-1);1027}1028if (pp != p)1029PRELE(pp);10301031/*1032* Set the program counter to the address of the traced instruction1033* so that it looks right in ustack() output.1034*/1035rp->r_rip = pc;10361037if (tp->ftt_ids != NULL) {1038fasttrap_id_t *id;10391040#ifdef __amd641041if (p->p_model == DATAMODEL_LP64) {1042for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {1043fasttrap_probe_t *probe = id->fti_probe;10441045if (id->fti_ptype == DTFTP_ENTRY) {1046/*1047* We note that this was an entry1048* probe to help ustack() find the1049* first caller.1050*/1051cookie = dtrace_interrupt_disable();1052DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);1053dtrace_probe(probe->ftp_id, rp->r_rdi,1054rp->r_rsi, rp->r_rdx, rp->r_rcx,1055rp->r_r8);1056DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);1057dtrace_interrupt_enable(cookie);1058} else if (id->fti_ptype == DTFTP_IS_ENABLED) {1059/*1060* Note that in this case, we don't1061* call dtrace_probe() since it's only1062* an artificial probe meant to change1063* the flow of control so that it1064* encounters the true probe.1065*/1066is_enabled = 1;1067} else if (probe->ftp_argmap == NULL) {1068dtrace_probe(probe->ftp_id, rp->r_rdi,1069rp->r_rsi, rp->r_rdx, rp->r_rcx,1070rp->r_r8);1071} else {1072uintptr_t t[5];10731074fasttrap_usdt_args64(probe, rp,1075sizeof (t) / sizeof (t[0]), t);10761077dtrace_probe(probe->ftp_id, t[0], t[1],1078t[2], t[3], t[4]);1079}1080}1081} else {1082#endif1083uintptr_t s0, s1, s2, s3, s4, s5;1084uint32_t *stack = (uint32_t *)rp->r_rsp;10851086/*1087* In 32-bit mode, all arguments are passed on the1088* stack. 
If this is a function entry probe, we need1089* to skip the first entry on the stack as it1090* represents the return address rather than a1091* parameter to the function.1092*/1093s0 = fasttrap_fuword32_noerr(&stack[0]);1094s1 = fasttrap_fuword32_noerr(&stack[1]);1095s2 = fasttrap_fuword32_noerr(&stack[2]);1096s3 = fasttrap_fuword32_noerr(&stack[3]);1097s4 = fasttrap_fuword32_noerr(&stack[4]);1098s5 = fasttrap_fuword32_noerr(&stack[5]);10991100for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {1101fasttrap_probe_t *probe = id->fti_probe;11021103if (id->fti_ptype == DTFTP_ENTRY) {1104/*1105* We note that this was an entry1106* probe to help ustack() find the1107* first caller.1108*/1109cookie = dtrace_interrupt_disable();1110DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);1111dtrace_probe(probe->ftp_id, s1, s2,1112s3, s4, s5);1113DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);1114dtrace_interrupt_enable(cookie);1115} else if (id->fti_ptype == DTFTP_IS_ENABLED) {1116/*1117* Note that in this case, we don't1118* call dtrace_probe() since it's only1119* an artificial probe meant to change1120* the flow of control so that it1121* encounters the true probe.1122*/1123is_enabled = 1;1124} else if (probe->ftp_argmap == NULL) {1125dtrace_probe(probe->ftp_id, s0, s1,1126s2, s3, s4);1127} else {1128uint32_t t[5];11291130fasttrap_usdt_args32(probe, rp,1131sizeof (t) / sizeof (t[0]), t);11321133dtrace_probe(probe->ftp_id, t[0], t[1],1134t[2], t[3], t[4]);1135}1136}1137#ifdef __amd641138}1139#endif1140}11411142/*1143* We're about to do a bunch of work so we cache a local copy of1144* the tracepoint to emulate the instruction, and then find the1145* tracepoint again later if we need to light up any return probes.1146*/1147tp_local = *tp;1148rm_runlock(&fasttrap_tp_lock, &tracker);1149tp = &tp_local;11501151/*1152* Set the program counter to appear as though the traced instruction1153* had completely executed. 
This ensures that fasttrap_getreg() will1154* report the expected value for REG_RIP.1155*/1156rp->r_rip = pc + tp->ftt_size;11571158/*1159* If there's an is-enabled probe connected to this tracepoint it1160* means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'1161* instruction that was placed there by DTrace when the binary was1162* linked. As this probe is, in fact, enabled, we need to stuff 11163* into %eax or %rax. Accordingly, we can bypass all the instruction1164* emulation logic since we know the inevitable result. It's possible1165* that a user could construct a scenario where the 'is-enabled'1166* probe was on some other instruction, but that would be a rather1167* exotic way to shoot oneself in the foot.1168*/1169if (is_enabled) {1170rp->r_rax = 1;1171new_pc = rp->r_rip;1172goto done;1173}11741175/*1176* We emulate certain types of instructions to ensure correctness1177* (in the case of position dependent instructions) or optimize1178* common cases. The rest we have the thread execute back in user-1179* land.1180*/1181switch (tp->ftt_type) {1182case FASTTRAP_T_RET:1183case FASTTRAP_T_RET16:1184{1185uintptr_t dst = 0;1186uintptr_t addr = 0;1187int ret = 0;11881189/*1190* We have to emulate _every_ facet of the behavior of a ret1191* instruction including what happens if the load from %esp1192* fails; in that case, we send a SIGSEGV.1193*/1194#ifdef __amd641195if (p->p_model == DATAMODEL_NATIVE) {1196ret = dst = fasttrap_fulword((void *)rp->r_rsp);1197addr = rp->r_rsp + sizeof (uintptr_t);1198} else {1199#endif1200uint32_t dst32;1201ret = dst32 = fasttrap_fuword32((void *)rp->r_rsp);1202dst = dst32;1203addr = rp->r_rsp + sizeof (uint32_t);1204#ifdef __amd641205}1206#endif12071208if (ret == -1) {1209fasttrap_sigsegv(p, curthread, rp->r_rsp);1210new_pc = pc;1211break;1212}12131214if (tp->ftt_type == FASTTRAP_T_RET16)1215addr += tp->ftt_dest;12161217rp->r_rsp = addr;1218new_pc = dst;1219break;1220}12211222case FASTTRAP_T_JCC:1223{1224uint_t taken = 
0;12251226switch (tp->ftt_code) {1227case FASTTRAP_JO:1228taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0;1229break;1230case FASTTRAP_JNO:1231taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0;1232break;1233case FASTTRAP_JB:1234taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0;1235break;1236case FASTTRAP_JAE:1237taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0;1238break;1239case FASTTRAP_JE:1240taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0;1241break;1242case FASTTRAP_JNE:1243taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0;1244break;1245case FASTTRAP_JBE:1246taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 ||1247(rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0;1248break;1249case FASTTRAP_JA:1250taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 &&1251(rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0;1252break;1253case FASTTRAP_JS:1254taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0;1255break;1256case FASTTRAP_JNS:1257taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0;1258break;1259case FASTTRAP_JP:1260taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0;1261break;1262case FASTTRAP_JNP:1263taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0;1264break;1265case FASTTRAP_JL:1266taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) !=1267((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);1268break;1269case FASTTRAP_JGE:1270taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) ==1271((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);1272break;1273case FASTTRAP_JLE:1274taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 ||1275((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) !=1276((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);1277break;1278case FASTTRAP_JG:1279taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 &&1280((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) ==1281((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);1282break;12831284}12851286if (taken)1287new_pc = tp->ftt_dest;1288else1289new_pc = pc + tp->ftt_size;1290break;1291}12921293case FASTTRAP_T_LOOP:1294{1295uint_t taken = 0;1296#ifdef __amd641297greg_t cx = 
rp->r_rcx--;1298#else1299greg_t cx = rp->r_ecx--;1300#endif13011302switch (tp->ftt_code) {1303case FASTTRAP_LOOPNZ:1304taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 &&1305cx != 0;1306break;1307case FASTTRAP_LOOPZ:1308taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 &&1309cx != 0;1310break;1311case FASTTRAP_LOOP:1312taken = (cx != 0);1313break;1314}13151316if (taken)1317new_pc = tp->ftt_dest;1318else1319new_pc = pc + tp->ftt_size;1320break;1321}13221323case FASTTRAP_T_JCXZ:1324{1325#ifdef __amd641326greg_t cx = rp->r_rcx;1327#else1328greg_t cx = rp->r_ecx;1329#endif13301331if (cx == 0)1332new_pc = tp->ftt_dest;1333else1334new_pc = pc + tp->ftt_size;1335break;1336}13371338case FASTTRAP_T_PUSHL_EBP:1339{1340int ret = 0;13411342#ifdef __amd641343if (p->p_model == DATAMODEL_NATIVE) {1344rp->r_rsp -= sizeof (uintptr_t);1345ret = fasttrap_sulword((void *)rp->r_rsp, rp->r_rbp);1346} else {1347#endif1348rp->r_rsp -= sizeof (uint32_t);1349ret = fasttrap_suword32((void *)rp->r_rsp, rp->r_rbp);1350#ifdef __amd641351}1352#endif13531354if (ret == -1) {1355fasttrap_sigsegv(p, curthread, rp->r_rsp);1356new_pc = pc;1357break;1358}13591360new_pc = pc + tp->ftt_size;1361break;1362}13631364case FASTTRAP_T_NOP:1365new_pc = pc + tp->ftt_size;1366break;13671368case FASTTRAP_T_JMP:1369case FASTTRAP_T_CALL:1370if (tp->ftt_code == 0) {1371new_pc = tp->ftt_dest;1372} else {1373uintptr_t value, addr = tp->ftt_dest;13741375if (tp->ftt_base != FASTTRAP_NOREG)1376addr += fasttrap_getreg(rp, tp->ftt_base);1377if (tp->ftt_index != FASTTRAP_NOREG)1378addr += fasttrap_getreg(rp, tp->ftt_index) <<1379tp->ftt_scale;13801381if (tp->ftt_code == 1) {1382/*1383* If there's a segment prefix for this1384* instruction, we'll need to check permissions1385* and bounds on the given selector, and adjust1386* the address accordingly.1387*/1388if (tp->ftt_segment != FASTTRAP_SEG_NONE &&1389fasttrap_do_seg(tp, rp, &addr) != 0) {1390fasttrap_sigsegv(p, curthread, addr);1391new_pc = 
pc;1392break;1393}13941395#ifdef __amd641396if (p->p_model == DATAMODEL_NATIVE) {1397#endif1398if ((value = fasttrap_fulword((void *)addr))1399== -1) {1400fasttrap_sigsegv(p, curthread,1401addr);1402new_pc = pc;1403break;1404}1405new_pc = value;1406#ifdef __amd641407} else {1408uint32_t value32;1409addr = (uintptr_t)(uint32_t)addr;1410if ((value32 = fasttrap_fuword32((void *)addr))1411== -1) {1412fasttrap_sigsegv(p, curthread,1413addr);1414new_pc = pc;1415break;1416}1417new_pc = value32;1418}1419#endif1420} else {1421new_pc = addr;1422}1423}14241425/*1426* If this is a call instruction, we need to push the return1427* address onto the stack. If this fails, we send the process1428* a SIGSEGV and reset the pc to emulate what would happen if1429* this instruction weren't traced.1430*/1431if (tp->ftt_type == FASTTRAP_T_CALL) {1432int ret = 0;1433uintptr_t addr = 0, pcps;1434#ifdef __amd641435if (p->p_model == DATAMODEL_NATIVE) {1436addr = rp->r_rsp - sizeof (uintptr_t);1437pcps = pc + tp->ftt_size;1438ret = fasttrap_sulword((void *)addr, pcps);1439} else {1440#endif1441addr = rp->r_rsp - sizeof (uint32_t);1442pcps = (uint32_t)(pc + tp->ftt_size);1443ret = fasttrap_suword32((void *)addr, pcps);1444#ifdef __amd641445}1446#endif14471448if (ret == -1) {1449fasttrap_sigsegv(p, curthread, addr);1450new_pc = pc;1451break;1452}14531454rp->r_rsp = addr;1455}14561457break;14581459case FASTTRAP_T_COMMON:1460{1461uintptr_t addr;1462#if defined(__amd64)1463uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22];1464#else1465uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7];1466#endif1467uint_t i = 0;1468fasttrap_scrspace_t *scrspace;1469scrspace = fasttrap_scraddr(curthread, tp->ftt_proc);1470if (scrspace == NULL) {1471/*1472* We failed to allocate scratch space for this thread.1473* Try to write the original instruction back out and1474* reset the pc.1475*/1476if (fasttrap_copyout(tp->ftt_instr, (void *)pc,1477tp->ftt_size))1478fasttrap_sigtrap(p, curthread, pc);1479new_pc = 
pc;1480break;1481}1482addr = scrspace->ftss_addr;14831484/*1485* Generic Instruction Tracing1486* ---------------------------1487*1488* This is the layout of the scratch space in the user-land1489* thread structure for our generated instructions.1490*1491* 32-bit mode bytes1492* ------------------------ -----1493* a: <original instruction> <= 151494* jmp <pc + tp->ftt_size> 51495* b: <original instruction> <= 151496* int T_DTRACE_RET 21497* -----1498* <= 371499*1500* 64-bit mode bytes1501* ------------------------ -----1502* a: <original instruction> <= 151503* jmp 0(%rip) 61504* <pc + tp->ftt_size> 81505* b: <original instruction> <= 151506* int T_DTRACE_RET 21507* -----1508* <= 461509*1510* The %pc is set to a, and curthread->t_dtrace_astpc is set1511* to b. If we encounter a signal on the way out of the1512* kernel, trap() will set %pc to curthread->t_dtrace_astpc1513* so that we execute the original instruction and re-enter1514* the kernel rather than redirecting to the next instruction.1515*1516* If there are return probes (so we know that we're going to1517* need to reenter the kernel after executing the original1518* instruction), the scratch space will just contain the1519* original instruction followed by an interrupt -- the same1520* data as at b.1521*1522* %rip-relative Addressing1523* ------------------------1524*1525* There's a further complication in 64-bit mode due to %rip-1526* relative addressing. While this is clearly a beneficial1527* architectural decision for position independent code, it's1528* hard not to see it as a personal attack against the pid1529* provider since before there was a relatively small set of1530* instructions to emulate; with %rip-relative addressing,1531* almost every instruction can potentially depend on the1532* address at which it's executed. 
Rather than emulating1533* the broad spectrum of instructions that can now be1534* position dependent, we emulate jumps and others as in1535* 32-bit mode, and take a different tack for instructions1536* using %rip-relative addressing.1537*1538* For every instruction that uses the ModRM byte, the1539* in-kernel disassembler reports its location. We use the1540* ModRM byte to identify that an instruction uses1541* %rip-relative addressing and to see what other registers1542* the instruction uses. To emulate those instructions,1543* we modify the instruction to be %rax-relative rather than1544* %rip-relative (or %rcx-relative if the instruction uses1545* %rax; or %r8- or %r9-relative if the REX.B is present so1546* we don't have to rewrite the REX prefix). We then load1547* the value that %rip would have been into the scratch1548* register and generate an instruction to reset the scratch1549* register back to its original value. The instruction1550* sequence looks like this:1551*1552* 64-mode %rip-relative bytes1553* ------------------------ -----1554* a: <modified instruction> <= 151555* movq $<value>, %<scratch> 61556* jmp 0(%rip) 61557* <pc + tp->ftt_size> 81558* b: <modified instruction> <= 151559* int T_DTRACE_RET 21560* -----1561* 521562*1563* We set curthread->t_dtrace_regv so that upon receiving1564* a signal we can reset the value of the scratch register.1565*/15661567ASSERT(tp->ftt_size <= FASTTRAP_MAX_INSTR_SIZE);15681569curthread->t_dtrace_scrpc = addr;1570bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);1571i += tp->ftt_size;15721573#ifdef __amd641574if (tp->ftt_ripmode != 0) {1575greg_t *reg = NULL;15761577ASSERT(p->p_model == DATAMODEL_LP64);1578ASSERT(tp->ftt_ripmode &1579(FASTTRAP_RIP_1 | FASTTRAP_RIP_2));15801581/*1582* If this was a %rip-relative instruction, we change1583* it to be either a %rax- or %rcx-relative1584* instruction (depending on whether those registers1585* are used as another operand; or %r8- or %r9-1586* relative depending on the 
value of REX.B). We then1587* set that register and generate a movq instruction1588* to reset the value.1589*/1590if (tp->ftt_ripmode & FASTTRAP_RIP_X)1591scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);1592else1593scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);15941595if (tp->ftt_ripmode & FASTTRAP_RIP_1)1596scratch[i++] = FASTTRAP_MOV_EAX;1597else1598scratch[i++] = FASTTRAP_MOV_ECX;15991600switch (tp->ftt_ripmode) {1601case FASTTRAP_RIP_1:1602reg = &rp->r_rax;1603curthread->t_dtrace_reg = REG_RAX;1604break;1605case FASTTRAP_RIP_2:1606reg = &rp->r_rcx;1607curthread->t_dtrace_reg = REG_RCX;1608break;1609case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:1610reg = &rp->r_r8;1611curthread->t_dtrace_reg = REG_R8;1612break;1613case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:1614reg = &rp->r_r9;1615curthread->t_dtrace_reg = REG_R9;1616break;1617}16181619/* LINTED - alignment */1620*(uint64_t *)&scratch[i] = *reg;1621curthread->t_dtrace_regv = *reg;1622*reg = pc + tp->ftt_size;1623i += sizeof (uint64_t);1624}1625#endif16261627/*1628* Generate the branch instruction to what would have1629* normally been the subsequent instruction. 
In 32-bit mode,1630* this is just a relative branch; in 64-bit mode this is a1631* %rip-relative branch that loads the 64-bit pc value1632* immediately after the jmp instruction.1633*/1634#ifdef __amd641635if (p->p_model == DATAMODEL_LP64) {1636scratch[i++] = FASTTRAP_GROUP5_OP;1637scratch[i++] = FASTTRAP_MODRM(0, 4, 5);1638/* LINTED - alignment */1639*(uint32_t *)&scratch[i] = 0;1640i += sizeof (uint32_t);1641/* LINTED - alignment */1642*(uint64_t *)&scratch[i] = pc + tp->ftt_size;1643i += sizeof (uint64_t);1644} else {1645#endif1646/*1647* Set up the jmp to the next instruction; note that1648* the size of the traced instruction cancels out.1649*/1650scratch[i++] = FASTTRAP_JMP32;1651/* LINTED - alignment */1652*(uint32_t *)&scratch[i] = pc - addr - 5;1653i += sizeof (uint32_t);1654#ifdef __amd641655}1656#endif16571658curthread->t_dtrace_astpc = addr + i;1659bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);1660i += tp->ftt_size;1661scratch[i++] = FASTTRAP_INT;1662scratch[i++] = T_DTRACE_RET;16631664ASSERT(i <= sizeof (scratch));16651666if (uwrite(curproc, scratch, i, addr) != 0) {1667fasttrap_sigtrap(p, curthread, pc);1668new_pc = pc;1669break;1670}1671if (tp->ftt_retids != NULL) {1672curthread->t_dtrace_step = 1;1673curthread->t_dtrace_ret = 1;1674new_pc = curthread->t_dtrace_astpc;1675} else {1676new_pc = curthread->t_dtrace_scrpc;1677}16781679curthread->t_dtrace_pc = pc;1680curthread->t_dtrace_npc = pc + tp->ftt_size;1681curthread->t_dtrace_on = 1;1682break;1683}16841685default:1686panic("fasttrap: mishandled an instruction");1687}16881689done:1690/*1691* If there were no return probes when we first found the tracepoint,1692* we should feel no obligation to honor any return probes that were1693* subsequently enabled -- they'll just have to wait until the next1694* time around.1695*/1696if (tp->ftt_retids != NULL) {1697/*1698* We need to wait until the results of the instruction are1699* apparent before invoking any return probes. 
If this1700* instruction was emulated we can just call1701* fasttrap_return_common(); if it needs to be executed, we1702* need to wait until the user thread returns to the kernel.1703*/1704if (tp->ftt_type != FASTTRAP_T_COMMON) {1705/*1706* Set the program counter to the address of the traced1707* instruction so that it looks right in ustack()1708* output. We had previously set it to the end of the1709* instruction to simplify %rip-relative addressing.1710*/1711rp->r_rip = pc;17121713fasttrap_return_common(rp, pc, pid, new_pc);1714} else {1715ASSERT(curthread->t_dtrace_ret != 0);1716ASSERT(curthread->t_dtrace_pc == pc);1717ASSERT(curthread->t_dtrace_scrpc != 0);1718ASSERT(new_pc == curthread->t_dtrace_astpc);1719}1720}17211722rp->r_rip = new_pc;17231724PROC_LOCK(p);1725proc_write_regs(curthread, rp);1726PROC_UNLOCK(p);17271728return (0);1729}17301731int1732fasttrap_return_probe(struct trapframe *tf)1733{1734struct reg reg, *rp;1735proc_t *p = curproc;1736uintptr_t pc = curthread->t_dtrace_pc;1737uintptr_t npc = curthread->t_dtrace_npc;17381739fill_frame_regs(tf, ®);1740rp = ®17411742curthread->t_dtrace_pc = 0;1743curthread->t_dtrace_npc = 0;1744curthread->t_dtrace_scrpc = 0;1745curthread->t_dtrace_astpc = 0;17461747#ifdef illumos1748/*1749* Treat a child created by a call to vfork(2) as if it were its1750* parent. 
We know that there's only one thread of control in such a1751* process: this one.1752*/1753while (p->p_flag & SVFORK) {1754p = p->p_parent;1755}1756#endif17571758/*1759* We set rp->r_rip to the address of the traced instruction so1760* that it appears to dtrace_probe() that we're on the original1761* instruction.1762*/1763rp->r_rip = pc;17641765fasttrap_return_common(rp, pc, p->p_pid, npc);17661767return (0);1768}17691770/*ARGSUSED*/1771uint64_t1772fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,1773int aframes)1774{1775struct reg r;17761777fill_regs(curthread, &r);17781779return (fasttrap_anarg(&r, 1, argno));1780}17811782/*ARGSUSED*/1783uint64_t1784fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,1785int aframes)1786{1787struct reg r;17881789fill_regs(curthread, &r);17901791return (fasttrap_anarg(&r, 0, argno));1792}17931794static ulong_t1795fasttrap_getreg(struct reg *rp, uint_t reg)1796{1797#ifdef __amd641798switch (reg) {1799case REG_R15: return (rp->r_r15);1800case REG_R14: return (rp->r_r14);1801case REG_R13: return (rp->r_r13);1802case REG_R12: return (rp->r_r12);1803case REG_R11: return (rp->r_r11);1804case REG_R10: return (rp->r_r10);1805case REG_R9: return (rp->r_r9);1806case REG_R8: return (rp->r_r8);1807case REG_RDI: return (rp->r_rdi);1808case REG_RSI: return (rp->r_rsi);1809case REG_RBP: return (rp->r_rbp);1810case REG_RBX: return (rp->r_rbx);1811case REG_RDX: return (rp->r_rdx);1812case REG_RCX: return (rp->r_rcx);1813case REG_RAX: return (rp->r_rax);1814case REG_TRAPNO: return (rp->r_trapno);1815case REG_ERR: return (rp->r_err);1816case REG_RIP: return (rp->r_rip);1817case REG_CS: return (rp->r_cs);1818case REG_RFL: return (rp->r_rflags);1819case REG_RSP: return (rp->r_rsp);1820case REG_SS: return (rp->r_ss);1821case REG_FS: return (rp->r_fs);1822case REG_GS: return (rp->r_gs);1823case REG_DS: return (rp->r_ds);1824case REG_ES: return (rp->r_es);1825case REG_FSBASE: return (rdmsr(MSR_FSBASE));1826case 
REG_GSBASE: return (rdmsr(MSR_GSBASE));1827}18281829panic("dtrace: illegal register constant");1830/*NOTREACHED*/1831#else1832#define _NGREG 191833if (reg >= _NGREG)1834panic("dtrace: illegal register constant");18351836return (((greg_t *)&rp->r_gs)[reg]);1837#endif1838}183918401841