Path: blob/main/sys/cddl/dev/kinst/amd64/kinst_isa.c
48378 views
/*1* SPDX-License-Identifier: CDDL 1.02*3* Copyright (c) 2022 Christos Margiolis <[email protected]>4* Copyright (c) 2022 Mark Johnston <[email protected]>5* Copyright (c) 2023 The FreeBSD Foundation6*7* Portions of this software were developed by Christos Margiolis8* <[email protected]> under sponsorship from the FreeBSD Foundation.9*/1011#include <sys/param.h>12#include <sys/pcpu.h>1314#include <machine/cpufunc.h>15#include <machine/md_var.h>1617#include <sys/dtrace.h>18#include <cddl/dev/dtrace/dtrace_cddl.h>19#include <dis_tables.h>2021#include "kinst.h"2223#define KINST_PUSHL_RBP 0x5524#define KINST_STI 0xfb25#define KINST_POPF 0x9d2627#define KINST_MODRM_MOD(b) (((b) & 0xc0) >> 6)28#define KINST_MODRM_REG(b) (((b) & 0x38) >> 3)29#define KINST_MODRM_RM(b) ((b) & 0x07)3031#define KINST_SIB_SCALE(s) (((s) & 0xc0) >> 6)32#define KINST_SIB_INDEX(s) (((s) & 0x38) >> 3)33#define KINST_SIB_BASE(s) (((s) & 0x07) >> 0)3435#define KINST_REX_W(r) (((r) & 0x08) >> 3)36#define KINST_REX_R(r) (((r) & 0x04) >> 2)37#define KINST_REX_X(r) (((r) & 0x02) >> 1)38#define KINST_REX_B(r) (((r) & 0x01) >> 0)3940#define KINST_F_CALL 0x0001 /* instruction is a "call" */41#define KINST_F_DIRECT_CALL 0x0002 /* instruction is a direct call */42#define KINST_F_RIPREL 0x0004 /* instruction is position-dependent */43#define KINST_F_JMP 0x0008 /* instruction is a %rip-relative jmp */44#define KINST_F_MOD_DIRECT 0x0010 /* operand is not a memory address */4546/*47* Per-CPU trampolines used when the interrupted thread is executing with48* interrupts disabled. If an interrupt is raised while executing a trampoline,49* the interrupt thread cannot safely overwrite its trampoline if it hits a50* kinst probe while executing the interrupt handler.51*/52DPCPU_DEFINE_STATIC(uint8_t *, intr_tramp);5354/*55* Map ModR/M register bits to a trapframe offset.56*/57static int58kinst_regoff(int reg)59{60#define _MATCH_REG(i, reg) \61case i: \62return (offsetof(struct trapframe, tf_ ## reg) / \63sizeof(register_t))64switch (reg) {65_MATCH_REG( 0, rax);66_MATCH_REG( 1, rcx);67_MATCH_REG( 2, rdx);68_MATCH_REG( 3, rbx);69_MATCH_REG( 4, rsp); /* SIB when mod != 3 */70_MATCH_REG( 5, rbp);71_MATCH_REG( 6, rsi);72_MATCH_REG( 7, rdi);73_MATCH_REG( 8, r8); /* REX.R is set */74_MATCH_REG( 9, r9);75_MATCH_REG(10, r10);76_MATCH_REG(11, r11);77_MATCH_REG(12, r12);78_MATCH_REG(13, r13);79_MATCH_REG(14, r14);80_MATCH_REG(15, r15);81}82#undef _MATCH_REG83panic("%s: unhandled register index %d", __func__, reg);84}8586/*87* Obtain the specified register's value.88*/89static uint64_t90kinst_regval(struct trapframe *frame, int reg)91{92if (reg == -1)93return (0);94return (((register_t *)frame)[kinst_regoff(reg)]);95}9697static uint32_t98kinst_riprel_disp(struct kinst_probe *kp, void *dst)99{100return ((uint32_t)((intptr_t)kp->kp_patchpoint + kp->kp_md.disp -101(intptr_t)dst));102}103104static void105kinst_trampoline_populate(struct kinst_probe *kp, uint8_t *tramp)106{107uint8_t *instr;108uint32_t disp;109int ilen;110111ilen = kp->kp_md.tinstlen;112113kinst_memcpy(tramp, kp->kp_md.template, ilen);114if ((kp->kp_md.flags & KINST_F_RIPREL) != 0) {115disp = kinst_riprel_disp(kp, tramp);116kinst_memcpy(&tramp[kp->kp_md.dispoff], &disp, sizeof(uint32_t));117}118119/*120* The following position-independent jmp takes us back to the121* original code. It is encoded as "jmp *0(%rip)" (six bytes),122* followed by the absolute address of the instruction following123* the one that was traced (eight bytes).124*/125tramp[ilen + 0] = 0xff;126tramp[ilen + 1] = 0x25;127tramp[ilen + 2] = 0x00;128tramp[ilen + 3] = 0x00;129tramp[ilen + 4] = 0x00;130tramp[ilen + 5] = 0x00;131instr = kp->kp_patchpoint + kp->kp_md.instlen;132kinst_memcpy(&tramp[ilen + 6], &instr, sizeof(uintptr_t));133}134135int136kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch)137{138solaris_cpu_t *cpu;139uintptr_t *stack, retaddr;140struct kinst_probe *kp;141struct kinst_probe_md *kpmd;142uint8_t *tramp;143144stack = (uintptr_t *)frame->tf_rsp;145cpu = &solaris_cpu[curcpu];146147LIST_FOREACH(kp, KINST_GETPROBE(addr), kp_hashnext) {148if ((uintptr_t)kp->kp_patchpoint == addr)149break;150}151if (kp == NULL)152return (0);153154/*155* Report the address of the breakpoint for the benefit of consumers156* fetching register values with regs[].157*/158frame->tf_rip--;159160DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);161cpu->cpu_dtrace_caller = stack[0];162DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);163dtrace_probe(kp->kp_id, 0, 0, 0, 0, 0);164cpu->cpu_dtrace_caller = 0;165166kpmd = &kp->kp_md;167if ((kpmd->flags & KINST_F_CALL) != 0) {168/*169* dtrace_invop_start() reserves space on the stack to170* store the return address of the call instruction.171*/172retaddr = (uintptr_t)(kp->kp_patchpoint + kpmd->instlen);173*(uintptr_t *)scratch = retaddr;174175if ((kpmd->flags & KINST_F_DIRECT_CALL) != 0) {176frame->tf_rip = (uintptr_t)(kp->kp_patchpoint +177kpmd->disp + kpmd->instlen);178} else {179register_t rval;180181if (kpmd->reg1 == -1 && kpmd->reg2 == -1) {182/* rip-relative */183rval = frame->tf_rip + kpmd->instlen;184} else {185/* indirect */186rval = kinst_regval(frame, kpmd->reg1) +187(kinst_regval(frame, kpmd->reg2) <<188kpmd->scale);189}190191if ((kpmd->flags & KINST_F_MOD_DIRECT) != 0) {192frame->tf_rip = rval + kpmd->disp;193} else {194frame->tf_rip =195*(uintptr_t *)(rval + kpmd->disp);196}197}198return (DTRACE_INVOP_CALL);199} else {200if ((frame->tf_rflags & PSL_I) == 0)201tramp = DPCPU_GET(intr_tramp);202else203tramp = curthread->t_kinst_tramp;204if (tramp == NULL) {205/*206* A trampoline allocation failed, so this probe is207* effectively disabled. Restore the original208* instruction.209*210* We can't safely print anything here, but the211* trampoline allocator should have left a breadcrumb in212* the dmesg.213*/214kinst_patch_tracepoint(kp, kp->kp_savedval);215frame->tf_rip = (register_t)kp->kp_patchpoint;216} else {217kinst_trampoline_populate(kp, tramp);218frame->tf_rip = (register_t)tramp;219}220return (DTRACE_INVOP_NOP);221}222}223224void225kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val)226{227register_t reg;228int oldwp;229230reg = intr_disable();231oldwp = disable_wp();232*kp->kp_patchpoint = val;233restore_wp(oldwp);234intr_restore(reg);235}236237static void238kinst_set_disp8(struct kinst_probe *kp, uint8_t byte)239{240kp->kp_md.disp = (int64_t)(int8_t)byte;241}242243static void244kinst_set_disp32(struct kinst_probe *kp, uint8_t *bytes)245{246int32_t disp32;247248memcpy(&disp32, bytes, sizeof(disp32));249kp->kp_md.disp = (int64_t)disp32;250}251252/*253* Set up all of the state needed to faithfully execute a probed instruction.254*255* In the simple case, we copy the instruction unmodified to a per-thread256* trampoline, wherein it is followed by a jump back to the original code.257* - Instructions can have %rip as an operand:258* - with %rip-relative addressing encoded in ModR/M, or259* - implicitly as a part of the instruction definition (jmp, call).260* - Call instructions (which may be %rip-relative) need to push the correct261* return address onto the stack.262*263* Call instructions are simple enough to be emulated in software, so we simply264* do not use the trampoline mechanism in that case. kinst_invop() will compute265* the branch target using the address info computed here (register operands and266* displacement).267*268* %rip-relative operands encoded using the ModR/M byte always use a 32-bit269* displacement; when populating the trampoline the displacement is adjusted to270* be relative to the trampoline address. Trampolines are always allocated271* above KERNBASE for this reason.272*273* For other %rip-relative operands (just jumps) we take the same approach.274* Instructions which specify an 8-bit displacement must be rewritten to use a275* 32-bit displacement.276*/277static int278kinst_instr_dissect(struct kinst_probe *kp, uint8_t **instr)279{280struct kinst_probe_md *kpmd;281dis86_t d86;282uint8_t *bytes, modrm, rex;283int dispoff, i, ilen, opcidx;284285kpmd = &kp->kp_md;286287d86.d86_data = instr;288d86.d86_get_byte = dtrace_dis_get_byte;289d86.d86_check_func = NULL;290if (dtrace_disx86(&d86, SIZE64) != 0) {291KINST_LOG("failed to disassemble instruction at: %p", *instr);292return (EINVAL);293}294bytes = d86.d86_bytes;295kpmd->instlen = kpmd->tinstlen = d86.d86_len;296297/*298* Skip over prefixes, save REX.299*/300rex = 0;301for (i = 0; i < kpmd->instlen; i++) {302switch (bytes[i]) {303case 0xf0 ... 0xf3:304/* group 1 */305continue;306case 0x26:307case 0x2e:308case 0x36:309case 0x3e:310case 0x64:311case 0x65:312/* group 2 */313continue;314case 0x66:315/* group 3 */316continue;317case 0x67:318/* group 4 */319continue;320case 0x40 ... 0x4f:321/* REX */322rex = bytes[i];323continue;324}325break;326}327KASSERT(i < kpmd->instlen,328("%s: failed to disassemble instruction at %p", __func__, bytes));329opcidx = i;330331/*332* Identify instructions of interest by opcode: calls and jumps.333* Extract displacements.334*/335dispoff = -1;336switch (bytes[opcidx]) {337case 0x0f:338switch (bytes[opcidx + 1]) {339case 0x80 ... 0x8f:340/* conditional jmp near */341kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;342dispoff = opcidx + 2;343kinst_set_disp32(kp, &bytes[dispoff]);344break;345}346break;347case 0xe3:348/*349* There is no straightforward way to translate this instruction350* to use a 32-bit displacement. Fortunately, it is rarely351* used.352*/353return (EINVAL);354case 0x70 ... 0x7f:355/* conditional jmp short */356kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;357dispoff = opcidx + 1;358kinst_set_disp8(kp, bytes[dispoff]);359break;360case 0xe9:361/* unconditional jmp near */362kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;363dispoff = opcidx + 1;364kinst_set_disp32(kp, &bytes[dispoff]);365break;366case 0xeb:367/* unconditional jmp short */368kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;369dispoff = opcidx + 1;370kinst_set_disp8(kp, bytes[dispoff]);371break;372case 0xe8:373case 0x9a:374/* direct call */375kpmd->flags |= KINST_F_CALL | KINST_F_DIRECT_CALL;376dispoff = opcidx + 1;377kinst_set_disp32(kp, &bytes[dispoff]);378break;379case 0xff:380KASSERT(d86.d86_got_modrm,381("no ModR/M byte for instr at %p", *instr - kpmd->instlen));382switch (KINST_MODRM_REG(bytes[d86.d86_rmindex])) {383case 0x02:384case 0x03:385/* indirect call */386kpmd->flags |= KINST_F_CALL;387break;388case 0x04:389case 0x05:390/* indirect jump */391kpmd->flags |= KINST_F_JMP;392break;393}394}395396/*397* If there's a ModR/M byte, we need to check it to see if the operand398* is %rip-relative, and rewrite the displacement if so. If not, we399* might still have to extract operand info if this is a call400* instruction.401*/402if (d86.d86_got_modrm) {403uint8_t mod, rm, sib;404405kpmd->reg1 = kpmd->reg2 = -1;406407modrm = bytes[d86.d86_rmindex];408mod = KINST_MODRM_MOD(modrm);409rm = KINST_MODRM_RM(modrm);410if (mod == 0 && rm == 5) {411kpmd->flags |= KINST_F_RIPREL;412dispoff = d86.d86_rmindex + 1;413kinst_set_disp32(kp, &bytes[dispoff]);414} else if ((kpmd->flags & KINST_F_CALL) != 0) {415bool havesib;416417havesib = (mod != 3 && rm == 4);418dispoff = d86.d86_rmindex + (havesib ? 2 : 1);419if (mod == 1)420kinst_set_disp8(kp, bytes[dispoff]);421else if (mod == 2)422kinst_set_disp32(kp, &bytes[dispoff]);423else if (mod == 3)424kpmd->flags |= KINST_F_MOD_DIRECT;425426if (havesib) {427sib = bytes[d86.d86_rmindex + 1];428if (KINST_SIB_BASE(sib) != 5) {429kpmd->reg1 = KINST_SIB_BASE(sib) |430(KINST_REX_B(rex) << 3);431}432kpmd->scale = KINST_SIB_SCALE(sib);433kpmd->reg2 = KINST_SIB_INDEX(sib) |434(KINST_REX_X(rex) << 3);435} else {436kpmd->reg1 = rm | (KINST_REX_B(rex) << 3);437}438}439}440441/*442* Calls are emulated in software; once operands are decoded we have443* nothing else to do.444*/445if ((kpmd->flags & KINST_F_CALL) != 0)446return (0);447448/*449* Allocate and populate an instruction trampoline template.450*451* Position-independent instructions can simply be copied, but452* position-dependent instructions require some surgery: jump453* instructions with an 8-bit displacement need to be converted to use a454* 32-bit displacement, and the adjusted displacement needs to be455* computed.456*/457ilen = kpmd->instlen;458if ((kpmd->flags & KINST_F_RIPREL) != 0) {459if ((kpmd->flags & KINST_F_JMP) == 0 ||460bytes[opcidx] == 0x0f ||461bytes[opcidx] == 0xe9 ||462bytes[opcidx] == 0xff) {463memcpy(kpmd->template, bytes, dispoff);464memcpy(&kpmd->template[dispoff + 4],465&bytes[dispoff + 4], ilen - (dispoff + 4));466kpmd->dispoff = dispoff;467} else if (bytes[opcidx] == 0xeb) {468memcpy(kpmd->template, bytes, opcidx);469kpmd->template[opcidx] = 0xe9;470kpmd->dispoff = opcidx + 1;471472/* Instruction length changes from 2 to 5. */473kpmd->tinstlen = 5;474kpmd->disp -= 3;475} else if (bytes[opcidx] >= 0x70 && bytes[opcidx] <= 0x7f) {476memcpy(kpmd->template, bytes, opcidx);477kpmd->template[opcidx] = 0x0f;478kpmd->template[opcidx + 1] = bytes[opcidx] + 0x10;479kpmd->dispoff = opcidx + 2;480481/* Instruction length changes from 2 to 6. */482kpmd->tinstlen = 6;483kpmd->disp -= 4;484} else {485panic("unhandled opcode %#x", bytes[opcidx]);486}487} else {488memcpy(kpmd->template, bytes, ilen);489}490491return (0);492}493494int495kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,496void *opaque)497{498struct kinst_probe *kp;499dtrace_kinst_probedesc_t *pd;500const char *func;501int error, instrsize, n, off;502uint8_t *instr, *limit, *tmp;503bool push_found;504505pd = opaque;506func = symval->name;507if (kinst_excluded(func))508return (0);509if (strcmp(func, pd->kpd_func) != 0)510return (0);511512instr = (uint8_t *)symval->value;513limit = (uint8_t *)symval->value + symval->size;514if (instr >= limit)515return (0);516517/*518* Refuse to instrument functions lacking the usual frame pointer519* manipulations since they might correspond to exception handlers.520*/521tmp = instr;522push_found = false;523while (tmp < limit) {524/*525* Checking for 'pop %rbp' as well makes the filtering too526* strict as it would skip functions that never return (e.g.,527* vnlru_proc()).528*/529if (*tmp == KINST_PUSHL_RBP) {530push_found = true;531break;532}533tmp += dtrace_instr_size(tmp);534}535if (!push_found)536return (0);537538n = 0;539while (instr < limit) {540instrsize = dtrace_instr_size(instr);541off = (int)(instr - (uint8_t *)symval->value);542if (pd->kpd_off != -1 && off != pd->kpd_off) {543instr += instrsize;544continue;545}546547/*548* Check for instructions which may enable interrupts. Such549* instructions are tricky to trace since it is unclear whether550* to use the per-thread or per-CPU trampolines. Since they are551* rare, we don't bother to implement special handling for them.552*553* If the caller specified an offset, return an error, otherwise554* silently ignore the instruction so that it remains possible555* to enable all instructions in a function.556*/557if (instrsize == 1 &&558(instr[0] == KINST_POPF || instr[0] == KINST_STI)) {559if (pd->kpd_off != -1)560return (EINVAL);561instr += instrsize;562continue;563}564565/*566* Prevent separate dtrace(1) instances from creating copies of567* the same probe.568*/569LIST_FOREACH(kp, KINST_GETPROBE(instr), kp_hashnext) {570if (strcmp(kp->kp_func, func) == 0 &&571strtol(kp->kp_name, NULL, 10) == off)572return (0);573}574if (++n > KINST_PROBETAB_MAX) {575KINST_LOG("probe list full: %d entries", n);576return (ENOMEM);577}578kp = malloc(sizeof(struct kinst_probe), M_KINST,579M_WAITOK | M_ZERO);580kp->kp_func = func;581snprintf(kp->kp_name, sizeof(kp->kp_name), "%d", off);582kp->kp_savedval = *instr;583kp->kp_patchval = KINST_PATCHVAL;584kp->kp_patchpoint = instr;585586error = kinst_instr_dissect(kp, &instr);587if (error != 0)588return (error);589590kinst_probe_create(kp, lf);591}592593return (0);594}595596int597kinst_md_init(void)598{599uint8_t *tramp;600int cpu;601602CPU_FOREACH(cpu) {603tramp = kinst_trampoline_alloc(M_WAITOK);604if (tramp == NULL)605return (ENOMEM);606DPCPU_ID_SET(cpu, intr_tramp, tramp);607}608609return (0);610}611612void613kinst_md_deinit(void)614{615uint8_t *tramp;616int cpu;617618CPU_FOREACH(cpu) {619tramp = DPCPU_ID_GET(cpu, intr_tramp);620if (tramp != NULL) {621kinst_trampoline_dealloc(tramp);622DPCPU_ID_SET(cpu, intr_tramp, NULL);623}624}625}626627/*628* Exclude machine-dependent functions that are not safe-to-trace.629*/630bool631kinst_md_excluded(const char *name)632{633return (false);634}635636637