Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/cddl/dev/kinst/amd64/kinst_isa.c
48378 views
1
/*
2
* SPDX-License-Identifier: CDDL 1.0
3
*
4
* Copyright (c) 2022 Christos Margiolis <[email protected]>
5
* Copyright (c) 2022 Mark Johnston <[email protected]>
6
* Copyright (c) 2023 The FreeBSD Foundation
7
*
8
* Portions of this software were developed by Christos Margiolis
9
* <[email protected]> under sponsorship from the FreeBSD Foundation.
10
*/
11
12
#include <sys/param.h>
13
#include <sys/pcpu.h>
14
15
#include <machine/cpufunc.h>
16
#include <machine/md_var.h>
17
18
#include <sys/dtrace.h>
19
#include <cddl/dev/dtrace/dtrace_cddl.h>
20
#include <dis_tables.h>
21
22
#include "kinst.h"
23
24
/* Single-byte opcodes that get special treatment when creating probes. */
#define	KINST_PUSHL_RBP		0x55
#define	KINST_STI		0xfb
#define	KINST_POPF		0x9d

/* ModR/M byte layout: mod[7:6] reg[5:3] rm[2:0]. */
#define	KINST_MODRM_MOD(b)	(((b) >> 6) & 0x03)
#define	KINST_MODRM_REG(b)	(((b) >> 3) & 0x07)
#define	KINST_MODRM_RM(b)	((b) & 0x07)

/* SIB byte layout: scale[7:6] index[5:3] base[2:0]. */
#define	KINST_SIB_SCALE(s)	(((s) >> 6) & 0x03)
#define	KINST_SIB_INDEX(s)	(((s) >> 3) & 0x07)
#define	KINST_SIB_BASE(s)	((s) & 0x07)

/* REX prefix payload bits: W[3] R[2] X[1] B[0]. */
#define	KINST_REX_W(r)		(((r) >> 3) & 0x01)
#define	KINST_REX_R(r)		(((r) >> 2) & 0x01)
#define	KINST_REX_X(r)		(((r) >> 1) & 0x01)
#define	KINST_REX_B(r)		((r) & 0x01)

/* Values stored in the "flags" member of struct kinst_probe_md. */
#define	KINST_F_CALL		0x0001	/* instruction is a "call" */
#define	KINST_F_DIRECT_CALL	0x0002	/* instruction is a direct call */
#define	KINST_F_RIPREL		0x0004	/* instruction is position-dependent */
#define	KINST_F_JMP		0x0008	/* instruction is a jmp */
#define	KINST_F_MOD_DIRECT	0x0010	/* operand is not a memory address */
46
47
/*
48
* Per-CPU trampolines used when the interrupted thread is executing with
49
* interrupts disabled. If an interrupt is raised while executing a trampoline,
50
* the interrupt thread cannot safely overwrite its trampoline if it hits a
51
* kinst probe while executing the interrupt handler.
52
*/
53
DPCPU_DEFINE_STATIC(uint8_t *, intr_tramp);
54
55
/*
56
* Map ModR/M register bits to a trapframe offset.
57
*/
58
static int
59
kinst_regoff(int reg)
60
{
61
#define _MATCH_REG(i, reg) \
62
case i: \
63
return (offsetof(struct trapframe, tf_ ## reg) / \
64
sizeof(register_t))
65
switch (reg) {
66
_MATCH_REG( 0, rax);
67
_MATCH_REG( 1, rcx);
68
_MATCH_REG( 2, rdx);
69
_MATCH_REG( 3, rbx);
70
_MATCH_REG( 4, rsp); /* SIB when mod != 3 */
71
_MATCH_REG( 5, rbp);
72
_MATCH_REG( 6, rsi);
73
_MATCH_REG( 7, rdi);
74
_MATCH_REG( 8, r8); /* REX.R is set */
75
_MATCH_REG( 9, r9);
76
_MATCH_REG(10, r10);
77
_MATCH_REG(11, r11);
78
_MATCH_REG(12, r12);
79
_MATCH_REG(13, r13);
80
_MATCH_REG(14, r14);
81
_MATCH_REG(15, r15);
82
}
83
#undef _MATCH_REG
84
panic("%s: unhandled register index %d", __func__, reg);
85
}
86
87
/*
88
* Obtain the specified register's value.
89
*/
90
static uint64_t
91
kinst_regval(struct trapframe *frame, int reg)
92
{
93
if (reg == -1)
94
return (0);
95
return (((register_t *)frame)[kinst_regoff(reg)]);
96
}
97
98
static uint32_t
99
kinst_riprel_disp(struct kinst_probe *kp, void *dst)
100
{
101
return ((uint32_t)((intptr_t)kp->kp_patchpoint + kp->kp_md.disp -
102
(intptr_t)dst));
103
}
104
105
static void
106
kinst_trampoline_populate(struct kinst_probe *kp, uint8_t *tramp)
107
{
108
uint8_t *instr;
109
uint32_t disp;
110
int ilen;
111
112
ilen = kp->kp_md.tinstlen;
113
114
kinst_memcpy(tramp, kp->kp_md.template, ilen);
115
if ((kp->kp_md.flags & KINST_F_RIPREL) != 0) {
116
disp = kinst_riprel_disp(kp, tramp);
117
kinst_memcpy(&tramp[kp->kp_md.dispoff], &disp, sizeof(uint32_t));
118
}
119
120
/*
121
* The following position-independent jmp takes us back to the
122
* original code. It is encoded as "jmp *0(%rip)" (six bytes),
123
* followed by the absolute address of the instruction following
124
* the one that was traced (eight bytes).
125
*/
126
tramp[ilen + 0] = 0xff;
127
tramp[ilen + 1] = 0x25;
128
tramp[ilen + 2] = 0x00;
129
tramp[ilen + 3] = 0x00;
130
tramp[ilen + 4] = 0x00;
131
tramp[ilen + 5] = 0x00;
132
instr = kp->kp_patchpoint + kp->kp_md.instlen;
133
kinst_memcpy(&tramp[ilen + 6], &instr, sizeof(uintptr_t));
134
}
135
136
int
137
kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch)
138
{
139
solaris_cpu_t *cpu;
140
uintptr_t *stack, retaddr;
141
struct kinst_probe *kp;
142
struct kinst_probe_md *kpmd;
143
uint8_t *tramp;
144
145
stack = (uintptr_t *)frame->tf_rsp;
146
cpu = &solaris_cpu[curcpu];
147
148
LIST_FOREACH(kp, KINST_GETPROBE(addr), kp_hashnext) {
149
if ((uintptr_t)kp->kp_patchpoint == addr)
150
break;
151
}
152
if (kp == NULL)
153
return (0);
154
155
/*
156
* Report the address of the breakpoint for the benefit of consumers
157
* fetching register values with regs[].
158
*/
159
frame->tf_rip--;
160
161
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
162
cpu->cpu_dtrace_caller = stack[0];
163
DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
164
dtrace_probe(kp->kp_id, 0, 0, 0, 0, 0);
165
cpu->cpu_dtrace_caller = 0;
166
167
kpmd = &kp->kp_md;
168
if ((kpmd->flags & KINST_F_CALL) != 0) {
169
/*
170
* dtrace_invop_start() reserves space on the stack to
171
* store the return address of the call instruction.
172
*/
173
retaddr = (uintptr_t)(kp->kp_patchpoint + kpmd->instlen);
174
*(uintptr_t *)scratch = retaddr;
175
176
if ((kpmd->flags & KINST_F_DIRECT_CALL) != 0) {
177
frame->tf_rip = (uintptr_t)(kp->kp_patchpoint +
178
kpmd->disp + kpmd->instlen);
179
} else {
180
register_t rval;
181
182
if (kpmd->reg1 == -1 && kpmd->reg2 == -1) {
183
/* rip-relative */
184
rval = frame->tf_rip + kpmd->instlen;
185
} else {
186
/* indirect */
187
rval = kinst_regval(frame, kpmd->reg1) +
188
(kinst_regval(frame, kpmd->reg2) <<
189
kpmd->scale);
190
}
191
192
if ((kpmd->flags & KINST_F_MOD_DIRECT) != 0) {
193
frame->tf_rip = rval + kpmd->disp;
194
} else {
195
frame->tf_rip =
196
*(uintptr_t *)(rval + kpmd->disp);
197
}
198
}
199
return (DTRACE_INVOP_CALL);
200
} else {
201
if ((frame->tf_rflags & PSL_I) == 0)
202
tramp = DPCPU_GET(intr_tramp);
203
else
204
tramp = curthread->t_kinst_tramp;
205
if (tramp == NULL) {
206
/*
207
* A trampoline allocation failed, so this probe is
208
* effectively disabled. Restore the original
209
* instruction.
210
*
211
* We can't safely print anything here, but the
212
* trampoline allocator should have left a breadcrumb in
213
* the dmesg.
214
*/
215
kinst_patch_tracepoint(kp, kp->kp_savedval);
216
frame->tf_rip = (register_t)kp->kp_patchpoint;
217
} else {
218
kinst_trampoline_populate(kp, tramp);
219
frame->tf_rip = (register_t)tramp;
220
}
221
return (DTRACE_INVOP_NOP);
222
}
223
}
224
225
void
226
kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val)
227
{
228
register_t reg;
229
int oldwp;
230
231
reg = intr_disable();
232
oldwp = disable_wp();
233
*kp->kp_patchpoint = val;
234
restore_wp(oldwp);
235
intr_restore(reg);
236
}
237
238
static void
239
kinst_set_disp8(struct kinst_probe *kp, uint8_t byte)
240
{
241
kp->kp_md.disp = (int64_t)(int8_t)byte;
242
}
243
244
static void
245
kinst_set_disp32(struct kinst_probe *kp, uint8_t *bytes)
246
{
247
int32_t disp32;
248
249
memcpy(&disp32, bytes, sizeof(disp32));
250
kp->kp_md.disp = (int64_t)disp32;
251
}
252
253
/*
254
* Set up all of the state needed to faithfully execute a probed instruction.
255
*
256
* In the simple case, we copy the instruction unmodified to a per-thread
257
* trampoline, wherein it is followed by a jump back to the original code.
258
* - Instructions can have %rip as an operand:
259
* - with %rip-relative addressing encoded in ModR/M, or
260
* - implicitly as a part of the instruction definition (jmp, call).
261
* - Call instructions (which may be %rip-relative) need to push the correct
262
* return address onto the stack.
263
*
264
* Call instructions are simple enough to be emulated in software, so we simply
265
* do not use the trampoline mechanism in that case. kinst_invop() will compute
266
* the branch target using the address info computed here (register operands and
267
* displacement).
268
*
269
* %rip-relative operands encoded using the ModR/M byte always use a 32-bit
270
* displacement; when populating the trampoline the displacement is adjusted to
271
* be relative to the trampoline address. Trampolines are always allocated
272
* above KERNBASE for this reason.
273
*
274
* For other %rip-relative operands (just jumps) we take the same approach.
275
* Instructions which specify an 8-bit displacement must be rewritten to use a
276
* 32-bit displacement.
277
*/
278
static int
279
kinst_instr_dissect(struct kinst_probe *kp, uint8_t **instr)
280
{
281
struct kinst_probe_md *kpmd;
282
dis86_t d86;
283
uint8_t *bytes, modrm, rex;
284
int dispoff, i, ilen, opcidx;
285
286
kpmd = &kp->kp_md;
287
288
d86.d86_data = instr;
289
d86.d86_get_byte = dtrace_dis_get_byte;
290
d86.d86_check_func = NULL;
291
if (dtrace_disx86(&d86, SIZE64) != 0) {
292
KINST_LOG("failed to disassemble instruction at: %p", *instr);
293
return (EINVAL);
294
}
295
bytes = d86.d86_bytes;
296
kpmd->instlen = kpmd->tinstlen = d86.d86_len;
297
298
/*
299
* Skip over prefixes, save REX.
300
*/
301
rex = 0;
302
for (i = 0; i < kpmd->instlen; i++) {
303
switch (bytes[i]) {
304
case 0xf0 ... 0xf3:
305
/* group 1 */
306
continue;
307
case 0x26:
308
case 0x2e:
309
case 0x36:
310
case 0x3e:
311
case 0x64:
312
case 0x65:
313
/* group 2 */
314
continue;
315
case 0x66:
316
/* group 3 */
317
continue;
318
case 0x67:
319
/* group 4 */
320
continue;
321
case 0x40 ... 0x4f:
322
/* REX */
323
rex = bytes[i];
324
continue;
325
}
326
break;
327
}
328
KASSERT(i < kpmd->instlen,
329
("%s: failed to disassemble instruction at %p", __func__, bytes));
330
opcidx = i;
331
332
/*
333
* Identify instructions of interest by opcode: calls and jumps.
334
* Extract displacements.
335
*/
336
dispoff = -1;
337
switch (bytes[opcidx]) {
338
case 0x0f:
339
switch (bytes[opcidx + 1]) {
340
case 0x80 ... 0x8f:
341
/* conditional jmp near */
342
kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
343
dispoff = opcidx + 2;
344
kinst_set_disp32(kp, &bytes[dispoff]);
345
break;
346
}
347
break;
348
case 0xe3:
349
/*
350
* There is no straightforward way to translate this instruction
351
* to use a 32-bit displacement. Fortunately, it is rarely
352
* used.
353
*/
354
return (EINVAL);
355
case 0x70 ... 0x7f:
356
/* conditional jmp short */
357
kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
358
dispoff = opcidx + 1;
359
kinst_set_disp8(kp, bytes[dispoff]);
360
break;
361
case 0xe9:
362
/* unconditional jmp near */
363
kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
364
dispoff = opcidx + 1;
365
kinst_set_disp32(kp, &bytes[dispoff]);
366
break;
367
case 0xeb:
368
/* unconditional jmp short */
369
kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
370
dispoff = opcidx + 1;
371
kinst_set_disp8(kp, bytes[dispoff]);
372
break;
373
case 0xe8:
374
case 0x9a:
375
/* direct call */
376
kpmd->flags |= KINST_F_CALL | KINST_F_DIRECT_CALL;
377
dispoff = opcidx + 1;
378
kinst_set_disp32(kp, &bytes[dispoff]);
379
break;
380
case 0xff:
381
KASSERT(d86.d86_got_modrm,
382
("no ModR/M byte for instr at %p", *instr - kpmd->instlen));
383
switch (KINST_MODRM_REG(bytes[d86.d86_rmindex])) {
384
case 0x02:
385
case 0x03:
386
/* indirect call */
387
kpmd->flags |= KINST_F_CALL;
388
break;
389
case 0x04:
390
case 0x05:
391
/* indirect jump */
392
kpmd->flags |= KINST_F_JMP;
393
break;
394
}
395
}
396
397
/*
398
* If there's a ModR/M byte, we need to check it to see if the operand
399
* is %rip-relative, and rewrite the displacement if so. If not, we
400
* might still have to extract operand info if this is a call
401
* instruction.
402
*/
403
if (d86.d86_got_modrm) {
404
uint8_t mod, rm, sib;
405
406
kpmd->reg1 = kpmd->reg2 = -1;
407
408
modrm = bytes[d86.d86_rmindex];
409
mod = KINST_MODRM_MOD(modrm);
410
rm = KINST_MODRM_RM(modrm);
411
if (mod == 0 && rm == 5) {
412
kpmd->flags |= KINST_F_RIPREL;
413
dispoff = d86.d86_rmindex + 1;
414
kinst_set_disp32(kp, &bytes[dispoff]);
415
} else if ((kpmd->flags & KINST_F_CALL) != 0) {
416
bool havesib;
417
418
havesib = (mod != 3 && rm == 4);
419
dispoff = d86.d86_rmindex + (havesib ? 2 : 1);
420
if (mod == 1)
421
kinst_set_disp8(kp, bytes[dispoff]);
422
else if (mod == 2)
423
kinst_set_disp32(kp, &bytes[dispoff]);
424
else if (mod == 3)
425
kpmd->flags |= KINST_F_MOD_DIRECT;
426
427
if (havesib) {
428
sib = bytes[d86.d86_rmindex + 1];
429
if (KINST_SIB_BASE(sib) != 5) {
430
kpmd->reg1 = KINST_SIB_BASE(sib) |
431
(KINST_REX_B(rex) << 3);
432
}
433
kpmd->scale = KINST_SIB_SCALE(sib);
434
kpmd->reg2 = KINST_SIB_INDEX(sib) |
435
(KINST_REX_X(rex) << 3);
436
} else {
437
kpmd->reg1 = rm | (KINST_REX_B(rex) << 3);
438
}
439
}
440
}
441
442
/*
443
* Calls are emulated in software; once operands are decoded we have
444
* nothing else to do.
445
*/
446
if ((kpmd->flags & KINST_F_CALL) != 0)
447
return (0);
448
449
/*
450
* Allocate and populate an instruction trampoline template.
451
*
452
* Position-independent instructions can simply be copied, but
453
* position-dependent instructions require some surgery: jump
454
* instructions with an 8-bit displacement need to be converted to use a
455
* 32-bit displacement, and the adjusted displacement needs to be
456
* computed.
457
*/
458
ilen = kpmd->instlen;
459
if ((kpmd->flags & KINST_F_RIPREL) != 0) {
460
if ((kpmd->flags & KINST_F_JMP) == 0 ||
461
bytes[opcidx] == 0x0f ||
462
bytes[opcidx] == 0xe9 ||
463
bytes[opcidx] == 0xff) {
464
memcpy(kpmd->template, bytes, dispoff);
465
memcpy(&kpmd->template[dispoff + 4],
466
&bytes[dispoff + 4], ilen - (dispoff + 4));
467
kpmd->dispoff = dispoff;
468
} else if (bytes[opcidx] == 0xeb) {
469
memcpy(kpmd->template, bytes, opcidx);
470
kpmd->template[opcidx] = 0xe9;
471
kpmd->dispoff = opcidx + 1;
472
473
/* Instruction length changes from 2 to 5. */
474
kpmd->tinstlen = 5;
475
kpmd->disp -= 3;
476
} else if (bytes[opcidx] >= 0x70 && bytes[opcidx] <= 0x7f) {
477
memcpy(kpmd->template, bytes, opcidx);
478
kpmd->template[opcidx] = 0x0f;
479
kpmd->template[opcidx + 1] = bytes[opcidx] + 0x10;
480
kpmd->dispoff = opcidx + 2;
481
482
/* Instruction length changes from 2 to 6. */
483
kpmd->tinstlen = 6;
484
kpmd->disp -= 4;
485
} else {
486
panic("unhandled opcode %#x", bytes[opcidx]);
487
}
488
} else {
489
memcpy(kpmd->template, bytes, ilen);
490
}
491
492
return (0);
493
}
494
495
int
496
kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
497
void *opaque)
498
{
499
struct kinst_probe *kp;
500
dtrace_kinst_probedesc_t *pd;
501
const char *func;
502
int error, instrsize, n, off;
503
uint8_t *instr, *limit, *tmp;
504
bool push_found;
505
506
pd = opaque;
507
func = symval->name;
508
if (kinst_excluded(func))
509
return (0);
510
if (strcmp(func, pd->kpd_func) != 0)
511
return (0);
512
513
instr = (uint8_t *)symval->value;
514
limit = (uint8_t *)symval->value + symval->size;
515
if (instr >= limit)
516
return (0);
517
518
/*
519
* Refuse to instrument functions lacking the usual frame pointer
520
* manipulations since they might correspond to exception handlers.
521
*/
522
tmp = instr;
523
push_found = false;
524
while (tmp < limit) {
525
/*
526
* Checking for 'pop %rbp' as well makes the filtering too
527
* strict as it would skip functions that never return (e.g.,
528
* vnlru_proc()).
529
*/
530
if (*tmp == KINST_PUSHL_RBP) {
531
push_found = true;
532
break;
533
}
534
tmp += dtrace_instr_size(tmp);
535
}
536
if (!push_found)
537
return (0);
538
539
n = 0;
540
while (instr < limit) {
541
instrsize = dtrace_instr_size(instr);
542
off = (int)(instr - (uint8_t *)symval->value);
543
if (pd->kpd_off != -1 && off != pd->kpd_off) {
544
instr += instrsize;
545
continue;
546
}
547
548
/*
549
* Check for instructions which may enable interrupts. Such
550
* instructions are tricky to trace since it is unclear whether
551
* to use the per-thread or per-CPU trampolines. Since they are
552
* rare, we don't bother to implement special handling for them.
553
*
554
* If the caller specified an offset, return an error, otherwise
555
* silently ignore the instruction so that it remains possible
556
* to enable all instructions in a function.
557
*/
558
if (instrsize == 1 &&
559
(instr[0] == KINST_POPF || instr[0] == KINST_STI)) {
560
if (pd->kpd_off != -1)
561
return (EINVAL);
562
instr += instrsize;
563
continue;
564
}
565
566
/*
567
* Prevent separate dtrace(1) instances from creating copies of
568
* the same probe.
569
*/
570
LIST_FOREACH(kp, KINST_GETPROBE(instr), kp_hashnext) {
571
if (strcmp(kp->kp_func, func) == 0 &&
572
strtol(kp->kp_name, NULL, 10) == off)
573
return (0);
574
}
575
if (++n > KINST_PROBETAB_MAX) {
576
KINST_LOG("probe list full: %d entries", n);
577
return (ENOMEM);
578
}
579
kp = malloc(sizeof(struct kinst_probe), M_KINST,
580
M_WAITOK | M_ZERO);
581
kp->kp_func = func;
582
snprintf(kp->kp_name, sizeof(kp->kp_name), "%d", off);
583
kp->kp_savedval = *instr;
584
kp->kp_patchval = KINST_PATCHVAL;
585
kp->kp_patchpoint = instr;
586
587
error = kinst_instr_dissect(kp, &instr);
588
if (error != 0)
589
return (error);
590
591
kinst_probe_create(kp, lf);
592
}
593
594
return (0);
595
}
596
597
int
598
kinst_md_init(void)
599
{
600
uint8_t *tramp;
601
int cpu;
602
603
CPU_FOREACH(cpu) {
604
tramp = kinst_trampoline_alloc(M_WAITOK);
605
if (tramp == NULL)
606
return (ENOMEM);
607
DPCPU_ID_SET(cpu, intr_tramp, tramp);
608
}
609
610
return (0);
611
}
612
613
void
614
kinst_md_deinit(void)
615
{
616
uint8_t *tramp;
617
int cpu;
618
619
CPU_FOREACH(cpu) {
620
tramp = DPCPU_ID_GET(cpu, intr_tramp);
621
if (tramp != NULL) {
622
kinst_trampoline_dealloc(tramp);
623
DPCPU_ID_SET(cpu, intr_tramp, NULL);
624
}
625
}
626
}
627
628
/*
 * Tell whether the named function must not be traced for machine-dependent
 * reasons.  Nothing is excluded here: the answer is false for every name.
 */
bool
kinst_md_excluded(const char *name)
{
	return (false);
}
636
637