GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/ftrace.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Dynamic function tracing support.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <[email protected]>
 *
 * Thanks go to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
#include <linux/execmem.h>

#include <trace/syscall.h>

#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
#include <asm/text-patching.h>

#ifdef CONFIG_DYNAMIC_FTRACE

static int ftrace_poke_late = 0;

void ftrace_arch_code_modify_prepare(void)
	__acquires(&text_mutex)
{
	/*
	 * Need to grab text_mutex to prevent a race from module loading
	 * and live kernel patching from changing the text permissions while
	 * ftrace has it set to "read/write".
	 */
	mutex_lock(&text_mutex);
	ftrace_poke_late = 1;
}

void ftrace_arch_code_modify_post_process(void)
	__releases(&text_mutex)
{
	/*
	 * ftrace_make_{call,nop}() may be called during
	 * module load, and we need to finish the smp_text_poke_batch_add()
	 * that they do, here.
	 */
	smp_text_poke_batch_finish();
	ftrace_poke_late = 0;
	mutex_unlock(&text_mutex);
}

static const char *ftrace_nop_replace(void)
{
	return x86_nops[5];
}

static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	/*
	 * No need to translate into a callthunk. The trampoline does
	 * the depth accounting itself.
	 */
	if (ftrace_is_jmp(addr)) {
		addr = ftrace_jmp_get(addr);
		return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr);
	} else {
		return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
	}
}
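/*
 * Note: both replacements are MCOUNT_INSN_SIZE (5) bytes long: either the
 * 5-byte NOP from x86_nops[5], or "e8/e9 <rel32>" produced by
 * text_gen_insn(), i.e. a near call/jmp whose 32-bit displacement is
 * relative to the end of the instruction at ip.
 */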

static int ftrace_verify_code(unsigned long ip, const char *old_code)
{
	char cur_code[MCOUNT_INSN_SIZE];

	/*
	 * Note:
	 * We are paranoid about modifying text, as if a bug were to happen, it
	 * could cause us to read or write to someplace that could cause harm.
	 * Carefully read the code with copy_from_kernel_nofault(), and make
	 * sure what we read is what we expected it to be before modifying it.
	 */
	/* read the text we want to modify */
	if (copy_from_kernel_nofault(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) {
		WARN_ON(1);
		return -EFAULT;
	}

	/* Make sure it is what we expect it to be */
	if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) {
		ftrace_expected = old_code;
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}

/*
 * Marked __ref because it calls text_poke_early() which is .init.text. That is
 * ok because that call will happen early, during boot, when .init sections are
 * still present.
 */
static int __ref
ftrace_modify_code_direct(unsigned long ip, const char *old_code,
			  const char *new_code)
{
	int ret = ftrace_verify_code(ip, old_code);
	if (ret)
		return ret;

	/* replace the text with the new text */
	if (ftrace_poke_late)
		smp_text_poke_batch_add((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
	else
		text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
	return 0;
}
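/*
 * Note: the direct path above is only used when the patched site cannot be
 * executing: text_poke_early() writes the bytes in place before SMP is up,
 * while the ftrace_poke_late case queues the update through the batched
 * text-poking machinery and relies on ftrace_arch_code_modify_post_process()
 * to flush it.
 */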

int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	const char *new, *old;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace();

	/*
	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
	 * is converted to a nop, and will never become MCOUNT_ADDR
	 * again. This code is either running before SMP (on boot up)
	 * or before the code will ever be executed (module load).
	 * We do not want to use the breakpoint version in this case,
	 * just modify the code directly.
	 */
	if (addr == MCOUNT_ADDR)
		return ftrace_modify_code_direct(ip, old, new);

	/*
	 * x86 overrides ftrace_replace_code -- this function will never be used
	 * in this case.
	 */
	WARN_ONCE(1, "invalid use of ftrace_make_nop");
	return -EINVAL;
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	const char *new, *old;

	old = ftrace_nop_replace();
	new = ftrace_call_replace(ip, addr);

	/* Should only be called when module is loaded */
	return ftrace_modify_code_direct(rec->ip, old, new);
}

/*
 * Should never be called:
 * It is only called by __ftrace_replace_code(), which is called by
 * ftrace_replace_code(), which x86 overrides, and by ftrace_update_code(),
 * which is called to turn mcount into nops or nops into function calls
 * but not to convert a function from not using regs to one that uses
 * regs, which ftrace_modify_call() is for.
 */
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
		       unsigned long addr)
{
	WARN_ON(1);
	return -EINVAL;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip;
	const char *new;

	ip = (unsigned long)(&ftrace_call);
	new = ftrace_call_replace(ip, (unsigned long)func);
	smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);

	ip = (unsigned long)(&ftrace_regs_call);
	new = ftrace_call_replace(ip, (unsigned long)func);
	smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);

	return 0;
}
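/*
 * ftrace_call and ftrace_regs_call label the call instructions inside the
 * default ftrace_caller/ftrace_regs_caller trampolines (defined in the
 * ftrace_*.S assembly); both call sites are redirected to the new handler.
 */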

void ftrace_replace_code(int enable)
{
	struct ftrace_rec_iter *iter;
	struct dyn_ftrace *rec;
	const char *new, *old;
	int ret;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		switch (ftrace_test_record(rec, enable)) {
		case FTRACE_UPDATE_IGNORE:
		default:
			continue;

		case FTRACE_UPDATE_MAKE_CALL:
			old = ftrace_nop_replace();
			break;

		case FTRACE_UPDATE_MODIFY_CALL:
		case FTRACE_UPDATE_MAKE_NOP:
			old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec));
			break;
		}

		ret = ftrace_verify_code(rec->ip, old);
		if (ret) {
			ftrace_expected = old;
			ftrace_bug(ret, rec);
			ftrace_expected = NULL;
			return;
		}
	}

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		switch (ftrace_test_record(rec, enable)) {
		case FTRACE_UPDATE_IGNORE:
		default:
			continue;

		case FTRACE_UPDATE_MAKE_CALL:
		case FTRACE_UPDATE_MODIFY_CALL:
			new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec));
			break;

		case FTRACE_UPDATE_MAKE_NOP:
			new = ftrace_nop_replace();
			break;
		}

		smp_text_poke_batch_add((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);
		ftrace_update_record(rec, enable);
	}
	smp_text_poke_batch_finish();
}

void arch_ftrace_update_code(int command)
{
	ftrace_modify_all_code(command);
}

/* Currently only x86_64 supports dynamic trampolines */
#ifdef CONFIG_X86_64

static inline void *alloc_tramp(unsigned long size)
{
	return execmem_alloc_rw(EXECMEM_FTRACE, size);
}
static inline void tramp_free(void *tramp)
{
	execmem_free(tramp);
}

/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
extern void ftrace_caller_end(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);
extern void ftrace_regs_caller_jmp(void);

/* movq function_trace_op(%rip), %rdx */
/* 0x48 0x8b 0x15 <offset-to-ftrace_trace_op (4 bytes)> */
#define OP_REF_SIZE 7

/*
 * The ftrace_ops is passed to the function callback. Since the
 * trampoline only services a single ftrace_ops, we can pass in
 * that ops directly.
 *
 * The ftrace_op_code_union is used to create a pointer to the
 * ftrace_ops that will be passed to the callback function.
 */
union ftrace_op_code_union {
	char code[OP_REF_SIZE];
	struct {
		char op[3];
		int offset;
	} __attribute__((packed));
};
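/*
 * Byte layout of the instruction this union overlays:
 *
 *	code[0..2]	48 8b 15	movq <rel32>(%rip), %rdx
 *	code[3..6]	<rel32>		the packed 'offset' member
 *
 * Rewriting 'offset' makes the load fetch the ftrace_ops pointer stored at
 * the end of the trampoline instead of the global function_trace_op.
 */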

#define RET_SIZE \
	(IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_MITIGATION_SLS))
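/*
 * RET_SIZE is the room reserved for the trampoline's final return: a 5-byte
 * jmp to the return thunk when retpolines are enabled, otherwise a 1-byte
 * ret plus, with the SLS mitigation, a trailing int3.
 */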

static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
{
	unsigned long start_offset;
	unsigned long end_offset;
	unsigned long op_offset;
	unsigned long call_offset;
	unsigned long jmp_offset;
	unsigned long offset;
	unsigned long npages;
	unsigned long size;
	unsigned long *ptr;
	void *trampoline;
	void *ip, *dest;
	/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
	unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
	unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
	union ftrace_op_code_union op_ptr;
	int ret;

	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
		start_offset = (unsigned long)ftrace_regs_caller;
		end_offset = (unsigned long)ftrace_regs_caller_end;
		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
		call_offset = (unsigned long)ftrace_regs_call;
		jmp_offset = (unsigned long)ftrace_regs_caller_jmp;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		end_offset = (unsigned long)ftrace_caller_end;
		op_offset = (unsigned long)ftrace_caller_op_ptr;
		call_offset = (unsigned long)ftrace_call;
		jmp_offset = 0;
	}

	size = end_offset - start_offset;

	/*
	 * Allocate enough space to store the ftrace_caller code,
	 * the return instruction, as well as the address of the
	 * ftrace_ops this trampoline is used for.
	 */
	trampoline = alloc_tramp(size + RET_SIZE + sizeof(void *));
	if (!trampoline)
		return 0;

	*tramp_size = size + RET_SIZE + sizeof(void *);
	npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);

	/* Copy ftrace_caller onto the trampoline memory */
	ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size);
	if (WARN_ON(ret < 0))
		goto fail;

	ip = trampoline + size;
	if (cpu_wants_rethunk_at(ip))
		__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
	else
		memcpy(ip, retq, sizeof(retq));

	/* No need to test direct calls on created trampolines */
	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
		/* NOP the jnz 1f; but make sure it's a 2 byte jnz */
		ip = trampoline + (jmp_offset - start_offset);
		if (WARN_ON(*(char *)ip != 0x75))
			goto fail;
		ret = copy_from_kernel_nofault(ip, x86_nops[2], 2);
		if (ret < 0)
			goto fail;
	}

	/*
	 * The address of the ftrace_ops that is used for this trampoline
	 * is stored at the end of the trampoline. This will be used to
	 * load the third parameter for the callback. Basically, that
	 * location at the end of the trampoline takes the place of
	 * the global function_trace_op variable.
	 */

	ptr = (unsigned long *)(trampoline + size + RET_SIZE);
	*ptr = (unsigned long)ops;

	op_offset -= start_offset;
	memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);

	/* Are we pointing to the reference? */
	if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0))
		goto fail;

	/* Load the contents of ptr into the callback parameter */
	offset = (unsigned long)ptr;
	offset -= (unsigned long)trampoline + op_offset + OP_REF_SIZE;

	op_ptr.offset = offset;

	/* put in the new offset to the ftrace_ops */
	memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);

	/* put in the call to the function */
	mutex_lock(&text_mutex);
	call_offset -= start_offset;
	/*
	 * No need to translate into a callthunk. The trampoline does
	 * the depth accounting before the call already.
	 */
	dest = ftrace_ops_get_func(ops);
	memcpy(trampoline + call_offset,
	       text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
	       CALL_INSN_SIZE);
	mutex_unlock(&text_mutex);

	/* The ALLOC_TRAMP flag lets us know we created it */
	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;

	set_memory_rox((unsigned long)trampoline, npages);
	return (unsigned long)trampoline;
fail:
	tramp_free(trampoline);
	return 0;
}
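/*
 * Resulting trampoline layout (size = *_caller_end - *_caller):
 *
 *	trampoline + 0			copy of ftrace_caller/ftrace_regs_caller
 *	trampoline + size		ret (or jmp to the return thunk)
 *	trampoline + size + RET_SIZE	pointer to this ftrace_ops, read by the
 *					patched movq above
 */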

void set_ftrace_ops_ro(void)
{
	struct ftrace_ops *ops;
	unsigned long start_offset;
	unsigned long end_offset;
	unsigned long npages;
	unsigned long size;

	do_for_each_ftrace_op(ops, ftrace_ops_list) {
		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
			continue;

		if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
			start_offset = (unsigned long)ftrace_regs_caller;
			end_offset = (unsigned long)ftrace_regs_caller_end;
		} else {
			start_offset = (unsigned long)ftrace_caller;
			end_offset = (unsigned long)ftrace_caller_end;
		}
		size = end_offset - start_offset;
		size = size + RET_SIZE + sizeof(void *);
		npages = DIV_ROUND_UP(size, PAGE_SIZE);
		set_memory_ro((unsigned long)ops->trampoline, npages);
	} while_for_each_ftrace_op(ops);
}

static unsigned long calc_trampoline_call_offset(bool save_regs)
{
	unsigned long start_offset;
	unsigned long call_offset;

	if (save_regs) {
		start_offset = (unsigned long)ftrace_regs_caller;
		call_offset = (unsigned long)ftrace_regs_call;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		call_offset = (unsigned long)ftrace_call;
	}

	return call_offset - start_offset;
}

void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
{
	ftrace_func_t func;
	unsigned long offset;
	unsigned long ip;
	unsigned int size;
	const char *new;

	if (!ops->trampoline) {
		ops->trampoline = create_trampoline(ops, &size);
		if (!ops->trampoline)
			return;
		ops->trampoline_size = size;
		return;
	}

	/*
	 * The ftrace_ops caller may set up its own trampoline.
	 * In such a case, this code must not modify it.
	 */
	if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return;

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	ip = ops->trampoline + offset;
	func = ftrace_ops_get_func(ops);

	mutex_lock(&text_mutex);
	/* Do a safe modify in case the trampoline is executing */
	new = ftrace_call_replace(ip, (unsigned long)func);
	smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
	mutex_unlock(&text_mutex);
}

/* Return the address of the function the trampoline calls */
static void *addr_from_call(void *ptr)
{
	union text_poke_insn call;
	int ret;

	ret = copy_from_kernel_nofault(&call, ptr, CALL_INSN_SIZE);
	if (WARN_ON_ONCE(ret < 0))
		return NULL;

	/* Make sure this is a call */
	if (WARN_ON_ONCE(call.opcode != CALL_INSN_OPCODE)) {
		pr_warn("Expected E8, got %x\n", call.opcode);
		return NULL;
	}

	return ptr + CALL_INSN_SIZE + call.disp;
}

/*
 * If the ops->trampoline was not allocated, then it probably
 * has a static trampoline func, or is the ftrace caller itself.
 */
static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;
	bool save_regs = rec->flags & FTRACE_FL_REGS_EN;
	void *ptr;

	if (ops && ops->trampoline) {
#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) && \
	defined(CONFIG_FUNCTION_GRAPH_TRACER)
		/*
		 * We only know about function graph tracer setting as static
		 * trampoline.
		 */
		if (ops->trampoline == FTRACE_GRAPH_ADDR)
			return (void *)prepare_ftrace_return;
#endif
		return NULL;
	}

	offset = calc_trampoline_call_offset(save_regs);

	if (save_regs)
		ptr = (void *)FTRACE_REGS_ADDR + offset;
	else
		ptr = (void *)FTRACE_ADDR + offset;

	return addr_from_call(ptr);
}

void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;

	/* If we didn't allocate this trampoline, consider it static */
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return static_tramp_func(ops, rec);

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	return addr_from_call((void *)ops->trampoline + offset);
}

void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
{
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return;

	tramp_free((void *)ops->trampoline);
	ops->trampoline = 0;
}

#endif /* CONFIG_X86_64 */
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#if defined(CONFIG_DYNAMIC_FTRACE) && !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)
extern void ftrace_graph_call(void);
static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
	return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr);
}

static int ftrace_mod_jmp(unsigned long ip, void *func)
{
	const char *new;

	new = ftrace_jmp_replace(ip, (unsigned long)func);
	smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
	return 0;
}

int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_graph_caller);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_stub);
}
#endif /* CONFIG_DYNAMIC_FTRACE && !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */

static inline bool skip_ftrace_return(void)
{
	/*
	 * When resuming from suspend-to-ram, this function can be indirectly
	 * called from early CPU startup code while the CPU is in real mode,
	 * which would fail miserably. Make sure the stack pointer is a
	 * virtual address.
	 *
	 * This check isn't as accurate as virt_addr_valid(), but it should be
	 * good enough for this purpose, and it's fast.
	 */
	if ((long)__builtin_frame_address(0) >= 0)
		return true;

	if (ftrace_graph_is_dead())
		return true;

	if (atomic_read(&current->tracing_graph_pause))
		return true;
	return false;
}
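/*
 * The sign test above is a cheap approximation: on x86-64, kernel virtual
 * addresses have the top bit set, so a frame address that is non-negative
 * when treated as a signed long cannot be a normal kernel stack.
 */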

/*
 * Hook the return address and push it onto the stack of return
 * addresses in the current task.
 */
void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
			   unsigned long frame_pointer)
{
	unsigned long return_hooker = (unsigned long)&return_to_handler;

	if (unlikely(skip_ftrace_return()))
		return;

	if (!function_graph_enter(*parent, ip, frame_pointer, parent))
		*parent = return_hooker;
}
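/*
 * On success, function_graph_enter() has recorded the original return
 * address, and *parent (the return slot on the stack) now points at
 * return_to_handler, which restores the saved address once the traced
 * function returns.
 */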

#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
		       struct ftrace_ops *op, struct ftrace_regs *fregs)
{
	struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs;
	unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);
	unsigned long return_hooker = (unsigned long)&return_to_handler;
	unsigned long *parent = (unsigned long *)stack;

	if (unlikely(skip_ftrace_return()))
		return;

	if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs))
		*parent = return_hooker;
}
#endif

#endif /* CONFIG_FUNCTION_GRAPH_TRACER */