GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/kernel/ftrace.c
/*
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <[email protected]>
 *
 * Thanks go to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>

#include <trace/syscall.h>

#include <asm/cacheflush.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
#include <asm/nmi.h>


#ifdef CONFIG_DYNAMIC_FTRACE

/*
 * modifying_code is set to notify NMIs that they need to use
 * memory barriers when entering or exiting. But we don't want
 * to burden NMIs with unnecessary memory barriers when code
 * modification is not being done (which is most of the time).
 *
 * A mutex is already held when ftrace_arch_code_modify_prepare
 * and post_process are called. No locks need to be taken here.
 *
 * Stop machine will make sure currently running NMIs are done
 * and new NMIs will see the updated variable before we need
 * to worry about NMIs doing memory barriers.
 */
static int modifying_code __read_mostly;
static DEFINE_PER_CPU(int, save_modifying_code);

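/*
 * Called by the ftrace core (under the mutex mentioned above) before a set
 * of text modifications: make kernel and module text writable and raise
 * modifying_code so NMIs start ordering their accesses against the writer.
 * ftrace_arch_code_modify_post_process() below undoes both steps.
 */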
int ftrace_arch_code_modify_prepare(void)
{
	set_kernel_text_rw();
	set_all_modules_text_rw();
	modifying_code = 1;
	return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
	modifying_code = 0;
	set_all_modules_text_ro();
	set_kernel_text_ro();
	return 0;
}

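/*
 * The mcount call site is a 5-byte x86 near call: one 0xe8 opcode byte
 * followed by a signed 32-bit displacement relative to the next
 * instruction. This union lets the instruction be built field by field
 * and then read back as a raw MCOUNT_INSN_SIZE byte buffer.
 */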
union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		char e8;
		int offset;
	} __attribute__((packed));
};

static int ftrace_calc_offset(long ip, long addr)
{
	return (int)(addr - ip);
}

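/*
 * Build the "call <addr>" instruction to be placed at ip. The displacement
 * is taken relative to the instruction that follows the call site, i.e.
 * ip + MCOUNT_INSN_SIZE (5 bytes on x86). For example, with addr 16 bytes
 * past ip the emitted bytes are: e8 0b 00 00 00 (0x10 - 5 = 0x0b).
 */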
static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	calc.e8 = 0xe8;
	calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	/*
	 * No locking needed, this must be called via kstop_machine
	 * which in essence is like running on a uniprocessor machine.
	 */
	return calc.code;
}

/*
 * Modifying code must take extra care. On an SMP machine, if
 * the code being modified is also being executed on another CPU,
 * that CPU will have undefined results and possibly take a GPF.
 * We use kstop_machine to stop other CPUs from executing code.
 * But this does not stop NMIs from happening. We still need
 * to protect against that. We separate out the modification of
 * the code to take care of this.
 *
 * Two buffers are added: An IP buffer and a "code" buffer.
 *
 * 1) Put the instruction pointer into the IP buffer
 *    and the new code into the "code" buffer.
 * 2) Wait for any running NMIs to finish and set a flag that says
 *    we are modifying code; this is done with an atomic operation.
 * 3) Write the code.
 * 4) Clear the flag.
 * 5) Wait for any running NMIs to finish.
 *
 * If an NMI is executed, the first thing it does is to call
 * "ftrace_nmi_enter". This will check if the flag is set to write
 * and if it is, it will write what is in the IP and "code" buffers.
 *
 * The trick is, it does not matter if everyone is writing the same
 * content to the code location. Also, if a CPU is executing code
 * it is OK to write to that code location if the contents being written
 * are the same as what exists.
 */

#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status;		/* holds return value of text write */
static void *mod_code_ip;		/* holds the IP to write to */
static const void *mod_code_newcode;	/* holds the text to write to the IP */

static unsigned nmi_wait_count;
static atomic_t nmi_update_count = ATOMIC_INIT(0);

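/*
 * Report the NMI statistics gathered below (how many times a modification
 * had to wait on NMIs, and how many times an NMI performed the update
 * itself); read by the ftrace core when dumping dynamic-ftrace statistics.
 */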
int ftrace_arch_read_dyn_info(char *buf, int size)
{
	int r;

	r = snprintf(buf, size, "%u %u",
		     nmi_wait_count,
		     atomic_read(&nmi_update_count));
	return r;
}

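/*
 * Atomically clear MOD_CODE_WRITE_FLAG in nmi_running while leaving the
 * count of in-flight NMIs in the low bits untouched.
 */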
static void clear_mod_flag(void)
{
	int old = atomic_read(&nmi_running);

	for (;;) {
		int new = old & ~MOD_CODE_WRITE_FLAG;

		if (old == new)
			break;

		old = atomic_cmpxchg(&nmi_running, old, new);
	}
}

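/*
 * Perform the actual text write. This runs both on the CPU doing the
 * modification and, while MOD_CODE_WRITE_FLAG is set, from NMI context
 * via ftrace_nmi_enter(), so every path sees fully written instructions.
 */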
static void ftrace_mod_code(void)
{
	/*
	 * Yes, more than one CPU can be writing to mod_code_status
	 * (and the code itself).
	 * But if one were to fail, then they all should, and if one were
	 * to succeed, then they all should.
	 */
	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
					     MCOUNT_INSN_SIZE);

	/* if we fail, then kill any new writers */
	if (mod_code_status)
		clear_mod_flag();
}

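/*
 * NMI entry/exit hooks. If a modification is in progress, the NMI bumps
 * nmi_running; if the write flag is already set it performs the pending
 * write itself before executing any further code, so it can never run
 * a half-patched instruction.
 */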
void ftrace_nmi_enter(void)
{
	__this_cpu_write(save_modifying_code, modifying_code);

	if (!__this_cpu_read(save_modifying_code))
		return;

	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
		smp_rmb();
		ftrace_mod_code();
		atomic_inc(&nmi_update_count);
	}
	/* Must have previous changes seen before executions */
	smp_mb();
}

void ftrace_nmi_exit(void)
{
	if (!__this_cpu_read(save_modifying_code))
		return;

	/* Finish all executions before clearing nmi_running */
	smp_mb();
	atomic_dec(&nmi_running);
}

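/*
 * Spin until no NMIs are in flight, then atomically set
 * MOD_CODE_WRITE_FLAG so any NMI arriving from here on does the write
 * itself (step 2 of the sequence described above).
 */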
static void wait_for_nmi_and_set_mod_flag(void)
{
	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
		return;

	do {
		cpu_relax();
	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));

	nmi_wait_count++;
}

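/*
 * Wait for the NMIs that were running while the flag was set to drain
 * (step 5 above) before the modification is declared complete.
 */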
static void wait_for_nmi(void)
{
	if (!atomic_read(&nmi_running))
		return;

	do {
		cpu_relax();
	} while (atomic_read(&nmi_running));

	nmi_wait_count++;
}

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}

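/*
 * Write one MCOUNT_INSN_SIZE patch at ip, following steps 1-5 of the
 * sequence documented above: publish the buffers, set the flag, write,
 * clear the flag, then wait for the remaining NMIs to drain.
 */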
static int
do_ftrace_mod_code(unsigned long ip, const void *new_code)
{
	/*
	 * On x86_64, kernel text mappings are mapped read-only with
	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
	 * of the kernel text mapping to modify the kernel text.
	 *
	 * For 32-bit kernels, these mappings are the same and we can use
	 * the kernel identity mapping to modify code.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa(ip));

	mod_code_ip = (void *)ip;
	mod_code_newcode = new_code;

	/* The buffers need to be visible before we let NMIs write them */
	smp_mb();

	wait_for_nmi_and_set_mod_flag();

	/* Make sure all running NMIs have finished before we write the code */
	smp_mb();

	ftrace_mod_code();

	/* Make sure the write happens before clearing the bit */
	smp_mb();

	clear_mod_flag();
	wait_for_nmi();

	return mod_code_status;
}

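/* The 5-byte nop that stands in for the mcount call when tracing is off. */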
static const unsigned char *ftrace_nop_replace(void)
{
	return ideal_nops[NOP_ATOMIC5];
}

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	/*
	 * Note: Due to modules and __init, code can
	 * disappear and change, we need to protect against faulting
	 * as well as code changing. We do this by using the
	 * probe_kernel_* functions.
	 *
	 * No real locking needed, this code is run through
	 * kstop_machine, or before SMP starts.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	/* replace the text with the new text */
	if (do_ftrace_mod_code(ip, new_code))
		return -EPERM;

	sync_core();

	return 0;
}

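/*
 * Turn the call to the tracer at rec->ip back into a nop. old is the
 * call instruction we expect to find there; ftrace_modify_code() refuses
 * to patch if the site does not match.
 */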
int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace();

	return ftrace_modify_code(rec->ip, old, new);
}

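/* The inverse of ftrace_make_nop(): turn the nop at rec->ip into a call to addr. */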
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_nop_replace();
	new = ftrace_call_replace(ip, addr);

	return ftrace_modify_code(rec->ip, old, new);
}

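/*
 * Repoint the ftrace_call site in the tracing trampoline at a new tracer
 * function by rewriting its call instruction in place.
 */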
int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call);
	unsigned char old[MCOUNT_INSN_SIZE], *new;
	int ret;

	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = ftrace_modify_code(ip, old, new);

	return ret;
}

int __init ftrace_dyn_arch_init(void *data)
{
	/* The return code is returned via data */
	*(unsigned long *)data = 0;

	return 0;
}
#endif

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

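/*
 * Patch the 32-bit displacement of an existing jmp (opcode 0xe9) at ip,
 * after verifying that the old displacement is the one we expect.
 */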
static int ftrace_mod_jmp(unsigned long ip,
			  int old_offset, int new_offset)
{
	unsigned char code[MCOUNT_INSN_SIZE];

	if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
		return -EINVAL;

	*(int *)(&code[1]) = new_offset;

	if (do_ftrace_mod_code(ip, &code))
		return -EPERM;

	return 0;
}

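/*
 * Flip the jmp at ftrace_graph_call between ftrace_stub (graph tracing
 * off) and ftrace_graph_caller (graph tracing on); the disable variant
 * below swaps the offsets in the other direction.
 */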
int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	int old_offset, new_offset;

	old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
	new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	int old_offset, new_offset;

	old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
	new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it in the stack of return addrs
 * in current thread info.
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
			   unsigned long frame_pointer)
{
	unsigned long old;
	int faulted;
	struct ftrace_graph_ent trace;
	unsigned long return_hooker = (unsigned long)
				&return_to_handler;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against a fault, even if it shouldn't
	 * happen. This tool is too intrusive to
	 * ignore such a protection.
	 */
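	/*
	 * The asm below reads the saved return address (*parent) into old and
	 * replaces it with return_hooker. Either access may fault (labels 1
	 * and 2 have exception-table fixups) and then lands at label 4, which
	 * sets faulted and resumes at label 3.
	 */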
	asm volatile(
		"1: " _ASM_MOV " (%[parent]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
		"   movl $0, %[faulted]\n"
		"3:\n"

		".section .fixup, \"ax\"\n"
		"4: movl $1, %[faulted]\n"
		"   jmp 3b\n"
		".previous\n"

		_ASM_EXTABLE(1b, 4b)
		_ASM_EXTABLE(2b, 4b)

		: [old] "=&r" (old), [faulted] "=r" (faulted)
		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
		: "memory"
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	trace.func = self_addr;
	trace.depth = current->curr_ret_stack + 1;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace)) {
		*parent = old;
		return;
	}

	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
				     frame_pointer) == -EBUSY) {
		*parent = old;
		return;
	}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */