/*
 * Copyright (C) 2008 Matt Fleming <[email protected]>
 * Copyright (C) 2008 Paul Mundt <[email protected]>
 *
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <[email protected]>
 *
 * Thanks goes to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <asm/ftrace.h>
#include <asm/cacheflush.h>
#include <asm/unistd.h>
#include <trace/syscall.h>

#ifdef CONFIG_DYNAMIC_FTRACE
static unsigned char ftrace_replaced_code[MCOUNT_INSN_SIZE];

static unsigned char ftrace_nop[4];
/*
 * If we're trying to nop out a call to a function, we instead
 * place a call to the address after the memory table.
 *
 * 8c011060 <a>:
 * 8c011060:	02 d1		mov.l	8c01106c <a+0xc>,r1
 * 8c011062:	22 4f		sts.l	pr,@-r15
 * 8c011064:	02 c7		mova	8c011070 <a+0x10>,r0
 * 8c011066:	2b 41		jmp	@r1
 * 8c011068:	2a 40		lds	r0,pr
 * 8c01106a:	09 00		nop
 * 8c01106c:	68 24		.word 0x2468	 <--- ip
 * 8c01106e:	1d 8c		.word 0x8c1d
 * 8c011070:	26 4f		lds.l	@r15+,pr <--- ip + MCOUNT_INSN_SIZE
 *
 * We write 0x8c011070 to 0x8c01106c so that on entry to a() we branch
 * past the _mcount call and continue executing code like normal.
 */
static unsigned char *ftrace_nop_replace(unsigned long ip)
{
	__raw_writel(ip + MCOUNT_INSN_SIZE, ftrace_nop);
	return ftrace_nop;
}

static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	/* Place the address in the memory table. */
	__raw_writel(addr, ftrace_replaced_code);

	/*
	 * No locking needed, this must be called via kstop_machine
	 * which in essence is like running on a uniprocessor machine.
	 */
	return ftrace_replaced_code;
}

/*
 * Modifying code must take extra care. On an SMP machine, if
 * the code being modified is also being executed on another CPU
 * that CPU will have undefined results and possibly take a GPF.
 * We use kstop_machine to stop other CPUs from executing code.
 * But this does not stop NMIs from happening. We still need
 * to protect against that. We separate out the modification of
 * the code to take care of this.
 *
 * Two buffers are added: An IP buffer and a "code" buffer.
 *
 * 1) Put the instruction pointer into the IP buffer
 *    and the new code into the "code" buffer.
 * 2) Wait for any running NMIs to finish and set a flag that says
 *    we are modifying code; this is done in an atomic operation.
 * 3) Write the code.
 * 4) Clear the flag.
 * 5) Wait for any running NMIs to finish.
 *
 * If an NMI is executed, the first thing it does is to call
 * "ftrace_nmi_enter". This will check if the flag is set to write
 * and if it is, it will write what is in the IP and "code" buffers.
 *
 * The trick is, it does not matter if everyone is writing the same
 * content to the code location. Also, if a CPU is executing code
 * it is OK to write to that code location if the contents being written
 * are the same as what exists.
 */
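/*
 * Shared state for the scheme above: the top bit of nmi_running is
 * MOD_CODE_WRITE_FLAG (set while a modification is pending) and its low
 * bits count the NMIs currently executing, while mod_code_ip and
 * mod_code_newcode are the "IP" and "code" buffers. do_ftrace_mod_code()
 * below performs steps 1-5 and ftrace_nmi_enter() is the NMI-side hook.
 */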
#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status;		/* holds return value of text write */
static void *mod_code_ip;		/* holds the IP to write to */
static void *mod_code_newcode;		/* holds the text to write to the IP */

static unsigned nmi_wait_count;
static atomic_t nmi_update_count = ATOMIC_INIT(0);

int ftrace_arch_read_dyn_info(char *buf, int size)
{
	int r;

	r = snprintf(buf, size, "%u %u",
		     nmi_wait_count,
		     atomic_read(&nmi_update_count));
	return r;
}

static void clear_mod_flag(void)
{
	int old = atomic_read(&nmi_running);

	for (;;) {
		int new = old & ~MOD_CODE_WRITE_FLAG;

		if (old == new)
			break;

		old = atomic_cmpxchg(&nmi_running, old, new);
	}
}

static void ftrace_mod_code(void)
{
	/*
	 * Yes, more than one CPU can be writing to mod_code_status
	 * (and the code itself).
	 * But if one were to fail, then they all should, and if one were
	 * to succeed, then they all should.
	 */
	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
					     MCOUNT_INSN_SIZE);

	/* if we fail, then kill any new writers */
	if (mod_code_status)
		clear_mod_flag();
}

void ftrace_nmi_enter(void)
{
	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
		smp_rmb();
		ftrace_mod_code();
		atomic_inc(&nmi_update_count);
	}
	/* Must have previous changes seen before executions */
	smp_mb();
}

void ftrace_nmi_exit(void)
{
	/* Finish all executions before clearing nmi_running */
	smp_mb();
	atomic_dec(&nmi_running);
}

static void wait_for_nmi_and_set_mod_flag(void)
{
	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
		return;

	do {
		cpu_relax();
	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));

	nmi_wait_count++;
}

static void wait_for_nmi(void)
{
	if (!atomic_read(&nmi_running))
		return;

	do {
		cpu_relax();
	} while (atomic_read(&nmi_running));

	nmi_wait_count++;
}

static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
	mod_code_ip = (void *)ip;
	mod_code_newcode = new_code;

	/* The buffers need to be visible before we let NMIs write them */
	smp_mb();

	wait_for_nmi_and_set_mod_flag();

	/* Make sure all running NMIs have finished before we write the code */
	smp_mb();

	ftrace_mod_code();

	/* Make sure the write happens before clearing the bit */
	smp_mb();

	clear_mod_flag();
	wait_for_nmi();

	return mod_code_status;
}

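/*
 * ftrace_modify_code() replaces the MCOUNT_INSN_SIZE bytes at @ip with
 * @new_code, but only if they still match @old_code, and flushes the
 * icache over the modified range on success.
 */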
static int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
			      unsigned char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	/*
	 * Note: Due to modules and __init, code can disappear and change;
	 * we need to protect against faulting as well as code changing.
	 * We do this by using the probe_kernel_* functions.
	 *
	 * No real locking needed, this code is run through
	 * kstop_machine, or before SMP starts.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	/* replace the text with the new text */
	if (do_ftrace_mod_code(ip, new_code))
		return -EPERM;

	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);

	return 0;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call) + MCOUNT_INSN_OFFSET;
	unsigned char old[MCOUNT_INSN_SIZE], *new;

	memcpy(old, (unsigned char *)ip, MCOUNT_INSN_SIZE);
	new = ftrace_call_replace(ip, (unsigned long)func);

	return ftrace_modify_code(ip, old, new);
}

int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace(ip);

	return ftrace_modify_code(rec->ip, old, new);
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_nop_replace(ip);
	new = ftrace_call_replace(ip, addr);

	return ftrace_modify_code(rec->ip, old, new);
}

int __init ftrace_dyn_arch_init(void *data)
{
	/* The return code is returned via data */
	__raw_writel(0, (unsigned long)data);

	return 0;
}
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

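/*
 * ftrace_mod() rewrites the word at @ip from @old_addr to @new_addr after
 * verifying the old value; it is used below to switch the ftrace_graph_call
 * site between skip_trace and ftrace_graph_caller.
 */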
static int ftrace_mod(unsigned long ip, unsigned long old_addr,
		      unsigned long new_addr)
{
	unsigned char code[MCOUNT_INSN_SIZE];

	if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	if (old_addr != __raw_readl((unsigned long *)code))
		return -EINVAL;

	__raw_writel(new_addr, ip);
	return 0;
}

int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip, old_addr, new_addr;

	ip = (unsigned long)(&ftrace_graph_call) + GRAPH_INSN_OFFSET;
	old_addr = (unsigned long)(&skip_trace);
	new_addr = (unsigned long)(&ftrace_graph_caller);

	return ftrace_mod(ip, old_addr, new_addr);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip, old_addr, new_addr;

	ip = (unsigned long)(&ftrace_graph_call) + GRAPH_INSN_OFFSET;
	old_addr = (unsigned long)(&ftrace_graph_caller);
	new_addr = (unsigned long)(&skip_trace);

	return ftrace_mod(ip, old_addr, new_addr);
}
#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it in the stack of return addrs
 * in the current thread info.
 *
 * This is the main routine for the function graph tracer. The function
 * graph tracer essentially works like this:
 *
 * parent is the stack address containing self_addr's return address.
 * We pull the real return address out of parent and store it in
 * current's ret_stack. Then, we replace the return address on the stack
 * with the address of return_to_handler. self_addr is the function that
 * called mcount.
 *
 * When self_addr returns, it will jump to return_to_handler which calls
 * ftrace_return_to_handler. ftrace_return_to_handler will pull the real
 * return address off of current's ret_stack and jump to it.
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
	unsigned long old;
	int faulted, err;
	struct ftrace_graph_ent trace;
	unsigned long return_hooker = (unsigned long)&return_to_handler;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against a fault, even if it shouldn't
	 * happen. This tool is too intrusive to
	 * ignore such a protection.
	 */
	__asm__ __volatile__(
		"1:					\n\t"
		"mov.l	@%2, %0				\n\t"
		"2:					\n\t"
		"mov.l	%3, @%2				\n\t"
		"mov	#0, %1				\n\t"
		"3:					\n\t"
		".section .fixup, \"ax\"		\n\t"
		"4:					\n\t"
		"mov.l	5f, %0				\n\t"
		"jmp	@%0				\n\t"
		" mov	#1, %1				\n\t"
		".balign 4				\n\t"
		"5:	.long 3b			\n\t"
		".previous				\n\t"
		".section __ex_table,\"a\"		\n\t"
		".long 1b, 4b				\n\t"
		".long 2b, 4b				\n\t"
		".previous				\n\t"
		: "=&r" (old), "=r" (faulted)
		: "r" (parent), "r" (return_hooker)
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0);
	if (err == -EBUSY) {
		__raw_writel(old, parent);
		return;
	}

	trace.func = self_addr;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace)) {
		current->curr_ret_stack--;
		__raw_writel(old, parent);
	}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */