/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <[email protected]>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec etc.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers
 *   are not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page_types.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE	   0x40000000

	.code64
	.section .entry.text, "ax"

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
	retq
END(mcount)

ENTRY(ftrace_caller)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	MCOUNT_SAVE_FRAME

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

GLOBAL(ftrace_call)
	call ftrace_stub

	MCOUNT_RESTORE_FRAME

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
	jmp ftrace_stub
#endif

GLOBAL(ftrace_stub)
	retq
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
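/*
 * Without dynamic patching, every function entry calls mcount directly
 * (gcc -pg). A rough sketch of the stack inside these handlers, assuming
 * MCOUNT_SAVE_FRAME still reserves 0x38 bytes as in asm/ftrace.h, and
 * that mcount is called after the function prologue has pushed %rbp:
 *
 *	0x38(%rsp) - return address into the traced function itself
 *	8(%rbp)    - return address into the traced function's caller
 *
 * which is why the code below passes 0x38(%rsp), minus MCOUNT_INSN_SIZE
 * to point back at the call site, as the traced ip, and 8(%rbp) as the
 * parent ip.
 */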
ENTRY(mcount)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpq $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller

	cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
	jnz ftrace_graph_caller
#endif

GLOBAL(ftrace_stub)
	retq

trace:
	MCOUNT_SAVE_FRAME

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	call   *ftrace_trace_function

	MCOUNT_RESTORE_FRAME

	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	cmpl $0, function_trace_stop
	jne ftrace_stub

	MCOUNT_SAVE_FRAME

	leaq 8(%rbp), %rdi
	movq 0x38(%rsp), %rsi
	movq (%rbp), %rdx
	subq $MCOUNT_INSN_SIZE, %rsi

	call	prepare_ftrace_return

	MCOUNT_RESTORE_FRAME

	retq
END(ftrace_graph_caller)

GLOBAL(return_to_handler)
	subq  $24, %rsp

	/* Save the return values */
	movq %rax, (%rsp)
	movq %rdx, 8(%rsp)
	movq %rbp, %rdi

	call ftrace_return_to_handler

	movq %rax, %rdi
	movq 8(%rsp), %rdx
	movq (%rsp), %rax
	addq $24, %rsp
	jmp *%rdi
#endif


#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */


.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc  1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
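/*
 * A worked sketch of what FIXUP_TOP_OF_STACK reconstructs: SYSCALL does
 * not switch stacks and saves nothing; it only moves the return RIP
 * into %rcx and RFLAGS into %r11. So the hardware-frame part of pt_regs
 * is garbage until we fill it in:
 *
 *	RSP    <- per-cpu old_rsp (the user %rsp stashed at syscall entry)
 *	SS/CS  <- the __USER_DS/__USER_CS constants
 *	EFLAGS <- the R11 save slot (where SYSCALL put the user rflags)
 *	RCX    <- -1, a non-canonical poison value: the user %rcx was
 *		  clobbered by SYSCALL anyway, so it must not look valid
 */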
	/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp offset=0
	movq PER_CPU_VAR(old_rsp),\tmp
	movq \tmp,RSP+\offset(%rsp)
	movq $__USER_DS,SS+\offset(%rsp)
	movq $__USER_CS,CS+\offset(%rsp)
	movq $-1,RCX+\offset(%rsp)
	movq R11+\offset(%rsp),\tmp	/* get eflags */
	movq \tmp,EFLAGS+\offset(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp offset=0
	movq RSP+\offset(%rsp),\tmp
	movq \tmp,PER_CPU_VAR(old_rsp)
	movq EFLAGS+\offset(%rsp),\tmp
	movq \tmp,R11+\offset(%rsp)
	.endm

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq_cfi $__KERNEL_DS		/* ss */
	/*CFI_REL_OFFSET	ss,0*/
	pushq_cfi %rax			/* rsp */
	CFI_REL_OFFSET	rsp,0
	pushq_cfi $X86_EFLAGS_IF	/* eflags - interrupts on */
	/*CFI_REL_OFFSET	rflags,0*/
	pushq_cfi $__KERNEL_CS		/* cs */
	/*CFI_REL_OFFSET	cs,0*/
	pushq_cfi \child_rip		/* rip */
	CFI_REL_OFFSET	rip,0
	pushq_cfi %rax			/* orig rax */
	.endm

	.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm

/*
 * initial frame state for interrupts (and exceptions without error code)
 */
	.macro EMPTY_FRAME start=1 offset=0
	.if \start
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,8+\offset
	.else
	CFI_DEF_CFA_OFFSET 8+\offset
	.endif
	.endm

/*
 * initial frame state for interrupts (and exceptions without error code)
 */
	.macro INTR_FRAME start=1 offset=0
	EMPTY_FRAME \start, SS+8+\offset-RIP
	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
	CFI_REL_OFFSET rsp, RSP+\offset-RIP
	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
	CFI_REL_OFFSET rip, RIP+\offset-RIP
	.endm

/*
 * initial frame state for exceptions with error code (and interrupts
 * with vector already pushed)
 */
	.macro XCPT_FRAME start=1 offset=0
	INTR_FRAME \start, RIP+\offset-ORIG_RAX
	/*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
	.endm

/*
 * frame that enables calling into C.
 */
	.macro PARTIAL_FRAME start=1 offset=0
	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
	.endm

/*
 * frame that enables passing a complete pt_regs to a C function.
 */
	.macro DEFAULT_FRAME start=1 offset=0
	PARTIAL_FRAME \start, R11+\offset-R15
	CFI_REL_OFFSET rbx, RBX+\offset
	CFI_REL_OFFSET rbp, RBP+\offset
	CFI_REL_OFFSET r12, R12+\offset
	CFI_REL_OFFSET r13, R13+\offset
	CFI_REL_OFFSET r14, R14+\offset
	CFI_REL_OFFSET r15, R15+\offset
	.endm
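/*
 * For reference when reading the offset arithmetic below, the pt_regs
 * save slots as laid out by asm/calling.h in this tree (offsets in
 * bytes from the bottom of the frame; double-check against your copy
 * of calling.h if these ever change):
 *
 *	R15 0, R14 8, R13 16, R12 24, RBP 32, RBX 40,
 *	R11 48 (= ARGOFFSET), R10 56, R9 64, R8 72, RAX 80, RCX 88,
 *	RDX 96, RSI 104, RDI 112, ORIG_RAX 120,
 *	RIP 128, CS 136, EFLAGS 144, RSP 152, SS 160
 *
 * So e.g. RDI+8-RBP in save_args means "the RDI slot, on a stack whose
 * bottom is the RBP slot rather than R15, plus 8 for the return address
 * that the call pushed".
 */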
/* save partial stack frame */
	.pushsection .kprobes.text, "ax"
ENTRY(save_args)
	XCPT_FRAME
	cld
	/*
	 * start from rbp in pt_regs and jump over
	 * return address.
	 */
	movq_cfi rdi, RDI+8-RBP
	movq_cfi rsi, RSI+8-RBP
	movq_cfi rdx, RDX+8-RBP
	movq_cfi rcx, RCX+8-RBP
	movq_cfi rax, RAX+8-RBP
	movq_cfi  r8,  R8+8-RBP
	movq_cfi  r9,  R9+8-RBP
	movq_cfi r10, R10+8-RBP
	movq_cfi r11, R11+8-RBP

	leaq -RBP+8(%rsp),%rdi	/* arg1 for handler */
	movq_cfi rbp, 8		/* push %rbp */
	leaq 8(%rsp), %rbp	/* mov %rsp, %rbp */
	testl $3, CS(%rdi)
	je 1f
	SWAPGS
	/*
	 * irq_count is used to check if a CPU is already on an interrupt stack
	 * or not. While this is essentially redundant with preempt_count it is
	 * a little cheaper to use a separate counter in the PDA (short of
	 * moving irq_enter into assembly, which would be too much work)
	 */
1:	incl PER_CPU_VAR(irq_count)
	jne 2f
	popq_cfi %rax			/* move return address... */
	mov PER_CPU_VAR(irq_stack_ptr),%rsp
	EMPTY_FRAME 0
	pushq_cfi %rbp			/* backlink for unwinder */
	pushq_cfi %rax			/* ... to the new stack */
	/*
	 * We entered an interrupt context - irqs are off:
	 */
2:	TRACE_IRQS_OFF
	ret
	CFI_ENDPROC
END(save_args)
	.popsection

ENTRY(save_rest)
	PARTIAL_FRAME 1 REST_SKIP+8
	movq 5*8+16(%rsp), %r11	/* save return address */
	movq_cfi rbx, RBX+16
	movq_cfi rbp, RBP+16
	movq_cfi r12, R12+16
	movq_cfi r13, R13+16
	movq_cfi r14, R14+16
	movq_cfi r15, R15+16
	movq %r11, 8(%rsp)	/* return address */
	FIXUP_TOP_OF_STACK %r11, 16
	ret
	CFI_ENDPROC
END(save_rest)

/* save complete stack frame */
	.pushsection .kprobes.text, "ax"
ENTRY(save_paranoid)
	XCPT_FRAME 1 RDI+8
	cld
	movq_cfi rdi, RDI+8
	movq_cfi rsi, RSI+8
	movq_cfi rdx, RDX+8
	movq_cfi rcx, RCX+8
	movq_cfi rax, RAX+8
	movq_cfi  r8,  R8+8
	movq_cfi  r9,  R9+8
	movq_cfi r10, R10+8
	movq_cfi r11, R11+8
	movq_cfi rbx, RBX+8
	movq_cfi rbp, RBP+8
	movq_cfi r12, R12+8
	movq_cfi r13, R13+8
	movq_cfi r14, R14+8
	movq_cfi r15, R15+8
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f	/* negative -> in kernel */
	SWAPGS
	xorl %ebx,%ebx
1:	ret
	CFI_ENDPROC
END(save_paranoid)
	.popsection

/*
 * A newly forked process directly context switches into this address.
 *
 * rdi: prev task we switched from
 */
ENTRY(ret_from_fork)
	DEFAULT_FRAME

	LOCK ; btr $TIF_FORK,TI_flags(%r8)

	pushq_cfi kernel_eflags(%rip)
	popfq_cfi				# reset kernel eflags

	call schedule_tail			# rdi: 'prev' task parameter

	GET_THREAD_INFO(%rcx)

	RESTORE_REST

	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
	je   int_ret_from_sys_call

	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
	jnz  int_ret_from_sys_call

	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
	jmp ret_from_sys_call			# go to the SYSRET fastpath

	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack
 * frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frames, always force IRET. That is because
 * it deals with non-canonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
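/*
 * Roughly, the fast path below does:
 *
 *	swapgs; stash user %rsp in old_rsp; load the per-cpu kernel stack
 *	SAVE_ARGS; check _TIF_WORK_SYSCALL_ENTRY (-> tracesys if set)
 *	dispatch: call *sys_call_table(,%rax,8), with %r10 moved to %rcx
 *	check _TIF_ALLWORK_MASK (-> careful/signal paths if set)
 *	reload %rcx/%r11 and the user %rsp; USERGS_SYSRET64
 *
 * Nothing on this path builds the SS/RSP/RFLAGS/CS/RIP part of pt_regs;
 * that is exactly what FIXUP_TOP_OF_STACK exists for on the slow branches.
 */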
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)

	movq	%rsp,PER_CPU_VAR(old_rsp)
	movq	PER_CPU_VAR(kernel_stack),%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
system_call_fastpath:
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX:	rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz  sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq	PER_CPU_VAR(old_rsp), %rsp
	USERGS_SYSRET64

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq_cfi %rdi
	call schedule
	popq_cfi %rdi
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
	bt $TIF_SYSCALL_AUDIT,%edx
	jc sysret_audit
#endif
	/*
	 * We have a signal, or exit tracing or single-step.
	 * These all wind up with the iret return path anyway,
	 * so just join that path right now.
	 */
	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
	jmp int_check_syscall_exit_work

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call
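/*
 * The register shuffle in auditsys below lines the syscall registers up
 * with the C calling convention for, roughly, this era's audit entry
 * hook (check the declaration in linux/audit.h for the authoritative
 * prototype):
 *
 *	audit_syscall_entry(arch, major, a0, a1, a2, a3)
 *	                    %rdi  %rsi  %rdx %rcx %r8  %r9
 *
 * i.e. arch = AUDIT_ARCH_X86_64, major = the syscall number from %rax,
 * and the first four syscall arguments from %rdi/%rsi/%rdx/%r10.
 */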
#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Fast path for syscall audit without full syscall trace.
	 * We just call audit_syscall_entry() directly, and then
	 * jump back to the normal fast path.
	 */
auditsys:
	movq %r10,%r9			/* 6th arg: 4th syscall arg */
	movq %rdx,%r8			/* 5th arg: 3rd syscall arg */
	movq %rsi,%rcx			/* 4th arg: 2nd syscall arg */
	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
	movq %rax,%rsi			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
	call audit_syscall_entry
	LOAD_ARGS 0		/* reload call-clobbered registers */
	jmp system_call_fastpath

	/*
	 * Return fast path for syscall audit. Call audit_syscall_exit()
	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
	 * masked off.
	 */
sysret_audit:
	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
	cmpq $0,%rsi			/* is it < 0? */
	setl %al			/* 1 if so, 0 if not */
	movzbl %al,%edi			/* zero-extend that into %edi */
	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
	jmp sysret_check
#endif	/* CONFIG_AUDITSYSCALL */

	/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jz auditsys
#endif
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
GLOBAL(int_ret_from_sys_call)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
GLOBAL(int_with_check)
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	andl    $~TS_COMPAT,TI_status(%rcx)
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq_cfi %rdi
	call schedule
	popq_cfi %rdi
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
int_check_syscall_exit_work:
	SAVE_REST
	/* Check for syscall exit trace */
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq_cfi %rdi
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq_cfi %rdi
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest

int_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)

/*
 * Certain special system calls need to save a complete full stack frame.
 */
	.macro PTREGSCALL label,func,arg
ENTRY(\label)
	PARTIAL_FRAME 1 8		/* offset 8: return address */
	subq $REST_SKIP, %rsp
	CFI_ADJUST_CFA_OFFSET REST_SKIP
	call save_rest
	DEFAULT_FRAME 0 8		/* offset 8: return address */
	leaq 8(%rsp), \arg	/* pt_regs pointer */
	call \func
	jmp ptregscall_common
	CFI_ENDPROC
END(\label)
	.endm

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi
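/*
 * Note the "ret $REST_SKIP" below: ret with an immediate pops the
 * return address first and then adds the immediate to %rsp, so it both
 * returns to the PTREGSCALL stub's caller and discards the REST_SKIP
 * bytes the stub reserved for the extended registers in one instruction.
 */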
ENTRY(ptregscall_common)
	DEFAULT_FRAME 1 8	/* offset 8: return address */
	RESTORE_TOP_OF_STACK %r11, 8
	movq_cfi_restore R15+8, r15
	movq_cfi_restore R14+8, r14
	movq_cfi_restore R13+8, r13
	movq_cfi_restore R12+8, r12
	movq_cfi_restore RBP+8, rbp
	movq_cfi_restore RBX+8, rbx
	ret $REST_SKIP		/* pop extended registers */
	CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
	CFI_STARTPROC
	addq $8, %rsp
	PARTIAL_FRAME 0
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq %rsp, %rcx
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	PARTIAL_FRAME 0
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * Build the entry stubs and pointer table with some assembler magic.
 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 * single cache line on all modern x86 implementations.
 */
	.section .init.rodata,"a"
ENTRY(interrupt)
	.section .entry.text
	.p2align 5
	.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
	INTR_FRAME
vector=FIRST_EXTERNAL_VECTOR
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
	.balign 32
  .rept	7
    .if vector < NR_VECTORS
      .if vector <> FIRST_EXTERNAL_VECTOR
	CFI_ADJUST_CFA_OFFSET -8
      .endif
1:	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
	jmp 2f
      .endif
      .previous
	.quad 1b
      .section .entry.text
vector=vector+1
    .endif
  .endr
2:	jmp common_interrupt
.endr
	CFI_ENDPROC
END(irq_entries_start)

.previous
END(interrupt)
.previous
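/*
 * Back-of-the-envelope check on the packing above: each stub is a
 * 2-byte "push imm8" plus a 2-byte short jmp (the 7th stub in a chunk
 * falls through to the shared "jmp common_interrupt" instead), so 7
 * stubs fit each 32-byte .balign chunk. The pushed value ~vector+0x80
 * equals 127-vector, which stays in signed byte range for vectors
 * 32..255; common_interrupt then adds -0x80 to turn it back into
 * ~vector, i.e. the [-256,-1] encoding that do_IRQ expects.
 */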
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): ~(interrupt number) */
	.macro interrupt func
	/* reserve pt_regs for scratch regs and rbp */
	subq $ORIG_RAX-RBP, %rsp
	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
	call save_args
	PARTIAL_FRAME 0
	call \func
	.endm

/*
 * Interrupt entry/exit should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
	/*
	 * The interrupt stubs push (~vector+0x80) onto the stack and
	 * then jump to common_interrupt.
	 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	XCPT_FRAME
	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
	interrupt do_IRQ
	/* 0(%rsp): old_rsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl PER_CPU_VAR(irq_count)
	leaveq

	CFI_RESTORE	rbp
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8

	/* we did not save rbx, restore only from ARGOFFSET */
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful

retint_swapgs:		/* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp restore_args

retint_restore_args:	/* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0

irq_return:
	INTERRUPT_RETURN

	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq

	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0

	SWAPGS
	jmp general_protection

	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq_cfi %rdi
	call  schedule
	popq_cfi %rdi
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz    retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule
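/*
 * The "bt $9" test below (and in TRACE_IRQS_IRETQ above) checks the IF
 * flag, bit 9 (mask 0x200) of the saved EFLAGS image. For example, a
 * saved EFLAGS of 0x246 has IF clear, so preemption must be skipped:
 * the interrupted kernel code was running with interrupts disabled.
 */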
#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	 threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
/*
 * End of kprobes section
 */
	.popsection

/*
 * APIC interrupts.
 */
.macro apicinterrupt num sym do_sym
ENTRY(\sym)
	INTR_FRAME
	pushq_cfi $~(\num)
	interrupt \do_sym
	jmp ret_from_intr
	CFI_ENDPROC
END(\sym)
.endm

#ifdef CONFIG_SMP
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
apicinterrupt REBOOT_VECTOR \
	reboot_interrupt smp_reboot_interrupt
#endif

#ifdef CONFIG_X86_UV
apicinterrupt UV_BAU_MESSAGE \
	uv_bau_message_intr1 uv_bau_message_interrupt
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
	apic_timer_interrupt smp_apic_timer_interrupt
apicinterrupt X86_PLATFORM_IPI_VECTOR \
	x86_platform_ipi smp_x86_platform_ipi

#ifdef CONFIG_SMP
.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
.if NUM_INVALIDATE_TLB_VECTORS > \idx
apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
	invalidate_interrupt\idx smp_invalidate_interrupt
.endif
.endr
#endif

apicinterrupt THRESHOLD_APIC_VECTOR \
	threshold_interrupt smp_threshold_interrupt
apicinterrupt THERMAL_APIC_VECTOR \
	thermal_interrupt smp_thermal_interrupt

#ifdef CONFIG_X86_MCE
apicinterrupt MCE_SELF_VECTOR \
	mce_self_interrupt smp_mce_self_interrupt
#endif

#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
	call_function_single_interrupt smp_call_function_single_interrupt
apicinterrupt CALL_FUNCTION_VECTOR \
	call_function_interrupt smp_call_function_interrupt
apicinterrupt RESCHEDULE_VECTOR \
	reschedule_interrupt smp_reschedule_interrupt
#endif

apicinterrupt ERROR_APIC_VECTOR \
	error_interrupt smp_error_interrupt
apicinterrupt SPURIOUS_APIC_VECTOR \
	spurious_interrupt smp_spurious_interrupt

#ifdef CONFIG_IRQ_WORK
apicinterrupt IRQ_WORK_VECTOR \
	irq_work_interrupt smp_irq_work_interrupt
#endif

/*
 * Exception entry points.
 */
.macro zeroentry sym do_sym
ENTRY(\sym)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
	subq $ORIG_RAX-R15, %rsp
	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
	call error_entry
	DEFAULT_FRAME 0
	movq %rsp,%rdi		/* pt_regs pointer */
	xorl %esi,%esi		/* no error code */
	call \do_sym
	jmp error_exit		/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

.macro paranoidzeroentry sym do_sym
ENTRY(\sym)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
	subq $ORIG_RAX-R15, %rsp
	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
	call save_paranoid
	TRACE_IRQS_OFF
	movq %rsp,%rdi		/* pt_regs pointer */
	xorl %esi,%esi		/* no error code */
	call \do_sym
	jmp paranoid_exit	/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
.macro paranoidzeroentry_ist sym do_sym ist
ENTRY(\sym)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
	subq $ORIG_RAX-R15, %rsp
	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
	call save_paranoid
	TRACE_IRQS_OFF
	movq %rsp,%rdi		/* pt_regs pointer */
	xorl %esi,%esi		/* no error code */
	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
	call \do_sym
	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
	jmp paranoid_exit	/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm
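/*
 * The EXCEPTION_STKSZ adjustment in paranoidzeroentry_ist above is the
 * IST nesting trick: while \do_sym runs, the TSS IST slot is moved down
 * one exception stack's worth, so that if the same IST vector fires
 * again (say, a breakpoint hit inside do_debug) the CPU lands on a
 * fresh stack instead of clobbering the frame still in use.
 */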
.macro errorentry sym do_sym
ENTRY(\sym)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	subq $ORIG_RAX-R15, %rsp
	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
	call error_entry
	DEFAULT_FRAME 0
	movq %rsp,%rdi			/* pt_regs pointer */
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
	call \do_sym
	jmp error_exit			/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

	/* error code is on the stack already */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	subq $ORIG_RAX-R15, %rsp
	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
	call save_paranoid
	DEFAULT_FRAME 0
	TRACE_IRQS_OFF
	movq %rsp,%rdi			/* pt_regs pointer */
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
	call \do_sym
	jmp paranoid_exit		/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
.endm

zeroentry divide_error do_divide_error
zeroentry overflow do_overflow
zeroentry bounds do_bounds
zeroentry invalid_op do_invalid_op
zeroentry device_not_available do_device_not_available
paranoiderrorentry double_fault do_double_fault
zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
errorentry invalid_TSS do_invalid_TSS
errorentry segment_not_present do_segment_not_present
zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
zeroentry coprocessor_error do_coprocessor_error
errorentry alignment_check do_alignment_check
zeroentry simd_coprocessor_error do_simd_coprocessor_error

	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushfq_cfi
	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	SWAPGS
	popfq_cfi
	ret
	CFI_ENDPROC
END(native_load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous

ENTRY(kernel_thread_helper)
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	call *%rsi
	# exit
	mov %eax, %edi
	call do_exit
	ud2			# padding for call trace
	CFI_ENDPROC
END(kernel_thread_helper)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *	extern long execve(const char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(const char *name, char **argv, char **envp,
 *			       struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
END(kernel_execve)
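/*
 * call_softirq below reuses the irq_count idiom from save_args:
 * assuming the per-cpu irq_count is initialized to -1 (as this era's
 * cpu setup code does), the incl sets ZF only on the first, outermost
 * entry, so the cmove switches %rsp to the irq stack exactly in that
 * case; nested entries stay on the stack they are already on.
 */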
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	pushq_cfi %rbp
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl PER_CPU_VAR(irq_count)
	cmove PER_CPU_VAR(irq_stack_ptr),%rsp
	push  %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_RESTORE		rbp
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET   -8
	decl PER_CPU_VAR(irq_count)
	ret
	CFI_ENDPROC
END(call_softirq)

#ifdef CONFIG_XEN
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback

/*
 * A note on the "critical region" in our callback handler.
 * We want to avoid stacking callback handlers due to events occurring
 * during handling of the last event. To do this, we keep events disabled
 * until we've done all processing. HOWEVER, we must enable events before
 * popping the stack frame (can't be done atomically) and so it would still
 * be possible to get enough handler activations to overflow the stack.
 * Although unlikely, bugs of that kind are hard to track down, so we'd
 * like to avoid the possibility.
 * So, on entry to the handler we detect whether we interrupted an
 * existing activation in its critical region -- if so, we pop the current
 * activation and restart the handler using the previous one.
 */
ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
	CFI_STARTPROC
	/*
	 * Since we don't modify %rdi, evtchn_do_upcall(struct *pt_regs) will
	 * see the correct pointer to the pt_regs
	 */
	movq %rdi, %rsp		# we don't return, adjust the stack frame
	CFI_ENDPROC
	DEFAULT_FRAME
11:	incl PER_CPU_VAR(irq_count)
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
	pushq %rbp			# backlink for old unwinder
	call xen_evtchn_do_upcall
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl PER_CPU_VAR(irq_count)
	jmp  error_exit
	CFI_ENDPROC
END(xen_do_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we do not need to fix up as Xen has already reloaded all segment
 * registers that could be reloaded and zeroed the others.
 * Category 2 we fix up by killing the current process. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by comparing each saved segment register
 * with its current contents: any discrepancy means we are in category 1.
 */
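/*
 * Layout of the 6*8-byte frame Xen hands us on the failsafe callback,
 * which the magic offsets below index (this matches the CFI annotations
 * and the 0x30 that is eventually popped):
 *
 *	0x00 rcx, 0x08 r11, 0x10 ds, 0x18 es, 0x20 fs, 0x28 gs
 */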
ENTRY(xen_failsafe_callback)
	INTR_FRAME 1 (6*8)
	/*CFI_REL_OFFSET gs,GS*/
	/*CFI_REL_OFFSET fs,FS*/
	/*CFI_REL_OFFSET es,ES*/
	/*CFI_REL_OFFSET ds,DS*/
	CFI_REL_OFFSET r11,8
	CFI_REL_OFFSET rcx,0
	movw %ds,%cx
	cmpw %cx,0x10(%rsp)
	CFI_REMEMBER_STATE
	jne 1f
	movw %es,%cx
	cmpw %cx,0x18(%rsp)
	jne 1f
	movw %fs,%cx
	cmpw %cx,0x20(%rsp)
	jne 1f
	movw %gs,%cx
	cmpw %cx,0x28(%rsp)
	jne 1f
	/* All segments match their saved values => Category 2 (Bad IRET). */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq_cfi $0	/* RIP */
	pushq_cfi %r11
	pushq_cfi %rcx
	jmp general_protection
	CFI_RESTORE_STATE
1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq_cfi $0
	SAVE_ALL
	jmp error_exit
	CFI_ENDPROC
END(xen_failsafe_callback)

apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
	xen_hvm_callback_vector xen_evtchn_do_upcall

#endif /* CONFIG_XEN */

/*
 * Some functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

paranoidzeroentry_ist debug do_debug DEBUG_STACK
paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
paranoiderrorentry stack_segment do_stack_segment
#ifdef CONFIG_XEN
zeroentry xen_debug do_debug
zeroentry xen_int3 do_int3
errorentry xen_stack_segment do_stack_segment
#endif
errorentry general_protection do_general_protection
errorentry page_fault do_page_fault
#ifdef CONFIG_KVM_GUEST
errorentry async_page_fault do_async_page_fault
#endif
#ifdef CONFIG_X86_MCE
paranoidzeroentry machine_check *machine_check_vector(%rip)
#endif

/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */

	/* ebx:	no swapgs flag */
ENTRY(paranoid_exit)
	DEFAULT_FRAME
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl %ebx,%ebx				/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz   paranoid_userspace
paranoid_swapgs:
	TRACE_IRQS_IRETQ 0
	SWAPGS_UNSAFE_STACK
	RESTORE_ALL 8
	jmp irq_return
paranoid_restore:
	TRACE_IRQS_IRETQ 0
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp paranoid_userspace
paranoid_schedule:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	jmp paranoid_userspace
	CFI_ENDPROC
END(paranoid_exit)

/*
 * Exception entry point. This expects an error code/orig_rax on the stack.
 * returns in "no swapgs flag" in %ebx.
 */
ENTRY(error_entry)
	XCPT_FRAME
	CFI_ADJUST_CFA_OFFSET 15*8
	/* oldrax contains error code */
	cld
	movq_cfi rdi, RDI+8
	movq_cfi rsi, RSI+8
	movq_cfi rdx, RDX+8
	movq_cfi rcx, RCX+8
	movq_cfi rax, RAX+8
	movq_cfi  r8,  R8+8
	movq_cfi  r9,  R9+8
	movq_cfi r10, R10+8
	movq_cfi r11, R11+8
	movq_cfi rbx, RBX+8
	movq_cfi rbp, RBP+8
	movq_cfi r12, R12+8
	movq_cfi r13, R13+8
	movq_cfi r14, R14+8
	movq_cfi r15, R15+8
	xorl %ebx,%ebx
	testl $3,CS+8(%rsp)
	je error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	TRACE_IRQS_OFF
	ret

/*
 * There are two places in the kernel that can potentially fault with
 * usergs. Handle them here. The exception handlers after iret run with
 * kernel gs again, so don't set the user space flag. B stepping K8s
 * sometimes report a truncated RIP for IRET exceptions returning to
 * compat mode. Check for these here too.
 */
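/*
 * Concretely, the B-stepping check below: %rcx holds the full 64-bit
 * address of irq_return, and "movl %ecx,%eax" makes a zero-extended,
 * 32-bit-truncated copy of it. If the saved RIP matches the truncated
 * copy, we assume the CPU reported a truncated RIP for the IRET fault,
 * and bstep_iret writes the real address back before taking the
 * swapgs path.
 */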
error_kernelspace:
	incl %ebx
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP+8(%rsp)
	je error_swapgs
	movl %ecx,%eax	/* zero extend */
	cmpq %rax,RIP+8(%rsp)
	je bstep_iret
	cmpq $gs_change,RIP+8(%rsp)
	je error_swapgs
	jmp error_sti

bstep_iret:
	/* Fix truncated RIP */
	movq %rcx,RIP+8(%rsp)
	jmp error_swapgs
	CFI_ENDPROC
END(error_entry)


/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
ENTRY(error_exit)
	DEFAULT_FRAME
	movl %ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	jmp retint_swapgs
	CFI_ENDPROC
END(error_exit)


	/* runs on exception stack */
ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1
	subq $ORIG_RAX-R15, %rsp
	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
	call save_paranoid
	DEFAULT_FRAME 0
	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
	movq %rsp,%rdi
	movq $-1,%rsi
	call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
	/* paranoidexit; without TRACE_IRQS_OFF */
	/* ebx:	no swapgs flag */
	DISABLE_INTERRUPTS(CLBR_NONE)
	testl %ebx,%ebx				/* swapgs needed? */
	jnz nmi_restore
	testl $3,CS(%rsp)
	jnz nmi_userspace
nmi_swapgs:
	SWAPGS_UNSAFE_STACK
nmi_restore:
	RESTORE_ALL 8
	jmp irq_return
nmi_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz nmi_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz nmi_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	jmp nmi_userspace
nmi_schedule:
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	jmp nmi_userspace
	CFI_ENDPROC
#else
	jmp paranoid_exit
	CFI_ENDPROC
#endif
END(nmi)

ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
END(ignore_sysret)

/*
 * End of kprobes section
 */
	.popsection