/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 * This also contains the timer-interrupt handler, as well as all interrupts
 * and faults that can result in a task-switch.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after a timer-interrupt and after each system call.
 *
 * I changed all the .align's to 4 (16 byte alignment), as that's faster
 * on a 486.
 *
 * Stack layout in 'syscall_exit':
 *	ptrace needs to have all regs on the stack.
 *	if the order here is changed, it needs to be
 *	updated in fork.c:copy_process, signal.c:do_signal,
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
 *
 * "current" is in register %ebx during any slow entries.
 */

#include <linux/linkage.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeature.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_LE		0x40000000

#ifndef CONFIG_AUDITSYSCALL
#define sysenter_audit	syscall_trace_entry
#define sysexit_audit	syscall_exit_work
#endif

	.section .entry.text, "ax"

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

#define nr_syscalls ((syscall_table_size)/4)

#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
#define preempt_stop(clobbers)
#define resume_kernel		restore_all
#endif

.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off?
	jz 1f
	TRACE_IRQS_ON
1:
#endif
.endm

#ifdef CONFIG_VM86
#define resume_userspace_sig	check_userspace
#else
#define resume_userspace_sig	resume_userspace
#endif

/*
 * User gs save/restore
 *
 * %gs is used for userland TLS and kernel only uses it for stack
 * canary which is required to be at %gs:20 by gcc.  Read the comment
 * at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
 */
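/*
 * Note on the two variants below: with CONFIG_X86_32_LAZY_GS the kernel
 * never touches %gs itself on entry, so these macros merely reserve or
 * skip the pt_regs slot for it; without it (i.e. with stack protector
 * enabled) %gs really is saved, restored, and pointed at the
 * stack-canary segment on every kernel entry.
 */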
#ifdef CONFIG_X86_32_LAZY_GS

 /* unfortunately push/pop can't be no-op */
.macro PUSH_GS
	pushl_cfi $0
.endm
.macro POP_GS pop=0
	addl $(4 + \pop), %esp
	CFI_ADJUST_CFA_OFFSET -(4 + \pop)
.endm
.macro POP_GS_EX
.endm

 /* all the rest are no-op */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
	pushl_cfi %gs
	/*CFI_REL_OFFSET gs, 0*/
.endm

.macro POP_GS pop=0
98:	popl_cfi %gs
	/*CFI_RESTORE gs*/
  .if \pop <> 0
	add $\pop, %esp
	CFI_ADJUST_CFA_OFFSET -\pop
  .endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl $0, (%esp)
	jmp 98b
.section __ex_table, "a"
	.align 4
	.long 98b, 99b
.popsection
.endm

.macro PTGS_TO_GS
98:	mov PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl $0, PT_GS(%esp)
	jmp 98b
.section __ex_table, "a"
	.align 4
	.long 98b, 99b
.popsection
.endm

.macro GS_TO_REG reg
	movl %gs, \reg
	/*CFI_REGISTER gs, \reg*/
.endm
.macro REG_TO_PTGS reg
	movl \reg, PT_GS(%esp)
	/*CFI_REL_OFFSET gs, PT_GS*/
.endm
.macro SET_KERNEL_GS reg
	movl $(__KERNEL_STACK_CANARY), \reg
	movl \reg, %gs
.endm

#endif	/* CONFIG_X86_32_LAZY_GS */

.macro SAVE_ALL
	cld
	PUSH_GS
	pushl_cfi %fs
	/*CFI_REL_OFFSET fs, 0;*/
	pushl_cfi %es
	/*CFI_REL_OFFSET es, 0;*/
	pushl_cfi %ds
	/*CFI_REL_OFFSET ds, 0;*/
	pushl_cfi %eax
	CFI_REL_OFFSET eax, 0
	pushl_cfi %ebp
	CFI_REL_OFFSET ebp, 0
	pushl_cfi %edi
	CFI_REL_OFFSET edi, 0
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %edx
	CFI_REL_OFFSET edx, 0
	pushl_cfi %ecx
	CFI_REL_OFFSET ecx, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	movl $(__USER_DS), %edx
	movl %edx, %ds
	movl %edx, %es
	movl $(__KERNEL_PERCPU), %edx
	movl %edx, %fs
	SET_KERNEL_GS %edx
.endm

.macro RESTORE_INT_REGS
	popl_cfi %ebx
	CFI_RESTORE ebx
	popl_cfi %ecx
	CFI_RESTORE ecx
	popl_cfi %edx
	CFI_RESTORE edx
	popl_cfi %esi
	CFI_RESTORE esi
	popl_cfi %edi
	CFI_RESTORE edi
	popl_cfi %ebp
	CFI_RESTORE ebp
	popl_cfi %eax
	CFI_RESTORE eax
.endm

.macro RESTORE_REGS pop=0
	RESTORE_INT_REGS
1:	popl_cfi %ds
	/*CFI_RESTORE ds;*/
2:	popl_cfi %es
	/*CFI_RESTORE es;*/
3:	popl_cfi %fs
	/*CFI_RESTORE fs;*/
	POP_GS \pop
.pushsection .fixup, "ax"
4:	movl $0, (%esp)
	jmp 1b
5:	movl $0, (%esp)
	jmp 2b
6:	movl $0, (%esp)
	jmp 3b
.section __ex_table, "a"
	.align 4
	.long 1b, 4b
	.long 2b, 5b
	.long 3b, 6b
.popsection
	POP_GS_EX
.endm

.macro RING0_INT_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 3*4
	/*CFI_OFFSET cs, -2*4;*/
	CFI_OFFSET eip, -3*4
.endm

.macro RING0_EC_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 4*4
	/*CFI_OFFSET cs, -2*4;*/
	CFI_OFFSET eip, -3*4
.endm

.macro RING0_PTREGS_FRAME
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
	CFI_OFFSET eip, PT_EIP-PT_OLDESP
	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
	CFI_OFFSET eax, PT_EAX-PT_OLDESP
	CFI_OFFSET ebp, PT_EBP-PT_OLDESP
	CFI_OFFSET edi, PT_EDI-PT_OLDESP
	CFI_OFFSET esi, PT_ESI-PT_OLDESP
	CFI_OFFSET edx, PT_EDX-PT_OLDESP
	CFI_OFFSET ecx, PT_ECX-PT_OLDESP
	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
.endm
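/*
 * A newly forked child arrives here from the context switch.  %eax holds
 * the previous task (from switch_to) and is pushed as the stack argument
 * to schedule_tail(), which finishes the switch; the popl below merely
 * discards that argument.  Kernel eflags are then reset to a known-good
 * value (IF set, everything else clear) before joining the common
 * syscall exit path.
 */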
ENTRY(ret_from_fork)
	CFI_STARTPROC
	pushl_cfi %eax
	call schedule_tail
	GET_THREAD_INFO(%ebp)
	popl_cfi %eax
	pushl_cfi $0x0202		# Reset kernel eflags
	popfl_cfi
	jmp syscall_exit
	CFI_ENDPROC
END(ret_from_fork)

/*
 * Interrupt exit functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
	RING0_PTREGS_FRAME
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
	GET_THREAD_INFO(%ebp)
check_userspace:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
	movb PT_CS(%esp), %al
	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace

ENTRY(resume_userspace)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
					# int/exception return?
	jne work_pending
	jmp restore_all
END(ret_from_exception)

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
	jnz restore_all
need_resched:
	movl TI_flags(%ebp), %ecx	# need_resched set ?
	testb $_TIF_NEED_RESCHED, %cl
	jz restore_all
	testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz restore_all
	call preempt_schedule_irq
	jmp need_resched
END(resume_kernel)
#endif
	CFI_ENDPROC
/*
 * End of kprobes section
 */
	.popsection

/* SYSENTER_RETURN points to after the "sysenter" instruction in
   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
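/*
 * Descriptive note: on sysenter the CPU loads %esp from
 * MSR_IA32_SYSENTER_ESP, which the kernel's cpu setup code (see
 * enable_sep_cpu()) arranges so that TSS_sysenter_sp0(%esp) below
 * reaches the saved kernel stack pointer in the per-cpu TSS.
 */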
	# sysenter call handler stub
ENTRY(ia32_sysenter_target)
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA esp, 0
	CFI_REGISTER esp, ebp
	movl TSS_sysenter_sp0(%esp),%esp
sysenter_past_esp:
	/*
	 * Interrupts are disabled here, but we can't trace it until
	 * enough kernel state to call TRACE_IRQS_OFF has been set up -
	 * but we immediately enable interrupts at that point anyway.
	 */
	pushl_cfi $__USER_DS
	/*CFI_REL_OFFSET ss, 0*/
	pushl_cfi %ebp
	CFI_REL_OFFSET esp, 0
	pushfl_cfi
	orl $X86_EFLAGS_IF, (%esp)
	pushl_cfi $__USER_CS
	/*CFI_REL_OFFSET cs, 0*/
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
	 */
	pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
	CFI_REL_OFFSET eip, 0

	pushl_cfi %eax
	SAVE_ALL
	ENABLE_INTERRUPTS(CLBR_NONE)

/*
 * Load the potential sixth argument from user stack.
 * Careful about security.
 */
	cmpl $__PAGE_OFFSET-3,%ebp
	jae syscall_fault
1:	movl (%ebp),%ebp
	movl %ebp,PT_EBP(%esp)
.section __ex_table,"a"
	.align 4
	.long 1b,syscall_fault
.previous

	GET_THREAD_INFO(%ebp)

	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
	jnz sysenter_audit
sysenter_do_call:
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testl $_TIF_ALLWORK_MASK, %ecx
	jne sysexit_audit
sysenter_exit:
/* if something modifies registers it must also disable sysexit */
	movl PT_EIP(%esp), %edx
	movl PT_OLDESP(%esp), %ecx
	xorl %ebp,%ebp
	TRACE_IRQS_ON
1:	mov  PT_FS(%esp), %fs
	PTGS_TO_GS
	ENABLE_INTERRUPTS_SYSEXIT

#ifdef CONFIG_AUDITSYSCALL
sysenter_audit:
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	addl $4,%esp
	CFI_ADJUST_CFA_OFFSET -4
	/* %esi already in 8(%esp)	   6th arg: 4th syscall arg */
	/* %edx already in 4(%esp)	   5th arg: 3rd syscall arg */
	/* %ecx already in 0(%esp)	   4th arg: 2nd syscall arg */
	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
	movl %eax,%edx			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
	call audit_syscall_entry
	pushl_cfi %ebx
	movl PT_EAX(%esp),%eax		/* reload syscall number */
	jmp sysenter_do_call

sysexit_audit:
	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
	jne syscall_exit_work
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)
	movl %eax,%edx		/* second arg, syscall return value */
	cmpl $0,%eax		/* is it < 0? */
	setl %al		/* 1 if so, 0 if not */
	movzbl %al,%eax		/* zero-extend that */
	inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
	jne syscall_exit_work
	movl PT_EAX(%esp),%eax	/* reload syscall return value */
	jmp sysenter_exit
#endif

	CFI_ENDPROC
.pushsection .fixup,"ax"
2:	movl $0,PT_FS(%esp)
	jmp 1b
.section __ex_table,"a"
	.align 4
	.long 1b,2b
.popsection
	PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)

/*
 * syscall stub including irq exit should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"
	# system call handler stub
ENTRY(system_call)
	RING0_INT_FRAME			# can't unwind into user space anyway
	pushl_cfi %eax			# save orig_eax
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
					# system call tracing in operation / emulation
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
syscall_call:
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)		# store the return value
syscall_exit:
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
	jne syscall_exit_work

restore_all:
	TRACE_IRQS_IRET
restore_all_notrace:
	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	# are returning to the kernel.
	# See comments in process.c:copy_thread() for details.
	movb PT_OLDSS(%esp), %ah
	movb PT_CS(%esp), %al
	andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
	CFI_REMEMBER_STATE
	je ldt_ss			# returning to user-space with LDT SS
restore_nocheck:
	RESTORE_REGS 4			# skip orig_eax/error_code
irq_return:
	INTERRUPT_RETURN
.section .fixup,"ax"
ENTRY(iret_exc)
	pushl $0			# no error code
	pushl $do_iret_error
	jmp error_code
.previous
.section __ex_table,"a"
	.align 4
	.long irq_return,iret_exc
.previous

	CFI_RESTORE_STATE
ldt_ss:
	larl PT_OLDSS(%esp), %eax
	jnz restore_nocheck
	testl $0x00400000, %eax		# returning to 32bit stack?
	jnz restore_nocheck		# all right, normal return

#ifdef CONFIG_PARAVIRT
	/*
	 * The kernel can't run on a non-flat stack if paravirt mode
	 * is active.  Rather than try to fixup the high bits of
	 * ESP, bypass this code entirely.  This may break DOSemu
	 * and/or Wine support in a paravirt VM, although the option
	 * is still available to implement the setting of the high
	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
	 */
	cmpl $0, pv_info+PARAVIRT_enabled
	jne restore_nocheck
#endif

/*
 * Setup and switch to ESPFIX stack
 *
 * We're returning to userspace with a 16 bit stack. The CPU will not
 * restore the high word of ESP for us on executing iret... This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy. We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that matches for the difference.
 */
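/*
 * A worked example of the arithmetic below (illustrative values only):
 * with a kernel %esp of 0xc1234567 and a userspace ESP of 0x1234ffe0,
 * %eax becomes 0x12344567 (user high word, kernel low word) and %edx
 * becomes 0xc1234567 - 0x12344567 = 0xaeef0000.  Writing 0xaeef into
 * bytes 4 and 7 of the ESPFIX GDT entry gives that segment a base of
 * 0xaeef0000, so base + new esp still addresses the kernel stack while
 * the high word of %esp now matches userspace's.
 */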
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
	mov %esp, %edx			/* load kernel esp */
	mov PT_OLDESP(%esp), %eax	/* load userspace esp */
	mov %dx, %ax			/* eax: new kernel esp */
	sub %eax, %edx			/* offset (low word is 0) */
	shr $16, %edx
	mov %dl, GDT_ESPFIX_SS + 4	/* bits 16..23 */
	mov %dh, GDT_ESPFIX_SS + 7	/* bits 24..31 */
	pushl_cfi $__ESPFIX_SS
	pushl_cfi %eax			/* new kernel esp */
	/* Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the iret */
	DISABLE_INTERRUPTS(CLBR_EAX)
	lss (%esp), %esp		/* switch to espfix segment */
	CFI_ADJUST_CFA_OFFSET -8
	jmp restore_nocheck
	CFI_ENDPROC
ENDPROC(system_call)

	# perform work that needs to be done immediately before resumption
	ALIGN
	RING0_PTREGS_FRAME		# can't unwind into user space anyway
work_pending:
	testb $_TIF_NEED_RESCHED, %cl
	jz work_notifysig
work_resched:
	call schedule
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
					# than syscall tracing?
	jz restore_all
	testb $_TIF_NEED_RESCHED, %cl
	jnz work_resched

work_notifysig:				# deal with pending signals and
					# notify-resume requests
#ifdef CONFIG_VM86
	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
	movl %esp, %eax
	jne work_notifysig_v86		# returning to kernel-space or
					# vm86-space
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig

	ALIGN
work_notifysig_v86:
	pushl_cfi %ecx			# save ti_flags for do_notify_resume
	call save_v86_state		# %eax contains pt_regs pointer
	popl_cfi %ecx
	movl %eax, %esp
#else
	movl %esp, %eax
#endif
	xorl %edx, %edx
	call do_notify_resume
	jmp resume_userspace_sig
END(work_pending)

	# perform syscall exit tracing
	ALIGN
syscall_trace_entry:
	movl $-ENOSYS,PT_EAX(%esp)
	movl %esp, %eax
	call syscall_trace_enter
	/* What it returned is what we'll actually use.  */
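	/*
	 * The tracer may have replaced or cancelled the syscall, so the
	 * (possibly new) number must be range-checked again before
	 * dispatch; out-of-range numbers fall through to syscall_exit
	 * with the -ENOSYS stored above.
	 */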
	cmpl $(nr_syscalls), %eax
	jnae syscall_call
	jmp syscall_exit
END(syscall_trace_entry)

	# perform syscall exit tracing
	ALIGN
syscall_exit_work:
	testl $_TIF_WORK_SYSCALL_EXIT, %ecx
	jz work_pending
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)	# could let syscall_trace_leave() call
					# schedule() instead
	movl %esp, %eax
	call syscall_trace_leave
	jmp resume_userspace
END(syscall_exit_work)
	CFI_ENDPROC

	RING0_INT_FRAME			# can't unwind into user space anyway
syscall_fault:
	GET_THREAD_INFO(%ebp)
	movl $-EFAULT,PT_EAX(%esp)
	jmp resume_userspace
END(syscall_fault)

syscall_badsys:
	movl $-ENOSYS,PT_EAX(%esp)
	jmp resume_userspace
END(syscall_badsys)
	CFI_ENDPROC
/*
 * End of kprobes section
 */
	.popsection

/*
 * System calls that need a pt_regs pointer.
 */
#define PTREGSCALL0(name) \
	ALIGN; \
ptregs_##name: \
	leal 4(%esp),%eax; \
	jmp sys_##name;

#define PTREGSCALL1(name) \
	ALIGN; \
ptregs_##name: \
	leal 4(%esp),%edx; \
	movl (PT_EBX+4)(%esp),%eax; \
	jmp sys_##name;

#define PTREGSCALL2(name) \
	ALIGN; \
ptregs_##name: \
	leal 4(%esp),%ecx; \
	movl (PT_ECX+4)(%esp),%edx; \
	movl (PT_EBX+4)(%esp),%eax; \
	jmp sys_##name;

#define PTREGSCALL3(name) \
	ALIGN; \
ptregs_##name: \
	CFI_STARTPROC; \
	leal 4(%esp),%eax; \
	pushl_cfi %eax; \
	movl PT_EDX(%eax),%ecx; \
	movl PT_ECX(%eax),%edx; \
	movl PT_EBX(%eax),%eax; \
	call sys_##name; \
	addl $4,%esp; \
	CFI_ADJUST_CFA_OFFSET -4; \
	ret; \
	CFI_ENDPROC; \
ENDPROC(ptregs_##name)

PTREGSCALL1(iopl)
PTREGSCALL0(fork)
PTREGSCALL0(vfork)
PTREGSCALL3(execve)
PTREGSCALL2(sigaltstack)
PTREGSCALL0(sigreturn)
PTREGSCALL0(rt_sigreturn)
PTREGSCALL2(vm86)
PTREGSCALL1(vm86old)

/* Clone is an oddball.  The 4th arg is in %edi */
	ALIGN;
ptregs_clone:
	CFI_STARTPROC
	leal 4(%esp),%eax
	pushl_cfi %eax
	pushl_cfi PT_EDI(%eax)
	movl PT_EDX(%eax),%ecx
	movl PT_ECX(%eax),%edx
	movl PT_EBX(%eax),%eax
	call sys_clone
	addl $8,%esp
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(ptregs_clone)

.macro FIXUP_ESPFIX_STACK
/*
 * Switch back for ESPFIX stack to the normal zerobased stack
 *
 * We can't call C functions using the ESPFIX stack. This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 */
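/*
 * Note: this clobbers %eax and must run while %ss still selects the
 * ESPFIX segment; it rebuilds the flat address by adding the segment
 * base (bits 16..31 from the GDT entry, low 16 bits known to be zero)
 * back onto the current %esp.
 */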
This code reads759* the high word of the segment base from the GDT and swiches to the760* normal stack and adjusts ESP with the matching offset.761*/762/* fixup the stack */763mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */764mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */765shl $16, %eax766addl %esp, %eax /* the adjusted stack pointer */767pushl_cfi $__KERNEL_DS768pushl_cfi %eax769lss (%esp), %esp /* switch to the normal stack segment */770CFI_ADJUST_CFA_OFFSET -8771.endm772.macro UNWIND_ESPFIX_STACK773movl %ss, %eax774/* see if on espfix stack */775cmpw $__ESPFIX_SS, %ax776jne 27f777movl $__KERNEL_DS, %eax778movl %eax, %ds779movl %eax, %es780/* switch to normal stack */781FIXUP_ESPFIX_STACK78227:783.endm784785/*786* Build the entry stubs and pointer table with some assembler magic.787* We pack 7 stubs into a single 32-byte chunk, which will fit in a788* single cache line on all modern x86 implementations.789*/790.section .init.rodata,"a"791ENTRY(interrupt)792.section .entry.text, "ax"793.p2align 5794.p2align CONFIG_X86_L1_CACHE_SHIFT795ENTRY(irq_entries_start)796RING0_INT_FRAME797vector=FIRST_EXTERNAL_VECTOR798.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7799.balign 32800.rept 7801.if vector < NR_VECTORS802.if vector <> FIRST_EXTERNAL_VECTOR803CFI_ADJUST_CFA_OFFSET -4804.endif8051: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */806.if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6807jmp 2f808.endif809.previous810.long 1b811.section .entry.text, "ax"812vector=vector+1813.endif814.endr8152: jmp common_interrupt816.endr817END(irq_entries_start)818819.previous820END(interrupt)821.previous822823/*824* the CPU automatically disables interrupts when executing an IRQ vector,825* so IRQ-flags tracing has to follow that:826*/827.p2align CONFIG_X86_L1_CACHE_SHIFT828common_interrupt:829addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */830SAVE_ALL831TRACE_IRQS_OFF832movl %esp,%eax833call do_IRQ834jmp ret_from_intr835ENDPROC(common_interrupt)836CFI_ENDPROC837838/*839* Irq entries should be protected against kprobes840*/841.pushsection .kprobes.text, "ax"842#define BUILD_INTERRUPT3(name, nr, fn) \843ENTRY(name) \844RING0_INT_FRAME; \845pushl_cfi $~(nr); \846SAVE_ALL; \847TRACE_IRQS_OFF \848movl %esp,%eax; \849call fn; \850jmp ret_from_intr; \851CFI_ENDPROC; \852ENDPROC(name)853854#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)855856/* The include is where all of the SMP etc. 
#include <asm/entry_arch.h>

ENTRY(coprocessor_error)
	RING0_INT_FRAME
	pushl_cfi $0
	pushl_cfi $do_coprocessor_error
	jmp error_code
	CFI_ENDPROC
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	RING0_INT_FRAME
	pushl_cfi $0
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
661:	pushl_cfi $do_general_protection
662:
.section .altinstructions,"a"
	.balign 4
	.long 661b
	.long 663f
	.word X86_FEATURE_XMM
	.byte 662b-661b
	.byte 664f-663f
.previous
.section .altinstr_replacement,"ax"
663:	pushl $do_simd_coprocessor_error
664:
.previous
#else
	pushl_cfi $do_simd_coprocessor_error
#endif
	jmp error_code
	CFI_ENDPROC
END(simd_coprocessor_error)

ENTRY(device_not_available)
	RING0_INT_FRAME
	pushl_cfi $-1			# mark this as an int
	pushl_cfi $do_device_not_available
	jmp error_code
	CFI_ENDPROC
END(device_not_available)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iret
.section __ex_table,"a"
	.align 4
	.long native_iret, iret_exc
.previous
END(native_iret)

ENTRY(native_irq_enable_sysexit)
	sti
	sysexit
END(native_irq_enable_sysexit)
#endif

ENTRY(overflow)
	RING0_INT_FRAME
	pushl_cfi $0
	pushl_cfi $do_overflow
	jmp error_code
	CFI_ENDPROC
END(overflow)

ENTRY(bounds)
	RING0_INT_FRAME
	pushl_cfi $0
	pushl_cfi $do_bounds
	jmp error_code
	CFI_ENDPROC
END(bounds)

ENTRY(invalid_op)
	RING0_INT_FRAME
	pushl_cfi $0
	pushl_cfi $do_invalid_op
	jmp error_code
	CFI_ENDPROC
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	RING0_INT_FRAME
	pushl_cfi $0
	pushl_cfi $do_coprocessor_segment_overrun
	jmp error_code
	CFI_ENDPROC
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
	RING0_EC_FRAME
	pushl_cfi $do_invalid_TSS
	jmp error_code
	CFI_ENDPROC
END(invalid_TSS)

ENTRY(segment_not_present)
	RING0_EC_FRAME
	pushl_cfi $do_segment_not_present
	jmp error_code
	CFI_ENDPROC
END(segment_not_present)

ENTRY(stack_segment)
	RING0_EC_FRAME
	pushl_cfi $do_stack_segment
	jmp error_code
	CFI_ENDPROC
END(stack_segment)

ENTRY(alignment_check)
	RING0_EC_FRAME
	pushl_cfi $do_alignment_check
	jmp error_code
	CFI_ENDPROC
END(alignment_check)

ENTRY(divide_error)
	RING0_INT_FRAME
	pushl_cfi $0			# no error code
	pushl_cfi $do_divide_error
	jmp error_code
	CFI_ENDPROC
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	RING0_INT_FRAME
	pushl_cfi $0
	pushl_cfi machine_check_vector
	jmp error_code
	CFI_ENDPROC
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
	RING0_INT_FRAME
	pushl_cfi $0
	pushl_cfi $do_spurious_interrupt_bug
	jmp error_code
	CFI_ENDPROC
END(spurious_interrupt_bug)
/*
 * End of kprobes section
 */
	.popsection

ENTRY(kernel_thread_helper)
	pushl $0		# fake return address for unwinder
	CFI_STARTPROC
	movl %edi,%eax
	call *%esi
	call do_exit
	ud2			# padding for call trace
	CFI_ENDPROC
ENDPROC(kernel_thread_helper)

#ifdef CONFIG_XEN
/* Xen doesn't set %esp to be precisely what the normal sysenter
   entry point expects, so fix it up before using the normal path. */
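/*
 * Assumption (not stated in the original comment): the five words
 * discarded below are the iret-style frame (eip, cs, eflags, esp, ss)
 * that Xen pushes before invoking the callback; the native sysenter
 * path rebuilds its own frame from scratch, so this one is not needed.
 */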
ENTRY(xen_sysenter_target)
	RING0_INT_FRAME
	addl $5*4, %esp		/* remove xen-provided frame */
	CFI_ADJUST_CFA_OFFSET -5*4
	jmp sysenter_past_esp
	CFI_ENDPROC

ENTRY(xen_hypervisor_callback)
	CFI_STARTPROC
	pushl_cfi $0
	SAVE_ALL
	TRACE_IRQS_OFF

	/* Check to see if we got the event in the critical
	   region in xen_iret_direct, after we've reenabled
	   events and checked for pending events.  This simulates
	   iret instruction's behaviour where it delivers a
	   pending interrupt when enabling interrupts. */
	movl PT_EIP(%esp),%eax
	cmpl $xen_iret_start_crit,%eax
	jb   1f
	cmpl $xen_iret_end_crit,%eax
	jae  1f

	jmp  xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1:	mov %esp, %eax
	call xen_evtchn_do_upcall
	jmp  ret_from_intr
	CFI_ENDPROC
ENDPROC(xen_hypervisor_callback)

# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
#  1. Fault while reloading DS, ES, FS or GS
#  2. Fault while executing IRET
# Category 1 we fix up by reattempting the load, and zeroing the segment
# register if the load fails.
# Category 2 we fix up by jumping to do_iret_error. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by maintaining a status value in EAX.
ENTRY(xen_failsafe_callback)
	CFI_STARTPROC
	pushl_cfi %eax
	movl $1,%eax
1:	mov 4(%esp),%ds
2:	mov 8(%esp),%es
3:	mov 12(%esp),%fs
4:	mov 16(%esp),%gs
	testl %eax,%eax
	popl_cfi %eax
	lea 16(%esp),%esp
	CFI_ADJUST_CFA_OFFSET -16
	jz 5f
	addl $16,%esp
	jmp iret_exc		# EAX != 0 => Category 2 (Bad IRET)
5:	pushl_cfi $0		# EAX == 0 => Category 1 (Bad segment)
	SAVE_ALL
	jmp ret_from_exception
	CFI_ENDPROC

.section .fixup,"ax"
6:	xorl %eax,%eax
	movl %eax,4(%esp)
	jmp 1b
7:	xorl %eax,%eax
	movl %eax,8(%esp)
	jmp 2b
8:	xorl %eax,%eax
	movl %eax,12(%esp)
	jmp 3b
9:	xorl %eax,%eax
	movl %eax,16(%esp)
	jmp 4b
.previous
.section __ex_table,"a"
	.align 4
	.long 1b,6b
	.long 2b,7b
	.long 3b,8b
	.long 4b,9b
.previous
ENDPROC(xen_failsafe_callback)

BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
		xen_evtchn_do_upcall)

#endif	/* CONFIG_XEN */

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE

ENTRY(mcount)
	ret
END(mcount)

ENTRY(ftrace_caller)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %eax
	movl 0x4(%ebp), %edx
	subl $MCOUNT_INSN_SIZE, %eax

.globl ftrace_call
ftrace_call:
	call ftrace_stub

	popl %edx
	popl %ecx
	popl %eax
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	jmp ftrace_stub
#endif

.globl ftrace_stub
ftrace_stub:
	ret
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
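/*
 * With dynamic ftrace disabled, every instrumented function entry
 * really does call mcount, so this version checks at run time whether
 * a trace function (or the graph tracer) has been installed and bails
 * out through ftrace_stub while tracing is idle.
 */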
ENTRY(mcount)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	cmpl $ftrace_stub, ftrace_trace_function
	jnz trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpl $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller

	cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
	jnz ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
	ret

	/* taken from glibc */
trace:
	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %eax
	movl 0x4(%ebp), %edx
	subl $MCOUNT_INSN_SIZE, %eax

	call *ftrace_trace_function

	popl %edx
	popl %ecx
	popl %eax
	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	cmpl $0, function_trace_stop
	jne ftrace_stub

	pushl %eax
	pushl %ecx
	pushl %edx
	movl 0xc(%esp), %edx
	lea 0x4(%ebp), %eax
	movl (%ebp), %ecx
	subl $MCOUNT_INSN_SIZE, %edx
	call prepare_ftrace_return
	popl %edx
	popl %ecx
	popl %eax
	ret
END(ftrace_graph_caller)

.globl return_to_handler
return_to_handler:
	pushl %eax
	pushl %edx
	movl %ebp, %eax
	call ftrace_return_to_handler
	movl %eax, %ecx
	popl %edx
	popl %eax
	jmp *%ecx
#endif

.section .rodata,"a"
#include "syscall_table_32.S"

syscall_table_size=(.-sys_call_table)

/*
 * Some functions should be protected against kprobes
 */
	.pushsection .kprobes.text, "ax"

ENTRY(page_fault)
	RING0_EC_FRAME
	pushl_cfi $do_page_fault
	ALIGN
error_code:
	/* the function address is in %gs's slot on the stack */
	pushl_cfi %fs
	/*CFI_REL_OFFSET fs, 0*/
	pushl_cfi %es
	/*CFI_REL_OFFSET es, 0*/
	pushl_cfi %ds
	/*CFI_REL_OFFSET ds, 0*/
	pushl_cfi %eax
	CFI_REL_OFFSET eax, 0
	pushl_cfi %ebp
	CFI_REL_OFFSET ebp, 0
	pushl_cfi %edi
	CFI_REL_OFFSET edi, 0
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %edx
	CFI_REL_OFFSET edx, 0
	pushl_cfi %ecx
	CFI_REL_OFFSET ecx, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	cld
	movl $(__KERNEL_PERCPU), %ecx
	movl %ecx, %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl PT_GS(%esp), %edi		# get the function address
	movl PT_ORIG_EAX(%esp), %edx	# get the error code
	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl $(__USER_DS), %ecx
	movl %ecx, %ds
	movl %ecx, %es
	TRACE_IRQS_OFF
	movl %esp,%eax			# pt_regs pointer
	call *%edi
	jmp ret_from_exception
	CFI_ENDPROC
END(page_fault)

/*
 * Debug traps and NMI can happen at the one SYSENTER instruction
 * that sets up the real kernel stack.  Check here, since we can't
 * allow the wrong stack to be used.
 *
 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
 * already pushed 3 words if it hits on the sysenter instruction:
 * eflags, cs and eip.
 *
 * We just load the right stack, and push the three (known) values
 * by hand onto the new stack - while updating the return eip past
 * the instruction that would have done it for sysenter.
 */
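/*
 * \ok is where to jump when the trap did not hit the sysenter path;
 * \label names the stack-switching instruction itself, so that the NMI
 * code below can test whether a debug trap was interrupted exactly
 * there (see debug_esp_fix_insn and nmi_debug_stack_check).
 */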
.macro FIX_STACK offset ok label
	cmpw $__KERNEL_CS, 4(%esp)
	jne \ok
\label:
	movl TSS_sysenter_sp0 + \offset(%esp), %esp
	CFI_DEF_CFA esp, 0
	CFI_UNDEFINED eip
	pushfl_cfi
	pushl_cfi $__KERNEL_CS
	pushl_cfi $sysenter_past_esp
	CFI_REL_OFFSET eip, 0
.endm

ENTRY(debug)
	RING0_INT_FRAME
	cmpl $ia32_sysenter_target,(%esp)
	jne debug_stack_correct
	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
	pushl_cfi $-1			# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx			# error code 0
	movl %esp,%eax			# pt_regs pointer
	call do_debug
	jmp ret_from_exception
	CFI_ENDPROC
END(debug)

/*
 * NMI is doubly nasty.  It can happen _while_ we're handling
 * a debug fault, and the debug fault hasn't yet been able to
 * clear up the stack.  So we first check whether we got an
 * NMI on the sysenter entry path, but after that we need to
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 */
ENTRY(nmi)
	RING0_INT_FRAME
	pushl_cfi %eax
	movl %ss, %eax
	cmpw $__ESPFIX_SS, %ax
	popl_cfi %eax
	je nmi_espfix_stack
	cmpl $ia32_sysenter_target,(%esp)
	je nmi_stack_fixup
	pushl_cfi %eax
	movl %esp,%eax
	/* Do not access memory above the end of our stack page,
	 * it might not exist.
	 */
	andl $(THREAD_SIZE-1),%eax
	cmpl $(THREAD_SIZE-20),%eax
	popl_cfi %eax
	jae nmi_stack_correct
	cmpl $ia32_sysenter_target,12(%esp)
	je nmi_debug_stack_check
nmi_stack_correct:
	/* We have a RING0_INT_FRAME here */
	pushl_cfi %eax
	SAVE_ALL
	xorl %edx,%edx		# zero error code
	movl %esp,%eax		# pt_regs pointer
	call do_nmi
	jmp restore_all_notrace
	CFI_ENDPROC

nmi_stack_fixup:
	RING0_INT_FRAME
	FIX_STACK 12, nmi_stack_correct, 1
	jmp nmi_stack_correct

nmi_debug_stack_check:
	/* We have a RING0_INT_FRAME here */
	cmpw $__KERNEL_CS,16(%esp)
	jne nmi_stack_correct
	cmpl $debug,(%esp)
	jb nmi_stack_correct
	cmpl $debug_esp_fix_insn,(%esp)
	ja nmi_stack_correct
	FIX_STACK 24, nmi_stack_correct, 1
	jmp nmi_stack_correct

nmi_espfix_stack:
	/* We have a RING0_INT_FRAME here.
	 *
	 * create the pointer to lss back
	 */
	pushl_cfi %ss
	pushl_cfi %esp
	addl $4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl_cfi 16(%esp)
	.endr
	pushl_cfi %eax
	SAVE_ALL
	FIXUP_ESPFIX_STACK		# %eax == %esp
	xorl %edx,%edx			# zero error code
	call do_nmi
	RESTORE_REGS
	lss 12+4(%esp), %esp		# back to espfix stack
	CFI_ADJUST_CFA_OFFSET -24
	jmp irq_return
	CFI_ENDPROC
END(nmi)

ENTRY(int3)
	RING0_INT_FRAME
	pushl_cfi $-1			# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl %edx,%edx			# zero error code
	movl %esp,%eax			# pt_regs pointer
	call do_int3
	jmp ret_from_exception
	CFI_ENDPROC
END(int3)

ENTRY(general_protection)
	RING0_EC_FRAME
	pushl_cfi $do_general_protection
	jmp error_code
	CFI_ENDPROC
END(general_protection)

#ifdef CONFIG_KVM_GUEST
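/*
 * Paravirtual "async page fault" exception injected by a KVM host;
 * it reuses the common error_code path with its own C handler.
 */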
ENTRY(async_page_fault)
	RING0_EC_FRAME
	pushl_cfi $do_async_page_fault
	jmp error_code
	CFI_ENDPROC
END(async_page_fault)
#endif

/*
 * End of kprobes section
 */
	.popsection