/*1* arch/microblaze/mm/fault.c2*3* Copyright (C) 2007 Xilinx, Inc. All rights reserved.4*5* Derived from "arch/ppc/mm/fault.c"6* Copyright (C) 1995-1996 Gary Thomas ([email protected])7*8* Derived from "arch/i386/mm/fault.c"9* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds10*11* Modified by Cort Dougan and Paul Mackerras.12*13* This file is subject to the terms and conditions of the GNU General14* Public License. See the file COPYING in the main directory of this15* archive for more details.16*17*/1819#include <linux/extable.h>20#include <linux/signal.h>21#include <linux/sched.h>22#include <linux/kernel.h>23#include <linux/errno.h>24#include <linux/string.h>25#include <linux/types.h>26#include <linux/ptrace.h>27#include <linux/mman.h>28#include <linux/mm.h>29#include <linux/interrupt.h>30#include <linux/perf_event.h>3132#include <asm/page.h>33#include <asm/mmu.h>34#include <linux/mmu_context.h>35#include <linux/uaccess.h>36#include <asm/exceptions.h>3738static unsigned long pte_misses; /* updated by do_page_fault() */39static unsigned long pte_errors; /* updated by do_page_fault() */4041/*42* Check whether the instruction at regs->pc is a store using43* an update addressing form which will update r1.44*/45static int store_updates_sp(struct pt_regs *regs)46{47unsigned int inst;4849if (get_user(inst, (unsigned int __user *)regs->pc))50return 0;51/* check for 1 in the rD field */52if (((inst >> 21) & 0x1f) != 1)53return 0;54/* check for store opcodes */55if ((inst & 0xd0000000) == 0xd0000000)56return 1;57return 0;58}596061/*62* bad_page_fault is called when we have a bad access from the kernel.63* It is called from do_page_fault above and from some of the procedures64* in traps.c.65*/66void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)67{68const struct exception_table_entry *fixup;69/* MS: no context */70/* Are we prepared to handle this fault? */71fixup = search_exception_tables(regs->pc);72if (fixup) {73regs->pc = fixup->fixup;74return;75}7677/* kernel has accessed a bad area */78die("kernel access of bad area", regs, sig);79}8081/*82* The error_code parameter is ESR for a data fault,83* 0 for an instruction fault.84*/85void do_page_fault(struct pt_regs *regs, unsigned long address,86unsigned long error_code)87{88struct vm_area_struct *vma;89struct mm_struct *mm = current->mm;90int code = SEGV_MAPERR;91int is_write = error_code & ESR_S;92vm_fault_t fault;93unsigned int flags = FAULT_FLAG_DEFAULT;9495regs->ear = address;96regs->esr = error_code;9798/* On a kernel SLB miss we can only check for a valid exception entry */99if (unlikely(kernel_mode(regs) && (address >= TASK_SIZE))) {100pr_warn("kernel task_size exceed");101_exception(SIGSEGV, regs, code, address);102}103104/* for instr TLB miss and instr storage exception ESR_S is undefined */105if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)106is_write = 0;107108if (unlikely(faulthandler_disabled() || !mm)) {109if (kernel_mode(regs))110goto bad_area_nosemaphore;111112/* faulthandler_disabled() in user mode is really bad,113as is current->mm == NULL. */114pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n",115mm);116pr_emerg("r15 = %lx MSR = %lx\n",117regs->r15, regs->msr);118die("Weird page fault", regs, SIGSEGV);119}120121if (user_mode(regs))122flags |= FAULT_FLAG_USER;123124perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);125126/* When running in the kernel we expect faults to occur only to127* addresses in user space. All other faults represent errors in the128* kernel and should generate an OOPS. Unfortunately, in the case of an129* erroneous fault occurring in a code path which already holds mmap_lock130* we will deadlock attempting to validate the fault against the131* address space. Luckily the kernel only validly references user132* space from well defined areas of code, which are listed in the133* exceptions table.134*135* As the vast majority of faults will be valid we will only perform136* the source reference check when there is a possibility of a deadlock.137* Attempt to lock the address space, if we cannot we then validate the138* source. If this is invalid we can skip the address space check,139* thus avoiding the deadlock.140*/141if (unlikely(!mmap_read_trylock(mm))) {142if (kernel_mode(regs) && !search_exception_tables(regs->pc))143goto bad_area_nosemaphore;144145retry:146mmap_read_lock(mm);147}148149vma = find_vma(mm, address);150if (unlikely(!vma))151goto bad_area;152153if (vma->vm_start <= address)154goto good_area;155156if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))157goto bad_area;158159if (unlikely(!is_write))160goto bad_area;161162/*163* N.B. The ABI allows programs to access up to164* a few hundred bytes below the stack pointer (TBD).165* The kernel signal delivery code writes up to about 1.5kB166* below the stack pointer (r1) before decrementing it.167* The exec code can write slightly over 640kB to the stack168* before setting the user r1. Thus we allow the stack to169* expand to 1MB without further checks.170*/171if (unlikely(address + 0x100000 < vma->vm_end)) {172173/* get user regs even if this fault is in kernel mode */174struct pt_regs *uregs = current->thread.regs;175if (uregs == NULL)176goto bad_area;177178/*179* A user-mode access to an address a long way below180* the stack pointer is only valid if the instruction181* is one which would update the stack pointer to the182* address accessed if the instruction completed,183* i.e. either stwu rs,n(r1) or stwux rs,r1,rb184* (or the byte, halfword, float or double forms).185*186* If we don't check this then any write to the area187* between the last mapped region and the stack will188* expand the stack rather than segfaulting.189*/190if (address + 2048 < uregs->r1191&& (kernel_mode(regs) || !store_updates_sp(regs)))192goto bad_area;193}194vma = expand_stack(mm, address);195if (!vma)196goto bad_area_nosemaphore;197198good_area:199code = SEGV_ACCERR;200201/* a write */202if (unlikely(is_write)) {203if (unlikely(!(vma->vm_flags & VM_WRITE)))204goto bad_area;205flags |= FAULT_FLAG_WRITE;206/* a read */207} else {208/* protection fault */209if (unlikely(error_code & 0x08000000))210goto bad_area;211if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC))))212goto bad_area;213}214215/*216* If for any reason at all we couldn't handle the fault,217* make sure we exit gracefully rather than endlessly redo218* the fault.219*/220fault = handle_mm_fault(vma, address, flags, regs);221222if (fault_signal_pending(fault, regs)) {223if (!user_mode(regs))224bad_page_fault(regs, address, SIGBUS);225return;226}227228/* The fault is fully completed (including releasing mmap lock) */229if (fault & VM_FAULT_COMPLETED)230return;231232if (unlikely(fault & VM_FAULT_ERROR)) {233if (fault & VM_FAULT_OOM)234goto out_of_memory;235else if (fault & VM_FAULT_SIGSEGV)236goto bad_area;237else if (fault & VM_FAULT_SIGBUS)238goto do_sigbus;239BUG();240}241242if (fault & VM_FAULT_RETRY) {243flags |= FAULT_FLAG_TRIED;244245/*246* No need to mmap_read_unlock(mm) as we would247* have already released it in __lock_page_or_retry248* in mm/filemap.c.249*/250251goto retry;252}253254mmap_read_unlock(mm);255256/*257* keep track of tlb+htab misses that are good addrs but258* just need pte's created via handle_mm_fault()259* -- Cort260*/261pte_misses++;262return;263264bad_area:265mmap_read_unlock(mm);266267bad_area_nosemaphore:268pte_errors++;269270/* User mode accesses cause a SIGSEGV */271if (user_mode(regs)) {272_exception(SIGSEGV, regs, code, address);273return;274}275276bad_page_fault(regs, address, SIGSEGV);277return;278279/*280* We ran out of memory, or some other thing happened to us that made281* us unable to handle the page fault gracefully.282*/283out_of_memory:284mmap_read_unlock(mm);285if (!user_mode(regs))286bad_page_fault(regs, address, SIGKILL);287else288pagefault_out_of_memory();289return;290291do_sigbus:292mmap_read_unlock(mm);293if (user_mode(regs)) {294force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);295return;296}297bad_page_fault(regs, address, SIGBUS);298}299300301