Path: blob/master/arch/cris/arch-v32/lib/usercopy.c
/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, 2003 Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
 */

#include <asm/uaccess.h>

/* Asm:s have been tweaked (within the domain of correctness) to give
   satisfactory results for "gcc version 3.2.1 Axis release R53/1.53-v32".

   Check regularly...

   Note that for CRISv32, the PC saved at a bus-fault is the address
   *at* the faulting instruction, with a special case for instructions
   in delay slots: then it's the address of the branch.  Note also that
   in contrast to v10, a postincrement in the instruction is *not*
   performed at a bus-fault; the register is seen having the original
   value in fault handlers.  */


/* Copy to userspace.  This is based on the memcpy used for
   kernel-to-kernel copying; see "string.c".  */

unsigned long
__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
         don't have to check further for overflows.  */
      && n >= 3)
  {
    if ((unsigned long) dst & 1)
    {
      __asm_copy_to_user_1 (dst, src, retn);
      n--;
    }

    if ((unsigned long) dst & 2)
    {
      __asm_copy_to_user_2 (dst, src, retn);
      n -= 2;
    }
  }

  /* Movem is dirt cheap.  The overhead is low enough to always use the
     minimum possible block size as the threshold.  */
  if (n >= 44)
  {
    /* For large copies we use 'movem'.  */

    /* It is not optimal to tell the compiler about clobbering any
       registers; that will move the saving/restoring of those registers
       to the function prologue/epilogue, and make non-movem sizes
       suboptimal.  */
    __asm__ volatile ("\
        ;; Check that the register asm declaration got right. \n\
        ;; The GCC manual explicitly says TRT will happen. \n\
        .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
        .err \n\
        .endif \n\
        \n\
        ;; Save the registers we'll use in the movem process \n\
        ;; on the stack. \n\
        subq 11*4,$sp \n\
        movem $r10,[$sp] \n\
        \n\
        ;; Now we've got this: \n\
        ;; r11 - src \n\
        ;; r13 - dst \n\
        ;; r12 - n \n\
        \n\
        ;; Update n for the first loop \n\
        subq 44,$r12 \n\
0: \n\
        movem [$r11+],$r10 \n\
        subq 44,$r12 \n\
1:      bge 0b \n\
        movem $r10,[$r13+] \n\
3: \n\
        addq 44,$r12 ;; compensate for last loop underflowing n \n\
        \n\
        ;; Restore registers from stack \n\
        movem [$sp+],$r10 \n\
2: \n\
        .section .fixup,\"ax\" \n\
4: \n\
; When failing on any of the 1..44 bytes in a chunk, we adjust back the \n\
; source pointer and just drop through to the by-16 and by-4 loops to \n\
; get the correct number of failing bytes.  This necessarily means a \n\
; few extra exceptions, but invalid user pointers shouldn't happen in \n\
; time-critical code anyway. \n\
        jump 3b \n\
        subq 44,$r11 \n\
        \n\
        .previous \n\
        .section __ex_table,\"a\" \n\
        .dword 1b,4b \n\
        .previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

  }

  while (n >= 16)
  {
    __asm_copy_to_user_16 (dst, src, retn);
    n -= 16;
  }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
  {
    __asm_copy_to_user_4 (dst, src, retn);
    n -= 4;
  }

  switch (n)
  {
    case 0:
      break;
    case 1:
      __asm_copy_to_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_to_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_to_user_3 (dst, src, retn);
      break;
  }

  return retn;
}

/* Copy from user to kernel, zeroing the bytes that were inaccessible in
   userland.  The return-value is the number of bytes that were
   inaccessible.  */

unsigned long
__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* The best reason to align src is that we then know that a read-fault
     was for aligned bytes; there's no 1..3 remaining good bytes to
     pickle.  */
  if (((unsigned long) src & 3) != 0)
  {
    if (((unsigned long) src & 1) && n != 0)
    {
      __asm_copy_from_user_1 (dst, src, retn);
      n--;
    }

    if (((unsigned long) src & 2) && n >= 2)
    {
      __asm_copy_from_user_2 (dst, src, retn);
      n -= 2;
    }

    /* We only need one check after the unalignment-adjustments, because
       if both adjustments were done, either both or neither reference
       had an exception.  */
    if (retn != 0)
      goto copy_exception_bytes;
  }

  /* Movem is dirt cheap.  The overhead is low enough to always use the
     minimum possible block size as the threshold.  */
  if (n >= 44)
  {
    /* It is not optimal to tell the compiler about clobbering any
       registers; that will move the saving/restoring of those registers
       to the function prologue/epilogue, and make non-movem sizes
       suboptimal.  */
    __asm__ volatile ("\
        .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
        .err \n\
        .endif \n\
        \n\
        ;; Save the registers we'll use in the movem process \n\
        ;; on the stack. \n\
        subq 11*4,$sp \n\
        movem $r10,[$sp] \n\
        \n\
        ;; Now we've got this: \n\
        ;; r11 - src \n\
        ;; r13 - dst \n\
        ;; r12 - n \n\
        \n\
        ;; Update n for the first loop \n\
        subq 44,$r12 \n\
0: \n\
        movem [$r11+],$r10 \n\
        \n\
        subq 44,$r12 \n\
        bge 0b \n\
        movem $r10,[$r13+] \n\
        \n\
4: \n\
        addq 44,$r12 ;; compensate for last loop underflowing n \n\
        \n\
        ;; Restore registers from stack \n\
        movem [$sp+],$r10 \n\
        .section .fixup,\"ax\" \n\
        \n\
;; Do not jump back into the loop if we fail.  For some uses, we get a \n\
;; page fault somewhere on the line.  Without checking for page limits, \n\
;; we don't know where, but we need to copy accurately and keep an \n\
;; accurate count; not just clear the whole line.  To do that, we fall \n\
;; down in the code below, proceeding with smaller amounts.  It should \n\
;; be kept in mind that we have to cater to code like what at one time \n\
;; was in fs/super.c: \n\
;;  i = size - copy_from_user((void *)page, data, size); \n\
;; which would cause repeated faults while clearing the remainder of \n\
;; the SIZE bytes at PAGE after the first fault. \n\
;; A caveat here is that we must not fall through from a failing page \n\
;; to a valid page. \n\
        \n\
3: \n\
        jump 4b ;; Fall through, pretending the fault didn't happen. \n\
        nop \n\
        \n\
        .previous \n\
        .section __ex_table,\"a\" \n\
        .dword 0b,3b \n\
        .previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
  }

  /* Either we directly start copying here, using dword copying in a loop,
     or we copy as much as possible with 'movem' and then the last block
     (<44 bytes) is copied here.  This will work since 'movem' will have
     updated src, dst and n.  (Except with failing src.)

     Since we want to keep src accurate, we can't use
     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
     retn, but not src (by design; its value is ignored elsewhere).  */

  while (n >= 4)
  {
    __asm_copy_from_user_4 (dst, src, retn);
    n -= 4;

    if (retn)
      goto copy_exception_bytes;
  }

  /* If we get here, there were no memory read faults.  */
  switch (n)
  {
    /* These copies are at least "naturally aligned" (so we don't have
       to check each byte), due to the src alignment code before the
       movem loop.  The *_3 case *will* get the correct count for retn.  */
    case 0:
      /* This case deliberately left in (if you have doubts check the
         generated assembly code).  */
      break;
    case 1:
      __asm_copy_from_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_from_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_from_user_3 (dst, src, retn);
      break;
  }

  /* If we get here, retn correctly reflects the number of failing
     bytes.  */
  return retn;

copy_exception_bytes:
  /* We already have "retn" bytes cleared, and need to clear the
     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
     memset is preferred here, since this isn't speed-critical code and
     we'd rather have this a leaf-function than calling memset.  */
  {
    char *endp;
    for (endp = dst + n; dst < endp; dst++)
      *dst = 0;
  }

  return retn + n;
}

/* Zero userspace.  */

unsigned long
__do_clear_user (void __user *pto, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pto;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes.  */
      && n >= 3)
  {
    if ((unsigned long) dst & 1)
    {
      __asm_clear_1 (dst, retn);
      n--;
    }

    if ((unsigned long) dst & 2)
    {
      __asm_clear_2 (dst, retn);
      n -= 2;
    }
  }

  /* Decide which copying method to use.
     FIXME: This number is from the "ordinary" kernel memset.  */
  if (n >= 48)
  {
    /* For large clears we use 'movem' */

    /* It is not optimal to tell the compiler about clobbering any
       call-saved registers; that will move the saving/restoring of
       those registers to the function prologue/epilogue, and make
       non-movem sizes suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right; then
       check the equalities in the first comment.  It should say
       something like "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\
        .ifnc %0%1%2,$r13$r12$r10 \n\
        .err \n\
        .endif \n\
        \n\
        ;; Save the registers we'll clobber in the movem process \n\
        ;; on the stack.  Don't mention them to gcc, it will only be \n\
        ;; upset. \n\
        subq 11*4,$sp \n\
        movem $r10,[$sp] \n\
        \n\
        clear.d $r0 \n\
        clear.d $r1 \n\
        clear.d $r2 \n\
        clear.d $r3 \n\
        clear.d $r4 \n\
        clear.d $r5 \n\
        clear.d $r6 \n\
        clear.d $r7 \n\
        clear.d $r8 \n\
        clear.d $r9 \n\
        clear.d $r10 \n\
        clear.d $r11 \n\
        \n\
        ;; Now we've got this: \n\
        ;; r13 - dst \n\
        ;; r12 - n \n\
        \n\
        ;; Update n for the first loop \n\
        subq 12*4,$r12 \n\
0: \n\
        subq 12*4,$r12 \n\
1: \n\
        bge 0b \n\
        movem $r11,[$r13+] \n\
        \n\
        addq 12*4,$r12 ;; compensate for last loop underflowing n \n\
        \n\
        ;; Restore registers from stack \n\
        movem [$sp+],$r10 \n\
2: \n\
        .section .fixup,\"ax\" \n\
3: \n\
        movem [$sp],$r10 \n\
        addq 12*4,$r10 \n\
        addq 12*4,$r13 \n\
        movem $r10,[$sp] \n\
        jump 0b \n\
        clear.d $r10 \n\
        \n\
        .previous \n\
        .section __ex_table,\"a\" \n\
        .dword 1b,3b \n\
        .previous"

     /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
     /* Clobber */ : "r11");
  }

  while (n >= 16)
  {
    __asm_clear_16 (dst, retn);
    n -= 16;
  }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
  {
    __asm_clear_4 (dst, retn);
    n -= 4;
  }

  switch (n)
  {
    case 0:
      break;
    case 1:
      __asm_clear_1 (dst, retn);
      break;
    case 2:
      __asm_clear_2 (dst, retn);
      break;
    case 3:
      __asm_clear_3 (dst, retn);
      break;
  }

  return retn;
}
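For orientation, here is a minimal caller-side sketch (not part of the file above) of the return convention these routines implement: each returns the number of bytes that could *not* be copied or cleared, and __copy_user_zeroing additionally zero-fills the part of the kernel buffer it could not read. The copy_to_user() wrapper is the usual uaccess entry point whose inlined parts live in asm-cris/uaccess.h; the function name example_read_setting and its parameters are hypothetical, used only for illustration.

/* Illustrative sketch only -- not part of usercopy.c.  Assumes normal
   kernel context (<asm/uaccess.h> for the wrappers, <linux/errno.h>
   for EFAULT); the function and parameter names are made up.  */

static long example_read_setting(void __user *ubuf, unsigned long size)
{
  char kbuf[64] = "example";      /* kernel-side data to hand out */
  unsigned long not_copied;

  if (size > sizeof(kbuf))
    size = sizeof(kbuf);

  /* copy_to_user() typically reaches __copy_user above and returns
     the number of bytes that could NOT be written to userspace;
     zero means the whole buffer was copied.  */
  not_copied = copy_to_user(ubuf, kbuf, size);
  if (not_copied != 0)
    return -EFAULT;

  return size;
}

The fs/super.c pattern quoted in the __copy_user_zeroing comment uses the same convention in the other direction: i = size - copy_from_user((void *)page, data, size); consumes the partial count directly, which is why the fixup code above goes to the trouble of keeping retn byte-accurate and zeroing the unread tail instead of giving up at the first fault.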