Path: blob/master/arch/cris/arch-v10/lib/usercopy.c
/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup a long time ago.
 */

#include <asm/uaccess.h>

/* Asm:s have been tweaked (within the domain of correctness) to give
   satisfactory results for "gcc version 2.96 20000427 (experimental)".

   Check regularly...

   Note that the PC saved at a bus-fault is the address *after* the
   faulting instruction, which means the branch target for instructions in
   delay slots of taken branches.  Note also that the postincrement in
   the instruction is performed regardless of bus-fault; the register is
   seen updated in fault handlers.

   Oh, and on the code formatting issue, to whoever feels like "fixing
   it" to Conformity: I'm too "lazy", but why don't you go ahead and "fix"
   string.c too.  I just don't think too many people will hack this file
   for the code format to be an issue.  */


/* Copy to userspace.  This is based on the memcpy used for
   kernel-to-kernel copying; see "string.c".  */

unsigned long
__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
	 don't have to check further for overflows.  */
      && n >= 3)
  {
    if ((unsigned long) dst & 1)
    {
      __asm_copy_to_user_1 (dst, src, retn);
      n--;
    }

    if ((unsigned long) dst & 2)
    {
      __asm_copy_to_user_2 (dst, src, retn);
      n -= 2;
    }
  }
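
  /* Illustrative note (not from the original file): a worked example of the
     alignment prefix above, for a hypothetical dst with (dst & 3) == 1 and
     n == 10:

	(dst & 1) != 0  ->  __asm_copy_to_user_1: 1 byte copied,  n == 9, (dst & 3) == 2
	(dst & 2) != 0  ->  __asm_copy_to_user_2: 2 bytes copied, n == 7, (dst & 3) == 0

     so the word-sized loops further down always see a 4-byte-aligned dst.  */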

  /* Decide which copying method to use. */
  if (n >= 44*2)		/* Break even between movem and
				   move16 is at 38.7*2, but modulo 44. */
  {
    /* For large copies we use 'movem'.  */

    /* It is not optimal to tell the compiler about clobbering any
       registers; that will move the saving/restoring of those registers
       to the function prologue/epilogue, and make non-movem sizes
       suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right, then
       check the equalities in the first comment.  It should say
       "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\
	.ifnc %0%1%2%3,$r13$r11$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll use in the movem process	\n\
	;; on the stack.					\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	;; Now we've got this:					\n\
	;; r11 - src						\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	44,$r12						\n\
								\n\
; Since the noted PC of a faulting instruction in a delay-slot of a taken \n\
; branch is that of the branch target, we actually point at the from-movem \n\
; for this case.  There is no ambiguity here; if there was a fault in that \n\
; instruction (meaning a kernel oops), the faulted PC would be the address \n\
; after *that* movem.						\n\
								\n\
0:								\n\
	movem	[$r11+],$r10					\n\
	subq	44,$r12						\n\
	bge	0b						\n\
	movem	$r10,[$r13+]					\n\
1:								\n\
	addq	44,$r12	;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
2:								\n\
	.section .fixup,\"ax\"					\n\
								\n\
; To provide a correct count in r10 of bytes that failed to be copied,	\n\
; we jump back into the loop if the loop-branch was taken.  There is no \n\
; performance penalty for sane use; the program will segfault soon enough. \n\
								\n\
3:								\n\
	move.d	[$sp],$r10					\n\
	addq	44,$r10						\n\
	move.d	$r10,[$sp]					\n\
	jump	0b						\n\
4:								\n\
	movem	[$sp+],$r10					\n\
	addq	44,$r10						\n\
	addq	44,$r12						\n\
	jump	2b						\n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	0b,3b						\n\
	.dword	1b,4b						\n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

  }

  /* Either we directly start copying, using dword copying in a loop, or
     we copy as much as possible with 'movem' and then the last block (<44
     bytes) is copied here.  This will work since 'movem' will have
     updated SRC, DST and N.  */

  while (n >= 16)
  {
    __asm_copy_to_user_16 (dst, src, retn);
    n -= 16;
  }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
  {
    __asm_copy_to_user_4 (dst, src, retn);
    n -= 4;
  }

  switch (n)
  {
    case 0:
      break;
    case 1:
      __asm_copy_to_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_to_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_to_user_3 (dst, src, retn);
      break;
  }

  return retn;
}
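
/* Illustrative sketch (not from the original file): __copy_user returns the
   number of bytes that could NOT be copied, so a minimal caller looks
   roughly like the lines below.  "ubuf", "kbuf" and "len" are hypothetical
   names; callers normally reach this function through copy_to_user(), which
   performs the access_ok() check first.

	unsigned long left = __copy_user(ubuf, kbuf, len);
	if (left != 0)
		return -EFAULT;   (only the first len - left bytes were written)
 */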

/* Copy from user to kernel, zeroing the bytes that were inaccessible in
   userland.  The return value is the number of bytes that were
   inaccessible.  */

unsigned long
__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* The best reason to align src is that we then know that a read-fault
     was for aligned bytes; there's no 1..3 remaining good bytes to
     pickle.  */
  if (((unsigned long) src & 3) != 0)
  {
    if (((unsigned long) src & 1) && n != 0)
    {
      __asm_copy_from_user_1 (dst, src, retn);
      n--;
    }

    if (((unsigned long) src & 2) && n >= 2)
    {
      __asm_copy_from_user_2 (dst, src, retn);
      n -= 2;
    }

    /* We only need one check after the unalignment-adjustments, because
       if both adjustments were done, either both or neither reference
       had an exception.  */
    if (retn != 0)
      goto copy_exception_bytes;
  }

  /* Decide which copying method to use. */
  if (n >= 44*2)		/* Break even between movem and
				   move16 is at 38.7*2, but modulo 44.
				   FIXME: We use move4 now.  */
  {
    /* For large copies we use 'movem'.  */

    /* It is not optimal to tell the compiler about clobbering any
       registers; that will move the saving/restoring of those registers
       to the function prologue/epilogue, and make non-movem sizes
       suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right, then
       check the equalities in the first comment.  It should say
       "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\n\
	.ifnc %0%1%2%3,$r13$r11$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll use in the movem process	\n\
	;; on the stack.					\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	;; Now we've got this:					\n\
	;; r11 - src						\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	44,$r12						\n\
0:								\n\
	movem	[$r11+],$r10					\n\
1:								\n\
	subq	44,$r12						\n\
	bge	0b						\n\
	movem	$r10,[$r13+]					\n\
								\n\
	addq	44,$r12	;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
4:								\n\
	.section .fixup,\"ax\"					\n\
								\n\
;; Do not jump back into the loop if we fail.  For some uses, we get a	\n\
;; page fault somewhere on the line.  Without checking for page limits, \n\
;; we don't know where, but we need to copy accurately and keep an	\n\
;; accurate count; not just clear the whole line.  To do that, we fall	\n\
;; down in the code below, proceeding with smaller amounts.  It should	\n\
;; be kept in mind that we have to cater to code like what at one time	\n\
;; was in fs/super.c:						\n\
;;  i = size - copy_from_user((void *)page, data, size);	\n\
;; which would cause repeated faults while clearing the remainder of	\n\
;; the SIZE bytes at PAGE after the first fault.		\n\
;; A caveat here is that we must not fall through from a failing page	\n\
;; to a valid page.						\n\
								\n\
3:								\n\
	movem	[$sp+],$r10					\n\
	addq	44,$r12	;; Get back count before faulting point. \n\
	subq	44,$r11	;; Get back pointer to faulting movem-line. \n\
	jump	4b	;; Fall through, pretending the fault didn't happen.\n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	1b,3b						\n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

  }

  /* Either we directly start copying here, using dword copying in a loop,
     or we copy as much as possible with 'movem' and then the last block
     (<44 bytes) is copied here.  This will work since 'movem' will have
     updated src, dst and n.  (Except with failing src.)

     Since we want to keep src accurate, we can't use
     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
     retn, but not src (by design; its value is ignored elsewhere).  */

  while (n >= 4)
  {
    __asm_copy_from_user_4 (dst, src, retn);
    n -= 4;

    if (retn)
      goto copy_exception_bytes;
  }

  /* If we get here, there were no memory read faults.  */
  switch (n)
  {
    /* These copies are at least "naturally aligned" (so we don't have
       to check each byte), due to the src alignment code before the
       movem loop.  The *_3 case *will* get the correct count for retn.  */
    case 0:
      /* This case deliberately left in (if you have doubts check the
	 generated assembly code).  */
      break;
    case 1:
      __asm_copy_from_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_from_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_from_user_3 (dst, src, retn);
      break;
  }

  /* If we get here, retn correctly reflects the number of failing
     bytes.  */
  return retn;

copy_exception_bytes:
  /* We already have "retn" bytes cleared, and need to clear the
     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
     memset is preferred here, since this isn't speed-critical code and
     we'd rather have this a leaf-function than calling memset.  */
  {
    char *endp;
    for (endp = dst + n; dst < endp; dst++)
      *dst = 0;
  }

  return retn + n;
}
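
/* Illustrative note (not from the original file): because the tail is
   zero-filled on a fault, a caller of __copy_user_zeroing (normally reached
   through copy_from_user()) may safely consume the whole buffer even after
   a partial copy.  The fs/super.c idiom quoted in the fixup comment above,

	i = size - copy_from_user((void *)page, data, size);

   relies on exactly that: only the first i bytes are user data, and the
   remaining (size - i) bytes at page are guaranteed to be zero.  */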

/* Zero userspace.  */

unsigned long
__do_clear_user (void __user *pto, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pto;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes.  */
      && n >= 3)
  {
    if ((unsigned long) dst & 1)
    {
      __asm_clear_1 (dst, retn);
      n--;
    }

    if ((unsigned long) dst & 2)
    {
      __asm_clear_2 (dst, retn);
      n -= 2;
    }
  }
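
  /* Illustrative note (not from the original file): the 'movem' clear below
     stores registers r0..r11, i.e. 12*4 == 48 bytes per iteration, which is
     where the n >= (1*48) threshold and the 12*4 adjustments in the asm come
     from; compare 11*4 == 44 bytes per 'movem' in the copy loops above,
     which use r0..r10.  */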

  /* Decide which copying method to use.
     FIXME: This number is from the "ordinary" kernel memset.  */
  if (n >= (1*48))
  {
    /* For large clears we use 'movem'.  */

    /* It is not optimal to tell the compiler about clobbering any
       call-saved registers; that will move the saving/restoring of
       those registers to the function prologue/epilogue, and make
       non-movem sizes suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right, then
       check the equalities in the first comment.  It should say
       something like "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\n\
	.ifnc %0%1%2,$r13$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll clobber in the movem process \n\
	;; on the stack.  Don't mention them to gcc, it will only be \n\
	;; upset.						\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	clear.d	$r0						\n\
	clear.d	$r1						\n\
	clear.d	$r2						\n\
	clear.d	$r3						\n\
	clear.d	$r4						\n\
	clear.d	$r5						\n\
	clear.d	$r6						\n\
	clear.d	$r7						\n\
	clear.d	$r8						\n\
	clear.d	$r9						\n\
	clear.d	$r10						\n\
	clear.d	$r11						\n\
								\n\
	;; Now we've got this:					\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	12*4,$r12					\n\
0:								\n\
	subq	12*4,$r12					\n\
	bge	0b						\n\
	movem	$r11,[$r13+]					\n\
1:								\n\
	addq	12*4,$r12 ;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
2:								\n\
	.section .fixup,\"ax\"					\n\
3:								\n\
	move.d	[$sp],$r10					\n\
	addq	12*4,$r10					\n\
	move.d	$r10,[$sp]					\n\
	clear.d	$r10						\n\
	jump	0b						\n\
								\n\
4:								\n\
	movem	[$sp+],$r10					\n\
	addq	12*4,$r10					\n\
	addq	12*4,$r12					\n\
	jump	2b						\n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	0b,3b						\n\
	.dword	1b,4b						\n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
     /* Clobber */ : "r11");
  }

  while (n >= 16)
  {
    __asm_clear_16 (dst, retn);
    n -= 16;
  }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
  {
    __asm_clear_4 (dst, retn);
    n -= 4;
  }

  switch (n)
  {
    case 0:
      break;
    case 1:
      __asm_clear_1 (dst, retn);
      break;
    case 2:
      __asm_clear_2 (dst, retn);
      break;
    case 3:
      __asm_clear_3 (dst, retn);
      break;
  }

  return retn;
}
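
/* Illustrative sketch (not from the original file): __do_clear_user, like
   the copy routines above, returns the number of bytes that could not be
   cleared.  "ubuf" and "len" are hypothetical names; users normally reach
   this function through clear_user(), which performs the access_ok() check
   first.

	if (__do_clear_user(ubuf, len) != 0)
		return -EFAULT;
 */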