/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2023 Linus Torvalds <[email protected]>
 */

#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/objtool.h>
#include <asm/asm.h>

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 *
 * This copies from user space into kernel space, but the kernel
 * space accesses can take a machine check exception, so they too
 * need exception handling.
 *
 * Note: only 32-bit and 64-bit stores have non-temporal versions,
 * and we only use the aligned versions. Any unaligned parts at the
 * start or end of the copy will be done using normal cached stores.
 *
 * Input:
 *	rdi	destination
 *	rsi	source
 *	edx	count
 *
 * Output:
 *	rax	uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(__copy_user_nocache)
	ANNOTATE_NOENDBR
	/* If the destination is not 8-byte aligned, we'll have to align it */
	testb $7,%dil
	jne .Lalign

.Lis_aligned:
	cmp $64,%edx
	jb .Lquadwords

	.p2align 4,0x90
.Lunrolled:
10:	movq (%rsi),%r8
11:	movq 8(%rsi),%r9
12:	movq 16(%rsi),%r10
13:	movq 24(%rsi),%r11
20:	movnti %r8,(%rdi)
21:	movnti %r9,8(%rdi)
22:	movnti %r10,16(%rdi)
23:	movnti %r11,24(%rdi)
30:	movq 32(%rsi),%r8
31:	movq 40(%rsi),%r9
32:	movq 48(%rsi),%r10
33:	movq 56(%rsi),%r11
40:	movnti %r8,32(%rdi)
41:	movnti %r9,40(%rdi)
42:	movnti %r10,48(%rdi)
43:	movnti %r11,56(%rdi)

	addq $64,%rsi
	addq $64,%rdi
	sub $64,%edx
	cmp $64,%edx
	jae .Lunrolled

/*
 * The first set of user mode loads has been done without any
 * stores, so if any of them fault we can just fall back to
 * the non-unrolled loop.
 */
	_ASM_EXTABLE_UA(10b, .Lquadwords)
	_ASM_EXTABLE_UA(11b, .Lquadwords)
	_ASM_EXTABLE_UA(12b, .Lquadwords)
	_ASM_EXTABLE_UA(13b, .Lquadwords)

/*
 * The second set of user mode loads is done after 32 bytes have
 * already been stored to the destination, so we need to take that
 * into account before falling back to the non-unrolled loop.
 */
	_ASM_EXTABLE_UA(30b, .Lfixup32)
	_ASM_EXTABLE_UA(31b, .Lfixup32)
	_ASM_EXTABLE_UA(32b, .Lfixup32)
	_ASM_EXTABLE_UA(33b, .Lfixup32)

/*
 * An exception on a write means that we're done, but we need to
 * update the count depending on where in the unrolled loop we were.
 */
	_ASM_EXTABLE_UA(20b, .Ldone0)
	_ASM_EXTABLE_UA(21b, .Ldone8)
	_ASM_EXTABLE_UA(22b, .Ldone16)
	_ASM_EXTABLE_UA(23b, .Ldone24)
	_ASM_EXTABLE_UA(40b, .Ldone32)
	_ASM_EXTABLE_UA(41b, .Ldone40)
	_ASM_EXTABLE_UA(42b, .Ldone48)
	_ASM_EXTABLE_UA(43b, .Ldone56)

.Lquadwords:
	cmp $8,%edx
	jb .Llong
50:	movq (%rsi),%rax
51:	movnti %rax,(%rdi)
	addq $8,%rsi
	addq $8,%rdi
	sub $8,%edx
	jmp .Lquadwords

/*
 * If we fail on the last full quadword, we will not try to do any
 * byte-wise cached accesses. We will try to do one more 4-byte
 * uncached one, though.
 */
	_ASM_EXTABLE_UA(50b, .Llast4)
	_ASM_EXTABLE_UA(51b, .Ldone0)

.Llong:
	test $4,%dl
	je .Lword
60:	movl (%rsi),%eax
61:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
.Lword:
	sfence
	test $2,%dl
	je .Lbyte
70:	movw (%rsi),%ax
71:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lbyte:
	test $1,%dl
	je .Ldone
80:	movb (%rsi),%al
81:	movb %al,(%rdi)
	dec %edx
.Ldone:
	mov %edx,%eax
	RET

/*
 * If we fail on the last four bytes, we won't bother with any
 * fixups. It's dead, Jim. Note that there's no need for an 'sfence'
 * for any of this, since taking the exception is itself serializing.
 */
	_ASM_EXTABLE_UA(60b, .Ldone)
	_ASM_EXTABLE_UA(61b, .Ldone)
	_ASM_EXTABLE_UA(70b, .Ldone)
	_ASM_EXTABLE_UA(71b, .Ldone)
	_ASM_EXTABLE_UA(80b, .Ldone)
	_ASM_EXTABLE_UA(81b, .Ldone)

/*
 * This is the "head needs aligning" case when the destination
 * isn't 8-byte aligned. The 4-byte case can be done uncached,
 * but any smaller alignment is done with regular stores.
 */
.Lalign:
	test $1,%dil
	je .Lalign_word
	test %edx,%edx
	je .Ldone
90:	movb (%rsi),%al
91:	movb %al,(%rdi)
	inc %rsi
	inc %rdi
	dec %edx
.Lalign_word:
	test $2,%dil
	je .Lalign_long
	cmp $2,%edx
	jb .Lbyte
92:	movw (%rsi),%ax
93:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lalign_long:
	test $4,%dil
	je .Lis_aligned
	cmp $4,%edx
	jb .Lword
94:	movl (%rsi),%eax
95:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
	jmp .Lis_aligned

/*
 * If we fail on the initial alignment accesses, we're all done.
 * Again, no point in trying to do byte-by-byte probing if the
 * 4-byte load fails - we're not doing any uncached accesses any more.
 */
	_ASM_EXTABLE_UA(90b, .Ldone)
	_ASM_EXTABLE_UA(91b, .Ldone)
	_ASM_EXTABLE_UA(92b, .Ldone)
	_ASM_EXTABLE_UA(93b, .Ldone)
	_ASM_EXTABLE_UA(94b, .Ldone)
	_ASM_EXTABLE_UA(95b, .Ldone)

/*
 * Exception table fixups for faults in the middle of the unrolled
 * store sequence: '.LdoneN' means that N bytes had already been
 * written when the fault hit, so subtract them from the remaining
 * count by falling through the chain below.
 */
.Ldone56: sub $8,%edx
.Ldone48: sub $8,%edx
.Ldone40: sub $8,%edx
.Ldone32: sub $8,%edx
.Ldone24: sub $8,%edx
.Ldone16: sub $8,%edx
.Ldone8: sub $8,%edx
.Ldone0:
	mov %edx,%eax
	RET

.Lfixup32:
	addq $32,%rsi
	addq $32,%rdi
	sub $32,%edx
	jmp .Lquadwords

.Llast4:
52:	movl (%rsi),%eax
53:	movnti %eax,(%rdi)
	sfence
	sub $4,%edx
	mov %edx,%eax
	RET
	_ASM_EXTABLE_UA(52b, .Ldone0)
	_ASM_EXTABLE_UA(53b, .Ldone0)

SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
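
/*
 * For reference, a minimal sketch of how a caller might use this
 * routine, going purely by the register contract documented at the
 * top (rdi = dst, rsi = src, edx = count, rax = bytes left
 * uncopied).  The wrapper name below is made up for illustration
 * and elides whatever extra checking the kernel's real uaccess
 * wrappers add; since the loads touch user space, user access has
 * to be opened around the call with stac()/clac():
 *
 *	long __copy_user_nocache(void *dst, const void __user *src,
 *				 unsigned int size);
 *
 *	static long
 *	example_copy_from_user_nocache(void *dst, const void __user *src,
 *				       unsigned int size)
 *	{
 *		long left;
 *
 *		stac();		// allow user space accesses (SMAP)
 *		left = __copy_user_nocache(dst, src, size);
 *		clac();		// disallow them again
 *		return left;	// 0 on success, else bytes not copied
 *	}
 */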