/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <[email protected]>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
/*
 * Note: this routine was written with kernel use in mind (read: no simd),
 * it is only present in userspace as a temporary measure until something
 * better gets imported.
 */

#define	ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 *
 * Numeric labels encode the size range they handle, e.g. 101632 is the
 * entry point for copies of 16..32 bytes and 1256 for copies > 256 bytes;
 * the 2xxx labels are the backward-copy (overlapping) counterparts.
 */
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	/* 33..256 bytes: copy forward 32 at a time, tail via the ladder. */
	ALIGN_TEXT
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ret
	ALIGN_TEXT
101632:
	/*
	 * 16..32 bytes: two possibly-overlapping 16-byte copies, anchored
	 * at the start and at the end of the buffer.  All loads are done
	 * before any store, so overlap in either direction is safe.
	 */
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	/* > 256 bytes, no (dangerous) overlap: bulk forward copy. */
	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx				/* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx				/* any bytes left? */
	jne	100408b
.endif
	\end
	ret
100:
	/*
	 * Destination is not 16-byte aligned.  Save the first 16 bytes,
	 * round rdi up to a 16-byte boundary (adjusting rsi and the count
	 * to match), do the aligned bulk copy, then store the saved head
	 * last - it may overlap the first aligned chunk.
	 */
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx				/* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx				/* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	/*
	 * Point rdi/rsi at the last qword of each buffer; the 2xxx ladder
	 * below addresses relative to that position.
	 */
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

	ALIGN_TEXT
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	/*
	 * Sub-qword tails are copied from within the current 8-byte slot,
	 * hence the positive 4/6/7 displacements off the slot base.
	 */
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
	/* > 256 bytes backwards: rep movsq with DF set, tail via 2004. */
	std
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
	\end
	ret
.endif
.endm

/* Set up the return value (dst) and move the count into rcx. */
.macro MEMMOVE_BEGIN
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
.endm

#ifndef MEMCPY
ENTRY(memmove)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove)
#else
ENTRY(memcpy)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy)
#endif

	.section .note.GNU-stack,"",%progbits