/* Path: blob/main/contrib/bionic-x86_64-string/sse2-strcpy-slm.S */
/* 39475 views */
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Syntax: GNU as, AT&T operand order, run through the C preprocessor.  */

#ifndef USE_AS_STRCAT

# ifndef STRCPY
#  define STRCPY	strcpy
# endif

# ifndef L
#  define L(label)	.L##label
# endif

# ifndef cfi_startproc
#  define cfi_startproc	.cfi_startproc
# endif

# ifndef cfi_endproc
#  define cfi_endproc	.cfi_endproc
# endif

# ifndef ENTRY
#  define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
# endif

# ifndef END
#  define END(name)	\
	cfi_endproc;	\
	.size name, .-name
# endif

#endif

/* Jump tables hold 32-bit offsets relative to the table itself, so they
   need no load-time relocation.  BRANCH_TO_JMPTBL_ENTRY clobbers %r11
   and %rcx.  */
#define JMPTBL(I, B)	I - B
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
	lea	TABLE(%rip), %r11;	\
	movslq	(%r11, INDEX, SCALE), %rcx;	\
	lea	(%r11, %rcx), %rcx;	\
	jmp	*%rcx

/*
 * STRCPY -- SSE2 string copy.
 *
 * In:   %rdi = destination, %rsi = source,
 *       %rdx = byte limit (only read when USE_AS_STRNCPY is defined;
 *              it is kept in %r8 from then on).
 * Out:  %rax = destination as passed in, unless USE_AS_STPCPY is defined,
 *       in which case the exit stubs compute %rax from the final %rdi.
 * Scratch: %rcx, %rdx, %rsi, %rdi, %r8, %r10, %r11, %xmm0-%xmm7, flags.
 *
 * Build variants selected by the including file:
 *   USE_AS_STRNCPY  bounded copy; the plain variant also zero-fills the
 *                   remainder of the destination.
 *   USE_AS_STPCPY   see "Out" above.
 *   USE_AS_STRCAT   ENTRY, RETURN and the initial register setup are not
 *                   emitted here -- presumably the including file
 *                   provides them; TODO confirm against that file.
 *
 * %xmm0 is kept all-zero on every path that continues copying: it is only
 * ever the destination of a compare whose result was "no NUL found".
 */

#ifndef USE_AS_STRCAT

# define RETURN ret

.text
ENTRY (STRCPY)
# ifdef USE_AS_STRNCPY
	mov	%rdx, %r8
	test	%r8, %r8
	jz	L(ExitZero)
# endif
	mov	%rsi, %rcx
# ifndef USE_AS_STPCPY
	mov	%rdi, %rax	/* save result */
# endif

#endif
	/* Source offset inside its 64-byte block.  At most 32: the first
	   32 bytes can be read with two unaligned loads from (%rsi).  */
	and	$63, %rcx
	cmp	$32, %rcx
	jbe	L(SourceStringAlignmentLess32)

	and	$-16, %rsi
	and	$15, %rcx
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	/* NUL mask of the aligned block holding the start of the string;
	   the shift discards the bytes that precede the string.  */
	pcmpeqb	(%rsi), %xmm1
	pmovmskb %xmm1, %rdx
	shr	%cl, %rdx
#ifdef USE_AS_STRNCPY
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	mov	$16, %r10
	sub	%rcx, %r10
	cmp	%r10, %r8
# else
	mov	$17, %r10
	sub	%rcx, %r10
	cmp	%r10, %r8
# endif
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
#endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail)

	pcmpeqb	16(%rsi), %xmm0
	pmovmskb %xmm0, %rdx
#ifdef USE_AS_STRNCPY
	add	$16, %r10
	cmp	%r10, %r8
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes)

	movdqu	(%rsi, %rcx), %xmm1	/* copy 16 bytes */
	movdqu	%xmm1, (%rdi)

/* If source address alignment != destination address alignment */
	.p2align 4
L(Unalign16Both):
	/* From here %rsi is 16-byte aligned and %rcx is the running offset
	   applied to both %rsi and the rebased %rdi.  */
	sub	%rcx, %rdi
#ifdef USE_AS_STRNCPY
	add	%rcx, %r8
#endif
	mov	$16, %rcx
	movdqa	(%rsi, %rcx), %xmm1
	movaps	16(%rsi, %rcx), %xmm2
	movdqu	%xmm1, (%rdi, %rcx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$48, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%rsi, %rcx), %xmm3
	movdqu	%xmm2, (%rdi, %rcx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%rsi, %rcx), %xmm4
	movdqu	%xmm3, (%rdi, %rcx)
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%rsi, %rcx), %xmm1
	movdqu	%xmm4, (%rdi, %rcx)
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm1)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%rsi, %rcx), %xmm2
	movdqu	%xmm1, (%rdi, %rcx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	movaps	16(%rsi, %rcx), %xmm3
	movdqu	%xmm2, (%rdi, %rcx)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
#endif
	test	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	/* Round the source down to a 64-byte boundary and move %rdi (and
	   the strncpy budget in %r8) by the same distance.  */
	movdqu	%xmm3, (%rdi, %rcx)
	mov	%rsi, %rdx
	lea	16(%rsi, %rcx), %rsi
	and	$-0x40, %rsi
	sub	%rsi, %rdx
	sub	%rdx, %rdi
#ifdef USE_AS_STRNCPY
	lea	128(%r8, %rdx), %r8
#endif
L(Unaligned64Loop):
	/* pminub folds four 16-byte chunks into one so that a single
	   compare against zero detects a NUL anywhere in the 64 bytes.
	   %xmm4-%xmm7 keep the unmodified chunks for the later stores.  */
	movaps	(%rsi), %xmm2
	movaps	%xmm2, %xmm4
	movaps	16(%rsi), %xmm5
	movaps	32(%rsi), %xmm3
	movaps	%xmm3, %xmm6
	movaps	48(%rsi), %xmm7
	pminub	%xmm5, %xmm2
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %rdx
#ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(UnalignedLeaveCase2OrCase3)
#endif
	test	%rdx, %rdx
	jnz	L(Unaligned64Leave)

L(Unaligned64Loop_start):
	/* Stores of the previous 64 bytes are interleaved with the loads
	   of the next 64 bytes.  */
	add	$64, %rdi
	add	$64, %rsi
	movdqu	%xmm4, -64(%rdi)
	movaps	(%rsi), %xmm2
	movdqa	%xmm2, %xmm4
	movdqu	%xmm5, -48(%rdi)
	movaps	16(%rsi), %xmm5
	pminub	%xmm5, %xmm2
	movaps	32(%rsi), %xmm3
	movdqu	%xmm6, -32(%rdi)
	movaps	%xmm3, %xmm6
	movdqu	%xmm7, -16(%rdi)
	movaps	48(%rsi), %xmm7
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %rdx
#ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(UnalignedLeaveCase2OrCase3)
#endif
	test	%rdx, %rdx
	jz	L(Unaligned64Loop_start)

L(Unaligned64Leave):
	/* A NUL is somewhere in %xmm4-%xmm7; find the first chunk that
	   contains it.  */
	pxor	%xmm1, %xmm1

	pcmpeqb	%xmm4, %xmm0
	pcmpeqb	%xmm5, %xmm1
	pmovmskb %xmm0, %rdx
	pmovmskb %xmm1, %rcx
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesUnaligned_0)
	test	%rcx, %rcx
	jnz	L(CopyFrom1To16BytesUnaligned_16)

	pcmpeqb	%xmm6, %xmm0
	pcmpeqb	%xmm7, %xmm1
	pmovmskb %xmm0, %rdx
	pmovmskb %xmm1, %rcx
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesUnaligned_32)

	bsf	%rcx, %rdx
	movdqu	%xmm4, (%rdi)
	movdqu	%xmm5, 16(%rdi)
	movdqu	%xmm6, 32(%rdi)
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
	lea	48(%rdi, %rdx), %rax
# endif
	movdqu	%xmm7, 48(%rdi)
	add	$15, %r8
	sub	%rdx, %r8
	lea	49(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$48, %rsi
	add	$48, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif

/* If source address alignment == destination address alignment */

L(SourceStringAlignmentLess32):
	pxor	%xmm0, %xmm0
	movdqu	(%rsi), %xmm1
	movdqu	16(%rsi), %xmm2
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

#ifdef USE_AS_STRNCPY
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	cmp	$16, %r8
# else
	cmp	$17, %r8
# endif
	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
#endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1)

	pcmpeqb	%xmm2, %xmm0
	movdqu	%xmm1, (%rdi)
	pmovmskb %xmm0, %rdx

#ifdef USE_AS_STRNCPY
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
	cmp	$32, %r8
# else
	cmp	$33, %r8
# endif
	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
#endif
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes1)

	and	$15, %rcx
	and	$-16, %rsi

	jmp	L(Unalign16Both)

/*------End of main part with loops---------------------*/

/* Case1: a NUL was found and no length limit applies to this chunk.
   %rdx becomes the index of the NUL, which selects the L(ExitN) stub.  */

#if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
	.p2align 4
L(CopyFrom1To16Bytes):
	add	%rcx, %rdi
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif
	.p2align 4
L(CopyFrom1To16BytesTail):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1):
	add	$16, %rsi
	add	$16, %rdi
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$16, %r8
#endif
L(CopyFrom1To16BytesTail1):
	bsf	%rdx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes):
	bsf	%rdx, %rdx
	add	%rcx, %rsi
	add	$16, %rdx
	sub	%rcx, %rdx
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
	bsf	%rdx, %rdx
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %rax
# endif
	movdqu	%xmm4, (%rdi)
	add	$63, %r8
	sub	%rdx, %r8
	lea	1(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
#else
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif

	.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
	bsf	%rcx, %rdx
	movdqu	%xmm4, (%rdi)
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
	lea	16(%rdi, %rdx), %rax
# endif
	movdqu	%xmm5, 16(%rdi)
	add	$47, %r8
	sub	%rdx, %r8
	lea	17(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$16, %rsi
	add	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif

	.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
	bsf	%rdx, %rdx
	movdqu	%xmm4, (%rdi)
	movdqu	%xmm5, 16(%rdi)
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
	lea	32(%rdi, %rdx), %rax
# endif
	movdqu	%xmm6, 32(%rdi)
	add	$31, %r8
	sub	%rdx, %r8
	lea	33(%rdi, %rdx), %rdi
	jmp	L(StrncpyFillTailWithZero)
#else
	add	$32, %rsi
	add	$32, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
#endif

#ifdef USE_AS_STRNCPY
# ifndef USE_AS_STRCAT
	/* Store the chunk that contains the NUL whole, then let
	   L(CopyFrom1To16BytesXmmExit) zero-fill what follows it.  */
	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm6):
	movdqu	%xmm6, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm5):
	movdqu	%xmm5, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm4):
	movdqu	%xmm4, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm3):
	movdqu	%xmm3, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)

	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm1):
	movdqu	%xmm1, (%rdi, %rcx)
	jmp	L(CopyFrom1To16BytesXmmExit)
# endif

	.p2align 4
L(CopyFrom1To16BytesExit):
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)

/* Case2: the length limit is reached in a chunk that also contains a NUL.
   If the NUL index is below the remaining count the normal L(ExitN) stub
   is used, otherwise exactly %r8 bytes are copied.  */

	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %r8
	add	%rcx, %rdi
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	add	$16, %rdx
	sub	%rcx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

L(CopyFrom1To16BytesTailCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

L(CopyFrom1To16BytesTail1Case2):
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

/* Case2 or Case3,  Case3: the length limit is reached and the current
   chunk holds no NUL, so exactly %r8 bytes are copied.  */

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesCase2)
L(CopyFrom1To16BytesCase3):
	add	$16, %r8
	add	%rcx, %rdi
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32BytesCase2)
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTailCase2)
	add	%rcx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
	add	$16, %rdi
	add	$16, %rsi
	sub	$16, %r8
L(CopyFrom1To16BytesTail1Case2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1Case2)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

#endif

/*------------End of labels for copying 1-16 bytes and 1-32 bytes----*/

/* L(ExitN): copy the last N bytes of the string, terminator included,
   from (%rsi) to (%rdi) using overlapping loads/stores where that saves
   instructions.  These stubs are entered through L(ExitTable) with
   %rdx = N - 1, so %dh is zero wherever it is stored as the terminator.
   In the plain strncpy build the remaining count in %r8 is reduced by N
   and, if anything is left, the tail is zero-filled (lea leaves the flags
   of the preceding sub intact for the jnz).  */

	.p2align 4
L(Exit1):
	mov	%dh, (%rdi)
#ifdef USE_AS_STPCPY
	lea	(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$1, %r8
	lea	1(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit2):
	mov	(%rsi), %dx
	mov	%dx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	1(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$2, %r8
	lea	2(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit3):
	mov	(%rsi), %cx
	mov	%cx, (%rdi)
	mov	%dh, 2(%rdi)
#ifdef USE_AS_STPCPY
	lea	2(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$3, %r8
	lea	3(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit4):
	mov	(%rsi), %edx
	mov	%edx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	3(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$4, %r8
	lea	4(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit5):
	mov	(%rsi), %ecx
	mov	%dh, 4(%rdi)
	mov	%ecx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	4(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$5, %r8
	lea	5(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit6):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dx
	mov	%ecx, (%rdi)
	mov	%dx, 4(%rdi)
#ifdef USE_AS_STPCPY
	lea	5(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$6, %r8
	lea	6(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit7):
	mov	(%rsi), %ecx
	mov	3(%rsi), %edx
	mov	%ecx, (%rdi)
	mov	%edx, 3(%rdi)
#ifdef USE_AS_STPCPY
	lea	6(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$7, %r8
	lea	7(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit8):
	mov	(%rsi), %rdx
	mov	%rdx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	7(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$8, %r8
	lea	8(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit9):
	mov	(%rsi), %rcx
	mov	%dh, 8(%rdi)
	mov	%rcx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	8(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$9, %r8
	lea	9(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit10):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dx
	mov	%rcx, (%rdi)
	mov	%dx, 8(%rdi)
#ifdef USE_AS_STPCPY
	lea	9(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$10, %r8
	lea	10(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit11):
	mov	(%rsi), %rcx
	mov	7(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 7(%rdi)
#ifdef USE_AS_STPCPY
	lea	10(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$11, %r8
	lea	11(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit12):
	mov	(%rsi), %rcx
	mov	8(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 8(%rdi)
#ifdef USE_AS_STPCPY
	lea	11(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$12, %r8
	lea	12(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit13):
	mov	(%rsi), %rcx
	mov	5(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 5(%rdi)
#ifdef USE_AS_STPCPY
	lea	12(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$13, %r8
	lea	13(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit14):
	mov	(%rsi), %rcx
	mov	6(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 6(%rdi)
#ifdef USE_AS_STPCPY
	lea	13(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$14, %r8
	lea	14(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit15):
	mov	(%rsi), %rcx
	mov	7(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 7(%rdi)
#ifdef USE_AS_STPCPY
	lea	14(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$15, %r8
	lea	15(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit16):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
#ifdef USE_AS_STPCPY
	lea	15(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$16, %r8
	lea	16(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit17):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
	mov	%dh, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	16(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$17, %r8
	lea	17(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit18):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%cx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	17(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$18, %r8
	lea	18(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit19):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	18(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$19, %r8
	lea	19(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit20):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	19(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$20, %r8
	lea	20(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit21):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	mov	%dh, 20(%rdi)
#ifdef USE_AS_STPCPY
	lea	20(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$21, %r8
	lea	21(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit22):
	movdqu	(%rsi), %xmm0
	mov	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 14(%rdi)
#ifdef USE_AS_STPCPY
	lea	21(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$22, %r8
	lea	22(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit23):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	22(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$23, %r8
	lea	23(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit24):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	23(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$24, %r8
	lea	24(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit25):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
	mov	%dh, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	24(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$25, %r8
	lea	25(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit26):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cx, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	25(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$26, %r8
	lea	26(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit27):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 23(%rdi)
#ifdef USE_AS_STPCPY
	lea	26(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$27, %r8
	lea	27(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit28):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	27(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$28, %r8
	lea	28(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit29):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
#ifdef USE_AS_STPCPY
	lea	28(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$29, %r8
	lea	29(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit30):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
#ifdef USE_AS_STPCPY
	lea	29(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$30, %r8
	lea	30(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit31):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	30(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$31, %r8
	lea	31(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

	.p2align 4
L(Exit32):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	31(%rdi), %rax
#endif
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
	sub	$32, %r8
	lea	32(%rdi), %rdi
	jnz	L(StrncpyFillTailWithZero)
#endif
	RETURN

#ifdef USE_AS_STRNCPY

/* L(StrncpyExitN): the length limit ends the copy.  Exactly N bytes are
   copied from (%rsi) to (%rdi); these stubs are entered through
   L(ExitStrncpyTable) with %r8 = N.  Under USE_AS_STRCAT a NUL is stored
   at offset N as well; under USE_AS_STPCPY %rax = %rdi + N.  */

	.p2align 4
L(StrncpyExit0):
#ifdef USE_AS_STPCPY
	mov	%rdi, %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, (%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit1):
	mov	(%rsi), %dl
	mov	%dl, (%rdi)
#ifdef USE_AS_STPCPY
	lea	1(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 1(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit2):
	mov	(%rsi), %dx
	mov	%dx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	2(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 2(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit3):
	mov	(%rsi), %cx
	mov	2(%rsi), %dl
	mov	%cx, (%rdi)
	mov	%dl, 2(%rdi)
#ifdef USE_AS_STPCPY
	lea	3(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 3(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit4):
	mov	(%rsi), %edx
	mov	%edx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	4(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 4(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit5):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dl
	mov	%ecx, (%rdi)
	mov	%dl, 4(%rdi)
#ifdef USE_AS_STPCPY
	lea	5(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 5(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit6):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dx
	mov	%ecx, (%rdi)
	mov	%dx, 4(%rdi)
#ifdef USE_AS_STPCPY
	lea	6(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 6(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit7):
	mov	(%rsi), %ecx
	mov	3(%rsi), %edx
	mov	%ecx, (%rdi)
	mov	%edx, 3(%rdi)
#ifdef USE_AS_STPCPY
	lea	7(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 7(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit8):
	mov	(%rsi), %rdx
	mov	%rdx, (%rdi)
#ifdef USE_AS_STPCPY
	lea	8(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 8(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit9):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dl
	mov	%rcx, (%rdi)
	mov	%dl, 8(%rdi)
#ifdef USE_AS_STPCPY
	lea	9(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 9(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit10):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dx
	mov	%rcx, (%rdi)
	mov	%dx, 8(%rdi)
#ifdef USE_AS_STPCPY
	lea	10(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 10(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit11):
	mov	(%rsi), %rcx
	mov	7(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 7(%rdi)
#ifdef USE_AS_STPCPY
	lea	11(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 11(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit12):
	mov	(%rsi), %rcx
	mov	8(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 8(%rdi)
#ifdef USE_AS_STPCPY
	lea	12(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 12(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit13):
	mov	(%rsi), %rcx
	mov	5(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 5(%rdi)
#ifdef USE_AS_STPCPY
	lea	13(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 13(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit14):
	mov	(%rsi), %rcx
	mov	6(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 6(%rdi)
#ifdef USE_AS_STPCPY
	lea	14(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 14(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit15):
	mov	(%rsi), %rcx
	mov	7(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 7(%rdi)
#ifdef USE_AS_STPCPY
	lea	15(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 15(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit16):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
#ifdef USE_AS_STPCPY
	lea	16(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 16(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit17):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	mov	%cl, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	17(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 17(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit18):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%cx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	18(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 18(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit19):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	19(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 19(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit20):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	20(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 20(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit21):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	mov	20(%rsi), %dl
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	mov	%dl, 20(%rdi)
#ifdef USE_AS_STPCPY
	lea	21(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 21(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit22):
	movdqu	(%rsi), %xmm0
	mov	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 14(%rdi)
#ifdef USE_AS_STPCPY
	lea	22(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 22(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit23):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	23(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 23(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit24):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	24(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 24(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit25):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cl, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	25(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 25(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit26):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cx, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	26(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 26(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit27):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 23(%rdi)
#ifdef USE_AS_STPCPY
	lea	27(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 27(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit28):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 24(%rdi)
#ifdef USE_AS_STPCPY
	lea	28(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 28(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit29):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
#ifdef USE_AS_STPCPY
	lea	29(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 29(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit30):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
#ifdef USE_AS_STPCPY
	lea	30(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 30(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit31):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
#ifdef USE_AS_STPCPY
	lea	31(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 31(%rdi)
#endif
	RETURN

	.p2align 4
L(StrncpyExit32):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
#ifdef USE_AS_STPCPY
	lea	32(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 32(%rdi)
#endif
	RETURN

	/* Unlike its siblings this stub has no USE_AS_STPCPY result.  */
	.p2align 4
L(StrncpyExit33):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	mov	32(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	mov	%cl, 32(%rdi)
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 33(%rdi)
#endif
	RETURN

#ifndef USE_AS_STRCAT

/* L(FillN): write the last N zero bytes of the strncpy padding.  They are
   reached through L(FillTable) from L(StrncpyFillTailWithZero), which has
   cleared %rdx and %xmm0.  Fill3, Fill7 and Fill15 use one wider store
   that starts at -1(%rdi); this assumes the byte just before %rdi is
   already zero.  */

	.p2align 4
L(Fill0):
	RETURN

	.p2align 4
L(Fill1):
	mov	%dl, (%rdi)
	RETURN

	.p2align 4
L(Fill2):
	mov	%dx, (%rdi)
	RETURN

	.p2align 4
L(Fill3):
	mov	%edx, -1(%rdi)
	RETURN

	.p2align 4
L(Fill4):
	mov	%edx, (%rdi)
	RETURN

	.p2align 4
L(Fill5):
	mov	%edx, (%rdi)
	mov	%dl, 4(%rdi)
	RETURN

	.p2align 4
L(Fill6):
	mov	%edx, (%rdi)
	mov	%dx, 4(%rdi)
	RETURN

	.p2align 4
L(Fill7):
	mov	%rdx, -1(%rdi)
	RETURN

	.p2align 4
L(Fill8):
	mov	%rdx, (%rdi)
	RETURN

	.p2align 4
L(Fill9):
	mov	%rdx, (%rdi)
	mov	%dl, 8(%rdi)
	RETURN

	.p2align 4
L(Fill10):
	mov	%rdx, (%rdi)
	mov	%dx, 8(%rdi)
	RETURN

	.p2align 4
L(Fill11):
	mov	%rdx, (%rdi)
	mov	%edx, 7(%rdi)
	RETURN

	.p2align 4
L(Fill12):
	mov	%rdx, (%rdi)
	mov	%edx, 8(%rdi)
	RETURN

	.p2align 4
L(Fill13):
	mov	%rdx, (%rdi)
	mov	%rdx, 5(%rdi)
	RETURN

	.p2align 4
L(Fill14):
	mov	%rdx, (%rdi)
	mov	%rdx, 6(%rdi)
	RETURN

	.p2align 4
L(Fill15):
	movdqu	%xmm0, -1(%rdi)
	RETURN

	.p2align 4
L(Fill16):
	movdqu	%xmm0, (%rdi)
	RETURN

	/* Falls through into L(CopyFrom1To16BytesXmmExit).  */
	.p2align 4
L(CopyFrom1To16BytesUnalignedXmm2):
	movdqu	%xmm2, (%rdi, %rcx)

	/* Falls through into L(StrncpyFillTailWithZero) with %rdi just past
	   the NUL and %r8 adjusted by the NUL's index.  */
	.p2align 4
L(CopyFrom1To16BytesXmmExit):
	bsf	%rdx, %rdx
	add	$15, %r8
	add	%rcx, %rdi
#ifdef USE_AS_STPCPY
	lea	(%rdi, %rdx), %rax
#endif
	sub	%rdx, %r8
	lea	1(%rdi, %rdx), %rdi

	/* Zero %r8 bytes starting at %rdi: one unaligned 16-byte store, then
	   aligned stores after rounding %rdi down to 16 (with %r8 grown by
	   the same amount), then a FillN stub for the last 0-16 bytes.  */
	.p2align 4
L(StrncpyFillTailWithZero):
	pxor	%xmm0, %xmm0
	xor	%rdx, %rdx
	sub	$16, %r8
	jbe	L(StrncpyFillExit)

	movdqu	%xmm0, (%rdi)
	add	$16, %rdi

	mov	%rdi, %rsi
	and	$0xf, %rsi
	sub	%rsi, %rdi
	add	%rsi, %r8
	sub	$64, %r8
	jb	L(StrncpyFillLess64)

L(StrncpyFillLoopMovdqa):
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 16(%rdi)
	movdqa	%xmm0, 32(%rdi)
	movdqa	%xmm0, 48(%rdi)
	add	$64, %rdi
	sub	$64, %r8
	jae	L(StrncpyFillLoopMovdqa)

L(StrncpyFillLess64):
	add	$32, %r8
	jl	L(StrncpyFillLess32)
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 16(%rdi)
	add	$32, %rdi
	sub	$16, %r8
	jl	L(StrncpyFillExit)
	movdqa	%xmm0, (%rdi)
	add	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)

L(StrncpyFillLess32):
	add	$16, %r8
	jl	L(StrncpyFillExit)
	movdqa	%xmm0, (%rdi)
	add	$16, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)

L(StrncpyFillExit):
	add	$16, %r8
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)

/* end of ifndef USE_AS_STRCAT */
#endif

	/* The length limit was reached inside the 64-byte loop; %xmm4-%xmm7
	   hold the four chunks that have been loaded but not yet stored.  */
	.p2align 4
L(UnalignedLeaveCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(Unaligned64LeaveCase2)
L(Unaligned64LeaveCase3):
	lea	64(%r8), %rcx
	and	$-16, %rcx
	add	$48, %r8
	jl	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm4, (%rdi)
	sub	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm5, 16(%rdi)
	sub	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm6, 32(%rdi)
	sub	$16, %r8
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm7, 48(%rdi)
#ifdef USE_AS_STPCPY
	lea	64(%rdi), %rax
#endif
#ifdef USE_AS_STRCAT
	xor	%ch, %ch
	movb	%ch, 64(%rdi)
#endif
	RETURN

	.p2align 4
L(Unaligned64LeaveCase2):
	xor	%rcx, %rcx
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %rdx
	add	$48, %r8
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rdx, %rdx
#ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif
	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm4, (%rdi)
	add	$16, %rcx
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rdx, %rdx
#ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm5)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm5, 16(%rdi)
	add	$16, %rcx
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rdx, %rdx
#ifndef USE_AS_STRCAT
	jnz	L(CopyFrom1To16BytesUnalignedXmm6)
#else
	jnz	L(CopyFrom1To16Bytes)
#endif

	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %rdx
	movdqu	%xmm6, 32(%rdi)
	lea	16(%rdi, %rcx), %rdi
	lea	16(%rsi, %rcx), %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)

	/* Reached from the entry sequence when the byte limit is zero.  */
	.p2align 4
L(ExitZero):
#ifndef USE_AS_STRCAT
	mov	%rdi, %rax
#endif
	RETURN

#endif

#ifndef USE_AS_STRCAT
END (STRCPY)
#else
END (STRCAT)
#endif
	/* This .p2align acts on the section that is current here, i.e. the
	   one being left, not on the tables below.  */
	.p2align 4
	.section .rodata
	.p2align 2	/* align the 32-bit table entries themselves */
/* Entry i (indexed by the NUL's offset) dispatches to L(Exit<i+1>).  */
L(ExitTable):
	.int	JMPTBL(L(Exit1), L(ExitTable))
	.int	JMPTBL(L(Exit2), L(ExitTable))
	.int	JMPTBL(L(Exit3), L(ExitTable))
	.int	JMPTBL(L(Exit4), L(ExitTable))
	.int	JMPTBL(L(Exit5), L(ExitTable))
	.int	JMPTBL(L(Exit6), L(ExitTable))
	.int	JMPTBL(L(Exit7), L(ExitTable))
	.int	JMPTBL(L(Exit8), L(ExitTable))
	.int	JMPTBL(L(Exit9), L(ExitTable))
	.int	JMPTBL(L(Exit10), L(ExitTable))
	.int	JMPTBL(L(Exit11), L(ExitTable))
	.int	JMPTBL(L(Exit12), L(ExitTable))
	.int	JMPTBL(L(Exit13), L(ExitTable))
	.int	JMPTBL(L(Exit14), L(ExitTable))
	.int	JMPTBL(L(Exit15), L(ExitTable))
	.int	JMPTBL(L(Exit16), L(ExitTable))
	.int	JMPTBL(L(Exit17), L(ExitTable))
	.int	JMPTBL(L(Exit18), L(ExitTable))
	.int	JMPTBL(L(Exit19), L(ExitTable))
	.int	JMPTBL(L(Exit20), L(ExitTable))
	.int	JMPTBL(L(Exit21), L(ExitTable))
	.int	JMPTBL(L(Exit22), L(ExitTable))
	.int	JMPTBL(L(Exit23), L(ExitTable))
	.int	JMPTBL(L(Exit24), L(ExitTable))
	.int	JMPTBL(L(Exit25), L(ExitTable))
	.int	JMPTBL(L(Exit26), L(ExitTable))
	.int	JMPTBL(L(Exit27), L(ExitTable))
	.int	JMPTBL(L(Exit28), L(ExitTable))
	.int	JMPTBL(L(Exit29), L(ExitTable))
	.int	JMPTBL(L(Exit30), L(ExitTable))
	.int	JMPTBL(L(Exit31), L(ExitTable))
	.int	JMPTBL(L(Exit32), L(ExitTable))
#ifdef USE_AS_STRNCPY
/* Entry n (indexed by the remaining byte count) dispatches to
   L(StrncpyExit<n>).  NOTE(review): the reviewed excerpt ends part-way
   through this table; its remaining entries were not visible.  */
L(ExitStrncpyTable):
	.int	JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit5),
L(ExitStrncpyTable))1871.int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))1872.int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))1873.int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))1874.int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))1875.int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))1876.int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))1877.int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))1878.int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))1879.int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))1880.int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))1881.int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))1882.int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))1883.int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))1884.int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))1885.int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))1886.int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))1887.int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))1888.int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))1889.int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))1890.int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))1891.int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))1892.int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))1893.int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))1894.int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))1895.int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))1896.int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))1897.int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))1898.int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))1899# ifndef USE_AS_STRCAT1900.p2align 41901L(FillTable):1902.int JMPTBL(L(Fill0), L(FillTable))1903.int JMPTBL(L(Fill1), L(FillTable))1904.int JMPTBL(L(Fill2), L(FillTable))1905.int JMPTBL(L(Fill3), L(FillTable))1906.int JMPTBL(L(Fill4), L(FillTable))1907.int JMPTBL(L(Fill5), L(FillTable))1908.int JMPTBL(L(Fill6), L(FillTable))1909.int JMPTBL(L(Fill7), L(FillTable))1910.int JMPTBL(L(Fill8), L(FillTable))1911.int JMPTBL(L(Fill9), L(FillTable))1912.int 
JMPTBL(L(Fill10), L(FillTable))1913.int JMPTBL(L(Fill11), L(FillTable))1914.int JMPTBL(L(Fill12), L(FillTable))1915.int JMPTBL(L(Fill13), L(FillTable))1916.int JMPTBL(L(Fill14), L(FillTable))1917.int JMPTBL(L(Fill15), L(FillTable))1918.int JMPTBL(L(Fill16), L(FillTable))1919# endif1920#endif192119221923