/* Source path: blob/main/contrib/bionic-x86_64-string/sse2-strlen-slm.S
   (web listing metadata: 39475 views) */
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * STRLEN (default name: strlen) -- SSE2 string length.
 * Syntax: GNU as, AT&T operand order, run through the C preprocessor.
 *
 * In:    %rdi = address of a zero-terminated byte string
 * Out:   %rax = number of bytes that precede the first zero byte
 * Clobb: %rcx, %rdx, %r10, %xmm0-%xmm5, flags
 * Stack: not used
 *
 * Strategy:
 *   1. Head.  If (s & 0x3f) <= 0x30 the first 16 bytes at s lie inside one
 *      64-byte block, so they are read with a single unaligned load.
 *      Otherwise the aligned 16-byte chunk containing s is read and the
 *      match bits that belong to bytes in front of s are masked off.
 *   2. Sixteen aligned 16-byte chunks are tested one at a time (unrolled).
 *   3. At most three further chunks are tested until the cursor is 64-byte
 *      aligned.
 *   4. Main loop: 64 bytes per iteration; pminub folds four chunks into one
 *      so a single compare detects a zero byte anywhere in the 64 bytes.
 *   5. Tail: the four chunks of the hit block are re-tested to find the
 *      exact byte.
 *
 * When USE_AS_STRCAT is defined, the macro/ENTRY prologue and the final
 * RETURN/END are omitted, so the body can be textually included in another
 * routine.  NOTE(review): in that mode RETURN must be defined by the
 * including file, and the code after L(exit64) falls through into whatever
 * follows the include -- confirm against the includer.
 */

#ifndef USE_AS_STRCAT

#ifndef STRLEN
# define STRLEN		strlen
#endif

#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

#define RETURN ret
	.section .text.sse2,"ax",@progbits
ENTRY (STRLEN)
/* end ifndef USE_AS_STRCAT */
#endif
	/* ---- head: first (possibly unaligned) 16 bytes ---- */
	xor	%eax, %eax		/* rax = 0; needed by L(exit_less16) */
	mov	%edi, %ecx
	and	$0x3f, %ecx		/* ecx = s & 63 (offset in 64-byte block) */
	pxor	%xmm0, %xmm0		/* xmm0 = 0, the byte pattern searched for */
	cmp	$0x30, %ecx
	ja	L(next)			/* s + 15 would leave the 64-byte block */
	movdqu	(%rdi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %edx		/* bit i set <=> s[i] == 0 */
	test	%edx, %edx
	jnz	L(exit_less16)		/* rax is still 0: result = bsf(edx) */
	mov	%rdi, %rax
	and	$-16, %rax		/* rax = s rounded down to 16 */
	jmp	L(align16_start)
L(next):
	mov	%rdi, %rax
	and	$-16, %rax		/* rax = s rounded down to 16 */
	pcmpeqb	(%rax), %xmm0		/* aligned read of the chunk holding s */
	mov	$-1, %r10d
	sub	%rax, %rcx		/* low bits of cl = s & 15 */
	shl	%cl, %r10d		/* clear mask bits for bytes before s */
	pmovmskb %xmm0, %edx
	and	%r10d, %edx
	jnz	L(exit)

	/*
	 * ---- unrolled scan of aligned 16-byte chunks ----
	 * rax = 16-byte aligned cursor; everything below rax + 16 is known to
	 * hold no terminator.  A compare that finds no match leaves its
	 * destination register all-zero, so xmm0-xmm3 can be reused as the
	 * zero pattern without being cleared again.
	 */
L(align16_start):
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3
	pcmpeqb	16(%rax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax		/* the chunk just compared is now 16(%rax) */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	/*
	 * ---- advance to a 64-byte boundary ----
	 * Everything below rax + 80 has been checked.  Entering the loop at a
	 * 64-byte aligned rax may rescan bytes that were already checked; that
	 * is harmless because they contain no terminator.  From the first
	 * "add $80" on, rax addresses the chunk just compared, so a hit goes
	 * to L(exit), which adds no extra displacement.
	 *
	 * NOTE(review): rax is 16-byte aligned here, so by address arithmetic
	 * the fourth "test $0x3f" below always finds rax 64-byte aligned; the
	 * %xmm3 compare and the "add $16" after it look unreachable.  They
	 * are kept unchanged.
	 */
	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	80(%rax), %xmm0
	add	$80, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit)

	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	16(%rax), %xmm1
	add	$16, %rax
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit)

	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	16(%rax), %xmm2
	add	$16, %rax
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit)

	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	16(%rax), %xmm3
	add	$16, %rax
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit)

	add	$16, %rax

	/*
	 * ---- main loop: 64 bytes per iteration ----
	 * rax is 64-byte aligned, which is what permits the aligned movaps
	 * loads.  The unsigned byte-wise minimum of the four chunks has a zero
	 * byte iff at least one chunk has one.  xmm0 is all-zero on entry and
	 * is only read inside the loop.
	 */
	.p2align 4
L(align64_loop):
	movaps	(%rax), %xmm4
	pminub	16(%rax), %xmm4
	movaps	32(%rax), %xmm5
	pminub	48(%rax), %xmm5
	add	$64, %rax
	pminub	%xmm4, %xmm5
	pcmpeqb	%xmm0, %xmm5
	pmovmskb %xmm5, %edx
	test	%edx, %edx
	jz	L(align64_loop)

	/*
	 * ---- tail: find the chunk and byte inside the hit block ----
	 * The block starts at rax - 64.  After "sub $80" its four chunks are
	 * at 16/32/48/64(%rax), matching the displacements that the
	 * L(exitNN) handlers add back.
	 */
	pcmpeqb	-64(%rax), %xmm0
	sub	$80, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	/* The first three chunks are clean, so the hit is in the fourth:
	   no test is needed before bsf.  */
	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$64, %rax
	RETURN

	/*
	 * ---- exit handlers ----
	 * Each computes (rax - s) + index of lowest set bit in edx + the
	 * displacement of the chunk that was compared.
	 */
	.p2align 4
L(exit):
	sub	%rdi, %rax		/* rax = chunk address - s */
L(exit_less16):
	bsf	%rdx, %rdx		/* rdx = index of first zero byte */
	add	%rdx, %rax
	RETURN
	.p2align 4
L(exit16):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$16, %rax
	RETURN
	.p2align 4
L(exit32):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$32, %rax
	RETURN
	.p2align 4
L(exit48):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$48, %rax
	RETURN
	.p2align 4
L(exit64):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$64, %rax
#ifndef USE_AS_STRCAT
	RETURN

END (STRLEN)
#endif