Path: blob/main/contrib/bionic-x86_64-string/avx2-memset-kbl.S
39475 views
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

#include "cache.h"

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

	.section .text.avx2,"ax",@progbits

/*
 * __memset_chk_avx2 -- length-checked entry point.
 *
 * In:   %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
 *
 * Branches to __memset_chk_fail when n > dst_len (unsigned compare).
 * Otherwise execution runs off the end of this symbol into memset_avx2,
 * which must therefore stay immediately below it.
 * NOTE(review): the fall-through assumes the ENTRY/END macros from
 * bionic_asm.h emit nothing that alters control flow -- confirm there.
 */
ENTRY(__memset_chk_avx2)
	// %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
	cmp	%rcx, %rdx
	ja	__memset_chk_fail
	// Fall through to memset...
END(__memset_chk_avx2)

/*
 * memset_avx2 -- fill n bytes at dst with the low 8 bits of byte.
 *
 * Syntax: AT&T (GAS), run through the C preprocessor.
 * In:     %rdi = dst, %rsi = byte (only the low 8 bits are used), %rdx = n
 * Out:    %rax = dst
 * Writes: %rcx, %rdx, %rsi, %r8, %xmm0/%ymm0, flags
 *
 * Strategy by size:
 *   n < 16     one or two (possibly overlapping) scalar stores picked by
 *              testing bits 3..0 of n.
 *   16..128    overlapping 16-byte stores anchored at both ends.
 *   129..256   the above plus 32-byte AVX stores anchored at both ends.
 *   n > 256    the first and last 128 bytes are written as above, then a
 *              loop fills the 128-byte-aligned interior; non-temporal
 *              stores are used when n exceeds the shared cache size.
 *
 * Every path that has written %ymm0 executes vzeroupper before returning
 * (or before switching back to legacy-SSE stores) so the caller is not
 * left with dirty upper YMM state.
 */
ENTRY(memset_avx2)
	movq	%rdi, %rax			/* return value = dst */
	and	$0xff, %rsi
	mov	$0x0101010101010101, %rcx
	imul	%rsi, %rcx			/* %rcx = fill byte in all 8 lanes */
	cmpq	$16, %rdx
	jae	L(16bytesormore)
	testb	$8, %dl
	jnz	L(8_15bytes)
	testb	$4, %dl
	jnz	L(4_7bytes)
	testb	$2, %dl
	jnz	L(2_3bytes)
	testb	$1, %dl
	jz	L(return)			/* n == 0: nothing to store */
	movb	%cl, (%rdi)
L(return):
	ret

	/* Head and tail stores overlap whenever n is not the exact width. */
L(8_15bytes):
	movq	%rcx, (%rdi)
	movq	%rcx, -8(%rdi, %rdx)
	ret

L(4_7bytes):
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	ret

L(2_3bytes):
	movw	%cx, (%rdi)
	movw	%cx, -2(%rdi, %rdx)
	ret

	ALIGN (4)
L(16bytesormore):
	movd	%rcx, %xmm0
	pshufd	$0, %xmm0, %xmm0		/* replicate low dword to all 16 bytes */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm0, -16(%rdi, %rdx)
	cmpq	$32, %rdx
	jbe	L(32bytesless)
	movdqu	%xmm0, 16(%rdi)
	movdqu	%xmm0, -32(%rdi, %rdx)
	cmpq	$64, %rdx
	jbe	L(64bytesless)
	movdqu	%xmm0, 32(%rdi)
	movdqu	%xmm0, 48(%rdi)
	movdqu	%xmm0, -64(%rdi, %rdx)
	movdqu	%xmm0, -48(%rdi, %rdx)
	cmpq	$128, %rdx
	jbe	L(128bytesless)
	/* From here on the upper half of %ymm0 is in use. */
	vpbroadcastb	%xmm0, %ymm0
	vmovdqu	%ymm0, 64(%rdi)
	vmovdqu	%ymm0, 96(%rdi)
	vmovdqu	%ymm0, -128(%rdi, %rdx)
	vmovdqu	%ymm0, -96(%rdi, %rdx)
	cmpq	$256, %rdx
	ja	L(256bytesmore)
	/* 129..256 bytes: done, but %ymm0 was written -- clean up first.
	   The SSE-only exits below jump past this instruction. */
	vzeroupper
L(32bytesless):
L(64bytesless):
L(128bytesless):
	ret

	ALIGN (4)
L(256bytesmore):
	/* First and last 128 bytes are already written.
	   %rcx = first 128-byte boundary above dst (at most dst + 128),
	   %rdx = last 128-byte boundary at or below dst + n,
	   %r8  = original n, kept for the cache-size comparison. */
	leaq	128(%rdi), %rcx
	andq	$-128, %rcx
	movq	%rdx, %r8
	addq	%rdi, %rdx
	andq	$-128, %rdx
	cmpq	%rcx, %rdx
	je	L(return_vzeroupper)		/* no aligned interior left */

#ifdef SHARED_CACHE_SIZE
	cmp	$SHARED_CACHE_SIZE, %r8
#else
	cmp	__x86_64_shared_cache_size(%rip), %r8
#endif
	ja	L(256bytesmore_nt)

	ALIGN (4)
L(256bytesmore_normal):
	/* %rcx is 128-byte aligned, so aligned 32-byte stores are safe. */
	vmovdqa	%ymm0, (%rcx)
	vmovdqa	%ymm0, 32(%rcx)
	vmovdqa	%ymm0, 64(%rcx)
	vmovdqa	%ymm0, 96(%rcx)
	addq	$128, %rcx
	cmpq	%rcx, %rdx
	jne	L(256bytesmore_normal)
L(return_vzeroupper):
	vzeroupper
	ret

L(256bytesmore_nt):
	/* The loop below uses only legacy-SSE stores of %xmm0. Clear the
	   upper YMM state once, up front; the low 128 bits are preserved. */
	vzeroupper
	ALIGN (4)
L(256bytesmore_nt_loop):
	movntdq	%xmm0, (%rcx)
	movntdq	%xmm0, 16(%rcx)
	movntdq	%xmm0, 32(%rcx)
	movntdq	%xmm0, 48(%rcx)
	movntdq	%xmm0, 64(%rcx)
	movntdq	%xmm0, 80(%rcx)
	movntdq	%xmm0, 96(%rcx)
	movntdq	%xmm0, 112(%rcx)
	leaq	128(%rcx), %rcx
	cmpq	%rcx, %rdx
	jne	L(256bytesmore_nt_loop)
	sfence					/* order the non-temporal stores before returning */
	ret

END(memset_avx2)

