Path: blob/main/contrib/arm-optimized-routines/string/aarch64/strchr.S
39486 views
/*
 * strchr - find a character in a string
 *
 * Copyright (c) 2014-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

/* Locals and temporaries.  */
#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7
#define vrepmask_c	v16
#define vend1		v17
#define vend2		v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */

/* __strchr_aarch64

   In:       srcin (x0) = address of the string to search.
             chrin (w1) = character to look for; only the low byte is
                          used (it is replicated with dup ... .16b).
   Out:      result (x0) = address of the first byte equal to chrin, or
                          0 if a NUL byte is reached first.  When chrin
                          is itself NUL the address of the terminator
                          is returned.
   Clobbers: x2-x5, v0-v7, v16-v18 and the condition flags.  The stack
             is not touched.

   Data is always loaded as whole 32-byte hunks from a 32-byte aligned
   address, so bytes that lie before the start of the string or after
   its terminator, within the same aligned hunk, are read as well.

   ENTRY, END and L are not defined in this file; they are expected to
   be provided by asmdefs.h.  */

ENTRY (__strchr_aarch64)
	/* Magic constant 0xc0300c03 to allow us to identify which lane
	   matches the requested byte.  Even bits are set if the character
	   matches, odd bits if either the char is NUL or matches.  */
	mov	wtmp2, 0x0c03
	movk	wtmp2, 0xc030, lsl 16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #31	/* tmp1 = misalignment; sets Z if aligned.  */
	/* The vector add below does not write the flags, so the b.eq
	   still tests the result of the ands above.  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(loop)

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Merge the two compare results: bits that are clear in
	   vrepmask_0 are taken from vhas_chr, the others keep vhas_nul.
	   The and with vrepmask_c then leaves one (chr, nul) bit pair
	   per byte.  */
	bif	vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
	bif	vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
	and	vend1.16b, vhas_nul1.16b, vrepmask_c.16b
	and	vend2.16b, vhas_nul2.16b, vrepmask_c.16b
	lsl	tmp1, tmp1, #1		/* Two syndrome bits per padding byte.  */
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	mov	tmp3, #~0
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	/* tmp1 holds -2 * misalignment.  The register shift uses the
	   amount modulo 64, so this leaves ones in exactly the low
	   2 * misalignment bits.  */
	lsr	tmp1, tmp3, tmp1

	mov	tmp3, vend1.d[0]
	bic	tmp1, tmp3, tmp1	// Mask padding bits.
	cbnz	tmp1, L(tail)
	/* Nothing found in the first hunk: fall through into the loop.  */

	.p2align 4
L(loop):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Unsigned (match mask >= data) is true for a byte that matched
	   (mask is 0xff) or that is NUL (data is 0), so a single compare
	   detects both termination conditions.  */
	cmhs	vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
	cmhs	vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_nul2.16b
	umaxp	vend1.16b, vend1.16b, vend1.16b		// 128->64
	mov	tmp1, vend1.d[0]
	cbz	tmp1, L(loop)

	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  */
	bif	vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
	bif	vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
	and	vend1.16b, vhas_nul1.16b, vrepmask_c.16b
	and	vend2.16b, vhas_nul2.16b, vrepmask_c.16b
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	mov	tmp1, vend1.d[0]
L(tail):
	/* On entry tmp1 holds a non-zero syndrome for the hunk that ends
	   at src.  */
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source: undo the post-increment of the last load.  */
	sub	src, src, #32
	clz	tmp1, tmp1	/* And counting the leading zeros.  */
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #1	/* Bit index / 2 = byte offset.  */
	csel	result, result, xzr, eq		/* Odd index: return NULL.  */
	ret

END (__strchr_aarch64)