/* Source path: blob/main/contrib/arm-optimized-routines/string/aarch64/strrchr.S
   (web-viewer residue retained for provenance: "39491 views").  */
/*
 * strrchr - find last position of a character in a string.
 *
 * Copyright (c) 2014-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* Arguments and results.  */
#define srcin		x0	/* In:  pointer to the NUL-terminated string.  */
#define chrin		w1	/* In:  character to find (low byte is used).  */

#define result		x0	/* Out: pointer to the last match, or 0.  */

/* General-purpose scratch registers.  */
#define src		x2	/* Read cursor; post-incremented by 32 per hunk.  */
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5
#define src_match	x6	/* Value of src just after the last hunk with a match.  */
#define src_offset	x7	/* Character syndrome of that hunk; 0 = no match yet.  */
#define const_m1	x8
#define tmp4		x9
#define nul_match	x10	/* NUL syndrome of the current hunk.  */
#define chr_match	x11	/* Character syndrome of the current hunk.  */

/* Vector scratch registers.  */
#define vrepchr		v0	/* chrin replicated into all 16 byte lanes.  */
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7	/* 0x80200802 per 32-bit lane (NUL bits).  */
#define vrepmask_c	v16	/* 0x40100401 per 32-bit lane (character bits).  */
#define vend1		v17
#define vend2		v18	/* Not referenced by the code below.  */

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.

   How the syndrome is built: each comparison result (0xff or 0x00 per
   byte) is ANDed with a mask whose bytes repeat 0x01, 0x04, 0x10, 0x40
   (character) or 0x02, 0x08, 0x20, 0x80 (NUL).  Two rounds of pairwise
   byte addition (addp) then fold every group of four adjacent bytes
   into a single byte, so 32 input bytes become 8 bytes = 64 bits with
   byte i of the hunk owning bits 2i (character) and 2i+1 (NUL).  */

/* Interface, as established by the code in this file:

   Equivalent C contract: char *strrchr (const char *s, int c).
   In:      x0 = s, w1 = c.
   Out:     x0 = address of the last byte equal to (unsigned char) c at or
	    before the terminating NUL (the NUL itself matches when c is 0),
	    or 0 if there is no such byte.
   Writes:  x2-x11, v0-v7, v16, v17 and the condition flags.
   Stack:   not used.
   Memory:  the string is read in 32-byte hunks from an address aligned
	    down to 32 bytes, so bytes before the start of the string and
	    after its terminator (within the same aligned hunk) are loaded;
	    their syndrome bits are masked off or ignored.

   ENTRY, END and L() are provided by asmdefs.h.  */

ENTRY (__strrchr_aarch64)
	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x80200802 used
	   similarly for NUL termination.  */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	mov	src_offset, #0		/* No match recorded yet.  */
	ands	tmp1, srcin, #31	/* tmp1 = number of padding bytes.  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(aligned)

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vhas_nul1.16b, vhas_chr1.16b		// 128->64
	mov	nul_match, vend1.d[0]
	/* tmp1 = -2 * padding.  The register-controlled shift below only
	   uses the amount modulo 64, i.e. 64 - 2 * padding, so tmp3 ends up
	   with exactly the low 2 * padding bits set: the syndrome bits that
	   belong to the bytes in front of the string.  */
	lsl	tmp1, tmp1, #1
	mov	const_m1, #~0
	lsr	tmp3, const_m1, tmp1
	mov	chr_match, vend1.d[1]

	bic	nul_match, nul_match, tmp3	// Mask padding bits.
	bic	chr_match, chr_match, tmp3	// Mask padding bits.
	cbnz	nul_match, L(tail)

	.p2align 4
L(loop):
	/* The previous hunk held no NUL.  If it held a match, remember it:
	   src already points just past that hunk.  */
	cmp	chr_match, #0
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne
L(aligned):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Cheap NUL test for the loop: a pairwise minimum is zero iff one
	   of the two bytes is zero.  The exact NUL syndrome is only
	   computed after the loop exits.  */
	uminp	vend1.16b, vdata1.16b, vdata2.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	cmeq	vend1.16b, vend1.16b, #0
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vend1.16b, vhas_chr1.16b		// 128->64
	mov	nul_match, vend1.d[0]	/* Non-zero iff the hunk has a NUL.  */
	mov	chr_match, vend1.d[1]	/* Character syndrome of the hunk.  */
	cbz	nul_match, L(loop)

	/* A NUL is present: build the precise two-bits-per-byte NUL
	   syndrome for this hunk.  */
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
	mov	nul_match, vhas_nul1.d[0]

L(tail):
	/* Work out exactly where the string ends.  (x - 1) ^ x sets every
	   bit up to and including the lowest set bit of x, so tmp4 covers
	   all syndrome bits up to and including the first NUL.  That range
	   includes the character bit of the NUL byte itself, which is what
	   makes a search for NUL succeed.  */
	sub	tmp4, nul_match, #1
	eor	tmp4, tmp4, nul_match
	ands	chr_match, chr_match, tmp4
	/* And pick the values corresponding to the last match.  */
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne

	/* Count down from the top of the syndrome to find the last match.  */
	clz	tmp3, src_offset
	/* Src_match points beyond the word containing the match, so we can
	   simply subtract half the bit-offset into the syndrome.  Because
	   we are counting down, we need to go back one more character.
	   With the last match at byte j (bit 2j): clz = 63 - 2j, and
	   (63 - 2j + 2) >> 1 = 32 - j, the distance back from src_match.  */
	add	tmp3, tmp3, #2
	sub	result, src_match, tmp3, lsr #1
	/* But if the syndrome shows no match was found, then return NULL.
	   (src_match may never have been written in that case; the value
	   computed above is then discarded here.)  */
	cmp	src_offset, #0
	csel	result, result, xzr, ne

	ret

END (__strrchr_aarch64)