/* Path: blob/main/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S
   39486 views */
/*
 * strchr - find a character in a string
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Argument / return registers.  */
#define srcin		x0	/* in:  start of the string */
#define chrin		w1	/* in:  character to find (low byte is used) */
#define result		x0	/* out: address of the match, or 0 */

/* Scalar scratch.  synd shares x1 with chrin; chrin is consumed by the
   dup at entry before synd is first written.  */
#define src		x2	/* 16-byte aligned chunk pointer */
#define synd		x1	/* syndrome, later the bit index of the first hit */
#define shift		x3	/* shift count for the first chunk */

/* Vector scratch.  */
#define vrepchr		v0	/* chrin replicated into all 16 bytes */
#define vdata		v1	/* current 16-byte chunk */
#define qdata		q1	/* same register, Q view for ldr */
#define vhas_nul	v2	/* per-byte terminator mask */
#define vhas_chr	v3	/* per-byte match mask */
#define vrepmask	v4	/* 0x33 in every byte; bit/bif selector */
#define vend		v5	/* narrowed / reduced result */
#define dend		d5	/* low 64 bits of vend */

/* Core algorithm.

   Each 16-byte chunk is condensed into a 64-bit syndrome that holds one
   4-bit field per byte.  Within a field, one pair of bits is taken from
   the "byte matched chrin" mask and the other pair from the "byte ends
   the search" mask (NUL in the first chunk, NUL-or-match in the loop).
   The index of the first set bit is therefore a multiple of 4 when a
   match comes first, and 2 more than a multiple of 4 when only the
   terminator was seen.  Bit 1 of that index selects between returning a
   pointer and returning 0, and index >> 2 is the byte offset.

   Register use: x0-x3, v0-v5 and the condition flags are written.  */

ENTRY (__strchr_aarch64_mte)
	/* First chunk: load the aligned 16 bytes that contain srcin.  */
	bic	src, srcin, 15
	dup	vrepchr.16b, chrin
	ld1	{vdata.16b}, [src]
	movi	vrepmask.16b, 0x33
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Bits selected by 0x33 come from the match mask, the rest keep the
	   NUL mask.  */
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	/* The register shift count is taken modulo 64, so this discards
	   4 * (srcin & 15) bits: the fields of the bytes before srcin.  */
	lsr	synd, synd, shift
	cbz	synd, L(loop)

	/* Hit in the first chunk.  rbit + clz counts trailing zeros; after
	   the shift above, field 0 belongs to the byte at srcin.  */
	rbit	synd, synd
	clz	synd, synd
	/* Bit 1 clear: a match came first.  Bit 1 set: end of string.  */
	tst	synd, 2
	add	result, srcin, synd, lsr 2
	csel	result, result, xzr, eq
	ret

	.p2align 4
	/* Main loop, two chunks per iteration.  Here only a cheap "anything
	   found?" test is done; the syndrome is built at L(end).  */
L(loop):
	ldr	qdata, [src, 16]
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* has_chr >= data (unsigned) holds for a match (0xff) or when the
	   data byte is 0, so this mask means "match or NUL".  */
	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	/* Taken with src still one chunk behind the data just examined.  */
	cbnz	synd, L(end)
	/* Pre-index with writeback: src advances by 32 and now addresses
	   the chunk being loaded.  */
	ldr	qdata, [src, 32]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)
	/* Step back so that both exits reach L(end) with src + 16 being the
	   address of the chunk that produced the hit.  */
	sub	src, src, 16
L(end):

	/* Build the syndrome from the masks of the chunk that hit.  The
	   big-endian build uses bif, which places the match mask in the
	   other bit pair of each field, and scans with clz directly; the
	   little-endian build uses bit and reverses the bits first.  */
#ifdef __AARCH64EB__
	bif	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
#else
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
#endif
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	add	src, src, 16
	clz	synd, synd
	/* The count is a multiple of 4 only when a match was found first;
	   otherwise bit 1 is set and the result is 0.  */
	tst	synd, 2
	add	result, src, synd, lsr 2
	csel	result, result, xzr, eq
	ret

END (__strchr_aarch64_mte)