/* Path: blob/main/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S
   (source-viewer residue: 39486 views)  */
/*
 * memchr - find a character in a memory zone
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Arguments and return value.  */
#define srcin		x0
#define chrin		w1
#define cntin		x2
#define result		x0

/* Integer scratch registers.  */
#define src		x3
#define cntrem		x4
#define synd		x5
#define shift		x6
#define tmp		x7

/* SIMD scratch registers.  qdata/vdata and vend/dend are two views of the
   same register (q1/v1 and v3/d3 respectively).  */
#define vrepchr		v0
#define qdata		q1
#define vdata		v1
#define vhas_chr	v2
#define vend		v3
#define dend		d3

/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
   per byte. We take 4 bits of every comparison byte with shift right and narrow
   by 4 instruction. Since the bits in the nibble mask reflect the order in
   which things occur in the original string, counting leading zeros identifies
   exactly which byte matched.  */

/*
   __memchr_aarch64_mte

   In:     srcin (x0) = start address of the zone to search
           chrin (w1) = byte value to look for (replicated to 16 byte lanes)
           cntin (x2) = number of bytes to search
   Out:    result (x0) = address of the first matching byte, or 0 if none
   Clobb:  x3-x7, v0-v3, condition flags

   Every load is issued from a 16-byte aligned address: src is srcin rounded
   down to a multiple of 16 and is only ever advanced in multiples of 16.  */

ENTRY (__memchr_aarch64_mte)
	bic	src, srcin, 15			/* src = srcin aligned down to 16.  */
	cbz	cntin, L(nomatch)		/* Zero length: nothing to search.  */
	ld1	{vdata.16b}, [src]
	dup	vrepchr.16b, chrin
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	lsl	shift, srcin, 2			/* 4 mask bits per byte of misalignment.  */
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift		/* Drop matches located before srcin.  */
	cbz	synd, L(start_loop)

	/* Match in the first chunk.  synd >> 2 is the byte offset of the match
	   from srcin; it is only valid if that offset is below cntin.  */
	rbit	synd, synd
	clz	synd, synd
	cmp	cntin, synd, lsr 2
	add	result, srcin, synd, lsr 2
	csel	result, result, xzr, hi
	ret

	.p2align 3
L(start_loop):
	/* tmp = 17 - (srcin - src), so cntrem = (bytes left after the first
	   chunk) - 1.  A borrow means the first chunk already covered the whole
	   zone.  */
	sub	tmp, src, srcin
	add	tmp, tmp, 17
	subs	cntrem, cntin, tmp
	b.lo	L(nomatch)

	/* Make sure that it won't overread by a 16-byte chunk */
	tbz	cntrem, 4, L(loop32_2)
	sub	src, src, 16			/* Bias src for the pre-indexed load below.  */
	.p2align 4
L(loop32):
	ldr	qdata, [src, 32]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)

L(loop32_2):
	ldr	qdata, [src, 16]
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	subs	cntrem, cntrem, 32
	b.lo	L(end_2)			/* Count exhausted: this is the last chunk.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
L(end_2):
	add	src, src, 16			/* src = address of the chunk just compared.  */
L(end):
	/* Build the nibble mask for the chunk at src and recompute the number of
	   bytes of the zone that lie at or after src.  */
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	sub	cntrem, src, srcin
	fmov	synd, dend
	sub	cntrem, cntin, cntrem
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	synd, synd
	/* synd >> 2 is the byte index of the match within the chunk; accept it
	   only if it falls inside the remaining count, else return 0.  */
	cmp	cntrem, synd, lsr 2
	add	result, src, synd, lsr 2
	csel	result, result, xzr, hi
	ret

L(nomatch):
	mov	result, 0
	ret

END (__memchr_aarch64_mte)