Path: blob/main/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S
39498 views
/*
 * strchrnul - find a character or nul in a string
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Argument / result registers.  srcin and result share x0.  */
#define srcin		x0
#define chrin		w1
#define result		x0

/* Scratch general-purpose registers.  tmp1 aliases x1, the register that
   carries chrin; that is fine because chrin is only read by the initial
   dup, before tmp1 is first written.  */
#define src		x2
#define tmp1		x1
#define tmp2		x3

/* SIMD registers.  qdata is the 128-bit view of vdata and dend is the low
   64-bit view of vend.  vhas_nul is not referenced by this routine.  */
#define vrepchr		v0
#define vdata		v1
#define qdata		q1
#define vhas_nul	v2
#define vhas_chr	v3
#define vend		v4
#define dend		d4

/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
   per byte. We take 4 bits of every comparison byte with shift right and narrow
   by 4 instruction. Since the bits in the nibble mask reflect the order in
   which things occur in the original string, counting leading zeros identifies
   exactly which byte matched.

   Matching the character and matching nul are folded into one compare pair:
     cmeq  m, data, chr   -> m[i] = 0xff if data[i] == chr, else 0x00
     cmhs  m, m, data     -> m[i] = 0xff if m[i] >= data[i] (unsigned)
   A lane that matched the character holds 0xff, which is >= any byte; a lane
   that did not match holds 0x00, which is >= data[i] only when data[i] is
   nul.  So after the pair a lane is 0xff iff the byte is chr or nul.  */

/*
   __strchrnul_aarch64_mte

   In:   x0 (srcin) = pointer to the string
         w1 (chrin) = character to look for; only the low byte is used
                      (dup ... .16b replicates bits 7:0)
   Out:  x0 (result) = address of the first byte at or after srcin that
                       equals the character or is nul
   Uses: x1, x2, x3, v0, v1, v3, v4 as scratch.  No flag-setting
         instructions and no stack accesses appear in the body (ENTRY/END
         come from asmdefs.h and are not visible here).

   Every load is a 16-byte access at a 16-byte aligned address, so bytes
   before srcin (first chunk) and bytes after the terminator (last chunk)
   may be read, but never outside an aligned 16-byte block that contains
   part of the string.  Presumably this is what the "MTE compatible"
   assumption above relies on.  */

ENTRY (__strchrnul_aarch64_mte)
	/* First chunk: the aligned 16-byte block that contains srcin.  */
	bic	src, srcin, 15			/* src = srcin rounded down to 16.  */
	dup	vrepchr.16b, chrin		/* Replicate the character to all lanes.  */
	ld1	{vdata.16b}, [src]
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b	/* 0xff where chr or nul.  */
	lsl	tmp2, srcin, 2			/* 4 mask bits per byte of misalignment.  */
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	tmp1, dend
	/* Register-specified LSR uses the shift amount modulo 64, so this
	   shifts by 4 * (srcin & 15) and drops the nibbles that belong to
	   bytes located before srcin.  */
	lsr	tmp1, tmp1, tmp2	/* Mask padding bits.  */
	cbz	tmp1, L(loop)			/* Nothing found in the first chunk.  */

	/* rbit + clz counts trailing zero bits: 4 * (offset from srcin).  */
	rbit	tmp1, tmp1
	clz	tmp1, tmp1
	add	result, srcin, tmp1, lsr 2
	ret

	/* Main loop, unrolled twice.  Only a zero/non-zero answer is needed
	   per chunk, so umaxp is used to fold the 128-bit compare result
	   into the low 64 bits; the exact nibble mask is built at L(end).  */
	.p2align 4
L(loop):
	ldr	qdata, [src, 16]		/* Chunk at src + 16, src unchanged.  */
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b
	fmov	tmp1, dend
	cbnz	tmp1, L(end)			/* Hit in the chunk at src + 16.  */
	ldr	qdata, [src, 32]!		/* Pre-index: src += 32, load from new src.  */
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)
	/* Hit in the chunk at src.  L(end) unconditionally adds 16, so
	   compensate here.  */
	sub	src, src, 16
L(end):
	/* vhas_chr still holds the per-byte result for the chunk that hit.  */
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	add	src, src, 16			/* src = base address of that chunk.  */
	fmov	tmp1, dend
	/* The bit reversal is skipped when __AARCH64EB__ is defined, in which
	   case clz is applied to the mask as-is.  */
#ifndef __AARCH64EB__
	rbit	tmp1, tmp1
#endif
	clz	tmp1, tmp1
	add	result, src, tmp1, lsr 2	/* 4 mask bits per byte -> byte offset.  */
	ret

END (__strchrnul_aarch64_mte)