/* Path: blob/main/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S */
/* 39491 views */
/*
 * strlen - calculate the length of a string.
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Register roles.  srcin and result deliberately alias x0: the incoming
   pointer is dead by the time the length is written.  */
#define srcin		x0
#define result		x0

#define src		x1	/* 16-byte aligned address of the current chunk.  */
#define synd		x2	/* 64-bit NUL syndrome of the current chunk.  */
#define tmp		x3
#define shift		x4	/* srcin << 2; reused in L(loop_end).  */

#define data		q0
#define vdata		v0
#define vhas_nul	v1
#define vend		v2
#define dend		d2

/* Core algorithm:
   Process the string in 16-byte aligned chunks.  Each chunk is compared
   against zero with cmeq and the 128-bit result is narrowed to a 64-bit
   syndrome that is moved to a general register.  Reversing the syndrome
   (little-endian only) and counting leading zeros, i.e. counting trailing
   zeros, then identifies the first zero byte.

   Two narrowing forms are used:

   - shrn #4 (first two chunks): four syndrome bits per byte, so byte b
     maps to bits 4b..4b+3 and the byte index is ctz >> 2.

   - addhn (main loop): one syndrome byte per pair of input bytes.  In the
     little-endian layout bit 0 of that byte flags the even input byte and
     bits 1..7 flag the odd one, so ctz is 4b for even b and 4b - 3 for odd
     b, and the byte index is (ctz + 3) >> 2.  In the big-endian layout the
     roles of the two bytes are swapped and no rbit is applied, so clz is
     4b or 4b + 3 and clz >> 2 is already the byte index.

   Every load is a 16-byte aligned, 16-byte wide access, and a chunk is only
   loaded after all earlier bytes of the string were found to be non-zero,
   so each access contains at least one byte of the string.  */

/* __strlen_aarch64_mte
 *
 * Presumably called with the C signature size_t strlen (const char *s);
 * confirm against the declaring header.
 *
 * In:       x0 = address of the string (srcin).
 * Out:      x0 = number of bytes before the first zero byte (result).
 * Clobbers: x1-x4, v0-v2.
 * Stack:    not used.
 *
 * ENTRY, END and L() are not defined in this file; they are presumably
 * provided by asmdefs.h.
 */
ENTRY (__strlen_aarch64_mte)
	/* First chunk: the aligned 16 bytes that contain srcin.  */
	bic	src, srcin, 15
	ld1	{vdata.16b}, [src]
	cmeq	vhas_nul.16b, vdata.16b, 0
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	/* A register shift uses only the low 6 bits of shift, which equal
	   (srcin & 15) * 4: this drops the nibbles of the bytes that lie
	   before the start of the string.  */
	lsr	synd, synd, shift
	cbz	synd, L(next16)

	/* NUL inside the first chunk.  The syndrome is already relative to
	   srcin, so ctz / 4 is the length.  The rbit is unconditional here:
	   this chunk was loaded with ld1 {.16b}, which fills the lanes in
	   memory order on either endianness.  */
	rbit	synd, synd
	clz	result, synd
	lsr	result, result, 2
	ret

L(next16):
	/* Second chunk, still using the 4-bits-per-byte syndrome.  */
	ldr	data, [src, 16]
	cmeq	vhas_nul.16b, vdata.16b, 0
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop)
	add	src, src, 16			/* src = chunk holding the NUL.  */
	/* The chunk was loaded with ldr q, so the lane order follows the
	   data endianness; only little-endian needs the bit reversal.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	sub	result, src, srcin		/* Bytes before this chunk.  */
	clz	tmp, synd
	add	result, result, tmp, lsr 2	/* Plus index within the chunk.  */
	ret

	.p2align 5
L(loop):
	/* Two chunks per iteration.  On entry src addresses a chunk whose
	   successor (src + 16) has already been checked, so the pre-indexed
	   load advances src by 32.  */
	ldr	data, [src, 32]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h
	fmov	synd, dend
	cbnz	synd, L(loop_end)
	ldr	data, [src, 16]
	cmeq	vhas_nul.16b, vdata.16b, 0
	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h
	fmov	synd, dend
	cbz	synd, L(loop)
	add	src, src, 16			/* NUL is in the second chunk.  */
L(loop_end):
	/* src addresses the chunk holding the NUL and synd is its addhn
	   syndrome.  Work in units of a quarter byte so that one final
	   shift yields the length.  */
	sub	result, shift, src, lsl 2	/* (srcin - src) << 2.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
	/* Little-endian addhn layout: fold the +3 rounding term in here,
	   giving result = ((srcin - src) << 2) - 3.  */
	sub	result, result, 3
#endif
	clz	tmp, synd
	/* result = tmp - result = ((src - srcin) << 2) + tmp (+ 3 on LE).  */
	sub	result, tmp, result
	lsr	result, result, 2
	ret

END (__strlen_aarch64_mte)