Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S
39491 views
1
/*
 * strlen - calculate the length of a string.
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
7
8
/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */
13
14
#include "asmdefs.h"
15
16
/* Register aliases used by __strlen_aarch64_mte.  */

/* Incoming string address.  */
#define srcin		x0
/* Returned length; shares x0 with srcin.  */
#define result		x0

/* 16-byte aligned chunk pointer: srcin with the low four bits cleared,
   advanced as chunks are consumed.  */
#define src		x1
/* 64-bit syndrome copied out of dend; non-zero when the chunk that was
   just examined holds a zero byte.  */
#define synd		x2
/* Scratch register for the clz result.  */
#define tmp		x3
/* srcin << 2: shift count for the first chunk's syndrome, reused in the
   final length computation after the main loop.  */
#define shift		x4

/* q0 and v0 name the same SIMD register: data is the form used by the
   ldr loads, vdata the form used by ld1 and cmeq.  */
#define data		q0
#define vdata		v0
/* Per-byte comparison result written by cmeq.  */
#define vhas_nul	v1
/* Narrowed comparison result; dend is its low 64 bits, which fmov
   copies into synd.  */
#define vend		v2
#define dend		d2
29
30
/* Core algorithm:
   Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
   four bits per byte using the shrn instruction. Counting the trailing zeros
   of that mask then identifies the first zero byte. The main loop narrows
   the comparison result with addhn instead of shrn.  */
34
35
/* __strlen_aarch64_mte: return the length of a NUL-terminated string.

   In:   srcin (x0)  - address of the string.
   Out:  result (x0) - number of bytes that precede the first zero byte.
   Uses: x1-x4 and v0-v2, through the register aliases defined earlier in
	 this file.  The instructions below make no stack accesses and no
	 calls.

   Every load reads 16 bytes from a 16-byte aligned address, and each chunk
   is checked for a zero byte before the next chunk is loaded.

   The first chunk is loaded with ld1 {.16b} and always bit-reverses its
   syndrome.  Later chunks are loaded with ldr q and bit-reverse the
   syndrome only when __AARCH64EB__ is not defined.

   ENTRY, END and L() are not defined in this file; they presumably come
   from asmdefs.h.  */

ENTRY (__strlen_aarch64_mte)
	/* First chunk: the aligned 16 bytes that contain srcin.  */
	bic	src, srcin, 15
	ld1	{vdata.16b}, [src]
	cmeq	vhas_nul.16b, vdata.16b, 0
	lsl	shift, srcin, 2			/* 4 syndrome bits per byte.  */
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	/* Discard the syndrome bits of the bytes below srcin; the register
	   shift count is taken modulo 64.  */
	lsr	synd, synd, shift
	cbz	synd, L(next16)

	/* Zero byte in the first chunk: length = trailing zero bits / 4.  */
	rbit	synd, synd
	clz	result, synd
	lsr	result, result, 2
	ret

L(next16):
	/* Second chunk, at src + 16.  src is advanced only if it hits.  */
	ldr	data, [src, 16]
	cmeq	vhas_nul.16b, vdata.16b, 0
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop)
	add	src, src, 16
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	sub	result, src, srcin		/* Bytes from srcin to chunk.  */
	clz	tmp, synd
	add	result, result, tmp, lsr 2	/* Plus index within chunk.  */
	ret

	.p2align 5
L(loop):
	/* Two chunks per iteration.  The pre-indexed load advances src by
	   32.  On first entry src still addresses the first chunk, so this
	   reads the third one.  */
	ldr	data, [src, 32]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h
	fmov	synd, dend
	cbnz	synd, L(loop_end)
	ldr	data, [src, 16]
	cmeq	vhas_nul.16b, vdata.16b, 0
	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h
	fmov	synd, dend
	cbz	synd, L(loop)
	add	src, src, 16			/* The second chunk hit.  */
L(loop_end):
	/* src addresses the chunk holding the zero byte.  The length is
	   accumulated in quarter-byte units and divided by 4 at the end.  */
	sub	result, shift, src, lsl 2	/* (srcin - src) << 2.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
	/* With addhn the lowest set bit for byte i of the chunk is bit 4*i
	   when i is even and bit 4*i - 3 when i is odd.  Biasing the count
	   by 3 makes the final shift by 2 yield i in both cases.  */
	sub	result, result, 3
#endif
	clz	tmp, synd
	sub	result, tmp, result
	lsr	result, result, 2
	ret

END (__strlen_aarch64_mte)
90
91