Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S
39486 views
1
/*
2
* strchr - find a character in a string
3
*
4
* Copyright (c) 2020-2022, Arm Limited.
5
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6
*/
7
8
/* Assumptions:
9
*
10
* ARMv8-a, AArch64, Advanced SIMD.
11
* MTE compatible.
12
*/
13
14
#include "asmdefs.h"
15
16
/* Register aliases used by __strchr_aarch64_mte. */
/* Inputs and output. result names the same register as srcin (x0). */
#define srcin x0
17
#define chrin w1
18
#define result x0
19
20
/* General-purpose scratch. src holds the 16-byte-aligned chunk address.
tmp1 is x1, i.e. the 64-bit view of the register that holds chrin (w1),
so chrin must be consumed before tmp1 is first written. */
#define src x2
21
#define tmp1 x1
22
#define tmp2 x3
23
24
/* SIMD registers. vdata/qdata are two views of register 1 and vend/dend
are two views of register 5 (dend being its low 64 bits). */
#define vrepchr v0
25
#define vdata v1
26
#define qdata q1
27
#define vhas_nul v2
28
#define vhas_chr v3
29
#define vrepmask v4
30
#define vend v5
31
#define dend d5
32
33
/* Core algorithm.
34
35
For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
36
per byte. Bits 0-1 are set if the relevant byte matched the requested
37
character, bits 2-3 are set if the byte is NUL or matched. The count of
37
trailing zeroes divided by 4 gives the index of the matching byte if the count
38
is a multiple of 4. If it is not a multiple of 4, there was no match. */
40
41
/* __strchr_aarch64_mte: find the first byte equal to chrin in a string.

In: srcin (x0) = address of the string
chrin (w1) = character to find (dup replicates only its low byte)
Out: result (x0) = address of the first matching byte, or 0 (xzr) if the
NUL terminator is reached before any match. When the low byte of
chrin is 0 the terminator itself matches, so its address is
returned.
Writes: x1 (tmp1), x2 (src), x3 (tmp2), v0-v5 and the condition flags.

Every load is a 16-byte load from a 16-byte-aligned address, so the first
load may read bytes located before srcin; their syndrome bits are shifted
out before testing. */
ENTRY (__strchr_aarch64_mte)
42
bic src, srcin, 15 /* src = srcin rounded down to a 16-byte boundary */
43
dup vrepchr.16b, chrin /* broadcast the character; last read of chrin before tmp1 (x1) is written */
44
ld1 {vdata.16b}, [src]
45
movi vrepmask.16b, 0x33 /* per byte: selects bits 0-1 and 4-5 */
46
cmeq vhas_nul.16b, vdata.16b, 0 /* 0xff in lanes holding NUL */
47
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b /* 0xff in lanes equal to the character */
48
/* Merge: bits selected by vrepmask are taken from vhas_chr, the remaining
bits stay from vhas_nul. */
bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b
49
/* tmp2 = 4 * srcin. The register-controlled lsr below uses the shift
amount modulo 64, so the effective shift is 4 * (srcin & 15). */
lsl tmp2, srcin, 2
50
shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
51
fmov tmp1, dend /* tmp1 = 64-bit syndrome, 4 bits per input byte */
52
lsr tmp1, tmp1, tmp2 /* drop the nibbles of bytes that precede srcin */
53
cbz tmp1, L(loop) /* neither match nor NUL in the rest of this chunk */
54
55
/* rbit followed by clz yields the number of trailing zero bits.
NOTE(review): this path has no big-endian variant, presumably because
ld1 with byte elements fills lanes in memory order on either endianness
- confirm. */
rbit tmp1, tmp1
56
clz tmp1, tmp1
57
/* Tmp1 is an even multiple of 2 if the target character was
58
found first. Otherwise we've found the end of string. */
59
tst tmp1, 2 /* bit 1 set: lowest syndrome bit is a NUL bit (count = 4*i + 2) */
60
add result, srcin, tmp1, lsr 2 /* srcin + byte index; the lsr above made the count relative to srcin */
61
csel result, result, xzr, eq /* keep the address on a match, else return 0 */
62
ret
63
64
.p2align 4
65
/* Main loop, unrolled twice; one aligned 16-byte chunk per half.
On arrival at L(end), src is the address of the chunk just before the one
that contains the hit. */
L(loop):
66
ldr qdata, [src, 16] /* load the next chunk; src is not updated */
67
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
68
/* Unsigned vhas_chr >= vdata: true where the data byte is 0 (NUL) or where
vhas_chr is 0xff (match). */
cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b
69
/* Pairwise maximum with both operands identical: the low 64 bits are
nonzero if and only if some lane of vhas_nul is nonzero. */
umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
70
fmov tmp1, dend
71
cbnz tmp1, L(end)
72
ldr qdata, [src, 32]! /* pre-index: src += 32, then load the chunk at src */
73
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
74
cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b
75
umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
76
fmov tmp1, dend
77
cbz tmp1, L(loop)
78
sub src, src, 16 /* step back so L(end) sees the same src convention as the first exit */
79
L(end):
80
81
/* Build the 4-bit-per-byte syndrome for the chunk that hit. Here vhas_nul
marks "NUL or match" and vhas_chr marks "match".
Big-endian: bif takes the vhas_chr bits where vrepmask is 0, and clz is
applied directly to the syndrome.
Little-endian: bit takes the vhas_chr bits where vrepmask is 1, and rbit
turns the clz below into a count of trailing zeroes.
NOTE(review): presumably the split exists because ldr of a q register
places the bytes in reversed lane order on big-endian - confirm. */
#ifdef __AARCH64EB__
82
bif vhas_nul.16b, vhas_chr.16b, vrepmask.16b
83
shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
84
fmov tmp1, dend
85
#else
86
bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b
87
shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
88
fmov tmp1, dend
89
rbit tmp1, tmp1
90
#endif
91
add src, src, 16 /* src = address of the chunk containing the hit */
92
clz tmp1, tmp1
93
/* Tmp1 is a multiple of 4 if the target character was found. */
94
tst tmp1, 2 /* bit 1 set: the first hit is NUL without a match */
95
add result, src, tmp1, lsr 2 /* chunk address + byte index (count / 4) */
96
csel result, result, xzr, eq /* keep the address on a match, else return 0 */
97
ret
98
99
END (__strchr_aarch64_mte)
100
101
102