Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/arm-optimized-routines/string/aarch64/strchr.S
39486 views
1
/*
2
* strchr - find a character in a string
3
*
4
* Copyright (c) 2014-2022, Arm Limited.
5
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6
*/
7
8
/* Assumptions:
9
*
10
* ARMv8-a, AArch64
11
* Neon Available.
12
*/
13
14
#include "asmdefs.h"

/* Arguments and results (AAPCS64: arguments in x0/w1, result in x0).  */
#define srcin		x0
#define chrin		w1

#define result		x0

/* Integer scratch registers — all caller-saved, so nothing needs to be
   preserved across this leaf function.  */
#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

/* SIMD scratch registers.  All are in the caller-saved v0-v7/v16-v31
   range, so the callee-saved v8-v15 are never touched.  */
#define vrepchr		v0	/* chrin replicated into every byte lane.  */
#define vdata1		v1	/* First 16 bytes of the current 32-byte hunk.  */
#define vdata2		v2	/* Second 16 bytes of the current hunk.  */
#define vhas_nul1	v3	/* Per-byte NUL-match flags for vdata1.  */
#define vhas_nul2	v4	/* Per-byte NUL-match flags for vdata2.  */
#define vhas_chr1	v5	/* Per-byte char-match flags for vdata1.  */
#define vhas_chr2	v6	/* Per-byte char-match flags for vdata2.  */
#define vrepmask_0	v7	/* Odd-bit mask (vrepmask_c doubled); selects
				   the NUL flag of each 2-bit pair.  */
#define vrepmask_c	v16	/* 0xc0300c03 replicated; isolates the 2 bits
				   each byte contributes to the syndrome.  */
#define vend1		v17	/* Syndrome accumulator for the reductions.  */
#define vend2		v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit 0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string, a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */

/* Locals and temporaries.  */
53
ENTRY (__strchr_aarch64)
	/* Magic constant 0xc0300c03 to allow us to identify which lane
	   matches the requested byte.  Even bits are set if the character
	   matches, odd bits if either the char is NUL or matches.  */
	mov	wtmp2, 0x0c03
	movk	wtmp2, 0xc030, lsl 16	/* wtmp2 = 0xc0300c03.  */
	dup	vrepchr.16b, chrin	/* Splat the target byte to all lanes.  */
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #31	/* tmp1 = misalignment; Z iff aligned.  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(loop)

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1		/* tmp1 = -(misalignment).  */
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Merge: keep the NUL flag in the odd bit of each pair, insert the
	   char flag everywhere the mask is clear (including the even bit).  */
	bif	vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
	bif	vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
	/* Keep only the 2 syndrome bits each byte contributes.  */
	and	vend1.16b, vhas_nul1.16b, vrepmask_c.16b
	and	vend2.16b, vhas_nul2.16b, vrepmask_c.16b
	lsl	tmp1, tmp1, #1		/* 2 syndrome bits per padding byte.  */
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	mov	tmp3, #~0
	/* Second reduction: only the low 64 bits of vend1 matter, so reusing
	   the stale vend2 as the second operand is harmless.  */
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	lsr	tmp1, tmp3, tmp1	/* Mask of the non-padding bits.  */

	mov	tmp3, vend1.d[0]
	bic	tmp1, tmp3, tmp1	// Mask padding bits.
	cbnz	tmp1, L(tail)

	.p2align 4
L(loop):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* vhas_chr lanes are 0xff or 0x00, so an unsigned >= compare against
	   the data sets a lane iff the char matched (0xff >= anything) or the
	   data byte is NUL (0x00 >= 0x00) — char-match OR NUL in one insn.  */
	cmhs	vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
	cmhs	vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_nul2.16b
	umaxp	vend1.16b, vend1.16b, vend1.16b	/* Fold 128 bits into 64.  */
	mov	tmp1, vend1.d[0]
	cbz	tmp1, L(loop)		/* No char and no NUL: next hunk.  */

	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  Rebuild the full 2-bits-per-byte syndrome exactly
	   as in the unaligned entry path above.  */
	bif	vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
	bif	vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
	and	vend1.16b, vhas_nul1.16b, vrepmask_c.16b
	and	vend2.16b, vhas_nul2.16b, vrepmask_c.16b
	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
	mov	tmp1, vend1.d[0]
L(tail):
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source (src was post-incremented past the hunk).  */
	sub	src, src, #32
	clz	tmp1, tmp1		/* ...and counting the leading zeros.  */
	/* tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #1	/* src + byte index of hit.  */
	csel	result, result, xzr, eq	/* NUL first => return NULL.  */
	ret

END (__strchr_aarch64)
125
126
127