GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/riscv/string/memchr.S

/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <[email protected]>
 */

#include <machine/asm.h>

/*
 * a0 - const void *b
 * a1 - int c
 * a2 - size_t len
 */
ENTRY(memchr)
	/*
	 * a0 - const char *ptr
	 * a1 - char cccccccc[8]
	 * a2 - char iter[8]
	 * a3 - uint8_t *end
	 * a4 - uint64_t *end_align
	 * a5 - uint64_t *end_unroll
	 */
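
	/*
	 * Strategy, as a C sketch (illustrative only, not part of the
	 * build; bounds handling omitted, has_zero() and the index
	 * recovery are sketched further down):
	 *
	 *	const char *ptr = (const char *)((uintptr_t)b & ~(uintptr_t)7);
	 *	uint64_t cccccccc = (uint8_t)c * 0x0101010101010101;
	 *	for (;;) {
	 *		uint64_t iter = *(const uint64_t *)ptr ^ cccccccc;
	 *		if (has_zero(iter))
	 *			break;
	 *		ptr += 8;
	 *	}
	 */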

	beqz a2, .Lno_match

	/* c = (uint8_t) c */
	andi a1, a1, 0xFF

	/*
	 * t0 = 0x0101010101010101
	 * t1 = 0x8080808080808080
	 * t2 = b << 3
	 * cccccccc = (uint8_t)c * t0
	 * end = b + len;
	 * ptr = b & ~0b111
	 */
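	/*
	 * Interleaved below, end is also clamped to 0 when b + len wraps
	 * around the address space:
	 *
	 *	end &= -(uint64_t)(b < end);
	 */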
	add a3, a0, a2
	li t0, 0x01010101
	sltu t2, a0, a3
	slli t1, t0, 32
	neg t2, t2
	or t0, t0, t1
	and a3, a3, t2
	slli t1, t0, 7
	slli t2, a0, 3
	and a0, a0, ~0b111
	mul a1, t0, a1
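
	/*
	 * The mul above broadcasts one byte to all eight lanes, as in
	 * this C sketch (illustrative only):
	 *
	 *	uint64_t
	 *	broadcast(uint8_t c)
	 *	{
	 *		return ((uint64_t)c * 0x0101010101010101);
	 *	}
	 *
	 * e.g. broadcast('a') == 0x6161616161616161.
	 */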

	ld a2, (a0)

	/*
	 * mask_start = REP8_0x01 ^ (REP8_0x01 << t2)
	 * iter = iter ^ cccccccc
	 * iter = iter | mask_start
	 */
	sll t2, t0, t2
	xor a2, a2, a1
	xor t2, t2, t0
	or a2, a2, t2
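
	/*
	 * mask_start flags the bytes of this first word that lie before
	 * b; OR-ing 0x01 into them keeps them from ever reading as a
	 * match. A C sketch (illustrative only):
	 *
	 *	uint64_t off = ((uintptr_t)b & 7) << 3;
	 *	uint64_t mask_start = 0x0101010101010101 ^
	 *	    (0x0101010101010101 << off);
	 *	iter |= mask_start;
	 */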

	/*
	 * has_zero(iter)
	 * end_align = (end + 7) & ~0b111;
	 */
	addi a4, a3, 7
	not t2, a2
	sub a2, a2, t0
	and t2, t2, t1
	andi a4, a4, ~0b111
	and a2, a2, t2
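
	/*
	 * This is the classic SWAR zero-byte test; as a C sketch
	 * (illustrative only), the result is nonzero iff some byte of x
	 * is 0x00:
	 *
	 *	uint64_t
	 *	has_zero(uint64_t x)
	 *	{
	 *		return ((x - 0x0101010101010101) & ~x &
	 *		    0x8080808080808080);
	 *	}
	 */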

	/* ptr = ptr + 8 */
	addi a0, a0, 8

	bnez a2, .Lfind_zero

	/* if(ptr == end_align) */
	beq a0, a4, .Lno_match

	/* end_unroll = end_align & ~0b1111 */
	andi a5, a4, ~0b1111

	/*
	 * Instead of branching to check if `ptr` is 16-byte aligned:
	 * - Probe the next 8 bytes for `c`
	 * - Align `ptr` down to the nearest 16-byte boundary
	 *
	 * If `ptr` was already 16-byte aligned, those 8 bytes will be
	 * checked again inside the unrolled loop.
	 *
	 * This removes an unpredictable branch and improves performance.
	 */

	ld a2, (a0)
	xor a2, a2, a1

	not t2, a2
	sub a2, a2, t0
	and t2, t2, t1
	and a2, a2, t2

	addi a0, a0, 8

	bnez a2, .Lfind_zero

	andi a0, a0, ~0b1111
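
	/*
	 * The probe plus align-down above, as a C sketch (illustrative
	 * only, reusing has_zero() and cccccccc from the sketches above):
	 *
	 *	uint64_t iter = *(const uint64_t *)ptr ^ cccccccc;
	 *	ptr += 8;
	 *	if (has_zero(iter))
	 *		goto find_zero;
	 *	ptr = (const char *)((uintptr_t)ptr & ~(uintptr_t)15);
	 */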

	/* while(ptr != end_unroll) */
	beq a0, a5, .Lskip_loop
.Lloop:
	ld a2, (a0)
	ld t3, 8(a0)

	xor a2, a2, a1
	xor t3, t3, a1

	not t2, a2
	not t4, t3
	sub a2, a2, t0
	sub t3, t3, t0
	and t2, t2, t1
	and t4, t4, t1
	and a2, a2, t2
	and t3, t3, t4

	addi a0, a0, 8

	bnez a2, .Lfind_zero

	/* move into iter for find_zero */
	mv a2, t3

	addi a0, a0, 8

	bnez a2, .Lfind_zero

	bne a0, a5, .Lloop
.Lskip_loop:

	/* there might be one 8-byte word left */
	beq a0, a4, .Lno_match

	ld a2, (a0)
	xor a2, a2, a1

	not t2, a2
	sub a2, a2, t0
	and t2, t2, t1
	and a2, a2, t2

	addi a0, a0, 8

	beqz a2, .Lno_match

.Lfind_zero:
	/*
	 * ptr = ptr - 8
	 * t1 = 0x0001020304050607
	 * iter = iter & (-iter)
	 * iter = iter >> 7
	 * iter = iter * t1
	 * iter = iter >> 56
	 */
	li t1, 0x10203000
	neg t0, a2
	slli t1, t1, 4
	and a2, a2, t0
	addi t1, t1, 0x405
	srli a2, a2, 7
	slli t1, t1, 16
	addi a0, a0, -8
	addi t1, t1, 0x607
	mul a2, a2, t1
	srli a2, a2, 56
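
	/*
	 * After the zero test, the lowest set bit of iter sits at bit
	 * 8*k + 7, where k is the index of the first matching byte (bits
	 * above it may be spurious, hence the bit isolation). As a C
	 * sketch (illustrative only):
	 *
	 *	uint64_t bit = iter & -iter;
	 *	uint64_t k = ((bit >> 7) * 0x0001020304050607) >> 56;
	 */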

	/* left = end - ptr */
	sub t0, a3, a0

	/* return iter < left ? ptr + iter : NULL */
	sltu t1, a2, t0
	neg t1, t1
	add a0, a0, a2
	and a0, a0, t1
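
	/*
	 * A branchless select: build an all-ones mask when the match
	 * lies before end, all-zeroes otherwise. As a C sketch
	 * (illustrative only):
	 *
	 *	uint64_t keep = -(uint64_t)(k < (uint64_t)(end - ptr));
	 *	return ((void *)((uintptr_t)(ptr + k) & keep));
	 */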
	ret

.Lno_match:
	li a0, 0
	ret
END(memchr)