Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/amd64/string/memchr.S
39491 views
1
/*-
2
* Copyright (c) 2023 The FreeBSD Foundation
3
*
4
* This software was developed by Robert Clausecker <clausecker@FreeBSD.org>
5
* under sponsorship from the FreeBSD Foundation.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE
27
*/
28
29
#include <machine/asm.h>
30
31
#include "amd64_archlevel.h"
32
33
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
34
35
/*
 * Public symbol: memchr is declared weak and given the value of
 * __memchr, so a call to memchr lands on whatever __memchr resolves to.
 */
.weak memchr
36
.set memchr, __memchr
37
/*
 * Implementation table for __memchr.  The ARCHFUNCS / ARCHFUNC /
 * ENDARCHFUNCS macros are not defined in this file (presumably they come
 * from amd64_archlevel.h -- TODO confirm).  The entries name the two
 * variants implemented below: "scalar" and "baseline".
 */
ARCHFUNCS(__memchr)
38
ARCHFUNC(__memchr, scalar)
39
ARCHFUNC(__memchr, baseline)
40
ENDARCHFUNCS(__memchr)
41
42
/*
 * __memchr, scalar variant: find the first byte equal to c in a buffer,
 * examining one aligned 8-byte word per step using integer registers only.
 *
 * In (as read by the code below):
 *   rdi = buffer pointer, sil = byte to find, rdx = buffer length
 * Out:
 *   rax = pointer to the first matching byte, or 0 when there is none
 *
 * Register roles once setup is complete:
 *   rdi  aligned cursor, always 8 past the word currently held in rax
 *   rdx  end pointer (start + length, clamped to -1 if the sum wraps)
 *   rsi  c replicated into all 8 bytes
 *   r8   -0x0101010101010101 (negated so lea can perform the subtraction)
 *   r9   0x8080808080808080
 *   r10  0x01 in every byte of the first word that precedes the buffer
 *   r11  0x80 in every byte of the last word that lies inside the buffer
 *
 * A byte of x = (word ^ c) is zero exactly where the word holds c.  The
 * expression (x - 0x01..01) & ~x & 0x80..80 flags such bytes in bit 7;
 * only the lowest flagged byte is consumed (tzcnt at .Lmatch).
 *
 * Loads are always of whole aligned words, so bytes before the start and
 * after the end of the buffer are read and then masked out via r10 / r11.
 *
 * NOTE(review): the bare decimal lines interleaved with the instructions
 * are present in this copy of the file and look like viewer line numbers;
 * confirm against the upstream source before assembling.
 */
ARCHENTRY(__memchr, scalar)
43
test %rdx, %rdx # empty input?
44
je .Lnomatch
45
46
lea (, %rdi, 8), %ecx # cl = 8 * start; shl uses it mod 64, i.e. 8 * (start % 8)
47
mov $-1, %rax # all-ones, used to clamp the end pointer
48
add %rdi, %rdx # rdx = pointer to end of buffer, or ...
49
cmovc %rax, %rdx # ... -1 if start + length wrapped around
50
and $~7, %rdi # round start down to an 8 byte boundary
51
mov (%rdi), %rax # load first (aligned) word
52
movzbl %sil, %esi # keep only the low byte of c
53
movabs $0x0101010101010101, %r8
54
imul %r8, %rsi # replicate c into all 8 bytes
55
56
/* compute head and tail masks (interleaved with the first comparison) */
57
mov %r8, %r10
58
movabs $0x8080808080808080, %r9
59
shl %cl, %r10 # 0x01 in bytes at and after the buffer start
60
lea (, %rdx, 8), %ecx # ecx = 8 * end (negated below)
61
xor %r8, %r10 # flip: 0x01 only in bytes before the start
62
neg %r8 # negate 01..01 so we can use lea
63
mov %r9, %r11
64
xor %rsi, %rax # str ^ c (0x00 where str[i] == c)
65
neg %ecx # cl = -8 * end; shr uses it mod 64
66
or %r10, %rax # force bytes before the buffer to be nonzero
67
shr %cl, %r11 # keep 0x80 only for bytes below end in the last word
68
69
add $8, %rdi # advance to next 8 bytes
70
cmp %rdx, %rdi # does the buffer end within the first word?
71
jae .Ltail # if so the first word is also the last one
72
73
/* main loop, unrolled twice; on entry rax = word ^ c, rdi = word address + 8 */
74
ALIGN_TEXT
75
0: lea (%rax, %r8, 1), %rcx # (str ^ c) - 0x01..01
76
not %rax # ~(str ^ c)
77
and %r9, %rax # ~(str ^ c) & 0x80..80
78
and %rcx, %rax # ... & ((str ^ c) - 0x01..01): nonzero iff a byte matched
79
jnz .Lmatch
80
81
mov (%rdi), %rax # load next word
82
add $8, %rdi
83
xor %rsi, %rax # str ^ c
84
cmp %rdx, %rdi # was that the last word?
85
jae .Ltail
86
87
lea (%rax, %r8, 1), %rcx # (str ^ c) - 0x01..01
88
not %rax # ~(str ^ c)
89
and %r9, %rax # ~(str ^ c) & 0x80..80
90
and %rcx, %rax # ... & ((str ^ c) - 0x01..01): nonzero iff a byte matched
91
jnz .Lmatch
92
93
mov (%rdi), %rax # load next word
94
add $8, %rdi
95
xor %rsi, %rax # str ^ c
96
cmp %rdx, %rdi # more words after this one?
97
jb 0b
98
99
/* last word: same test as in the loop, but masked with r11 instead of r9 */
.Ltail: lea (%rax, %r8, 1), %rcx # (str ^ c) - 0x01..01
100
not %rax # ~(str ^ c)
101
and %r11, %rax # ~(str ^ c), restricted to bytes inside the buffer
102
and %rcx, %rax # ... & ((str ^ c) - 0x01..01): nonzero iff a byte matched
103
jz .Lnomatch
104
105
/* rax has bit 7 set in each flagged byte; rdi is 8 past the word examined */
.Lmatch:
106
tzcnt %rax, %rax # bit index of the lowest flagged byte
107
shr $3, %eax # scale from bit to byte index
108
lea -8(%rdi, %rax), %rax # word address + byte index = pointer to found c
109
ret
110
111
/* no match found */
112
.Lnomatch:
113
xor %eax, %eax # return null pointer
114
ret
115
ARCHEND(__memchr, scalar)
116
117
/*
 * __memchr, baseline variant: find the first byte equal to c in a buffer,
 * examining one aligned 32-byte chunk per step as two 16-byte xmm loads.
 *
 * In (as read by the code below):
 *   rdi = buffer pointer, sil = byte to find, rdx = buffer length
 * Out:
 *   rax = pointer to the first matching byte, or 0 when there is none
 *
 * Register roles once setup is complete:
 *   rdi   aligned cursor, always 32 past the chunk held in xmm0/xmm1
 *   rdx   end pointer (start + length, clamped to -1 if the sum wraps)
 *   xmm2  c replicated into all 16 bytes
 *   r9d   head mask: bit i is clear for chunk bytes before the buffer start
 *   r8d   tail mask: bit i is set for bytes of the final chunk below end
 *
 * Match bitmaps are 32 bits wide: bits 0-15 come from xmm0 (low half of
 * the chunk) and bits 16-31 from xmm1 (high half).
 *
 * Loads are always of whole aligned chunks, so bytes before the start and
 * after the end of the buffer are read and then masked out via r9d / r8d.
 *
 * NOTE(review): the bare decimal lines interleaved with the instructions
 * are present in this copy of the file and look like viewer line numbers;
 * confirm against the upstream source before assembling.
 */
ARCHENTRY(__memchr, baseline)
118
test %rdx, %rdx # empty input?
119
je .Lnomatchb
120
121
movd %esi, %xmm2 # c into the low lane of xmm2 (broadcast below)
122
mov %edi, %ecx # cl = start; 32 bit shl uses it mod 32
123
mov $-1, %r9 # all-ones: clamp value now, head mask later
124
add %rdi, %rdx # rdx = pointer to end of buffer, or ...
125
cmovc %r9, %rdx # ... -1 if start + length wrapped around
126
and $~0x1f, %rdi # round start down to a 32 byte boundary
127
movdqa (%rdi), %xmm0 # load first 32 bytes (low half)
128
movdqa 16(%rdi), %xmm1 # (high half)
129
130
punpcklbw %xmm2, %xmm2 # c -> cc
131
132
shl %cl, %r9d # head mask: zero bits for bytes before the start
133
134
punpcklwd %xmm2, %xmm2 # cc -> cccc
135
136
mov $-1, %r8d
137
xor %ecx, %ecx
138
sub %edx, %ecx # ecx = -edx; shr uses it mod 32
139
shr %cl, %r8d # tail mask: one bits for bytes of the last chunk below end
140
141
pshufd $0, %xmm2, %xmm2 # cccc -> cccccccccccccccc
142
143
add $32, %rdi # advance to next 32 bytes
144
mov $-1, %eax
145
cmp %rdx, %rdi # does the buffer end within the first chunk?
146
cmovae %r8d, %eax # if yes, start from the tail mask (combined head/tail)
147
and %r9d, %eax # eax = bytes of the first chunk that belong to the buffer
148
149
/* process head */
150
pcmpeqb %xmm2, %xmm1
151
pcmpeqb %xmm2, %xmm0
152
pmovmskb %xmm1, %esi # match bits of the high half
153
pmovmskb %xmm0, %ecx # match bits of the low half
154
shl $16, %esi
155
or %esi, %ecx # 32 bit bitmap of matches in the chunk
156
and %ecx, %eax # any match inside buffer?
157
jnz .Lprecisematchb
158
159
cmp %rdx, %rdi # did the buffer end here?
160
jae .Lnomatchb # if yes we are done
161
162
/* main loop */
163
ALIGN_TEXT
164
0: movdqa (%rdi), %xmm0 # load next string chunk
165
movdqa 16(%rdi), %xmm1
166
add $32, %rdi
167
cmp %rdx, %rdi # did the buffer end inside this chunk?
168
jae .Ltailb # if yes it needs the tail mask
169
170
pcmpeqb %xmm2, %xmm0
171
pcmpeqb %xmm2, %xmm1
172
por %xmm1, %xmm0 # match in either half? (overwrites the low half result)
173
pmovmskb %xmm0, %eax
174
test %eax, %eax
175
jz 0b
176
177
/*
 * Match somewhere in a full chunk.  xmm0 was overwritten by the por
 * above, so the low half is compared again straight from memory; this
 * overwrites the broadcast in xmm2, which is not used again before ret.
 */
.Lmatchb:
178
pcmpeqb -32(%rdi), %xmm2 # redo comparison of first 16 bytes
179
pmovmskb %xmm1, %ecx # high half result is still intact in xmm1
180
pmovmskb %xmm2, %eax
181
shl $16, %ecx
182
or %ecx, %eax # 32 bit bitmap of matches in the chunk
183
184
/* eax = nonzero match bitmap, rdi = chunk address + 32 */
.Lprecisematchb:
185
tzcnt %eax, %eax # index of the lowest set bit = offset of first match
186
lea -32(%rdi, %rax, 1), %rax # chunk address + offset = matching byte
187
ret
188
189
/* final chunk: build the bitmap as in the head, then apply the tail mask */
.Ltailb:
190
pcmpeqb %xmm2, %xmm1
191
pcmpeqb %xmm2, %xmm0
192
pmovmskb %xmm1, %edx # end pointer no longer needed, rdx is scratch now
193
pmovmskb %xmm0, %eax
194
shl $16, %edx
195
or %edx, %eax # 32 bit bitmap of matches in the chunk
196
and %r8d, %eax # mask out matches beyond buffer
197
bsf %eax, %edx # offset of first match; sets ZF when eax is zero
198
lea -32(%rdi, %rdx, 1), %rdx # pointer to match (if any); lea leaves flags alone
199
cmovnz %rdx, %rax # point to match if present,
200
ret # else rax is still the zero bitmap, i.e. a null pointer
201
202
.Lnomatchb:
203
xor %eax, %eax # return null pointer
204
ret
205
ARCHEND(__memchr, baseline)
206
207
/* Empty .note.GNU-stack section: by GNU toolchain convention this marks the object as not needing an executable stack. */
.section .note.GNU-stack,"",%progbits
208
209