/* Source: FreeBSD lib/libc/amd64/string/strcspn.S (freebsd/freebsd-src, branch main) */
/*
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * This software was developed by Robert Clausecker <[email protected]>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <machine/asm.h>
#include <machine/param.h>

#include "amd64_archlevel.h"

#define ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */

	/*
	 * strcspn is a weak alias for __strcspn.  The archlevel
	 * machinery below registers two kernels for __strcspn and
	 * selects one based on the amd64 architecture level the CPU
	 * supports: the scalar baseline or the x86-64-v2 (SSE4.2)
	 * variant.
	 */
	.weak strcspn
	.set strcspn, __strcspn
ARCHFUNCS(__strcspn)
	ARCHFUNC(__strcspn, scalar)
	NOARCHFUNC
	ARCHFUNC(__strcspn, x86_64_v2)
ENDARCHFUNCS(__strcspn)
/*
 * size_t strcspn(const char *s, const char *set) -- scalar kernel
 *
 * In:    %rdi = s, %rsi = set
 * Out:   %rax = length of the longest initial segment of s that
 *        contains no byte from set
 * Stack: 256 bytes for a byte-membership lookup table
 *
 * Empty and single-character sets degrade to strlen()/strchrnul().
 * Otherwise a 256-entry table is built on the stack and the string
 * is scanned four bytes per iteration.
 */
ARCHENTRY(__strcspn, scalar)
	push	%rbp			# align stack to enable function call
	mov	%rsp, %rbp
	sub	$256, %rsp		# allocate space for lookup table

	/* check for special cases */
	movzbl	(%rsi), %eax		# first character in the set
	test	%eax, %eax
	jz	.Lstrlen		# empty set: pure strlen

	movzbl	1(%rsi), %edx		# second character in the set
	test	%edx, %edx
	jz	.Lstrchr		# one-char set: pure strchr

	/* no special case matches -- prepare lookup table */
	xor	%r8d, %r8d
	mov	$28, %ecx		# 8 iterations x 32 bytes = 256 bytes
0:	mov	%r8, (%rsp, %rcx, 8)
	mov	%r8, 8(%rsp, %rcx, 8)
	mov	%r8, 16(%rsp, %rcx, 8)
	mov	%r8, 24(%rsp, %rcx, 8)
	sub	$4, %ecx
	jnc	0b			# loop until %ecx wraps below zero

	add	$2, %rsi
	movb	$1, (%rsp, %rax, 1)	# register first chars in set
	movb	$1, (%rsp, %rdx, 1)
	mov	%rdi, %rax		# a copy of the source to iterate over

	/*
	 * Process remaining chars in set, two per iteration.  Note
	 * that the table entry is written before the NUL check, so
	 * the terminator itself ends up registered in the table --
	 * the match loop below then needs no separate end-of-string
	 * test.
	 */
	ALIGN_TEXT
0:	movzbl	(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	1(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	add	$2, %rsi
	jmp	0b

	/* find match: four table probes per iteration */
	ALIGN_TEXT
1:	movzbl	(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	2f			# match at offset 0

	movzbl	1(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	3f			# match at offset 1

	movzbl	2(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	4f			# match at offset 2

	movzbl	3(%rax), %ecx
	add	$4, %rax
	cmpb	$0, (%rsp, %rcx, 1)
	je	1b			# no match at offset 3: next chunk

	/*
	 * Fix-up chain: each entry point below adjusts %rax/%rdi so
	 * the final subtraction yields the match offset exactly.
	 */
	sub	$3, %rax		# offset 3: undo advance except one
4:	dec	%rdi			# offset 2: bias difference by one
3:	inc	%rax			# offset 1: advance past one byte
2:	sub	%rdi, %rax		# number of characters preceding match
	leave
	ret

	/* set is empty, degrades to strlen */
.Lstrlen:
	leave
	jmp	CNAME(strlen)

	/* just one character in set, degrades to strchr */
.Lstrchr:
	mov	%rdi, (%rsp)		# stash a copy of the string
	mov	%eax, %esi		# find the character in the set
	call	CNAME(strchrnul)	# returns ptr to match or to NUL
	sub	(%rsp), %rax		# length of prefix before match
	leave
	ret
ARCHEND(__strcspn, scalar)
/*
 * size_t strcspn(const char *s, const char *set) -- x86-64-v2 kernel
 *
 * In:    %rdi = s, %rsi = set
 * Out:   %rax = length of the longest initial segment of s that
 *        contains no byte from set
 *
 * This kernel uses pcmpistri to do the heavy lifting.
 * We provide five code paths, depending on set size:
 *
 *      0: call strlen()
 *      1: call strchr()
 *  2--16: one pcmpistri per 16 bytes of input
 * 17--32: two pcmpistri per 16 bytes of input
 *   >=33: fall back to look up table
 */
ARCHENTRY(__strcspn, x86_64_v2)
	push	%rbp
	mov	%rsp, %rbp
	sub	$256, %rsp

	/* check for special cases */
	movzbl	(%rsi), %eax
	test	%eax, %eax		# empty set?
	jz	.Lstrlenv2

	cmpb	$0, 1(%rsi)		# single character set?
	jz	.Lstrchrv2

	/* find set size and copy up to 32 bytes to (%rsp) */
	mov	%esi, %ecx
	and	$~0xf, %rsi		# align set pointer
	movdqa	(%rsi), %xmm0
	pxor	%xmm1, %xmm1
	and	$0xf, %ecx		# amount of bytes rsi is past alignment
	xor	%edx, %edx
	pcmpeqb	%xmm0, %xmm1		# end of set reached?
	movdqa	%xmm0, 32(%rsp)		# transfer head of set to stack
	pmovmskb %xmm1, %eax
	shr	%cl, %eax		# clear out junk before set
	test	%eax, %eax		# end of set reached?
	jnz	0f

	movdqa	16(%rsi), %xmm0		# second chunk of the set
	mov	$16, %edx
	sub	%ecx, %edx		# length of set preceding xmm0
	pxor	%xmm1, %xmm1
	pcmpeqb	%xmm0, %xmm1
	movdqa	%xmm0, 48(%rsp)
	movdqu	32(%rsp, %rcx, 1), %xmm2 # head of set
	pmovmskb %xmm1, %eax
	test	%eax, %eax
	jnz	1f

	movdqa	32(%rsi), %xmm0		# third chunk
	add	$16, %edx
	pxor	%xmm1, %xmm1
	pcmpeqb	%xmm0, %xmm1
	movdqa	%xmm0, 64(%rsp)
	pmovmskb %xmm1, %eax
	test	%eax, %eax		# still not done?
	jz	.Lgt32v2

0:	movdqu	32(%rsp, %rcx, 1), %xmm2 # head of set
1:	tzcnt	%eax, %eax
	add	%eax, %edx		# length of set (excluding NUL byte)
	cmp	$32, %edx		# above 32 bytes?
	ja	.Lgt32v2

	/*
	 * At this point we know that we want to use pcmpistri.
	 * One last problem obtains: the head of the string is not
	 * aligned and may cross a page boundary.  If this is the
	 * case, we take the part before the page boundary and repeat
	 * the last byte to fill up the xmm register.
	 */
	mov	%rdi, %rax		# save original string pointer
	lea	15(%rdi), %esi		# last byte of the head
	xor	%edi, %esi
	test	$PAGE_SIZE, %esi	# does the head cross a page?
	jz	0f

	/* head crosses page: copy to stack to fix up */
	and	$~0xf, %rax		# align head pointer temporarily
	movzbl	15(%rax), %esi		# last head byte on the page
	movdqa	(%rax), %xmm0
	movabs	$0x0101010101010101, %r8
	imul	%r8, %rsi		# repeated 8 times
	movdqa	%xmm0, (%rsp)		# head word on stack
	mov	%rsi, 16(%rsp)		# followed by filler (last byte x8)
	mov	%rsi, 24(%rsp)
	mov	%edi, %eax
	and	$0xf, %eax		# offset of head from alignment
	add	%rsp, %rax		# pointer to fake head

0:	movdqu	(%rax), %xmm0		# load head (fake or real)
	lea	16(%rdi), %rax
	and	$~0xf, %rax		# second 16 bytes of string (aligned)
1:	cmp	$16, %edx		# 16--32 bytes?
	ja	.Lgt16v2

	/* set is 2--16 bytes in size */

	/* _SIDD_UBYTE_OPS|_SIDD_CMP_EQUAL_ANY|_SIDD_LEAST_SIGNIFICANT */
	pcmpistri $0, %xmm0, %xmm2	# match in head?
	jbe	.Lheadmatchv2		# CF: match, ZF: end of string

	ALIGN_TEXT
0:	pcmpistri $0, (%rax), %xmm2
	jbe	1f			# match or end of string?
	pcmpistri $0, 16(%rax), %xmm2
	lea	32(%rax), %rax
	ja	0b			# neither match nor end of string?

3:	lea	-16(%rax), %rax		# go back to second half
1:	jc	2f			# jump if match found
	movdqa	(%rax), %xmm0		# reload string piece
	pxor	%xmm1, %xmm1
	pcmpeqb	%xmm1, %xmm0		# where is the NUL byte?
	pmovmskb %xmm0, %ecx
	tzcnt	%ecx, %ecx		# location of NUL byte in (%rax)
2:	sub	%rdi, %rax		# offset of %xmm0 from beginning of string
	add	%rcx, %rax		# prefix length before match/NUL
	leave
	ret

.Lheadmatchv2:
	jc	2f			# jump if match found
	pxor	%xmm1, %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %ecx
	tzcnt	%ecx, %ecx		# location of NUL byte
2:	mov	%ecx, %eax		# prefix length before match/NUL
	leave
	ret

	/* match in first set half during head */
.Lheadmatchv2first:
	mov	%ecx, %eax
	pcmpistri $0, %xmm0, %xmm3	# match in second set half?
	cmp	%ecx, %eax		# before the first half match?
	cmova	%ecx, %eax		# use the earlier match
	leave
	ret

.Lgt16v2:
	movdqu	48(%rsp, %rcx, 1), %xmm3 # second part of set

	/* set is 17--32 bytes in size */
	pcmpistri $0, %xmm0, %xmm2	# match in first set half?
	jb	.Lheadmatchv2first
	pcmpistri $0, %xmm0, %xmm3	# match in second set half or end of string?
	jbe	.Lheadmatchv2

	ALIGN_TEXT
0:	movdqa	(%rax), %xmm0
	pcmpistri $0, %xmm0, %xmm2
	jb	4f			# match in first set half?
	pcmpistri $0, %xmm0, %xmm3
	jbe	1f			# match in second set half or end of string?
	movdqa	16(%rax), %xmm0
	add	$32, %rax
	pcmpistri $0, %xmm0, %xmm2
	jb	3f			# match in first set half?
	pcmpistri $0, %xmm0, %xmm3
	ja	0b			# neither match in 2nd half nor string end?

	/* match in second half or NUL */
	lea	-16(%rax), %rax		# go back to second half
1:	jc	2f			# jump if match found
	pxor	%xmm1, %xmm1
	pcmpeqb	%xmm1, %xmm0		# where is the NUL byte?
	pmovmskb %xmm0, %ecx
	tzcnt	%ecx, %ecx		# location of NUL byte in (%rax)
2:	sub	%rdi, %rax		# offset of %xmm0 from beginning of string
	add	%rcx, %rax		# prefix length before match/NUL
	leave
	ret

	/* match in first half */
3:	sub	$16, %rax		# go back to second half
4:	sub	%rdi, %rax		# offset of %xmm0 from beginning of string
	mov	%ecx, %edx
	pcmpistri $0, %xmm0, %xmm3	# match in second set half?
	cmp	%ecx, %edx		# before the first half match?
	cmova	%ecx, %edx		# use the earlier match
	add	%rdx, %rax		# return full offset
	leave
	ret

	/* set is empty, degrades to strlen */
.Lstrlenv2:
	leave
	jmp	CNAME(strlen)

	/* just one character in set, degrades to strchr */
.Lstrchrv2:
	mov	%rdi, (%rsp)		# stash a copy of the string
	mov	%eax, %esi		# find this character
	call	CNAME(strchrnul)
	sub	(%rsp), %rax		# length of prefix before match
	leave
	ret

	/* set is >=33 bytes in size */
.Lgt32v2:
	xorps	%xmm0, %xmm0
	mov	$256-64, %edx

	/* clear out look up table, 64 bytes per iteration */
0:	movaps	%xmm0, (%rsp, %rdx, 1)
	movaps	%xmm0, 16(%rsp, %rdx, 1)
	movaps	%xmm0, 32(%rsp, %rdx, 1)
	movaps	%xmm0, 48(%rsp, %rdx, 1)
	sub	$64, %edx
	jnc	0b

	add	%rcx, %rsi		# restore string pointer
	mov	%rdi, %rax		# keep a copy of the string

	/*
	 * Initialise look up table.  As in the scalar kernel, each
	 * byte is registered before the NUL check, so the terminator
	 * is marked as a member and the match loop below needs no
	 * separate end-of-string test.
	 */
	ALIGN_TEXT
0:	movzbl	(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	1(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	2(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	3(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	add	$4, %rsi
	jmp	0b

	/* find match: four table probes per iteration */
	ALIGN_TEXT
1:	movzbl	(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	2f			# match at offset 0

	movzbl	1(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	3f			# match at offset 1

	movzbl	2(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	4f			# match at offset 2

	movzbl	3(%rax), %ecx
	add	$4, %rax
	cmpb	$0, (%rsp, %rcx, 1)
	je	1b			# no match at offset 3: next chunk

	/* fix-up chain, see the scalar kernel for details */
	sub	$3, %rax
4:	dec	%rdi
3:	inc	%rax
2:	sub	%rdi, %rax		# number of characters preceding match
	leave
	ret
ARCHEND(__strcspn, x86_64_v2)
.section .note.GNU-stack,"",%progbits
397
398