Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/amd64/string/strncmp.S
39486 views
1
/*-
2
* Copyright (c) 2023 The FreeBSD Foundation
3
*
4
* This software was developed by Robert Clausecker <[email protected]>
5
* under sponsorship from the FreeBSD Foundation.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE
27
*/
28
29
#include <machine/asm.h>
30
#include <machine/param.h>
31
32
#include "amd64_archlevel.h"
33
34
#define ALIGN_TEXT .p2align 4, 0x90
35
36
ARCHFUNCS(strncmp)
37
ARCHFUNC(strncmp, scalar)
38
ARCHFUNC(strncmp, baseline)
39
ENDARCHFUNCS(strncmp)
40
41
/*
42
* This is just the scalar loop unrolled a bunch of times.
43
*/
44
ARCHENTRY(strncmp, scalar)
45
xor %eax, %eax
46
sub $4, %rdx # 4 chars left to compare?
47
jbe 1f
48
49
ALIGN_TEXT
50
0: movzbl (%rdi), %ecx
51
test %ecx, %ecx # NUL char in first string?
52
jz .L0
53
cmpb (%rsi), %cl # mismatch between strings?
54
jnz .L0
55
56
movzbl 1(%rdi), %ecx
57
test %ecx, %ecx
58
jz .L1
59
cmpb 1(%rsi), %cl
60
jnz .L1
61
62
movzbl 2(%rdi), %ecx
63
test %ecx, %ecx
64
jz .L2
65
cmpb 2(%rsi), %cl
66
jnz .L2
67
68
movzbl 3(%rdi), %ecx
69
test %ecx, %ecx
70
jz .L3
71
cmpb 3(%rsi), %cl
72
jnz .L3
73
74
add $4, %rdi # advance to next iteration
75
add $4, %rsi
76
sub $4, %rdx
77
ja 0b
78
79
/* end of string within the next 4 characters */
80
1: cmp $-4, %edx # end of string reached immediately?
81
jz .Leq
82
movzbl (%rdi), %ecx
83
test %ecx, %ecx
84
jz .L0
85
cmpb (%rsi), %cl
86
jnz .L0
87
88
cmp $-3, %edx # end of string reached after 1 char?
89
jz .Leq
90
movzbl 1(%rdi), %ecx
91
test %ecx, %ecx
92
jz .L1
93
cmpb 1(%rsi), %cl
94
jnz .L1
95
96
cmp $-2, %edx
97
jz .Leq
98
movzbl 2(%rdi), %ecx
99
test %ecx, %ecx
100
jz .L2
101
cmpb 2(%rsi), %cl
102
jnz .L2
103
104
cmp $-1, %edx # either end of string after 3 chars,
105
jz .Leq # or it boils down to the last char
106
107
.L3: inc %eax
108
.L2: inc %eax
109
.L1: inc %eax
110
.L0: movzbl (%rsi, %rax, 1), %ecx
111
movzbl (%rdi, %rax, 1), %eax
112
sub %ecx, %eax
113
.Leq: ret
114
ARCHEND(strncmp, scalar)
115
116
ARCHENTRY(strncmp, baseline)
117
push %rbx
118
sub $1, %rdx # RDX--, so RDX points to the last byte to compare
119
jb .Lempty # where there any bytes to compare at all?
120
121
lea 15(%rdi), %r8d # end of head
122
lea 15(%rsi), %r9d
123
mov %edi, %eax
124
mov %esi, %ebx
125
xor %edi, %r8d # bits that changed between first and last byte
126
xor %esi, %r9d
127
and $~0xf, %rdi # align heads to 16 bytes
128
and $~0xf, %rsi
129
or %r8d, %r9d
130
and $0xf, %eax # offset from alignment
131
and $0xf, %ebx
132
movdqa (%rdi), %xmm0 # load aligned heads
133
movdqa (%rsi), %xmm2
134
pxor %xmm1, %xmm1
135
cmp $16, %rdx # end of buffer within the first 32 bytes?
136
jb .Llt16
137
138
test $PAGE_SIZE, %r9d # did the page change?
139
jz 0f # if not, take fast path
140
141
142
/* heads may cross page boundary, avoid unmapped loads */
143
movdqa %xmm0, -32(%rsp) # stash copies of the heads on the stack
144
movdqa %xmm2, -16(%rsp)
145
mov $-1, %r8d
146
mov $-1, %r9d
147
mov %eax, %ecx
148
shl %cl, %r8d # string head in XMM0
149
mov %ebx, %ecx
150
shl %cl, %r9d # string head in XMM2
151
pcmpeqb %xmm1, %xmm0
152
pcmpeqb %xmm1, %xmm2
153
pmovmskb %xmm0, %r10d
154
pmovmskb %xmm2, %r11d
155
test %r8d, %r10d # NUL byte present in first string?
156
lea -32(%rsp), %r8
157
cmovz %rdi, %r8
158
test %r9d, %r11d # NUL byte present in second string?
159
lea -16(%rsp), %r9
160
cmovz %rsi, %r9
161
movdqu (%r8, %rax, 1), %xmm0 # load true (or fake) heads
162
movdqu (%r9, %rbx, 1), %xmm4
163
jmp 1f
164
165
/* rdx == 0 */
166
.Lempty:
167
xor %eax, %eax # zero-length buffers compare equal
168
pop %rbx
169
ret
170
171
0: movdqu (%rdi, %rax, 1), %xmm0 # load true heads
172
movdqu (%rsi, %rbx, 1), %xmm4
173
1: pxor %xmm2, %xmm2
174
pcmpeqb %xmm0, %xmm2 # NUL byte present?
175
pcmpeqb %xmm0, %xmm4 # which bytes match?
176
pandn %xmm4, %xmm2 # match and not NUL byte?
177
pmovmskb %xmm2, %r9d
178
xor $0xffff, %r9d # mismatch or NUL byte?
179
jnz .Lhead_mismatch
180
181
/* load head and second chunk */
182
movdqa 16(%rdi), %xmm2 # load second chunks
183
movdqa 16(%rsi), %xmm3
184
lea -16(%rdx, %rbx, 1), %rdx # account for length of RSI chunk
185
sub %rbx, %rax # is a&0xf >= b&0xf?
186
jb .Lswapped # if not, proceed with swapped operands
187
jmp .Lnormal
188
189
/* buffer ends within the first 16 bytes */
190
.Llt16: test $PAGE_SIZE, %r9d # did the page change?
191
jz 0f # if not, take fast path
192
193
/* heads may cross page boundary */
194
movdqa %xmm0, -32(%rsp) # stash copies of the heads on the stack
195
movdqa %xmm2, -16(%rsp)
196
mov $-1, %r8d
197
mov $-1, %r9d
198
mov %eax, %ecx
199
shl %cl, %r8d # string head in XMM0
200
mov %ebx, %ecx
201
shl %cl, %r9d # string head in XMM2
202
pcmpeqb %xmm1, %xmm0
203
pcmpeqb %xmm1, %xmm2
204
pmovmskb %xmm0, %r10d
205
pmovmskb %xmm2, %r11d
206
lea (%rdx, %rax, 1), %ecx # location of last buffer byte in xmm0
207
bts %ecx, %r10d # treat as if NUL byte present
208
lea (%rdx, %rbx, 1), %ecx
209
bts %ecx, %r11d
210
test %r8w, %r10w # NUL byte present in first string head?
211
lea -32(%rsp), %r8
212
cmovz %rdi, %r8
213
test %r9w, %r11w # NUL byte present in second string head?
214
lea -16(%rsp), %r9
215
cmovz %rsi, %r9
216
movdqu (%r8, %rax, 1), %xmm0 # load true (or fake) heads
217
movdqu (%r9, %rbx, 1), %xmm4
218
jmp 1f
219
220
0: movdqu (%rdi, %rax, 1), %xmm0 # load true heads
221
movdqu (%rsi, %rbx, 1), %xmm4
222
1: pxor %xmm2, %xmm2
223
pcmpeqb %xmm0, %xmm2 # NUL byte present?
224
pcmpeqb %xmm0, %xmm4 # which bytes match?
225
pandn %xmm4, %xmm2 # match and not NUL byte?
226
pmovmskb %xmm2, %r9d
227
btr %edx, %r9d # induce mismatch in last byte of buffer
228
not %r9d # mismatch or NUL byte?
229
230
/* mismatch in true heads */
231
ALIGN_TEXT
232
.Lhead_mismatch:
233
tzcnt %r9d, %r9d # where is the mismatch?
234
add %rax, %rdi # return to true heads
235
add %rbx, %rsi
236
movzbl (%rdi, %r9, 1), %eax # mismatching characters
237
movzbl (%rsi, %r9, 1), %ecx
238
sub %ecx, %eax
239
pop %rbx
240
ret
241
242
/* rax >= 0 */
243
ALIGN_TEXT
244
.Lnormal:
245
neg %rax
246
movdqu 16(%rsi, %rax, 1), %xmm0
247
sub %rdi, %rsi # express RSI as distance from RDI
248
lea (%rsi, %rax, 1), %rbx # point RBX to offset in second string
249
neg %rax # ... corresponding to RDI
250
pcmpeqb %xmm3, %xmm1 # NUL present?
251
pcmpeqb %xmm2, %xmm0 # Mismatch between chunks?
252
pmovmskb %xmm1, %r8d
253
pmovmskb %xmm0, %r9d
254
mov $16, %ecx
255
cmp %rcx, %rdx # does the buffer end within (RDI,RSI,1)?
256
cmovb %edx, %ecx # ECX = min(16, RDX)
257
add $32, %rdi # advance to next iteration
258
bts %ecx, %r8d # mark end-of-buffer as if there was a NUL byte
259
test %r8w, %r8w # NUL or end of buffer found?
260
jnz .Lnul_found2
261
xor $0xffff, %r9d
262
jnz .Lmismatch2
263
sub $48, %rdx # end of buffer within first main loop iteration?
264
jb .Ltail # if yes, process tail
265
266
/*
267
* During the main loop, the layout of the two strings is something like:
268
*
269
* v ------1------ v ------2------ v
270
* RDI: AAAAAAAAAAAAABBBBBBBBBBBBBBBB...
271
* RSI: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC...
272
*
273
* where v indicates the alignment boundaries and corresponding chunks
274
* of the strings have the same letters. Chunk A has been checked in
275
* the previous iteration. This iteration, we first check that string
276
* RSI doesn't end within region 2, then we compare chunk B between the
277
* two strings. As RSI is known not to hold a NUL byte in regsions 1
278
* and 2 at this point, this also ensures that RDI has not ended yet.
279
*/
280
ALIGN_TEXT
281
0: movdqu (%rdi, %rbx, 1), %xmm0 # chunk of 2nd string corresponding to RDI
282
pxor %xmm1, %xmm1
283
pcmpeqb (%rdi, %rsi, 1), %xmm1 # end of string in RSI?
284
pcmpeqb (%rdi), %xmm0 # where do the chunks match?
285
pmovmskb %xmm1, %r8d
286
pmovmskb %xmm0, %r9d
287
test %r8d, %r8d
288
jnz .Lnul_found
289
xor $0xffff, %r9d # any mismatches?
290
jnz .Lmismatch
291
292
/* main loop unrolled twice */
293
movdqu 16(%rdi, %rbx, 1), %xmm0
294
pxor %xmm1, %xmm1
295
pcmpeqb 16(%rdi, %rsi, 1), %xmm1
296
pcmpeqb 16(%rdi), %xmm0
297
pmovmskb %xmm1, %r8d
298
pmovmskb %xmm0, %r9d
299
add $32, %rdi
300
test %r8d, %r8d
301
jnz .Lnul_found2
302
xor $0xffff, %r9d
303
jnz .Lmismatch2
304
sub $32, %rdx # end of buffer within next iteration?
305
jae 0b
306
307
/* end of buffer will occur in next 32 bytes */
308
.Ltail: movdqu (%rdi, %rbx, 1), %xmm0 # chunk of 2nd string corresponding to RDI
309
pxor %xmm1, %xmm1
310
pcmpeqb (%rdi, %rsi, 1), %xmm1 # end of string in RSI?
311
pcmpeqb (%rdi), %xmm0 # where do the chunks match?
312
pmovmskb %xmm1, %r8d
313
pmovmskb %xmm0, %r9d
314
bts %edx, %r8d # indicate NUL byte at last byte in buffer
315
test %r8w, %r8w # NUL byte in first chunk?
316
jnz .Lnul_found
317
xor $0xffff, %r9d # any mismatches?
318
jnz .Lmismatch
319
320
/* main loop unrolled twice */
321
movdqu 16(%rdi, %rbx, 1), %xmm0
322
pxor %xmm1, %xmm1
323
pcmpeqb 16(%rdi, %rsi, 1), %xmm1
324
pcmpeqb 16(%rdi), %xmm0
325
pmovmskb %xmm1, %r8d
326
pmovmskb %xmm0, %r9d
327
sub $16, %edx # take first half into account
328
bts %edx, %r8d # indicate NUL byte at last byte in buffer
329
add $32, %rdi
330
331
.Lnul_found2:
332
sub $16, %rdi
333
334
.Lnul_found:
335
mov %eax, %ecx
336
mov %r8d, %r10d
337
shl %cl, %r8d # adjust NUL mask to positions in RDI/RBX
338
not %r9d # mask of mismatches
339
or %r8w, %r9w # NUL bytes als count as mismatches
340
jnz .Lmismatch
341
342
/*
343
* (RDI) == (RSI) and NUL is past the string.
344
* compare (RSI) with the corresponding part
345
* of the other string until the NUL byte.
346
*/
347
movdqu (%rdi, %rax, 1), %xmm0
348
pcmpeqb (%rdi, %rsi, 1), %xmm0
349
add %rdi, %rsi # restore RSI pointer
350
add %rax, %rdi # point RDI to chunk corresponding to (RSI)
351
pmovmskb %xmm0, %ecx # mask of matches
352
not %ecx # mask of mismatches
353
or %r10d, %ecx # mask of mismatches or NUL bytes
354
tzcnt %ecx, %ecx # location of first mismatch
355
movzbl (%rdi, %rcx, 1), %eax
356
movzbl (%rsi, %rcx, 1), %ecx
357
sub %ecx, %eax
358
pop %rbx
359
ret
360
361
.Lmismatch2:
362
sub $16, %rdi
363
364
/* a mismatch has been found between RBX and RSI */
365
.Lmismatch:
366
tzcnt %r9d, %r9d # where is the mismatch?
367
add %rdi, %rbx # turn RBX from offset into pointer
368
movzbl (%rbx, %r9, 1), %ecx
369
movzbl (%rdi, %r9, 1), %eax
370
sub %ecx, %eax
371
pop %rbx
372
ret
373
374
/* rax < 0 */
375
ALIGN_TEXT
376
.Lswapped:
377
movdqu 16(%rdi, %rax, 1), %xmm0
378
sub %rsi, %rdi # express RDI as distance from RDI
379
lea (%rdi, %rax, 1), %rbx # point RBX to offset in first string
380
pcmpeqb %xmm2, %xmm1 # NUL present?
381
pcmpeqb %xmm3, %xmm0 # mismatch between chunks?
382
pmovmskb %xmm1, %r8d
383
pmovmskb %xmm0, %r9d
384
add %rax, %rdx # RDX points to buffer end in RSI
385
neg %rax # ... corresponding to RSI
386
mov $16, %ecx
387
cmp %rcx, %rdx # does the buffer end within (RSI,RDI,1)?
388
cmovb %edx, %ecx # ECX = min(16, RDX)
389
add $32, %rsi
390
bts %ecx, %r8d # mark end-of-buffer as if there was a NUL byte
391
test %r8w, %r8w # NUL or end of buffer found?
392
jnz .Lnul_found2s
393
xor $0xffff, %r9d
394
jnz .Lmismatch2s
395
sub $48, %rdx # end of buffer within first main loop iteration?
396
jb .Ltails # if yes, process tail
397
398
ALIGN_TEXT
399
0: movdqu (%rsi, %rbx, 1), %xmm0 # chunk of 1st string corresponding to RSI
400
pxor %xmm1, %xmm1
401
pcmpeqb (%rsi, %rdi, 1), %xmm1 # end of string in RDI?
402
pcmpeqb (%rsi), %xmm0 # where do the chunks match?
403
pmovmskb %xmm1, %r8d
404
pmovmskb %xmm0, %r9d
405
test %r8d, %r8d
406
jnz .Lnul_founds
407
xor $0xffff, %r9d # any mismatches?
408
jnz .Lmismatchs
409
410
/* main loop unrolled twice */
411
movdqu 16(%rsi, %rbx, 1), %xmm0
412
pxor %xmm1, %xmm1
413
pcmpeqb 16(%rsi, %rdi, 1), %xmm1
414
pcmpeqb 16(%rsi), %xmm0
415
pmovmskb %xmm1, %r8d
416
pmovmskb %xmm0, %r9d
417
add $32, %rsi
418
test %r8d, %r8d
419
jnz .Lnul_found2s
420
xor $0xffff, %r9d
421
jnz .Lmismatch2s
422
sub $32, %rdx # end of buffer within next iteration?
423
jae 0b
424
425
/* end of buffer will occur in next 32 bytes */
426
.Ltails:
427
movdqu (%rsi, %rbx, 1), %xmm0 # chunk of 1st string corresponding to RSI
428
pxor %xmm1, %xmm1
429
pcmpeqb (%rsi, %rdi, 1), %xmm1 # end of string in RDI?
430
pcmpeqb (%rsi), %xmm0 # where do the chunks match?
431
pmovmskb %xmm1, %r8d
432
pmovmskb %xmm0, %r9d
433
bts %edx, %r8d # indicate NUL byte at laste byte in buffer
434
test %r8w, %r8w # NUL byte in first chunk?
435
jnz .Lnul_founds
436
xor $0xffff, %r9d # any mismatches?
437
jnz .Lmismatchs
438
439
/* main loop unrolled twice */
440
movdqu 16(%rsi, %rbx, 1), %xmm0
441
pxor %xmm1, %xmm1
442
pcmpeqb 16(%rsi, %rdi, 1), %xmm1
443
pcmpeqb 16(%rsi), %xmm0
444
pmovmskb %xmm1, %r8d
445
pmovmskb %xmm0, %r9d
446
sub $16, %edx # take first half into account
447
bts %edx, %r8d # indicate NUL byte at laste byte in buffer
448
add $32, %rsi
449
450
.Lnul_found2s:
451
sub $16, %rsi
452
453
.Lnul_founds:
454
mov %eax, %ecx
455
mov %r8d, %r10d
456
shl %cl, %r8d # adjust NUL mask to positions in RSI/RBX
457
not %r9d # mask of mismatches
458
or %r8w, %r9w # NUL bytes also count as mismatches
459
jnz .Lmismatchs
460
461
movdqu (%rsi, %rax, 1), %xmm0
462
pcmpeqb (%rsi, %rdi, 1), %xmm0
463
add %rsi, %rdi # restore RDI pointer
464
add %rax, %rsi # point RSI to chunk corresponding to (RDI)
465
pmovmskb %xmm0, %ecx # mask of matches
466
not %ecx # mask of mismatches
467
or %r10d, %ecx # mask of mismatches or NUL bytes
468
tzcnt %ecx, %ecx # location of first mismatch
469
movzbl (%rdi, %rcx, 1), %eax
470
movzbl (%rsi, %rcx, 1), %ecx
471
sub %ecx, %eax
472
pop %rbx
473
ret
474
475
.Lmismatch2s:
476
sub $16, %rsi
477
478
.Lmismatchs:
479
tzcnt %r9d, %r9d # where is the mismatch?
480
add %rsi, %rbx # turn RBX from offset into pointer
481
movzbl (%rbx, %r9, 1), %eax
482
movzbl (%rsi, %r9, 1), %ecx
483
sub %ecx, %eax
484
pop %rbx
485
ret
486
ARCHEND(strncmp, baseline)
487
488
.section .note.GNU-stack,"",%progbits
489
490