GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/amd64/string/memcmp.S
/*-
 * Copyright (c) 2018, 2023 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <[email protected]>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by Robert Clausecker
 * <[email protected]> under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
#include <machine/param.h>

#include "amd64_archlevel.h"

/*
 * Note: this routine was written with kernel use in mind (read: no SIMD);
 * it is only present in userspace as a temporary measure until something
 * better gets imported.
 */

#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */

#ifdef BCMP
#define memcmp bcmp
#endif

ARCHFUNCS(memcmp)
        ARCHFUNC(memcmp, scalar)
        ARCHFUNC(memcmp, baseline)
ENDARCHFUNCS(memcmp)
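
/*
 * ARCHFUNCS/ARCHFUNC come from amd64_archlevel.h: they register the
 * variants below with libc's architecture-level dispatch, which
 * resolves memcmp to the best implementation the CPU supports (here
 * either the scalar fallback or the SSE2 baseline version).
 */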

ARCHENTRY(memcmp, scalar)
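        /*
         * The long numeric local labels below encode the buffer sizes
         * a block is written for: 100816 handles 8..16 bytes, 101632
         * handles 16..32, and so on.  Mismatch targets carry an extra
         * suffix identifying which of the block's loads failed (e.g.
         * 10163208 for the load at offset 8 in the 16..32 block).
         */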
        xorl %eax,%eax
10:
        cmpq $16,%rdx
        ja 101632f              # 17 bytes and up

        cmpb $8,%dl
        jg 100816f              # 9..16 bytes

        cmpb $4,%dl
        jg 100408f              # 5..8 bytes

        cmpb $2,%dl
        jge 100204f             # 2..4 bytes

        cmpb $1,%dl
        jl 100000f              # 0 bytes: return 0
        movzbl (%rdi),%eax      # 1 byte: compare directly
        movzbl (%rsi),%r8d
        subl %r8d,%eax
100000:
        ret
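
        /*
         * The fixed-size blocks below use potentially overlapping
         * loads, one pair anchored at the start of the buffer and one
         * at its end (e.g. lengths 9..16 compare the first and the
         * last 8 bytes), so no byte-wise loop is needed.
         */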
        ALIGN_TEXT
100816:
        movq (%rdi),%r8
        movq (%rsi),%r9
        cmpq %r8,%r9
        jne 80f
        movq -8(%rdi,%rdx),%r8
        movq -8(%rsi,%rdx),%r9
        cmpq %r8,%r9
        jne 10081608f
        ret
        ALIGN_TEXT
100408:
        movl (%rdi),%r8d
        movl (%rsi),%r9d
        cmpl %r8d,%r9d
        jne 80f
        movl -4(%rdi,%rdx),%r8d
        movl -4(%rsi,%rdx),%r9d
        cmpl %r8d,%r9d
        jne 10040804f
        ret
        ALIGN_TEXT
100204:
        movzwl (%rdi),%r8d
        movzwl (%rsi),%r9d
        cmpl %r8d,%r9d
        jne 1f
        movzwl -2(%rdi,%rdx),%r8d
        movzwl -2(%rsi,%rdx),%r9d
        cmpl %r8d,%r9d
        jne 1f
        ret
        ALIGN_TEXT
101632:
        cmpq $32,%rdx
        ja 103200f
        movq (%rdi),%r8
        movq (%rsi),%r9
        cmpq %r8,%r9
        jne 80f
        movq 8(%rdi),%r8
        movq 8(%rsi),%r9
        cmpq %r8,%r9
        jne 10163208f
        movq -16(%rdi,%rdx),%r8
        movq -16(%rsi,%rdx),%r9
        cmpq %r8,%r9
        jne 10163216f
        movq -8(%rdi,%rdx),%r8
        movq -8(%rsi,%rdx),%r9
        cmpq %r8,%r9
        jne 10163224f
        ret
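
        /*
         * Buffers longer than 32 bytes: loop over 32-byte chunks.
         * sub/or is used instead of cmp so that a single jnz tests a
         * whole 16-byte (two-qword) group for a mismatch at once.
         */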
        ALIGN_TEXT
103200:
        movq (%rdi),%r8
        movq 8(%rdi),%r9
        subq (%rsi),%r8
        subq 8(%rsi),%r9
        orq %r8,%r9
        jnz 10320000f

        movq 16(%rdi),%r8
        movq 24(%rdi),%r9
        subq 16(%rsi),%r8
        subq 24(%rsi),%r9
        orq %r8,%r9
        jnz 10320016f

        leaq 32(%rdi),%rdi
        leaq 32(%rsi),%rsi
        subq $32,%rdx
        cmpq $32,%rdx
        jae 103200b
        cmpb $0,%dl
        jne 10b                 # re-dispatch on the remaining 1..31 bytes
        ret

/*
 * Mismatch was found.
 */
#ifdef BCMP
        ALIGN_TEXT
10320016:
10320000:
10081608:
10163224:
10163216:
10163208:
10040804:
80:
1:
        leal 1(%eax),%eax       # eax is still 0 from entry; any nonzero
        ret                     # value satisfies bcmp
#else
/*
 * We need to compute the difference between strings.
 * Start with narrowing the range down (16 -> 8 -> 4 bytes).
 */
        ALIGN_TEXT
10320016:
        leaq 16(%rdi),%rdi
        leaq 16(%rsi),%rsi
10320000:
        movq (%rdi),%r8
        movq (%rsi),%r9
        cmpq %r8,%r9
        jne 80f
        leaq 8(%rdi),%rdi
        leaq 8(%rsi),%rsi
        jmp 80f
        ALIGN_TEXT
10081608:
10163224:
        leaq -8(%rdi,%rdx),%rdi
        leaq -8(%rsi,%rdx),%rsi
        jmp 80f
        ALIGN_TEXT
10163216:
        leaq -16(%rdi,%rdx),%rdi
        leaq -16(%rsi,%rdx),%rsi
        jmp 80f
        ALIGN_TEXT
10163208:
        leaq 8(%rdi),%rdi
        leaq 8(%rsi),%rsi
        jmp 80f
        ALIGN_TEXT
10040804:
        leaq -4(%rdi,%rdx),%rdi
        leaq -4(%rsi,%rdx),%rsi
        jmp 1f

        ALIGN_TEXT
80:
        movl (%rdi),%r8d
        movl (%rsi),%r9d
        cmpl %r8d,%r9d
        jne 1f
        leaq 4(%rdi),%rdi
        leaq 4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 */
1:
        movzbl (%rdi),%eax
        movzbl (%rsi),%r8d
        cmpb %r8b,%al
        jne 2f

        movzbl 1(%rdi),%eax
        movzbl 1(%rsi),%r8d
        cmpb %r8b,%al
        jne 2f

        movzbl 2(%rdi),%eax
        movzbl 2(%rsi),%r8d
        cmpb %r8b,%al
        jne 2f

        movzbl 3(%rdi),%eax
        movzbl 3(%rsi),%r8d
2:
        subl %r8d,%eax
        ret
#endif
ARCHEND(memcmp, scalar)
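
/*
 * Baseline implementation: assumes only the amd64 baseline ISA, so
 * SSE2 (pcmpeqb/pmovmskb) is always available.  Buffers are compared
 * 16 or 32 bytes at a time, producing a bit mask of matching byte
 * positions.
 */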

ARCHENTRY(memcmp, baseline)
        cmp $32, %rdx           # enough to permit use of the long kernel?
        ja .Llong

        test %rdx, %rdx         # zero-length buffer?
        je .L0

        /*
         * Compare strings of 1--32 bytes.  We want to do this by
         * loading into two xmm registers and then comparing.  To avoid
         * crossing into unmapped pages, we either load 32 bytes from
         * the start of the buffer or 32 bytes before its end, depending
         * on whether the overread area crosses a page boundary.
         */
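
        /*
         * Illustration (PAGE_SIZE is 4096 on amd64): for rdi ending in
         * 0xff0 and rdx = 8, the overread would end at ...0x100f on the
         * next page.  31(%rdi) and -1(%rdi,%rdx) then differ in bit 12,
         * so the xor + test $PAGE_SIZE sequence below detects the
         * crossing and the buffer is copied away from the boundary.
         */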

        /* check for overreads across page boundaries */
        lea 31(%rdi), %eax      # end of overread
        lea 31(%rsi), %r8d
        lea -1(%rdi, %rdx, 1), %ecx # last character in buffer
        lea -1(%rsi, %rdx, 1), %r9d
        xor %ecx, %eax
        xor %r9d, %r8d
        test $PAGE_SIZE, %eax   # are they on different pages?
        jz 0f

        /* fix up rdi */
        movdqu -32(%rdi, %rdx, 1), %xmm0
        movdqu -16(%rdi, %rdx, 1), %xmm1
        lea -8(%rsp), %rdi      # end of replacement buffer
        sub %rdx, %rdi          # start of replacement buffer
        movdqa %xmm0, -40(%rsp) # copy to replacement buffer
        movdqa %xmm1, -24(%rsp)

0:      test $PAGE_SIZE, %r8d
        jz 0f

        /* fix up rsi */
        movdqu -32(%rsi, %rdx, 1), %xmm0
        movdqu -16(%rsi, %rdx, 1), %xmm1
        lea -40(%rsp), %rsi     # end of replacement buffer
        sub %rdx, %rsi          # start of replacement buffer
        movdqa %xmm0, -72(%rsp) # copy to replacement buffer
        movdqa %xmm1, -56(%rsp)
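
        /*
         * The movdqa stores above stay within the 128-byte System V
         * red zone below %rsp, so no stack frame needs to be set up
         * for the replacement buffers.
         */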

        /* load data and compare properly */
0:      movdqu 16(%rdi), %xmm1
        movdqu 16(%rsi), %xmm3
        movdqu (%rdi), %xmm0
        movdqu (%rsi), %xmm2
        mov %edx, %ecx
        mov $-1, %edx
        shl %cl, %rdx           # ones where the buffer is not
        pcmpeqb %xmm3, %xmm1
        pcmpeqb %xmm2, %xmm0
        pmovmskb %xmm1, %ecx
        pmovmskb %xmm0, %eax
        shl $16, %ecx
        or %ecx, %eax           # ones where the buffers match
        or %edx, %eax           # including where the buffer is not
        not %eax                # ones where there is a mismatch
#ifndef BCMP
        bsf %eax, %edx          # location of the first mismatch
        cmovz %eax, %edx        # if no mismatch: 0 (bsf leaves its
                                # destination undefined on zero input)
        movzbl (%rdi, %rdx, 1), %eax # mismatching bytes
        movzbl (%rsi, %rdx, 1), %edx
        sub %edx, %eax
#endif
        ret

        /* empty input */
.L0:    xor %eax, %eax
        ret

        /* compare 33+ bytes */
        ALIGN_TEXT
.Llong: movdqu (%rdi), %xmm0    # load head
        movdqu (%rsi), %xmm2
        mov %rdi, %rcx
        sub %rdi, %rsi          # express rsi as distance from rdi
        and $~0xf, %rdi         # align rdi to 16 bytes
        movdqu 16(%rsi, %rdi, 1), %xmm1
        pcmpeqb 16(%rdi), %xmm1 # compare second half of this iteration
        add %rcx, %rdx          # pointer to last byte in buffer
        jc .Loverflow           # did this overflow?
0:      pcmpeqb %xmm2, %xmm0
        pmovmskb %xmm0, %eax
        xor $0xffff, %eax       # any mismatch?
        jne .Lmismatch_head
        add $64, %rdi           # advance to next iteration
        jmp 1f                  # and get going with the loop

/*
 * If we got here, a buffer length was passed to memcmp(a, b, len)
 * such that a + len < a.  While this sort of usage is illegal,
 * it is plausible that a caller tries to do something like
 * memcmp(a, b, SIZE_MAX) if a and b are known to differ, intending
 * for memcmp() to stop comparing at the first mismatch.  This
 * behaviour is not guaranteed by any version of ISO/IEC 9899,
 * but usually works out in practice.  Let's try to make this
 * case work by comparing until the end of the address space.
 */
.Loverflow:
        mov $-1, %rdx           # compare until the end of memory
        jmp 0b

        /* process buffer 32 bytes at a time */
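        /*
         * rdi was aligned to 16 bytes above and rsi holds the distance
         * between the two buffers, so each iteration performs two
         * aligned pcmpeqb memory references through rdi and two
         * unaligned movdqu loads through (%rsi, %rdi, 1).
         */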
        ALIGN_TEXT
0:      movdqu -32(%rsi, %rdi, 1), %xmm0
        movdqu -16(%rsi, %rdi, 1), %xmm1
        pcmpeqb -32(%rdi), %xmm0
        pcmpeqb -16(%rdi), %xmm1
        add $32, %rdi           # advance to next iteration
1:      pand %xmm0, %xmm1       # 0xff where both halves matched
        pmovmskb %xmm1, %eax
        cmp $0xffff, %eax       # all bytes matched?
        jne .Lmismatch
        cmp %rdx, %rdi          # end of buffer reached?
        jb 0b

        /* less than 32 bytes left to compare */
        movdqu -16(%rdx), %xmm1 # load 32 byte tail through end pointer
        movdqu -16(%rdx, %rsi, 1), %xmm3
        movdqu -32(%rdx), %xmm0
        movdqu -32(%rdx, %rsi, 1), %xmm2
        pcmpeqb %xmm3, %xmm1
        pcmpeqb %xmm2, %xmm0
        pmovmskb %xmm1, %ecx
        pmovmskb %xmm0, %eax
        shl $16, %ecx
        or %ecx, %eax           # ones where the buffers match
        not %eax                # ones where there is a mismatch
#ifndef BCMP
        bsf %eax, %ecx          # location of the first mismatch
        cmovz %eax, %ecx        # including if there is no mismatch
        add %rcx, %rdx          # pointer to potential mismatch
        movzbl -32(%rdx), %eax  # mismatching bytes
        movzbl -32(%rdx, %rsi, 1), %edx
        sub %edx, %eax
#endif
        ret

#ifdef BCMP
.Lmismatch:
        mov $1, %eax
.Lmismatch_head:
        ret
#else /* memcmp */
.Lmismatch_head:
        tzcnt %eax, %eax        # location of mismatch
        add %rax, %rcx          # pointer to mismatch
        movzbl (%rcx), %eax     # mismatching bytes
        movzbl (%rcx, %rsi, 1), %ecx
        sub %ecx, %eax
        ret

.Lmismatch:
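        /*
         * rdi has already been advanced, so it points 64 bytes past
         * the start of the 32-byte chunk containing the mismatch;
         * hence the -48 and -64 displacements below.
         */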
        movdqu -48(%rsi, %rdi, 1), %xmm1
        pcmpeqb -48(%rdi), %xmm1 # reconstruct xmm1 before PAND
        pmovmskb %xmm0, %eax    # matches in first 16 bytes
        pmovmskb %xmm1, %edx    # matches in second 16 bytes
        shl $16, %edx
        or %edx, %eax           # matches in both
        not %eax                # ones where there is a mismatch
        tzcnt %eax, %eax        # location of mismatch
        add %rax, %rdi          # pointer to mismatch
        movzbl -64(%rdi), %eax  # mismatching bytes
        movzbl -64(%rdi, %rsi, 1), %ecx
        sub %ecx, %eax
        ret
#endif
ARCHEND(memcmp, baseline)

        .section .note.GNU-stack,"",%progbits