Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/amd64/string/timingsafe_bcmp.S
39486 views
1
/*-
2
* Copyright (c) 2023 The FreeBSD Foundation
3
*
4
* This software was developed by Robert Clausecker <fuz@FreeBSD.org>
5
* under sponsorship from the FreeBSD Foundation.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE
27
*/
28
29
#include <machine/asm.h>
30
31
#include "amd64_archlevel.h"
32
33
/* pad hot branch targets to a 16-byte boundary using 0x90 (nop) bytes */
#define ALIGN_TEXT	.p2align 4, 0x90

/*
 * Implementations of timingsafe_bcmp provided by this file, one
 * ARCHFUNC entry per variant defined below with ARCHENTRY/ARCHEND.
 * NOTE(review): the macros come from amd64_archlevel.h; presumably
 * they build the table from which one variant is selected at run
 * time and the entry order is significant -- confirm against that
 * header before reordering.
 */
ARCHFUNCS(timingsafe_bcmp)
	ARCHFUNC(timingsafe_bcmp, scalar)
	ARCHFUNC(timingsafe_bcmp, baseline)
ENDARCHFUNCS(timingsafe_bcmp)
39
40
/*
 * timingsafe_bcmp, scalar variant: general-purpose registers only.
 *
 * In:	%rdi, %rsi	the two buffers
 *	%rdx		number of bytes to compare
 * Out:	%eax		zero if all bytes are equal, nonzero otherwise
 * Uses:	%rax, %rcx, %r8, %r9, flags
 *
 * All branches depend on the length in %rdx only, never on the buffer
 * contents.  Differences are found by XOR-ing corresponding words of
 * the two buffers and OR-ing the results into one accumulator.
 * Buffers of up to 16 bytes are covered by two loads of equal width,
 * one anchored at the start and one at the end of the buffer; the two
 * loads may overlap.
 */
ARCHENTRY(timingsafe_bcmp, scalar)
	cmpq	$16, %rdx		# 17 bytes or more: use the loop
	ja	.Lscalar_long

	/* the length is at most 16 here, so %edx holds all of it */
	cmpl	$8, %edx		# 9--16 bytes?
	ja	.Lscalar_9to16

	cmpl	$4, %edx		# 5--8 bytes?
	ja	.Lscalar_5to8

	cmpl	$2, %edx		# 3--4 bytes?
	ja	.Lscalar_3to4

	testl	%edx, %edx		# nothing to compare?
	jz	.Lscalar_empty

	/* 1--2 bytes: first and last byte (the same byte if length is 1) */
	movzbl	(%rdi), %eax
	xorb	(%rsi), %al
	movzbl	-1(%rdi, %rdx, 1), %ecx
	xorb	-1(%rsi, %rdx, 1), %cl
	orl	%ecx, %eax		# nonzero iff one of the bytes differs
	ret

.Lscalar_empty:
	xorl	%eax, %eax		# zero-length buffers always match
	ret

	/* 3--4 bytes: first and last 16-bit word */
.Lscalar_3to4:
	movzwl	(%rdi), %eax
	xorw	(%rsi), %ax
	movzwl	-2(%rdi, %rdx, 1), %ecx
	xorw	-2(%rsi, %rdx, 1), %cx
	orl	%ecx, %eax
	ret

	/* 5--8 bytes: first and last 32-bit word */
.Lscalar_5to8:
	movl	(%rdi), %eax
	xorl	(%rsi), %eax
	movl	-4(%rdi, %rdx, 1), %ecx
	xorl	-4(%rsi, %rdx, 1), %ecx
	orl	%ecx, %eax
	ret

	/* 9--16 bytes: first and last 64-bit word */
.Lscalar_9to16:
	movq	(%rdi), %rax
	xorq	(%rsi), %rax
	movq	-8(%rdi, %rdx, 1), %rcx
	xorq	-8(%rsi, %rdx, 1), %rcx
	orq	%rcx, %rax
	/*
	 * The differing bits may all sit in bits 32--63 of %rax, which
	 * are not part of %eax.  Force %al to 1 whenever %rax is nonzero.
	 */
	setnz	%al
	ret

	/*
	 * 17 bytes or more.  %rax accumulates the differences, starting
	 * with those of the first 16 bytes.
	 */
.Lscalar_long:
	movq	(%rdi), %rax
	xorq	(%rsi), %rax
	movq	8(%rdi), %r9
	xorq	8(%rsi), %r9
	orq	%r9, %rax

	movl	$32, %ecx		# end offset of the next 16-byte chunk
	cmpq	%rdx, %rcx		# does that chunk end before the buffer does?
	jae	.Lscalar_tail		# no: the tail covers the rest

	/*
	 * Main loop, 16 bytes per iteration.  On entry to each iteration
	 * the chunk at offsets [%rcx - 16, %rcx) is the next one to check
	 * and %rcx is less than the length.
	 */
	ALIGN_TEXT
1:	movq	-16(%rdi, %rcx, 1), %r8
	xorq	-16(%rsi, %rcx, 1), %r8
	movq	-8(%rdi, %rcx, 1), %r9
	xorq	-8(%rsi, %rcx, 1), %r9
	orq	%r9, %r8
	orq	%r8, %rax
	addq	$16, %rcx
	cmpq	%rdx, %rcx
	jb	1b

	/*
	 * Final 16 bytes of the buffer, addressed from its end.  They may
	 * overlap bytes that have already been checked.
	 */
.Lscalar_tail:
	movq	-16(%rdi, %rdx, 1), %r8
	xorq	-16(%rsi, %rdx, 1), %r8
	movq	-8(%rdi, %rdx, 1), %r9
	xorq	-8(%rsi, %rdx, 1), %r9
	orq	%r9, %r8
	orq	%r8, %rax
	setnz	%al			# as above: keep %eax nonzero if only high bits differ
	ret
ARCHEND(timingsafe_bcmp, scalar)
122
123
/*
 * timingsafe_bcmp, baseline variant: 16-byte vector compares for
 * buffers longer than 16 bytes, general-purpose registers below that.
 *
 * In:	%rdi, %rsi	the two buffers
 *	%rdx		number of bytes to compare
 * Out:	%eax		zero if all bytes are equal, nonzero otherwise
 * Uses:	%rax, %rcx, %xmm0--%xmm4, flags
 *
 * All branches depend on the length in %rdx only, never on the buffer
 * contents.  The vector paths compare bytewise for equality, AND the
 * per-byte results together and finally invert the 16-bit byte mask,
 * so a set bit in the result marks a byte lane with a mismatch.
 * Buffers of up to 32 bytes are covered by two loads of equal width,
 * one anchored at the start and one at the end of the buffer; the two
 * loads may overlap.
 */
ARCHENTRY(timingsafe_bcmp, baseline)
	cmpq	$32, %rdx		# 33 bytes or more: use the loop
	ja	.Lsse_long

	/* the length is at most 32 here, so %edx holds all of it */
	cmpl	$16, %edx		# 17--32 bytes?
	ja	.Lsse_17to32

	cmpl	$8, %edx		# 9--16 bytes?
	ja	.Lsse_9to16

	cmpl	$4, %edx		# 5--8 bytes?
	ja	.Lsse_5to8

	cmpl	$2, %edx		# 3--4 bytes?
	ja	.Lsse_3to4

	testl	%edx, %edx		# nothing to compare?
	jz	.Lsse_empty

	/* 1--2 bytes: first and last byte (the same byte if length is 1) */
	movzbl	(%rdi), %eax
	xorb	(%rsi), %al
	movzbl	-1(%rdi, %rdx, 1), %ecx
	xorb	-1(%rsi, %rdx, 1), %cl
	orl	%ecx, %eax		# nonzero iff one of the bytes differs
	ret

.Lsse_empty:
	xorl	%eax, %eax		# zero-length buffers always match
	ret

	/* 3--4 bytes: first and last 16-bit word */
.Lsse_3to4:
	movzwl	(%rdi), %eax
	xorw	(%rsi), %ax
	movzwl	-2(%rdi, %rdx, 1), %ecx
	xorw	-2(%rsi, %rdx, 1), %cx
	orl	%ecx, %eax
	ret

	/* 5--8 bytes: first and last 32-bit word */
.Lsse_5to8:
	movl	(%rdi), %eax
	xorl	(%rsi), %eax
	movl	-4(%rdi, %rdx, 1), %ecx
	xorl	-4(%rsi, %rdx, 1), %ecx
	orl	%ecx, %eax
	ret

	/* 9--16 bytes: first and last 64-bit word */
.Lsse_9to16:
	movq	(%rdi), %rax
	xorq	(%rsi), %rax
	movq	-8(%rdi, %rdx, 1), %rcx
	xorq	-8(%rsi, %rdx, 1), %rcx
	orq	%rcx, %rax
	/*
	 * The differing bits may all sit in bits 32--63 of %rax, which
	 * are not part of %eax.  Force %al to 1 whenever %rax is nonzero.
	 */
	setnz	%al
	ret

	/* 17--32 bytes: first and last 16-byte vector */
.Lsse_17to32:
	movdqu	(%rdi), %xmm0
	movdqu	(%rsi), %xmm2
	pcmpeqb	%xmm2, %xmm0		# 0xff in every byte lane that matches
	movdqu	-16(%rdi, %rdx, 1), %xmm1
	movdqu	-16(%rsi, %rdx, 1), %xmm3
	pcmpeqb	%xmm3, %xmm1
	pand	%xmm1, %xmm0		# lane stays set only if both vectors matched there
	pmovmskb %xmm0, %eax		# one bit per lane, 1 where equal
	xorl	$0xffff, %eax		# invert: 1 where not equal
	ret

	/*
	 * 33 bytes or more.  %xmm4 accumulates the per-lane "equal" mask,
	 * starting with the result for the first 32 bytes.
	 */
.Lsse_long:
	movdqu	(%rdi), %xmm4
	movdqu	(%rsi), %xmm2
	pcmpeqb	%xmm2, %xmm4
	movdqu	16(%rdi), %xmm1
	movdqu	16(%rsi), %xmm3
	pcmpeqb	%xmm3, %xmm1
	pand	%xmm1, %xmm4

	movl	$64, %ecx		# end offset of the next 32-byte chunk
	cmpq	%rdx, %rcx		# does that chunk end before the buffer does?
	jae	.Lsse_tail		# no: the tail covers the rest

	/*
	 * Main loop, 32 bytes per iteration.  On entry to each iteration
	 * the chunk at offsets [%rcx - 32, %rcx) is the next one to check
	 * and %rcx is less than the length.
	 */
	ALIGN_TEXT
1:	movdqu	-32(%rdi, %rcx, 1), %xmm0
	movdqu	-32(%rsi, %rcx, 1), %xmm2
	pcmpeqb	%xmm2, %xmm0
	movdqu	-16(%rdi, %rcx, 1), %xmm1
	movdqu	-16(%rsi, %rcx, 1), %xmm3
	pcmpeqb	%xmm3, %xmm1
	pand	%xmm1, %xmm0
	pand	%xmm0, %xmm4
	addq	$32, %rcx
	cmpq	%rdx, %rcx
	jb	1b

	/*
	 * Final 32 bytes of the buffer, addressed from its end.  They may
	 * overlap bytes that have already been checked.
	 */
.Lsse_tail:
	movdqu	-32(%rdi, %rdx, 1), %xmm0
	movdqu	-32(%rsi, %rdx, 1), %xmm2
	pcmpeqb	%xmm2, %xmm0
	movdqu	-16(%rdi, %rdx, 1), %xmm1
	movdqu	-16(%rsi, %rdx, 1), %xmm3
	pcmpeqb	%xmm3, %xmm1
	pand	%xmm1, %xmm0
	pand	%xmm4, %xmm0		# merge with everything checked so far
	pmovmskb %xmm0, %eax		# one bit per lane, 1 where equal
	xorl	$0xffff, %eax		# invert: 1 where not equal
	ret
ARCHEND(timingsafe_bcmp, baseline)
231
232
/*
 * Emit an empty .note.GNU-stack section.  By GNU toolchain convention
 * its presence (without the executable flag) records that this object
 * does not require an executable stack.
 */
.section .note.GNU-stack,"",%progbits
233
234