/* Origin: GitHub repository freebsd/freebsd-src (branch main),
   path contrib/bionic-x86_64-string/sse2-strlen-slm.S  */
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Everything from here to the matching #endif (placed right after the
   ENTRY line further down) is skipped when USE_AS_STRCAT is defined.  */
#ifndef USE_AS_STRCAT

/* Symbol name the routine is assembled under; may be predefined.  */
#ifndef STRLEN
# define STRLEN strlen
#endif

/* L(label) pastes a ".L" prefix onto LABEL.  */
#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* ENTRY(name): mark NAME as a global function symbol, align it with
   .p2align 4, emit the label and open the CFI region.  */
#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* END(name): close the CFI region and set the symbol size to the
   distance from NAME to the current location.  */
#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif
#define RETURN ret
64
.section .text.sse2,"ax",@progbits
65
ENTRY (STRLEN)
66
/* end ifndef USE_AS_STRCAT */
67
#endif
68
xor %rax, %rax
69
mov %edi, %ecx
70
and $0x3f, %ecx
71
pxor %xmm0, %xmm0
72
cmp $0x30, %ecx
73
ja L(next)
74
movdqu (%rdi), %xmm1
75
pcmpeqb %xmm1, %xmm0
76
pmovmskb %xmm0, %edx
77
test %edx, %edx
78
jnz L(exit_less16)
79
mov %rdi, %rax
80
and $-16, %rax
81
jmp L(align16_start)
82
L(next):
83
mov %rdi, %rax
84
and $-16, %rax
85
pcmpeqb (%rax), %xmm0
86
mov $-1, %r10d
87
sub %rax, %rcx
88
shl %cl, %r10d
89
pmovmskb %xmm0, %edx
90
and %r10d, %edx
91
jnz L(exit)
92
L(align16_start):
93
pxor %xmm0, %xmm0
94
pxor %xmm1, %xmm1
95
pxor %xmm2, %xmm2
96
pxor %xmm3, %xmm3
97
pcmpeqb 16(%rax), %xmm0
98
pmovmskb %xmm0, %edx
99
test %edx, %edx
100
jnz L(exit16)
101
102
pcmpeqb 32(%rax), %xmm1
103
pmovmskb %xmm1, %edx
104
test %edx, %edx
105
jnz L(exit32)
106
107
pcmpeqb 48(%rax), %xmm2
108
pmovmskb %xmm2, %edx
109
test %edx, %edx
110
jnz L(exit48)
111
112
pcmpeqb 64(%rax), %xmm3
113
pmovmskb %xmm3, %edx
114
test %edx, %edx
115
jnz L(exit64)
116
117
pcmpeqb 80(%rax), %xmm0
118
add $64, %rax
119
pmovmskb %xmm0, %edx
120
test %edx, %edx
121
jnz L(exit16)
122
123
pcmpeqb 32(%rax), %xmm1
124
pmovmskb %xmm1, %edx
125
test %edx, %edx
126
jnz L(exit32)
127
128
pcmpeqb 48(%rax), %xmm2
129
pmovmskb %xmm2, %edx
130
test %edx, %edx
131
jnz L(exit48)
132
133
pcmpeqb 64(%rax), %xmm3
134
pmovmskb %xmm3, %edx
135
test %edx, %edx
136
jnz L(exit64)
137
138
pcmpeqb 80(%rax), %xmm0
139
add $64, %rax
140
pmovmskb %xmm0, %edx
141
test %edx, %edx
142
jnz L(exit16)
143
144
pcmpeqb 32(%rax), %xmm1
145
pmovmskb %xmm1, %edx
146
test %edx, %edx
147
jnz L(exit32)
148
149
pcmpeqb 48(%rax), %xmm2
150
pmovmskb %xmm2, %edx
151
test %edx, %edx
152
jnz L(exit48)
153
154
pcmpeqb 64(%rax), %xmm3
155
pmovmskb %xmm3, %edx
156
test %edx, %edx
157
jnz L(exit64)
158
159
pcmpeqb 80(%rax), %xmm0
160
add $64, %rax
161
pmovmskb %xmm0, %edx
162
test %edx, %edx
163
jnz L(exit16)
164
165
pcmpeqb 32(%rax), %xmm1
166
pmovmskb %xmm1, %edx
167
test %edx, %edx
168
jnz L(exit32)
169
170
pcmpeqb 48(%rax), %xmm2
171
pmovmskb %xmm2, %edx
172
test %edx, %edx
173
jnz L(exit48)
174
175
pcmpeqb 64(%rax), %xmm3
176
pmovmskb %xmm3, %edx
177
test %edx, %edx
178
jnz L(exit64)
179
180
181
test $0x3f, %rax
182
jz L(align64_loop)
183
184
pcmpeqb 80(%rax), %xmm0
185
add $80, %rax
186
pmovmskb %xmm0, %edx
187
test %edx, %edx
188
jnz L(exit)
189
190
test $0x3f, %rax
191
jz L(align64_loop)
192
193
pcmpeqb 16(%rax), %xmm1
194
add $16, %rax
195
pmovmskb %xmm1, %edx
196
test %edx, %edx
197
jnz L(exit)
198
199
test $0x3f, %rax
200
jz L(align64_loop)
201
202
pcmpeqb 16(%rax), %xmm2
203
add $16, %rax
204
pmovmskb %xmm2, %edx
205
test %edx, %edx
206
jnz L(exit)
207
208
test $0x3f, %rax
209
jz L(align64_loop)
210
211
pcmpeqb 16(%rax), %xmm3
212
add $16, %rax
213
pmovmskb %xmm3, %edx
214
test %edx, %edx
215
jnz L(exit)
216
217
add $16, %rax
218
.p2align 4
219
L(align64_loop):
220
movaps (%rax), %xmm4
221
pminub 16(%rax), %xmm4
222
movaps 32(%rax), %xmm5
223
pminub 48(%rax), %xmm5
224
add $64, %rax
225
pminub %xmm4, %xmm5
226
pcmpeqb %xmm0, %xmm5
227
pmovmskb %xmm5, %edx
228
test %edx, %edx
229
jz L(align64_loop)
230
231
232
pcmpeqb -64(%rax), %xmm0
233
sub $80, %rax
234
pmovmskb %xmm0, %edx
235
test %edx, %edx
236
jnz L(exit16)
237
238
pcmpeqb 32(%rax), %xmm1
239
pmovmskb %xmm1, %edx
240
test %edx, %edx
241
jnz L(exit32)
242
243
pcmpeqb 48(%rax), %xmm2
244
pmovmskb %xmm2, %edx
245
test %edx, %edx
246
jnz L(exit48)
247
248
pcmpeqb 64(%rax), %xmm3
249
pmovmskb %xmm3, %edx
250
sub %rdi, %rax
251
bsf %rdx, %rdx
252
add %rdx, %rax
253
add $64, %rax
254
RETURN
255
256
.p2align 4
257
L(exit):
258
sub %rdi, %rax
259
L(exit_less16):
260
bsf %rdx, %rdx
261
add %rdx, %rax
262
RETURN
263
.p2align 4
264
L(exit16):
265
sub %rdi, %rax
266
bsf %rdx, %rdx
267
add %rdx, %rax
268
add $16, %rax
269
RETURN
270
.p2align 4
271
L(exit32):
272
sub %rdi, %rax
273
bsf %rdx, %rdx
274
add %rdx, %rax
275
add $32, %rax
276
RETURN
277
.p2align 4
278
L(exit48):
279
sub %rdi, %rax
280
bsf %rdx, %rdx
281
add %rdx, %rax
282
add $48, %rax
283
RETURN
284
.p2align 4
285
L(exit64):
286
sub %rdi, %rax
287
bsf %rdx, %rdx
288
add %rdx, %rax
289
add $64, %rax
290
#ifndef USE_AS_STRCAT
291
RETURN
292
293
END (STRLEN)
294
#endif
295
296