Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/amd64/string/strlcpy.S
39535 views
1
/*
2
* Copyright (c) 2023 The FreeBSD Foundation
3
*
4
* This software was developed by Robert Clausecker <fuz@FreeBSD.org>
5
* under sponsorship from the FreeBSD Foundation.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE
27
*/
28
29
#include <machine/asm.h>
30
31
#include "amd64_archlevel.h"
32
33
/*
 * Align the next instruction to a 2^4 = 16 byte boundary, padding with
 * 0x90 bytes (the one-byte x86 NOP).  Used in front of hot loop heads.
 */
#define ALIGN_TEXT	.p2align 4, 0x90

/* export strlcpy as a weak alias of __strlcpy */
	.weak	strlcpy
	.set	strlcpy, __strlcpy

/*
 * List the available implementations of __strlcpy.  The ARCHFUNCS family
 * of macros comes from amd64_archlevel.h; presumably one of the listed
 * variants is selected at run time -- see that header for the details.
 * Each variant named here is defined below with ARCHENTRY/ARCHEND.
 */
ARCHFUNCS(__strlcpy)
	ARCHFUNC(__strlcpy, scalar)
	ARCHFUNC(__strlcpy, baseline)
ENDARCHFUNCS(__strlcpy)
41
42
/*
 * size_t __strlcpy(char *dst, const char *src, size_t dstsize)
 * scalar variant, SysV AMD64 calling convention
 *
 * In:	%rdi = dst, %rsi = src, %rdx = dstsize
 * Out:	%rax = strlen(src)
 *
 * Built from strlen() and memcpy(): measure the source, clamp the copy
 * length to dstsize - 1, store the terminating NUL, then copy the
 * payload.  If dstsize is 0, dst is not written at all.
 *
 * Frame layout once all prologue pushes are done:
 *	 -8(%rbp)	src (reloaded after strlen(), which may clobber %rsi)
 *	-16(%rbp)	caller's %rbx (%rbx carries the return value across
 *			the call to memcpy())
 *	-24(%rbp)	dst	} only live across the call to strlen(),
 *	-32(%rbp)	dstsize	} popped again right after it
 *
 * With the usual SysV entry alignment, five pushes are outstanding at the
 * call to strlen() and three at the call to memcpy(), so %rsp is 16-byte
 * aligned at both call sites.
 */
ARCHENTRY(__strlcpy, scalar)
	push	%rbp			# establish stack frame
	mov	%rsp, %rbp
	push	%rsi			# keep src at -8(%rbp)
	push	%rbx			# callee-saved, used for the return value
	push	%rdi
	push	%rdx
	mov	%rsi, %rdi
	call	CNAME(strlen)		# strlen(src)
	pop	%rdx
	pop	%rdi
	mov	-8(%rbp), %rsi		# reload src; its stack slot is freed by leave
	mov	%rax, %rbx		# remember string length for return value
	sub	$1, %rdx		# do not copy into the final byte of the buffer
	jc	0f			# skip copying altogether if buffer was empty
	cmp	%rax, %rdx		# is the buffer longer than the input?
	cmova	%rax, %rdx		# if yes, only copy the part that fits
	movb	$0, (%rdi, %rdx, 1)	# NUL-terminate output buffer
	call	CNAME(memcpy)		# copy string to output
0:	mov	%rbx, %rax		# restore return value
	pop	%rbx
	leave				# also discards the saved src slot
	ret
ARCHEND(__strlcpy, scalar)
66
67
/*
 * size_t __strlcpy(char *dst, const char *src, size_t dstsize)
 * baseline (SSE2) variant, SysV AMD64 calling convention
 *
 * In:	%rdi = dst, %rsi = src, %rdx = dstsize
 * Out:	%rax = strlen(src)
 *
 * Leaf function (except for the tail call to strlen() when dstsize is 0);
 * uses no stack and only caller-saved registers.
 *
 * Register roles after the setup code:
 *	%r9	original src pointer
 *	%rsi	src rounded down to 16 bytes, then the chunk cursor
 *	%ecx	src & 15 (misalignment of the string head)
 *	%rdx	room left in dst, not counting the byte reserved for the
 *		NUL; the exact bias differs per path, see the comments
 *	%rdi	dst; inside the main loop it holds dst - src, so that
 *		(%rsi, %rdi) is the destination of the chunk at (%rsi)
 *	%xmm1	all-zero comparand whenever it does not hold a compare result
 *
 * The source is inspected with aligned 16 byte loads.  A chunk is only
 * loaded once the previous chunk is known to hold no NUL byte, so every
 * chunk touched contains at least one byte of the string (possibly just
 * its NUL).  Short copies and tails are done with overlapping unaligned
 * loads/stores instead of byte loops.
 */
ARCHENTRY(__strlcpy, baseline)
	sub		$1, %rdx		# do not count NUL byte in buffer length
	jb		.L0			# go to special code path if len was 0

	mov		%esi, %ecx
	pxor		%xmm1, %xmm1
	mov		%rsi, %r9		# stash a copy of the source pointer for later
	and		$~0xf, %rsi
	pcmpeqb		(%rsi), %xmm1		# NUL found in head?
	mov		$-1, %r8d
	and		$0xf, %ecx
	shl		%cl, %r8d		# mask of bytes in the string
	pmovmskb	%xmm1, %eax
	and		%r8d, %eax		# ignore matches in front of the string
	jnz		.Lhead_nul

	movdqa		16(%rsi), %xmm3		# load second string chunk
	movdqu		(%r9), %xmm2		# load unaligned string head
	mov		$32, %r8d
	sub		%ecx, %r8d		# head length + length of second chunk
	pxor		%xmm1, %xmm1
	pcmpeqb		%xmm3, %xmm1		# NUL found in second chunk?

	sub		%r8, %rdx		# enough space left for the second chunk?
	jbe		.Lhead_buf_end

	/* process second chunk */
	pmovmskb	%xmm1, %eax
	test		%eax, %eax
	jnz		.Lsecond_nul

	/* string didn't end in second chunk and neither did buffer -- not a runt! */
	movdqa		32(%rsi), %xmm0		# load next string chunk
	pxor		%xmm1, %xmm1
	movdqu		%xmm2, (%rdi)		# deposit head into buffer
	sub		%rcx, %rdi		# adjust RDI to correspond to RSI
	movdqu		%xmm3, 16(%rdi)		# deposit second chunk
	sub		%rsi, %rdi		# express RDI as distance from RSI
	add		$32, %rsi		# advance RSI past first two chunks
	sub		$16, %rdx		# enough left for another round?
	jbe		1f

	/*
	 * main loop unrolled twice
	 *
	 * At 0: %xmm0 holds the chunk at (%rsi), %xmm1 is zero and
	 * %rdx is the room left in the buffer minus 16 (always > 0 here).
	 */
	ALIGN_TEXT
0:	pcmpeqb		%xmm0, %xmm1		# NUL byte encountered?
	pmovmskb	%xmm1, %eax
	test		%eax, %eax
	jnz		3f

	movdqu		%xmm0, (%rsi, %rdi)
	movdqa		16(%rsi), %xmm0		# load next string chunk
	pxor		%xmm1, %xmm1
	cmp		$16, %rdx		# more than a full chunk left?
	jbe		2f

	add		$32, %rsi		# advance pointers to next chunk
	pcmpeqb		%xmm0, %xmm1		# NUL byte encountered?
	pmovmskb	%xmm1, %eax
	test		%eax, %eax
	jnz		4f

	movdqu		%xmm0, -16(%rsi, %rdi)
	movdqa		(%rsi), %xmm0		# load next string chunk
	pxor		%xmm1, %xmm1
	sub		$32, %rdx
	ja		0b

1:	sub		$16, %rsi		# undo second advancement
	add		$16, %edx		# edx = room left in the buffer (1--16)

	/*
	 * 1--16 bytes left in the buffer but string has not ended yet
	 *
	 * Here %xmm0 holds the chunk at 16(%rsi), %xmm1 is zero and
	 * %edx is the room left in the buffer.
	 */
2:	pcmpeqb		%xmm1, %xmm0		# NUL byte encountered?
	pmovmskb	%xmm0, %r8d
	mov		%r8d, %eax		# keep the unmodified NUL mask
	bts		%edx, %r8d		# treat end of buffer as end of string
	tzcnt		%r8d, %r8d		# find tail length
	add		%rsi, %rdi		# restore RDI
	movdqu		(%rsi, %r8, 1), %xmm0	# load string tail
	movdqu		%xmm0, (%rdi, %r8, 1)	# store string tail
	movb		$0, 16(%rdi, %r8, 1)	# NUL terminate

	/* continue to find the end of the string */
	test		%eax, %eax		# end of string already reached?
	jnz		1f

	/* nothing more to copy, just scan two chunks per round for the NUL */
	ALIGN_TEXT
0:	pcmpeqb		32(%rsi), %xmm1
	pmovmskb	%xmm1, %eax
	pxor		%xmm1, %xmm1
	test		%eax, %eax
	jnz		2f

	pcmpeqb		48(%rsi), %xmm1
	pmovmskb	%xmm1, %eax
	add		$32, %rsi
	pxor		%xmm1, %xmm1
	test		%eax, %eax
	jz		0b

1:	sub		$16, %rsi		# undo second advancement
2:	tzcnt		%eax, %eax		# where is the NUL byte?
	sub		%r9, %rsi
	lea		32(%rsi, %rax, 1), %rax	# return string length
	ret

4:	sub		$16, %rsi		# undo second advancement
	add		$16, %rdx		# restore number of remaining bytes

	/* string has ended but buffer has not */
3:	tzcnt		%eax, %eax		# find length of string tail
	movdqu		-15(%rsi, %rax, 1), %xmm0 # load string tail (incl. NUL)
	add		%rsi, %rdi		# restore destination pointer
	movdqu		%xmm0, -15(%rdi, %rax, 1) # store string tail (incl. NUL)
	sub		%r9, %rsi		# string length to current chunk
	add		%rsi, %rax		# plus length of current chunk
	ret

	/*
	 * The buffer ends within the first two chunks.  Terminate the
	 * buffer now, find the string length, then copy through .L0031.
	 */
.Lhead_buf_end:
	pmovmskb	%xmm1, %r8d
	add		$32, %edx		# restore edx to (len-1) + ecx
	mov		%r8d, %eax		# keep the 2nd chunk NUL mask
	shl		$16, %r8d		# place 2nd chunk NUL mask into bits 16--31
	bts		%rdx, %r8		# treat end of buffer as end of string
	tzcnt		%r8, %rdx		# find string/buffer len from alignment boundary
	sub		%ecx, %edx		# find actual string/buffer len
	movb		$0, (%rdi, %rdx, 1)	# write NUL terminator

	/* continue to find the end of the string */
	test		%eax, %eax		# end of string already reached?
	jnz		1f

	ALIGN_TEXT
0:	pcmpeqb		32(%rsi), %xmm1
	pmovmskb	%xmm1, %eax
	pxor		%xmm1, %xmm1
	test		%eax, %eax
	jnz		2f

	pcmpeqb		48(%rsi), %xmm1
	pmovmskb	%xmm1, %eax
	add		$32, %rsi
	pxor		%xmm1, %xmm1
	test		%eax, %eax
	jz		0b

1:	sub		$16, %rsi		# undo second advancement
2:	tzcnt		%eax, %eax		# where is the NUL byte?
	sub		%r9, %rsi
	lea		32(%rsi, %rax, 1), %rax	# return string length
	jmp		.L0031			# rax: srclen, rdx: bytes to copy

	/* string ends in the second chunk and the buffer reaches past that chunk */
.Lsecond_nul:
	add		%r8, %rdx		# restore buffer length
	tzcnt		%eax, %eax		# where is the NUL byte?
	lea		-16(%rcx), %r8d
	sub		%r8d, %eax		# string length
	cmp		%rax, %rdx		# is the string shorter than the buffer?
	cmova		%rax, %rdx		# copy only min(buflen, srclen) bytes
	movb		$0, (%rdi, %rdx, 1)	# write NUL terminator
.L0031:	cmp		$16, %rdx		# at least 16 bytes to copy (not incl NUL)?
	jb		.L0015

	/*
	 * copy 16--32 bytes with two possibly overlapping 16 byte moves
	 * (32 can occur when coming from .Lhead_buf_end with an aligned
	 * source and room for exactly 32 bytes)
	 */
	movdqu		(%r9), %xmm0		# load first 16 bytes
	movdqu		-16(%r9, %rdx, 1), %xmm1 # load last 16 bytes
	movdqu		%xmm0, (%rdi)
	movdqu		%xmm1, -16(%rdi, %rdx, 1)
	ret

	/* string ends within the first chunk */
.Lhead_nul:
	tzcnt		%eax, %eax		# where is the NUL byte?
	sub		%ecx, %eax		# ... from the beginning of the string?
	cmp		%rax, %rdx		# is the string shorter than the buffer?
	cmova		%rax, %rdx		# copy only min(buflen, srclen) bytes
	movb		$0, (%rdi, %rdx, 1)	# write NUL terminator

	/* process strings of 0--15 bytes (rdx: min(buflen, srclen), rax: srclen) */
.L0015:	cmp		$8, %rdx		# at least 8 bytes to copy?
	jae		.L0815

	cmp		$4, %rdx		# at least 4 bytes to copy?
	jae		.L0407

	cmp		$2, %rdx		# at least 2 bytes to copy?
	jae		.L0203

	/* 0--1 bytes: copy one byte regardless, then fix up the terminator */
	movzbl		(%r9), %ecx		# load first byte from src
	mov		%cl, (%rdi)		# deposit into destination
	movb		$0, (%rdi, %rdx, 1)	# add NUL terminator (again)
	ret

	/* 2--3 bytes: first and last 2 bytes, possibly overlapping */
.L0203:	movzwl		(%r9), %ecx
	movzwl		-2(%r9, %rdx, 1), %esi
	mov		%cx, (%rdi)
	mov		%si, -2(%rdi, %rdx, 1)
	ret

	/* 4--7 bytes: first and last 4 bytes, possibly overlapping */
.L0407:	mov		(%r9), %ecx
	mov		-4(%r9, %rdx, 1), %esi
	mov		%ecx, (%rdi)
	mov		%esi, -4(%rdi, %rdx, 1)
	ret

	/* 8--15 bytes: first and last 8 bytes, possibly overlapping */
.L0815:	mov		(%r9), %rcx
	mov		-8(%r9, %rdx, 1), %rsi
	mov		%rcx, (%rdi)
	mov		%rsi, -8(%rdi, %rdx, 1)
	ret

	/* length zero destination: just return the string length */
.L0:	mov		%rsi, %rdi
	jmp		CNAME(strlen)		# tail call, strlen's result is ours
ARCHEND(__strlcpy, baseline)
280
281
/*
 * Empty .note.GNU-stack section: by GNU toolchain convention this declares
 * that the code in this object does not need an executable stack.
 */
.section .note.GNU-stack,"",%progbits
282
283