Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/bionic-x86_64-string/sse2-strcpy-slm.S
39475 views
1
/*
2
Copyright (c) 2014, Intel Corporation
3
All rights reserved.
4
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions are met:
7
8
* Redistributions of source code must retain the above copyright notice,
9
* this list of conditions and the following disclaimer.
10
11
* Redistributions in binary form must reproduce the above copyright notice,
12
* this list of conditions and the following disclaimer in the documentation
13
* and/or other materials provided with the distribution.
14
15
* Neither the name of Intel Corporation nor the names of its contributors
16
* may be used to endorse or promote products derived from this software
17
* without specific prior written permission.
18
19
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
*/
30
31
/* Fallback definitions, compiled only when USE_AS_STRCAT is not defined.
   Each macro is individually guarded by #ifndef so that a definition
   supplied earlier takes precedence. */
#ifndef USE_AS_STRCAT
32
33
/* STRCPY: name given to the function emitted below; defaults to strcpy. */
# ifndef STRCPY
34
# define STRCPY strcpy
35
# endif
36
37
/* L(label): pastes a ".L" prefix onto the label name. */
# ifndef L
38
# define L(label) .L##label
39
# endif
40
41
/* cfi_startproc / cfi_endproc: map to the assembler's CFI directives. */
# ifndef cfi_startproc
42
# define cfi_startproc .cfi_startproc
43
# endif
44
45
# ifndef cfi_endproc
46
# define cfi_endproc .cfi_endproc
47
# endif
48
49
/* ENTRY(name): marks name as a function symbol, makes it global, aligns it
   to 16 bytes (.p2align 4), defines the label and opens the CFI region. */
# ifndef ENTRY
50
# define ENTRY(name) \
51
.type name, @function; \
52
.globl name; \
53
.p2align 4; \
54
name: \
55
cfi_startproc
56
# endif
57
58
/* END(name): closes the CFI region and sets the symbol size to the number
   of bytes between name and the current location. */
# ifndef END
59
# define END(name) \
60
cfi_endproc; \
61
.size name, .-name
62
# endif
63
64
#endif
65
66
/* JMPTBL(I, B): a jump-table entry, expressed as the difference I - B
   (target label minus table base). */
#define JMPTBL(I, B) I - B
67
/* BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE): indirect jump through a table
   of 32-bit relative entries.
     1. %r11 = address of TABLE (RIP-relative lea).
     2. %rcx = sign-extended 32-bit entry at %r11 + INDEX * SCALE.
     3. %rcx = %r11 + %rcx, i.e. the absolute target address.
     4. jump to *%rcx.
   Overwrites %r11 and %rcx. */
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
68
lea TABLE(%rip), %r11; \
69
movslq (%r11, INDEX, SCALE), %rcx; \
70
lea (%r11, %rcx), %rcx; \
71
jmp *%rcx
72
73
/* Function entry, emitted only when USE_AS_STRCAT is not defined.
   Register use visible below: %rdi = destination pointer, %rsi = source
   pointer, and in USE_AS_STRNCPY builds %rdx = byte count.
   NOTE(review): with USE_AS_STRCAT defined nothing here is emitted, so the
   code that follows presumably relies on an includer to have set up %rcx,
   %r8 and RETURN -- confirm against the including file. */
#ifndef USE_AS_STRCAT
74
75
/* RETURN: how every exit path leaves the function. */
# define RETURN ret
76
77
.text
78
ENTRY (STRCPY)
79
# ifdef USE_AS_STRNCPY
80
mov %rdx, %r8 /* %r8 = byte count, kept for the whole routine */
81
test %r8, %r8
82
jz L(ExitZero) /* count == 0: label defined outside this chunk */
83
# endif
84
mov %rsi, %rcx /* %rcx = copy of source pointer for the alignment test */
85
# ifndef USE_AS_STPCPY
86
mov %rdi, %rax /* save result */
87
# endif
88
89
#endif
90
/* Dispatch on the source offset within a 64-byte block.  With an offset of
   at most 32, a 32-byte read starting at the source ends at or before the
   64-byte boundary, so L(SourceStringAlignmentLess32) can use unaligned
   loads directly.  Otherwise the source is rounded down to 16 bytes and the
   first two aligned 16-byte chunks are tested for a NUL byte here. */
and $63, %rcx /* %rcx = source address modulo 64 */
90
cmp $32, %rcx
91
jbe L(SourceStringAlignmentLess32)
92
93
and $-16, %rsi /* %rsi = source rounded down to 16 bytes */
94
and $15, %rcx /* %rcx = source offset inside that 16-byte chunk */
95
pxor %xmm0, %xmm0 /* %xmm0 = all-zero vector used for NUL compares */
96
pxor %xmm1, %xmm1
97
98
pcmpeqb (%rsi), %xmm1 /* 0xff in every byte of the chunk that is zero */
99
pmovmskb %xmm1, %rdx /* one mask bit per byte */
100
shr %cl, %rdx /* drop bits of bytes that precede the source start */
101
#ifdef USE_AS_STRNCPY
102
/* %r10 = 16 - offset (17 - offset in the plain strncpy build); when the
   count in %r8 is <= %r10 take the length-limited path. */
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
103
mov $16, %r10
104
sub %rcx, %r10
105
cmp %r10, %r8
106
# else
107
mov $17, %r10
108
sub %rcx, %r10
109
cmp %r10, %r8
110
# endif
111
jbe L(CopyFrom1To16BytesTailCase2OrCase3)
112
#endif
113
test %rdx, %rdx
114
jnz L(CopyFrom1To16BytesTail) /* NUL inside the first chunk */
115
116
/* Second aligned chunk.  If it holds no zero byte the compare leaves %xmm0
   all-zero again, so it stays usable as the zero vector. */
pcmpeqb 16(%rsi), %xmm0
117
pmovmskb %xmm0, %rdx
118
#ifdef USE_AS_STRNCPY
119
add $16, %r10 /* limit grows by the 16 bytes just examined */
120
cmp %r10, %r8
121
jbe L(CopyFrom1To32BytesCase2OrCase3)
122
#endif
123
test %rdx, %rdx
124
jnz L(CopyFrom1To32Bytes) /* NUL inside the second chunk */
125
126
/* No NUL so far: copy the first 16 source bytes (aligned base + offset)
   to the destination, then fall through into L(Unalign16Both). */
movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
127
movdqu %xmm1, (%rdi)
130
/* If source address alignment != destination address alignment */
131
/* L(Unalign16Both): straight-line copy of 16-byte chunks with aligned loads
   from the source and unaligned stores to the destination.
   On entry %rsi is 16-byte aligned and %rcx holds the source offset inside
   that chunk (both established by the code paths that reach this label).
   %rdi is moved back by the same offset, so (%rdi, %rcx) and (%rsi, %rcx)
   address corresponding bytes for any %rcx.
   Each step below: load the next chunk, store the previously loaded chunk,
   test the new chunk for a zero byte, advance %rcx by 16.  After six steps
   the code prepares %rsi/%rdi/%r8 for the 64-byte loop that follows. */
.p2align 4
132
L(Unalign16Both):
133
sub %rcx, %rdi /* give %rdi the same bias as the aligned %rsi */
134
#ifdef USE_AS_STRNCPY
135
add %rcx, %r8 /* apply the same bias to the remaining count */
136
#endif
137
mov $16, %rcx
138
movdqa (%rsi, %rcx), %xmm1
139
movaps 16(%rsi, %rcx), %xmm2
140
movdqu %xmm1, (%rdi, %rcx)
141
pcmpeqb %xmm2, %xmm0 /* %xmm0 stays all-zero unless a NUL is found */
142
pmovmskb %xmm0, %rdx
143
add $16, %rcx
144
#ifdef USE_AS_STRNCPY
145
sub $48, %r8
146
jbe L(CopyFrom1To16BytesCase2OrCase3) /* taken when %r8 was <= 48 */
147
#endif
148
test %rdx, %rdx
149
/* NUL found in %xmm2: the strncpy (non-strcat) build uses a stub that
   stores the whole register first; other builds dispatch on the NUL
   position. */
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
150
jnz L(CopyFrom1To16BytesUnalignedXmm2)
151
#else
152
jnz L(CopyFrom1To16Bytes)
153
#endif
154
155
/* Step 2: store %xmm2, test %xmm3. */
movaps 16(%rsi, %rcx), %xmm3
156
movdqu %xmm2, (%rdi, %rcx)
157
pcmpeqb %xmm3, %xmm0
158
pmovmskb %xmm0, %rdx
159
add $16, %rcx
160
#ifdef USE_AS_STRNCPY
161
sub $16, %r8
162
jbe L(CopyFrom1To16BytesCase2OrCase3)
163
#endif
164
test %rdx, %rdx
165
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
166
jnz L(CopyFrom1To16BytesUnalignedXmm3)
167
#else
168
jnz L(CopyFrom1To16Bytes)
169
#endif
170
171
/* Step 3: store %xmm3, test %xmm4. */
movaps 16(%rsi, %rcx), %xmm4
172
movdqu %xmm3, (%rdi, %rcx)
173
pcmpeqb %xmm4, %xmm0
174
pmovmskb %xmm0, %rdx
175
add $16, %rcx
176
#ifdef USE_AS_STRNCPY
177
sub $16, %r8
178
jbe L(CopyFrom1To16BytesCase2OrCase3)
179
#endif
180
test %rdx, %rdx
181
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
182
jnz L(CopyFrom1To16BytesUnalignedXmm4)
183
#else
184
jnz L(CopyFrom1To16Bytes)
185
#endif
186
187
/* Step 4: store %xmm4, test %xmm1. */
movaps 16(%rsi, %rcx), %xmm1
188
movdqu %xmm4, (%rdi, %rcx)
189
pcmpeqb %xmm1, %xmm0
190
pmovmskb %xmm0, %rdx
191
add $16, %rcx
192
#ifdef USE_AS_STRNCPY
193
sub $16, %r8
194
jbe L(CopyFrom1To16BytesCase2OrCase3)
195
#endif
196
test %rdx, %rdx
197
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
198
jnz L(CopyFrom1To16BytesUnalignedXmm1)
199
#else
200
jnz L(CopyFrom1To16Bytes)
201
#endif
202
203
/* Step 5: store %xmm1, test %xmm2. */
movaps 16(%rsi, %rcx), %xmm2
204
movdqu %xmm1, (%rdi, %rcx)
205
pcmpeqb %xmm2, %xmm0
206
pmovmskb %xmm0, %rdx
207
add $16, %rcx
208
#ifdef USE_AS_STRNCPY
209
sub $16, %r8
210
jbe L(CopyFrom1To16BytesCase2OrCase3)
211
#endif
212
test %rdx, %rdx
213
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
214
jnz L(CopyFrom1To16BytesUnalignedXmm2)
215
#else
216
jnz L(CopyFrom1To16Bytes)
217
#endif
218
219
/* Step 6: store %xmm2, test %xmm3. */
movaps 16(%rsi, %rcx), %xmm3
220
movdqu %xmm2, (%rdi, %rcx)
221
pcmpeqb %xmm3, %xmm0
222
pmovmskb %xmm0, %rdx
223
add $16, %rcx
224
#ifdef USE_AS_STRNCPY
225
sub $16, %r8
226
jbe L(CopyFrom1To16BytesCase2OrCase3)
227
#endif
228
test %rdx, %rdx
229
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
230
jnz L(CopyFrom1To16BytesUnalignedXmm3)
231
#else
232
jnz L(CopyFrom1To16Bytes)
233
#endif
234
235
/* Store the last tested chunk, then set up the 64-byte loop: round the
   next source address down to a multiple of 64 and move %rdi by the same
   distance that %rsi moved. */
movdqu %xmm3, (%rdi, %rcx)
236
mov %rsi, %rdx
237
lea 16(%rsi, %rcx), %rsi
238
and $-0x40, %rsi /* %rsi = 64-byte aligned source position */
239
sub %rsi, %rdx /* %rdx = old %rsi - new %rsi */
240
sub %rdx, %rdi /* %rdi += new %rsi - old %rsi */
241
#ifdef USE_AS_STRNCPY
242
lea 128(%r8, %rdx), %r8 /* re-bias the remaining count for the loop */
243
#endif
244
/* L(Unaligned64Loop): main loop, 64 source bytes per iteration.
   Four aligned 16-byte loads go to %xmm4..%xmm7 (copies kept for the later
   store); their byte-wise unsigned minimum is compared with the zero vector
   in %xmm0, so %rdx becomes non-zero exactly when one of the 64 bytes is
   zero.  The first pass only loads and tests; L(Unaligned64Loop_start)
   advances the pointers, stores the 64 bytes tested by the previous pass and
   tests the next 64.
   Exits: L(Unaligned64Leave) when a NUL was seen, or, in strncpy builds,
   L(UnalignedLeaveCase2OrCase3) (outside this chunk) when subtracting 64
   from %r8 gives zero or borrows. */
L(Unaligned64Loop):
245
movaps (%rsi), %xmm2
246
movaps %xmm2, %xmm4 /* keep bytes 0..15 */
247
movaps 16(%rsi), %xmm5 /* bytes 16..31 */
248
movaps 32(%rsi), %xmm3
249
movaps %xmm3, %xmm6 /* keep bytes 32..47 */
250
movaps 48(%rsi), %xmm7 /* bytes 48..63 */
251
pminub %xmm5, %xmm2
252
pminub %xmm7, %xmm3
253
pminub %xmm2, %xmm3 /* %xmm3 = byte-wise min of all four chunks */
254
pcmpeqb %xmm0, %xmm3
255
pmovmskb %xmm3, %rdx
256
#ifdef USE_AS_STRNCPY
257
sub $64, %r8
258
jbe L(UnalignedLeaveCase2OrCase3)
259
#endif
260
test %rdx, %rdx
261
jnz L(Unaligned64Leave)
262
263
L(Unaligned64Loop_start):
264
add $64, %rdi
265
add $64, %rsi
266
/* Stores of the previous 64 bytes are interleaved with the loads of the
   next 64; each register is stored before it is reloaded. */
movdqu %xmm4, -64(%rdi)
267
movaps (%rsi), %xmm2
268
movdqa %xmm2, %xmm4
269
movdqu %xmm5, -48(%rdi)
270
movaps 16(%rsi), %xmm5
271
pminub %xmm5, %xmm2
272
movaps 32(%rsi), %xmm3
273
movdqu %xmm6, -32(%rdi)
274
movaps %xmm3, %xmm6
275
movdqu %xmm7, -16(%rdi)
276
movaps 48(%rsi), %xmm7
277
pminub %xmm7, %xmm3
278
pminub %xmm2, %xmm3
279
pcmpeqb %xmm0, %xmm3
280
pmovmskb %xmm3, %rdx
281
#ifdef USE_AS_STRNCPY
282
sub $64, %r8
283
jbe L(UnalignedLeaveCase2OrCase3)
284
#endif
285
test %rdx, %rdx
286
jz L(Unaligned64Loop_start) /* no NUL in these 64 bytes: keep going */
287
288
/* L(Unaligned64Leave): a NUL lies somewhere in the 64 bytes held in
   %xmm4..%xmm7 (not yet stored).  Test the chunks in order and branch to the
   handler for the first one that contains the NUL; the code after the last
   branch handles a NUL in the fourth chunk (%xmm7).
   %xmm0 is the zero vector on entry (it is only read by the loop above), and
   a pcmpeqb that finds no zero byte leaves its destination all-zero, which
   is why %xmm0 and %xmm1 can be reused for the second pair of tests. */
L(Unaligned64Leave):
289
pxor %xmm1, %xmm1
290
291
pcmpeqb %xmm4, %xmm0
292
pcmpeqb %xmm5, %xmm1
293
pmovmskb %xmm0, %rdx /* NUL mask of chunk 0 */
294
pmovmskb %xmm1, %rcx /* NUL mask of chunk 1 */
295
test %rdx, %rdx
296
jnz L(CopyFrom1To16BytesUnaligned_0)
297
test %rcx, %rcx
298
jnz L(CopyFrom1To16BytesUnaligned_16)
299
300
pcmpeqb %xmm6, %xmm0
301
pcmpeqb %xmm7, %xmm1
302
pmovmskb %xmm0, %rdx /* NUL mask of chunk 2 */
303
pmovmskb %xmm1, %rcx /* NUL mask of chunk 3 */
304
test %rdx, %rdx
305
jnz L(CopyFrom1To16BytesUnaligned_32)
306
307
/* NUL is in chunk 3: %rdx = its index within that chunk. */
bsf %rcx, %rdx
308
movdqu %xmm4, (%rdi)
309
movdqu %xmm5, 16(%rdi)
310
movdqu %xmm6, 32(%rdi)
311
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
312
# ifdef USE_AS_STPCPY
313
lea 48(%rdi, %rdx), %rax /* return value: address of the NUL in dest */
314
# endif
315
movdqu %xmm7, 48(%rdi) /* store the whole last chunk */
316
add $15, %r8
317
sub %rdx, %r8 /* %r8 = %r8 + 15 - NUL index */
318
lea 49(%rdi, %rdx), %rdi /* %rdi = first byte after the copied NUL */
319
jmp L(StrncpyFillTailWithZero)
320
#else
321
add $48, %rsi
322
add $48, %rdi
323
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4) /* tail copy chosen by NUL index */
324
#endif
325
326
/* If source address alignment == destination address alignment */
327
328
/* L(SourceStringAlignmentLess32): taken when the source offset within its
   64-byte block (%rcx) is at most 32.  The first 32 source bytes are loaded
   with two unaligned loads and tested for a NUL 16 bytes at a time.  If
   neither half contains a NUL (and, in strncpy builds, the count is large
   enough) the first 16 bytes have been stored and control continues at
   L(Unalign16Both) with %rsi rounded down to 16 and %rcx = offset mod 16. */
L(SourceStringAlignmentLess32):
329
pxor %xmm0, %xmm0
330
movdqu (%rsi), %xmm1 /* source bytes 0..15 */
331
movdqu 16(%rsi), %xmm2 /* source bytes 16..31 */
332
pcmpeqb %xmm1, %xmm0
333
pmovmskb %xmm0, %rdx /* NUL mask of bytes 0..15 */
334
335
#ifdef USE_AS_STRNCPY
336
/* Length-limited path when the count is <= 16 (<= 17 in plain strncpy). */
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
337
cmp $16, %r8
338
# else
339
cmp $17, %r8
340
# endif
341
jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
342
#endif
343
test %rdx, %rdx
344
jnz L(CopyFrom1To16BytesTail1)
345
346
pcmpeqb %xmm2, %xmm0 /* %xmm0 is all-zero here: no NUL was found above */
347
movdqu %xmm1, (%rdi) /* first 16 bytes are NUL-free: store them */
348
pmovmskb %xmm0, %rdx /* NUL mask of bytes 16..31 */
349
350
#ifdef USE_AS_STRNCPY
351
/* Length-limited path when the count is <= 32 (<= 33 in plain strncpy). */
# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
352
cmp $32, %r8
353
# else
354
cmp $33, %r8
355
# endif
356
jbe L(CopyFrom1To32Bytes1Case2OrCase3)
357
#endif
358
test %rdx, %rdx
359
jnz L(CopyFrom1To32Bytes1)
360
361
and $15, %rcx /* %rcx = source offset within its 16-byte chunk */
362
and $-16, %rsi /* %rsi = source rounded down to 16 bytes */
363
364
jmp L(Unalign16Both)
365
366
/*------End of main part with loops---------------------*/
367
368
/* Case1 */
369
370
/* L(CopyFrom1To16Bytes): NUL found in the chunk at offset %rcx (mask in
   %rdx).  Emitted for every build except plain strncpy/stpncpy.
   Advances both pointers by %rcx, converts the mask to the index of its
   lowest set bit and dispatches through L(ExitTable) on that index. */
#if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
371
.p2align 4
372
L(CopyFrom1To16Bytes):
373
add %rcx, %rdi
374
add %rcx, %rsi
375
bsf %rdx, %rdx /* %rdx = index of the first NUL byte */
376
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
377
#endif
378
/* L(CopyFrom1To16BytesTail): NUL found in the first aligned chunk.  %rsi is
   the aligned base and %rcx the original offset, so adding them restores the
   real source start; %rdi is left unchanged.  The mask in %rdx was already
   shifted right by %rcx by the jumping code, so bsf gives the NUL index
   relative to the source start. */
.p2align 4
379
L(CopyFrom1To16BytesTail):
380
add %rcx, %rsi
381
bsf %rdx, %rdx
382
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
383
384
/* L(CopyFrom1To32Bytes1): NUL found in source bytes 16..31 after bytes 0..15
   were already stored.  Skips both pointers (and, in the plain strncpy
   build, the count) past those 16 bytes, then falls through.
   L(CopyFrom1To16BytesTail1): NUL found in the 16 bytes at (%rsi); dispatch
   through L(ExitTable) on the index of the lowest set bit of %rdx. */
.p2align 4
385
L(CopyFrom1To32Bytes1):
386
add $16, %rsi
387
add $16, %rdi
388
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
389
sub $16, %r8
390
#endif
391
L(CopyFrom1To16BytesTail1):
392
bsf %rdx, %rdx
393
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
394
395
/* L(CopyFrom1To32Bytes): NUL found in the second aligned chunk while nothing
   has been stored yet.  %rsi is restored to the real source start (aligned
   base + %rcx) and the table index becomes
       bit index in second chunk + 16 - %rcx,
   i.e. the NUL position counted from the source start. */
.p2align 4
396
L(CopyFrom1To32Bytes):
397
bsf %rdx, %rdx
398
add %rcx, %rsi
399
add $16, %rdx
400
sub %rcx, %rdx
401
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
402
403
/* L(CopyFrom1To16BytesUnaligned_0): leaving the 64-byte loop with the NUL in
   chunk 0 (%xmm4); %rdx holds that chunk's NUL mask.
   Plain strncpy/stpncpy build: store the whole chunk, set %rax to the NUL's
   address in the destination (stpcpy variant), set
   %r8 = %r8 + 63 - NUL index, point %rdi just past the NUL and go zero-fill.
   Other builds: dispatch through L(ExitTable) on the NUL index. */
.p2align 4
404
L(CopyFrom1To16BytesUnaligned_0):
405
bsf %rdx, %rdx /* %rdx = NUL index within the chunk */
406
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
407
# ifdef USE_AS_STPCPY
408
lea (%rdi, %rdx), %rax
409
# endif
410
movdqu %xmm4, (%rdi)
411
add $63, %r8
412
sub %rdx, %r8
413
lea 1(%rdi, %rdx), %rdi
414
jmp L(StrncpyFillTailWithZero)
415
#else
416
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
417
#endif
418
419
/* L(CopyFrom1To16BytesUnaligned_16): leaving the 64-byte loop with the NUL
   in chunk 1 (%xmm5); %rcx holds that chunk's NUL mask.  Chunk 0 is stored
   unconditionally.
   Plain strncpy/stpncpy build: also store chunk 1 whole, set %rax to the
   NUL's destination address (stpcpy variant), set
   %r8 = %r8 + 47 - NUL index, point %rdi just past the NUL and go zero-fill.
   Other builds: step both pointers over chunk 0 and dispatch through
   L(ExitTable) on the NUL index. */
.p2align 4
420
L(CopyFrom1To16BytesUnaligned_16):
421
bsf %rcx, %rdx /* %rdx = NUL index within chunk 1 */
422
movdqu %xmm4, (%rdi)
423
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
424
# ifdef USE_AS_STPCPY
425
lea 16(%rdi, %rdx), %rax
426
# endif
427
movdqu %xmm5, 16(%rdi)
428
add $47, %r8
429
sub %rdx, %r8
430
lea 17(%rdi, %rdx), %rdi
431
jmp L(StrncpyFillTailWithZero)
432
#else
433
add $16, %rsi
434
add $16, %rdi
435
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
436
#endif
437
438
/* L(CopyFrom1To16BytesUnaligned_32): leaving the 64-byte loop with the NUL
   in chunk 2 (%xmm6); %rdx holds that chunk's NUL mask.  Chunks 0 and 1 are
   stored unconditionally.
   Plain strncpy/stpncpy build: also store chunk 2 whole, set %rax to the
   NUL's destination address (stpcpy variant), set
   %r8 = %r8 + 31 - NUL index, point %rdi just past the NUL and go zero-fill.
   Other builds: step both pointers over the first 32 bytes and dispatch
   through L(ExitTable) on the NUL index. */
.p2align 4
439
L(CopyFrom1To16BytesUnaligned_32):
440
bsf %rdx, %rdx /* %rdx = NUL index within chunk 2 */
441
movdqu %xmm4, (%rdi)
442
movdqu %xmm5, 16(%rdi)
443
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
444
# ifdef USE_AS_STPCPY
445
lea 32(%rdi, %rdx), %rax
446
# endif
447
movdqu %xmm6, 32(%rdi)
448
add $31, %r8
449
sub %rdx, %r8
450
lea 33(%rdi, %rdx), %rdi
451
jmp L(StrncpyFillTailWithZero)
452
#else
453
add $32, %rsi
454
add $32, %rdi
455
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
456
#endif
457
458
#ifdef USE_AS_STRNCPY
459
/* Stubs for the plain strncpy/stpncpy build: each one stores a complete
   16-byte register at (%rdi, %rcx) and then jumps to the shared tail
   L(CopyFrom1To16BytesXmmExit), which is defined outside this chunk.
   NOTE(review): within this chunk only the Xmm1, Xmm3 and Xmm4 stubs (and an
   Xmm2 stub that is not defined here) are referenced; Xmm5 and Xmm6 may be
   used elsewhere or be unused -- confirm before removing anything. */
# ifndef USE_AS_STRCAT
460
.p2align 4
461
L(CopyFrom1To16BytesUnalignedXmm6):
462
movdqu %xmm6, (%rdi, %rcx)
463
jmp L(CopyFrom1To16BytesXmmExit)
464
465
.p2align 4
466
L(CopyFrom1To16BytesUnalignedXmm5):
467
movdqu %xmm5, (%rdi, %rcx)
468
jmp L(CopyFrom1To16BytesXmmExit)
469
470
.p2align 4
471
L(CopyFrom1To16BytesUnalignedXmm4):
472
movdqu %xmm4, (%rdi, %rcx)
473
jmp L(CopyFrom1To16BytesXmmExit)
474
475
.p2align 4
476
L(CopyFrom1To16BytesUnalignedXmm3):
477
movdqu %xmm3, (%rdi, %rcx)
478
jmp L(CopyFrom1To16BytesXmmExit)
479
480
.p2align 4
481
L(CopyFrom1To16BytesUnalignedXmm1):
482
movdqu %xmm1, (%rdi, %rcx)
483
jmp L(CopyFrom1To16BytesXmmExit)
484
# endif
485
486
/* L(CopyFrom1To16BytesExit): shared landing pad for the Case2 handlers when
   the NUL comes before the count limit.  %rdx already holds the NUL index;
   dispatch through L(ExitTable). */
.p2align 4
487
L(CopyFrom1To16BytesExit):
488
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
489
490
/* Case2 */
491
492
/* L(CopyFrom1To16BytesCase2): the count ran out within the current chunk and
   that chunk also contains a NUL.  Undo the bias on %r8 (add 16), advance
   both pointers by %rcx, then compare the NUL index with %r8:
     NUL index <  %r8  -> NUL-terminated copy via L(CopyFrom1To16BytesExit)
     NUL index >= %r8  -> copy exactly %r8 bytes via L(ExitStrncpyTable). */
.p2align 4
493
L(CopyFrom1To16BytesCase2):
494
add $16, %r8
495
add %rcx, %rdi
496
add %rcx, %rsi
497
bsf %rdx, %rdx
498
cmp %r8, %rdx
499
jb L(CopyFrom1To16BytesExit)
500
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
501
502
/* L(CopyFrom1To32BytesCase2): count limit reached while testing the second
   aligned chunk, which contains a NUL.  %rsi is restored to the real source
   start and %rdx becomes the NUL position counted from it
   (bit index + 16 - %rcx).  If that position is below %r8 the
   NUL-terminated exit is used, otherwise exactly %r8 bytes are copied. */
.p2align 4
503
L(CopyFrom1To32BytesCase2):
504
add %rcx, %rsi
505
bsf %rdx, %rdx
506
add $16, %rdx
507
sub %rcx, %rdx
508
cmp %r8, %rdx
509
jb L(CopyFrom1To16BytesExit)
510
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
511
512
/* L(CopyFrom1To16BytesTailCase2): count limit reached in the first aligned
   chunk, which contains a NUL.  %rsi is restored to the real source start;
   the NUL index decides between the NUL-terminated exit (index < %r8) and a
   copy of exactly %r8 bytes. */
L(CopyFrom1To16BytesTailCase2):
513
add %rcx, %rsi
514
bsf %rdx, %rdx
515
cmp %r8, %rdx
516
jb L(CopyFrom1To16BytesExit)
517
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
518
519
/* L(CopyFrom1To16BytesTail1Case2): same decision as the other Case2
   handlers, for the path where %rsi/%rdi already point at the bytes to copy:
   NUL index < %r8 -> NUL-terminated exit, otherwise copy exactly %r8 bytes. */
L(CopyFrom1To16BytesTail1Case2):
520
bsf %rdx, %rdx
521
cmp %r8, %rdx
522
jb L(CopyFrom1To16BytesExit)
523
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
524
525
/* Case2 or Case3, Case3 */
526
527
/* L(CopyFrom1To16BytesCase2OrCase3): the count limit was reached in
   L(Unalign16Both).  If the current chunk also holds a NUL (%rdx != 0) go to
   Case2; otherwise fall into Case3.
   L(CopyFrom1To16BytesCase3): no NUL before the limit.  Undo the bias on %r8
   (add 16), advance both pointers by %rcx and copy exactly %r8 bytes via
   L(ExitStrncpyTable). */
.p2align 4
528
L(CopyFrom1To16BytesCase2OrCase3):
529
test %rdx, %rdx
530
jnz L(CopyFrom1To16BytesCase2)
531
L(CopyFrom1To16BytesCase3):
532
add $16, %r8
533
add %rcx, %rdi
534
add %rcx, %rsi
535
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
536
537
/* L(CopyFrom1To32BytesCase2OrCase3): count limit reached while testing the
   second aligned chunk.  With a NUL present go to Case2; otherwise restore
   %rsi to the real source start and copy exactly %r8 bytes. */
.p2align 4
538
L(CopyFrom1To32BytesCase2OrCase3):
539
test %rdx, %rdx
540
jnz L(CopyFrom1To32BytesCase2)
541
add %rcx, %rsi
542
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
543
544
/* L(CopyFrom1To16BytesTailCase2OrCase3): count limit reached while testing
   the first aligned chunk.  With a NUL present go to Case2; otherwise
   restore %rsi to the real source start and copy exactly %r8 bytes. */
.p2align 4
545
L(CopyFrom1To16BytesTailCase2OrCase3):
546
test %rdx, %rdx
547
jnz L(CopyFrom1To16BytesTailCase2)
548
add %rcx, %rsi
549
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
550
551
/* L(CopyFrom1To32Bytes1Case2OrCase3): count limit reached in source bytes
   16..31 after bytes 0..15 were stored.  Skip pointers and count past those
   16 bytes and fall through.
   L(CopyFrom1To16BytesTail1Case2OrCase3): with a NUL present go to Case2;
   otherwise copy exactly %r8 bytes via L(ExitStrncpyTable). */
.p2align 4
552
L(CopyFrom1To32Bytes1Case2OrCase3):
553
add $16, %rdi
554
add $16, %rsi
555
sub $16, %r8
556
L(CopyFrom1To16BytesTail1Case2OrCase3):
557
test %rdx, %rdx
558
jnz L(CopyFrom1To16BytesTail1Case2)
559
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
560
561
#endif
562
563
/*------------End of labels for copying 1-16 bytes and 1-32 bytes----*/
564
565
/* L(Exit1): write one byte, taken from %dh, to (%rdi).
   NOTE(review): %dh is presumably zero here (visible code dispatches with a
   small bit index in %rdx), making this the terminator of an empty string;
   L(ExitTable) itself is outside this chunk -- confirm.
   stpcpy builds: %rax = %rdi.  Plain strncpy builds: %r8 -= 1, %rdi += 1 and,
   if %r8 is still non-zero, continue at L(StrncpyFillTailWithZero); lea does
   not modify flags, so jnz tests the result of sub. */
.p2align 4
566
L(Exit1):
567
mov %dh, (%rdi)
568
#ifdef USE_AS_STPCPY
569
lea (%rdi), %rax
570
#endif
571
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
572
sub $1, %r8
573
lea 1(%rdi), %rdi
574
jnz L(StrncpyFillTailWithZero)
575
#endif
576
RETURN
577
578
/* L(Exit2): copy 2 bytes from (%rsi) to (%rdi) with one 16-bit move.
   stpcpy builds: %rax = %rdi + 1.  Plain strncpy builds: %r8 -= 2,
   %rdi += 2, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
579
L(Exit2):
580
mov (%rsi), %dx
581
mov %dx, (%rdi)
582
#ifdef USE_AS_STPCPY
583
lea 1(%rdi), %rax
584
#endif
585
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
586
sub $2, %r8
587
lea 2(%rdi), %rdi
588
jnz L(StrncpyFillTailWithZero)
589
#endif
590
RETURN
591
592
/* L(Exit3): copy 2 bytes from (%rsi), then store %dh as the third byte.
   NOTE(review): %dh is presumably zero (small index in %rdx) -- confirm
   against L(ExitTable), which is outside this chunk.
   stpcpy builds: %rax = %rdi + 2.  Plain strncpy builds: %r8 -= 3,
   %rdi += 3, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
593
L(Exit3):
594
mov (%rsi), %cx
595
mov %cx, (%rdi)
596
mov %dh, 2(%rdi)
597
#ifdef USE_AS_STPCPY
598
lea 2(%rdi), %rax
599
#endif
600
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
601
sub $3, %r8
602
lea 3(%rdi), %rdi
603
jnz L(StrncpyFillTailWithZero)
604
#endif
605
RETURN
606
607
/* L(Exit4): copy 4 bytes from (%rsi) to (%rdi) with one 32-bit move.
   stpcpy builds: %rax = %rdi + 3.  Plain strncpy builds: %r8 -= 4,
   %rdi += 4, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
608
L(Exit4):
609
mov (%rsi), %edx
610
mov %edx, (%rdi)
611
#ifdef USE_AS_STPCPY
612
lea 3(%rdi), %rax
613
#endif
614
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
615
sub $4, %r8
616
lea 4(%rdi), %rdi
617
jnz L(StrncpyFillTailWithZero)
618
#endif
619
RETURN
620
621
/* L(Exit5): copy 4 bytes from (%rsi) and store %dh as the fifth byte.
   NOTE(review): %dh is presumably zero (small index in %rdx) -- confirm
   against L(ExitTable), which is outside this chunk.
   stpcpy builds: %rax = %rdi + 4.  Plain strncpy builds: %r8 -= 5,
   %rdi += 5, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
622
L(Exit5):
623
mov (%rsi), %ecx
624
mov %dh, 4(%rdi)
625
mov %ecx, (%rdi)
626
#ifdef USE_AS_STPCPY
627
lea 4(%rdi), %rax
628
#endif
629
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
630
sub $5, %r8
631
lea 5(%rdi), %rdi
632
jnz L(StrncpyFillTailWithZero)
633
#endif
634
RETURN
635
636
/* L(Exit6): copy 6 bytes: a 32-bit move at offset 0 and a 16-bit move at
   offset 4.
   stpcpy builds: %rax = %rdi + 5.  Plain strncpy builds: %r8 -= 6,
   %rdi += 6, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
637
L(Exit6):
638
mov (%rsi), %ecx
639
mov 4(%rsi), %dx
640
mov %ecx, (%rdi)
641
mov %dx, 4(%rdi)
642
#ifdef USE_AS_STPCPY
643
lea 5(%rdi), %rax
644
#endif
645
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
646
sub $6, %r8
647
lea 6(%rdi), %rdi
648
jnz L(StrncpyFillTailWithZero)
649
#endif
650
RETURN
651
652
/* L(Exit7): copy 7 bytes with two overlapping 32-bit moves (offsets 0 and
   3); the overlap rewrites byte 3 with the same value.
   stpcpy builds: %rax = %rdi + 6.  Plain strncpy builds: %r8 -= 7,
   %rdi += 7, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
653
L(Exit7):
654
mov (%rsi), %ecx
655
mov 3(%rsi), %edx
656
mov %ecx, (%rdi)
657
mov %edx, 3(%rdi)
658
#ifdef USE_AS_STPCPY
659
lea 6(%rdi), %rax
660
#endif
661
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
662
sub $7, %r8
663
lea 7(%rdi), %rdi
664
jnz L(StrncpyFillTailWithZero)
665
#endif
666
RETURN
667
668
/* L(Exit8): copy 8 bytes from (%rsi) to (%rdi) with one 64-bit move.
   stpcpy builds: %rax = %rdi + 7.  Plain strncpy builds: %r8 -= 8,
   %rdi += 8, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
669
L(Exit8):
670
mov (%rsi), %rdx
671
mov %rdx, (%rdi)
672
#ifdef USE_AS_STPCPY
673
lea 7(%rdi), %rax
674
#endif
675
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
676
sub $8, %r8
677
lea 8(%rdi), %rdi
678
jnz L(StrncpyFillTailWithZero)
679
#endif
680
RETURN
681
682
/* L(Exit9): copy 8 bytes from (%rsi) and store %dh as the ninth byte.
   NOTE(review): %dh is presumably zero (small index in %rdx) -- confirm
   against L(ExitTable), which is outside this chunk.
   stpcpy builds: %rax = %rdi + 8.  Plain strncpy builds: %r8 -= 9,
   %rdi += 9, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
683
L(Exit9):
684
mov (%rsi), %rcx
685
mov %dh, 8(%rdi)
686
mov %rcx, (%rdi)
687
#ifdef USE_AS_STPCPY
688
lea 8(%rdi), %rax
689
#endif
690
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
691
sub $9, %r8
692
lea 9(%rdi), %rdi
693
jnz L(StrncpyFillTailWithZero)
694
#endif
695
RETURN
696
697
/* L(Exit10): copy 10 bytes: a 64-bit move at offset 0 and a 16-bit move at
   offset 8.
   stpcpy builds: %rax = %rdi + 9.  Plain strncpy builds: %r8 -= 10,
   %rdi += 10, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
698
L(Exit10):
699
mov (%rsi), %rcx
700
mov 8(%rsi), %dx
701
mov %rcx, (%rdi)
702
mov %dx, 8(%rdi)
703
#ifdef USE_AS_STPCPY
704
lea 9(%rdi), %rax
705
#endif
706
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
707
sub $10, %r8
708
lea 10(%rdi), %rdi
709
jnz L(StrncpyFillTailWithZero)
710
#endif
711
RETURN
712
713
/* L(Exit11): copy 11 bytes: a 64-bit move at offset 0 and an overlapping
   32-bit move at offset 7.
   stpcpy builds: %rax = %rdi + 10.  Plain strncpy builds: %r8 -= 11,
   %rdi += 11, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
714
L(Exit11):
715
mov (%rsi), %rcx
716
mov 7(%rsi), %edx
717
mov %rcx, (%rdi)
718
mov %edx, 7(%rdi)
719
#ifdef USE_AS_STPCPY
720
lea 10(%rdi), %rax
721
#endif
722
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
723
sub $11, %r8
724
lea 11(%rdi), %rdi
725
jnz L(StrncpyFillTailWithZero)
726
#endif
727
RETURN
728
729
/* L(Exit12): copy 12 bytes: a 64-bit move at offset 0 and a 32-bit move at
   offset 8.
   stpcpy builds: %rax = %rdi + 11.  Plain strncpy builds: %r8 -= 12,
   %rdi += 12, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
730
L(Exit12):
731
mov (%rsi), %rcx
732
mov 8(%rsi), %edx
733
mov %rcx, (%rdi)
734
mov %edx, 8(%rdi)
735
#ifdef USE_AS_STPCPY
736
lea 11(%rdi), %rax
737
#endif
738
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
739
sub $12, %r8
740
lea 12(%rdi), %rdi
741
jnz L(StrncpyFillTailWithZero)
742
#endif
743
RETURN
744
745
/* L(Exit13): copy 13 bytes with two overlapping 64-bit moves (offsets 0 and
   5).
   stpcpy builds: %rax = %rdi + 12.  Plain strncpy builds: %r8 -= 13,
   %rdi += 13, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
746
L(Exit13):
747
mov (%rsi), %rcx
748
mov 5(%rsi), %rdx
749
mov %rcx, (%rdi)
750
mov %rdx, 5(%rdi)
751
#ifdef USE_AS_STPCPY
752
lea 12(%rdi), %rax
753
#endif
754
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
755
sub $13, %r8
756
lea 13(%rdi), %rdi
757
jnz L(StrncpyFillTailWithZero)
758
#endif
759
RETURN
760
761
/* L(Exit14): copy 14 bytes with two overlapping 64-bit moves (offsets 0 and
   6).
   stpcpy builds: %rax = %rdi + 13.  Plain strncpy builds: %r8 -= 14,
   %rdi += 14, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
762
L(Exit14):
763
mov (%rsi), %rcx
764
mov 6(%rsi), %rdx
765
mov %rcx, (%rdi)
766
mov %rdx, 6(%rdi)
767
#ifdef USE_AS_STPCPY
768
lea 13(%rdi), %rax
769
#endif
770
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
771
sub $14, %r8
772
lea 14(%rdi), %rdi
773
jnz L(StrncpyFillTailWithZero)
774
#endif
775
RETURN
776
777
/* L(Exit15): copy 15 bytes with two overlapping 64-bit moves (offsets 0 and
   7).
   stpcpy builds: %rax = %rdi + 14.  Plain strncpy builds: %r8 -= 15,
   %rdi += 15, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
778
L(Exit15):
779
mov (%rsi), %rcx
780
mov 7(%rsi), %rdx
781
mov %rcx, (%rdi)
782
mov %rdx, 7(%rdi)
783
#ifdef USE_AS_STPCPY
784
lea 14(%rdi), %rax
785
#endif
786
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
787
sub $15, %r8
788
lea 15(%rdi), %rdi
789
jnz L(StrncpyFillTailWithZero)
790
#endif
791
RETURN
792
793
/* L(Exit16): copy 16 bytes from (%rsi) to (%rdi) with one unaligned SSE
   move.
   stpcpy builds: %rax = %rdi + 15.  Plain strncpy builds: %r8 -= 16,
   %rdi += 16, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
794
L(Exit16):
795
movdqu (%rsi), %xmm0
796
movdqu %xmm0, (%rdi)
797
#ifdef USE_AS_STPCPY
798
lea 15(%rdi), %rax
799
#endif
800
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
801
sub $16, %r8
802
lea 16(%rdi), %rdi
803
jnz L(StrncpyFillTailWithZero)
804
#endif
805
RETURN
806
807
/* L(Exit17): copy 16 bytes from (%rsi) and store %dh as the 17th byte.
   NOTE(review): %dh is presumably zero (small index in %rdx) -- confirm
   against L(ExitTable), which is outside this chunk.
   stpcpy builds: %rax = %rdi + 16.  Plain strncpy builds: %r8 -= 17,
   %rdi += 17, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
808
L(Exit17):
809
movdqu (%rsi), %xmm0
810
movdqu %xmm0, (%rdi)
811
mov %dh, 16(%rdi)
812
#ifdef USE_AS_STPCPY
813
lea 16(%rdi), %rax
814
#endif
815
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
816
sub $17, %r8
817
lea 17(%rdi), %rdi
818
jnz L(StrncpyFillTailWithZero)
819
#endif
820
RETURN
821
822
/* L(Exit18): copy 18 bytes: a 16-byte SSE move at offset 0 and a 16-bit move
   at offset 16.
   stpcpy builds: %rax = %rdi + 17.  Plain strncpy builds: %r8 -= 18,
   %rdi += 18, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
823
L(Exit18):
824
movdqu (%rsi), %xmm0
825
mov 16(%rsi), %cx
826
movdqu %xmm0, (%rdi)
827
mov %cx, 16(%rdi)
828
#ifdef USE_AS_STPCPY
829
lea 17(%rdi), %rax
830
#endif
831
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
832
sub $18, %r8
833
lea 18(%rdi), %rdi
834
jnz L(StrncpyFillTailWithZero)
835
#endif
836
RETURN
837
838
/* L(Exit19): copy 19 bytes: a 16-byte SSE move at offset 0 and an
   overlapping 32-bit move at offset 15.
   stpcpy builds: %rax = %rdi + 18.  Plain strncpy builds: %r8 -= 19,
   %rdi += 19, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
839
L(Exit19):
840
movdqu (%rsi), %xmm0
841
mov 15(%rsi), %ecx
842
movdqu %xmm0, (%rdi)
843
mov %ecx, 15(%rdi)
844
#ifdef USE_AS_STPCPY
845
lea 18(%rdi), %rax
846
#endif
847
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
848
sub $19, %r8
849
lea 19(%rdi), %rdi
850
jnz L(StrncpyFillTailWithZero)
851
#endif
852
RETURN
853
854
/* L(Exit20): copy 20 bytes: a 16-byte SSE move at offset 0 and a 32-bit move
   at offset 16.
   stpcpy builds: %rax = %rdi + 19.  Plain strncpy builds: %r8 -= 20,
   %rdi += 20, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
855
L(Exit20):
856
movdqu (%rsi), %xmm0
857
mov 16(%rsi), %ecx
858
movdqu %xmm0, (%rdi)
859
mov %ecx, 16(%rdi)
860
#ifdef USE_AS_STPCPY
861
lea 19(%rdi), %rax
862
#endif
863
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
864
sub $20, %r8
865
lea 20(%rdi), %rdi
866
jnz L(StrncpyFillTailWithZero)
867
#endif
868
RETURN
869
870
/* L(Exit21): copy 20 bytes from (%rsi) (16-byte SSE move plus 32-bit move at
   offset 16) and store %dh as the 21st byte.
   NOTE(review): %dh is presumably zero (small index in %rdx) -- confirm
   against L(ExitTable), which is outside this chunk.
   stpcpy builds: %rax = %rdi + 20.  Plain strncpy builds: %r8 -= 21,
   %rdi += 21, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
871
L(Exit21):
872
movdqu (%rsi), %xmm0
873
mov 16(%rsi), %ecx
874
movdqu %xmm0, (%rdi)
875
mov %ecx, 16(%rdi)
876
mov %dh, 20(%rdi)
877
#ifdef USE_AS_STPCPY
878
lea 20(%rdi), %rax
879
#endif
880
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
881
sub $21, %r8
882
lea 21(%rdi), %rdi
883
jnz L(StrncpyFillTailWithZero)
884
#endif
885
RETURN
886
887
/* L(Exit22): copy 22 bytes: a 16-byte SSE move at offset 0 and an
   overlapping 64-bit move at offset 14.
   stpcpy builds: %rax = %rdi + 21.  Plain strncpy builds: %r8 -= 22,
   %rdi += 22, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
888
L(Exit22):
889
movdqu (%rsi), %xmm0
890
mov 14(%rsi), %rcx
891
movdqu %xmm0, (%rdi)
892
mov %rcx, 14(%rdi)
893
#ifdef USE_AS_STPCPY
894
lea 21(%rdi), %rax
895
#endif
896
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
897
sub $22, %r8
898
lea 22(%rdi), %rdi
899
jnz L(StrncpyFillTailWithZero)
900
#endif
901
RETURN
902
903
/* L(Exit23): copy 23 bytes: a 16-byte SSE move at offset 0 and an
   overlapping 64-bit move at offset 15.
   stpcpy builds: %rax = %rdi + 22.  Plain strncpy builds: %r8 -= 23,
   %rdi += 23, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
904
L(Exit23):
905
movdqu (%rsi), %xmm0
906
mov 15(%rsi), %rcx
907
movdqu %xmm0, (%rdi)
908
mov %rcx, 15(%rdi)
909
#ifdef USE_AS_STPCPY
910
lea 22(%rdi), %rax
911
#endif
912
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
913
sub $23, %r8
914
lea 23(%rdi), %rdi
915
jnz L(StrncpyFillTailWithZero)
916
#endif
917
RETURN
918
919
/* L(Exit24): copy 24 bytes: a 16-byte SSE move at offset 0 and a 64-bit move
   at offset 16.
   stpcpy builds: %rax = %rdi + 23.  Plain strncpy builds: %r8 -= 24,
   %rdi += 24, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
920
L(Exit24):
921
movdqu (%rsi), %xmm0
922
mov 16(%rsi), %rcx
923
movdqu %xmm0, (%rdi)
924
mov %rcx, 16(%rdi)
925
#ifdef USE_AS_STPCPY
926
lea 23(%rdi), %rax
927
#endif
928
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
929
sub $24, %r8
930
lea 24(%rdi), %rdi
931
jnz L(StrncpyFillTailWithZero)
932
#endif
933
RETURN
934
935
/* L(Exit25): copy 24 bytes from (%rsi) (16-byte SSE move plus 64-bit move at
   offset 16) and store %dh as the 25th byte.
   NOTE(review): %dh is presumably zero (small index in %rdx) -- confirm
   against L(ExitTable), which is outside this chunk.
   stpcpy builds: %rax = %rdi + 24.  Plain strncpy builds: %r8 -= 25,
   %rdi += 25, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
936
L(Exit25):
937
movdqu (%rsi), %xmm0
938
mov 16(%rsi), %rcx
939
movdqu %xmm0, (%rdi)
940
mov %rcx, 16(%rdi)
941
mov %dh, 24(%rdi)
942
#ifdef USE_AS_STPCPY
943
lea 24(%rdi), %rax
944
#endif
945
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
946
sub $25, %r8
947
lea 25(%rdi), %rdi
948
jnz L(StrncpyFillTailWithZero)
949
#endif
950
RETURN
951
952
/* L(Exit26): copy 26 bytes: 16-byte SSE move at offset 0, 64-bit move at
   offset 16 and 16-bit move at offset 24.
   stpcpy builds: %rax = %rdi + 25.  Plain strncpy builds: %r8 -= 26,
   %rdi += 26, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
953
L(Exit26):
954
movdqu (%rsi), %xmm0
955
mov 16(%rsi), %rdx
956
mov 24(%rsi), %cx
957
movdqu %xmm0, (%rdi)
958
mov %rdx, 16(%rdi)
959
mov %cx, 24(%rdi)
960
#ifdef USE_AS_STPCPY
961
lea 25(%rdi), %rax
962
#endif
963
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
964
sub $26, %r8
965
lea 26(%rdi), %rdi
966
jnz L(StrncpyFillTailWithZero)
967
#endif
968
RETURN
969
970
/* L(Exit27): copy 27 bytes: 16-byte SSE move at offset 0, 64-bit move at
   offset 16 and an overlapping 32-bit move at offset 23.
   stpcpy builds: %rax = %rdi + 26.  Plain strncpy builds: %r8 -= 27,
   %rdi += 27, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
971
L(Exit27):
972
movdqu (%rsi), %xmm0
973
mov 16(%rsi), %rdx
974
mov 23(%rsi), %ecx
975
movdqu %xmm0, (%rdi)
976
mov %rdx, 16(%rdi)
977
mov %ecx, 23(%rdi)
978
#ifdef USE_AS_STPCPY
979
lea 26(%rdi), %rax
980
#endif
981
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
982
sub $27, %r8
983
lea 27(%rdi), %rdi
984
jnz L(StrncpyFillTailWithZero)
985
#endif
986
RETURN
987
988
/* L(Exit28): copy 28 bytes: 16-byte SSE move at offset 0, 64-bit move at
   offset 16 and 32-bit move at offset 24.
   stpcpy builds: %rax = %rdi + 27.  Plain strncpy builds: %r8 -= 28,
   %rdi += 28, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
989
L(Exit28):
990
movdqu (%rsi), %xmm0
991
mov 16(%rsi), %rdx
992
mov 24(%rsi), %ecx
993
movdqu %xmm0, (%rdi)
994
mov %rdx, 16(%rdi)
995
mov %ecx, 24(%rdi)
996
#ifdef USE_AS_STPCPY
997
lea 27(%rdi), %rax
998
#endif
999
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1000
sub $28, %r8
1001
lea 28(%rdi), %rdi
1002
jnz L(StrncpyFillTailWithZero)
1003
#endif
1004
RETURN
1005
1006
/* L(Exit29): copy 29 bytes with two overlapping 16-byte SSE moves (offsets 0
   and 13).
   stpcpy builds: %rax = %rdi + 28.  Plain strncpy builds: %r8 -= 29,
   %rdi += 29, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
1007
L(Exit29):
1008
movdqu (%rsi), %xmm0
1009
movdqu 13(%rsi), %xmm2
1010
movdqu %xmm0, (%rdi)
1011
movdqu %xmm2, 13(%rdi)
1012
#ifdef USE_AS_STPCPY
1013
lea 28(%rdi), %rax
1014
#endif
1015
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1016
sub $29, %r8
1017
lea 29(%rdi), %rdi
1018
jnz L(StrncpyFillTailWithZero)
1019
#endif
1020
RETURN
1021
1022
/* L(Exit30): copy 30 bytes with two overlapping 16-byte SSE moves (offsets 0
   and 14).
   stpcpy builds: %rax = %rdi + 29.  Plain strncpy builds: %r8 -= 30,
   %rdi += 30, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
1023
L(Exit30):
1024
movdqu (%rsi), %xmm0
1025
movdqu 14(%rsi), %xmm2
1026
movdqu %xmm0, (%rdi)
1027
movdqu %xmm2, 14(%rdi)
1028
#ifdef USE_AS_STPCPY
1029
lea 29(%rdi), %rax
1030
#endif
1031
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1032
sub $30, %r8
1033
lea 30(%rdi), %rdi
1034
jnz L(StrncpyFillTailWithZero)
1035
#endif
1036
RETURN
1037
1038
/* L(Exit31): copy 31 bytes with two overlapping 16-byte SSE moves (offsets 0
   and 15).
   stpcpy builds: %rax = %rdi + 30.  Plain strncpy builds: %r8 -= 31,
   %rdi += 31, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
1039
L(Exit31):
1040
movdqu (%rsi), %xmm0
1041
movdqu 15(%rsi), %xmm2
1042
movdqu %xmm0, (%rdi)
1043
movdqu %xmm2, 15(%rdi)
1044
#ifdef USE_AS_STPCPY
1045
lea 30(%rdi), %rax
1046
#endif
1047
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1048
sub $31, %r8
1049
lea 31(%rdi), %rdi
1050
jnz L(StrncpyFillTailWithZero)
1051
#endif
1052
RETURN
1053
1054
/* L(Exit32): copy 32 bytes with two 16-byte SSE moves (offsets 0 and 16).
   stpcpy builds: %rax = %rdi + 31.  Plain strncpy builds: %r8 -= 32,
   %rdi += 32, then zero-fill if %r8 is non-zero (lea keeps the flags). */
.p2align 4
1055
L(Exit32):
1056
movdqu (%rsi), %xmm0
1057
movdqu 16(%rsi), %xmm2
1058
movdqu %xmm0, (%rdi)
1059
movdqu %xmm2, 16(%rdi)
1060
#ifdef USE_AS_STPCPY
1061
lea 31(%rdi), %rax
1062
#endif
1063
#if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1064
sub $32, %r8
1065
lea 32(%rdi), %rdi
1066
jnz L(StrncpyFillTailWithZero)
1067
#endif
1068
RETURN
1069
1070
#ifdef USE_AS_STRNCPY
1071
1072
.p2align 4
1073
L(StrncpyExit0):
1074
#ifdef USE_AS_STPCPY
1075
mov %rdi, %rax
1076
#endif
1077
#ifdef USE_AS_STRCAT
1078
xor %ch, %ch
1079
movb %ch, (%rdi)
1080
#endif
1081
RETURN
1082
1083
.p2align 4
1084
L(StrncpyExit1):
1085
mov (%rsi), %dl
1086
mov %dl, (%rdi)
1087
#ifdef USE_AS_STPCPY
1088
lea 1(%rdi), %rax
1089
#endif
1090
#ifdef USE_AS_STRCAT
1091
xor %ch, %ch
1092
movb %ch, 1(%rdi)
1093
#endif
1094
RETURN
1095
1096
.p2align 4
1097
L(StrncpyExit2):
1098
mov (%rsi), %dx
1099
mov %dx, (%rdi)
1100
#ifdef USE_AS_STPCPY
1101
lea 2(%rdi), %rax
1102
#endif
1103
#ifdef USE_AS_STRCAT
1104
xor %ch, %ch
1105
movb %ch, 2(%rdi)
1106
#endif
1107
RETURN
1108
1109
.p2align 4
1110
L(StrncpyExit3):
1111
mov (%rsi), %cx
1112
mov 2(%rsi), %dl
1113
mov %cx, (%rdi)
1114
mov %dl, 2(%rdi)
1115
#ifdef USE_AS_STPCPY
1116
lea 3(%rdi), %rax
1117
#endif
1118
#ifdef USE_AS_STRCAT
1119
xor %ch, %ch
1120
movb %ch, 3(%rdi)
1121
#endif
1122
RETURN
1123
1124
.p2align 4
1125
L(StrncpyExit4):
1126
mov (%rsi), %edx
1127
mov %edx, (%rdi)
1128
#ifdef USE_AS_STPCPY
1129
lea 4(%rdi), %rax
1130
#endif
1131
#ifdef USE_AS_STRCAT
1132
xor %ch, %ch
1133
movb %ch, 4(%rdi)
1134
#endif
1135
RETURN
1136
1137
.p2align 4
1138
L(StrncpyExit5):
1139
mov (%rsi), %ecx
1140
mov 4(%rsi), %dl
1141
mov %ecx, (%rdi)
1142
mov %dl, 4(%rdi)
1143
#ifdef USE_AS_STPCPY
1144
lea 5(%rdi), %rax
1145
#endif
1146
#ifdef USE_AS_STRCAT
1147
xor %ch, %ch
1148
movb %ch, 5(%rdi)
1149
#endif
1150
RETURN
1151
1152
.p2align 4
1153
L(StrncpyExit6):
1154
mov (%rsi), %ecx
1155
mov 4(%rsi), %dx
1156
mov %ecx, (%rdi)
1157
mov %dx, 4(%rdi)
1158
#ifdef USE_AS_STPCPY
1159
lea 6(%rdi), %rax
1160
#endif
1161
#ifdef USE_AS_STRCAT
1162
xor %ch, %ch
1163
movb %ch, 6(%rdi)
1164
#endif
1165
RETURN
1166
1167
.p2align 4
1168
L(StrncpyExit7):
1169
mov (%rsi), %ecx
1170
mov 3(%rsi), %edx
1171
mov %ecx, (%rdi)
1172
mov %edx, 3(%rdi)
1173
#ifdef USE_AS_STPCPY
1174
lea 7(%rdi), %rax
1175
#endif
1176
#ifdef USE_AS_STRCAT
1177
xor %ch, %ch
1178
movb %ch, 7(%rdi)
1179
#endif
1180
RETURN
1181
1182
.p2align 4
1183
L(StrncpyExit8):
1184
mov (%rsi), %rdx
1185
mov %rdx, (%rdi)
1186
#ifdef USE_AS_STPCPY
1187
lea 8(%rdi), %rax
1188
#endif
1189
#ifdef USE_AS_STRCAT
1190
xor %ch, %ch
1191
movb %ch, 8(%rdi)
1192
#endif
1193
RETURN
1194
1195
.p2align 4
1196
L(StrncpyExit9):
1197
mov (%rsi), %rcx
1198
mov 8(%rsi), %dl
1199
mov %rcx, (%rdi)
1200
mov %dl, 8(%rdi)
1201
#ifdef USE_AS_STPCPY
1202
lea 9(%rdi), %rax
1203
#endif
1204
#ifdef USE_AS_STRCAT
1205
xor %ch, %ch
1206
movb %ch, 9(%rdi)
1207
#endif
1208
RETURN
1209
1210
.p2align 4
1211
L(StrncpyExit10):
1212
mov (%rsi), %rcx
1213
mov 8(%rsi), %dx
1214
mov %rcx, (%rdi)
1215
mov %dx, 8(%rdi)
1216
#ifdef USE_AS_STPCPY
1217
lea 10(%rdi), %rax
1218
#endif
1219
#ifdef USE_AS_STRCAT
1220
xor %ch, %ch
1221
movb %ch, 10(%rdi)
1222
#endif
1223
RETURN
1224
1225
.p2align 4
1226
L(StrncpyExit11):
1227
mov (%rsi), %rcx
1228
mov 7(%rsi), %edx
1229
mov %rcx, (%rdi)
1230
mov %edx, 7(%rdi)
1231
#ifdef USE_AS_STPCPY
1232
lea 11(%rdi), %rax
1233
#endif
1234
#ifdef USE_AS_STRCAT
1235
xor %ch, %ch
1236
movb %ch, 11(%rdi)
1237
#endif
1238
RETURN
1239
1240
.p2align 4
1241
L(StrncpyExit12):
1242
mov (%rsi), %rcx
1243
mov 8(%rsi), %edx
1244
mov %rcx, (%rdi)
1245
mov %edx, 8(%rdi)
1246
#ifdef USE_AS_STPCPY
1247
lea 12(%rdi), %rax
1248
#endif
1249
#ifdef USE_AS_STRCAT
1250
xor %ch, %ch
1251
movb %ch, 12(%rdi)
1252
#endif
1253
RETURN
1254
1255
.p2align 4
1256
L(StrncpyExit13):
1257
mov (%rsi), %rcx
1258
mov 5(%rsi), %rdx
1259
mov %rcx, (%rdi)
1260
mov %rdx, 5(%rdi)
1261
#ifdef USE_AS_STPCPY
1262
lea 13(%rdi), %rax
1263
#endif
1264
#ifdef USE_AS_STRCAT
1265
xor %ch, %ch
1266
movb %ch, 13(%rdi)
1267
#endif
1268
RETURN
1269
1270
.p2align 4
1271
L(StrncpyExit14):
1272
mov (%rsi), %rcx
1273
mov 6(%rsi), %rdx
1274
mov %rcx, (%rdi)
1275
mov %rdx, 6(%rdi)
1276
#ifdef USE_AS_STPCPY
1277
lea 14(%rdi), %rax
1278
#endif
1279
#ifdef USE_AS_STRCAT
1280
xor %ch, %ch
1281
movb %ch, 14(%rdi)
1282
#endif
1283
RETURN
1284
1285
.p2align 4
1286
L(StrncpyExit15):
1287
mov (%rsi), %rcx
1288
mov 7(%rsi), %rdx
1289
mov %rcx, (%rdi)
1290
mov %rdx, 7(%rdi)
1291
#ifdef USE_AS_STPCPY
1292
lea 15(%rdi), %rax
1293
#endif
1294
#ifdef USE_AS_STRCAT
1295
xor %ch, %ch
1296
movb %ch, 15(%rdi)
1297
#endif
1298
RETURN
1299
1300
.p2align 4
1301
L(StrncpyExit16):
1302
movdqu (%rsi), %xmm0
1303
movdqu %xmm0, (%rdi)
1304
#ifdef USE_AS_STPCPY
1305
lea 16(%rdi), %rax
1306
#endif
1307
#ifdef USE_AS_STRCAT
1308
xor %ch, %ch
1309
movb %ch, 16(%rdi)
1310
#endif
1311
RETURN
1312
1313
.p2align 4
1314
L(StrncpyExit17):
1315
movdqu (%rsi), %xmm0
1316
mov 16(%rsi), %cl
1317
movdqu %xmm0, (%rdi)
1318
mov %cl, 16(%rdi)
1319
#ifdef USE_AS_STPCPY
1320
lea 17(%rdi), %rax
1321
#endif
1322
#ifdef USE_AS_STRCAT
1323
xor %ch, %ch
1324
movb %ch, 17(%rdi)
1325
#endif
1326
RETURN
1327
1328
.p2align 4
1329
L(StrncpyExit18):
1330
movdqu (%rsi), %xmm0
1331
mov 16(%rsi), %cx
1332
movdqu %xmm0, (%rdi)
1333
mov %cx, 16(%rdi)
1334
#ifdef USE_AS_STPCPY
1335
lea 18(%rdi), %rax
1336
#endif
1337
#ifdef USE_AS_STRCAT
1338
xor %ch, %ch
1339
movb %ch, 18(%rdi)
1340
#endif
1341
RETURN
1342
1343
.p2align 4
1344
L(StrncpyExit19):
1345
movdqu (%rsi), %xmm0
1346
mov 15(%rsi), %ecx
1347
movdqu %xmm0, (%rdi)
1348
mov %ecx, 15(%rdi)
1349
#ifdef USE_AS_STPCPY
1350
lea 19(%rdi), %rax
1351
#endif
1352
#ifdef USE_AS_STRCAT
1353
xor %ch, %ch
1354
movb %ch, 19(%rdi)
1355
#endif
1356
RETURN
1357
1358
.p2align 4
1359
L(StrncpyExit20):
1360
movdqu (%rsi), %xmm0
1361
mov 16(%rsi), %ecx
1362
movdqu %xmm0, (%rdi)
1363
mov %ecx, 16(%rdi)
1364
#ifdef USE_AS_STPCPY
1365
lea 20(%rdi), %rax
1366
#endif
1367
#ifdef USE_AS_STRCAT
1368
xor %ch, %ch
1369
movb %ch, 20(%rdi)
1370
#endif
1371
RETURN
1372
1373
.p2align 4
1374
L(StrncpyExit21):
1375
movdqu (%rsi), %xmm0
1376
mov 16(%rsi), %ecx
1377
mov 20(%rsi), %dl
1378
movdqu %xmm0, (%rdi)
1379
mov %ecx, 16(%rdi)
1380
mov %dl, 20(%rdi)
1381
#ifdef USE_AS_STPCPY
1382
lea 21(%rdi), %rax
1383
#endif
1384
#ifdef USE_AS_STRCAT
1385
xor %ch, %ch
1386
movb %ch, 21(%rdi)
1387
#endif
1388
RETURN
1389
1390
.p2align 4
1391
L(StrncpyExit22):
1392
movdqu (%rsi), %xmm0
1393
mov 14(%rsi), %rcx
1394
movdqu %xmm0, (%rdi)
1395
mov %rcx, 14(%rdi)
1396
#ifdef USE_AS_STPCPY
1397
lea 22(%rdi), %rax
1398
#endif
1399
#ifdef USE_AS_STRCAT
1400
xor %ch, %ch
1401
movb %ch, 22(%rdi)
1402
#endif
1403
RETURN
1404
1405
.p2align 4
1406
L(StrncpyExit23):
1407
movdqu (%rsi), %xmm0
1408
mov 15(%rsi), %rcx
1409
movdqu %xmm0, (%rdi)
1410
mov %rcx, 15(%rdi)
1411
#ifdef USE_AS_STPCPY
1412
lea 23(%rdi), %rax
1413
#endif
1414
#ifdef USE_AS_STRCAT
1415
xor %ch, %ch
1416
movb %ch, 23(%rdi)
1417
#endif
1418
RETURN
1419
1420
/* L(StrncpyExit24): copy 24 bytes from (%rsi) to (%rdi) as a 16-byte move at
   offset 0 plus an 8-byte move at offset 16.
   USE_AS_STPCPY: %rax = %rdi + 24.
   USE_AS_STRCAT: additionally store a zero byte at 24(%rdi).  */
.p2align 4
1421
L(StrncpyExit24):
1422
movdqu (%rsi), %xmm0
1423
mov 16(%rsi), %rcx
1424
movdqu %xmm0, (%rdi)
1425
mov %rcx, 16(%rdi)
1426
#ifdef USE_AS_STPCPY
1427
lea 24(%rdi), %rax
1428
#endif
1429
#ifdef USE_AS_STRCAT
1430
xor %ch, %ch /* %ch = 0; %rcx has already been stored above */
1431
movb %ch, 24(%rdi)
1432
#endif
1433
RETURN
1434
1435
/* L(StrncpyExit25): copy 25 bytes from (%rsi) to (%rdi) as 16 bytes at
   offset 0, 8 bytes at offset 16 (via %rdx) and 1 byte at offset 24 (via %cl).
   USE_AS_STPCPY: %rax = %rdi + 25.
   USE_AS_STRCAT: additionally store a zero byte at 25(%rdi).  */
.p2align 4
1436
L(StrncpyExit25):
1437
movdqu (%rsi), %xmm0
1438
mov 16(%rsi), %rdx
1439
mov 24(%rsi), %cl
1440
movdqu %xmm0, (%rdi)
1441
mov %rdx, 16(%rdi)
1442
mov %cl, 24(%rdi)
1443
#ifdef USE_AS_STPCPY
1444
lea 25(%rdi), %rax
1445
#endif
1446
#ifdef USE_AS_STRCAT
1447
xor %ch, %ch /* %ch = 0; %cl has already been stored above */
1448
movb %ch, 25(%rdi)
1449
#endif
1450
RETURN
1451
1452
/* L(StrncpyExit26): copy 26 bytes from (%rsi) to (%rdi) as 16 bytes at
   offset 0, 8 bytes at offset 16 (via %rdx) and 2 bytes at offset 24 (via %cx).
   USE_AS_STPCPY: %rax = %rdi + 26.
   USE_AS_STRCAT: additionally store a zero byte at 26(%rdi).  */
.p2align 4
1453
L(StrncpyExit26):
1454
movdqu (%rsi), %xmm0
1455
mov 16(%rsi), %rdx
1456
mov 24(%rsi), %cx
1457
movdqu %xmm0, (%rdi)
1458
mov %rdx, 16(%rdi)
1459
mov %cx, 24(%rdi)
1460
#ifdef USE_AS_STPCPY
1461
lea 26(%rdi), %rax
1462
#endif
1463
#ifdef USE_AS_STRCAT
1464
xor %ch, %ch /* %ch = 0; %cx has already been stored above */
1465
movb %ch, 26(%rdi)
1466
#endif
1467
RETURN
1468
1469
/* L(StrncpyExit27): copy 27 bytes from (%rsi) to (%rdi) as 16 bytes at
   offset 0, 8 bytes at offset 16 (via %rdx) and 4 bytes at offset 23
   (via %ecx, overlapping the previous piece by one byte).
   USE_AS_STPCPY: %rax = %rdi + 27.
   USE_AS_STRCAT: additionally store a zero byte at 27(%rdi).  */
.p2align 4
1470
L(StrncpyExit27):
1471
movdqu (%rsi), %xmm0
1472
mov 16(%rsi), %rdx
1473
mov 23(%rsi), %ecx
1474
movdqu %xmm0, (%rdi)
1475
mov %rdx, 16(%rdi)
1476
mov %ecx, 23(%rdi)
1477
#ifdef USE_AS_STPCPY
1478
lea 27(%rdi), %rax
1479
#endif
1480
#ifdef USE_AS_STRCAT
1481
xor %ch, %ch /* %ch = 0; %ecx has already been stored above */
1482
movb %ch, 27(%rdi)
1483
#endif
1484
RETURN
1485
1486
/* L(StrncpyExit28): copy 28 bytes from (%rsi) to (%rdi) as 16 bytes at
   offset 0, 8 bytes at offset 16 (via %rdx) and 4 bytes at offset 24 (via %ecx).
   USE_AS_STPCPY: %rax = %rdi + 28.
   USE_AS_STRCAT: additionally store a zero byte at 28(%rdi).  */
.p2align 4
1487
L(StrncpyExit28):
1488
movdqu (%rsi), %xmm0
1489
mov 16(%rsi), %rdx
1490
mov 24(%rsi), %ecx
1491
movdqu %xmm0, (%rdi)
1492
mov %rdx, 16(%rdi)
1493
mov %ecx, 24(%rdi)
1494
#ifdef USE_AS_STPCPY
1495
lea 28(%rdi), %rax
1496
#endif
1497
#ifdef USE_AS_STRCAT
1498
xor %ch, %ch /* %ch = 0; %ecx has already been stored above */
1499
movb %ch, 28(%rdi)
1500
#endif
1501
RETURN
1502
1503
/* L(StrncpyExit29): copy 29 bytes from (%rsi) to (%rdi) as two 16-byte moves,
   at offsets 0 and 13 (overlapping by three bytes).
   USE_AS_STPCPY: %rax = %rdi + 29.
   USE_AS_STRCAT: additionally store a zero byte at 29(%rdi).  */
.p2align 4
1504
L(StrncpyExit29):
1505
movdqu (%rsi), %xmm0
1506
movdqu 13(%rsi), %xmm2
1507
movdqu %xmm0, (%rdi)
1508
movdqu %xmm2, 13(%rdi)
1509
#ifdef USE_AS_STPCPY
1510
lea 29(%rdi), %rax
1511
#endif
1512
#ifdef USE_AS_STRCAT
1513
xor %ch, %ch /* %ch = 0, used as the zero byte below */
1514
movb %ch, 29(%rdi)
1515
#endif
1516
RETURN
1517
1518
/* L(StrncpyExit30): copy 30 bytes from (%rsi) to (%rdi) as two 16-byte moves,
   at offsets 0 and 14 (overlapping by two bytes).
   USE_AS_STPCPY: %rax = %rdi + 30.
   USE_AS_STRCAT: additionally store a zero byte at 30(%rdi).  */
.p2align 4
1519
L(StrncpyExit30):
1520
movdqu (%rsi), %xmm0
1521
movdqu 14(%rsi), %xmm2
1522
movdqu %xmm0, (%rdi)
1523
movdqu %xmm2, 14(%rdi)
1524
#ifdef USE_AS_STPCPY
1525
lea 30(%rdi), %rax
1526
#endif
1527
#ifdef USE_AS_STRCAT
1528
xor %ch, %ch /* %ch = 0, used as the zero byte below */
1529
movb %ch, 30(%rdi)
1530
#endif
1531
RETURN
1532
1533
/* L(StrncpyExit31): copy 31 bytes from (%rsi) to (%rdi) as two 16-byte moves,
   at offsets 0 and 15 (overlapping by one byte).
   USE_AS_STPCPY: %rax = %rdi + 31.
   USE_AS_STRCAT: additionally store a zero byte at 31(%rdi).  */
.p2align 4
1534
L(StrncpyExit31):
1535
movdqu (%rsi), %xmm0
1536
movdqu 15(%rsi), %xmm2
1537
movdqu %xmm0, (%rdi)
1538
movdqu %xmm2, 15(%rdi)
1539
#ifdef USE_AS_STPCPY
1540
lea 31(%rdi), %rax
1541
#endif
1542
#ifdef USE_AS_STRCAT
1543
xor %ch, %ch /* %ch = 0, used as the zero byte below */
1544
movb %ch, 31(%rdi)
1545
#endif
1546
RETURN
1547
1548
/* L(StrncpyExit32): copy 32 bytes from (%rsi) to (%rdi) as two 16-byte moves,
   at offsets 0 and 16.
   USE_AS_STPCPY: %rax = %rdi + 32.
   USE_AS_STRCAT: additionally store a zero byte at 32(%rdi).  */
.p2align 4
1549
L(StrncpyExit32):
1550
movdqu (%rsi), %xmm0
1551
movdqu 16(%rsi), %xmm2
1552
movdqu %xmm0, (%rdi)
1553
movdqu %xmm2, 16(%rdi)
1554
#ifdef USE_AS_STPCPY
1555
lea 32(%rdi), %rax
1556
#endif
1557
#ifdef USE_AS_STRCAT
1558
xor %ch, %ch /* %ch = 0, used as the zero byte below */
1559
movb %ch, 32(%rdi)
1560
#endif
1561
RETURN
1562
1563
/* L(StrncpyExit33): copy 33 bytes from (%rsi) to (%rdi) as two 16-byte moves
   at offsets 0 and 16 plus 1 byte at offset 32 (via %cl).
   USE_AS_STPCPY: %rax = %rdi + 33, matching what every other
   L(StrncpyExitN) handler does for its own N.
   USE_AS_STRCAT: additionally store a zero byte at 33(%rdi).
   NOTE(review): whether this entry is reachable in a USE_AS_STPCPY build
   depends on the dispatch code outside this section; the %rax update is
   correct if it is reached and has no effect otherwise.  */
.p2align 4
1564
L(StrncpyExit33):
1565
movdqu (%rsi), %xmm0
1566
movdqu 16(%rsi), %xmm2
1567
mov 32(%rsi), %cl
1568
movdqu %xmm0, (%rdi)
1569
movdqu %xmm2, 16(%rdi)
1570
mov %cl, 32(%rdi)
#ifdef USE_AS_STPCPY
lea 33(%rdi), %rax
#endif
1571
#ifdef USE_AS_STRCAT
1572
xor %ch, %ch /* %ch = 0; %cl has already been stored above */
1573
movb %ch, 33(%rdi)
1574
#endif
1575
RETURN
1576
1577
#ifndef USE_AS_STRCAT
1578
1579
/* L(Fill0): nothing to store; just RETURN.  */
.p2align 4
1580
L(Fill0):
1581
RETURN
1582
1583
/* L(Fill1): store 1 byte from %dl at (%rdi).  %rdx is expected to be zero
   here (L(StrncpyFillTailWithZero) clears it before dispatching).  */
.p2align 4
1584
L(Fill1):
1585
mov %dl, (%rdi)
1586
RETURN
1587
1588
/* L(Fill2): store 2 bytes from %dx at (%rdi).  %rdx is expected to be zero
   here (L(StrncpyFillTailWithZero) clears it before dispatching).  */
.p2align 4
1589
L(Fill2):
1590
mov %dx, (%rdi)
1591
RETURN
1592
1593
/* L(Fill3): cover (%rdi)..2(%rdi) with one 4-byte store from %edx that
   starts at -1(%rdi).  %rdx is expected to be zero here.
   NOTE(review): this also rewrites the byte just before %rdi; presumably
   that byte is already zero at every jump site -- confirm against callers.  */
.p2align 4
1594
L(Fill3):
1595
mov %edx, -1(%rdi)
1596
RETURN
1597
1598
/* L(Fill4): store 4 bytes from %edx at (%rdi).  %rdx is expected to be zero
   here (L(StrncpyFillTailWithZero) clears it before dispatching).  */
.p2align 4
1599
L(Fill4):
1600
mov %edx, (%rdi)
1601
RETURN
1602
1603
/* L(Fill5): store 5 bytes at (%rdi): 4 from %edx at offset 0 and 1 from %dl
   at offset 4.  %rdx is expected to be zero here.  */
.p2align 4
1604
L(Fill5):
1605
mov %edx, (%rdi)
1606
mov %dl, 4(%rdi)
1607
RETURN
1608
1609
/* L(Fill6): store 6 bytes at (%rdi): 4 from %edx at offset 0 and 2 from %dx
   at offset 4.  %rdx is expected to be zero here.  */
.p2align 4
1610
L(Fill6):
1611
mov %edx, (%rdi)
1612
mov %dx, 4(%rdi)
1613
RETURN
1614
1615
/* L(Fill7): cover (%rdi)..6(%rdi) with one 8-byte store from %rdx that
   starts at -1(%rdi).  %rdx is expected to be zero here.
   NOTE(review): this also rewrites the byte just before %rdi; presumably
   that byte is already zero at every jump site -- confirm against callers.  */
.p2align 4
1616
L(Fill7):
1617
mov %rdx, -1(%rdi)
1618
RETURN
1619
1620
/* L(Fill8): store 8 bytes from %rdx at (%rdi).  %rdx is expected to be zero
   here (L(StrncpyFillTailWithZero) clears it before dispatching).  */
.p2align 4
1621
L(Fill8):
1622
mov %rdx, (%rdi)
1623
RETURN
1624
1625
/* L(Fill9): store 9 bytes at (%rdi): 8 from %rdx at offset 0 and 1 from %dl
   at offset 8.  %rdx is expected to be zero here.  */
.p2align 4
1626
L(Fill9):
1627
mov %rdx, (%rdi)
1628
mov %dl, 8(%rdi)
1629
RETURN
1630
1631
/* L(Fill10): store 10 bytes at (%rdi): 8 from %rdx at offset 0 and 2 from
   %dx at offset 8.  %rdx is expected to be zero here.  */
.p2align 4
1632
L(Fill10):
1633
mov %rdx, (%rdi)
1634
mov %dx, 8(%rdi)
1635
RETURN
1636
1637
/* L(Fill11): store 11 bytes at (%rdi): 8 from %rdx at offset 0 and 4 from
   %edx at offset 7 (overlapping by one byte).  %rdx is expected to be zero
   here.  */
.p2align 4
1638
L(Fill11):
1639
mov %rdx, (%rdi)
1640
mov %edx, 7(%rdi)
1641
RETURN
1642
1643
/* L(Fill12): store 12 bytes at (%rdi): 8 from %rdx at offset 0 and 4 from
   %edx at offset 8.  %rdx is expected to be zero here.  */
.p2align 4
1644
L(Fill12):
1645
mov %rdx, (%rdi)
1646
mov %edx, 8(%rdi)
1647
RETURN
1648
1649
/* L(Fill13): store 13 bytes at (%rdi) as two 8-byte stores from %rdx, at
   offsets 0 and 5 (overlapping by three bytes).  %rdx is expected to be zero
   here.  */
.p2align 4
1650
L(Fill13):
1651
mov %rdx, (%rdi)
1652
mov %rdx, 5(%rdi)
1653
RETURN
1654
1655
/* L(Fill14): store 14 bytes at (%rdi) as two 8-byte stores from %rdx, at
   offsets 0 and 6 (overlapping by two bytes).  %rdx is expected to be zero
   here.  */
.p2align 4
1656
L(Fill14):
1657
mov %rdx, (%rdi)
1658
mov %rdx, 6(%rdi)
1659
RETURN
1660
1661
/* L(Fill15): cover (%rdi)..14(%rdi) with one unaligned 16-byte store from
   %xmm0 that starts at -1(%rdi).  %xmm0 is expected to be zero here
   (L(StrncpyFillTailWithZero) clears it before dispatching).
   NOTE(review): this also rewrites the byte just before %rdi; presumably
   that byte is already zero at every jump site -- confirm against callers.  */
.p2align 4
1662
L(Fill15):
1663
movdqu %xmm0, -1(%rdi)
1664
RETURN
1665
1666
/* L(Fill16): store 16 bytes from %xmm0 at (%rdi) with an unaligned move.
   %xmm0 is expected to be zero here (L(StrncpyFillTailWithZero) clears it
   before dispatching).  */
.p2align 4
1667
L(Fill16):
1668
movdqu %xmm0, (%rdi)
1669
RETURN
1670
1671
/* L(CopyFrom1To16BytesUnalignedXmm2): store the 16 bytes of %xmm2 at
   (%rdi,%rcx) with an unaligned move.  There is no jump or RETURN after the
   store, so execution falls through into L(CopyFrom1To16BytesXmmExit).  */
.p2align 4
1672
L(CopyFrom1To16BytesUnalignedXmm2):
1673
movdqu %xmm2, (%rdi, %rcx)
1674
1675
/* L(CopyFrom1To16BytesXmmExit): convert the bit mask in %rdx to a bit index,
   fold that index into the count in %r8, and move %rdi to one byte past the
   indexed position.  Execution then falls through into
   L(StrncpyFillTailWithZero).
   On exit: %r8 = %r8 + 15 - index, %rdi = %rdi + %rcx + index + 1.
   USE_AS_STPCPY: %rax = address of the indexed byte.  */
.p2align 4
1676
L(CopyFrom1To16BytesXmmExit):
1677
bsf %rdx, %rdx /* %rdx = index of the lowest set bit; presumably the NUL's offset in the chunk -- confirm at jump sites */
1678
add $15, %r8
1679
add %rcx, %rdi /* apply the chunk offset held in %rcx */
1680
#ifdef USE_AS_STPCPY
1681
lea (%rdi, %rdx), %rax
1682
#endif
1683
sub %rdx, %r8
1684
lea 1(%rdi, %rdx), %rdi /* step past the indexed byte */
1685
1686
/* L(StrncpyFillTailWithZero): write zero bytes starting at (%rdi), with %r8
   as the running byte count.
   - Clears %xmm0 and %rdx; the L(Fill*) handlers store from these.
   - If %r8 <= 16 (unsigned) on entry, dispatches straight through
     L(FillTable) with the original %r8.
   - Otherwise writes one unaligned 16-byte chunk, rounds %rdi down to a
     16-byte boundary (crediting the stepped-back bytes to %r8), writes
     aligned 64-byte groups in a loop, then aligned 16-byte chunks, and
     finally dispatches through L(FillTable) with the leftover count.
   Clobbers %rsi (used as scratch for the misalignment).
   BRANCH_TO_JMPTBL_ENTRY is defined outside this section; presumably it
   jumps to the table entry selected by %r8 -- confirm at its definition.  */
.p2align 4
1687
L(StrncpyFillTailWithZero):
1688
pxor %xmm0, %xmm0 /* %xmm0 = 0 */
1689
xor %rdx, %rdx /* %rdx = 0 */
1690
sub $16, %r8
1691
jbe L(StrncpyFillExit) /* %r8 was <= 16: the exit path adds the 16 back */
1692
1693
movdqu %xmm0, (%rdi) /* first 16 zero bytes; %rdi may be unaligned */
1694
add $16, %rdi
1695
1696
mov %rdi, %rsi
1697
and $0xf, %rsi /* %rsi = %rdi mod 16 */
1698
sub %rsi, %rdi /* round %rdi down to a 16-byte boundary */
1699
add %rsi, %r8 /* count the bytes that will be written again */
1700
sub $64, %r8
1701
jb L(StrncpyFillLess64)
1702
1703
L(StrncpyFillLoopMovdqa):
1704
movdqa %xmm0, (%rdi) /* 64 bytes per iteration, aligned stores */
1705
movdqa %xmm0, 16(%rdi)
1706
movdqa %xmm0, 32(%rdi)
1707
movdqa %xmm0, 48(%rdi)
1708
add $64, %rdi
1709
sub $64, %r8
1710
jae L(StrncpyFillLoopMovdqa)
1711
1712
L(StrncpyFillLess64):
1713
add $32, %r8 /* %r8 = remaining - 32 */
1714
jl L(StrncpyFillLess32)
1715
movdqa %xmm0, (%rdi)
1716
movdqa %xmm0, 16(%rdi)
1717
add $32, %rdi
1718
sub $16, %r8 /* %r8 = remaining - 16 after the 32 bytes above */
1719
jl L(StrncpyFillExit)
1720
movdqa %xmm0, (%rdi)
1721
add $16, %rdi
1722
BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1723
1724
L(StrncpyFillLess32):
1725
add $16, %r8 /* %r8 = remaining - 16 */
1726
jl L(StrncpyFillExit)
1727
movdqa %xmm0, (%rdi)
1728
add $16, %rdi
1729
BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1730
1731
L(StrncpyFillExit):
1732
add $16, %r8 /* undo the last subtraction of 16 so %r8 is the leftover count */
1733
BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1734
1735
/* end of ifndef USE_AS_STRCAT */
1736
#endif
1737
1738
/* L(UnalignedLeaveCase2OrCase3): branch on %rdx.  Non-zero goes to
   L(Unaligned64LeaveCase2); zero falls into L(Unaligned64LeaveCase3).

   L(Unaligned64LeaveCase3): store %xmm4, %xmm5, %xmm6, %xmm7 to (%rdi),
   16(%rdi), 32(%rdi), 48(%rdi) in order, adjusting %r8 before each store
   and leaving for L(CopyFrom1To16BytesCase3) as soon as %r8 goes negative
   or borrows.  %rcx is set to (%r8 + 64) rounded down to a multiple of 16
   before %r8 is adjusted.  If all four chunks are stored:
   USE_AS_STPCPY: %rax = %rdi + 64.
   USE_AS_STRCAT: store a zero byte at 64(%rdi).  */
.p2align 4
1739
L(UnalignedLeaveCase2OrCase3):
1740
test %rdx, %rdx
1741
jnz L(Unaligned64LeaveCase2)
1742
L(Unaligned64LeaveCase3):
1743
lea 64(%r8), %rcx
1744
and $-16, %rcx /* %rcx = (%r8 + 64) & ~15 */
1745
add $48, %r8
1746
jl L(CopyFrom1To16BytesCase3) /* signed test: result negative */
1747
movdqu %xmm4, (%rdi)
1748
sub $16, %r8
1749
jb L(CopyFrom1To16BytesCase3) /* unsigned test: subtraction borrowed */
1750
movdqu %xmm5, 16(%rdi)
1751
sub $16, %r8
1752
jb L(CopyFrom1To16BytesCase3)
1753
movdqu %xmm6, 32(%rdi)
1754
sub $16, %r8
1755
jb L(CopyFrom1To16BytesCase3)
1756
movdqu %xmm7, 48(%rdi)
1757
#ifdef USE_AS_STPCPY
1758
lea 64(%rdi), %rax
1759
#endif
1760
#ifdef USE_AS_STRCAT
1761
xor %ch, %ch /* %ch = 0, used as the zero byte below */
1762
movb %ch, 64(%rdi)
1763
#endif
1764
RETURN
1765
1766
/* L(Unaligned64LeaveCase2): walk the four 16-byte chunks held in
   %xmm4..%xmm7.  Each chunk is compared bytewise with %xmm0 and the result
   mask is placed in %rdx; the previous chunk is stored to the destination
   once its own check has passed.  %rcx tracks the byte offset of the chunk
   under test (0, 16, 32) and %r8 the remaining count.
   - %r8 exhausted at a step: go to L(CopyFrom1To16BytesCase2OrCase3).
   - Mask non-zero at a step: go to L(CopyFrom1To16BytesUnalignedXmm4/5/6)
     (or L(CopyFrom1To16Bytes) when USE_AS_STRCAT is defined).
   - For the last chunk (%xmm7): advance %rdi and %rsi by 48, then pick
     L(CopyFrom1To16BytesExit) if the mask's bit index is below %r8, else
     dispatch through L(ExitStrncpyTable) on %r8.
   NOTE(review): %xmm0 is presumably all-zero on entry so that the compare
   flags NUL bytes -- confirm at the jump sites.  Whenever a mask comes back
   zero, pcmpeqb has left %xmm0 all-zero for the next compare.  */
.p2align 4
1767
L(Unaligned64LeaveCase2):
1768
xor %rcx, %rcx /* chunk offset = 0 */
1769
pcmpeqb %xmm4, %xmm0
1770
pmovmskb %xmm0, %rdx
1771
add $48, %r8
1772
jle L(CopyFrom1To16BytesCase2OrCase3)
1773
test %rdx, %rdx
1774
#ifndef USE_AS_STRCAT
1775
jnz L(CopyFrom1To16BytesUnalignedXmm4)
1776
#else
1777
jnz L(CopyFrom1To16Bytes)
1778
#endif
1779
pcmpeqb %xmm5, %xmm0
1780
pmovmskb %xmm0, %rdx
1781
movdqu %xmm4, (%rdi) /* first chunk had an empty mask: store it */
1782
add $16, %rcx
1783
sub $16, %r8
1784
jbe L(CopyFrom1To16BytesCase2OrCase3)
1785
test %rdx, %rdx
1786
#ifndef USE_AS_STRCAT
1787
jnz L(CopyFrom1To16BytesUnalignedXmm5)
1788
#else
1789
jnz L(CopyFrom1To16Bytes)
1790
#endif
1791
1792
pcmpeqb %xmm6, %xmm0
1793
pmovmskb %xmm0, %rdx
1794
movdqu %xmm5, 16(%rdi) /* second chunk had an empty mask: store it */
1795
add $16, %rcx
1796
sub $16, %r8
1797
jbe L(CopyFrom1To16BytesCase2OrCase3)
1798
test %rdx, %rdx
1799
#ifndef USE_AS_STRCAT
1800
jnz L(CopyFrom1To16BytesUnalignedXmm6)
1801
#else
1802
jnz L(CopyFrom1To16Bytes)
1803
#endif
1804
1805
pcmpeqb %xmm7, %xmm0
1806
pmovmskb %xmm0, %rdx
1807
movdqu %xmm6, 32(%rdi) /* third chunk had an empty mask: store it */
1808
lea 16(%rdi, %rcx), %rdi /* %rcx is 32 here, so %rdi += 48 */
1809
lea 16(%rsi, %rcx), %rsi /* likewise %rsi += 48 */
1810
bsf %rdx, %rdx /* %rdx = index of the lowest set mask bit */
1811
cmp %r8, %rdx
1812
jb L(CopyFrom1To16BytesExit) /* index < %r8 (unsigned) */
1813
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
1814
1815
/* L(ExitZero): return without copying anything.  When USE_AS_STRCAT is not
   defined, %rax is set to %rdi first.  */
.p2align 4
1816
L(ExitZero):
1817
#ifndef USE_AS_STRCAT
1818
mov %rdi, %rax
1819
#endif
1820
RETURN
1821
1822
#endif
1823
1824
/* Invoke END for the symbol this build defines: STRCPY normally, STRCAT when
   USE_AS_STRCAT is defined.  END is a macro defined outside this section;
   presumably it closes the function -- confirm at its definition.  */
#ifndef USE_AS_STRCAT
1825
END (STRCPY)
1826
#else
1827
END (STRCAT)
1828
#endif
1829
/* L(ExitTable): jump table in .rodata with 32 entries, one .int per entry.
   Entry i (0-based) refers to L(Exit<i+1>), i.e. L(Exit1) .. L(Exit32).
   JMPTBL is a macro defined outside this section; presumably it encodes the
   target relative to the table base -- confirm at its definition.
   NOTE(review): the .p2align 4 below comes before the .section switch, so it
   pads whatever section is current at that point rather than aligning this
   table inside .rodata -- confirm whether that is intended.  */
.p2align 4
1830
.section .rodata
1831
L(ExitTable):
1832
.int JMPTBL(L(Exit1), L(ExitTable))
1833
.int JMPTBL(L(Exit2), L(ExitTable))
1834
.int JMPTBL(L(Exit3), L(ExitTable))
1835
.int JMPTBL(L(Exit4), L(ExitTable))
1836
.int JMPTBL(L(Exit5), L(ExitTable))
1837
.int JMPTBL(L(Exit6), L(ExitTable))
1838
.int JMPTBL(L(Exit7), L(ExitTable))
1839
.int JMPTBL(L(Exit8), L(ExitTable))
1840
.int JMPTBL(L(Exit9), L(ExitTable))
1841
.int JMPTBL(L(Exit10), L(ExitTable))
1842
.int JMPTBL(L(Exit11), L(ExitTable))
1843
.int JMPTBL(L(Exit12), L(ExitTable))
1844
.int JMPTBL(L(Exit13), L(ExitTable))
1845
.int JMPTBL(L(Exit14), L(ExitTable))
1846
.int JMPTBL(L(Exit15), L(ExitTable))
1847
.int JMPTBL(L(Exit16), L(ExitTable))
1848
.int JMPTBL(L(Exit17), L(ExitTable))
1849
.int JMPTBL(L(Exit18), L(ExitTable))
1850
.int JMPTBL(L(Exit19), L(ExitTable))
1851
.int JMPTBL(L(Exit20), L(ExitTable))
1852
.int JMPTBL(L(Exit21), L(ExitTable))
1853
.int JMPTBL(L(Exit22), L(ExitTable))
1854
.int JMPTBL(L(Exit23), L(ExitTable))
1855
.int JMPTBL(L(Exit24), L(ExitTable))
1856
.int JMPTBL(L(Exit25), L(ExitTable))
1857
.int JMPTBL(L(Exit26), L(ExitTable))
1858
.int JMPTBL(L(Exit27), L(ExitTable))
1859
.int JMPTBL(L(Exit28), L(ExitTable))
1860
.int JMPTBL(L(Exit29), L(ExitTable))
1861
.int JMPTBL(L(Exit30), L(ExitTable))
1862
.int JMPTBL(L(Exit31), L(ExitTable))
1863
.int JMPTBL(L(Exit32), L(ExitTable))
1864
#ifdef USE_AS_STRNCPY
1865
/* L(ExitStrncpyTable): jump table with 34 entries, one .int per entry.
   Entry N refers to L(StrncpyExitN) for N = 0 .. 33, so the index is the
   number carried in the handler's name.  Only emitted when USE_AS_STRNCPY is
   defined (see the enclosing #ifdef).  */
L(ExitStrncpyTable):
1866
.int JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
1867
.int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1868
.int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1869
.int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1870
.int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1871
.int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1872
.int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1873
.int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1874
.int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1875
.int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1876
.int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1877
.int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1878
.int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1879
.int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1880
.int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1881
.int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1882
.int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1883
.int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1884
.int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1885
.int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1886
.int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1887
.int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1888
.int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1889
.int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1890
.int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1891
.int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1892
.int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1893
.int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1894
.int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1895
.int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1896
.int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1897
.int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1898
.int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1899
.int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
1900
# ifndef USE_AS_STRCAT
1901
/* L(FillTable): jump table with 17 entries, one .int per entry, aligned to
   16 bytes.  Entry N refers to L(FillN) for N = 0 .. 16, so the index is the
   number of bytes that handler writes.  Only emitted when USE_AS_STRNCPY is
   defined and USE_AS_STRCAT is not (see the enclosing conditionals).  */
.p2align 4
1902
L(FillTable):
1903
.int JMPTBL(L(Fill0), L(FillTable))
1904
.int JMPTBL(L(Fill1), L(FillTable))
1905
.int JMPTBL(L(Fill2), L(FillTable))
1906
.int JMPTBL(L(Fill3), L(FillTable))
1907
.int JMPTBL(L(Fill4), L(FillTable))
1908
.int JMPTBL(L(Fill5), L(FillTable))
1909
.int JMPTBL(L(Fill6), L(FillTable))
1910
.int JMPTBL(L(Fill7), L(FillTable))
1911
.int JMPTBL(L(Fill8), L(FillTable))
1912
.int JMPTBL(L(Fill9), L(FillTable))
1913
.int JMPTBL(L(Fill10), L(FillTable))
1914
.int JMPTBL(L(Fill11), L(FillTable))
1915
.int JMPTBL(L(Fill12), L(FillTable))
1916
.int JMPTBL(L(Fill13), L(FillTable))
1917
.int JMPTBL(L(Fill14), L(FillTable))
1918
.int JMPTBL(L(Fill15), L(FillTable))
1919
.int JMPTBL(L(Fill16), L(FillTable))
1920
# endif
1921
#endif
1922
1923