Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/amd64/string/strcat.S
39500 views
1
/*-
2
* Copyright (c) 2023, The FreeBSD Foundation
3
*
4
* SPDX-License-Identifier: BSD-2-Clause
5
*
6
* Portions of this software were developed by Robert Clausecker
7
* <[email protected]> under sponsorship from the FreeBSD Foundation.
8
*
9
* Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcat.S
10
* written by J.T. Conklin <[email protected]>
11
* that was originally dedicated to the public domain
12
*/
13
14
#include <machine/asm.h>
15
#if 0
16
RCSID("$NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $")
17
#endif
18
19
#include "amd64_archlevel.h"
20
21
/*
 * Implementation list for strcat.  Each ARCHFUNC entry names one of the
 * variants defined below with ARCHENTRY (scalar and baseline).
 * NOTE(review): the macros come from amd64_archlevel.h; presumably they
 * build the table used to pick a variant for the running CPU -- confirm
 * against that header.
 */
ARCHFUNCS(strcat)
	ARCHFUNC(strcat, scalar)
	ARCHFUNC(strcat, baseline)
ENDARCHFUNCS(strcat)
25
26
/*
 * char *strcat(char *dst, const char *src) -- scalar variant.
 *
 * ABI:   System V AMD64, leaf function, no stack usage.
 * In:    %rdi = dst, %rsi = src
 * Out:   %rax = dst (the original destination pointer)
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 *
 * Register roles:
 *   %r8  = 0x0101010101010101  (subtracted from every byte of a word)
 *   %r9  = 0x8080808080808080  (mask of the per-byte high bits)
 *   %rdi = cursor into dst, %rsi = cursor into src
 *   %rdx = word/byte currently examined or copied, %rcx = scratch
 *
 * Phase 1 locates the terminating NUL of dst, phase 2 copies src
 * (including its NUL) there.  Both phases first advance byte by byte
 * until their pointer is 8-byte aligned, so that every 8-byte load in
 * the word loops is naturally aligned.
 */
ARCHENTRY(strcat, scalar)
	movq	%rdi,%rax			/* return value: dst */
	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Align destination to word boundary.
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$7,%dil
	je	.Lscan_aligned
	cmpb	$0,(%rdi)
	je	.Lcopy				/* found the end of dst already */
	incq	%rdi
	jmp	.Lscan_align

	/*
	 * 16-byte align the loop head.  .p2align is used because on x86 ELF
	 * targets ".align 4" only requests 4-byte alignment.  The padding is
	 * never executed: it follows an unconditional jmp.
	 */
	.p2align 4
.Lscan_aligned:
.Lscan_loop:
	movq	(%rdi),%rdx
	addq	$8,%rdi				/* %rdi now points past the word */
	subq	%r8,%rdx
	testq	%r9,%rdx			/* (word - 0x01..01) & 0x80..80 */
	je	.Lscan_loop			/* zero: no NUL byte in this word */

	/*
	 * The test above is nonzero for every word that contains a NUL
	 * byte, but it is also nonzero for words containing bytes of 0x81
	 * and above, so the loop may exit prematurely.  Check each byte of
	 * the word just read (at -8..-1 relative to %rdi) and return to
	 * the loop if none of them equals 0.
	 */

	cmpb	$0,-8(%rdi)			/* 1st byte == 0? */
	jne	1f
	subq	$8,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-7(%rdi)			/* 2nd byte == 0? */
	jne	1f
	subq	$7,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-6(%rdi)			/* 3rd byte == 0? */
	jne	1f
	subq	$6,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-5(%rdi)			/* 4th byte == 0? */
	jne	1f
	subq	$5,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-4(%rdi)			/* 5th byte == 0? */
	jne	1f
	subq	$4,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-3(%rdi)			/* 6th byte == 0? */
	jne	1f
	subq	$3,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-2(%rdi)			/* 7th byte == 0? */
	jne	1f
	subq	$2,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-1(%rdi)			/* 8th byte == 0? */
	jne	.Lscan_loop			/* false positive: keep scanning */
	subq	$1,%rdi				/* fall through with %rdi at the NUL */

	/*
	 * Here %rdi points at the terminating NUL of dst.
	 *
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testb	$7,%sil
	je	.Lcopy_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lcopy_align
	ret					/* copied the NUL: done */

	/*
	 * 16-byte align the loop head (see the note on .p2align above).
	 * The padding is never executed: it follows a ret.
	 */
	.p2align 4
.Lcopy_loop:
	movq	%rdx,(%rdi)			/* store the NUL-free word */
	addq	$8,%rdi
.Lcopy_aligned:					/* loop entry: load before store */
	movq	(%rsi),%rdx
	movq	%rdx,%rcx			/* test a copy, keep %rdx intact */
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lcopy_loop			/* zero: no NUL byte in this word */

	/*
	 * The word in %rdx may contain a NUL, or the test may have been
	 * a false positive (bytes of 0x81 and above).  Store it one byte
	 * at a time, lowest byte first, stopping after a NUL; return to
	 * the loop if none of the bytes in the word equal 0.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl				/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl				/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl				/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl				/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl				/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl				/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl				/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl				/* 8th byte == 0? */
	jne	.Lcopy_aligned			/* false positive: keep copying */

.Ldone:
	ret
ARCHEND(strcat, scalar)
180
181
/*
 * char *strcat(char *dst, const char *src) -- baseline variant.
 *
 * Call into strlen + strcpy if we have any SIMD at all.
 * The scalar implementation above is better for the scalar
 * case as it avoids the function call overhead, but pessimal
 * if we could call SIMD routines instead.
 *
 * In:    %rdi = dst, %rsi = src
 * Out:   %rax = dst
 *
 * Stack frame: src is kept at -8(%rbp) and the caller's %rbx at
 * -16(%rbp).  With the return address and these three pushes, %rsp is
 * 16-byte aligned at both call sites.
 */
ARCHENTRY(strcat, baseline)
	push	%rbp
	mov	%rsp, %rbp
	push	%rsi				/* save src across strlen */
	push	%rbx				/* callee-saved, holds dest below */
	mov	%rdi, %rbx			/* remember destination for later */
	call	CNAME(strlen)			/* strlen(dest) */
	mov	-8(%rbp), %rsi			/* reload src */
	lea	(%rbx, %rax, 1), %rdi		/* dest + strlen(dest) */
	call	CNAME(__stpcpy)			/* stpcpy(dest + strlen(dest), src) */
	mov	%rbx, %rax			/* return dest */
	pop	%rbx
	leave					/* also drops the saved src slot */
	ret
ARCHEND(strcat, baseline)
202
203
/*
 * Emit an empty .note.GNU-stack section; by GNU toolchain convention
 * this marks the object as not requiring an executable stack.
 */
.section .note.GNU-stack,"",%progbits
204
205