GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/riscv/string/memcpy.S
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <[email protected]>
 */

#include <machine/asm.h>

/*
 * a0 - void* dst
 * a1 - const void* src
 * a2 - size_t len
 */
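/*
 * Strategy:
 *  - copies shorter than 8 bytes branch straight to the byte-copy tail;
 *  - otherwise the first (-dst & 0b111) bytes are copied so dst becomes
 *    8-byte aligned;
 *  - if src then shares that alignment, whole dwords are copied
 *    (64 bytes per iteration for large copies); otherwise aligned dwords
 *    are loaded from src and stitched together with shifts;
 *  - the remaining 0-7 tail bytes go through the byte copies at .Lduff_end.
 */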
ENTRY(memcpy)
	beqz a2, .Lreturn

	/* diff = (dstv - srcv) & 0b111 */
	sub t0, a0, a1
	andi t0, t0, 0b111
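	/*
	 * diff == 0 means dst and src have the same alignment mod 8, so once
	 * dst is aligned the copy can proceed in whole dwords (.Lmemcpy8).
	 */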

	sltiu t1, a2, 8

	/* we never change a0, because memcpy returns the original dst */
	mv a3, a0

	/* len < 8 */
	bnez t1, .Lend

	/* t1 = (-dst) & 0b111 */
	neg t1, a0
	andi t1, t1, 0b111

	sub a2, a2, t1

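	/*
	 * Copy the first t1 bytes (aligning dst) by jumping into the lb/sb
	 * sequence below: each pair is two 4-byte instructions, so jumping to
	 * .Lduff_start - t1 * 8 executes exactly the last t1 pairs, i.e.
	 * offsets t1-1 down to 0.
	 */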
	la t2, .Lduff_start
	slli t3, t1, 3
	sub t2, t2, t3
	jr t2
	lb t3, 6(a1)
	sb t3, 6(a3)
	lb t3, 5(a1)
	sb t3, 5(a3)
	lb t3, 4(a1)
	sb t3, 4(a3)
	lb t3, 3(a1)
	sb t3, 3(a3)
	lb t3, 2(a1)
	sb t3, 2(a3)
	lb t3, 1(a1)
	sb t3, 1(a3)
	lb t3, 0(a1)
	sb t3, 0(a3)
.Lduff_start:

	add a1, a1, t1
	add a3, a3, t1

	beqz a2, .Lreturn

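	/* dst is now 8-byte aligned; if diff == 0, src is aligned as well */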
	beqz t0, .Lmemcpy8

	/*
	 * a4 - size_t right_shift
	 * a5 - size_t left_shift
	 * a6 - size_t whole (number of dword stores)
	 */
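	/*
	 * src is not 8-byte aligned: load aligned dwords from src and merge
	 * each adjacent pair with shifts to form one aligned dword store to
	 * dst per iteration.
	 */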

	/* right_shift = (src & 0b111) * 8; */
	andi a4, a1, 0b111
	slli a4, a4, 3

	/* left_shift = 64 - right_shift */
	neg a5, a4
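	/* (RV64 shifts use only the low 6 bits of the shift amount) */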

	/* whole = len / 8 */
	srli a6, a2, 3

	/* len = len % 8 */
	andi a2, a2, 0b111

	/* t0 - uint64_t* ptr */

	/* ptr = src & ~0b111 */
	andi t0, a1, ~0b111

	/* src += whole * 8 */
	slli t1, a6, 3
	add a1, a1, t1

	/*
	 * t1 - uint64_t low
	 * t2 - uint64_t high
	 */

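	/*
	 * The first load fetches the whole aligned dword containing the first
	 * source byte; the bytes below src are discarded by the shift.
	 */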
	/* low = *ptr++ */
	ld t1, (t0)
	addi t0, t0, 8

	/* low >>= right_shift */
	srl t1, t1, a4

	beqz a6, .Llmain_skip
.Llmain:
	/* high = *ptr++ */
	ld t2, (t0)
	addi t0, t0, 8

	/* whole-- */
	addi a6, a6, -1

	/* temp = (high << left_shift) | low */
	sll t3, t2, a5
	or t3, t3, t1

	/* low = high >> right_shift */
	srl t1, t2, a4

	/* *dst++ = temp */
	sd t3, (a3)
	addi a3, a3, 8

	bnez a6, .Llmain

.Llmain_skip:

.Lend:
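	/*
	 * Copy the remaining len (< 8) bytes by jumping into the byte-copy
	 * sequence so that exactly len lb/sb pairs execute.
	 */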
	la t1, .Lduff_end
	slli t2, a2, 3
	sub t1, t1, t2
	jr t1
	lb t2, 6(a1)
	sb t2, 6(a3)
	lb t2, 5(a1)
	sb t2, 5(a3)
	lb t2, 4(a1)
	sb t2, 4(a3)
	lb t2, 3(a1)
	sb t2, 3(a3)
	lb t2, 2(a1)
	sb t2, 2(a3)
	lb t2, 1(a1)
	sb t2, 1(a3)
	lb t2, 0(a1)
	sb t2, 0(a3)
.Lduff_end:

.Lreturn:
	ret

/* executed when dst - src is a multiple of 8
 * a0 - void* dst
 * a1 - const void* src
 * a2 - size_t len
 */
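/*
 * Aligned case: large copies go through a 64-byte unrolled loop, the rest
 * is copied one dword at a time, and the final 0-7 bytes reuse the
 * byte-copy sequence at .Lduff_end above.
 */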
.Lmemcpy8:

	beqz a2, .Lreturn

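	/* only copies of at least 128 bytes use the 64-byte unrolled loop */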
	slti t0, a2, 128
	bnez t0, .Llmain8_64_skip

	/* a4 - uint64_t* end_unroll */

	/* end_unroll = dst + len / 64 * 64 */
	andi t0, a2, ~0b111111
	add a4, a3, t0

	/* len = len % 64 */
	andi a2, a2, 0b111111

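	/* copy 64 bytes (eight dwords) per iteration */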
.Llmain8_64:
	ld t0, 0(a1)
	ld t1, 8(a1)
	ld t2, 16(a1)
	ld t3, 24(a1)
	sd t0, 0(a3)
	sd t1, 8(a3)
	sd t2, 16(a3)
	sd t3, 24(a3)
	ld t0, 32(a1)
	ld t1, 40(a1)
	ld t2, 48(a1)
	ld t3, 56(a1)
	sd t0, 32(a3)
	sd t1, 40(a3)
	sd t2, 48(a3)
	sd t3, 56(a3)
	addi a3, a3, 64
	addi a1, a1, 64
	bne a3, a4, .Llmain8_64
.Llmain8_64_skip:

	beqz a2, .Lreturn

	/* a4 - uint64_t* end_align */

	/* end_align = (dst + len) & ~0b111 */
	add a4, a3, a2
	andi a4, a4, ~0b111

	/* len = len % 8 */
	andi a2, a2, 0b111

	beq a3, a4, .Llmain8_skip
.Llmain8:
	ld t0, (a1)
	sd t0, (a3)
	addi a3, a3, 8
	addi a1, a1, 8
	bne a3, a4, .Llmain8
.Llmain8_skip:

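	/* 0-7 bytes remain; finish through the byte copies at .Lduff_end */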
	la t1, .Lduff_end
	slli t2, a2, 3
	sub t1, t1, t2
	jr t1
END(memcpy)