GitHub Repository: torvalds/linux
Path: blob/master/arch/loongarch/lib/memcpy.S
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>
#include <asm/unwind_hints.h>

.section .noinstr.text, "ax"

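/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *
 * Dispatch stub: the ALTERNATIVE below is resolved when alternatives are
 * applied at boot.  CPUs that advertise hardware unaligned access
 * (CPU_FEATURE_UAL) get the branch to __memcpy_fast patched in; all
 * others keep the default branch to the byte-wise __memcpy_generic.
 */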
SYM_FUNC_START(memcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memcpy_generic", \
			"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
SYM_FUNC_ALIAS(__memcpy, memcpy)

EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

_ASM_NOKPROBE(memcpy)
_ASM_NOKPROBE(__memcpy)

/*
 * void *__memcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__memcpy_generic)
	move	a3, a0
	beqz	a2, 2f

1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)

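/*
 * void *__memcpy_small(void *dst, const void *src, size_t n), n in [0, 8]
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Reached from __memcpy_fast when n < 9.  Computed jump table: each case
 * below is aligned to 32 bytes (.align 5), so case n lives at the table
 * base + n * 32.  "pcaddi t0, 8" sets t0 to PC + 32, the address of case 0
 * (the 16-byte entry sequence is padded up to 32 bytes by the following
 * .align 5), and "slli.d a2, a2, 5" scales n by 32 before the indirect
 * jump.  Case 7 handles its odd length with two overlapping 4-byte
 * accesses at offsets 0 and 3.
 */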
	.align	5
SYM_FUNC_START_NOALIGN(__memcpy_small)
	pcaddi	t0, 8
	slli.d	a2, a2, 5
	add.d	t0, t0, a2
	jr	t0

	.align	5
0:	jr	ra

	.align	5
1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	jr	ra

	.align	5
2:	ld.h	t0, a1, 0
	st.h	t0, a0, 0
	jr	ra

	.align	5
3:	ld.h	t0, a1, 0
	ld.b	t1, a1, 2
	st.h	t0, a0, 0
	st.b	t1, a0, 2
	jr	ra

	.align	5
4:	ld.w	t0, a1, 0
	st.w	t0, a0, 0
	jr	ra

	.align	5
5:	ld.w	t0, a1, 0
	ld.b	t1, a1, 4
	st.w	t0, a0, 0
	st.b	t1, a0, 4
	jr	ra

	.align	5
6:	ld.w	t0, a1, 0
	ld.h	t1, a1, 4
	st.w	t0, a0, 0
	st.h	t1, a0, 4
	jr	ra

	.align	5
7:	ld.w	t0, a1, 0
	ld.w	t1, a1, 3
	st.w	t0, a0, 0
	st.w	t1, a0, 3
	jr	ra

	.align	5
8:	ld.d	t0, a1, 0
	st.d	t0, a0, 0
	jr	ra
SYM_FUNC_END(__memcpy_small)
_ASM_NOKPROBE(__memcpy_small)

/*
 * void *__memcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__memcpy_fast)
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small

	add.d	a3, a1, a2
	add.d	a2, a0, a2
	ld.d	a6, a1, 0
	ld.d	a7, a3, -8

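	/*
	 * a3 = src + n, and a2 is repurposed as dst + n.  a6/a7 capture the
	 * first and last 8 bytes of the source up front; they are stored
	 * only at .Llt8, after the aligned middle has been copied, which
	 * covers both the (possibly unaligned) head and the sub-8-byte
	 * tail.  This is safe because n >= 9 at this point.
	 */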
	/* align up destination address */
	andi	t1, a0, 7
	sub.d	t0, zero, t1
	addi.d	t0, t0, 8
	add.d	a1, a1, t0
	add.d	a5, a0, t0

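	/*
	 * t0 = 8 - (dst & 7), i.e. a value in [1, 8]: a5 is the first
	 * 8-byte-aligned destination slot strictly above dst, and a1 is
	 * advanced to the matching source position.  Any head bytes
	 * skipped here are written by the st.d of a6 at .Llt8.
	 */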
	addi.d	a4, a3, -64
	bgeu	a1, a4, .Llt64

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	ld.d	t4, a1, 32
	ld.d	t5, a1, 40
	ld.d	t6, a1, 48
	ld.d	t7, a1, 56
	addi.d	a1, a1, 64
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	st.d	t4, a5, 32
	st.d	t5, a5, 40
	st.d	t6, a5, 48
	st.d	t7, a5, 56
	addi.d	a5, a5, 64
	bltu	a1, a4, .Lloop64

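	/*
	 * At most 64 source bytes remain between a1 and the end of the
	 * source.  The ladder below drains them in 32-, 16- and 8-byte
	 * chunks where possible, then .Llt8 stores the head/tail
	 * doublewords saved in a6/a7.
	 */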
	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Llt32
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	addi.d	a1, a1, 32
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	addi.d	a5, a5, 32

.Llt32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Llt16
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	addi.d	a1, a1, 16
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	addi.d	a5, a5, 16

.Llt16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Llt8
	ld.d	t0, a1, 0
	st.d	t0, a5, 0

.Llt8:
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)

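/*
 * __memcpy_small jumps through a computed target, so objtool cannot follow
 * its control flow; mark it as having a non-standard stack frame so that
 * stack validation skips it.
 */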
STACK_FRAME_NON_STANDARD __memcpy_small