GitHub Repository: torvalds/linux
Path: blob/master/arch/loongarch/lib/memmove.S

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>

.section .noinstr.text, "ax"

SYM_FUNC_START(memmove)
	blt	a0, a1, __memcpy	/* dst < src, memcpy */
	blt	a1, a0, __rmemcpy	/* src < dst, rmemcpy */
	jr	ra			/* dst == src, return */
SYM_FUNC_END(memmove)
SYM_FUNC_ALIAS(__memmove, memmove)

EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL(__memmove)

_ASM_NOKPROBE(memmove)
_ASM_NOKPROBE(__memmove)

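/*
 * Illustration only (not part of the kernel source): the dispatch above
 * behaves roughly like this C sketch. A forward copy is safe when dst
 * starts below src, a backward copy is required when dst starts above
 * src, and equal pointers mean there is nothing to do.
 *
 *	void *memmove(void *dst, const void *src, size_t n)
 *	{
 *		if (dst < src)
 *			return __memcpy(dst, src, n);	// copy front to back
 *		if (src < dst)
 *			return __rmemcpy(dst, src, n);	// copy back to front
 *		return dst;				// dst == src
 *	}
 */
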
SYM_FUNC_START(__rmemcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __rmemcpy_generic", \
			"b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(__rmemcpy)
_ASM_NOKPROBE(__rmemcpy)

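/*
 * Illustration only (not part of the kernel source): ALTERNATIVE patches
 * one of the two branches into the kernel image at boot, depending on
 * whether the CPU advertises hardware unaligned access (CPU_FEATURE_UAL).
 * The net effect resembles this C sketch, except that the feature test is
 * resolved once at patch time rather than on every call (cpu_has_ual is
 * an assumed stand-in for the real feature check):
 *
 *	void *__rmemcpy(void *dst, const void *src, size_t n)
 *	{
 *		if (cpu_has_ual)	// decided at boot, not per call
 *			return __rmemcpy_fast(dst, src, n);
 *		return __rmemcpy_generic(dst, src, n);
 *	}
 */
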
/*
 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__rmemcpy_generic)
	move	a3, a0
	beqz	a2, 2f

	add.d	a0, a0, a2
	add.d	a1, a1, a2

1:	ld.b	t0, a1, -1
	st.b	t0, a0, -1
	addi.d	a0, a0, -1
	addi.d	a1, a1, -1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)
_ASM_NOKPROBE(__rmemcpy_generic)

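/*
 * Illustration only (not part of the kernel source): the generic path
 * above is a plain byte-by-byte reverse copy, equivalent to this C
 * sketch. Starting from the highest address and walking down makes the
 * copy safe when the destination overlaps the tail of the source.
 *
 *	void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *	{
 *		unsigned char *d = (unsigned char *)dst + n;
 *		const unsigned char *s = (const unsigned char *)src + n;
 *
 *		while (n--)
 *			*--d = *--s;
 *		return dst;
 *	}
 */
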
/*
 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__rmemcpy_fast)
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small	/* n < 9: take the small-copy path */

	add.d	a3, a1, a2		/* a3 = src + n (end of src) */
	add.d	a2, a0, a2		/* a2 = dst + n (end of dst) */
	ld.d	a6, a1, 0		/* save first 8 bytes of src */
	ld.d	a7, a3, -8		/* save last 8 bytes of src */

	/* align the end of dst down to a multiple of 8 */
	andi	t1, a2, 7
	sub.d	a3, a3, t1
	sub.d	a5, a2, t1

	addi.d	a4, a1, 64
	bgeu	a4, a3, .Llt64

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	ld.d	t4, a3, -40
	ld.d	t5, a3, -48
	ld.d	t6, a3, -56
	ld.d	t7, a3, -64
	addi.d	a3, a3, -64
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	st.d	t4, a5, -40
	st.d	t5, a5, -48
	st.d	t6, a5, -56
	st.d	t7, a5, -64
	addi.d	a5, a5, -64
	bltu	a4, a3, .Lloop64

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a1, 32
	bgeu	a4, a3, .Llt32
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	ld.d	t2, a3, -24
	ld.d	t3, a3, -32
	addi.d	a3, a3, -32
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	st.d	t2, a5, -24
	st.d	t3, a5, -32
	addi.d	a5, a5, -32

.Llt32:
	addi.d	a4, a1, 16
	bgeu	a4, a3, .Llt16
	ld.d	t0, a3, -8
	ld.d	t1, a3, -16
	addi.d	a3, a3, -16
	st.d	t0, a5, -8
	st.d	t1, a5, -16
	addi.d	a5, a5, -16

.Llt16:
	addi.d	a4, a1, 8
	bgeu	a4, a3, .Llt8
	ld.d	t0, a3, -8
	st.d	t0, a5, -8

.Llt8:
	st.d	a6, a0, 0		/* store saved head */
	st.d	a7, a2, -8		/* store saved tail */

	/* return */
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)
_ASM_NOKPROBE(__rmemcpy_fast)
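
/*
 * Illustration only (not part of the kernel source): the fast path above
 * leans on hardware unaligned access. It saves the first and last 8 bytes
 * of src up front, copies the bulk backwards in 8-byte words (64 bytes
 * per iteration while possible), and finally stores the saved head and
 * tail as two possibly-unaligned, possibly-overlapping 8-byte stores.
 * Loading the head and tail before the bulk copy keeps them correct even
 * when dst and src overlap. A simplified C sketch (the real code aligns
 * the end of dst rather than n, and stops the backward loop near the
 * start of the buffer, letting the head store cover the remainder):
 *
 *	void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *		unsigned long head, tail, t;
 *		size_t off = n & ~7UL;	// simplified: asm aligns dst + n
 *
 *		if (n < 9)		// __memcpy_small: helper in memcpy.S
 *			return __memcpy_small(dst, src, n);
 *
 *		__builtin_memcpy(&head, s, 8);		// save head early
 *		__builtin_memcpy(&tail, s + n - 8, 8);	// save tail early
 *
 *		while (off >= 8) {	// bulk copy, back to front
 *			__builtin_memcpy(&t, s + off - 8, 8);
 *			__builtin_memcpy(d + off - 8, &t, 8);
 *			off -= 8;
 *		}
 *
 *		__builtin_memcpy(d, &head, 8);		// unaligned head
 *		__builtin_memcpy(d + n - 8, &tail, 8);	// unaligned tail
 *		return dst;
 *	}
 */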