GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/lib/memcpy_64.S
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/kasan.h>

#ifndef SELFTEST_CASE
/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE 0
#endif

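/*
 * void *memcpy(void *to, const void *from, size_t n)
 *
 * Per the ppc64 ELF ABI the arguments arrive as r3 = dest, r4 = src,
 * r5 = byte count, and the original dest pointer is returned in r3.
 */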
	.align	7
_GLOBAL_TOC_KASAN(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
	cmpdi	cr7,r5,0
#else
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifdef CONFIG_PPC_BOOK3S_64
	b	memcpy_power7
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
	/* dumb little-endian memcpy that will get replaced at runtime */
	addi	r9,r3,-1
	addi	r4,r4,-1
	beqlr	cr7
	mtctr	r5
1:	lbzu	r10,1(r4)
	stbu	r10,1(r9)
	bdnz	1b
	blr
#else
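/* Big-endian copy: stash the low 4 bits of the length in cr7 so the
   tail code can test the 8/4/2/1-byte remainder bits with bf/bt. */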
	PPC_MTOCRF(0x01,r5)
	cmpldi	cr1,r5,16
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7
	dcbt	0,r4
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
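/* The destination is 8-byte aligned once we get here.  If the source is
   also aligned (or the CPU handles unaligned ld/std), the loop below moves
   16 bytes per iteration with paired ld/std. */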
.Ldst_aligned:
	addi	r3,r3,-16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	srdi	r7,r5,4
	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7
	bf	cr7*4+0,2f
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,3f
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)
	beq	3f
	addi	r3,r3,16
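	/* store the remaining 1-7 bytes; the 4/2/1-byte remainder bits
	   were captured in cr7 above */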
.Ldo_tail:
	bf	cr7*4+1,1f
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

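/* The destination is 8-byte aligned but the source is not.  Round the
   source pointer down, load aligned doublewords, and merge neighbouring
   pairs with sld/srd (r10 = misalignment in bits, r11 = 64 - r10) to build
   the doublewords stored to the aligned destination. */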
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpdi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0

	bt	cr7*4+0,0f

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	beq	4f
	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,6f
	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

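/* The destination is not 8-byte aligned.  r6 holds the number of bytes to
   the next 8-byte boundary; copy 1, 2 and/or 4 bytes as its low bits (now
   in cr7) dictate, then rejoin the aligned path at .Ldst_aligned. */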
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

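/* Copies of fewer than 16 bytes land here; cr7 already holds the
   8/4/2/1-byte bits of the length. */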
.Lshort_copy:
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr
#endif
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL_KASAN(memcpy)