/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <[email protected]>
 * Author - Balbir Singh <[email protected]>
 */
/* Upstream location: torvalds/linux, arch/powerpc/lib/copy_mc_64.S (master). */
/*
 * Headers are pulled in through the C preprocessor.  The macros used below
 * (EXPORT_SYMBOL_GPL, EX_TABLE, STK_REG, STACKFRAMESIZE, _GLOBAL) are not
 * defined in this file, so they are presumably provided by these headers.
 */
#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/errno.h>
/*
 * err1 - mark the next instruction as recoverable via .Ldo_err1.
 *
 * Used as a prefix on the same line ("err1; lbz r0,0(r4)"), so local label
 * 100 lands on the address of the instruction that follows.  EX_TABLE is
 * defined outside this file; it is expected to record a fixup entry that
 * sends a fault at 100b to .Ldo_err1.  Use this marker only where no stack
 * frame is active, because .Ldo_err1 does not unwind one.
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm
/*
 * err2 - mark the next instruction as recoverable via .Ldo_err2.
 *
 * Same shape as a same-line prefix ("err2; ld r0,0(r4)"): local label 200
 * lands on the following instruction and EX_TABLE (defined outside this
 * file) pairs that address with .Ldo_err2.  .Ldo_err2 reloads r14-r22 and
 * pops STACKFRAMESIZE from r1, so this marker belongs on accesses made
 * while that frame is live.
 */
	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm
/*
 * err3 - mark the next instruction as recoverable via .Ldone.
 *
 * Used on the loads and stores of the byte-by-byte retry loop.  Local label
 * 300 lands on the following instruction and EX_TABLE (defined outside this
 * file) pairs that address with .Ldone, which returns the current CTR value.
 */
	.macro err3
300:	EX_TABLE(300b,.Ldone)
	.endm
/*
 * Fault fixup targets for the err2 and err1 markers.
 *
 * .Ldo_err2 is the target for accesses marked err2.  It reloads r14-r22
 * from their STK_REG slots and pops STACKFRAMESIZE from r1, then falls
 * through into .Ldo_err1.
 *
 * .Ldo_err1 is the target for accesses marked err1 and is also reached by
 * the fall-through above.  It retries the copy one byte at a time:
 *   r3 = destination cursor, r4 = source cursor,
 *   r7 = byte count loaded into CTR (the remaining-bytes counter kept by
 *        copy_mc_generic).
 * If every byte is copied the routine returns 0 in r3.  The byte accesses
 * are marked err3, so a fault inside the loop is routed to .Ldone.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b		/* CTR drops only after a byte is fully copied */
	li	r3,0		/* everything copied: return 0 */
	blr
/*
 * .Ldone - fixup target for accesses marked err3 (the byte-by-byte loop).
 *
 * Returns the current CTR value in r3.  The byte loop decrements CTR only
 * at its closing bdnz, so at the faulting access CTR still counts the byte
 * being attempted plus all bytes after it.
 */
.Ldone:
	mfctr	r3
	blr
/*
 * copy_mc_generic - copy memory with fault fixups on every load and store.
 *
 * In:	r3 = destination pointer (all stores go through it)
 *	r4 = source pointer (all loads go through it)
 *	r5 = number of bytes to copy
 * Out:	r3 = 0 when the straight-line copy completes.  When a marked access
 *	faults, control leaves through .Ldo_err1/.Ldo_err2, which retry byte
 *	by byte and return 0, or the CTR value at the failing byte.
 *
 * Register roles inside the routine:
 *	r7		bytes not yet copied; decremented after each chunk and
 *			consumed by the fixup code
 *	r5		bytes left after alignment, then masked down as the
 *			larger chunk sizes are disposed of
 *	r6		scratch (alignment count, loop count, CR7 source) and
 *			data register
 *	r0, r8-r12	data registers
 *	r14-r22		extra data registers for the 128B/64B stages; saved in
 *			a STACKFRAMESIZE frame and reloaded at label 7
 *	cr7		bit flags selecting which power-of-two chunks to copy
 *	CTR		128B iteration count
 *
 * Accesses made while the frame is live are marked err2 (the fixup unwinds
 * the frame first); all others are marked err1.
 *
 * NOTE(review): in every chunk the source pointer is advanced between the
 * loads and the stores, while r3 and r7 are updated only after the stores.
 * If a store (rather than a load) faults, the byte-wise retry therefore
 * starts with r4 ahead of r3/r7.  Confirm that only load faults are expected
 * to reach the fixups.
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5			/* r7 = remaining-bytes counter */
	cmpldi	r5,16
	blt	.Lshort_copy

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6			/* cr7 = low 4 bits of -src */
	clrldi	r6,r6,(64-3)		/* r6 = (-src) & 7 = bytes to align */

	bf	cr7*4+3,1f		/* 1 byte */
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f		/* 2 bytes */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f		/* 4 bytes */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* r5 = bytes left after alignment */
	cmpldi	r5,128			/* cr0 is consumed by "blt 5f" below */

	/*
	 * Open a stack frame and save r14-r22 so they can carry data.  LR is
	 * stored at offset 16 above the new frame; nothing in this file
	 * reloads it.
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	blt	5f			/* fewer than 128 bytes left */
	srdi	r6,r5,7			/* number of whole 128B blocks */
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 &= 127 */

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 = 64/32/16-byte flags */

6:	bf	cr7*4+1,7f		/* 64 bytes */
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
	addi	r4,r4,64
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r3,r3,64
	subi	r7,r7,64

	/* r14-r22 are no longer needed: reload them and drop the frame. */
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f		/* 32 bytes */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f		/* 16 bytes */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* r5 &= 15 */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* cr7 = 8/4/2/1-byte flags */
	bf	cr7*4+0,12f		/* 8 bytes, as two words */
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f		/* 4 bytes */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f		/* 2 bytes */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

14:	bf	cr7*4+3,15f		/* final byte; pointers and r7 are left as-is */
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0			/* success: return 0 */
	blr
/* Export copy_mc_generic through EXPORT_SYMBOL_GPL (macro defined outside this file). */
EXPORT_SYMBOL_GPL(copy_mc_generic);