/*
 * arch/powerpc/lib/memcpy_64.S
 * (from the GitHub repository awilliam/linux-vfio, master branch)
 */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

/*
 * memcpy - copy r5 bytes from the buffer at r4 to the buffer at r3.
 *
 * In:    r3 = destination pointer
 *        r4 = source pointer
 *        r5 = byte count
 * Out:   r3 = the destination pointer as passed in (saved at 48(r1) on
 *             entry and reloaded before every blr)
 * Uses:  r0, r6-r12, ctr, cr0, cr1, cr6, cr7 (across all code paths)
 * Stack: one doubleword is stored at 48(r1); no frame is created.
 *        NOTE(review): presumably this is the caller-provided parameter
 *        save slot for r3 under the 64-bit ELF ABI - confirm.
 *
 * Conventions used throughout:
 *   - PPC_MTOCRF 0x01,rX places the low four bits of rX in cr7, so
 *     cr7*4+0 .. cr7*4+3 test the 8, 4, 2 and 1 bits of a byte count.
 *   - cr1 holds the result of comparing the (remaining) count with 16.
 *
 * Code paths:
 *   count < 16                      -> .Lshort_copy
 *   destination not 8-byte aligned  -> .Ldst_unaligned, which copies 1-7
 *                                      bytes and re-enters at .Ldst_aligned
 *   source not 8-byte aligned       -> .Lsrc_unaligned
 *   otherwise                       -> doubleword loop below, then .Ldo_tail
 */
	.align	7
_GLOBAL(memcpy)
	std	r3,48(r1)	/* save destination pointer for return value */
	PPC_MTOCRF	0x01,r5	/* cr7 = low 4 bits of the byte count */
	cmpldi	cr1,r5,16	/* cr1 = count compared with 16 (unsigned) */
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r6,r6,7		/* r6 = 0..7; cr0.eq when dest is already aligned */
	dcbt	0,r4		/* cache-touch hint for the start of the source */
	blt	cr1,.Lshort_copy	/* fewer than 16 bytes to copy */
/*
 * Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 *
 * The bne tests cr0 from the "andi. r6,r6,7" above: it is taken when the
 * destination is not 8-byte aligned.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16	/* bias dest so the loop can store at 8(r3)/16(r3) */
/*
 * Divert to the shift-and-merge copy when the source is not 8-byte aligned.
 * NOTE(review): going by the macro name, this check is presumably kept only
 * on CPUs where CPU_FTR_UNALIGNED_LD_STD is clear - confirm against the
 * feature-fixup macro definitions.
 */
BEGIN_FTR_SECTION
	andi.	r0,r4,7		/* r0 = source offset within its doubleword */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	/*
	 * Doubleword copy, two doublewords per iteration.  Each value is
	 * loaded one step before it is stored; r3 and r4 stay biased so the
	 * update forms (ldu/stdu) advance them by 16 per iteration.
	 */
	srdi	r7,r5,4		/* r7 = count / 16 = loop iterations */
	ld	r9,0(r4)	/* first source doubleword */
	addi	r4,r4,-8	/* bias source for the ldu at 2: */
	mtctr	r7
	andi.	r5,r5,7		/* r5 = tail bytes (count & 7); cr0.eq if none */
	bf	cr7*4+0,2f	/* 8 bit clear: even # of doublewords, enter at 2: */
	/* Odd number of doublewords: first one goes via r8, enter at 1: */
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,3f		/* < 16 bytes left (only after .Ldst_unaligned): skip the loop */
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)	/* store the last doubleword loaded */
	beq	3f		/* no tail bytes (cr0 from the andi. above): done */
	addi	r3,r3,16	/* remove the bias: r3 = next destination byte */
/*
 * Copy the remaining 0-7 bytes as word / halfword / byte according to the
 * 4, 2 and 1 bits of the count held in cr7.  r4 still points 8 bytes before
 * the next unread source byte, hence the 8(r4) displacements.
 */
.Ldo_tail:
	bf	cr7*4+1,1f	/* 4 bit clear: no word */
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f	/* 2 bit clear: no halfword */
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f	/* 1 bit clear: no byte */
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,48(r1)	/* return dest pointer */
	blr

/*
 * .Lsrc_unaligned - copy when the source is not 8-byte aligned.
 *
 * Reached from the "bne .Lsrc_unaligned" that follows "andi. r0,r4,7" in
 * the .Ldst_aligned path, with:
 *   r0  = source address & 7 (non-zero)
 *   r3  = destination - 16 (bias applied at .Ldst_aligned)
 *   r4  = source pointer
 *   r5  = byte count
 *   cr7 = low 4 bits of the byte count
 *
 * The source pointer is rounded down to an 8-byte boundary so that only
 * aligned doublewords are loaded.  Each destination doubleword is built
 * from two neighbouring source doublewords:
 *
 *   d[i] = (s[i] << r10) | (s[i+1] >> r11),  r10 = 8 * r0,  r11 = 64 - r10
 *
 * NOTE(review): taking the leading bytes from the high-order end of each
 * doubleword assumes big-endian byte order - confirm for the target
 * configuration.
 *
 * Local label note: the "beq 4f" after label 5: has no later 4: inside this
 * section; it resolves to the next 4: in the file, which is the return
 * sequence at the end of .Lshort_copy.  Keep that ordering intact.
 */
.Lsrc_unaligned:
	srdi	r6,r5,3		/* r6 = number of whole doublewords in count */
	addi	r5,r5,-16
	subf	r4,r0,r4	/* round source down to an 8-byte boundary */
	srdi	r7,r5,4		/* r7 = (count - 16) / 16 = loop iterations */
	sldi	r10,r0,3	/* r10 = left shift amount in bits */
	cmpdi	cr6,r6,3	/* cr6 = doubleword count compared with 3 */
	andi.	r5,r5,7		/* r5 = tail bytes; cr0.eq if none (tested at 5:) */
	mtctr	r7
	subfic	r11,r10,64	/* r11 = right shift amount in bits */
	add	r5,r5,r0	/* r5 = tail bytes + source offset */

	bt	cr7*4+0,0f	/* 8 bit set: odd number of doublewords */

	/* Even number of doublewords. */
	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f		/* fewer than 3 doublewords: no loop pass needed */
	ld	r0,8(r4)
	/* s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12 */
	b	2f

	/* Odd number of doublewords. */
0:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8	/* extra bias so the single store at 5: lands first */
	blt	cr6,5f		/* fewer than 3 doublewords: only the store at 5: */
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f		/* exactly 3 doublewords: skip the loop */

	/* d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9 */
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

	/* Drain the pipeline: up to three doublewords are still pending. */
3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	beq	4f		/* no tail bytes: return via 4: in .Lshort_copy */
	/*
	 * Tail of 1-7 bytes.  r5 = tail bytes + source offset; when that
	 * exceeds 8 the tail runs into one more source doubleword, which is
	 * loaded and merged in.  Otherwise the bytes are already in r9.
	 */
	cmpwi	cr1,r5,8
	addi	r3,r3,32	/* remove the bias: r3 = next destination byte */
	sld	r9,r9,r10	/* unconsumed source bytes, left-justified */
	ble	cr1,6f
	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	/*
	 * r9 holds the tail bytes at its high-order end; each rotate brings
	 * the next 4, 2 or 1 of them to the low-order end for the store.
	 */
	bf	cr7*4+1,1f	/* 4 bit clear: no word */
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f	/* 2 bit clear: no halfword */
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f	/* 1 bit clear: no byte */
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,48(r1)	/* return dest pointer */
	blr

/*
 * .Ldst_unaligned - bring the destination up to an 8-byte boundary.
 *
 * Reached from the entry sequence of memcpy with:
 *   r3 = destination pointer (not 8-byte aligned)
 *   r4 = source pointer
 *   r5 = byte count (at least 16; shorter copies went to .Lshort_copy)
 *   r6 = number of bytes (1-7) up to the next 8-byte destination boundary
 *
 * Copies those r6 bytes as byte / halfword / word according to the 1, 2 and
 * 4 bits of r6, then re-establishes what .Ldst_aligned expects before
 * branching there: r3/r4 advanced by r6, r5 = remaining count, cr7 = low 4
 * bits of the remaining count, cr1 = remaining count compared with 16.
 */
.Ldst_unaligned:
	PPC_MTOCRF	0x01,r6	/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5	/* r5 = count remaining after alignment */
	li	r7,0		/* r7 = running offset into both buffers */
	cmpldi	cr1,r5,16	/* refresh cr1 for the reduced count */
	bf	cr7*4+3,1f	/* 1 bit clear: no byte */
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f	/* 2 bit clear: no halfword */
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f	/* 4 bit clear: no word */
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF	0x01,r5	/* cr7 = low 4 bits of the remaining count */
	add	r4,r6,r4	/* step source past the bytes just copied */
	add	r3,r6,r3	/* destination is now 8-byte aligned */
	b	.Ldst_aligned

/*
 * .Lshort_copy - copy fewer than 16 bytes.
 *
 * Reached from the entry sequence of memcpy when the byte count is below 16,
 * with r3 = destination, r4 = source and cr7 = low 4 bits of the count.
 * Copies 8 (as two words), 4, 2 and 1 bytes as selected by the corresponding
 * bits in cr7, advancing both pointers after each piece.  No alignment
 * handling is done here; accesses are at most word sized.
 *
 * The final 4: label (reload r3 and return) is also the target of the
 * "beq 4f" near the end of .Lsrc_unaligned, so it must remain the first 4:
 * label that follows that section.
 */
.Lshort_copy:
	bf	cr7*4+0,1f	/* 8 bit clear: no doubleword */
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f	/* 4 bit clear: no word */
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f	/* 2 bit clear: no halfword */
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f	/* 1 bit clear: no byte */
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,48(r1)	/* return dest pointer */
	blr