GitHub Repository: torvalds/linux
Path: blob/master/arch/parisc/lib/lusercopy.S
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * User Space Access Routines
 *
 * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 * Copyright (C) 2017 Helge Deller <[email protected]>
 * Copyright (C) 2017 John David Anglin <[email protected]>
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */

	.text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * unsigned long lclear_user(void *to, unsigned long n)
 *
 * Returns 0 for success; otherwise, returns the number of bytes
 * not transferred.  (A C-level sketch of these semantics follows
 * the routine below.)
 */

ENTRY_CFI(lclear_user)
	comib,=,n 0,%r25,$lclu_done
$lclu_loop:
	addib,<> -1,%r25,$lclu_loop
1:	stbs,ma %r0,1(%sr3,%r26)

$lclu_done:
	bv %r0(%r2)
	copy %r25,%r28

2:	b $lclu_done
	ldo 1(%r25),%r25

	ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
ENDPROC_CFI(lclear_user)
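
/*
 * For readers less fluent in PA-RISC assembly, the routine above behaves
 * roughly like the C sketch below.  The sketch is an assumption of this
 * commentary (the helper name is made up, and put_user() merely stands in
 * for the stbs,ma store plus its exception-table fixup):
 *
 *	unsigned long lclear_user_sketch(char __user *to, unsigned long n)
 *	{
 *		while (n) {
 *			if (put_user(0, to))	// store one zero byte; stop on fault
 *				break;
 *			to++;
 *			n--;
 *		}
 *		return n;	// 0 on success, else bytes not cleared
 *	}
 */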

/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are tried to get the best performance for various
 * conditions. In the optimal case, we copy in loops that move 32 or 16 bytes
 * at a time using general registers. Unaligned copies are handled either by
 * aligning the destination and then using a shift-and-write method, or in a
 * few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of
 * memcpy (written in C) is actually quite fast already. This routine is able
 * to beat it by 30-40% for aligned copies because of the loop unrolling, but
 * in some cases the glibc version is still slightly faster. This lends
 * credibility to the claim that gcc can generate very good code as long as
 * we are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that those were only efficient
 *   on old machines (pre-PA8000 processors).
 */
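
/*
 * As a rough map of the assembly below, here is a plain C sketch of the
 * same dispatch strategy.  It is illustrative only and an assumption of
 * this commentary: it ignores space registers and fault handling, and
 * pa_memcpy_sketch() is a made-up name, not a kernel function.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	static size_t pa_memcpy_sketch(void *dstp, const void *srcp, size_t len)
 *	{
 *		unsigned char *dst = dstp;
 *		const unsigned char *src = srcp;
 *
 *		if (len >= 16 && ((((uintptr_t)src ^ (uintptr_t)dst) & 3) == 0)) {
 *			// same alignment within a word: align dst, then copy
 *			// 16 bytes per iteration (the asm also has a 32-byte
 *			// doubleword loop on 64-bit kernels)
 *			while ((uintptr_t)dst & 3) {
 *				*dst++ = *src++;
 *				len--;
 *			}
 *			while (len > 15) {
 *				uint32_t *d = (uint32_t *)dst;
 *				const uint32_t *s = (const uint32_t *)src;
 *				d[0] = s[0];	// mirrors the ldw/stw pairs
 *				d[1] = s[1];	// in .Lcopy_loop_8
 *				d[2] = s[2];
 *				d[3] = s[3];
 *				dst += 16;
 *				src += 16;
 *				len -= 16;
 *			}
 *		}
 *		// mismatched alignment (see .Lunaligned_copy) or the tail:
 *		// byte-at-a-time copy
 *		while (len--)
 *			*dst++ = *src++;
 *		return 0;	// bytes not copied; nonzero only after a fault
 *	}
 */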

dst = arg0
src = arg1
len = arg2
end = arg3
t1 = r19
t2 = r20
t3 = r21
t4 = r22
srcspc = sr1
dstspc = sr2

t0 = r1
a1 = t1
a2 = t2
a3 = t3
a0 = t4

save_src = ret0
save_dst = ret1
save_len = r31
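
/*
 * Note on the aliases above: t1..t4 double as a0..a3, the four-word
 * rotation used by the shift-and-write (unaligned) loop further down,
 * while save_src/save_dst/save_len hold src, dst and len as they were
 * at the start of that loop, so the fault fixup can recompute how far
 * the copy actually got.
 */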

ENTRY_CFI(pa_memcpy)
	/* Last destination address */
	add dst,len,end

	/* short copy with less than 16 bytes? */
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

	/* same alignment? */
	xor src,dst,t0
	extru t0,31,2,t1
	cmpib,<>,n 0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
	/* only do 64-bit copies if we can get aligned. */
	extru t0,31,3,t1
	cmpib,<>,n 0,t1,.Lalign_loop32

	/* loop until we are 64-bit aligned */
.Lalign_loop64:
	extru dst,31,3,t1
	cmpib,=,n 0,t1,.Lcopy_loop_16_start
20:	ldb,ma 1(srcspc,src),t1
21:	stb,ma t1,1(dstspc,dst)
	b .Lalign_loop64
	ldo -1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_loop_16_start:
	ldi 31,t0
.Lcopy_loop_16:
	cmpb,COND(>>=),n t0,len,.Lword_loop

10:	ldd 0(srcspc,src),t1
11:	ldd 8(srcspc,src),t2
	ldo 16(src),src
12:	std,ma t1,8(dstspc,dst)
13:	std,ma t2,8(dstspc,dst)
14:	ldd 0(srcspc,src),t1
15:	ldd 8(srcspc,src),t2
	ldo 16(src),src
16:	std,ma t1,8(dstspc,dst)
17:	std,ma t2,8(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b .Lcopy_loop_16
	ldo -32(len),len

.Lword_loop:
	cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:	ldw,ma 4(srcspc,src),t1
21:	stw,ma t1,4(dstspc,dst)
	b .Lword_loop
	ldo -4(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

	/* loop until we are 32-bit aligned */
.Lalign_loop32:
	extru dst,31,2,t1
	cmpib,=,n 0,t1,.Lcopy_loop_8
20:	ldb,ma 1(srcspc,src),t1
21:	stb,ma t1,1(dstspc,dst)
	b .Lalign_loop32
	ldo -1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:	ldw 0(srcspc,src),t1
11:	ldw 4(srcspc,src),t2
12:	stw,ma t1,4(dstspc,dst)
13:	stw,ma t2,4(dstspc,dst)
14:	ldw 8(srcspc,src),t1
15:	ldw 12(srcspc,src),t2
	ldo 16(src),src
16:	stw,ma t1,4(dstspc,dst)
17:	stw,ma t2,4(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b .Lcopy_loop_8
	ldo -16(len),len

.Lbyte_loop:
	cmpclr,COND(<>) len,%r0,%r0
	b,n .Lcopy_done
20:	ldb 0(srcspc,src),t1
	ldo 1(src),src
21:	stb,ma t1,1(dstspc,dst)
	b .Lbyte_loop
	ldo -1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
	bv %r0(%r2)
	sub end,dst,ret0

	/* src and dst are not aligned the same way; need to go the hard way. */
.Lunaligned_copy:
	/* align until dst is 32-bit word-aligned */
	extru dst,31,2,t1
	cmpib,=,n 0,t1,.Lcopy_dstaligned
20:	ldb 0(srcspc,src),t1
	ldo 1(src),src
21:	stb,ma t1,1(dstspc,dst)
	b .Lunaligned_copy
	ldo -1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

	/* store src, dst and len in a safe place */
	copy src,save_src
	copy dst,save_dst
	copy len,save_len

	/* len now needs to give the number of words to copy */
	SHRREG len,2,len

	/*
	 * Copy from an unaligned src to an aligned dst using shifts.
	 * Handles 4 words per loop.
	 */
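
	/*
	 * In C terms the loop below does roughly the following (an
	 * illustrative sketch only; 'off' is src & 3, which is 1..3 on
	 * this path, and sp is src rounded down to a word boundary).
	 * Each aligned destination word is assembled from two adjacent
	 * source words with a funnel shift, which is what shrpw does
	 * through the %sar value set up just below:
	 *
	 *	unsigned int sh   = 8 * off;
	 *	unsigned int prev = *sp++;
	 *	while (words--) {
	 *		unsigned int cur = *sp++;
	 *		*dp++ = (prev << sh) | (cur >> (32 - sh));	// big-endian merge
	 *		prev = cur;
	 *	}
	 *
	 * The real code unrolls this four words at a time (.Ldo0-.Ldo4)
	 * and enters the unrolled loop at .Lcase0-.Lcase3 depending on
	 * the word count modulo 4.
	 */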

	depw,z src,28,2,t0
	subi 32,t0,t0
	mtsar t0
	extru len,31,2,t0
	cmpib,= 2,t0,.Lcase2
	/* Make src aligned by rounding it down. */
	depi 0,31,2,src

	cmpiclr,<> 3,t0,%r0
	b,n .Lcase3
	cmpiclr,<> 1,t0,%r0
	b,n .Lcase1
.Lcase0:
	cmpb,COND(=) %r0,len,.Lcda_finish
	nop

1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b,n .Ldo3
.Lcase1:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	ldo -1(len),len
	cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a3, a0, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a0, a1, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a1, a2, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
	ldo -4(len),len
	cmpb,COND(<>) %r0,len,.Ldo4
	nop
.Ldo0:
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

.Lcda_rdfault:
.Lcda_finish:
	/* calculate new src, dst and len and jump to byte-copy loop */
	sub dst,save_dst,t0
	add save_src,t0,src
	b .Lbyte_loop
	sub save_len,t0,len
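
	/*
	 * In C terms, the fixup above is roughly (illustrative only):
	 *
	 *	copied = dst - save_dst;	// bytes already stored
	 *	src    = save_src + copied;
	 *	len    = save_len - copied;
	 *
	 * i.e. progress is measured by how far dst has advanced, since
	 * src may have been read ahead of the last completed store.
	 */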

.Lcase3:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo2
	ldo 1(len),len
.Lcase2:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo1
	ldo 2(len),len


	/* fault exception fixup handlers: */
#ifdef CONFIG_64BIT
.Lcopy16_fault:
	b .Lcopy_done
10:	std,ma t1,8(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
	b .Lcopy_done
10:	stw,ma t1,4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
ENDPROC_CFI(pa_memcpy)

	.end