CoCalc -- copy_user

GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/sh/lib64/copy_user_memcpy.S
¹⁰⁸¹⁷ views
1
!
2
! Fast SH memcpy
3
!
4
! by Toshiyasu Morita (tm@netcom.com)
5
! hacked by J"orn Rennecke (joern.rennecke@superh.com) ("o for o-umlaut)
6
! SH5 code Copyright 2002 SuperH Ltd.
7
!
8
! Entry: ARG0: destination pointer
9
!        ARG1: source pointer
10
!        ARG2: byte count
11
!
12
! Exit:  RESULT: destination pointer
13
!        any other registers in the range r0-r7: trashed
14
!
15
! Notes: Usually one wants to do small reads and write a longword, but
16
!        unfortunately it is difficult in some cases to concatenate bytes
17
!        into a longword on the SH, so this does a longword read and small
18
!        writes.
19
!
20
! This implementation makes two assumptions about how it is called:
21
!
22
! 1.: If the byte count is nonzero, the address of the last byte to be
23
!     copied is unsigned greater than the address of the first byte to
24
!     be copied.  This could be easily swapped for a signed comparison,
25
!     but the algorithm used needs some comparison.
26
!
27
! 2.: When there are two or three bytes in the last word of an 11-or-more
28
!     bytes memory chunk to be copied, the rest of the word can be read
29
!     without side effects.
30
!     This could be easily changed by increasing the minimum size of
31
!     a fast memcpy and the amount subtracted from r7 before L_2l_loop be 2,
32
!     however, this would cost a few extra cycles on average.
33
!     For SHmedia, the assumption is that any quadword can be read in its
34
!     entirety if at least one byte is included in the copy.
35

36
/* Imported into Linux kernel by Richard Curnow.  This is used to implement the
37
   __copy_user function in the general case, so it has to be a distinct
38
   function from intra-kernel memcpy to allow for exception fix-ups in the
39
   event that the user pointer is bad somewhere in the copy (e.g. due to
40
   running off the end of the vma).
41

42
   Note, this algorithm will be slightly wasteful in the case where the source
43
   and destination pointers are equally aligned, because the stlo/sthi pairs
44
   could then be merged back into single stores.  If there are a lot of cache
45
   misses, this is probably offset by the stall lengths on the preloads.
46

47
*/
48

49
/* NOTE : Prefetches removed and allocos guarded by synco to avoid TAKum03020
50
 * erratum.  The first two prefetches are nop-ed out to avoid upsetting the
51
 * instruction counts used in the jump address calculation.
52
 * */
53

54
	.section .text..SHmedia32,"ax"
55
	.little
56
	.balign 32
57
	.global copy_user_memcpy
58
	.global copy_user_memcpy_end
59
copy_user_memcpy:
/*
 * Entry point and small-size dispatch.
 *
 * Register roles as read from the code in this file (r2/r3/r4 appear to
 * carry ARG0/ARG1/ARG2 of the file header -- TODO confirm against the ABI):
 *   r2   destination pointer; only read, never written, by this routine
 *   r3   source pointer
 *   r4   byte count
 *   r5   r2 + r4, one past the last destination byte
 *   r6   r3 + r4, one past the last source byte
 *   r18  return address, copied into tr1 by ptabs
 *
 * Counts of 25 and above branch to Large.  Smaller counts take a computed
 * jump into the handler table that starts at L1: the target offset is
 * built from nsb(r4) shifted left by 5, i.e. one 32-byte slot per nsb
 * value.  The table comments give the resulting size classes:
 * 0, 1, 2..3, 4..7, 8..15 and 16..24 bytes.
 *
 * Because the jump target is plain arithmetic on label distances, the
 * number of instructions between L1 and each handler must not change
 * (this is why the removed prefetches were replaced by nops).
 */
60

61
/* Unaligned access helpers.  Each expands to a lo/hi instruction pair at
   offsets O and O+7 (quadword, Q) or O and O+3 (longword, L) from P.
   The load forms return two partial results in D0 and D1; the handlers
   that use them OR the two together before storing. */
#define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1
62
#define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1
63
#define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1
64
#define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1
65

66
	nop ! ld.b r3,0,r63 ! TAKum03020
67
	pta/l Large,tr0	! tr0 = Large, the path for big copies
68
	movi 25,r0	! threshold: the table handles counts 0..24
69
	bgeu/u r4,r0,tr0	! count >= 25 (unsigned compare): go to Large
70
	nsb r4,r0	! r0 = nsb(count); picks the size class
71
	shlli r0,5,r0	! r0 *= 32, the spacing of the table slots
72
	movi (L1-L0+63*32 + 1) & 0xffff,r1	! base chosen so that an nsb result of 63 lands on L1 (the 0 byte slot); the +1 is presumably the SHmedia mode bit of the target - TODO confirm
73
	sub r1, r0, r0	! r0 = slot offset relative to L0
74
L0:	ptrel r0,tr0	! tr0 = address of this instruction + r0
75
	add r2,r4,r5	! r5 = dest + count
76
	ptabs r18,tr1	! tr1 = return address taken from r18
77
	add r3,r4,r6	! r6 = src + count
78
	blink tr0,r63	! jump to the selected slot; link value discarded into r63
79

80
/* Rearranged to make cut2 safe */
/*
 * Handlers for byte counts 0..24, reached through the computed jump at L0.
 *
 * State on entry to every slot (set up by the dispatch code):
 *   r2 = dest, r3 = src, r5 = dest + count, r6 = src + count,
 *   tr1 = return address.
 *
 * Layout: the dispatch places the slots 32 bytes apart starting at L1, in
 * the order 0, 1, 2..3, 4..7, 8..15, 16..24 bytes.  Every slot before the
 * last one therefore holds exactly eight instructions (the two-instruction
 * load macros count as two).  Do not add or remove instructions here
 * without re-checking that spacing.
 *
 * Handlers that need more than eight instructions finish in a "cntd."
 * tail: L2_3 uses the spare room of the 0 byte slot, L8_15 the spare room
 * of the 1 byte slot, and L4_7 sits in front of L1.
 *
 * Strategy for 2 bytes and up: copy the leading bytes/longword/quadword(s)
 * from the start and the trailing byte/longword/quadword measured back
 * from the end.  The two regions may overlap; overlapping bytes are
 * simply written twice with the same value.
 */
81
	.balign 8
82
L4_7:	/* 4..7 byte memcpy cntd. */
83
	stlo.l r2, 0, r0	! finish storing the first longword at dest
84
	or r6, r7, r6	! merge the two halves of the last source longword
85
	sthi.l r5, -1, r6	! store the last longword so that it ends at dest+count-1
86
	stlo.l r5, -4, r6
87
	blink tr1,r63	! return
88

89
	.balign 8
90
L1:	/* 0 byte memcpy */
91
	nop
92
	blink tr1,r63	! nothing to copy: return
93
	nop	! filler; together with L2_3 below this completes the eight instruction slot
94
	nop
95
	nop
96
	nop
97

98
L2_3:	/* 2 or 3 byte memcpy cntd. */
99
	st.b r5,-1,r6	! store the last byte at dest+count-1
100
	blink tr1,r63	! return
101

102
	/* 1 byte memcpy */
103
	ld.b r3,0,r0	! slot for count 1 starts here
104
	st.b r2,0,r0
105
	blink tr1,r63	! return
106

107
L8_15:	/* 8..15 byte memcpy cntd. */
108
	stlo.q r2, 0, r0	! finish storing the first quadword at dest
109
	or r6, r7, r6	! merge the two halves of the last source quadword
110
	sthi.q r5, -1, r6	! store the last quadword so that it ends at dest+count-1
111
	stlo.q r5, -8, r6
112
	blink tr1,r63	! return
113

114
	/* 2 or 3 byte memcpy */
115
	ld.b r3,0,r0	! byte 0
116
	nop ! ld.b r2,0,r63 ! TAKum03020
117
	ld.b r3,1,r1	! byte 1
118
	st.b r2,0,r0
119
	pta/l L2_3,tr0	! the tail at L2_3 stores the final byte
120
	ld.b r6,-1,r6	! r6 = last source byte, at src+count-1
121
	st.b r2,1,r1
122
	blink tr0, r63	! continue at L2_3
123

124
	/* 4 .. 7 byte memcpy */
125
	LDUAL (r3, 0, r0, r1)
126
	pta L4_7, tr0	! the tail at L4_7 finishes the copy
127
	ldlo.l r6, -4, r7	! r7, r6 = lo and hi parts of the last source longword
128
	or r0, r1, r0	! r0 = first source longword
129
	sthi.l r2, 3, r0	! hi part of the first longword; L4_7 stores the lo part
130
	ldhi.l r6, -1, r6
131
	blink tr0, r63	! continue at L4_7
132

133
	/* 8 .. 15 byte memcpy */
134
	LDUAQ (r3, 0, r0, r1)
135
	pta L8_15, tr0	! the tail at L8_15 finishes the copy
136
	ldlo.q r6, -8, r7	! r7, r6 = lo and hi parts of the last source quadword
137
	or r0, r1, r0	! r0 = first source quadword
138
	sthi.q r2, 7, r0	! hi part of the first quadword; L8_15 stores the lo part
139
	ldhi.q r6, -1, r6
140
	blink tr0, r63	! continue at L8_15
141

142
	/* 16 .. 24 byte memcpy */
	/* Last slot of the table, so it is free to run past eight
	   instructions.  Copies source bytes 0..15 as two quadwords, then
	   the final eight bytes measured back from the end. */
143
	LDUAQ (r3, 0, r0, r1)
144
	LDUAQ (r3, 8, r8, r9)
145
	or r0, r1, r0	! r0 = source bytes 0..7
146
	sthi.q r2, 7, r0
147
	or r8, r9, r8	! r8 = source bytes 8..15
148
	sthi.q r2, 15, r8
149
	ldlo.q r6, -8, r7	! r7, r6 = lo and hi parts of the last source quadword
150
	ldhi.q r6, -1, r6
151
	stlo.q r2, 8, r8
152
	stlo.q r2, 0, r0
153
	or r6, r7, r6	! r6 = last eight source bytes
154
	sthi.q r5, -1, r6	! store them so that they end at dest+count-1
155
	stlo.q r5, -8, r6
156
	blink tr1,r63	! return
157

158
Large:
/*
 * Copy path for counts of 25 bytes and above (entered from the bgeu at the
 * top of copy_user_memcpy with r2 = dest, r3 = src, r4 = count).
 *
 * Scheme as read from the code below: source quadwords are fetched with
 * ldx.q at r22 + r6, where r6 = src - dest and r22 is a destination cursor
 * chosen so that r22 + r6 is the first 8-byte boundary above src.  Each
 * fetched quadword is written to the (possibly unaligned) destination
 * with an sthi.q/stlo.q pair.  r0 always holds the quadword that has been
 * loaded but not yet stored.
 *
 * Registers set up here:
 *   r7   (src | -8), i.e. (src & 7) - 8, a value in -8..-1
 *   r22  dest - r7: destination cursor
 *   r6   src - dest
 *   r5   dest + count - 16: bound for Loop_ua
 *   r27  first 64+8 (size threshold), then dest + count - 64 (Loop_line bound)
 *   r19, r20, r21   r6 - 24, r6 - 16, r6 - 8: source offsets used in Loop_line
 *   r36  r6 + 64; only referenced by the commented-out prefetch
 *
 * NOTE(review): the file header lists only r0-r7 as trashed, but this path
 * also writes r19-r25, r27 and r36 -- confirm these are free for use under
 * the calling convention.
 */
159
	! ld.b r2, 0, r63 ! TAKum03020
160
	pta/l  Loop_ua, tr1	! tr1 = Loop_ua (tr1 is reloaded with the return address before exit)
161
	ori r3, -8, r7	! r7 = (src & 7) - 8
162
	sub r2, r7, r22	! r22 = dest + 8 - (src & 7)
163
	sub r3, r2, r6	! r6 = src - dest
164
	add r2, r4, r5	! r5 = dest + count
165
	ldlo.q r3, 0, r0	! leading source bytes, up to the next quadword boundary
166
	addi r5, -16, r5	! r5 = dest + count - 16
167
	movi 64+8, r27 ! could subtract r7 from that.
168
	stlo.q r2, 0, r0	! write eight bytes at dest; those at r22 and beyond
169
	sthi.q r2, 7, r0	! are rewritten by the stores that follow
170
	ldx.q r22, r6, r0	! r0 = first aligned source quadword
171
	bgtu/l r27, r4, tr1	! count < 72: skip the 32 byte loop, go to Loop_ua
172

173
	addi r5, -48, r27	! r27 = dest + count - 64
174
	pta/l Loop_line, tr0
175
	addi r6, 64, r36
176
	addi r6, -24, r19	! with r22 already advanced by 32 in the loop, these
177
	addi r6, -16, r20	! offsets address the source quadwords for
178
	addi r6, -8, r21	! dest r22-24, r22-16 and r22-8
179

180
Loop_line:
	/* One iteration stores 32 bytes: the pending quadword in r0 plus
	   three newly loaded ones, and loads the next pending quadword.
	   Loads and stores are interleaved; keep their order. */
181
	! ldx.q r22, r36, r63 ! TAKum03020
182
	alloco r22, 32	! cache allocate for r22+32; see the erratum NOTE in the file header
183
	synco	! guards the alloco (TAKum03020 workaround)
184
	addi r22, 32, r22	! advance the destination cursor by 32
185
	ldx.q r22, r19, r23	! source quadword for dest r22-24
186
	sthi.q r22, -25, r0	! pending quadword goes to dest r22-32 (hi part)
187
	ldx.q r22, r20, r24	! source quadword for dest r22-16
188
	ldx.q r22, r21, r25	! source quadword for dest r22-8
189
	stlo.q r22, -32, r0	! pending quadword, lo part
190
	ldx.q r22, r6,  r0	! next pending quadword, for dest r22
191
	sthi.q r22, -17, r23
192
	sthi.q r22,  -9, r24
193
	sthi.q r22,  -1, r25
194
	stlo.q r22, -24, r23
195
	stlo.q r22, -16, r24
196
	stlo.q r22,  -8, r25
197
	bgeu r27, r22, tr0	! repeat while r22 <= dest + count - 64
198

199
Loop_ua:
	/* One quadword per iteration: store the pending quadword at the
	   old r22 and load the next one. */
200
	addi r22, 8, r22
201
	sthi.q r22, -1, r0
202
	stlo.q r22, -8, r0
203
	ldx.q r22, r6, r0	! next pending quadword, for dest r22
204
	bgtu/l r5, r22, tr1	! repeat while r22 < dest + count - 16
205

	/* Tail: store the pending quadword at r22, then copy the last eight
	   source bytes with an unaligned load so that the copy ends exactly
	   at dest + count. */
206
	add r3, r4, r7	! r7 = src + count
207
	ldlo.q r7, -8, r1	! r1, r7 = lo and hi parts of the last source quadword
208
	sthi.q r22, 7, r0
209
	ldhi.q r7, -1, r7
210
	ptabs r18,tr1	! tr1 = return address taken from r18
211
	stlo.q r22, 0, r0
212
	or r1, r7, r1	! r1 = last eight source bytes
213
	sthi.q r5, 15, r1	! r5 + 8 = dest + count - 8: destination of the last quadword
214
	stlo.q r5, 8, r1
215
	blink tr1, r63	! return
216
copy_user_memcpy_end:
	/* Exported marker for the end of the routine; presumably used by
	   the exception fix-up code mentioned in the file header to
	   recognise faults inside this copy -- TODO confirm. */
217
	nop
218

219
Product

Resources

Company