Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/arc/lib/memcpy-archs.S
26424 views
1
/* SPDX-License-Identifier: GPL-2.0-only */
2
/*
3
* Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
4
*/
5
6
#include <linux/linkage.h>
7
8
/*
 * Endian-dependent helpers for the unaligned-source paths of memcpy.
 * Each one expands to a single shift/mask instruction (or to nothing), so
 * the same loop source serves both byte orders:
 *   SHIFT_1    positions a freshly loaded word so it can be OR-ed with the
 *              bytes carried over from the previous load
 *   SHIFT_2    keeps the rest of that word as the next carry
 *   MERGE_x    build the initial 3-byte carry for the off-by-1 case
 *   EXTRACT_x  split that carry into a halfword and a byte for the final
 *              stores of the off-by-1 case
 */
#ifdef __LITTLE_ENDIAN__
9
# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
10
# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
11
# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
12
/* Expands to nothing: the operand is left exactly as it was loaded */
# define MERGE_2(RX,RY,IMM)
13
/* IMM is ignored here; the low 16 bits are selected with a mask */
# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
14
# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
15
#else
16
/* Big-endian: SHIFT_1/SHIFT_2 swap direction relative to little-endian */
# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
17
# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
18
# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
19
# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
20
# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
21
/* IMM is ignored here; the shift amount is fixed at 8 */
# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
22
#endif
23
24
/*
 * Load/store primitives for the fully aligned bulk loop of memcpy.
 *   LOADX/STOREX   one load/store with address write-back (.ab) of the
 *                  access size: 8 bytes via ldd/std when
 *                  CONFIG_ARC_HAS_LL64 is set, 4 bytes via ld/st otherwise
 *   ZOLSHFT        shift that turns a byte count into iterations of the
 *                  loop, which issues four LOADX/STOREX pairs per pass
 *                  (4 * 8 = 32 bytes -> 5, 4 * 4 = 16 bytes -> 4)
 *   ZOLAND         matching mask that yields the bytes left over after
 *                  that loop (0x1F or 0xF)
 */
#ifdef CONFIG_ARC_HAS_LL64
25
# define LOADX(DST,RX) ldd.ab DST, [RX, 8]
26
# define STOREX(SRC,RX) std.ab SRC, [RX, 8]
27
# define ZOLSHFT 5
28
# define ZOLAND 0x1F
29
#else
30
# define LOADX(DST,RX) ld.ab DST, [RX, 4]
31
# define STOREX(SRC,RX) st.ab SRC, [RX, 4]
32
# define ZOLSHFT 4
33
# define ZOLAND 0xF
34
#endif
35
36
/*
 * memcpy - copy r2 bytes from the address in r1 to the address in r0.
 *
 * In:      r0 = destination, r1 = source, r2 = length in bytes
 * Out:     r0 is never written, so it still holds the destination
 * Scratch: r3 (running destination pointer), r4-r10 and lp_count; r1 and
 *          r2 are modified. The 64-bit LOADX/STOREX variants presumably
 *          also use the odd partner of each named register (up to r11) -
 *          confirm against the ldd/std definition.
 *
 * Strategy, in the order the code tries it:
 *   - length == 0: return at once.
 *   - length <= 8: plain byte loop.
 *   - otherwise byte-copy until the destination is 32bit aligned, then
 *     choose a bulk loop from the low two bits of the source address:
 *       0       -> CASE 0, unrolled LOADX/STOREX copy
 *       1, 2, 3 -> CASE 1-3, aligned word loads recombined with shifts so
 *                  that every bulk store is a whole word
 *   - every path finishes with a byte loop for the leftover bytes.
 *
 * Several instructions sit in branch delay slots (.d suffix) and the lpnz
 * loops test flags that were set a few instructions earlier, so the
 * statement order in this routine is significant.
 */
ENTRY_CFI(memcpy)
37
;; Set the flags from the length; the 0 destination discards the value
mov.f 0, r2
38
;;; if size is zero
39
jz.d [blink]
40
;; Delay slot of the jz.d above: executed whether or not we return
mov r3, r0 ; do not clobber ret val: r3 is the working dst pointer
41
42
;;; if size <= 8
43
cmp r2, 8
44
bls.d @.Lsmallchunk
45
;; Delay slot: preload the loop count with the whole length and set the
;; flags that the lpnz at .Lsmallchunk tests
mov.f lp_count, r2
46
47
;;; Copy single bytes until the destination is 32bit aligned.
;;; r4 = dst & 3 and the loop count is 4 - r4. The flags come from the
;;; and.f (rsub has no .f), so lpnz skips the loop when r4 is zero.
and.f r4, r0, 0x03
48
rsub lp_count, r4, 4
49
lpnz @.Laligndestination
50
;; LOOP BEGIN
51
ldb.ab r5, [r1,1]
52
sub r2, r2, 1 ; keep the remaining length in step
53
stb.ab r5, [r3,1]
54
.Laligndestination:
55
56
;;; Check the alignment of the source
57
and.f r4, r1, 0x03
58
bnz.d @.Lsourceunaligned
59
60
;;; CASE 0: Both source and destination are 32bit aligned
61
;;; Convert len to Dwords, unfold x4
62
;; NOTE: this lsr.f is the delay slot of the bnz.d above, so it also
;; runs when the branch is taken; the unaligned paths set lp_count and
;; the flags again before relying on them.
lsr.f lp_count, r2, ZOLSHFT
63
lpnz @.Lcopy32_64bytes
64
;; LOOP START
65
;; Four loads followed by four stores, all with address write-back
LOADX (r6, r1)
66
LOADX (r8, r1)
67
LOADX (r10, r1)
68
LOADX (r4, r1)
69
STOREX (r6, r3)
70
STOREX (r8, r3)
71
STOREX (r10, r3)
72
STOREX (r4, r3)
73
.Lcopy32_64bytes:
74
75
and.f lp_count, r2, ZOLAND ;Leftover bytes: up to 31 with 64-bit LOADX, up to 15 otherwise
76
;; Shared byte-copy tail; lengths <= 8 enter here with lp_count = length
.Lsmallchunk:
77
lpnz @.Lcopyremainingbytes
78
;; LOOP START
79
ldb.ab r5, [r1,1]
80
stb.ab r5, [r3,1]
81
.Lcopyremainingbytes:
82
83
j [blink]
84
;;; END CASE 0
85
86
;;; Source is not 32bit aligned: r4 = src & 3 is 1, 2 or 3.
;;; The cmp flags are still valid at the bhi.d because the instruction
;;; in the first delay slot carries no .f.
.Lsourceunaligned:
87
cmp r4, 2
88
beq.d @.LunalignedOffby2
89
;; Delay slot, runs for all three cases: account for one source byte
sub r2, r2, 1
90
91
bhi.d @.LunalignedOffby3
92
;; Delay slot, runs for offsets 1 and 3: fetch that byte into r5
ldb.ab r5, [r1, 1]
93
94
;;; CASE 1: The source is unaligned, off by 1
95
;; Hence I need to read 1 byte for a 16bit alignment
96
;; and 2bytes to reach 32bit alignment
97
ldh.ab r6, [r1, 2]
98
sub r2, r2, 2
99
;; Convert to words, unfold x2
100
lsr.f lp_count, r2, 3
101
;; Pack the 3 bytes read so far into r5 as the carry for the loop.
;; None of the next three instructions has .f, so the lsr.f flags
;; are the ones the lpnz below tests.
MERGE_1 (r6, r6, 8)
102
MERGE_2 (r5, r5, 24)
103
or r5, r5, r6
104
105
;; Both src and dst are aligned
106
lpnz @.Lcopy8bytes_1
107
;; LOOP START
108
ld.ab r6, [r1, 4]
109
ld.ab r8, [r1,4]
110
111
;; Output word = carry in r5 OR-ed with the shifted-in part of the new
;; word; the rest of the new word becomes the next carry in r5
SHIFT_1 (r7, r6, 24)
112
or r7, r7, r5
113
SHIFT_2 (r5, r6, 8)
114
115
SHIFT_1 (r9, r8, 24)
116
or r9, r9, r5
117
SHIFT_2 (r5, r8, 8)
118
119
st.ab r7, [r3, 4]
120
st.ab r9, [r3, 4]
121
.Lcopy8bytes_1:
122
123
;; Flush the 3 carried bytes still held in r5
;; Write back the remaining 16bits
124
EXTRACT_1 (r6, r5, 16)
125
sth.ab r6, [r3, 2]
126
;; Write back the remaining 8bits
127
EXTRACT_2 (r5, r5, 16)
128
stb.ab r5, [r3, 1]
129
130
and.f lp_count, r2, 0x07 ;Leftover bytes (0-7) after the 8-byte loop
131
lpnz @.Lcopybytewise_1
132
;; LOOP START
133
ldb.ab r6, [r1,1]
134
stb.ab r6, [r3,1]
135
.Lcopybytewise_1:
136
j [blink]
137
138
.LunalignedOffby2:
139
;;; CASE 2: The source is unaligned, off by 2
140
;; Read one halfword so that the source becomes 32bit aligned
ldh.ab r5, [r1, 2]
141
;; Second of the two bytes just read; the first one was already
;; subtracted in the beq.d delay slot
sub r2, r2, 1
142
143
;; Both src and dst are aligned
144
;; Convert to words, unfold x2
145
lsr.f lp_count, r2, 3
146
#ifdef __BIG_ENDIAN__
147
;; Only when the loop will run (count non-zero): move the carried
;; halfword into the upper half of r5
asl.nz r5, r5, 16
148
#endif
149
lpnz @.Lcopy8bytes_2
150
;; LOOP START
151
ld.ab r6, [r1, 4]
152
ld.ab r8, [r1,4]
153
154
;; Same carry scheme as CASE 1, with a 2-byte carry
SHIFT_1 (r7, r6, 16)
155
or r7, r7, r5
156
SHIFT_2 (r5, r6, 16)
157
158
SHIFT_1 (r9, r8, 16)
159
or r9, r9, r5
160
SHIFT_2 (r5, r8, 16)
161
162
st.ab r7, [r3, 4]
163
st.ab r9, [r3, 4]
164
.Lcopy8bytes_2:
165
166
#ifdef __BIG_ENDIAN__
167
;; Same condition as the asl.nz above (nothing in the loop body has
;; .f): bring the carry back to the low halfword for the store
lsr.nz r5, r5, 16
168
#endif
169
;; Flush the 2 carried bytes
sth.ab r5, [r3, 2]
170
171
and.f lp_count, r2, 0x07 ;Leftover bytes (0-7) after the 8-byte loop
172
lpnz @.Lcopybytewise_2
173
;; LOOP START
174
ldb.ab r6, [r1,1]
175
stb.ab r6, [r3,1]
176
.Lcopybytewise_2:
177
j [blink]
178
179
.LunalignedOffby3:
180
;;; CASE 3: The source is unaligned, off by 3
181
;;; Hence, I need to read 1byte to achieve the 32bit alignment
;;; (that byte is already in r5, loaded in the bhi.d delay slot)
182
183
;; Both src and dst are aligned
184
;; Convert to words, unfold x2
185
lsr.f lp_count, r2, 3
186
#ifdef __BIG_ENDIAN__
187
;; Only when the loop will run: move the carried byte to the top of
;; r5 (.ne is presumably the same test as the .nz used in CASE 2 -
;; TODO confirm the alias)
asl.ne r5, r5, 24
188
#endif
189
lpnz @.Lcopy8bytes_3
190
;; LOOP START
191
ld.ab r6, [r1, 4]
192
ld.ab r8, [r1,4]
193
194
;; Same carry scheme as CASE 1, with a 1-byte carry
SHIFT_1 (r7, r6, 8)
195
or r7, r7, r5
196
SHIFT_2 (r5, r6, 24)
197
198
SHIFT_1 (r9, r8, 8)
199
or r9, r9, r5
200
SHIFT_2 (r5, r8, 24)
201
202
st.ab r7, [r3, 4]
203
st.ab r9, [r3, 4]
204
.Lcopy8bytes_3:
205
206
#ifdef __BIG_ENDIAN__
207
;; Undo the pre-loop shift when the loop ran, so the carried byte is
;; in the low bits for the store
lsr.nz r5, r5, 24
208
#endif
209
;; Flush the carried byte
stb.ab r5, [r3, 1]
210
211
and.f lp_count, r2, 0x07 ;Leftover bytes (0-7) after the 8-byte loop
212
lpnz @.Lcopybytewise_3
213
;; LOOP START
214
ldb.ab r6, [r1,1]
215
stb.ab r6, [r3,1]
216
.Lcopybytewise_3:
217
j [blink]
218
219
END_CFI(memcpy)
220
221