Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/hexagon/lib/memset.S
26424 views
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2011, The Linux Foundation. All rights reserved.
 */


/* HEXAGON assembly optimized memset */
/* Replaces the standard library function memset */


/*
 * HEXAGON_OPT_FUNC_BEGIN name
 *
 * Opens a global function called \name: selects the .text section,
 * aligns the entry point to 2^4 = 16 bytes, exports the symbol, marks
 * it as a function and emits the entry label.
 * Pair every use with HEXAGON_OPT_FUNC_FINISH for the same name.
 */
	.macro HEXAGON_OPT_FUNC_BEGIN name
	.text
	.p2align 4
	.globl \name
	.type  \name, @function
\name:
	.endm
18
19
/*
 * HEXAGON_OPT_FUNC_FINISH name
 *
 * Closes a function opened with HEXAGON_OPT_FUNC_BEGIN: records the
 * symbol size as the distance from the \name label to the current
 * location, so it must be placed directly after the last instruction.
 */
	.macro HEXAGON_OPT_FUNC_FINISH name
	.size  \name, . - \name
	.endm
22
23
/*
 * FUNCTION: memset (v2 version)
 *
 * Assembled only when __HEXAGON_ARCH__ < 3.
 *
 * Register use, as visible in the code below:
 *   r0    destination pointer; never written, so it is still intact as
 *         the return value at every "jumpr r31"
 *   r1    fill value; replicated into r4 with vsplatb
 *   r2    remaining byte count (low word of the r3:2 pair)
 *   r3    high word of the count pair, cleared to 0
 *   r4    fill pattern used by every store (r5 = copy for memd)
 *   r7:6  amount subtracted from r3:2 after each store (r7 held at 0)
 *   r8    running store pointer
 *   r9    8 - (r0 & 7): bytes until double alignment
 *   r10   r2 >> 3: trip count of the double-word loop
 *
 * Counts below 8 are written one byte at a time.  Larger counts store
 * a byte, a half and a word as needed to reach 8-byte alignment, then
 * double words, then a word/half/byte tail.
 *
 * NOTE(review): the predicates are software-pipelined.  A conditional
 * jump written without ".new" tests the predicate value produced by an
 * earlier packet, while the same packet already computes the predicate
 * for the next decision.  Do not reorder or merge packets without
 * re-checking this against the Hexagon packet semantics.
 */
#if __HEXAGON_ARCH__ < 3
HEXAGON_OPT_FUNC_BEGIN memset
	{
		r6 = #8
		r7 = extractu(r0, #3 , #0)	/* r7 = r0 & 7 */
		p0 = cmp.eq(r2, #0)
		p1 = cmp.gtu(r2, #7)
	}
	{
		r4 = vsplatb(r1)
		r8 = r0           /* leave r0 intact for return val */
		r9 = sub(r6, r7)  /* bytes until double alignment */
		if p0 jumpr r31   /* count == 0, so return */
	}
	{
		r3 = #0
		r7 = #0
		p0 = tstbit(r9, #0)	/* odd address: byte store needed */
		if p1 jump 2f /* skip byte loop */
	}

/* less than 8 bytes to set, so just set a byte at a time and return */

		loop0(1f, r2) /* byte loop */
	.falign
1: /* byte loop */
	{
		memb(r8++#1) = r4
	}:endloop0
		jumpr r31
	.falign
2: /* skip byte loop */
	{
		r6 = #1
		p0 = tstbit(r9, #1)	/* decision for the half store at 3: */
		p1 = cmp.eq(r2, #1)
		if !p0 jump 3f /* skip initial byte store */
	}
	{
		memb(r8++#1) = r4
		r3:2 = sub(r3:2, r7:6)	/* count -= 1 */
		if p1 jumpr r31
	}
	.falign
3: /* skip initial byte store */
	{
		r6 = #2
		p0 = tstbit(r9, #2)	/* decision for the word store at 4: */
		p1 = cmp.eq(r2, #2)
		if !p0 jump 4f /* skip initial half store */
	}
	{
		memh(r8++#2) = r4
		r3:2 = sub(r3:2, r7:6)	/* count -= 2 */
		if p1 jumpr r31
	}
	.falign
4: /* skip initial half store */
	{
		r6 = #4
		p0 = cmp.gtu(r2, #7)	/* at least one double word left? */
		p1 = cmp.eq(r2, #4)
		if !p0 jump 5f /* skip initial word store */
	}
	{
		memw(r8++#4) = r4
		r3:2 = sub(r3:2, r7:6)	/* count -= 4 */
		p0 = cmp.gtu(r2, #11)	/* same test, r2 not yet reduced by 4 */
		if p1 jumpr r31
	}
	.falign
5: /* skip initial word store */
	{
		r10 = lsr(r2, #3)
		/*
		 * NOTE(review): r3 was set to 0 above, so this compare
		 * presumably just clears p1 for the test at 7: when the
		 * double loop is skipped -- confirm.
		 */
		p1 = cmp.eq(r3, #1)
		if !p0 jump 7f /* skip double loop */
	}
	{
		r5 = r4
		r6 = #8
		loop0(6f, r10) /* double loop */
	}

/* set bytes a double word at a time  */

	.falign
6: /* double loop */
	{
		memd(r8++#8) = r5:4
		r3:2 = sub(r3:2, r7:6)	/* count -= 8 */
		p1 = cmp.eq(r2, #8)	/* this store consumed the last 8 bytes */
	}:endloop0
	.falign
7: /* skip double loop */
	{
		p0 = tstbit(r2, #2)	/* word-sized tail? */
		if p1 jumpr r31
	}
	{
		r6 = #4
		p0 = tstbit(r2, #1)	/* half-sized tail? (tested at 8:) */
		p1 = cmp.eq(r2, #4)
		if !p0 jump 8f /* skip final word store */
	}
	{
		memw(r8++#4) = r4
		r3:2 = sub(r3:2, r7:6)	/* count -= 4 */
		if p1 jumpr r31
	}
	.falign
8: /* skip final word store */
	{
		p1 = cmp.eq(r2, #2)
		if !p0 jump 9f /* skip final half store */
	}
	{
		memh(r8++#2) = r4
		if p1 jumpr r31
	}
	.falign
9: /* skip final half store */
	{
		memb(r8++#1) = r4
		jumpr r31
	}
HEXAGON_OPT_FUNC_FINISH memset
#endif
151
152
153
/*
 * FUNCTION: memset (v3 and higher version)
 *
 * Assembled only when __HEXAGON_ARCH__ >= 3.
 *
 * Register use, as visible in the code below:
 *   r0    destination pointer; never written, so it is still intact as
 *         the return value at "jumpr r31"
 *   r1    fill value; stored directly by the byte stores
 *   r2    remaining byte count
 *   r3    scratch: byte-loop pointer, (r6 & 31) test, 32-byte block count
 *   r5:4  fill pattern for double-word stores
 *   r6    running store pointer
 *   r7    vsplatb(r1), fill pattern for half and word stores
 *   r8    scratch; trip count of the double-word loop at .L44
 *
 * Flow:
 *   count == 0      return
 *   count <= 8      byte loop (.L47)
 *   otherwise       byte/half/word stores until r6 is 8-byte aligned;
 *                   if more than 127 bytes remain, up to three double
 *                   stores until r6 is 32-byte aligned, then 32-byte
 *                   blocks (.L46 when r1 == 0, .L45 otherwise);
 *                   then double words (.L44) and a word/half/byte tail.
 *
 * NOTE(review): instructions inside one { } packet read the register
 * values from before the packet; only predicates used as ".new" see the
 * value computed in the same packet.  Several packets below rely on
 * this -- check the Hexagon packet semantics before reordering.
 */
#if __HEXAGON_ARCH__ >= 3
HEXAGON_OPT_FUNC_BEGIN memset
	{
		r7=vsplatb(r1)
		r6 = r0
		if (r2==#0) jump:nt .L1		/* nothing to set */
	}
	{
		r5:4=combine(r7,r7)
		p0 = cmp.gtu(r2,#8)
		if (p0.new) jump:nt .L3		/* more than 8 bytes */
	}
	/* 1..8 bytes: set a byte at a time and return */
	{
		r3 = r0
		loop0(.L47,r2)
	}
	.falign
.L47:
	{
		memb(r3++#1) = r1
	}:endloop0 /* start=.L47 */
		jumpr r31
.L3:
	/* odd address: store one byte so that r6 becomes 2-byte aligned */
	{
		p0 = tstbit(r0,#0)
		if (!p0.new) jump:nt .L8
		p1 = cmp.eq(r2, #1)
	}
	{
		r6 = add(r0, #1)
		r2 = add(r2,#-1)
		memb(r0) = r1
		if (p1) jump .L1
	}
.L8:
	/* bit 1 of r6 set: store a half so that r6 becomes 4-byte aligned */
	{
		p0 = tstbit(r6,#1)
		if (!p0.new) jump:nt .L10
	}
	{
		r2 = add(r2,#-2)
		memh(r6++#2) = r7
		p0 = cmp.eq(r2, #2)	/* r2 not yet reduced: nothing left after this store */
		if (p0.new) jump:nt .L1
	}
.L10:
	/* bit 2 of r6 set: store a word so that r6 becomes 8-byte aligned */
	{
		p0 = tstbit(r6,#2)
		if (!p0.new) jump:nt .L12
	}
	{
		r2 = add(r2,#-4)
		memw(r6++#4) = r7
		p0 = cmp.eq(r2, #4)	/* r2 not yet reduced: nothing left after this store */
		if (p0.new) jump:nt .L1
	}
.L12:
	/* 127 bytes or fewer left: go straight to the double-word loop */
	{
		p0 = cmp.gtu(r2,#127)
		if (!p0.new) jump:nt .L14
	}
	/* up to three double stores until r6 is a multiple of 32 */
		r3 = and(r6,#31)
		if (r3==#0) jump:nt .L17
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}
		r3 = and(r6,#31)
		if (r3==#0) jump:nt .L17
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}
		r3 = and(r6,#31)
		if (r3==#0) jump:nt .L17
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}
.L17:
	{
		r3 = lsr(r2,#5)			/* number of 32-byte blocks */
		if (r1!=#0) jump:nt .L18	/* non-zero r1: store loop */
	}
	/*
	 * r1 == 0: one dczeroa per 32-byte block.
	 * NOTE(review): loop0 shares a packet with "r3 = r6", so it should
	 * take the block count from the r3 value set at .L17 -- confirm.
	 */
	{
		r8 = r3
		r3 = r6
		loop0(.L46,r3)
	}
	.falign
.L46:
	{
		dczeroa(r6)
		r6 = add(r6,#32)
		r2 = add(r2,#-32)
	}:endloop0 /* start=.L46 */
.L14:
	/* remaining bytes, a double word at a time */
	{
		p0 = cmp.gtu(r2,#7)
		if (!p0.new) jump:nt .L28
		r8 = lsr(r2,#3)
	}
		loop0(.L44,r8)
	.falign
.L44:
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}:endloop0 /* start=.L44 */
.L28:
	/* tail: word if bit 2 of the remaining count is set */
	{
		p0 = tstbit(r2,#2)
		if (!p0.new) jump:nt .L33
	}
	{
		r2 = add(r2,#-4)
		memw(r6++#4) = r7
	}
.L33:
	/* tail: half if bit 1 of the remaining count is set */
	{
		p0 = tstbit(r2,#1)
		if (!p0.new) jump:nt .L35
	}
	{
		r2 = add(r2,#-2)
		memh(r6++#2) = r7
	}
.L35:
	/* tail: last byte if exactly one is left */
		p0 = cmp.eq(r2,#1)
		if (p0) memb(r6) = r1
.L1:
		jumpr r31
.L18:
	/*
	 * r1 != 0: each iteration issues dczeroa on the block and then
	 * fills its 32 bytes with four double-word stores.
	 */
		loop0(.L45,r3)
	.falign
.L45:
		dczeroa(r6)
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-32)
	}
		memd(r6++#8) = r5:4
		memd(r6++#8) = r5:4
	{
		memd(r6++#8) = r5:4
	}:endloop0 /* start=.L45 */
		jump .L14
HEXAGON_OPT_FUNC_FINISH memset
#endif
303
304