GitHub Repository: torvalds/linux
Path: blob/master/arch/loongarch/lib/memset.S
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>
#include <asm/unwind_hints.h>

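/*
 * fill_to_64 broadcasts the low byte of \r0 into every byte of the
 * register, so a single st.d writes eight copies of the fill value.
 * Conceptually: v = (c & 0xff) * 0x0101010101010101.
 */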
.macro fill_to_64 r0
	bstrins.d	\r0, \r0, 15, 8
	bstrins.d	\r0, \r0, 31, 16
	bstrins.d	\r0, \r0, 63, 32
.endm

	.section .noinstr.text, "ax"

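/*
 * memset() itself is only a dispatcher: ALTERNATIVE patches the branch at
 * boot so that CPUs advertising CPU_FEATURE_UAL (hardware unaligned access)
 * take __memset_fast, while everything else falls back to __memset_generic.
 */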
SYM_FUNC_START(memset)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memset_generic", \
			"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_ALIAS(__memset, memset)

EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

_ASM_NOKPROBE(memset)
_ASM_NOKPROBE(__memset)

/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
 * a0: s
 * a1: c
 * a2: n
 */
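/*
 * Roughly equivalent C (for illustration only):
 *
 *	char *p = s;
 *	while (n--)
 *		*p++ = c;
 *	return s;
 */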
SYM_FUNC_START(__memset_generic)
	move	a3, a0
	beqz	a2, 2f

1:	st.b	a1, a0, 0
	addi.d	a0, a0, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memset_generic)
_ASM_NOKPROBE(__memset_generic)

/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
 * a0: s
 * a1: c
 * a2: n
 */
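/*
 * Only used when the CPU supports hardware unaligned access: lengths below
 * 9 bytes are dispatched through the jump table at .Lsmall; larger lengths
 * store 8 bytes at the (possibly unaligned) start, run the bulk loop on an
 * 8-byte-aligned cursor, and finish with one unaligned st.d over the last
 * 8 bytes, so individual stores may overlap.
 */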
SYM_FUNC_START(__memset_fast)
	/* fill a1 to 64 bits */
	fill_to_64 a1

	sltui	t0, a2, 9
	bnez	t0, .Lsmall

	add.d	a2, a0, a2
	st.d	a1, a0, 0

	/* align up address */
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0

	addi.d	a4, a2, -64
	bgeu	a3, a4, .Llt64

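	/*
	 * At this point a2 holds the end address, the st.d above has filled
	 * bytes 0..7 even if a0 was unaligned, a3 = (a0 + 8) & ~7 is the
	 * 8-byte-aligned cursor for the bulk stores (it may point back into
	 * the already-filled head), and a4 = a2 - 64 bounds the 64-byte loop.
	 */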
	/* set 64 bytes at a time */
.Lloop64:
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	st.d	a1, a3, 32
	st.d	a1, a3, 40
	st.d	a1, a3, 48
	st.d	a1, a3, 56
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lloop64

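	/*
	 * Fewer than 64 bytes remain between a3 and a2.  The cascade below
	 * peels off 32-, 16- and 8-byte chunks; .Llt8 then writes one st.d at
	 * a2 - 8, which may overlap earlier stores but always covers the tail
	 * (n >= 9 on this path).
	 */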
	/* set the remaining bytes */
.Llt64:
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Llt32
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	addi.d	a3, a3, 32

.Llt32:
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Llt16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

.Llt16:
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Llt8
	st.d	a1, a3, 0

.Llt8:
	st.d	a1, a2, -8

	/* return */
	jr	ra

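	/*
	 * Jump table for n < 9: pcaddi t0, 4 points at the first 16-byte
	 * fragment below (four instructions past the pcaddi), and a2 << 4
	 * selects fragment n, since every ".align 4" slot is 16 bytes long.
	 * Fragment n stores exactly n bytes; fragment 7 uses two overlapping
	 * st.w, which is safe because this path requires hardware unaligned
	 * access support.
	 */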
	.align	4
.Lsmall:
	pcaddi	t0, 4
	slli.d	a2, a2, 4
	add.d	t0, t0, a2
	jr	t0

	.align	4
0:	jr	ra

	.align	4
1:	st.b	a1, a0, 0
	jr	ra

	.align	4
2:	st.h	a1, a0, 0
	jr	ra

	.align	4
3:	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	.align	4
4:	st.w	a1, a0, 0
	jr	ra

	.align	4
5:	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	.align	4
6:	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra

	.align	4
7:	st.w	a1, a0, 0
	st.w	a1, a0, 3
	jr	ra

	.align	4
8:	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)

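/*
 * __memset_fast is opted out of objtool checking, presumably because of the
 * computed jump into the .Lsmall table.
 */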
STACK_FRAME_NON_STANDARD __memset_fast