/* Source: GitHub repository awilliam/linux-vfio, path blob/master/arch/frv/lib/memset.S */
/* memset.S: optimised assembly memset
 *
 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */


.text
.p2align 4

###############################################################################
#
# void *memset(void *p, char ch, size_t count)
#
# - NOTE: must not use any stack. exception detection performs function return
#         to caller's fixup routine, aborting the remainder of the set
#         GR4, GR7, GR8, and GR11 must be managed
#
# Register usage, as established by the code below:
#   GR8      - p on entry; never written here, so it is still p on return
#   GR9      - ch on entry; masked down to its low 8 bits
#   GR10     - count on entry; never written here
#   GR4      - address tracking pointer
#   GR5      - number of bytes still to be written
#   GR6      - size of the step currently being considered
#   GR7      - index register of the store being issued (0 while the store
#              indexes with GR0), so that GR4+GR7 is the address being written;
#              __memset_user_error_handler relies on this
#   GR12     - fill pattern, widened from one to two to four copies of ch
#   GR13     - scratch while widening, then a copy of GR12 for 8-byte stores
#   ICC3     - zero flag of the remaining count, tested by every BEQLR
#   CC5, CC7 - predicates for the conditional (C-prefixed) instructions
#
# Notation: a ".p" suffix packs an instruction into the same bundle as the one
# following it; a trailing ",cc7,#1" makes an instruction take effect only when
# CC7 is true.
#
###############################################################################
.globl memset,__memset_end
.type memset,@function
memset:
	# GR7 must be valid before the first store can fault: the alignment
	# stores below index with GR0, and the fixup handler computes GR4+GR7,
	# so GR7 has to read as zero here rather than as caller garbage.
	setlos		#0,gr7

	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count
	andi		gr9,#0xff,gr9			; keep only the fill byte
	or.p		gr8,gr0,gr4			; GR4 = address
	beqlr		icc3,#0				; count == 0: return at once

	# conditionally write a byte to 2b-align the address
	# (CC7 = address is odd)
	setlos.p	#1,gr6
	andicc		gr4,#1,gr0,icc0
	ckne		icc0,cc7
	cstb.p		gr9,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a word to 4b-align the address
	# ("word" here is the 2-byte CSTH store; it is only issued when bit 1 of
	# the address is set AND at least 2 bytes remain: CC7 = CC7 & CC5)
	andicc.p	gr4,#2,gr0,icc0
	subicc		gr5,#2,gr0,icc1
	setlos.p	#2,gr6
	ckne		icc0,cc7
	slli.p		gr9,#8,gr12			; need to double up the pattern
	cknc		icc1,cc5
	or.p		gr9,gr12,gr12
	andcr		cc7,cc5,cc7

	csth.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a dword to 8b-align the address
	# ("dword" here is the 4-byte CST store; it is only issued when bit 2 of
	# the address is set AND at least 4 bytes remain)
	andicc.p	gr4,#4,gr0,icc0
	subicc		gr5,#4,gr0,icc1
	setlos.p	#4,gr6
	ckne		icc0,cc7
	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern
	cknc		icc1,cc5
	or.p		gr13,gr12,gr12
	andcr		cc7,cc5,cc7

	cst.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# GR13 = GR12, so the GR12/GR13 pair carries eight copies of the byte
	or.p		gr12,gr12,gr13			; need to octuple-up the pattern

	# the address is now 8b-aligned - loop around writing 64b chunks
	# GR4 is biased back by 8 because every store below addresses GR4+GR7
	# and then leaves GR4 at the address it just wrote.
	setlos		#8,gr7
	subi.p		gr4,#8,gr4			; store with update index does weird stuff
	setlos		#64,gr6

	# ICC0 carries "at least 64 bytes remain" into the loop; it is
	# recomputed inside the loop after the count has been reduced.
	subicc		gr5,#64,gr0,icc0
0:	cknc		icc0,cc7
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#64,gr0,icc0
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0
	bnc		icc0,#2,0b

	# now do 32-byte remnant
	# (four 8-byte stores; GR6 and ICC0 are set up for the 16-byte step in
	# the spare slots between them)
	subicc.p	gr5,#32,gr0,icc0
	setlos		#32,gr6
	cknc		icc0,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	setlos		#16,gr6
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#16,gr0,icc0
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 16-byte remnant
	cknc		icc0,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 8-byte remnant
	# (GR7 still holds 8 and doubles as the amount to subtract; it is
	# switched to 4 afterwards for the next step)
	subicc		gr5,#8,gr0,icc1
	cknc		icc1,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	setlos.p	#4,gr7
	beqlr		icc3,#0

	# now do 4-byte remnant
	# From here on GR4 is advanced unconditionally by the difference between
	# the previous and the current step size, which keeps GR4+GR7 on the
	# next unwritten byte whether or not the previous store was issued.
	subicc		gr5,#4,gr0,icc0
	addi.p		gr4,#4,gr4
	cknc		icc0,cc7
	cstu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#2,gr0,icc1
	beqlr		icc3,#0

	# now do 2-byte remnant
	setlos		#2,gr7
	addi.p		gr4,#2,gr4
	cknc		icc1,cc7
	csthu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#1,gr0,icc0
	beqlr		icc3,#0

	# now do 1-byte remnant
	# (GR7 = 0 because this store indexes with GR0; GR4 itself is the
	# address of the last byte)
	setlos		#0,gr7
	addi.p		gr4,#2,gr4
	cknc		icc0,cc7
	cstb.p		gr12,@(gr4,gr0)		,cc7,#1
	bralr
__memset_end:

.size memset, __memset_end-memset

###############################################################################
#
# clear memory in userspace
# - return the number of bytes that could not be cleared (0 on complete success)
#
# long __memset_user(void *p, size_t count)
#
# Register usage, as established by the code below:
#   GR8  - p on entry; the result on return
#   GR9  - count on entry; moved to GR10 and then zeroed to become the fill
#          byte handed to memset
#   GR10 - count, as memset's third argument
#   GR11 - copy of the caller's LR, taken before CALL is issued; both exits
#          return through it
#
# Notation: a ".p" suffix packs an instruction into the same bundle as the one
# following it.
#
###############################################################################
.globl __memset_user, __memset_user_error_lr, __memset_user_error_handler
.type __memset_user,@function
__memset_user:
	movsg		lr,gr11

	# abuse memset to do the dirty work
	# i.e. memset(p, 0, count): GR8 already holds p
	or.p		gr9,gr9,gr10
	setlos		#0,gr9
	call		memset
	# memset returns here, so this label is the LR value memset runs with.
	# NOTE(review): presumably the exception code uses that LR value to
	# recognise a fault inside a user-space clear - confirm against the
	# fault handling code, which is not part of this file.
__memset_user_error_lr:
	jmpl.p		@(gr11,gr0)
	setlos		#0,gr8			; everything was cleared

	# deal any exception generated by memset
	# GR4  - memset's address tracking pointer
	# GR7  - memset's step value (index register for store insns)
	# GR8  - memset's original start address
	# GR10 - memset's original count
	#
	# result = (start + count) - (GR4 + GR7)
	# Nothing in this file branches here; memset's header says exception
	# detection performs the return to the caller's fixup routine.
__memset_user_error_handler:
	add.p		gr4,gr7,gr4
	add		gr8,gr10,gr8
	jmpl.p		@(gr11,gr0)
	sub		gr8,gr4,gr8		; we return the amount left uncleared

.size __memset_user, .-__memset_user
