Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/ia64/lib/xor.S
10817 views
1
/*
2
* arch/ia64/lib/xor.S
3
*
4
* Optimized RAID-5 checksumming functions for IA-64.
5
*
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2, or (at your option)
9
* any later version.
10
*
11
* You should have received a copy of the GNU General Public License
12
* (for example /usr/src/linux/COPYING); if not, write to the Free
13
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14
*/
15
16
#include <asm/asmmacro.h>
17
18
/*
 * xor_ia64_2(in0, in1, in2)
 *
 * XORs two buffers 8 bytes at a time and writes the result over the first:
 * each word loaded through r16 (= in1) is XORed with the word loaded
 * through r17 (= in2) and stored through r8 (= in1).
 *
 *   in0 - byte count; shifted right by 3 to get the number of 8-byte words
 *   in1 - first source buffer, also the destination
 *   in2 - second source buffer
 *
 * Scratch registers: r8, r16, r17 (pointers), r29-r31 (saved pr, ar.lc,
 * ar.pfs).  ar.lc and pr are restored before returning.
 *
 * The loop is software-pipelined with 6+2 stages driven by br.ctop:
 * stage 0 loads, stage 6 XORs, stage 7 stores.
 *
 * NOTE(review): (in0 >> 3) - 1 is written to ar.lc without any check, so
 * a byte count below 8 is not handled here - presumably callers always
 * pass a positive multiple of 8; confirm against the callers.
 */
GLOBAL_ENTRY(xor_ia64_2)
19
.prologue
20
.fframe 0
21
.save ar.pfs, r31
22
/* Frame: 3 inputs, 0 locals, 13 outputs, of which 16 registers rotate. */
alloc r31 = ar.pfs, 3, 0, 13, 16
23
.save ar.lc, r30
24
/* Keep the caller's loop count; ar.lc is overwritten below. */
mov r30 = ar.lc
25
.save pr, r29
26
/* Keep the caller's predicates; the rotating ones are overwritten below. */
mov r29 = pr
27
;;
28
.body
29
/* r8 = store pointer (destination is the first buffer). */
mov r8 = in1
30
/* Epilogue count = number of pipeline stages (matches .rotp p[6+2]). */
mov ar.ec = 6 + 2
31
/* Convert the byte count into a count of 8-byte words. */
shr in0 = in0, 3
32
;;
33
/* ar.lc takes (words - 1). */
adds in0 = -1, in0
34
/* r16/r17 = load pointers for the two source buffers. */
mov r16 = in1
35
mov r17 = in2
36
;;
37
mov ar.lc = in0
38
/* Only bit 16 set: the first rotating predicate (p[0]) starts true. */
mov pr.rot = 1 << 16
39
;;
40
/*
 * Rotating names: s1/s2 hold loaded words for 7 rotations, d holds the
 * XOR result for 2, p[] are the 8 stage predicates.
 */
.rotr s1[6+1], s2[6+1], d[2]
41
.rotp p[6+2]
42
0:
43
/* Stage 0: load one word from each source, post-incrementing by 8. */
(p[0]) ld8.nta s1[0] = [r16], 8
44
(p[0]) ld8.nta s2[0] = [r17], 8
45
/* Stage 6: combine the words loaded six rotations earlier. */
(p[6]) xor d[0] = s1[6], s2[6]
46
/* Stage 7: store the result produced one rotation earlier (now d[1]). */
(p[6+1])st8.nta [r8] = d[1], 8
47
/* NOTE(review): nop.f looks like bundle/slot padding - confirm. */
nop.f 0
48
br.ctop.dptk.few 0b
49
;;
50
/* Restore the caller's ar.lc and all predicates (mask -1), then return. */
mov ar.lc = r30
51
mov pr = r29, -1
52
br.ret.sptk.few rp
53
END(xor_ia64_2)
54
55
/*
 * xor_ia64_3(in0, in1, in2, in3)
 *
 * XORs three buffers 8 bytes at a time and writes the result over the
 * first: words loaded through r16 (= in1), r17 (= in2) and r18 (= in3)
 * are XORed together and stored through r8 (= in1).
 *
 *   in0 - byte count; shifted right by 3 to get the number of 8-byte words
 *   in1 - first source buffer, also the destination
 *   in2 - second source buffer
 *   in3 - third source buffer
 *
 * Scratch registers: r8, r16-r18 (pointers), r29-r31 (saved pr, ar.lc,
 * ar.pfs).  ar.lc and pr are restored before returning.
 *
 * The loop is software-pipelined with 6+2 stages driven by br.ctop and
 * its body spans two instruction groups.
 *
 * NOTE(review): (in0 >> 3) - 1 is written to ar.lc without any check, so
 * a byte count below 8 is not handled here - presumably callers always
 * pass a positive multiple of 8; confirm against the callers.
 */
GLOBAL_ENTRY(xor_ia64_3)
56
.prologue
57
.fframe 0
58
.save ar.pfs, r31
59
/* Frame: 4 inputs, 0 locals, 20 outputs, of which 24 registers rotate. */
alloc r31 = ar.pfs, 4, 0, 20, 24
60
.save ar.lc, r30
61
/* Keep the caller's loop count; ar.lc is overwritten below. */
mov r30 = ar.lc
62
.save pr, r29
63
/* Keep the caller's predicates; the rotating ones are overwritten below. */
mov r29 = pr
64
;;
65
.body
66
/* r8 = store pointer (destination is the first buffer). */
mov r8 = in1
67
/* Epilogue count = number of pipeline stages (matches .rotp p[6+2]). */
mov ar.ec = 6 + 2
68
/* Convert the byte count into a count of 8-byte words. */
shr in0 = in0, 3
69
;;
70
/* ar.lc takes (words - 1). */
adds in0 = -1, in0
71
/* r16-r18 = load pointers for the three source buffers. */
mov r16 = in1
72
mov r17 = in2
73
;;
74
mov r18 = in3
75
mov ar.lc = in0
76
/* Only bit 16 set: the first rotating predicate (p[0]) starts true. */
mov pr.rot = 1 << 16
77
;;
78
/*
 * Rotating names: s1..s3 hold loaded words for 7 rotations, d holds the
 * XOR result for 2, p[] are the 8 stage predicates.
 */
.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
79
.rotp p[6+2]
80
0:
81
/* Group 1, stage 0: load from the first two sources. */
(p[0]) ld8.nta s1[0] = [r16], 8
82
(p[0]) ld8.nta s2[0] = [r17], 8
83
/* Group 1, stage 6: start the XOR with the first two words. */
(p[6]) xor d[0] = s1[6], s2[6]
84
;;
85
/* Group 2, stage 0: load from the third source. */
(p[0]) ld8.nta s3[0] = [r18], 8
86
/* Group 2, stage 7: store the result finished one rotation earlier. */
(p[6+1])st8.nta [r8] = d[1], 8
87
/* Group 2, stage 6: fold in the third word; reads d[0] from group 1. */
(p[6]) xor d[0] = d[0], s3[6]
88
br.ctop.dptk.few 0b
89
;;
90
/* Restore the caller's ar.lc and all predicates (mask -1), then return. */
mov ar.lc = r30
91
mov pr = r29, -1
92
br.ret.sptk.few rp
93
END(xor_ia64_3)
94
95
/*
 * xor_ia64_4(in0, in1, in2, in3, in4)
 *
 * XORs four buffers 8 bytes at a time and writes the result over the
 * first: words loaded through r16 (= in1), r17 (= in2), r18 (= in3) and
 * r19 (= in4) are XORed together and stored through r8 (= in1).
 *
 *   in0 - byte count; shifted right by 3 to get the number of 8-byte words
 *   in1 - first source buffer, also the destination
 *   in2 - second source buffer
 *   in3 - third source buffer
 *   in4 - fourth source buffer
 *
 * Scratch registers: r8, r16-r19 (pointers), r20 (partial XOR),
 * r29-r31 (saved pr, ar.lc, ar.pfs).  ar.lc and pr are restored before
 * returning.
 *
 * The loop is software-pipelined with 6+2 stages driven by br.ctop and
 * its body spans two instruction groups.
 *
 * NOTE(review): (in0 >> 3) - 1 is written to ar.lc without any check, so
 * a byte count below 8 is not handled here - presumably callers always
 * pass a positive multiple of 8; confirm against the callers.
 */
GLOBAL_ENTRY(xor_ia64_4)
96
.prologue
97
.fframe 0
98
.save ar.pfs, r31
99
/* Frame: 5 inputs, 0 locals, 27 outputs, of which 32 registers rotate. */
alloc r31 = ar.pfs, 5, 0, 27, 32
100
.save ar.lc, r30
101
/* Keep the caller's loop count; ar.lc is overwritten below. */
mov r30 = ar.lc
102
.save pr, r29
103
/* Keep the caller's predicates; the rotating ones are overwritten below. */
mov r29 = pr
104
;;
105
.body
106
/* r8 = store pointer (destination is the first buffer). */
mov r8 = in1
107
/* Epilogue count = number of pipeline stages (matches .rotp p[6+2]). */
mov ar.ec = 6 + 2
108
/* Convert the byte count into a count of 8-byte words. */
shr in0 = in0, 3
109
;;
110
/* ar.lc takes (words - 1). */
adds in0 = -1, in0
111
/* r16-r19 = load pointers for the four source buffers. */
mov r16 = in1
112
mov r17 = in2
113
;;
114
mov r18 = in3
115
mov ar.lc = in0
116
/* Only bit 16 set: the first rotating predicate (p[0]) starts true. */
mov pr.rot = 1 << 16
117
mov r19 = in4
118
;;
119
/*
 * Rotating names: s1..s4 hold loaded words for 7 rotations, d holds the
 * XOR result for 2, p[] are the 8 stage predicates.
 */
.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
120
.rotp p[6+2]
121
0:
122
/* Group 1, stage 0: load from sources one and two. */
(p[0]) ld8.nta s1[0] = [r16], 8
123
(p[0]) ld8.nta s2[0] = [r17], 8
124
/* Group 1, stage 6: XOR of the first pair. */
(p[6]) xor d[0] = s1[6], s2[6]
125
/* Group 1, stage 0: load from sources three and four. */
(p[0]) ld8.nta s3[0] = [r18], 8
126
(p[0]) ld8.nta s4[0] = [r19], 8
127
/* Group 1, stage 6: XOR of the second pair into non-rotating r20. */
(p[6]) xor r20 = s3[6], s4[6]
128
;;
129
/* Group 2, stage 7: store the result finished one rotation earlier. */
(p[6+1])st8.nta [r8] = d[1], 8
130
/* Group 2, stage 6: merge both pair results; consumed next pass as d[1]. */
(p[6]) xor d[0] = d[0], r20
131
br.ctop.dptk.few 0b
132
;;
133
/* Restore the caller's ar.lc and all predicates (mask -1), then return. */
mov ar.lc = r30
134
mov pr = r29, -1
135
br.ret.sptk.few rp
136
END(xor_ia64_4)
137
138
/*
 * xor_ia64_5(in0, in1, in2, in3, in4, in5)
 *
 * XORs five buffers 8 bytes at a time and writes the result over the
 * first: words loaded through r16 (= in1), r17 (= in2), r18 (= in3),
 * r19 (= in4) and r20 (= in5) are XORed together and stored through
 * r8 (= in1).
 *
 *   in0 - byte count; shifted right by 3 to get the number of 8-byte words
 *   in1 - first source buffer, also the destination
 *   in2 - second source buffer
 *   in3 - third source buffer
 *   in4 - fourth source buffer
 *   in5 - fifth source buffer
 *
 * Scratch registers: r8, r16-r20 (pointers), r21 (partial XOR),
 * r29-r31 (saved pr, ar.lc, ar.pfs).  ar.lc and pr are restored before
 * returning.
 *
 * The loop is software-pipelined with 6+2 stages driven by br.ctop and
 * its body spans three instruction groups.
 *
 * NOTE(review): (in0 >> 3) - 1 is written to ar.lc without any check, so
 * a byte count below 8 is not handled here - presumably callers always
 * pass a positive multiple of 8; confirm against the callers.
 */
GLOBAL_ENTRY(xor_ia64_5)
139
.prologue
140
.fframe 0
141
.save ar.pfs, r31
142
/* Frame: 6 inputs, 0 locals, 34 outputs, of which 40 registers rotate. */
alloc r31 = ar.pfs, 6, 0, 34, 40
143
.save ar.lc, r30
144
/* Keep the caller's loop count; ar.lc is overwritten below. */
mov r30 = ar.lc
145
.save pr, r29
146
/* Keep the caller's predicates; the rotating ones are overwritten below. */
mov r29 = pr
147
;;
148
.body
149
/* r8 = store pointer (destination is the first buffer). */
mov r8 = in1
150
/* Epilogue count = number of pipeline stages (matches .rotp p[6+2]). */
mov ar.ec = 6 + 2
151
/* Convert the byte count into a count of 8-byte words. */
shr in0 = in0, 3
152
;;
153
/* ar.lc takes (words - 1). */
adds in0 = -1, in0
154
/* r16-r20 = load pointers for the five source buffers. */
mov r16 = in1
155
mov r17 = in2
156
;;
157
mov r18 = in3
158
mov ar.lc = in0
159
/* Only bit 16 set: the first rotating predicate (p[0]) starts true. */
mov pr.rot = 1 << 16
160
mov r19 = in4
161
mov r20 = in5
162
;;
163
/*
 * Rotating names: s1..s5 hold loaded words for 7 rotations, d holds the
 * XOR result for 2, p[] are the 8 stage predicates.
 */
.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
164
.rotp p[6+2]
165
0:
166
/* Group 1, stage 0: load from sources one and two. */
(p[0]) ld8.nta s1[0] = [r16], 8
167
(p[0]) ld8.nta s2[0] = [r17], 8
168
/* Group 1, stage 6: XOR of the first pair. */
(p[6]) xor d[0] = s1[6], s2[6]
169
/* Group 1, stage 0: load from sources three and four. */
(p[0]) ld8.nta s3[0] = [r18], 8
170
(p[0]) ld8.nta s4[0] = [r19], 8
171
/* Group 1, stage 6: XOR of the second pair into non-rotating r21. */
(p[6]) xor r21 = s3[6], s4[6]
172
;;
173
/* Group 2, stage 0: load from the fifth source. */
(p[0]) ld8.nta s5[0] = [r20], 8
174
/* Group 2, stage 7: store the result finished one rotation earlier. */
(p[6+1])st8.nta [r8] = d[1], 8
175
/* Group 2, stage 6: merge both pair results. */
(p[6]) xor d[0] = d[0], r21
176
;;
177
/* Group 3, stage 6: fold in the fifth word; consumed next pass as d[1]. */
(p[6]) xor d[0] = d[0], s5[6]
178
/* NOTE(review): nop.f looks like bundle/slot padding - confirm. */
nop.f 0
179
br.ctop.dptk.few 0b
180
;;
181
/* Restore the caller's ar.lc and all predicates (mask -1), then return. */
mov ar.lc = r30
182
mov pr = r29, -1
183
br.ret.sptk.few rp
184
END(xor_ia64_5)
185
186