GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/sparc/lib/checksum_64.S
/* checksum.S: Sparc V9 optimized checksum code.
 *
 * Copyright(C) 1995 Linus Torvalds
 * Copyright(C) 1995 Miguel de Icaza
 * Copyright(C) 1996, 2000 David S. Miller
 * Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */

        .text

csum_partial_fix_alignment:
        /* We checked for zero length already, so there must be
         * at least one byte.
         */
        be,pt           %icc, 1f
         nop
        ldub            [%o0 + 0x00], %o4
        add             %o0, 1, %o0
        sub             %o1, 1, %o1
1:      andcc           %o0, 0x2, %g0
        be,pn           %icc, csum_partial_post_align
         cmp            %o1, 2
        blu,pn          %icc, csum_partial_end_cruft
         nop
        lduh            [%o0 + 0x00], %o5
        add             %o0, 2, %o0
        sub             %o1, 2, %o1
        ba,pt           %xcc, csum_partial_post_align
         add            %o5, %o4, %o4

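/* At the C level this routine computes a 32-bit ones-complement partial
 * checksum; the kernel's SPARC checksum headers declare it approximately
 * as below (treat the exact types as an assumption of this note):
 *
 *	__wsum csum_partial(const void *buff, int len, __wsum sum);
 *
 * The sum of the len bytes at buff is folded into the incoming sum and
 * returned in %o0.
 */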
        .align          32
        .globl          csum_partial
csum_partial:           /* %o0=buff, %o1=len, %o2=sum */
        prefetch        [%o0 + 0x000], #n_reads
        clr             %o4
        prefetch        [%o0 + 0x040], #n_reads
        brz,pn          %o1, csum_partial_finish
         andcc          %o0, 0x3, %g0

        /* We "remember" whether the lowest bit in the address
         * was set in %g7.  Because if it is, we have to swap
         * upper and lower 8 bit fields of the sum we calculate.
         */
        bne,pn          %icc, csum_partial_fix_alignment
         andcc          %o0, 0x1, %g7

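        /* Why a swap is enough: per RFC 1071 the ones-complement sum is
         * byte-order independent, so starting at an odd address merely
         * exchanges which source bytes land in the high and the low half
         * of the 16-bit result.  Swapping those halves at the end (just
         * before csum_partial_finish) restores the correct value.
         */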
csum_partial_post_align:
        prefetch        [%o0 + 0x080], #n_reads
        andncc          %o1, 0x3f, %o3

        prefetch        [%o0 + 0x0c0], #n_reads
        sub             %o1, %o3, %o1
        brz,pn          %o3, 2f
         prefetch       [%o0 + 0x100], #n_reads

        /* So that we don't need to use the non-pairing
         * add-with-carry instructions we accumulate 32-bit
         * values into a 64-bit register.  At the end of the
         * loop we fold it down to 32 bits and so on.
         */
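        /* The strategy above as a C sketch (illustrative only; 'p',
         * 'sum64', and 'nwords' are hypothetical names, and the real loop
         * below is unrolled to consume 0x40 bytes per iteration):
         *
         *	unsigned long sum64 = 0;
         *	const unsigned int *p = buff;
         *	while (nwords--)
         *		sum64 += *p++;	// plain 64-bit adds, no addc needed
         *
         * Since len fits in an int, at most 2^29 32-bit values are summed,
         * so the 64-bit accumulator cannot overflow; the carries are
         * recovered afterwards by the folds below.
         */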
        prefetch        [%o0 + 0x140], #n_reads
1:      lduw            [%o0 + 0x00], %o5
        lduw            [%o0 + 0x04], %g1
        lduw            [%o0 + 0x08], %g2
        add             %o4, %o5, %o4
        lduw            [%o0 + 0x0c], %g3
        add             %o4, %g1, %o4
        lduw            [%o0 + 0x10], %o5
        add             %o4, %g2, %o4
        lduw            [%o0 + 0x14], %g1
        add             %o4, %g3, %o4
        lduw            [%o0 + 0x18], %g2
        add             %o4, %o5, %o4
        lduw            [%o0 + 0x1c], %g3
        add             %o4, %g1, %o4
        lduw            [%o0 + 0x20], %o5
        add             %o4, %g2, %o4
        lduw            [%o0 + 0x24], %g1
        add             %o4, %g3, %o4
        lduw            [%o0 + 0x28], %g2
        add             %o4, %o5, %o4
        lduw            [%o0 + 0x2c], %g3
        add             %o4, %g1, %o4
        lduw            [%o0 + 0x30], %o5
        add             %o4, %g2, %o4
        lduw            [%o0 + 0x34], %g1
        add             %o4, %g3, %o4
        lduw            [%o0 + 0x38], %g2
        add             %o4, %o5, %o4
        lduw            [%o0 + 0x3c], %g3
        add             %o4, %g1, %o4
        prefetch        [%o0 + 0x180], #n_reads
        add             %o4, %g2, %o4
        subcc           %o3, 0x40, %o3
        add             %o0, 0x40, %o0
        bne,pt          %icc, 1b
         add            %o4, %g3, %o4

2:      and             %o1, 0x3c, %o3
        brz,pn          %o3, 2f
         sub            %o1, %o3, %o1
1:      lduw            [%o0 + 0x00], %o5
        subcc           %o3, 0x4, %o3
        add             %o0, 0x4, %o0
        bne,pt          %icc, 1b
         add            %o4, %o5, %o4

2:
        /* fold 64-->32 */
        srlx            %o4, 32, %o5
        srl             %o4, 0, %o4
        add             %o4, %o5, %o4
        srlx            %o4, 32, %o5
        srl             %o4, 0, %o4
        add             %o4, %o5, %o4

        /* fold 32-->16 */
        sethi           %hi(0xffff0000), %g1
        srl             %o4, 16, %o5
        andn            %o4, %g1, %g2
        add             %o5, %g2, %o4
        srl             %o4, 16, %o5
        andn            %o4, %g1, %g2
        add             %o5, %g2, %o4

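        /* The folds above as a C sketch (names are illustrative):
         *
         *	sum64 = (sum64 >> 32) + (sum64 & 0xffffffffUL);  // 64 --> 32
         *	sum64 = (sum64 >> 32) + (sum64 & 0xffffffffUL);  // + carry
         *	sum32 = (sum32 >> 16) + (sum32 & 0xffff);        // 32 --> 16
         *	sum32 = (sum32 >> 16) + (sum32 & 0xffff);        // + carry
         *
         * Each shift-and-add feeds the high bits back into the low bits:
         * the end-around carry that ones-complement addition requires.
         */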
csum_partial_end_cruft:
        /* %o4 has the 16-bit sum we have calculated so far. */
        cmp             %o1, 2
        blu,pt          %icc, 1f
         nop
        lduh            [%o0 + 0x00], %o5
        sub             %o1, 2, %o1
        add             %o0, 2, %o0
        add             %o4, %o5, %o4
1:      brz,pt          %o1, 1f
         nop
        ldub            [%o0 + 0x00], %o5
        sub             %o1, 1, %o1
        add             %o0, 1, %o0
        sllx            %o5, 8, %o5
        add             %o4, %o5, %o4
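        /* The trailing odd byte is shifted left by 8 before being added
         * because on big-endian SPARC it is the high-order byte of its
         * 16-bit checksum word; in C terms (sketch):
         *
         *	sum += (unsigned int)last_byte << 8;
         */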
1:
        /* fold 32-->16 */
        sethi           %hi(0xffff0000), %g1
        srl             %o4, 16, %o5
        andn            %o4, %g1, %g2
        add             %o5, %g2, %o4
        srl             %o4, 16, %o5
        andn            %o4, %g1, %g2
        add             %o5, %g2, %o4

1:      brz,pt          %g7, 1f
         nop

        /* We started with an odd byte; byte-swap the result. */
        srl             %o4, 8, %o5
        and             %o4, 0xff, %g1
        sll             %g1, 8, %g1
        or              %o5, %g1, %o4

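        /* In C terms the swap above is (sketch):
         *
         *	sum = (sum >> 8) | ((sum & 0xff) << 8);
         *
         * undoing the byte transposition recorded in %g7 at entry.
         */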
1:      addcc           %o2, %o4, %o2
        addc            %g0, %o2, %o2

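        /* The addcc/addc pair performs a 32-bit end-around-carry add of
         * the caller's running sum, roughly (C sketch, 64-bit temporary):
         *
         *	unsigned long t = (unsigned int)sum + (unsigned int)csum;
         *	sum = (unsigned int)t + (t >> 32);	// reinject the carry
         */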
csum_partial_finish:
        retl
         srl            %o2, 0, %o0