GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/sparc/lib/checksum_64.S
/* checksum.S: Sparc V9 optimized checksum code.
 *
 * Copyright(C) 1995 Linus Torvalds
 * Copyright(C) 1995 Miguel de Icaza
 * Copyright(C) 1996, 2000 David S. Miller
 * Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */

        .text

csum_partial_fix_alignment:
        /* We checked for zero length already, so there must be
         * at least one byte.
         */
        be,pt           %icc, 1f
         nop
        ldub            [%o0 + 0x00], %o4
        add             %o0, 1, %o0
        sub             %o1, 1, %o1
1:      andcc           %o0, 0x2, %g0
        be,pn           %icc, csum_partial_post_align
         cmp            %o1, 2
        blu,pn          %icc, csum_partial_end_cruft
         nop
        lduh            [%o0 + 0x00], %o5
        add             %o0, 2, %o0
        sub             %o1, 2, %o1
        ba,pt           %xcc, csum_partial_post_align
         add            %o5, %o4, %o4

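/* At the C level this routine computes a 32-bit ones-complement partial
 * checksum; the kernel's SPARC checksum headers declare it approximately
 * as below (treat the exact types as an assumption of this note):
 *
 *	__wsum csum_partial(const void *buff, int len, __wsum sum);
 *
 * The sum of the len bytes at buff is folded into the incoming sum and
 * returned in %o0.
 */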
        .align          32
        .globl          csum_partial
csum_partial:           /* %o0=buff, %o1=len, %o2=sum */
        prefetch        [%o0 + 0x000], #n_reads
        clr             %o4
        prefetch        [%o0 + 0x040], #n_reads
        brz,pn          %o1, csum_partial_finish
         andcc          %o0, 0x3, %g0

        /* We "remember" whether the lowest bit in the address
         * was set in %g7.  Because if it is, we have to swap
         * upper and lower 8 bit fields of the sum we calculate.
         */
        bne,pn          %icc, csum_partial_fix_alignment
         andcc          %o0, 0x1, %g7

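        /* Why a swap is enough: per RFC 1071 the ones-complement sum is
         * byte-order independent, so starting at an odd address merely
         * exchanges which source bytes land in the high and the low half
         * of the 16-bit result.  Swapping those halves at the end (just
         * before csum_partial_finish) restores the correct value.
         */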
csum_partial_post_align:
        prefetch        [%o0 + 0x080], #n_reads
        andncc          %o1, 0x3f, %o3

        prefetch        [%o0 + 0x0c0], #n_reads
        sub             %o1, %o3, %o1
        brz,pn          %o3, 2f
         prefetch       [%o0 + 0x100], #n_reads

        /* So that we don't need to use the non-pairing
         * add-with-carry instructions we accumulate 32-bit
         * values into a 64-bit register.  At the end of the
         * loop we fold it down to 32 bits and so on.
         */
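        /* The strategy above as a C sketch (illustrative only; 'p',
         * 'sum64', and 'nwords' are hypothetical names, and the real loop
         * below is unrolled to consume 0x40 bytes per iteration):
         *
         *	unsigned long sum64 = 0;
         *	const unsigned int *p = buff;
         *	while (nwords--)
         *		sum64 += *p++;	// plain 64-bit adds, no addc needed
         *
         * Since len fits in an int, at most 2^29 32-bit values are summed,
         * so the 64-bit accumulator cannot overflow; the carries are
         * recovered afterwards by the folds below.
         */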
        prefetch        [%o0 + 0x140], #n_reads
1:      lduw            [%o0 + 0x00], %o5
        lduw            [%o0 + 0x04], %g1
        lduw            [%o0 + 0x08], %g2
        add             %o4, %o5, %o4
        lduw            [%o0 + 0x0c], %g3
        add             %o4, %g1, %o4
        lduw            [%o0 + 0x10], %o5
        add             %o4, %g2, %o4
        lduw            [%o0 + 0x14], %g1
        add             %o4, %g3, %o4
        lduw            [%o0 + 0x18], %g2
        add             %o4, %o5, %o4
        lduw            [%o0 + 0x1c], %g3
        add             %o4, %g1, %o4
        lduw            [%o0 + 0x20], %o5
        add             %o4, %g2, %o4
        lduw            [%o0 + 0x24], %g1
        add             %o4, %g3, %o4
        lduw            [%o0 + 0x28], %g2
        add             %o4, %o5, %o4
        lduw            [%o0 + 0x2c], %g3
        add             %o4, %g1, %o4
        lduw            [%o0 + 0x30], %o5
        add             %o4, %g2, %o4
        lduw            [%o0 + 0x34], %g1
        add             %o4, %g3, %o4
        lduw            [%o0 + 0x38], %g2
        add             %o4, %o5, %o4
        lduw            [%o0 + 0x3c], %g3
        add             %o4, %g1, %o4
        prefetch        [%o0 + 0x180], #n_reads
        add             %o4, %g2, %o4
        subcc           %o3, 0x40, %o3
        add             %o0, 0x40, %o0
        bne,pt          %icc, 1b
         add            %o4, %g3, %o4

2:      and             %o1, 0x3c, %o3
        brz,pn          %o3, 2f
         sub            %o1, %o3, %o1
1:      lduw            [%o0 + 0x00], %o5
        subcc           %o3, 0x4, %o3
        add             %o0, 0x4, %o0
        bne,pt          %icc, 1b
         add            %o4, %o5, %o4

2:
        /* fold 64-->32 */
        srlx            %o4, 32, %o5
        srl             %o4, 0, %o4
        add             %o4, %o5, %o4
        srlx            %o4, 32, %o5
        srl             %o4, 0, %o4
        add             %o4, %o5, %o4

        /* fold 32-->16 */
        sethi           %hi(0xffff0000), %g1
        srl             %o4, 16, %o5
        andn            %o4, %g1, %g2
        add             %o5, %g2, %o4
        srl             %o4, 16, %o5
        andn            %o4, %g1, %g2
        add             %o5, %g2, %o4

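        /* The folds above as a C sketch (names are illustrative):
         *
         *	sum64 = (sum64 >> 32) + (sum64 & 0xffffffffUL);  // 64 --> 32
         *	sum64 = (sum64 >> 32) + (sum64 & 0xffffffffUL);  // + carry
         *	sum32 = (sum32 >> 16) + (sum32 & 0xffff);        // 32 --> 16
         *	sum32 = (sum32 >> 16) + (sum32 & 0xffff);        // + carry
         *
         * Each shift-and-add feeds the high bits back into the low bits:
         * the end-around carry that ones-complement addition requires.
         */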
csum_partial_end_cruft:
        /* %o4 has the 16-bit sum we have calculated so far. */
        cmp             %o1, 2
        blu,pt          %icc, 1f
         nop
        lduh            [%o0 + 0x00], %o5
        sub             %o1, 2, %o1
        add             %o0, 2, %o0
        add             %o4, %o5, %o4
1:      brz,pt          %o1, 1f
         nop
        ldub            [%o0 + 0x00], %o5
        sub             %o1, 1, %o1
        add             %o0, 1, %o0
        sllx            %o5, 8, %o5
        add             %o4, %o5, %o4
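        /* The trailing odd byte is shifted left by 8 before being added
         * because on big-endian SPARC it is the high-order byte of its
         * 16-bit checksum word; in C terms (sketch):
         *
         *	sum += (unsigned int)last_byte << 8;
         */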
1:
        /* fold 32-->16 */
        sethi           %hi(0xffff0000), %g1
        srl             %o4, 16, %o5
        andn            %o4, %g1, %g2
        add             %o5, %g2, %o4
        srl             %o4, 16, %o5
        andn            %o4, %g1, %g2
        add             %o5, %g2, %o4

1:      brz,pt          %g7, 1f
         nop

        /* We started with an odd byte; byte-swap the result. */
        srl             %o4, 8, %o5
        and             %o4, 0xff, %g1
        sll             %g1, 8, %g1
        or              %o5, %g1, %o4

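        /* In C terms the swap above is (sketch):
         *
         *	sum = (sum >> 8) | ((sum & 0xff) << 8);
         *
         * undoing the byte transposition recorded in %g7 at entry.
         */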
1:      addcc           %o2, %o4, %o2
        addc            %g0, %o2, %o2

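        /* The addcc/addc pair performs a 32-bit end-around-carry add of
         * the caller's running sum, roughly (C sketch, 64-bit temporary):
         *
         *	unsigned long t = (unsigned int)sum + (unsigned int)csum;
         *	sum = (unsigned int)t + (t >> 32);	// reinject the carry
         */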
csum_partial_finish:
        retl
         srl            %o2, 0, %o0