GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/lib/csum-partial_64.c
/*
 * arch/x86_64/lib/csum-partial.c
 *
 * This file contains network checksum routines that are better done
 * in an architecture-specific manner due to speed.
 */

#include <linux/compiler.h>
#include <linux/module.h>
#include <asm/checksum.h>

static inline unsigned short from32to16(unsigned a)
{
        unsigned short b = a >> 16;
        asm("addw %w2,%w0\n\t"
            "adcw $0,%w0\n"
            : "=r" (b)
            : "0" (b), "r" (a));
        return b;
}
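
/*
 * Editor's note, not part of the original file: a minimal portable C
 * sketch of what the inline asm above computes, kept under #if 0 so it
 * is never built. from32to16() folds a 32-bit value down to 16 bits
 * with end-around carry: the high half is added to the low half, and
 * the carry out of that 16-bit add is added back in. The helper name
 * below is hypothetical.
 */
#if 0
static unsigned short from32to16_portable(unsigned a)
{
        unsigned sum = (a >> 16) + (a & 0xffff); /* fold high half into low */
        sum += sum >> 16;                        /* add the carry back in */
        return (unsigned short)sum;
}
#endif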

/*
 * Do a 64-bit checksum on an arbitrary memory area.
 * Returns a 32bit checksum.
 *
 * This isn't as time critical as it used to be because many NICs
 * do hardware checksumming these days.
 *
 * Things tried and found to not make it faster:
 * Manual Prefetching
 * Unrolling to an 128 bytes inner loop.
 * Using interleaving with more registers to break the carry chains.
 */
static unsigned do_csum(const unsigned char *buff, unsigned len)
{
        unsigned odd, count;
        unsigned long result = 0;

        if (unlikely(len == 0))
                return result;
        odd = 1 & (unsigned long) buff;
        if (unlikely(odd)) {
                result = *buff << 8;
                len--;
                buff++;
        }
        count = len >> 1;               /* nr of 16-bit words.. */
        if (count) {
                if (2 & (unsigned long) buff) {
                        result += *(unsigned short *)buff;
                        count--;
                        len -= 2;
                        buff += 2;
                }
                count >>= 1;            /* nr of 32-bit words.. */
                if (count) {
                        unsigned long zero;
                        unsigned count64;
                        if (4 & (unsigned long) buff) {
                                result += *(unsigned int *) buff;
                                count--;
                                len -= 4;
                                buff += 4;
                        }
                        count >>= 1;    /* nr of 64-bit words.. */

                        /* main loop using 64byte blocks */
                        zero = 0;
                        count64 = count >> 3;
                        while (count64) {
                                asm("addq 0*8(%[src]),%[res]\n\t"
                                    "adcq 1*8(%[src]),%[res]\n\t"
                                    "adcq 2*8(%[src]),%[res]\n\t"
                                    "adcq 3*8(%[src]),%[res]\n\t"
                                    "adcq 4*8(%[src]),%[res]\n\t"
                                    "adcq 5*8(%[src]),%[res]\n\t"
                                    "adcq 6*8(%[src]),%[res]\n\t"
                                    "adcq 7*8(%[src]),%[res]\n\t"
                                    "adcq %[zero],%[res]"
                                    : [res] "=r" (result)
                                    : [src] "r" (buff), [zero] "r" (zero),
                                      "[res]" (result));
                                buff += 64;
                                count64--;
                        }

                        /* last up to 7 8byte blocks */
                        count %= 8;
                        while (count) {
                                asm("addq %1,%0\n\t"
                                    "adcq %2,%0\n"
                                    : "=r" (result)
                                    : "m" (*(unsigned long *)buff),
                                      "r" (zero), "0" (result));
                                --count;
                                buff += 8;
                        }
                        result = add32_with_carry(result>>32,
                                                  result&0xffffffff);

                        if (len & 4) {
                                result += *(unsigned int *) buff;
                                buff += 4;
                        }
                }
                if (len & 2) {
                        result += *(unsigned short *) buff;
                        buff += 2;
                }
        }
        if (len & 1)
                result += *buff;
        result = add32_with_carry(result>>32, result & 0xffffffff);
        if (unlikely(odd)) {
                result = from32to16(result);
                result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
        }
        return result;
}
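
/*
 * Editor's note, not part of the original file: an unoptimized sketch
 * (under #if 0, never built) of the one's-complement sum that do_csum()
 * accelerates, in the spirit of RFC 1071. It skips the alignment and
 * odd-start handling above; folding the carries early, as done here,
 * yields an equivalent one's-complement value. The function name is
 * hypothetical.
 */
#if 0
static unsigned do_csum_reference(const unsigned char *buff, unsigned len)
{
        unsigned long sum = 0;

        while (len >= 2) {                      /* sum 16-bit words */
                sum += *(const unsigned short *)buff;
                buff += 2;
                len -= 2;
        }
        if (len)                                /* trailing odd byte */
                sum += *buff;
        while (sum >> 16)                       /* fold carries back in */
                sum = (sum & 0xffff) + (sum >> 16);
        return sum;
}
#endif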

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * returns a 32-bit number suitable for feeding into itself
 * or csum_tcpudp_magic
 *
 * this function must be called with even lengths, except
 * for the last fragment, which may be odd
 *
 * it's best to have buff aligned on a 64-bit boundary
 */
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
        return (__force __wsum)add32_with_carry(do_csum(buff, len),
                                                (__force u32)sum);
}
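
/*
 * Editor's note, not part of the original file: a sketch (under #if 0)
 * of the chaining pattern the comment above describes. The running
 * __wsum from one call is fed back into the next, and only the final
 * result is folded to 16 bits; every fragment but the last must have
 * an even length. The function name is hypothetical.
 */
#if 0
static __sum16 csum_over_two_fragments(const void *frag1, int len1,
                                       const void *frag2, int len2)
{
        __wsum sum;

        sum = csum_partial(frag1, len1, 0);     /* len1 must be even */
        sum = csum_partial(frag2, len2, sum);   /* last fragment may be odd */
        return csum_fold(sum);                  /* collapse to 16 bits */
}
#endif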

/*
 * this routine is used for miscellaneous IP-like checksums, mainly
 * in icmp.c
 */
__sum16 ip_compute_csum(const void *buff, int len)
{
        return csum_fold(csum_partial(buff,len,0));
}
EXPORT_SYMBOL(ip_compute_csum);
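
/*
 * Editor's note, not part of the original file: a sketch (under #if 0)
 * of the usual ICMP usage this export serves. The checksum field is
 * zeroed, ip_compute_csum() is run over the whole message, and the
 * result is written back; recomputing over the finished message then
 * yields 0. Assumes struct icmphdr from <linux/icmp.h>; the function
 * name is hypothetical.
 */
#if 0
static void icmp_set_checksum(struct icmphdr *icmph, int total_len)
{
        icmph->checksum = 0;
        icmph->checksum = ip_compute_csum(icmph, total_len);

        /* A correct checksum makes the whole message sum to zero. */
        WARN_ON(ip_compute_csum(icmph, total_len) != 0);
}
#endif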