Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/net/ipv4/inet_lro.c
15109 views
1
/*
2
* linux/net/ipv4/inet_lro.c
3
*
4
* Large Receive Offload (ipv4 / tcp)
5
*
6
* (C) Copyright IBM Corp. 2007
7
*
8
* Authors:
9
* Jan-Bernd Themann <[email protected]>
10
* Christoph Raisch <[email protected]>
11
*
12
*
13
* This program is free software; you can redistribute it and/or modify
14
* it under the terms of the GNU General Public License as published by
15
* the Free Software Foundation; either version 2, or (at your option)
16
* any later version.
17
*
18
* This program is distributed in the hope that it will be useful,
19
* but WITHOUT ANY WARRANTY; without even the implied warranty of
20
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21
* GNU General Public License for more details.
22
*
23
* You should have received a copy of the GNU General Public License
24
* along with this program; if not, write to the Free Software
25
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26
*/
27
28
29
#include <linux/module.h>
30
#include <linux/if_vlan.h>
31
#include <linux/inet_lro.h>
32
33
MODULE_LICENSE("GPL");
34
MODULE_AUTHOR("Jan-Bernd Themann <[email protected]>");
35
MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
36
37
/*
 * Header lengths in bytes. The doff/ihl header fields count 32-bit words,
 * hence the shift by two. Arguments are parenthesized so that pointer
 * expressions such as "p + 1" expand correctly.
 */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)
/* TCP payload bytes: IP total length minus the IP and TCP header lengths */
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* ihl / doff values (in 32-bit words) that lro_tcp_ip_check() accepts */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

/* Initial header length assumed when an skb is built from page fragments */
#define LRO_MAX_PG_HLEN 64

/*
 * Increment one counter in lro_mgr->stats. Wrapped in do/while(0) so the
 * macro behaves as a single statement, e.g. in an unbraced if/else.
 */
#define LRO_INC_STATS(lro_mgr, attr) do { (lro_mgr)->stats.attr++; } while (0)
49
50
/*
51
* Basic tcp checks whether packet is suitable for LRO
52
*/
53
54
static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
55
int len, const struct net_lro_desc *lro_desc)
56
{
57
/* check ip header: don't aggregate padded frames */
58
if (ntohs(iph->tot_len) != len)
59
return -1;
60
61
if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
62
return -1;
63
64
if (iph->ihl != IPH_LEN_WO_OPTIONS)
65
return -1;
66
67
if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
68
tcph->rst || tcph->syn || tcph->fin)
69
return -1;
70
71
if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
72
return -1;
73
74
if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
75
tcph->doff != TCPH_LEN_W_TIMESTAMP)
76
return -1;
77
78
/* check tcp options (only timestamp allowed) */
79
if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
80
__be32 *topt = (__be32 *)(tcph + 1);
81
82
if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
83
| (TCPOPT_TIMESTAMP << 8)
84
| TCPOLEN_TIMESTAMP))
85
return -1;
86
87
/* timestamp should be in right order */
88
topt++;
89
if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
90
ntohl(*topt)))
91
return -1;
92
93
/* timestamp reply should not be zero */
94
topt++;
95
if (*topt == 0)
96
return -1;
97
}
98
99
return 0;
100
}
101
102
static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
103
{
104
struct iphdr *iph = lro_desc->iph;
105
struct tcphdr *tcph = lro_desc->tcph;
106
__be32 *p;
107
__wsum tcp_hdr_csum;
108
109
tcph->ack_seq = lro_desc->tcp_ack;
110
tcph->window = lro_desc->tcp_window;
111
112
if (lro_desc->tcp_saw_tstamp) {
113
p = (__be32 *)(tcph + 1);
114
*(p+2) = lro_desc->tcp_rcv_tsecr;
115
}
116
117
iph->tot_len = htons(lro_desc->ip_tot_len);
118
119
iph->check = 0;
120
iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);
121
122
tcph->check = 0;
123
tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
124
lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
125
tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
126
lro_desc->ip_tot_len -
127
IP_HDR_LEN(iph), IPPROTO_TCP,
128
lro_desc->data_csum);
129
}
130
131
static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
132
{
133
__wsum tcp_csum;
134
__wsum tcp_hdr_csum;
135
__wsum tcp_ps_hdr_csum;
136
137
tcp_csum = ~csum_unfold(tcph->check);
138
tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
139
140
tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
141
len + TCP_HDR_LEN(tcph),
142
IPPROTO_TCP, 0);
143
144
return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
145
tcp_ps_hdr_csum);
146
}
147
148
static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
149
struct iphdr *iph, struct tcphdr *tcph,
150
u16 vlan_tag, struct vlan_group *vgrp)
151
{
152
int nr_frags;
153
__be32 *ptr;
154
u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
155
156
nr_frags = skb_shinfo(skb)->nr_frags;
157
lro_desc->parent = skb;
158
lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
159
lro_desc->iph = iph;
160
lro_desc->tcph = tcph;
161
lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
162
lro_desc->tcp_ack = tcph->ack_seq;
163
lro_desc->tcp_window = tcph->window;
164
165
lro_desc->pkt_aggr_cnt = 1;
166
lro_desc->ip_tot_len = ntohs(iph->tot_len);
167
168
if (tcph->doff == 8) {
169
ptr = (__be32 *)(tcph+1);
170
lro_desc->tcp_saw_tstamp = 1;
171
lro_desc->tcp_rcv_tsval = *(ptr+1);
172
lro_desc->tcp_rcv_tsecr = *(ptr+2);
173
}
174
175
lro_desc->mss = tcp_data_len;
176
lro_desc->vgrp = vgrp;
177
lro_desc->vlan_tag = vlan_tag;
178
lro_desc->active = 1;
179
180
lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
181
tcp_data_len);
182
}
183
184
static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
185
{
186
memset(lro_desc, 0, sizeof(struct net_lro_desc));
187
}
188
189
static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
190
struct tcphdr *tcph, int tcp_data_len)
191
{
192
struct sk_buff *parent = lro_desc->parent;
193
__be32 *topt;
194
195
lro_desc->pkt_aggr_cnt++;
196
lro_desc->ip_tot_len += tcp_data_len;
197
lro_desc->tcp_next_seq += tcp_data_len;
198
lro_desc->tcp_window = tcph->window;
199
lro_desc->tcp_ack = tcph->ack_seq;
200
201
/* don't update tcp_rcv_tsval, would not work with PAWS */
202
if (lro_desc->tcp_saw_tstamp) {
203
topt = (__be32 *) (tcph + 1);
204
lro_desc->tcp_rcv_tsecr = *(topt + 2);
205
}
206
207
lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
208
lro_tcp_data_csum(iph, tcph,
209
tcp_data_len),
210
parent->len);
211
212
parent->len += tcp_data_len;
213
parent->data_len += tcp_data_len;
214
if (tcp_data_len > lro_desc->mss)
215
lro_desc->mss = tcp_data_len;
216
}
217
218
static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
219
struct iphdr *iph, struct tcphdr *tcph)
220
{
221
struct sk_buff *parent = lro_desc->parent;
222
int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
223
224
lro_add_common(lro_desc, iph, tcph, tcp_data_len);
225
226
skb_pull(skb, (skb->len - tcp_data_len));
227
parent->truesize += skb->truesize;
228
229
if (lro_desc->last_skb)
230
lro_desc->last_skb->next = skb;
231
else
232
skb_shinfo(parent)->frag_list = skb;
233
234
lro_desc->last_skb = skb;
235
}
236
237
static void lro_add_frags(struct net_lro_desc *lro_desc,
238
int len, int hlen, int truesize,
239
struct skb_frag_struct *skb_frags,
240
struct iphdr *iph, struct tcphdr *tcph)
241
{
242
struct sk_buff *skb = lro_desc->parent;
243
int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
244
245
lro_add_common(lro_desc, iph, tcph, tcp_data_len);
246
247
skb->truesize += truesize;
248
249
skb_frags[0].page_offset += hlen;
250
skb_frags[0].size -= hlen;
251
252
while (tcp_data_len > 0) {
253
*(lro_desc->next_frag) = *skb_frags;
254
tcp_data_len -= skb_frags->size;
255
lro_desc->next_frag++;
256
skb_frags++;
257
skb_shinfo(skb)->nr_frags++;
258
}
259
}
260
261
static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
262
struct iphdr *iph,
263
struct tcphdr *tcph)
264
{
265
if ((lro_desc->iph->saddr != iph->saddr) ||
266
(lro_desc->iph->daddr != iph->daddr) ||
267
(lro_desc->tcph->source != tcph->source) ||
268
(lro_desc->tcph->dest != tcph->dest))
269
return -1;
270
return 0;
271
}
272
273
static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
274
struct net_lro_desc *lro_arr,
275
struct iphdr *iph,
276
struct tcphdr *tcph)
277
{
278
struct net_lro_desc *lro_desc = NULL;
279
struct net_lro_desc *tmp;
280
int max_desc = lro_mgr->max_desc;
281
int i;
282
283
for (i = 0; i < max_desc; i++) {
284
tmp = &lro_arr[i];
285
if (tmp->active)
286
if (!lro_check_tcp_conn(tmp, iph, tcph)) {
287
lro_desc = tmp;
288
goto out;
289
}
290
}
291
292
for (i = 0; i < max_desc; i++) {
293
if (!lro_arr[i].active) {
294
lro_desc = &lro_arr[i];
295
goto out;
296
}
297
}
298
299
LRO_INC_STATS(lro_mgr, no_desc);
300
out:
301
return lro_desc;
302
}
303
304
static void lro_flush(struct net_lro_mgr *lro_mgr,
305
struct net_lro_desc *lro_desc)
306
{
307
if (lro_desc->pkt_aggr_cnt > 1)
308
lro_update_tcp_ip_header(lro_desc);
309
310
skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
311
312
if (lro_desc->vgrp) {
313
if (lro_mgr->features & LRO_F_NAPI)
314
vlan_hwaccel_receive_skb(lro_desc->parent,
315
lro_desc->vgrp,
316
lro_desc->vlan_tag);
317
else
318
vlan_hwaccel_rx(lro_desc->parent,
319
lro_desc->vgrp,
320
lro_desc->vlan_tag);
321
322
} else {
323
if (lro_mgr->features & LRO_F_NAPI)
324
netif_receive_skb(lro_desc->parent);
325
else
326
netif_rx(lro_desc->parent);
327
}
328
329
LRO_INC_STATS(lro_mgr, flushed);
330
lro_clear_desc(lro_desc);
331
}
332
333
/*
 * Try to aggregate one received skb.
 *
 * @lro_mgr:  LRO manager (descriptor array, callbacks, features, stats)
 * @skb:      received packet
 * @vgrp:     VLAN group to record for a new session (may be NULL)
 * @vlan_tag: VLAN tag to record for a new session
 * @priv:     opaque pointer passed through to get_skb_header()
 *
 * Returns 0 when the skb was taken over by LRO (it started a session or was
 * appended to one) and 1 when the caller still has to pass it to the stack.
 * In the latter case a running session for the same connection has already
 * been flushed if the skb failed the sequence or suitability checks.
 */
static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
			  struct vlan_group *vgrp, u16 vlan_tag, void *priv)
{
	struct net_lro_desc *lro_desc;
	struct iphdr *iph;
	struct tcphdr *tcph;
	u64 flags;
	int vlan_hdr_len = 0;

	if (!lro_mgr->get_skb_header ||
	    lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
				    &flags, priv))
		goto out;

	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
		goto out;

	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
	if (!lro_desc)
		goto out;

	/* an in-band VLAN header is part of skb->len but not of the IP length */
	if ((skb->protocol == htons(ETH_P_8021Q)) &&
	    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
		vlan_hdr_len = VLAN_HLEN;

	if (!lro_desc->active) { /* start new lro session */
		if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
			goto out;

		skb->ip_summed = lro_mgr->ip_summed_aggr;
		lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp);
		LRO_INC_STATS(lro_mgr, aggregated);
		return 0;
	}

	/* only the segment that directly follows the session may be added */
	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
		goto out2;

	/*
	 * Use the same length as for a new session: without subtracting the
	 * VLAN header the IP total length can never match for VLAN frames
	 * and they would never be aggregated.
	 */
	if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, lro_desc))
		goto out2;

	lro_add_packet(lro_desc, skb, iph, tcph);
	LRO_INC_STATS(lro_mgr, aggregated);

	/* flush when the packet limit is hit or one more MTU might not fit */
	if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
		lro_flush(lro_mgr, lro_desc);

	return 0;

out2: /* send aggregated SKBs to stack */
	lro_flush(lro_mgr, lro_desc);

out:
	return 1;
}
389
390
391
static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
392
struct skb_frag_struct *frags,
393
int len, int true_size,
394
void *mac_hdr,
395
int hlen, __wsum sum,
396
u32 ip_summed)
397
{
398
struct sk_buff *skb;
399
struct skb_frag_struct *skb_frags;
400
int data_len = len;
401
int hdr_len = min(len, hlen);
402
403
skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
404
if (!skb)
405
return NULL;
406
407
skb_reserve(skb, lro_mgr->frag_align_pad);
408
skb->len = len;
409
skb->data_len = len - hdr_len;
410
skb->truesize += true_size;
411
skb->tail += hdr_len;
412
413
memcpy(skb->data, mac_hdr, hdr_len);
414
415
skb_frags = skb_shinfo(skb)->frags;
416
while (data_len > 0) {
417
*skb_frags = *frags;
418
data_len -= frags->size;
419
skb_frags++;
420
frags++;
421
skb_shinfo(skb)->nr_frags++;
422
}
423
424
skb_shinfo(skb)->frags[0].page_offset += hdr_len;
425
skb_shinfo(skb)->frags[0].size -= hdr_len;
426
427
skb->ip_summed = ip_summed;
428
skb->csum = sum;
429
skb->protocol = eth_type_trans(skb, lro_mgr->dev);
430
return skb;
431
}
432
433
static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
434
struct skb_frag_struct *frags,
435
int len, int true_size,
436
struct vlan_group *vgrp,
437
u16 vlan_tag, void *priv, __wsum sum)
438
{
439
struct net_lro_desc *lro_desc;
440
struct iphdr *iph;
441
struct tcphdr *tcph;
442
struct sk_buff *skb;
443
u64 flags;
444
void *mac_hdr;
445
int mac_hdr_len;
446
int hdr_len = LRO_MAX_PG_HLEN;
447
int vlan_hdr_len = 0;
448
449
if (!lro_mgr->get_frag_header ||
450
lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
451
(void *)&tcph, &flags, priv)) {
452
mac_hdr = page_address(frags->page) + frags->page_offset;
453
goto out1;
454
}
455
456
if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
457
goto out1;
458
459
hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
460
mac_hdr_len = (int)((void *)(iph) - mac_hdr);
461
462
lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
463
if (!lro_desc)
464
goto out1;
465
466
if (!lro_desc->active) { /* start new lro session */
467
if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
468
goto out1;
469
470
skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
471
hdr_len, 0, lro_mgr->ip_summed_aggr);
472
if (!skb)
473
goto out;
474
475
if ((skb->protocol == htons(ETH_P_8021Q)) &&
476
!(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
477
vlan_hdr_len = VLAN_HLEN;
478
479
iph = (void *)(skb->data + vlan_hdr_len);
480
tcph = (void *)((u8 *)skb->data + vlan_hdr_len
481
+ IP_HDR_LEN(iph));
482
483
lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL);
484
LRO_INC_STATS(lro_mgr, aggregated);
485
return NULL;
486
}
487
488
if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
489
goto out2;
490
491
if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
492
goto out2;
493
494
lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
495
LRO_INC_STATS(lro_mgr, aggregated);
496
497
if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
498
lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
499
lro_flush(lro_mgr, lro_desc);
500
501
return NULL;
502
503
out2: /* send aggregated packets to the stack */
504
lro_flush(lro_mgr, lro_desc);
505
506
out1: /* Original packet has to be posted to the stack */
507
skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
508
hdr_len, sum, lro_mgr->ip_summed);
509
out:
510
return skb;
511
}
512
513
void lro_receive_skb(struct net_lro_mgr *lro_mgr,
514
struct sk_buff *skb,
515
void *priv)
516
{
517
if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) {
518
if (lro_mgr->features & LRO_F_NAPI)
519
netif_receive_skb(skb);
520
else
521
netif_rx(skb);
522
}
523
}
524
EXPORT_SYMBOL(lro_receive_skb);
525
526
void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr,
527
struct sk_buff *skb,
528
struct vlan_group *vgrp,
529
u16 vlan_tag,
530
void *priv)
531
{
532
if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) {
533
if (lro_mgr->features & LRO_F_NAPI)
534
vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
535
else
536
vlan_hwaccel_rx(skb, vgrp, vlan_tag);
537
}
538
}
539
EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb);
540
541
/*
 * Entry point for drivers that receive packets as page fragments: aggregate
 * the packet if possible; if an skb comes back instead, deliver it through
 * the NAPI or non-NAPI receive function selected by LRO_F_NAPI.
 */
void lro_receive_frags(struct net_lro_mgr *lro_mgr,
		       struct skb_frag_struct *frags,
		       int len, int true_size, void *priv, __wsum sum)
{
	struct sk_buff *pending;

	pending = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0,
				     priv, sum);
	if (pending) {
		if (lro_mgr->features & LRO_F_NAPI)
			netif_receive_skb(pending);
		else
			netif_rx(pending);
	}
}
EXPORT_SYMBOL(lro_receive_frags);
558
559
void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr,
560
struct skb_frag_struct *frags,
561
int len, int true_size,
562
struct vlan_group *vgrp,
563
u16 vlan_tag, void *priv, __wsum sum)
564
{
565
struct sk_buff *skb;
566
567
skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp,
568
vlan_tag, priv, sum);
569
if (!skb)
570
return;
571
572
if (lro_mgr->features & LRO_F_NAPI)
573
vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
574
else
575
vlan_hwaccel_rx(skb, vgrp, vlan_tag);
576
}
577
EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags);
578
579
void lro_flush_all(struct net_lro_mgr *lro_mgr)
580
{
581
int i;
582
struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
583
584
for (i = 0; i < lro_mgr->max_desc; i++) {
585
if (lro_desc[i].active)
586
lro_flush(lro_mgr, &lro_desc[i]);
587
}
588
}
589
EXPORT_SYMBOL(lro_flush_all);
590
591
void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
592
struct iphdr *iph, struct tcphdr *tcph)
593
{
594
struct net_lro_desc *lro_desc;
595
596
lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
597
if (lro_desc->active)
598
lro_flush(lro_mgr, lro_desc);
599
}
600
EXPORT_SYMBOL(lro_flush_pkt);
601
602