GitHub Repository: awilliam/linux-vfio
Path: blob/master/net/ipv6/ip6_output.c
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<[email protected]>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
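
/*
 *	Note on the helpers below: nf_hook() returns 1 when the LOCAL_OUT
 *	verdict is NF_ACCEPT and the caller should continue transmission
 *	itself, which is why ip6_local_out() treats a return value of 1 as
 *	"now call dst_output()". The payload_len fixup writes 0 whenever the
 *	payload exceeds IPV6_MAXPLEN (65535 octets): a zero Payload Length is
 *	how jumbograms (RFC 2675) are marked, the real length being carried
 *	in a Hop-by-Hop option.
 */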
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx_ni(newskb);
	return 0;
}
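
/*
 *	ip6_finish_output2() below is the last stop before the neighbour
 *	layer. For multicast destinations it may first loop a clone of the
 *	packet back to the local stack through ip6_dev_loopback_xmit():
 *	either because the sending socket has multicast loopback enabled and
 *	this host is itself a member of the destination group, or because a
 *	multicast routing socket needs to see the packet. A hop limit of 0
 *	on such a packet means it was intended for the local host only, so
 *	the original is discarded after the loopback copy is queued.
 */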

static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}
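
/*
 *	NF_HOOK_COND() traverses the POST_ROUTING hook only when its final
 *	condition argument is true. IP6SKB_REROUTED marks packets that were
 *	rerouted after already passing this hook, so they are not run through
 *	POST_ROUTING a second time.
 */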

int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	int tclass = 0;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np) {
		tclass = np->tclass;
		hlimit = np->hop_limit;
	}
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

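	/*
	 *	The first 32-bit word of the IPv6 header packs three fields:
	 *	version (6) in bits 31-28, traffic class in bits 27-20 and
	 *	flow label in bits 19-0. htonl(0x60000000 | (tclass << 20))
	 *	supplies the version and traffic class, and fl6->flowlabel,
	 *	already in network byte order, fills in the low 20 bits.
	 */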
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}
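
/*
 *	ip6_call_ra_chain() hands packets carrying a Router Alert hop-by-hop
 *	option to raw sockets that registered for that alert value; the
 *	ip6_ra_chain list is, to my understanding, populated through the
 *	IPV6_ROUTER_ALERT socket option. "sel" is the 16-bit value taken
 *	from the option. The last matching socket consumes the skb itself,
 *	earlier matches receive clones.
 */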

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
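
/*
 *	ip6_forward() below is the router path. In order: validate
 *	(forwarding enabled, no LRO skb, xfrm policy, unicast pkt_type),
 *	hand Router Alert packets to interested raw sockets, answer a hop
 *	limit of <= 1 with Time Exceeded, honour NDISC proxying, possibly
 *	send a Redirect when the packet leaves on the interface it arrived
 *	on, enforce the path MTU with Packet Too Big, and only then, once
 *	skb_cow() has made the header writable, decrement hop_limit and
 *	pass the packet to the FORWARD netfilter hook.
 */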

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We do not do any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that the application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not an end node, so if the packet contains
	 *	AH/ESP we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}
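
/*
 *	ip6_find_1stfragopt() returns the length of the "unfragmentable
 *	part" of the packet (RFC 2460, section 4.5): the IPv6 header plus
 *	any Hop-by-Hop, Routing and preceding Destination Options headers,
 *	which must be repeated in every fragment. The Fragment header is
 *	inserted at the returned offset, and *nexthdr is left pointing at
 *	the Next Header byte that will be overwritten with NEXTHDR_FRAGMENT.
 */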

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
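
/*
 *	ip6_fragment() has two paths. The fast path applies when the skb
 *	already carries a frag_list with suitable geometry (every chunk
 *	within the MTU, all but the last a multiple of 8 bytes, enough
 *	headroom, nothing shared): the existing chunks are converted into
 *	fragments in place. Anything else takes the slow path, which
 *	allocates a fresh skb per fragment and copies the payload out.
 */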

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (!skb->local_df && skb->len > mtu) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->dst);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

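	/*
	 *	Slow path: copy the payload into newly allocated skbs, one per
	 *	fragment. Every fragment except the last must carry a multiple
	 *	of 8 octets (hence the len &= ~7 below), because the Fragment
	 *	header expresses the offset in 8-octet units; IP6_MF flags all
	 *	fragments but the last.
	 */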
slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
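
/*
 *	ip6_rt_check() returns nonzero when the cached route can no longer
 *	be tied to the flow address: neither a /128 host route key matching
 *	fl_addr nor a cached peer address equal to it. A nonzero result
 *	makes ip6_sk_dst_check() below drop the cached dst and force a
 *	fresh lookup.
 */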

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst,
				      bool can_sleep)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl6->daddr, final_dst);
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl6->daddr, final_dst);
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
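
/*
 *	ip6_ufo_append_data() below builds one oversized skb and lets a
 *	UFO-capable device (NETIF_F_UFO) do the segmentation. gso_size is
 *	rounded down to a multiple of 8 because IPv6 fragment offsets are
 *	expressed in 8-octet units, and the fragment ID is selected up front
 *	so every fragment produced by the device shares one identification
 *	value.
 */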

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
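
/*
 *	ip6_append_data() implements the same corking model as IPv4: the
 *	first call on an empty write queue captures the route, flow, options
 *	and MTU in the socket cork, and subsequent calls only append data.
 *	Data is laid out across queued skbs so that each one but the last
 *	ends on a fragment boundary (maxfraglen); nothing is transmitted
 *	here, that is ip6_push_pending_frames()'s job.
 */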

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM) {
		err = sock_tx_timestamp(sk, &tx_flags);
		if (err)
			goto error;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}

		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

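	/*
	 *	Append loop: "copy" is how many bytes still fit into the tail
	 *	skb of the write queue. When copy <= 0 a new skb is allocated,
	 *	and any bytes beyond maxfraglen in the previous skb (the
	 *	"fraggap") are moved into the new one, so that every queued
	 *	skb except the last ends exactly on a fragment boundary.
	 */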
	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.base.dst) {
		dst_release(inet->cork.base.dst);
		inet->cork.base.dst = NULL;
		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}
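
/*
 *	ip6_push_pending_frames() below collapses the write queue into a
 *	single skb: the first queued skb becomes the head and every later
 *	one is chained onto its frag_list, which is exactly the shape
 *	ip6_fragment() consumes on its fast path. The extension headers and
 *	the IPv6 header are then pushed in front and the result goes out
 *	through ip6_local_out().
 */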

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl6->daddr);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = fl6->flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}