GitHub Repository: torvalds/linux
Path: blob/master/net/ipv6/ip6_output.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<[email protected]>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/gso.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

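/* Final transmit step after netfilter POST_ROUTING: expand headroom for the
 * link-layer header if needed, loop multicast copies back when required,
 * honour lightweight-tunnel redirects, resolve the next-hop neighbour and
 * hand the packet to neigh_output().
 */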
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst_dev(dst);
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	const struct in6_addr *daddr, *nexthop;
	struct ipv6hdr *hdr;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
		/* Make sure idev stays alive */
		rcu_read_lock();
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			rcu_read_unlock();
			return -ENOMEM;
		}
		rcu_read_unlock();
	}

	hdr = ipv6_hdr(skb);
	daddr = &hdr->daddr;
	if (ipv6_addr_is_multicast(daddr)) {
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (hdr->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
		if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res != LWTUNNEL_XMIT_CONTINUE)
			return res;
	}

	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);

	rcu_read_lock();
	nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
	neigh = __ipv6_neigh_lookup_noref(dev, nexthop);

	if (IS_ERR_OR_NULL(neigh)) {
		if (unlikely(!neigh))
			neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
		if (IS_ERR(neigh)) {
			rcu_read_unlock();
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
			return -EINVAL;
		}
	}
	sock_confirm_neigh(skb, neigh);
	ret = neigh_output(neigh, skb, false);
	rcu_read_unlock();
	return ret;
}

static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		/* Last GSO segment can be smaller than gso_size (and MTU).
		 * Adding a fragment header would produce an "atomic fragment",
		 * which is considered harmful (RFC-8021). Avoid that.
		 */
		err = segs->len > mtu ?
			ip6_fragment(net, sk, segs, ip6_finish_output2) :
			ip6_finish_output2(net, sk, segs);
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

static int ip6_finish_output_gso(struct net *net, struct sock *sk,
				 struct sk_buff *skb, unsigned int mtu)
{
	if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
	    !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	return ip6_finish_output2(net, sk, skb);
}

static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IP6CB(skb)->flags |= IP6SKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb))
		return ip6_finish_output_gso(net, sk, skb, mtu);

	if (skb->len > mtu ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);

	return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
		return ret;
	}
}

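/* Output entry point installed as the dst_entry output handler for locally
 * generated and forwarded packets: drops the packet if IPv6 is disabled on
 * the egress device, otherwise runs the NF_INET_POST_ROUTING hook and
 * continues in ip6_finish_output() unless the packet was rerouted by xfrm.
 */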
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst_dev(dst), *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);
254
255
bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
256
{
257
if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk))
258
return ip6_default_np_autolabel(net);
259
return inet6_test_bit(AUTOFLOWLABEL, sk);
260
}
261
262
/*
263
* xmit an sk_buff (used by TCP and SCTP)
264
* Note : socket lock is not held for SYNACK packets, but might be modified
265
* by calls to skb_set_owner_w() and ipv6_local_error(),
266
* which are using proper atomic operations or spinlocks.
267
*/
268
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
269
__u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
270
{
271
struct net *net = sock_net(sk);
272
const struct ipv6_pinfo *np = inet6_sk(sk);
273
struct in6_addr *first_hop = &fl6->daddr;
274
struct dst_entry *dst = skb_dst(skb);
275
struct net_device *dev = dst_dev(dst);
276
struct inet6_dev *idev = ip6_dst_idev(dst);
277
struct hop_jumbo_hdr *hop_jumbo;
278
int hoplen = sizeof(*hop_jumbo);
279
unsigned int head_room;
280
struct ipv6hdr *hdr;
281
u8 proto = fl6->flowi6_proto;
282
int seg_len = skb->len;
283
int hlimit = -1;
284
u32 mtu;
285
286
head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
287
if (opt)
288
head_room += opt->opt_nflen + opt->opt_flen;
289
290
if (unlikely(head_room > skb_headroom(skb))) {
291
/* Make sure idev stays alive */
292
rcu_read_lock();
293
skb = skb_expand_head(skb, head_room);
294
if (!skb) {
295
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
296
rcu_read_unlock();
297
return -ENOBUFS;
298
}
299
rcu_read_unlock();
300
}
301
302
if (opt) {
303
seg_len += opt->opt_nflen + opt->opt_flen;
304
305
if (opt->opt_flen)
306
ipv6_push_frag_opts(skb, opt, &proto);
307
308
if (opt->opt_nflen)
309
ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
310
&fl6->saddr);
311
}
312
313
if (unlikely(seg_len > IPV6_MAXPLEN)) {
314
hop_jumbo = skb_push(skb, hoplen);
315
316
hop_jumbo->nexthdr = proto;
317
hop_jumbo->hdrlen = 0;
318
hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
319
hop_jumbo->tlv_len = 4;
320
hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen);
321
322
proto = IPPROTO_HOPOPTS;
323
seg_len = 0;
324
IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
325
}
326
327
skb_push(skb, sizeof(struct ipv6hdr));
328
skb_reset_network_header(skb);
329
hdr = ipv6_hdr(skb);
330
331
/*
332
* Fill in the IPv6 header
333
*/
334
if (np)
335
hlimit = READ_ONCE(np->hop_limit);
336
if (hlimit < 0)
337
hlimit = ip6_dst_hoplimit(dst);
338
339
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
340
ip6_autoflowlabel(net, sk), fl6));
341
342
hdr->payload_len = htons(seg_len);
343
hdr->nexthdr = proto;
344
hdr->hop_limit = hlimit;
345
346
hdr->saddr = fl6->saddr;
347
hdr->daddr = *first_hop;
348
349
skb->protocol = htons(ETH_P_IPV6);
350
skb->priority = priority;
351
skb->mark = mark;
352
353
mtu = dst_mtu(dst);
354
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
355
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
356
357
/* if egress device is enslaved to an L3 master device pass the
358
* skb to its handler for processing
359
*/
360
skb = l3mdev_ip6_out((struct sock *)sk, skb);
361
if (unlikely(!skb))
362
return 0;
363
364
/* hooks should never assume socket lock is held.
365
* we promote our socket to non const
366
*/
367
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
368
net, (struct sock *)sk, skb, NULL, dev,
369
dst_output);
370
}
371
372
skb->dev = dev;
373
/* ipv6_local_error() does not require socket lock,
374
* we promote our socket to non const
375
*/
376
ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
377
378
IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
379
kfree_skb(skb);
380
return -EMSGSIZE;
381
}
382
EXPORT_SYMBOL(ip6_xmit);
383
384
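/* Deliver a packet carrying a Router Alert option to every raw socket that
 * registered for this alert value; the last matching socket consumes the
 * original skb, earlier matches receive clones. Returns 1 if the packet was
 * delivered to at least one socket, 0 otherwise.
 */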
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
385
{
386
struct ip6_ra_chain *ra;
387
struct sock *last = NULL;
388
389
read_lock(&ip6_ra_lock);
390
for (ra = ip6_ra_chain; ra; ra = ra->next) {
391
struct sock *sk = ra->sk;
392
if (sk && ra->sel == sel &&
393
(!sk->sk_bound_dev_if ||
394
sk->sk_bound_dev_if == skb->dev->ifindex)) {
395
396
if (inet6_test_bit(RTALERT_ISOLATE, sk) &&
397
!net_eq(sock_net(sk), dev_net(skb->dev))) {
398
continue;
399
}
400
if (last) {
401
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
402
if (skb2)
403
rawv6_rcv(last, skb2);
404
}
405
last = sk;
406
}
407
}
408
409
if (last) {
410
rawv6_rcv(last, skb);
411
read_unlock(&ip6_ra_lock);
412
return 1;
413
}
414
read_unlock(&ip6_ra_lock);
415
return 0;
416
}
417
418
static int ip6_forward_proxy_check(struct sk_buff *skb)
419
{
420
struct ipv6hdr *hdr = ipv6_hdr(skb);
421
u8 nexthdr = hdr->nexthdr;
422
__be16 frag_off;
423
int offset;
424
425
if (ipv6_ext_hdr(nexthdr)) {
426
offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
427
if (offset < 0)
428
return 0;
429
} else
430
offset = sizeof(struct ipv6hdr);
431
432
if (nexthdr == IPPROTO_ICMPV6) {
433
struct icmp6hdr *icmp6;
434
435
if (!pskb_may_pull(skb, (skb_network_header(skb) +
436
offset + 1 - skb->data)))
437
return 0;
438
439
icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
440
441
switch (icmp6->icmp6_type) {
442
case NDISC_ROUTER_SOLICITATION:
443
case NDISC_ROUTER_ADVERTISEMENT:
444
case NDISC_NEIGHBOUR_SOLICITATION:
445
case NDISC_NEIGHBOUR_ADVERTISEMENT:
446
case NDISC_REDIRECT:
447
/* For reaction involving unicast neighbor discovery
448
* message destined to the proxied address, pass it to
449
* input function.
450
*/
451
return 1;
452
default:
453
break;
454
}
455
}
456
457
/*
458
* The proxying router can't forward traffic sent to a link-local
459
* address, so signal the sender and discard the packet. This
460
* behavior is clarified by the MIPv6 specification.
461
*/
462
if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
463
dst_link_failure(skb);
464
return -1;
465
}
466
467
return 0;
468
}
469
470
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
471
struct sk_buff *skb)
472
{
473
#ifdef CONFIG_NET_SWITCHDEV
474
if (skb->offload_l3_fwd_mark) {
475
consume_skb(skb);
476
return 0;
477
}
478
#endif
479
480
skb_clear_tstamp(skb);
481
return dst_output(net, sk, skb);
482
}
483
484
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
485
{
486
if (skb->len <= mtu)
487
return false;
488
489
/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
490
if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
491
return true;
492
493
if (skb->ignore_df)
494
return false;
495
496
if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
497
return false;
498
499
return true;
500
}
501
502
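/* Forwarding path: checks that forwarding is enabled, handles the Router
 * Alert and proxy NDP special cases, validates and decrements the hop limit,
 * sends redirects or ICMPv6 errors where appropriate, and finally passes the
 * packet through the NF_INET_FORWARD hook to ip6_forward_finish().
 */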
int ip6_forward(struct sk_buff *skb)
503
{
504
struct dst_entry *dst = skb_dst(skb);
505
struct ipv6hdr *hdr = ipv6_hdr(skb);
506
struct inet6_skb_parm *opt = IP6CB(skb);
507
struct net *net = dev_net(dst_dev(dst));
508
struct net_device *dev;
509
struct inet6_dev *idev;
510
SKB_DR(reason);
511
u32 mtu;
512
513
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
514
if (!READ_ONCE(net->ipv6.devconf_all->forwarding) &&
515
(!idev || !READ_ONCE(idev->cnf.force_forwarding)))
516
goto error;
517
518
if (skb->pkt_type != PACKET_HOST)
519
goto drop;
520
521
if (unlikely(skb->sk))
522
goto drop;
523
524
if (skb_warn_if_lro(skb))
525
goto drop;
526
527
if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) &&
528
(!idev || !READ_ONCE(idev->cnf.disable_policy)) &&
529
!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
530
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
531
goto drop;
532
}
533
534
skb_forward_csum(skb);
535
536
/*
537
* We DO NOT make any processing on
538
* RA packets, pushing them to user level AS IS
539
* without any WARRANTY that application will be able
540
* to interpret them. The reason is that we
541
* cannot make anything clever here.
542
*
543
* We are not end-node, so that if packet contains
544
* AH/ESP, we cannot make anything.
545
* Defragmentation also would be mistake, RA packets
546
* cannot be fragmented, because there is no warranty
547
* that different fragments will go along one path. --ANK
548
*/
549
if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
550
if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
551
return 0;
552
}
553
554
/*
555
* check and decrement ttl
556
*/
557
if (hdr->hop_limit <= 1) {
558
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
559
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
560
561
kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
562
return -ETIMEDOUT;
563
}
564
565
/* XXX: idev->cnf.proxy_ndp? */
566
if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
567
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev)) {
568
int proxied = ip6_forward_proxy_check(skb);
569
if (proxied > 0) {
570
/* It's tempting to decrease the hop limit
571
* here by 1, as we do at the end of the
572
* function too.
573
*
574
* But that would be incorrect, as proxying is
575
* not forwarding. The ip6_input function
576
* will handle this packet locally, and it
577
* depends on the hop limit being unchanged.
578
*
579
* One example is the NDP hop limit, that
580
* always has to stay 255, but other would be
581
* similar checks around RA packets, where the
582
* user can even change the desired limit.
583
*/
584
return ip6_input(skb);
585
} else if (proxied < 0) {
586
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
587
goto drop;
588
}
589
}
590
591
if (!xfrm6_route_forward(skb)) {
592
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
593
SKB_DR_SET(reason, XFRM_POLICY);
594
goto drop;
595
}
596
dst = skb_dst(skb);
597
dev = dst_dev(dst);
598
/* IPv6 specs say nothing about it, but it is clear that we cannot
599
send redirects to source routed frames.
600
We don't send redirects to frames decapsulated from IPsec.
601
*/
602
if (IP6CB(skb)->iif == dev->ifindex &&
603
opt->srcrt == 0 && !skb_sec_path(skb)) {
604
struct in6_addr *target = NULL;
605
struct inet_peer *peer;
606
struct rt6_info *rt;
607
608
/*
609
* incoming and outgoing devices are the same
610
* send a redirect.
611
*/
612
613
rt = dst_rt6_info(dst);
614
if (rt->rt6i_flags & RTF_GATEWAY)
615
target = &rt->rt6i_gateway;
616
else
617
target = &hdr->daddr;
618
619
rcu_read_lock();
620
peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
621
622
/* Limit redirects both by destination (here)
623
and by source (inside ndisc_send_redirect)
624
*/
625
if (inet_peer_xrlim_allow(peer, 1*HZ))
626
ndisc_send_redirect(skb, target);
627
rcu_read_unlock();
628
} else {
629
int addrtype = ipv6_addr_type(&hdr->saddr);
630
631
/* This check is security critical. */
632
if (addrtype == IPV6_ADDR_ANY ||
633
addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
634
goto error;
635
if (addrtype & IPV6_ADDR_LINKLOCAL) {
636
icmpv6_send(skb, ICMPV6_DEST_UNREACH,
637
ICMPV6_NOT_NEIGHBOUR, 0);
638
goto error;
639
}
640
}
641
642
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
643
644
mtu = ip6_dst_mtu_maybe_forward(dst, true);
645
if (mtu < IPV6_MIN_MTU)
646
mtu = IPV6_MIN_MTU;
647
648
if (ip6_pkt_too_big(skb, mtu)) {
649
/* Again, force OUTPUT device used as source address */
650
skb->dev = dev;
651
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
652
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
653
__IP6_INC_STATS(net, ip6_dst_idev(dst),
654
IPSTATS_MIB_FRAGFAILS);
655
kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
656
return -EMSGSIZE;
657
}
658
659
if (skb_cow(skb, dev->hard_header_len)) {
660
__IP6_INC_STATS(net, ip6_dst_idev(dst),
661
IPSTATS_MIB_OUTDISCARDS);
662
goto drop;
663
}
664
665
hdr = ipv6_hdr(skb);
666
667
/* Mangling hops number delayed to point after skb COW */
668
669
hdr->hop_limit--;
670
671
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
672
net, NULL, skb, skb->dev, dev,
673
ip6_forward_finish);
674
675
error:
676
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
677
SKB_DR_SET(reason, IP_INADDRERRORS);
678
drop:
679
kfree_skb_reason(skb, reason);
680
return -EINVAL;
681
}
682
683
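/* Copy the per-packet state that every fragment must inherit from the
 * original skb: dst, device, mark, priority, hash, traffic-control index,
 * netfilter and security state.
 */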
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
684
{
685
to->pkt_type = from->pkt_type;
686
to->priority = from->priority;
687
to->protocol = from->protocol;
688
skb_dst_drop(to);
689
skb_dst_set(to, dst_clone(skb_dst(from)));
690
to->dev = from->dev;
691
to->mark = from->mark;
692
693
skb_copy_hash(to, from);
694
695
#ifdef CONFIG_NET_SCHED
696
to->tc_index = from->tc_index;
697
#endif
698
nf_copy(to, from);
699
skb_ext_copy(to, from);
700
skb_copy_secmark(to, from);
701
}
702
703
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
704
u8 nexthdr, __be32 frag_id,
705
struct ip6_fraglist_iter *iter)
706
{
707
unsigned int first_len;
708
struct frag_hdr *fh;
709
710
/* BUILD HEADER */
711
*prevhdr = NEXTHDR_FRAGMENT;
712
iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
713
if (!iter->tmp_hdr)
714
return -ENOMEM;
715
716
iter->frag = skb_shinfo(skb)->frag_list;
717
skb_frag_list_init(skb);
718
719
iter->offset = 0;
720
iter->hlen = hlen;
721
iter->frag_id = frag_id;
722
iter->nexthdr = nexthdr;
723
724
__skb_pull(skb, hlen);
725
fh = __skb_push(skb, sizeof(struct frag_hdr));
726
__skb_push(skb, hlen);
727
skb_reset_network_header(skb);
728
memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
729
730
fh->nexthdr = nexthdr;
731
fh->reserved = 0;
732
fh->frag_off = htons(IP6_MF);
733
fh->identification = frag_id;
734
735
first_len = skb_pagelen(skb);
736
skb->data_len = first_len - skb_headlen(skb);
737
skb->len = first_len;
738
ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
739
740
return 0;
741
}
742
EXPORT_SYMBOL(ip6_fraglist_init);
743
744
void ip6_fraglist_prepare(struct sk_buff *skb,
745
struct ip6_fraglist_iter *iter)
746
{
747
struct sk_buff *frag = iter->frag;
748
unsigned int hlen = iter->hlen;
749
struct frag_hdr *fh;
750
751
frag->ip_summed = CHECKSUM_NONE;
752
skb_reset_transport_header(frag);
753
fh = __skb_push(frag, sizeof(struct frag_hdr));
754
__skb_push(frag, hlen);
755
skb_reset_network_header(frag);
756
memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
757
iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
758
fh->nexthdr = iter->nexthdr;
759
fh->reserved = 0;
760
fh->frag_off = htons(iter->offset);
761
if (frag->next)
762
fh->frag_off |= htons(IP6_MF);
763
fh->identification = iter->frag_id;
764
ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
765
ip6_copy_metadata(frag, skb);
766
}
767
EXPORT_SYMBOL(ip6_fraglist_prepare);
768
769
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
770
unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
771
u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
772
{
773
state->prevhdr = prevhdr;
774
state->nexthdr = nexthdr;
775
state->frag_id = frag_id;
776
777
state->hlen = hlen;
778
state->mtu = mtu;
779
780
state->left = skb->len - hlen; /* Space per frame */
781
state->ptr = hlen; /* Where to start from */
782
783
state->hroom = hdr_room;
784
state->troom = needed_tailroom;
785
786
state->offset = 0;
787
}
788
EXPORT_SYMBOL(ip6_frag_init);
789
790
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
791
{
792
u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
793
struct sk_buff *frag;
794
struct frag_hdr *fh;
795
unsigned int len;
796
797
len = state->left;
798
/* IF: it doesn't fit, use 'mtu' - the data space left */
799
if (len > state->mtu)
800
len = state->mtu;
801
/* IF: we are not sending up to and including the packet end
802
then align the next start on an eight byte boundary */
803
if (len < state->left)
804
len &= ~7;
805
806
/* Allocate buffer */
807
frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
808
state->hroom + state->troom, GFP_ATOMIC);
809
if (!frag)
810
return ERR_PTR(-ENOMEM);
811
812
/*
813
* Set up data on packet
814
*/
815
816
ip6_copy_metadata(frag, skb);
817
skb_reserve(frag, state->hroom);
818
skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
819
skb_reset_network_header(frag);
820
fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
821
frag->transport_header = (frag->network_header + state->hlen +
822
sizeof(struct frag_hdr));
823
824
/*
825
* Charge the memory for the fragment to any owner
826
* it might possess
827
*/
828
if (skb->sk)
829
skb_set_owner_w(frag, skb->sk);
830
831
/*
832
* Copy the packet header into the new buffer.
833
*/
834
skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
835
836
fragnexthdr_offset = skb_network_header(frag);
837
fragnexthdr_offset += prevhdr - skb_network_header(skb);
838
*fragnexthdr_offset = NEXTHDR_FRAGMENT;
839
840
/*
841
* Build fragment header.
842
*/
843
fh->nexthdr = state->nexthdr;
844
fh->reserved = 0;
845
fh->identification = state->frag_id;
846
847
/*
848
* Copy a block of the IP datagram.
849
*/
850
BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
851
len));
852
state->left -= len;
853
854
fh->frag_off = htons(state->offset);
855
if (state->left > 0)
856
fh->frag_off |= htons(IP6_MF);
857
ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
858
859
state->ptr += len;
860
state->offset += len;
861
862
return frag;
863
}
864
EXPORT_SYMBOL(ip6_frag_next);
865
866
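/* Fragment an oversized packet. The fast path reuses an existing frag_list
 * via ip6_fraglist_init()/ip6_fraglist_prepare(); otherwise the slow path
 * allocates each fragment with ip6_frag_next(). Every fragment is handed to
 * the supplied output callback.
 */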
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
867
int (*output)(struct net *, struct sock *, struct sk_buff *))
868
{
869
struct sk_buff *frag;
870
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
871
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
872
inet6_sk(skb->sk) : NULL;
873
u8 tstamp_type = skb->tstamp_type;
874
struct ip6_frag_state state;
875
unsigned int mtu, hlen, nexthdr_offset;
876
ktime_t tstamp = skb->tstamp;
877
int hroom, err = 0;
878
__be32 frag_id;
879
u8 *prevhdr, nexthdr = 0;
880
881
err = ip6_find_1stfragopt(skb, &prevhdr);
882
if (err < 0)
883
goto fail;
884
hlen = err;
885
nexthdr = *prevhdr;
886
nexthdr_offset = prevhdr - skb_network_header(skb);
887
888
mtu = ip6_skb_dst_mtu(skb);
889
890
/* We must not fragment if the socket is set to force MTU discovery
891
* or if the skb is not generated by a local socket.
892
*/
893
if (unlikely(!skb->ignore_df && skb->len > mtu))
894
goto fail_toobig;
895
896
if (IP6CB(skb)->frag_max_size) {
897
if (IP6CB(skb)->frag_max_size > mtu)
898
goto fail_toobig;
899
900
/* don't send fragments larger than what we received */
901
mtu = IP6CB(skb)->frag_max_size;
902
if (mtu < IPV6_MIN_MTU)
903
mtu = IPV6_MIN_MTU;
904
}
905
906
if (np) {
907
u32 frag_size = READ_ONCE(np->frag_size);
908
909
if (frag_size && frag_size < mtu)
910
mtu = frag_size;
911
}
912
if (mtu < hlen + sizeof(struct frag_hdr) + 8)
913
goto fail_toobig;
914
mtu -= hlen + sizeof(struct frag_hdr);
915
916
frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
917
&ipv6_hdr(skb)->saddr);
918
919
if (skb->ip_summed == CHECKSUM_PARTIAL &&
920
(err = skb_checksum_help(skb)))
921
goto fail;
922
923
prevhdr = skb_network_header(skb) + nexthdr_offset;
924
hroom = LL_RESERVED_SPACE(rt->dst.dev);
925
if (skb_has_frag_list(skb)) {
926
unsigned int first_len = skb_pagelen(skb);
927
struct ip6_fraglist_iter iter;
928
struct sk_buff *frag2;
929
930
if (first_len - hlen > mtu ||
931
((first_len - hlen) & 7) ||
932
skb_cloned(skb) ||
933
skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
934
goto slow_path;
935
936
skb_walk_frags(skb, frag) {
937
/* Correct geometry. */
938
if (frag->len > mtu ||
939
((frag->len & 7) && frag->next) ||
940
skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
941
goto slow_path_clean;
942
943
/* Partially cloned skb? */
944
if (skb_shared(frag))
945
goto slow_path_clean;
946
947
BUG_ON(frag->sk);
948
if (skb->sk) {
949
frag->sk = skb->sk;
950
frag->destructor = sock_wfree;
951
}
952
skb->truesize -= frag->truesize;
953
}
954
955
err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
956
&iter);
957
if (err < 0)
958
goto fail;
959
960
/* We prevent @rt from being freed. */
961
rcu_read_lock();
962
963
for (;;) {
964
/* Prepare header of the next frame,
965
* before previous one went down. */
966
if (iter.frag)
967
ip6_fraglist_prepare(skb, &iter);
968
969
skb_set_delivery_time(skb, tstamp, tstamp_type);
970
err = output(net, sk, skb);
971
if (!err)
972
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
973
IPSTATS_MIB_FRAGCREATES);
974
975
if (err || !iter.frag)
976
break;
977
978
skb = ip6_fraglist_next(&iter);
979
}
980
981
kfree(iter.tmp_hdr);
982
983
if (err == 0) {
984
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
985
IPSTATS_MIB_FRAGOKS);
986
rcu_read_unlock();
987
return 0;
988
}
989
990
kfree_skb_list(iter.frag);
991
992
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
993
IPSTATS_MIB_FRAGFAILS);
994
rcu_read_unlock();
995
return err;
996
997
slow_path_clean:
998
skb_walk_frags(skb, frag2) {
999
if (frag2 == frag)
1000
break;
1001
frag2->sk = NULL;
1002
frag2->destructor = NULL;
1003
skb->truesize += frag2->truesize;
1004
}
1005
}
1006
1007
slow_path:
1008
/*
1009
* Fragment the datagram.
1010
*/
1011
1012
ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
1013
LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
1014
&state);
1015
1016
/*
1017
* Keep copying data until we run out.
1018
*/
1019
1020
while (state.left > 0) {
1021
frag = ip6_frag_next(skb, &state);
1022
if (IS_ERR(frag)) {
1023
err = PTR_ERR(frag);
1024
goto fail;
1025
}
1026
1027
/*
1028
* Put this fragment into the sending queue.
1029
*/
1030
skb_set_delivery_time(frag, tstamp, tstamp_type);
1031
err = output(net, sk, frag);
1032
if (err)
1033
goto fail;
1034
1035
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1036
IPSTATS_MIB_FRAGCREATES);
1037
}
1038
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1039
IPSTATS_MIB_FRAGOKS);
1040
consume_skb(skb);
1041
return err;
1042
1043
fail_toobig:
1044
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1045
err = -EMSGSIZE;
1046
1047
fail:
1048
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1049
IPSTATS_MIB_FRAGFAILS);
1050
kfree_skb(skb);
1051
return err;
1052
}
1053
1054
static inline int ip6_rt_check(const struct rt6key *rt_key,
1055
const struct in6_addr *fl_addr,
1056
const struct in6_addr *addr_cache)
1057
{
1058
return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
1059
(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
1060
}
1061
1062
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
1063
struct dst_entry *dst,
1064
const struct flowi6 *fl6)
1065
{
1066
struct ipv6_pinfo *np = inet6_sk(sk);
1067
struct rt6_info *rt;
1068
1069
if (!dst)
1070
goto out;
1071
1072
if (dst->ops->family != AF_INET6) {
1073
dst_release(dst);
1074
return NULL;
1075
}
1076
1077
rt = dst_rt6_info(dst);
1078
/* Yes, checking route validity in not connected
1079
* case is not very simple. Take into account,
1080
* that we do not support routing by source, TOS,
1081
* and MSG_DONTROUTE --ANK (980726)
1082
*
1083
* 1. ip6_rt_check(): If route was host route,
1084
* check that cached destination is current.
1085
* If it is network route, we still may
1086
* check its validity using saved pointer
1087
* to the last used address: daddr_cache.
1088
* We do not want to save whole address now,
1089
* (because main consumer of this service
1090
* is tcp, which does not have this problem),
1091
* so that the last trick works only on connected
1092
* sockets.
1093
* 2. oif also should be the same.
1094
*/
1095
if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
1096
#ifdef CONFIG_IPV6_SUBTREES
1097
ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
1098
#endif
1099
(fl6->flowi6_oif && fl6->flowi6_oif != dst_dev(dst)->ifindex)) {
1100
dst_release(dst);
1101
dst = NULL;
1102
}
1103
1104
out:
1105
return dst;
1106
}
1107
1108
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1109
struct dst_entry **dst, struct flowi6 *fl6)
1110
{
1111
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1112
struct neighbour *n;
1113
struct rt6_info *rt;
1114
#endif
1115
int err;
1116
int flags = 0;
1117
1118
/* The correct way to handle this would be to do
1119
* ip6_route_get_saddr, and then ip6_route_output; however,
1120
* the route-specific preferred source forces the
1121
* ip6_route_output call _before_ ip6_route_get_saddr.
1122
*
1123
* In source specific routing (no src=any default route),
1124
* ip6_route_output will fail given src=any saddr, though, so
1125
* that's why we try it again later.
1126
*/
1127
if (ipv6_addr_any(&fl6->saddr)) {
1128
struct fib6_info *from;
1129
struct rt6_info *rt;
1130
1131
*dst = ip6_route_output(net, sk, fl6);
1132
rt = (*dst)->error ? NULL : dst_rt6_info(*dst);
1133
1134
rcu_read_lock();
1135
from = rt ? rcu_dereference(rt->from) : NULL;
1136
err = ip6_route_get_saddr(net, from, &fl6->daddr,
1137
sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
1138
fl6->flowi6_l3mdev,
1139
&fl6->saddr);
1140
rcu_read_unlock();
1141
1142
if (err)
1143
goto out_err_release;
1144
1145
/* If we had an erroneous initial result, pretend it
1146
* never existed and let the SA-enabled version take
1147
* over.
1148
*/
1149
if ((*dst)->error) {
1150
dst_release(*dst);
1151
*dst = NULL;
1152
}
1153
1154
if (fl6->flowi6_oif)
1155
flags |= RT6_LOOKUP_F_IFACE;
1156
}
1157
1158
if (!*dst)
1159
*dst = ip6_route_output_flags(net, sk, fl6, flags);
1160
1161
err = (*dst)->error;
1162
if (err)
1163
goto out_err_release;
1164
1165
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1166
/*
1167
* Here if the dst entry we've looked up
1168
* has a neighbour entry that is in the INCOMPLETE
1169
* state and the src address from the flow is
1170
* marked as OPTIMISTIC, we release the found
1171
* dst entry and replace it instead with the
1172
* dst entry of the nexthop router
1173
*/
1174
rt = dst_rt6_info(*dst);
1175
rcu_read_lock();
1176
n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1177
rt6_nexthop(rt, &fl6->daddr));
1178
err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0;
1179
rcu_read_unlock();
1180
1181
if (err) {
1182
struct inet6_ifaddr *ifp;
1183
struct flowi6 fl_gw6;
1184
int redirect;
1185
1186
ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1187
(*dst)->dev, 1);
1188
1189
redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1190
if (ifp)
1191
in6_ifa_put(ifp);
1192
1193
if (redirect) {
1194
/*
1195
* We need to get the dst entry for the
1196
* default router instead
1197
*/
1198
dst_release(*dst);
1199
memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1200
memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1201
*dst = ip6_route_output(net, sk, &fl_gw6);
1202
err = (*dst)->error;
1203
if (err)
1204
goto out_err_release;
1205
}
1206
}
1207
#endif
1208
if (ipv6_addr_v4mapped(&fl6->saddr) &&
1209
!(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1210
err = -EAFNOSUPPORT;
1211
goto out_err_release;
1212
}
1213
1214
return 0;
1215
1216
out_err_release:
1217
dst_release(*dst);
1218
*dst = NULL;
1219
1220
if (err == -ENETUNREACH)
1221
IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1222
return err;
1223
}
1224
1225
/**
1226
* ip6_dst_lookup - perform route lookup on flow
1227
* @net: Network namespace to perform lookup in
1228
* @sk: socket which provides route info
1229
* @dst: pointer to dst_entry * for result
1230
* @fl6: flow to lookup
1231
*
1232
* This function performs a route lookup on the given flow.
1233
*
1234
* It returns zero on success, or a standard errno code on error.
1235
*/
1236
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1237
struct flowi6 *fl6)
1238
{
1239
*dst = NULL;
1240
return ip6_dst_lookup_tail(net, sk, dst, fl6);
1241
}
1242
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1243
1244
/**
1245
* ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1246
* @net: Network namespace to perform lookup in
1247
* @sk: socket which provides route info
1248
* @fl6: flow to lookup
1249
* @final_dst: final destination address for ipsec lookup
1250
*
1251
* This function performs a route lookup on the given flow.
1252
*
1253
* It returns a valid dst pointer on success, or a pointer encoded
1254
* error code.
1255
*/
1256
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1257
const struct in6_addr *final_dst)
1258
{
1259
struct dst_entry *dst = NULL;
1260
int err;
1261
1262
err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1263
if (err)
1264
return ERR_PTR(err);
1265
if (final_dst)
1266
fl6->daddr = *final_dst;
1267
1268
return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1269
}
1270
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1271
1272
/**
1273
* ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1274
* @sk: socket which provides the dst cache and route info
1275
* @fl6: flow to lookup
1276
* @final_dst: final destination address for ipsec lookup
1277
* @connected: whether @sk is connected or not
1278
*
1279
* This function performs a route lookup on the given flow with the
1280
* possibility of using the cached route in the socket if it is valid.
1281
* It will take the socket dst lock when operating on the dst cache.
1282
* As a result, this function can only be used in process context.
1283
*
1284
* In addition, for a connected socket, cache the dst in the socket
1285
* if the current cache is not valid.
1286
*
1287
* It returns a valid dst pointer on success, or a pointer encoded
1288
* error code.
1289
*/
1290
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1291
const struct in6_addr *final_dst,
1292
bool connected)
1293
{
1294
struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1295
1296
dst = ip6_sk_dst_check(sk, dst, fl6);
1297
if (dst)
1298
return dst;
1299
1300
dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1301
if (connected && !IS_ERR(dst))
1302
ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1303
1304
return dst;
1305
}
1306
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1307
1308
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1309
gfp_t gfp)
1310
{
1311
return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1312
}
1313
1314
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1315
gfp_t gfp)
1316
{
1317
return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1318
}
1319
1320
static void ip6_append_data_mtu(unsigned int *mtu,
1321
int *maxfraglen,
1322
unsigned int fragheaderlen,
1323
struct sk_buff *skb,
1324
struct rt6_info *rt,
1325
unsigned int orig_mtu)
1326
{
1327
if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1328
if (!skb) {
1329
/* first fragment, reserve header_len */
1330
*mtu = orig_mtu - rt->dst.header_len;
1331
1332
} else {
1333
/*
1334
* this fragment is not first, the headers
1335
* space is regarded as data space.
1336
*/
1337
*mtu = orig_mtu;
1338
}
1339
*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1340
+ fragheaderlen - sizeof(struct frag_hdr);
1341
}
1342
}
1343
1344
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1345
struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1346
struct rt6_info *rt)
1347
{
1348
struct ipv6_pinfo *np = inet6_sk(sk);
1349
unsigned int mtu, frag_size;
1350
struct ipv6_txoptions *nopt, *opt = ipc6->opt;
1351
1352
/* callers pass dst together with a reference, set it first so
1353
* ip6_cork_release() can put it down even in case of an error.
1354
*/
1355
cork->base.dst = &rt->dst;
1356
1357
/*
1358
* setup for corking
1359
*/
1360
if (opt) {
1361
if (WARN_ON(v6_cork->opt))
1362
return -EINVAL;
1363
1364
nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1365
if (unlikely(!nopt))
1366
return -ENOBUFS;
1367
1368
nopt->tot_len = sizeof(*opt);
1369
nopt->opt_flen = opt->opt_flen;
1370
nopt->opt_nflen = opt->opt_nflen;
1371
1372
nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
1373
if (opt->dst0opt && !nopt->dst0opt)
1374
return -ENOBUFS;
1375
1376
nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
1377
if (opt->dst1opt && !nopt->dst1opt)
1378
return -ENOBUFS;
1379
1380
nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
1381
if (opt->hopopt && !nopt->hopopt)
1382
return -ENOBUFS;
1383
1384
nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
1385
if (opt->srcrt && !nopt->srcrt)
1386
return -ENOBUFS;
1387
1388
/* need source address above miyazawa*/
1389
}
1390
v6_cork->hop_limit = ipc6->hlimit;
1391
v6_cork->tclass = ipc6->tclass;
1392
v6_cork->dontfrag = ipc6->dontfrag;
1393
if (rt->dst.flags & DST_XFRM_TUNNEL)
1394
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
1395
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1396
else
1397
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
1398
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1399
1400
frag_size = READ_ONCE(np->frag_size);
1401
if (frag_size && frag_size < mtu)
1402
mtu = frag_size;
1403
1404
cork->base.fragsize = mtu;
1405
cork->base.gso_size = ipc6->gso_size;
1406
cork->base.tx_flags = 0;
1407
cork->base.mark = ipc6->sockc.mark;
1408
cork->base.priority = ipc6->sockc.priority;
1409
sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags);
1410
if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
1411
cork->base.flags |= IPCORK_TS_OPT_ID;
1412
cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
1413
}
1414
cork->base.length = 0;
1415
cork->base.transmit_time = ipc6->sockc.transmit_time;
1416
1417
return 0;
1418
}
1419
1420
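/* Core of ip6_append_data()/ip6_make_skb(): appends user data to the given
 * queue, growing the last skb or allocating new ones so that each resulting
 * packet (plus fragment header) fits the corked MTU, with optional zerocopy,
 * MSG_SPLICE_PAGES and hardware checksum handling.
 */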
static int __ip6_append_data(struct sock *sk,
1421
struct sk_buff_head *queue,
1422
struct inet_cork_full *cork_full,
1423
struct inet6_cork *v6_cork,
1424
struct page_frag *pfrag,
1425
int getfrag(void *from, char *to, int offset,
1426
int len, int odd, struct sk_buff *skb),
1427
void *from, size_t length, int transhdrlen,
1428
unsigned int flags)
1429
{
1430
struct sk_buff *skb, *skb_prev = NULL;
1431
struct inet_cork *cork = &cork_full->base;
1432
struct flowi6 *fl6 = &cork_full->fl.u.ip6;
1433
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1434
struct ubuf_info *uarg = NULL;
1435
int exthdrlen = 0;
1436
int dst_exthdrlen = 0;
1437
int hh_len;
1438
int copy;
1439
int err;
1440
int offset = 0;
1441
bool zc = false;
1442
u32 tskey = 0;
1443
struct rt6_info *rt = dst_rt6_info(cork->dst);
1444
bool paged, hold_tskey = false, extra_uref = false;
1445
struct ipv6_txoptions *opt = v6_cork->opt;
1446
int csummode = CHECKSUM_NONE;
1447
unsigned int maxnonfragsize, headersize;
1448
unsigned int wmem_alloc_delta = 0;
1449
1450
skb = skb_peek_tail(queue);
1451
if (!skb) {
1452
exthdrlen = opt ? opt->opt_flen : 0;
1453
dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1454
}
1455
1456
paged = !!cork->gso_size;
1457
mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1458
orig_mtu = mtu;
1459
1460
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1461
1462
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1463
(opt ? opt->opt_nflen : 0);
1464
1465
headersize = sizeof(struct ipv6hdr) +
1466
(opt ? opt->opt_flen + opt->opt_nflen : 0) +
1467
rt->rt6i_nfheader_len;
1468
1469
if (mtu <= fragheaderlen ||
1470
((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
1471
goto emsgsize;
1472
1473
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1474
sizeof(struct frag_hdr);
1475
1476
/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1477
* the first fragment
1478
*/
1479
if (headersize + transhdrlen > mtu)
1480
goto emsgsize;
1481
1482
if (cork->length + length > mtu - headersize && v6_cork->dontfrag &&
1483
(sk->sk_protocol == IPPROTO_UDP ||
1484
sk->sk_protocol == IPPROTO_ICMPV6 ||
1485
sk->sk_protocol == IPPROTO_RAW)) {
1486
ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1487
sizeof(struct ipv6hdr));
1488
goto emsgsize;
1489
}
1490
1491
if (ip6_sk_ignore_df(sk))
1492
maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1493
else
1494
maxnonfragsize = mtu;
1495
1496
if (cork->length + length > maxnonfragsize - headersize) {
1497
emsgsize:
1498
pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1499
ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1500
return -EMSGSIZE;
1501
}
1502
1503
/* CHECKSUM_PARTIAL only with no extension headers and when
1504
* we are not going to fragment
1505
*/
1506
if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1507
headersize == sizeof(struct ipv6hdr) &&
1508
length <= mtu - headersize &&
1509
(!(flags & MSG_MORE) || cork->gso_size) &&
1510
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1511
csummode = CHECKSUM_PARTIAL;
1512
1513
if ((flags & MSG_ZEROCOPY) && length) {
1514
struct msghdr *msg = from;
1515
1516
if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
1517
if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
1518
return -EINVAL;
1519
1520
/* Leave uarg NULL if can't zerocopy, callers should
1521
* be able to handle it.
1522
*/
1523
if ((rt->dst.dev->features & NETIF_F_SG) &&
1524
csummode == CHECKSUM_PARTIAL) {
1525
paged = true;
1526
zc = true;
1527
uarg = msg->msg_ubuf;
1528
}
1529
} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
1530
uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb),
1531
false);
1532
if (!uarg)
1533
return -ENOBUFS;
1534
extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
1535
if (rt->dst.dev->features & NETIF_F_SG &&
1536
csummode == CHECKSUM_PARTIAL) {
1537
paged = true;
1538
zc = true;
1539
} else {
1540
uarg_to_msgzc(uarg)->zerocopy = 0;
1541
skb_zcopy_set(skb, uarg, &extra_uref);
1542
}
1543
}
1544
} else if ((flags & MSG_SPLICE_PAGES) && length) {
1545
if (inet_test_bit(HDRINCL, sk))
1546
return -EPERM;
1547
if (rt->dst.dev->features & NETIF_F_SG &&
1548
getfrag == ip_generic_getfrag)
1549
/* We need an empty buffer to attach stuff to */
1550
paged = true;
1551
else
1552
flags &= ~MSG_SPLICE_PAGES;
1553
}
1554
1555
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
1556
READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
1557
if (cork->flags & IPCORK_TS_OPT_ID) {
1558
tskey = cork->ts_opt_id;
1559
} else {
1560
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
1561
hold_tskey = true;
1562
}
1563
}
1564
1565
/*
1566
* Let's try using as much space as possible.
1567
* Use MTU if total length of the message fits into the MTU.
1568
* Otherwise, we need to reserve fragment header and
1569
* fragment alignment (= 8-15 octets, in total).
1570
*
1571
* Note that we may need to "move" the data from the tail
1572
* of the buffer to the new fragment when we split
1573
* the message.
1574
*
1575
* FIXME: It may be fragmented into multiple chunks
1576
* at once if non-fragmentable extension headers
1577
* are too large.
1578
* --yoshfuji
1579
*/
1580
1581
cork->length += length;
1582
if (!skb)
1583
goto alloc_new_skb;
1584
1585
while (length > 0) {
1586
/* Check if the remaining data fits into current packet. */
1587
copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len;
1588
if (copy < length)
1589
copy = maxfraglen - skb->len;
1590
1591
if (copy <= 0) {
1592
char *data;
1593
unsigned int datalen;
1594
unsigned int fraglen;
1595
unsigned int fraggap;
1596
unsigned int alloclen, alloc_extra;
1597
unsigned int pagedlen;
1598
alloc_new_skb:
1599
/* There's no room in the current skb */
1600
if (skb)
1601
fraggap = skb->len - maxfraglen;
1602
else
1603
fraggap = 0;
1604
/* update mtu and maxfraglen if necessary */
1605
if (!skb || !skb_prev)
1606
ip6_append_data_mtu(&mtu, &maxfraglen,
1607
fragheaderlen, skb, rt,
1608
orig_mtu);
1609
1610
skb_prev = skb;
1611
1612
/*
1613
* If remaining data exceeds the mtu,
1614
* we know we need more fragment(s).
1615
*/
1616
datalen = length + fraggap;
1617
1618
if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen)
1619
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1620
fraglen = datalen + fragheaderlen;
1621
pagedlen = 0;
1622
1623
alloc_extra = hh_len;
1624
alloc_extra += dst_exthdrlen;
1625
alloc_extra += rt->dst.trailer_len;
1626
1627
/* We just reserve space for fragment header.
1628
* Note: this may be overallocation if the message
1629
* (without MSG_MORE) fits into the MTU.
1630
*/
1631
alloc_extra += sizeof(struct frag_hdr);
1632
1633
if ((flags & MSG_MORE) &&
1634
!(rt->dst.dev->features&NETIF_F_SG))
1635
alloclen = mtu;
1636
else if (!paged &&
1637
(fraglen + alloc_extra < SKB_MAX_ALLOC ||
1638
!(rt->dst.dev->features & NETIF_F_SG)))
1639
alloclen = fraglen;
1640
else {
1641
alloclen = fragheaderlen + transhdrlen;
1642
pagedlen = datalen - transhdrlen;
1643
}
1644
alloclen += alloc_extra;
1645
1646
if (datalen != length + fraggap) {
1647
/*
1648
* this is not the last fragment, the trailer
1649
* space is regarded as data space.
1650
*/
1651
datalen += rt->dst.trailer_len;
1652
}
1653
1654
fraglen = datalen + fragheaderlen;
1655
1656
copy = datalen - transhdrlen - fraggap - pagedlen;
1657
/* [!] NOTE: copy may be negative if pagedlen>0
1658
* because then the equation may reduce to -fraggap.
1659
*/
1660
if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) {
1661
err = -EINVAL;
1662
goto error;
1663
}
1664
if (transhdrlen) {
1665
skb = sock_alloc_send_skb(sk, alloclen,
1666
(flags & MSG_DONTWAIT), &err);
1667
} else {
1668
skb = NULL;
1669
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1670
2 * sk->sk_sndbuf)
1671
skb = alloc_skb(alloclen,
1672
sk->sk_allocation);
1673
if (unlikely(!skb))
1674
err = -ENOBUFS;
1675
}
1676
if (!skb)
1677
goto error;
1678
/*
1679
* Fill in the control structures
1680
*/
1681
skb->protocol = htons(ETH_P_IPV6);
1682
skb->ip_summed = csummode;
1683
skb->csum = 0;
1684
/* reserve for fragmentation and ipsec header */
1685
skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1686
dst_exthdrlen);
1687
1688
/*
1689
* Find where to start putting bytes
1690
*/
1691
data = skb_put(skb, fraglen - pagedlen);
1692
skb_set_network_header(skb, exthdrlen);
1693
data += fragheaderlen;
1694
skb->transport_header = (skb->network_header +
1695
fragheaderlen);
1696
if (fraggap) {
1697
skb->csum = skb_copy_and_csum_bits(
1698
skb_prev, maxfraglen,
1699
data + transhdrlen, fraggap);
1700
skb_prev->csum = csum_sub(skb_prev->csum,
1701
skb->csum);
1702
data += fraggap;
1703
pskb_trim_unique(skb_prev, maxfraglen);
1704
}
1705
if (copy > 0 &&
1706
INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
1707
from, data + transhdrlen, offset,
1708
copy, fraggap, skb) < 0) {
1709
err = -EFAULT;
1710
kfree_skb(skb);
1711
goto error;
1712
} else if (flags & MSG_SPLICE_PAGES) {
1713
copy = 0;
1714
}
1715
1716
offset += copy;
1717
length -= copy + transhdrlen;
1718
transhdrlen = 0;
1719
exthdrlen = 0;
1720
dst_exthdrlen = 0;
1721
1722
/* Only the initial fragment is time stamped */
1723
skb_shinfo(skb)->tx_flags = cork->tx_flags;
1724
cork->tx_flags = 0;
1725
skb_shinfo(skb)->tskey = tskey;
1726
tskey = 0;
1727
skb_zcopy_set(skb, uarg, &extra_uref);
1728
1729
if ((flags & MSG_CONFIRM) && !skb_prev)
1730
skb_set_dst_pending_confirm(skb, 1);
1731
1732
/*
1733
* Put the packet on the pending queue
1734
*/
1735
if (!skb->destructor) {
1736
skb->destructor = sock_wfree;
1737
skb->sk = sk;
1738
wmem_alloc_delta += skb->truesize;
1739
}
1740
__skb_queue_tail(queue, skb);
1741
continue;
1742
}
1743
1744
if (copy > length)
1745
copy = length;
1746
1747
if (!(rt->dst.dev->features&NETIF_F_SG) &&
1748
skb_tailroom(skb) >= copy) {
1749
unsigned int off;
1750
1751
off = skb->len;
1752
if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
1753
from, skb_put(skb, copy),
1754
offset, copy, off, skb) < 0) {
1755
__skb_trim(skb, off);
1756
err = -EFAULT;
1757
goto error;
1758
}
1759
} else if (flags & MSG_SPLICE_PAGES) {
1760
struct msghdr *msg = from;
1761
1762
err = -EIO;
1763
if (WARN_ON_ONCE(copy > msg->msg_iter.count))
1764
goto error;
1765
1766
err = skb_splice_from_iter(skb, &msg->msg_iter, copy);
1767
if (err < 0)
1768
goto error;
1769
copy = err;
1770
wmem_alloc_delta += copy;
1771
} else if (!zc) {
1772
int i = skb_shinfo(skb)->nr_frags;
1773
1774
err = -ENOMEM;
1775
if (!sk_page_frag_refill(sk, pfrag))
1776
goto error;
1777
1778
skb_zcopy_downgrade_managed(skb);
1779
if (!skb_can_coalesce(skb, i, pfrag->page,
1780
pfrag->offset)) {
1781
err = -EMSGSIZE;
1782
if (i == MAX_SKB_FRAGS)
1783
goto error;
1784
1785
__skb_fill_page_desc(skb, i, pfrag->page,
1786
pfrag->offset, 0);
1787
skb_shinfo(skb)->nr_frags = ++i;
1788
get_page(pfrag->page);
1789
}
1790
copy = min_t(int, copy, pfrag->size - pfrag->offset);
1791
if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
1792
from,
1793
page_address(pfrag->page) + pfrag->offset,
1794
offset, copy, skb->len, skb) < 0)
1795
goto error_efault;
1796
1797
pfrag->offset += copy;
1798
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1799
skb->len += copy;
1800
skb->data_len += copy;
1801
skb->truesize += copy;
1802
wmem_alloc_delta += copy;
1803
} else {
1804
err = skb_zerocopy_iter_dgram(skb, from, copy);
1805
if (err < 0)
1806
goto error;
1807
}
1808
offset += copy;
1809
length -= copy;
1810
}
1811
1812
if (wmem_alloc_delta)
1813
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1814
return 0;
1815
1816
error_efault:
1817
err = -EFAULT;
1818
error:
1819
net_zcopy_put_abort(uarg, extra_uref);
1820
cork->length -= length;
1821
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1822
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1823
if (hold_tskey)
1824
atomic_dec(&sk->sk_tskey);
1825
return err;
1826
}
1827
1828
int ip6_append_data(struct sock *sk,
1829
int getfrag(void *from, char *to, int offset, int len,
1830
int odd, struct sk_buff *skb),
1831
void *from, size_t length, int transhdrlen,
1832
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1833
struct rt6_info *rt, unsigned int flags)
1834
{
1835
struct inet_sock *inet = inet_sk(sk);
1836
struct ipv6_pinfo *np = inet6_sk(sk);
1837
int exthdrlen;
1838
int err;
1839
1840
if (flags&MSG_PROBE)
1841
return 0;
1842
if (skb_queue_empty(&sk->sk_write_queue)) {
1843
/*
1844
* setup for corking
1845
*/
1846
dst_hold(&rt->dst);
1847
err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1848
ipc6, rt);
1849
if (err)
1850
return err;
1851
1852
inet->cork.fl.u.ip6 = *fl6;
1853
exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1854
length += exthdrlen;
1855
transhdrlen += exthdrlen;
1856
} else {
1857
transhdrlen = 0;
1858
}
1859
1860
return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
1861
&np->cork, sk_page_frag(sk), getfrag,
1862
from, length, transhdrlen, flags);
1863
}
1864
EXPORT_SYMBOL_GPL(ip6_append_data);
1865
1866
static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
1867
{
1868
struct dst_entry *dst = cork->base.dst;
1869
1870
cork->base.dst = NULL;
1871
skb_dst_set(skb, dst);
1872
}
1873
1874
static void ip6_cork_release(struct inet_cork_full *cork,
1875
struct inet6_cork *v6_cork)
1876
{
1877
if (v6_cork->opt) {
1878
struct ipv6_txoptions *opt = v6_cork->opt;
1879
1880
kfree(opt->dst0opt);
1881
kfree(opt->dst1opt);
1882
kfree(opt->hopopt);
1883
kfree(opt->srcrt);
1884
kfree(opt);
1885
v6_cork->opt = NULL;
1886
}
1887
1888
if (cork->base.dst) {
1889
dst_release(cork->base.dst);
1890
cork->base.dst = NULL;
1891
}
1892
}
1893
1894
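/* Collapse the skbs queued by __ip6_append_data() into a single packet:
 * chain the tail skbs onto frag_list, push extension headers and the IPv6
 * header, transfer the corked dst and update SNMP counters.
 */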
struct sk_buff *__ip6_make_skb(struct sock *sk,
1895
struct sk_buff_head *queue,
1896
struct inet_cork_full *cork,
1897
struct inet6_cork *v6_cork)
1898
{
1899
struct sk_buff *skb, *tmp_skb;
1900
struct sk_buff **tail_skb;
1901
struct in6_addr *final_dst;
1902
struct net *net = sock_net(sk);
1903
struct ipv6hdr *hdr;
1904
struct ipv6_txoptions *opt = v6_cork->opt;
1905
struct rt6_info *rt = dst_rt6_info(cork->base.dst);
1906
struct flowi6 *fl6 = &cork->fl.u.ip6;
1907
unsigned char proto = fl6->flowi6_proto;
1908
1909
skb = __skb_dequeue(queue);
1910
if (!skb)
1911
goto out;
1912
tail_skb = &(skb_shinfo(skb)->frag_list);
1913
1914
/* move skb->data to ip header from ext header */
1915
if (skb->data < skb_network_header(skb))
1916
__skb_pull(skb, skb_network_offset(skb));
1917
while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1918
__skb_pull(tmp_skb, skb_network_header_len(skb));
1919
*tail_skb = tmp_skb;
1920
tail_skb = &(tmp_skb->next);
1921
skb->len += tmp_skb->len;
1922
skb->data_len += tmp_skb->len;
1923
skb->truesize += tmp_skb->truesize;
1924
tmp_skb->destructor = NULL;
1925
tmp_skb->sk = NULL;
1926
}
1927
1928
/* Allow local fragmentation. */
1929
skb->ignore_df = ip6_sk_ignore_df(sk);
1930
__skb_pull(skb, skb_network_header_len(skb));
1931
1932
final_dst = &fl6->daddr;
1933
if (opt && opt->opt_flen)
1934
ipv6_push_frag_opts(skb, opt, &proto);
1935
if (opt && opt->opt_nflen)
1936
ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1937
1938
skb_push(skb, sizeof(struct ipv6hdr));
1939
skb_reset_network_header(skb);
1940
hdr = ipv6_hdr(skb);
1941
1942
ip6_flow_hdr(hdr, v6_cork->tclass,
1943
ip6_make_flowlabel(net, skb, fl6->flowlabel,
1944
ip6_autoflowlabel(net, sk), fl6));
1945
hdr->hop_limit = v6_cork->hop_limit;
1946
hdr->nexthdr = proto;
1947
hdr->saddr = fl6->saddr;
1948
hdr->daddr = *final_dst;
1949
1950
skb->priority = cork->base.priority;
1951
skb->mark = cork->base.mark;
1952
if (sk_is_tcp(sk))
1953
skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
1954
else
1955
skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);
1956
1957
ip6_cork_steal_dst(skb, cork);
1958
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1959
if (proto == IPPROTO_ICMPV6) {
1960
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1961
u8 icmp6_type;
1962
1963
if (sk->sk_socket->type == SOCK_RAW &&
1964
!(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
1965
icmp6_type = fl6->fl6_icmp_type;
1966
else
1967
icmp6_type = icmp6_hdr(skb)->icmp6_type;
1968
ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
1969
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1970
}
1971
1972
ip6_cork_release(cork, v6_cork);
1973
out:
1974
return skb;
1975
}
1976
1977
int ip6_send_skb(struct sk_buff *skb)
1978
{
1979
struct net *net = sock_net(skb->sk);
1980
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
1981
int err;
1982
1983
rcu_read_lock();
1984
err = ip6_local_out(net, skb->sk, skb);
1985
if (err) {
1986
if (err > 0)
1987
err = net_xmit_errno(err);
1988
if (err)
1989
IP6_INC_STATS(net, rt->rt6i_idev,
1990
IPSTATS_MIB_OUTDISCARDS);
1991
}
1992
1993
rcu_read_unlock();
1994
return err;
1995
}
1996
1997
int ip6_push_pending_frames(struct sock *sk)
1998
{
1999
struct sk_buff *skb;
2000
2001
skb = ip6_finish_skb(sk);
2002
if (!skb)
2003
return 0;
2004
2005
return ip6_send_skb(skb);
2006
}
2007
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
2008
2009
static void __ip6_flush_pending_frames(struct sock *sk,
2010
struct sk_buff_head *queue,
2011
struct inet_cork_full *cork,
2012
struct inet6_cork *v6_cork)
2013
{
2014
struct sk_buff *skb;
2015
2016
while ((skb = __skb_dequeue_tail(queue)) != NULL) {
2017
if (skb_dst(skb))
2018
IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
2019
IPSTATS_MIB_OUTDISCARDS);
2020
kfree_skb(skb);
2021
}
2022
2023
ip6_cork_release(cork, v6_cork);
2024
}
2025
2026
void ip6_flush_pending_frames(struct sock *sk)
2027
{
2028
__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
2029
&inet_sk(sk)->cork, &inet6_sk(sk)->cork);
2030
}
2031
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
2032
2033
struct sk_buff *ip6_make_skb(struct sock *sk,
2034
int getfrag(void *from, char *to, int offset,
2035
int len, int odd, struct sk_buff *skb),
2036
void *from, size_t length, int transhdrlen,
2037
struct ipcm6_cookie *ipc6, struct rt6_info *rt,
2038
unsigned int flags, struct inet_cork_full *cork)
2039
{
2040
struct inet6_cork v6_cork;
2041
struct sk_buff_head queue;
2042
int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
2043
int err;
2044
2045
if (flags & MSG_PROBE) {
2046
dst_release(&rt->dst);
2047
return NULL;
2048
}
2049
2050
__skb_queue_head_init(&queue);
2051
2052
cork->base.flags = 0;
2053
cork->base.addr = 0;
2054
cork->base.opt = NULL;
2055
v6_cork.opt = NULL;
2056
err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
2057
if (err) {
2058
ip6_cork_release(cork, &v6_cork);
2059
return ERR_PTR(err);
2060
}
2061
2062
err = __ip6_append_data(sk, &queue, cork, &v6_cork,
2063
&current->task_frag, getfrag, from,
2064
length + exthdrlen, transhdrlen + exthdrlen,
2065
flags);
2066
if (err) {
2067
__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
2068
return ERR_PTR(err);
2069
}
2070
2071
return __ip6_make_skb(sk, &queue, cork, &v6_cork);
2072
}
2073
2074