Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/net/ipv6/icmp.c
26285 views
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
* Internet Control Message Protocol (ICMPv6)
4
* Linux INET6 implementation
5
*
6
* Authors:
7
* Pedro Roque <[email protected]>
8
*
9
* Based on net/ipv4/icmp.c
10
*
11
* RFC 1885
12
*/
13
14
/*
15
* Changes:
16
*
17
* Andi Kleen : exception handling
18
* Andi Kleen add rate limits. never reply to a icmp.
19
* add more length checks and other fixes.
20
* yoshfuji : ensure to sent parameter problem for
21
* fragments.
22
* YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
23
* Randy Dunlap and
24
* YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
25
* Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
26
*/
27
28
#define pr_fmt(fmt) "IPv6: " fmt
29
30
#include <linux/module.h>
31
#include <linux/errno.h>
32
#include <linux/types.h>
33
#include <linux/socket.h>
34
#include <linux/in.h>
35
#include <linux/kernel.h>
36
#include <linux/sockios.h>
37
#include <linux/net.h>
38
#include <linux/skbuff.h>
39
#include <linux/init.h>
40
#include <linux/netfilter.h>
41
#include <linux/slab.h>
42
43
#ifdef CONFIG_SYSCTL
44
#include <linux/sysctl.h>
45
#endif
46
47
#include <linux/inet.h>
48
#include <linux/netdevice.h>
49
#include <linux/icmpv6.h>
50
51
#include <net/ip.h>
52
#include <net/sock.h>
53
54
#include <net/ipv6.h>
55
#include <net/ip6_checksum.h>
56
#include <net/ping.h>
57
#include <net/protocol.h>
58
#include <net/raw.h>
59
#include <net/rawv6.h>
60
#include <net/seg6.h>
61
#include <net/transp_v6.h>
62
#include <net/ip6_route.h>
63
#include <net/addrconf.h>
64
#include <net/icmp.h>
65
#include <net/xfrm.h>
66
#include <net/inet_common.h>
67
#include <net/dsfield.h>
68
#include <net/l3mdev.h>
69
70
#include <linux/uaccess.h>
71
72
/* Per-CPU kernel socket used to transmit locally generated ICMPv6
 * messages; created in icmpv6_init() and grabbed via icmpv6_xmit_lock().
 */
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);

/*
 * Error handler for the ICMPv6 protocol: invoked when an ICMPv6 error
 * (PKT_TOOBIG, NDISC_REDIRECT, ...) arrives referencing an ICMPv6 packet
 * we previously sent.  @offset locates the embedded original ICMPv6
 * header inside @skb->data; @info carries the MTU for PKT_TOOBIG.
 */
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net_rcu(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	/* Only genuine errors about our own echo requests are handed to
	 * the ping socket error path.
	 */
	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));

	return 0;
}

static int icmpv6_rcv(struct sk_buff *skb);

/* Registration record hooking ICMPv6 input and error handling into the
 * IPv6 protocol dispatch table (see icmpv6_init()).
 */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	= icmpv6_rcv,
	.err_handler	= icmpv6_err,
	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

/* Called with BH disabled.
 *
 * Grab this CPU's ICMPv6 transmit socket.  Returns NULL if the socket is
 * already locked on this CPU (re-entry from the output path), in which
 * case the caller must give up sending.  On success the socket is
 * temporarily re-homed into @net; undo with icmpv6_xmit_unlock().
 */
static struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	sk = this_cpu_read(ipv6_icmp_sk);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	sock_net_set(sk, net);
	return sk;
}

/* Counterpart of icmpv6_xmit_lock(): return the per-CPU socket to
 * init_net ownership and release the slock.
 */
static void icmpv6_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}

/*
 *	Figure out, may we reply to this packet with icmp error.
 *
 *	We do not reply, if:
 *	- it was icmp error message.
 *	- it is truncated, so that it is known, that protocol is ICMPV6
 *	  (i.e. in the middle of some exthdr)
 *
 *	--ANK (980726)
 */

static bool is_ineligible(const struct sk_buff *skb)
{
	/* Offset of the first extension header (or payload) past the
	 * fixed IPv6 header.
	 */
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);

		/* Based on RFC 8200, Section 4.5 Fragment Header, return
		 * false if this is a fragment packet with no icmp header info.
		 */
		if (!tp && frag_off != 0)
			return false;
		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}

static bool icmpv6_mask_allow(struct net *net, int type)
167
{
168
if (type > ICMPV6_MSG_MAX)
169
return true;
170
171
/* Limit if icmp type is set in ratemask. */
172
if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173
return true;
174
175
return false;
176
}
177
178
/* Apply the global (per-netns, not per-destination) ICMP token bucket.
 * Sets *@apply_ratelimit when the global limiter granted a token, so the
 * caller knows to also run the per-destination check and to consume or
 * return the token afterwards (see icmpv6_xrlim_allow()).
 */
static bool icmpv6_global_allow(struct net *net, int type,
				bool *apply_ratelimit)
{
	if (icmpv6_mask_allow(net, type))
		return true;

	if (icmp_global_allow(net)) {
		*apply_ratelimit = true;
		return true;
	}
	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
	return false;
}

/*
 *	Check the ICMP output rate limit (per-destination leg; the global
 *	leg runs first in icmpv6_global_allow(), which sets
 *	@apply_ratelimit).  Loopback destinations are never limited.
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6, bool apply_ratelimit)
{
	struct net *net = sock_net(sk);
	struct net_device *dev;
	struct dst_entry *dst;
	bool res = false;

	if (!apply_ratelimit)
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	dev = dst_dev(dst);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dev && (dev->flags & IFF_LOOPBACK)) {
		res = true;
	} else {
		struct rt6_info *rt = dst_rt6_info(dst);
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		rcu_read_lock();
		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
		res = inet_peer_xrlim_allow(peer, tmo);
		rcu_read_unlock();
	}
	/* Either account the refused packet or consume the global token
	 * that icmpv6_global_allow() granted.
	 */
	if (!res)
		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
				  ICMP6_MIB_RATELIMITHOST);
	else
		icmp_global_consume(net);
	dst_release(dst);
	return res;
}

/* Return true if the route selected for @fl6 carries a configured
 * preferred source address (in which case icmp6_send() can rely on the
 * routing code to pick the source address).
 */
static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
				  struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	dst = ip6_route_output(net, sk, fl6);
	if (!dst->error) {
		struct rt6_info *rt = dst_rt6_info(dst);
		struct in6_addr prefsrc;

		rt6_get_prefsrc(rt, &prefsrc);
		res = !ipv6_addr_any(&prefsrc);
	}
	dst_release(dst);
	return res;
}

/*
261
* an inline helper for the "simple" if statement below
262
* checks if parameter problem report is caused by an
263
* unrecognized IPv6 option that has the Option Type
264
* highest-order two bits set to 10
265
*/
266
267
static bool opt_unrec(struct sk_buff *skb, __u32 offset)
268
{
269
u8 _optval, *op;
270
271
offset += skb_network_offset(skb);
272
op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
273
if (!op)
274
return true;
275
return (*op & 0xC0) == 0x80;
276
}
277
278
/* Finalize an ICMPv6 message queued on @sk by ip6_append_data(): copy
 * the prepared header @thdr into the first fragment, compute the ICMPv6
 * checksum over all queued fragments plus the pseudo-header, and push
 * the frames out.  @len is the total ICMPv6 length used in the
 * pseudo-header.
 */
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single fragment: fold the header into its csum. */
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Multiple fragments: sum the per-skb checksums first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}

/* Cursor passed as the "from" cookie to ip6_append_data()'s getfrag
 * callback (icmpv6_getfrag).
 */
struct icmpv6_msg {
	struct sk_buff	*skb;	/* packet whose payload is being copied */
	int		offset;	/* starting offset within skb */
	uint8_t		type;	/* ICMPv6 type being generated */
};

/* getfrag callback for ip6_append_data(): copy @len bytes of the
 * original packet into the outgoing skb while accumulating the
 * checksum.  For error messages (non-INFOMSG types), also attach the
 * conntrack reference of the original skb.
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6: if the packet carries a Home Address Option, swap the
 * care-of address in the IPv6 source with the home address from the
 * option so the ICMP error is built against the home address.
 * No-op stub when CONFIG_IPV6_MIP6 is disabled.
 */
static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
				(skb_network_header(skb) + off);
			swap(iph->saddr, hao->addr);
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif

/* Resolve the output route for an ICMPv6 error, running the result
 * through XFRM policy.  If the forward-direction XFRM lookup is denied
 * (-EPERM), retry with a reverse-decoded flow so errors about tunneled
 * traffic can still be routed.  Returns a held dst or an ERR_PTR.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast unless we need to treat anycast as unicast.
	 */
	if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
	    ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	/* Forward lookup denied or unchanged: retry against the
	 * reverse-decoded session flow.
	 */
	err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* Fall back to the original (possibly NULL) dst on failure. */
	if (dst)
		return dst;
	return ERR_PTR(err);
}

static struct net_device *icmp6_dev(const struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	/* for local traffic to local address, skb dev is the loopback
	 * device. Check if there is a dst attached to the skb and if so
	 * get the real device index. Same is needed for replies to a link
	 * local address on a device enslaved to an L3 master device
	 */
	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
		const struct rt6_info *rt6 = skb_rt6_info(skb);

		/* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
		 * and ip6_null_entry could be set to skb if no route is found.
		 */
		if (rt6 && rt6->rt6i_idev)
			dev = rt6->rt6i_idev->dev;
	}

	return dev;
}

static int icmp6_iif(const struct sk_buff *skb)
444
{
445
return icmp6_dev(skb)->ifindex;
446
}
447
448
/*
 *	Send an ICMP message in response to a packet in error
 *
 *	@skb: the offending packet
 *	@type/@code: ICMPv6 type and code to emit
 *	@info: type-dependent field (pointer for PARAMPROB, MTU for
 *	       PKT_TOOBIG), stored via icmp6_pointer
 *	@force_saddr: optional source address override
 *	@parm: IPv6 control block of @skb
 *
 *	Applies the eligibility rules (RFC 4443 / RFC 1885 2.4) and both
 *	rate-limit legs before transmitting on the per-CPU ICMPv6 socket.
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr,
		const struct inet6_skb_parm *parm)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	bool apply_ratelimit = false;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	/* Sanity-check that the IPv6 header actually lies within skb. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;

	rcu_read_lock();

	net = dev_net_rcu(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		/* Multicast/non-host destinations only get a reply for
		 * PKT_TOOBIG or a PARAMPROB about an unrecognized option.
		 */
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			goto out;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, type, &apply_ratelimit))
		goto out_bh_enable;

	mip6_addr_swap(skb, parm);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
		/* select a more meaningful saddr from input if */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, parm->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
					   inet6_sk(sk)->srcprefs,
					   &fl6.saddr);
			dev_put(in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	np = inet6_sk(sk);

	if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
		goto out_unlock;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	ipcm6_init_sk(&ipc6, sk);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out_unlock;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	/* Quote as much of the offending packet as fits in the minimum
	 * IPv6 MTU (RFC 4443 requirement).
	 */
	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, dst_rt6_info(dst),
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}

out_dst_release:
	dst_release(dst);
out_unlock:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
out:
	rcu_read_unlock();
}
EXPORT_SYMBOL(icmp6_send);

/* Slightly more convenient version of icmp6_send with drop reasons:
 * emit a PARAMPROB error pointing at offset @pos, then free @skb with
 * the given drop @reason.
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}

/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 *
 * Returns 0 when an ICMPv6 message was generated, 1 if the packet was
 * too short or the skb copy/clone failed.
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* A full copy is needed only when we must rewrite the payload
	 * for RFC 4884 padding; otherwise a clone suffices.
	 */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr,
			NULL, 0, skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Use the v4-mapped form of the outer IPv4 source as saddr. */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);

/* Answer an (extended) echo request.  Honors the echo_ignore_multicast,
 * echo_ignore_anycast and anycast_src_echo_reply sysctls and both
 * rate-limit legs.  Returns SKB_CONSUMED when a reply was queued,
 * otherwise the drop reason for the caller to free @skb with.
 */
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	bool apply_ratelimit = false;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	SKB_DR(reason);
	bool acast;
	u8 type;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return reason;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return reason;

	/* Let routing pick the source unless the request was sent to a
	 * unicast address (or to anycast with anycast_src_echo_reply).
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(&ipc6, sk);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	/* PROBE (RFC 8335) replies need the probe payload built first. */
	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    dst_rt6_info(dst), MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
		reason = SKB_CONSUMED;
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
	return reason;
}

/* Dispatch a received ICMPv6 error to the error handler of the inner
 * protocol it quotes, plus any matching raw sockets.  @skb points at
 * the quoted original packet (the outer ICMPv6 header has already been
 * pulled by icmpv6_rcv()).
 */
enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
				   u8 code, __be32 info)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net_rcu(skb->dev);
	const struct inet6_protocol *ipprot;
	enum skb_drop_reason reason;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;

	reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	seg6_icmp_srh(skb, opt);

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0) {
			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
			goto out;
		}
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	reason = pskb_may_pull_reason(skb, inner_offset + 8);
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, opt, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return SKB_CONSUMED;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
	return reason;
}

/*
 *	Handle icmp messages: the input entry point registered in
 *	icmpv6_protocol.  Validates XFRM policy and the checksum, updates
 *	MIB counters, then dispatches by ICMPv6 type.
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net_rcu(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		/* Only ICMP-flagged xfrm states may bypass the normal
		 * policy check; everything else is dropped here.
		 */
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Re-run the policy check against the quoted inner packet. */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			reason = icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			reason = icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
	case ICMPV6_EXT_ECHO_REPLY:
		/* ping_rcv() takes ownership of skb. */
		ping_rcv(skb);
		return 0;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		reason = ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		return 0;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

csum_error:
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb_reason(skb, reason);
	return 0;
}

/* Initialize @fl6 for an ICMPv6 transmission of @type from @saddr to
 * @daddr on interface @oif, zeroing every other field, then run the
 * LSM flow classifier over it.
 */
void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr, int oif)
{
	*fl6 = (struct flowi6) {
		.saddr		= *saddr,
		.daddr		= *daddr,
		.flowi6_proto	= IPPROTO_ICMPV6,
		.fl6_icmp_type	= type,
		.fl6_icmp_code	= 0,
		.flowi6_oif	= oif,
	};
	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}

/* Boot-time initialization: create one ICMPv6 control socket per
 * possible CPU, register the protocol handler and the ICMPv6 sender.
 * Returns 0 on success or a negative errno.
 */
int __init icmpv6_init(void)
{
	struct sock *sk;
	int err, i;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			return err;
		}

		per_cpu(ipv6_icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	return err;
}

/* Undo icmpv6_init(): unregister the sender and the protocol handler. */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}

/* Mapping from ICMPV6_DEST_UNREACH codes (used as the array index in
 * icmpv6_err_convert()) to an errno value and whether the error is
 * fatal for the connection.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};

/* Translate an ICMPv6 error (@type, @code) into an errno stored in
 * *@err.  Returns non-zero when the error is fatal for the connection.
 * Unknown types map to EPROTO, non-fatal.
 */
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	int fatal = 0;

	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		fatal = 1;
		if (code < ARRAY_SIZE(tab_unreach)) {
			*err = tab_unreach[code].err;
			fatal = tab_unreach[code].fatal;
		}
		break;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		break;

	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		fatal = 1;
		break;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		break;
	}

	return fatal;
}
EXPORT_SYMBOL(icmpv6_err_convert);

#ifdef CONFIG_SYSCTL
1175
/* Template for the per-netns net.ipv6.icmp sysctl table; entry order
 * must match the indexed fixups in ipv6_icmp_sysctl_init().
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler	= proc_do_large_bitmap,
	},
	{
		.procname	= "error_anycast_as_unicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};

/* Clone the sysctl template for namespace @net and point each entry's
 * data at the corresponding per-netns field.  Indices must stay in sync
 * with ipv6_icmp_table_template.  Returns NULL on allocation failure.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
		table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
	}
	return table;
}

/* Number of entries registered by ipv6_icmp_sysctl_init(). */
size_t ipv6_icmp_sysctl_table_size(void)
{
	return ARRAY_SIZE(ipv6_icmp_table_template);
}

#endif
1246
1247