GitHub Repository: awilliam/linux-vfio
Path: blob/master/net/ipv4/ip_gre.c
/*
 * Linux NET3: GRE over IP protocol decoder.
 *
 * Authors: Alexey Kuznetsov ([email protected])
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep a skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces two
   problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and the traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. To be short, it is
   not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   quickly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could. Even if it was your gated that injected the
   fatal route into the network, even if it was you who configured the
   fatal static route: you are innocent. :-)


   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, while ipip and gre are naturally
   modular. We could extract the common parts (hash table, ioctl etc.)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */
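
/*
 * A minimal sketch of the percpu recursion guard described in (1) above;
 * the real counter (xmit_recursion / RECURSION_LIMIT) lives in
 * net/core/dev.c, and every name below is illustrative only.  Compiled out.
 */
#if 0
static DEFINE_PER_CPU(int, example_xmit_recursion);
#define EXAMPLE_RECURSION_LIMIT 10

static netdev_tx_t example_guarded_xmit(struct sk_buff *skb,
                                        struct net_device *dev)
{
        netdev_tx_t rc;

        /* CPU migration is disabled inside ndo_start_xmit(), so a plain
         * percpu counter suffices to detect re-entry on this CPU. */
        if (__this_cpu_read(example_xmit_recursion) > EXAMPLE_RECURSION_LIMIT) {
                kfree_skb(skb); /* dead loop detected: drop, do not recurse */
                return NETDEV_TX_OK;
        }

        __this_cpu_inc(example_xmit_recursion);
        rc = dev->netdev_ops->ndo_start_xmit(skb, dev); /* may re-enter */
        __this_cpu_dec(example_xmit_recursion);

        return rc;
}
#endif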

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

#define HASH_SIZE 16

static int ipgre_net_id __read_mostly;
struct ipgre_net {
        struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];

        struct net_device *fb_tunnel_dev;
};

/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
 */

#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

#define tunnels_r_l     tunnels[3]
#define tunnels_r       tunnels[2]
#define tunnels_l       tunnels[1]
#define tunnels_wc      tunnels[0]
/*
 * Locking : hash tables are protected by RCU and RTNL
 */

#define for_each_ip_tunnel_rcu(start) \
        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
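
/* Worked example for HASH() above, assuming a little-endian host: the
 * __be32 address 192.168.0.1 reads as the u32 0x0100a8c0, so
 * 0x0100a8c0 ^ (0x0100a8c0 >> 4) = 0x0110a24c, and the final & 0xF
 * selects bucket 0xc.  Only a nibble of entropy, but adequate for the
 * small number of tunnels a host typically configures. */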

/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
        unsigned long   rx_packets;
        unsigned long   rx_bytes;
        unsigned long   tx_packets;
        unsigned long   tx_bytes;
};

static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
{
        struct pcpu_tstats sum = { 0 };
        int i;

        for_each_possible_cpu(i) {
                const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);

                sum.rx_packets += tstats->rx_packets;
                sum.rx_bytes   += tstats->rx_bytes;
                sum.tx_packets += tstats->tx_packets;
                sum.tx_bytes   += tstats->tx_bytes;
        }
        dev->stats.rx_packets = sum.rx_packets;
        dev->stats.rx_bytes   = sum.rx_bytes;
        dev->stats.tx_packets = sum.tx_packets;
        dev->stats.tx_bytes   = sum.tx_bytes;
        return &dev->stats;
}

/* Given src, dst and key, find the appropriate tunnel for input. */

static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
                                             __be32 remote, __be32 local,
                                             __be32 key, __be16 gre_proto)
{
        struct net *net = dev_net(dev);
        int link = dev->ifindex;
        unsigned int h0 = HASH(remote);
        unsigned int h1 = HASH(key);
        struct ip_tunnel *t, *cand = NULL;
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
        int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
                       ARPHRD_ETHER : ARPHRD_IPGRE;
        int score, cand_score = 4;
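        /* Score each candidate: bit 0 set if the configured link differs,
         * bit 1 set if the device type differs.  A score of 0 is an exact
         * match and returns immediately; otherwise the lowest-scoring
         * tunnel found across the four tables wins. */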

        for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
                if (local != t->parms.iph.saddr ||
                    remote != t->parms.iph.daddr ||
                    key != t->parms.i_key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
                if (remote != t->parms.iph.daddr ||
                    key != t->parms.i_key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
                if ((local != t->parms.iph.saddr &&
                     (local != t->parms.iph.daddr ||
                      !ipv4_is_multicast(local))) ||
                    key != t->parms.i_key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
                if (t->parms.i_key != key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        if (cand != NULL)
                return cand;

        dev = ign->fb_tunnel_dev;
        if (dev->flags & IFF_UP)
                return netdev_priv(dev);

        return NULL;
}

static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
                                               struct ip_tunnel_parm *parms)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        __be32 key = parms->i_key;
        unsigned int h = HASH(key);
        int prio = 0;

        if (local)
                prio |= 1;
        if (remote && !ipv4_is_multicast(remote)) {
                prio |= 2;
                h ^= HASH(remote);
        }

        return &ign->tunnels[prio][h];
}
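
/* Example: a tunnel with a local address and a unicast remote gets
 * prio 3 (tunnels_r_l) at bucket HASH(key) ^ HASH(remote); a keyed
 * tunnel with neither address lands in prio 0 (tunnels_wc) at
 * HASH(key), matching the table layout described above. */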

static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
                                                    struct ip_tunnel *t)
{
        return __ipgre_bucket(ign, &t->parms);
}

static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
        struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);

        rcu_assign_pointer(t->next, rtnl_dereference(*tp));
        rcu_assign_pointer(*tp, t);
}

static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
        struct ip_tunnel __rcu **tp;
        struct ip_tunnel *iter;

        for (tp = ipgre_bucket(ign, t);
             (iter = rtnl_dereference(*tp)) != NULL;
             tp = &iter->next) {
                if (t == iter) {
                        rcu_assign_pointer(*tp, t->next);
                        break;
                }
        }
}

static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
                                           struct ip_tunnel_parm *parms,
                                           int type)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        __be32 key = parms->i_key;
        int link = parms->link;
        struct ip_tunnel *t;
        struct ip_tunnel __rcu **tp;
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);

        for (tp = __ipgre_bucket(ign, parms);
             (t = rtnl_dereference(*tp)) != NULL;
             tp = &t->next)
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr &&
                    key == t->parms.i_key &&
                    link == t->parms.link &&
                    type == t->dev->type)
                        break;

        return t;
}

static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
                                             struct ip_tunnel_parm *parms, int create)
{
        struct ip_tunnel *t, *nt;
        struct net_device *dev;
        char name[IFNAMSIZ];
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);

        t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
        if (t || !create)
                return t;

        if (parms->name[0])
                strlcpy(name, parms->name, IFNAMSIZ);
        else
                strcpy(name, "gre%d");

        dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
        if (!dev)
                return NULL;

        dev_net_set(dev, net);

        nt = netdev_priv(dev);
        nt->parms = *parms;
        dev->rtnl_link_ops = &ipgre_link_ops;

        dev->mtu = ipgre_tunnel_bind_dev(dev);

        if (register_netdevice(dev) < 0)
                goto failed_free;

        dev_hold(dev);
        ipgre_tunnel_link(ign, nt);
        return nt;

failed_free:
        free_netdev(dev);
        return NULL;
}

static void ipgre_tunnel_uninit(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);

        ipgre_tunnel_unlink(ign, netdev_priv(dev));
        dev_put(dev);
}


static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco's "wise men" put the GRE key in the third word
   of the GRE header. That makes it impossible to maintain even soft
   state for keyed GRE tunnels with checksums enabled. Tell them
   "thank you".

   Well, I wonder: rfc1812 was written by a Cisco employee, so why
   the hell do these idiots break the standards established by
   themselves???
 */

        const struct iphdr *iph = (const struct iphdr *)skb->data;
        __be16 *p = (__be16 *)(skb->data + (iph->ihl << 2));
        int grehlen = (iph->ihl << 2) + 4;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct ip_tunnel *t;
        __be16 flags;

        flags = p[0];
        if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
                if (flags&(GRE_VERSION|GRE_ROUTING))
                        return;
                if (flags&GRE_KEY) {
                        grehlen += 4;
                        if (flags&GRE_CSUM)
                                grehlen += 4;
                }
        }

        /* If only 8 bytes returned, keyed message will be dropped here */
        if (skb_headlen(skb) < grehlen)
                return;

        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return;
                case ICMP_FRAG_NEEDED:
                        /* Soft state for pmtu is maintained by IP core. */
                        return;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return;
                break;
        }

        rcu_read_lock();
        t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
                                flags & GRE_KEY ?
                                *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
                                p[1]);
        if (t == NULL || t->parms.iph.daddr == 0 ||
            ipv4_is_multicast(t->parms.iph.daddr))
                goto out;

        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;

        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
out:
        rcu_read_unlock();
}

static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
{
        if (INET_ECN_is_ce(iph->tos)) {
                if (skb->protocol == htons(ETH_P_IP)) {
                        IP_ECN_set_ce(ip_hdr(skb));
                } else if (skb->protocol == htons(ETH_P_IPV6)) {
                        IP6_ECN_set_ce(ipv6_hdr(skb));
                }
        }
}

static inline u8
ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
{
        u8 inner = 0;
        if (skb->protocol == htons(ETH_P_IP))
                inner = old_iph->tos;
        else if (skb->protocol == htons(ETH_P_IPV6))
                inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
        return INET_ECN_encapsulate(tos, inner);
}
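
/* Together these two helpers implement the usual ECN tunneling rules
 * (in the spirit of RFC 3168/6040): on encapsulation the outer TOS
 * inherits the inner ECN bits via INET_ECN_encapsulate(), and on
 * decapsulation an outer CE mark is copied back to the inner header. */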

static int ipgre_rcv(struct sk_buff *skb)
{
        const struct iphdr *iph;
        u8      *h;
        __be16  flags;
        __sum16 csum = 0;
        __be32  key = 0;
        u32     seqno = 0;
        struct ip_tunnel *tunnel;
        int     offset = 4;
        __be16  gre_proto;
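        /* GRE header as parsed below (RFC 2784/2890): two bytes of
         * flags/version, two bytes of protocol, then optional 4-byte
         * fields in order: checksum+reserved (GRE_CSUM), key (GRE_KEY),
         * sequence number (GRE_SEQ); "offset" tracks how far they extend. */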

        if (!pskb_may_pull(skb, 16))
                goto drop_nolock;

        iph = ip_hdr(skb);
        h = skb->data;
        flags = *(__be16 *)h;

        if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
                /* - Version must be 0.
                   - We do not support routing headers.
                 */
                if (flags&(GRE_VERSION|GRE_ROUTING))
                        goto drop_nolock;

                if (flags&GRE_CSUM) {
                        switch (skb->ip_summed) {
                        case CHECKSUM_COMPLETE:
                                csum = csum_fold(skb->csum);
                                if (!csum)
                                        break;
                                /* fall through */
                        case CHECKSUM_NONE:
                                skb->csum = 0;
                                csum = __skb_checksum_complete(skb);
                                skb->ip_summed = CHECKSUM_COMPLETE;
                        }
                        offset += 4;
                }
                if (flags&GRE_KEY) {
                        key = *(__be32 *)(h + offset);
                        offset += 4;
                }
                if (flags&GRE_SEQ) {
                        seqno = ntohl(*(__be32 *)(h + offset));
                        offset += 4;
                }
        }

        gre_proto = *(__be16 *)(h + 2);

        rcu_read_lock();
        if ((tunnel = ipgre_tunnel_lookup(skb->dev,
                                          iph->saddr, iph->daddr, key,
                                          gre_proto))) {
                struct pcpu_tstats *tstats;

                secpath_reset(skb);

                skb->protocol = gre_proto;
                /* WCCP version 1 and 2 protocol decoding.
                 * - Change protocol to IP
                 * - When dealing with WCCPv2, skip extra 4 bytes in GRE header
                 */
                if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
                        skb->protocol = htons(ETH_P_IP);
                        if ((*(h + offset) & 0xF0) != 0x40)
                                offset += 4;
                }

                skb->mac_header = skb->network_header;
                __pskb_pull(skb, offset);
                skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
                skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
                if (ipv4_is_multicast(iph->daddr)) {
                        /* Looped back packet, drop it! */
                        if (rt_is_output_route(skb_rtable(skb)))
                                goto drop;
                        tunnel->dev->stats.multicast++;
                        skb->pkt_type = PACKET_BROADCAST;
                }
#endif

                if (((flags&GRE_CSUM) && csum) ||
                    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
                        tunnel->dev->stats.rx_crc_errors++;
                        tunnel->dev->stats.rx_errors++;
                        goto drop;
                }
                if (tunnel->parms.i_flags&GRE_SEQ) {
                        if (!(flags&GRE_SEQ) ||
                            (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
                                tunnel->dev->stats.rx_fifo_errors++;
                                tunnel->dev->stats.rx_errors++;
                                goto drop;
                        }
                        tunnel->i_seqno = seqno + 1;
                }

                /* Warning: All skb pointers will be invalidated! */
                if (tunnel->dev->type == ARPHRD_ETHER) {
                        if (!pskb_may_pull(skb, ETH_HLEN)) {
                                tunnel->dev->stats.rx_length_errors++;
                                tunnel->dev->stats.rx_errors++;
                                goto drop;
                        }

                        iph = ip_hdr(skb);
                        skb->protocol = eth_type_trans(skb, tunnel->dev);
                        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
                }

                tstats = this_cpu_ptr(tunnel->dev->tstats);
                tstats->rx_packets++;
                tstats->rx_bytes += skb->len;

                __skb_tunnel_rx(skb, tunnel->dev);

                skb_reset_network_header(skb);
                ipgre_ecn_decapsulate(iph, skb);

                netif_rx(skb);

                rcu_read_unlock();
                return 0;
        }
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
        rcu_read_unlock();
drop_nolock:
        kfree_skb(skb);
        return 0;
}

static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct pcpu_tstats *tstats;
        const struct iphdr *old_iph = ip_hdr(skb);
        const struct iphdr *tiph;
        struct flowi4 fl4;
        u8     tos;
        __be16 df;
        struct rtable *rt;              /* Route to the other host */
        struct net_device *tdev;        /* Device to other host */
        struct iphdr *iph;              /* Our new IP header */
        unsigned int max_headroom;      /* The extra header space needed */
        int    gre_hlen;
        __be32 dst;
        int    mtu;

        if (dev->type == ARPHRD_ETHER)
                IPCB(skb)->flags = 0;

        if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
                gre_hlen = 0;
                tiph = (const struct iphdr *)skb->data;
        } else {
                gre_hlen = tunnel->hlen;
                tiph = &tunnel->parms.iph;
        }

        if ((dst = tiph->daddr) == 0) {
                /* NBMA tunnel */

                if (skb_dst(skb) == NULL) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }

                if (skb->protocol == htons(ETH_P_IP)) {
                        rt = skb_rtable(skb);
                        if ((dst = rt->rt_gateway) == 0)
                                goto tx_error_icmp;
                }
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
                else if (skb->protocol == htons(ETH_P_IPV6)) {
                        const struct in6_addr *addr6;
                        int addr_type;
                        struct neighbour *neigh = skb_dst(skb)->neighbour;

                        if (neigh == NULL)
                                goto tx_error;

                        addr6 = (const struct in6_addr *)&neigh->primary_key;
                        addr_type = ipv6_addr_type(addr6);

                        if (addr_type == IPV6_ADDR_ANY) {
                                addr6 = &ipv6_hdr(skb)->daddr;
                                addr_type = ipv6_addr_type(addr6);
                        }

                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
                                goto tx_error_icmp;

                        dst = addr6->s6_addr32[3];
                }
#endif
                else
                        goto tx_error;
        }

        tos = tiph->tos;
        if (tos == 1) {
                tos = 0;
                if (skb->protocol == htons(ETH_P_IP))
                        tos = old_iph->tos;
                else if (skb->protocol == htons(ETH_P_IPV6))
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
        }

        rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
                                 tunnel->parms.o_key, RT_TOS(tos),
                                 tunnel->parms.link);
        if (IS_ERR(rt)) {
                dev->stats.tx_carrier_errors++;
                goto tx_error;
        }
        tdev = rt->dst.dev;

        if (tdev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        df = tiph->frag_off;
        if (df)
                mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
        else
                mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

        if (skb_dst(skb))
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

        if (skb->protocol == htons(ETH_P_IP)) {
                df |= (old_iph->frag_off&htons(IP_DF));

                if ((old_iph->frag_off&htons(IP_DF)) &&
                    mtu < ntohs(old_iph->tot_len)) {
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                        ip_rt_put(rt);
                        goto tx_error;
                }
        }
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
        else if (skb->protocol == htons(ETH_P_IPV6)) {
                struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

                if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
                        if ((tunnel->parms.iph.daddr &&
                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
                            rt6->rt6i_dst.plen == 128) {
                                rt6->rt6i_flags |= RTF_MODIFIED;
                                dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
                        }
                }

                if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                        ip_rt_put(rt);
                        goto tx_error;
                }
        }
#endif

        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;

                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;

        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (max_headroom > dev->needed_headroom)
                        dev->needed_headroom = max_headroom;
                if (!new_skb) {
                        ip_rt_put(rt);
                        dev->stats.tx_dropped++;
                        dev_kfree_skb(skb);
                        return NETDEV_TX_OK;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                old_iph = ip_hdr(skb);
        }

        skb_reset_transport_header(skb);
        skb_push(skb, gre_hlen);
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /*
         * Push down and install the IPIP header.
         */

        iph = ip_hdr(skb);
        iph->version = 4;
        iph->ihl = sizeof(struct iphdr) >> 2;
        iph->frag_off = df;
        iph->protocol = IPPROTO_GRE;
        iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
        iph->daddr = fl4.daddr;
        iph->saddr = fl4.saddr;

        if ((iph->ttl = tiph->ttl) == 0) {
                if (skb->protocol == htons(ETH_P_IP))
                        iph->ttl = old_iph->ttl;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
                else if (skb->protocol == htons(ETH_P_IPV6))
                        iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
#endif
                else
                        iph->ttl = ip4_dst_hoplimit(&rt->dst);
        }
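
        /* The GRE header sits right behind the new IP header: the flags
         * word, then the protocol (ETH_P_TEB for gretap), then any
         * optional fields.  Those are written back-to-front through
         * *ptr below (the sequence number first, into the last slot),
         * so the on-wire order is checksum, key, sequence. */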
        ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
        ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
                                   htons(ETH_P_TEB) : skb->protocol;

        if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
                __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);

                if (tunnel->parms.o_flags&GRE_SEQ) {
                        ++tunnel->o_seqno;
                        *ptr = htonl(tunnel->o_seqno);
                        ptr--;
                }
                if (tunnel->parms.o_flags&GRE_KEY) {
                        *ptr = tunnel->parms.o_key;
                        ptr--;
                }
                if (tunnel->parms.o_flags&GRE_CSUM) {
                        *ptr = 0;
                        *(__sum16 *)ptr = ip_compute_csum((void *)(iph + 1), skb->len - sizeof(struct iphdr));
                }
        }

        nf_reset(skb);
        tstats = this_cpu_ptr(dev->tstats);
        __IPTUNNEL_XMIT(tstats, &dev->stats);
        return NETDEV_TX_OK;

tx_error_icmp:
        dst_link_failure(skb);

tx_error:
        dev->stats.tx_errors++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}

static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel;
        const struct iphdr *iph;
        int hlen = LL_MAX_HEADER;
        int mtu = ETH_DATA_LEN;
        int addend = sizeof(struct iphdr) + 4;

        tunnel = netdev_priv(dev);
        iph = &tunnel->parms.iph;

        /* Guess output device to choose reasonable mtu and needed_headroom */

        if (iph->daddr) {
                struct flowi4 fl4;
                struct rtable *rt;

                rt = ip_route_output_gre(dev_net(dev), &fl4,
                                         iph->daddr, iph->saddr,
                                         tunnel->parms.o_key,
                                         RT_TOS(iph->tos),
                                         tunnel->parms.link);
                if (!IS_ERR(rt)) {
                        tdev = rt->dst.dev;
                        ip_rt_put(rt);
                }

                if (dev->type != ARPHRD_ETHER)
                        dev->flags |= IFF_POINTOPOINT;
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

        if (tdev) {
                hlen = tdev->hard_header_len + tdev->needed_headroom;
                mtu = tdev->mtu;
        }
        dev->iflink = tunnel->parms.link;

        /* Precalculate GRE options length */
        if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
                if (tunnel->parms.o_flags&GRE_CSUM)
                        addend += 4;
                if (tunnel->parms.o_flags&GRE_KEY)
                        addend += 4;
                if (tunnel->parms.o_flags&GRE_SEQ)
                        addend += 4;
        }
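
        /* Example: with csum+key+seq all enabled, addend = 20 (outer IP)
         * + 4 (GRE base) + 3 * 4 (options) = 36 bytes, so the needed
         * headroom grows and the usable mtu shrinks by that much. */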
        dev->needed_headroom = addend + hlen;
        mtu -= dev->hard_header_len + addend;

        if (mtu < 68)
                mtu = 68;

        tunnel->hlen = addend;

        return mtu;
}

static int
ipgre_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;
        struct net *net = dev_net(dev);
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                if (dev == ign->fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipgre_tunnel_locate(net, &p, 0);
                }
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
                    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
                        goto done;
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                if (!(p.i_flags&GRE_KEY))
                        p.i_key = 0;
                if (!(p.o_flags&GRE_KEY))
                        p.o_key = 0;

                t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

                if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                unsigned int nflags = 0;

                                t = netdev_priv(dev);

                                if (ipv4_is_multicast(p.iph.daddr))
                                        nflags = IFF_BROADCAST;
                                else if (p.iph.daddr)
                                        nflags = IFF_POINTOPOINT;

                                if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
                                        err = -EINVAL;
                                        break;
                                }
                                ipgre_tunnel_unlink(ign, t);
                                synchronize_net();
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                t->parms.i_key = p.i_key;
                                t->parms.o_key = p.o_key;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                ipgre_tunnel_link(ign, t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.iph.ttl = p.iph.ttl;
                                t->parms.iph.tos = p.iph.tos;
                                t->parms.iph.frag_off = p.iph.frag_off;
                                if (t->parms.link != p.link) {
                                        t->parms.link = p.link;
                                        dev->mtu = ipgre_tunnel_bind_dev(dev);
                                        netdev_state_change(dev);
                                }
                        }
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                if (dev == ign->fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
                                goto done;
                        err = -EPERM;
                        if (t == netdev_priv(ign->fb_tunnel_dev))
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}

static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        if (new_mtu < 68 ||
            new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */

static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
                        unsigned short type,
                        const void *daddr, const void *saddr, unsigned int len)
{
        struct ip_tunnel *t = netdev_priv(dev);
        struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
        __be16 *p = (__be16 *)(iph + 1);

        memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
        p[0] = t->parms.o_flags;
        p[1] = htons(type);

        /*
         * Set the source hardware address.
         */

        if (saddr)
                memcpy(&iph->saddr, saddr, 4);
        if (daddr)
                memcpy(&iph->daddr, daddr, 4);
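
        /* Per the header_ops->create convention (cf. eth_header()): a
         * positive return means the header is complete, while a negative
         * length signals a partial header, here an NBMA tunnel whose
         * destination is not yet known. */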
        if (iph->daddr)
                return t->hlen;

        return -t->hlen;
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
        const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
        memcpy(haddr, &iph->saddr, 4);
        return 4;
}

static const struct header_ops ipgre_header_ops = {
        .create = ipgre_header,
        .parse  = ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);

        if (ipv4_is_multicast(t->parms.iph.daddr)) {
                struct flowi4 fl4;
                struct rtable *rt;

                rt = ip_route_output_gre(dev_net(dev), &fl4,
                                         t->parms.iph.daddr,
                                         t->parms.iph.saddr,
                                         t->parms.o_key,
                                         RT_TOS(t->parms.iph.tos),
                                         t->parms.link);
                if (IS_ERR(rt))
                        return -EADDRNOTAVAIL;
                dev = rt->dst.dev;
                ip_rt_put(rt);
                if (__in_dev_get_rtnl(dev) == NULL)
                        return -EADDRNOTAVAIL;
                t->mlink = dev->ifindex;
                ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
        }
        return 0;
}

static int ipgre_close(struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);

        if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
                struct in_device *in_dev;
                in_dev = inetdev_by_index(dev_net(dev), t->mlink);
                if (in_dev)
                        ip_mc_dec_group(in_dev, t->parms.iph.daddr);
        }
        return 0;
}

#endif

static const struct net_device_ops ipgre_netdev_ops = {
        .ndo_init       = ipgre_tunnel_init,
        .ndo_uninit     = ipgre_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
        .ndo_open       = ipgre_open,
        .ndo_stop       = ipgre_close,
#endif
        .ndo_start_xmit = ipgre_tunnel_xmit,
        .ndo_do_ioctl   = ipgre_tunnel_ioctl,
        .ndo_change_mtu = ipgre_tunnel_change_mtu,
        .ndo_get_stats  = ipgre_get_stats,
};

static void ipgre_dev_free(struct net_device *dev)
{
        free_percpu(dev->tstats);
        free_netdev(dev);
}

static void ipgre_tunnel_setup(struct net_device *dev)
{
        dev->netdev_ops = &ipgre_netdev_ops;
        dev->destructor = ipgre_dev_free;

        dev->type               = ARPHRD_IPGRE;
        dev->needed_headroom    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
        dev->flags              = IFF_NOARP;
        dev->iflink             = 0;
        dev->addr_len           = 4;
        dev->features           |= NETIF_F_NETNS_LOCAL;
        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
}

static int ipgre_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel;
        struct iphdr *iph;

        tunnel = netdev_priv(dev);
        iph = &tunnel->parms.iph;

        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);

        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

        if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
                if (ipv4_is_multicast(iph->daddr)) {
                        if (!iph->saddr)
                                return -EINVAL;
                        dev->flags = IFF_BROADCAST;
                        dev->header_ops = &ipgre_header_ops;
                }
#endif
        } else
                dev->header_ops = &ipgre_header_ops;

        dev->tstats = alloc_percpu(struct pcpu_tstats);
        if (!dev->tstats)
                return -ENOMEM;

        return 0;
}

static void ipgre_fb_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;

        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);

        iph->version = 4;
        iph->protocol = IPPROTO_GRE;
        iph->ihl = 5;
        tunnel->hlen = sizeof(struct iphdr) + 4;

        dev_hold(dev);
}


static const struct gre_protocol ipgre_protocol = {
        .handler     = ipgre_rcv,
        .err_handler = ipgre_err,
};

static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
{
        int prio;

        for (prio = 0; prio < 4; prio++) {
                int h;
                for (h = 0; h < HASH_SIZE; h++) {
                        struct ip_tunnel *t;

                        t = rtnl_dereference(ign->tunnels[prio][h]);

                        while (t != NULL) {
                                unregister_netdevice_queue(t->dev, head);
                                t = rtnl_dereference(t->next);
                        }
                }
        }
}

static int __net_init ipgre_init_net(struct net *net)
{
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
        int err;

        ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
                                          ipgre_tunnel_setup);
        if (!ign->fb_tunnel_dev) {
                err = -ENOMEM;
                goto err_alloc_dev;
        }
        dev_net_set(ign->fb_tunnel_dev, net);

        ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
        ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

        if ((err = register_netdev(ign->fb_tunnel_dev)))
                goto err_reg_dev;

        rcu_assign_pointer(ign->tunnels_wc[0],
                           netdev_priv(ign->fb_tunnel_dev));
        return 0;

err_reg_dev:
        ipgre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
        return err;
}

static void __net_exit ipgre_exit_net(struct net *net)
{
        struct ipgre_net *ign;
        LIST_HEAD(list);

        ign = net_generic(net, ipgre_net_id);
        rtnl_lock();
        ipgre_destroy_tunnels(ign, &list);
        unregister_netdevice_many(&list);
        rtnl_unlock();
}

static struct pernet_operations ipgre_net_ops = {
        .init = ipgre_init_net,
        .exit = ipgre_exit_net,
        .id   = &ipgre_net_id,
        .size = sizeof(struct ipgre_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
        __be16 flags;

        if (!data)
                return 0;

        flags = 0;
        if (data[IFLA_GRE_IFLAGS])
                flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
        if (data[IFLA_GRE_OFLAGS])
                flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
        if (flags & (GRE_VERSION|GRE_ROUTING))
                return -EINVAL;

        return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
        __be32 daddr;

        if (tb[IFLA_ADDRESS]) {
                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
                        return -EINVAL;
                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
                        return -EADDRNOTAVAIL;
        }

        if (!data)
                goto out;

        if (data[IFLA_GRE_REMOTE]) {
                memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
                if (!daddr)
                        return -EINVAL;
        }

out:
        return ipgre_tunnel_validate(tb, data);
}

static void ipgre_netlink_parms(struct nlattr *data[],
                                struct ip_tunnel_parm *parms)
{
        memset(parms, 0, sizeof(*parms));

        parms->iph.protocol = IPPROTO_GRE;

        if (!data)
                return;

        if (data[IFLA_GRE_LINK])
                parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

        if (data[IFLA_GRE_IFLAGS])
                parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

        if (data[IFLA_GRE_OFLAGS])
                parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

        if (data[IFLA_GRE_IKEY])
                parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

        if (data[IFLA_GRE_OKEY])
                parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

        if (data[IFLA_GRE_LOCAL])
                parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

        if (data[IFLA_GRE_REMOTE])
                parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

        if (data[IFLA_GRE_TTL])
                parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

        if (data[IFLA_GRE_TOS])
                parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

        if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
                parms->iph.frag_off = htons(IP_DF);
}
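
/* These IFLA_GRE_* attributes are what iproute2 fills in for, e.g.:
 *
 *      ip link add gre1 type gre remote 10.0.0.2 local 10.0.0.1 ttl 64 key 42
 *
 * ("key" sets both ikey and okey; path MTU discovery stays enabled
 * unless "nopmtudisc" is given, matching the IFLA_GRE_PMTUDISC
 * default above). */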

static int ipgre_tap_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel;

        tunnel = netdev_priv(dev);

        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);

        ipgre_tunnel_bind_dev(dev);

        dev->tstats = alloc_percpu(struct pcpu_tstats);
        if (!dev->tstats)
                return -ENOMEM;

        return 0;
}

static const struct net_device_ops ipgre_tap_netdev_ops = {
        .ndo_init               = ipgre_tap_init,
        .ndo_uninit             = ipgre_tunnel_uninit,
        .ndo_start_xmit         = ipgre_tunnel_xmit,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_change_mtu         = ipgre_tunnel_change_mtu,
        .ndo_get_stats          = ipgre_get_stats,
};

static void ipgre_tap_setup(struct net_device *dev)
{

        ether_setup(dev);

        dev->netdev_ops = &ipgre_tap_netdev_ops;
        dev->destructor = ipgre_dev_free;

        dev->iflink     = 0;
        dev->features   |= NETIF_F_NETNS_LOCAL;
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
                         struct nlattr *data[])
{
        struct ip_tunnel *nt;
        struct net *net = dev_net(dev);
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
        int mtu;
        int err;

        nt = netdev_priv(dev);
        ipgre_netlink_parms(data, &nt->parms);

        if (ipgre_tunnel_find(net, &nt->parms, dev->type))
                return -EEXIST;

        if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
                random_ether_addr(dev->dev_addr);

        mtu = ipgre_tunnel_bind_dev(dev);
        if (!tb[IFLA_MTU])
                dev->mtu = mtu;

        /* Can use a lockless transmit, unless we generate output sequences */
        if (!(nt->parms.o_flags & GRE_SEQ))
                dev->features |= NETIF_F_LLTX;

        err = register_netdevice(dev);
        if (err)
                goto out;

        dev_hold(dev);
        ipgre_tunnel_link(ign, nt);

out:
        return err;
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
                            struct nlattr *data[])
{
        struct ip_tunnel *t, *nt;
        struct net *net = dev_net(dev);
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
        struct ip_tunnel_parm p;
        int mtu;

        if (dev == ign->fb_tunnel_dev)
                return -EINVAL;

        nt = netdev_priv(dev);
        ipgre_netlink_parms(data, &p);

        t = ipgre_tunnel_locate(net, &p, 0);

        if (t) {
                if (t->dev != dev)
                        return -EEXIST;
        } else {
                t = nt;

                if (dev->type != ARPHRD_ETHER) {
                        unsigned int nflags = 0;

                        if (ipv4_is_multicast(p.iph.daddr))
                                nflags = IFF_BROADCAST;
                        else if (p.iph.daddr)
                                nflags = IFF_POINTOPOINT;

                        if ((dev->flags ^ nflags) &
                            (IFF_POINTOPOINT | IFF_BROADCAST))
                                return -EINVAL;
                }

                ipgre_tunnel_unlink(ign, t);
                t->parms.iph.saddr = p.iph.saddr;
                t->parms.iph.daddr = p.iph.daddr;
                t->parms.i_key = p.i_key;
                if (dev->type != ARPHRD_ETHER) {
                        memcpy(dev->dev_addr, &p.iph.saddr, 4);
                        memcpy(dev->broadcast, &p.iph.daddr, 4);
                }
                ipgre_tunnel_link(ign, t);
                netdev_state_change(dev);
        }

        t->parms.o_key = p.o_key;
        t->parms.iph.ttl = p.iph.ttl;
        t->parms.iph.tos = p.iph.tos;
        t->parms.iph.frag_off = p.iph.frag_off;

        if (t->parms.link != p.link) {
                t->parms.link = p.link;
                mtu = ipgre_tunnel_bind_dev(dev);
                if (!tb[IFLA_MTU])
                        dev->mtu = mtu;
                netdev_state_change(dev);
        }

        return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
        return
                /* IFLA_GRE_LINK */
                nla_total_size(4) +
                /* IFLA_GRE_IFLAGS */
                nla_total_size(2) +
                /* IFLA_GRE_OFLAGS */
                nla_total_size(2) +
                /* IFLA_GRE_IKEY */
                nla_total_size(4) +
                /* IFLA_GRE_OKEY */
                nla_total_size(4) +
                /* IFLA_GRE_LOCAL */
                nla_total_size(4) +
                /* IFLA_GRE_REMOTE */
                nla_total_size(4) +
                /* IFLA_GRE_TTL */
                nla_total_size(1) +
                /* IFLA_GRE_TOS */
                nla_total_size(1) +
                /* IFLA_GRE_PMTUDISC */
                nla_total_size(1) +
                0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_parm *p = &t->parms;

        NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
        NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
        NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
        NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
        NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
        NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
        NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
        NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
        NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
        NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));

        return 0;

nla_put_failure:
        return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
        [IFLA_GRE_LINK]         = { .type = NLA_U32 },
        [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
        [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
        [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
        [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
        [IFLA_GRE_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
        [IFLA_GRE_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
        [IFLA_GRE_TTL]          = { .type = NLA_U8 },
        [IFLA_GRE_TOS]          = { .type = NLA_U8 },
        [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
        .kind           = "gre",
        .maxtype        = IFLA_GRE_MAX,
        .policy         = ipgre_policy,
        .priv_size      = sizeof(struct ip_tunnel),
        .setup          = ipgre_tunnel_setup,
        .validate       = ipgre_tunnel_validate,
        .newlink        = ipgre_newlink,
        .changelink     = ipgre_changelink,
        .get_size       = ipgre_get_size,
        .fill_info      = ipgre_fill_info,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
        .kind           = "gretap",
        .maxtype        = IFLA_GRE_MAX,
        .policy         = ipgre_policy,
        .priv_size      = sizeof(struct ip_tunnel),
        .setup          = ipgre_tap_setup,
        .validate       = ipgre_tap_validate,
        .newlink        = ipgre_newlink,
        .changelink     = ipgre_changelink,
        .get_size       = ipgre_get_size,
        .fill_info      = ipgre_fill_info,
};

/*
 * And now the modules code and kernel interface.
 */

static int __init ipgre_init(void)
{
        int err;

        printk(KERN_INFO "GRE over IPv4 tunneling driver\n");

        err = register_pernet_device(&ipgre_net_ops);
        if (err < 0)
                return err;

        err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
        if (err < 0) {
                printk(KERN_INFO "ipgre init: can't add protocol\n");
                goto add_proto_failed;
        }

        err = rtnl_link_register(&ipgre_link_ops);
        if (err < 0)
                goto rtnl_link_failed;

        err = rtnl_link_register(&ipgre_tap_ops);
        if (err < 0)
                goto tap_ops_failed;

out:
        return err;

tap_ops_failed:
        rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
        gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
        unregister_pernet_device(&ipgre_net_ops);
        goto out;
}

static void __exit ipgre_fini(void)
{
        rtnl_link_unregister(&ipgre_tap_ops);
        rtnl_link_unregister(&ipgre_link_ops);
        if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
                printk(KERN_INFO "ipgre close: can't remove protocol\n");
        unregister_pernet_device(&ipgre_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");