Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/net/bridge/br_if.c
48869 views
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
* Userspace interface
4
* Linux ethernet bridge
5
*
6
* Authors:
7
* Lennert Buytenhek <buytenh@gnu.org>
8
*/
9
10
#include <linux/kernel.h>
11
#include <linux/netdevice.h>
12
#include <linux/etherdevice.h>
13
#include <linux/netpoll.h>
14
#include <linux/ethtool.h>
15
#include <linux/if_arp.h>
16
#include <linux/module.h>
17
#include <linux/init.h>
18
#include <linux/rtnetlink.h>
19
#include <linux/if_ether.h>
20
#include <linux/slab.h>
21
#include <net/dsa.h>
22
#include <net/sock.h>
23
#include <linux/if_vlan.h>
24
#include <net/switchdev.h>
25
#include <net/net_namespace.h>
26
27
#include "br_private.h"
28
29
/*
30
* Determine initial path cost based on speed.
31
* using recommendations from 802.1d standard
32
*
33
* Since driver might sleep need to not be holding any locks.
34
*/
35
static int port_cost(struct net_device *dev)
36
{
37
struct ethtool_link_ksettings ecmd;
38
39
if (!__ethtool_get_link_ksettings(dev, &ecmd)) {
40
switch (ecmd.base.speed) {
41
case SPEED_10000:
42
return 2;
43
case SPEED_5000:
44
return 3;
45
case SPEED_2500:
46
return 4;
47
case SPEED_1000:
48
return 5;
49
case SPEED_100:
50
return 19;
51
case SPEED_10:
52
return 100;
53
case SPEED_UNKNOWN:
54
return 100;
55
default:
56
if (ecmd.base.speed > SPEED_10000)
57
return 1;
58
}
59
}
60
61
/* Old silly heuristics based on name */
62
if (!strncmp(dev->name, "lec", 3))
63
return 7;
64
65
if (!strncmp(dev->name, "plip", 4))
66
return 2500;
67
68
return 100; /* assume old 10Mbps */
69
}
70
71
72
/* Check for port carrier transitions. */
73
void br_port_carrier_check(struct net_bridge_port *p, bool *notified)
74
{
75
struct net_device *dev = p->dev;
76
struct net_bridge *br = p->br;
77
78
if (!(p->flags & BR_ADMIN_COST) &&
79
netif_running(dev) && netif_oper_up(dev))
80
p->path_cost = port_cost(dev);
81
82
*notified = false;
83
if (!netif_running(br->dev))
84
return;
85
86
spin_lock_bh(&br->lock);
87
if (netif_running(dev) && netif_oper_up(dev)) {
88
if (p->state == BR_STATE_DISABLED) {
89
br_stp_enable_port(p);
90
*notified = true;
91
}
92
} else {
93
if (p->state != BR_STATE_DISABLED) {
94
br_stp_disable_port(p);
95
*notified = true;
96
}
97
}
98
spin_unlock_bh(&br->lock);
99
}
100
101
static void br_port_set_promisc(struct net_bridge_port *p)
102
{
103
int err = 0;
104
105
if (br_promisc_port(p))
106
return;
107
108
err = dev_set_promiscuity(p->dev, 1);
109
if (err)
110
return;
111
112
br_fdb_unsync_static(p->br, p);
113
p->flags |= BR_PROMISC;
114
}
115
116
static void br_port_clear_promisc(struct net_bridge_port *p)
117
{
118
int err;
119
120
/* Check if the port is already non-promisc or if it doesn't
121
* support UNICAST filtering. Without unicast filtering support
122
* we'll end up re-enabling promisc mode anyway, so just check for
123
* it here.
124
*/
125
if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT))
126
return;
127
128
/* Since we'll be clearing the promisc mode, program the port
129
* first so that we don't have interruption in traffic.
130
*/
131
err = br_fdb_sync_static(p->br, p);
132
if (err)
133
return;
134
135
dev_set_promiscuity(p->dev, -1);
136
p->flags &= ~BR_PROMISC;
137
}
138
139
/* When a port is added or removed or when certain port flags
140
* change, this function is called to automatically manage
141
* promiscuity setting of all the bridge ports. We are always called
142
* under RTNL so can skip using rcu primitives.
143
*/
144
void br_manage_promisc(struct net_bridge *br)
145
{
146
struct net_bridge_port *p;
147
bool set_all = false;
148
149
/* If vlan filtering is disabled or bridge interface is placed
150
* into promiscuous mode, place all ports in promiscuous mode.
151
*/
152
if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br->dev))
153
set_all = true;
154
155
list_for_each_entry(p, &br->port_list, list) {
156
if (set_all) {
157
br_port_set_promisc(p);
158
} else {
159
/* If the number of auto-ports is <= 1, then all other
160
* ports will have their output configuration
161
* statically specified through fdbs. Since ingress
162
* on the auto-port becomes forwarding/egress to other
163
* ports and egress configuration is statically known,
164
* we can say that ingress configuration of the
165
* auto-port is also statically known.
166
* This lets us disable promiscuous mode and write
167
* this config to hw.
168
*/
169
if ((p->dev->priv_flags & IFF_UNICAST_FLT) &&
170
(br->auto_cnt == 0 ||
171
(br->auto_cnt == 1 && br_auto_port(p))))
172
br_port_clear_promisc(p);
173
else
174
br_port_set_promisc(p);
175
}
176
}
177
}
178
179
int nbp_backup_change(struct net_bridge_port *p,
180
struct net_device *backup_dev)
181
{
182
struct net_bridge_port *old_backup = rtnl_dereference(p->backup_port);
183
struct net_bridge_port *backup_p = NULL;
184
185
ASSERT_RTNL();
186
187
if (backup_dev) {
188
if (!netif_is_bridge_port(backup_dev))
189
return -ENOENT;
190
191
backup_p = br_port_get_rtnl(backup_dev);
192
if (backup_p->br != p->br)
193
return -EINVAL;
194
}
195
196
if (p == backup_p)
197
return -EINVAL;
198
199
if (old_backup == backup_p)
200
return 0;
201
202
/* if the backup link is already set, clear it */
203
if (old_backup)
204
old_backup->backup_redirected_cnt--;
205
206
if (backup_p)
207
backup_p->backup_redirected_cnt++;
208
rcu_assign_pointer(p->backup_port, backup_p);
209
210
return 0;
211
}
212
213
static void nbp_backup_clear(struct net_bridge_port *p)
214
{
215
nbp_backup_change(p, NULL);
216
if (p->backup_redirected_cnt) {
217
struct net_bridge_port *cur_p;
218
219
list_for_each_entry(cur_p, &p->br->port_list, list) {
220
struct net_bridge_port *backup_p;
221
222
backup_p = rtnl_dereference(cur_p->backup_port);
223
if (backup_p == p)
224
nbp_backup_change(cur_p, NULL);
225
}
226
}
227
228
WARN_ON(rcu_access_pointer(p->backup_port) || p->backup_redirected_cnt);
229
}
230
231
static void nbp_update_port_count(struct net_bridge *br)
232
{
233
struct net_bridge_port *p;
234
u32 cnt = 0;
235
236
list_for_each_entry(p, &br->port_list, list) {
237
if (br_auto_port(p))
238
cnt++;
239
}
240
if (br->auto_cnt != cnt) {
241
br->auto_cnt = cnt;
242
br_manage_promisc(br);
243
}
244
}
245
246
static void nbp_delete_promisc(struct net_bridge_port *p)
247
{
248
/* If port is currently promiscuous, unset promiscuity.
249
* Otherwise, it is a static port so remove all addresses
250
* from it.
251
*/
252
dev_set_allmulti(p->dev, -1);
253
if (br_promisc_port(p))
254
dev_set_promiscuity(p->dev, -1);
255
else
256
br_fdb_unsync_static(p->br, p);
257
}
258
259
static void release_nbp(struct kobject *kobj)
260
{
261
struct net_bridge_port *p
262
= container_of(kobj, struct net_bridge_port, kobj);
263
kfree(p);
264
}
265
266
/* kobject get_ownership callback (see brport_ktype): reports through
 * @uid/@gid the ownership of the network namespace of the port's device.
 */
static void brport_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid)
{
	struct net_bridge_port *port = kobj_to_brport(kobj);
	struct net *net = dev_net(port->dev);

	net_ns_get_ownership(net, uid, gid);
}
272
273
static const struct kobj_type brport_ktype = {
274
#ifdef CONFIG_SYSFS
275
.sysfs_ops = &brport_sysfs_ops,
276
#endif
277
.release = release_nbp,
278
.get_ownership = brport_get_ownership,
279
};
280
281
static void destroy_nbp(struct net_bridge_port *p)
282
{
283
struct net_device *dev = p->dev;
284
285
p->br = NULL;
286
p->dev = NULL;
287
netdev_put(dev, &p->dev_tracker);
288
289
kobject_put(&p->kobj);
290
}
291
292
static void destroy_nbp_rcu(struct rcu_head *head)
293
{
294
struct net_bridge_port *p =
295
container_of(head, struct net_bridge_port, rcu);
296
destroy_nbp(p);
297
}
298
299
static unsigned get_max_headroom(struct net_bridge *br)
300
{
301
unsigned max_headroom = 0;
302
struct net_bridge_port *p;
303
304
list_for_each_entry(p, &br->port_list, list) {
305
unsigned dev_headroom = netdev_get_fwd_headroom(p->dev);
306
307
if (dev_headroom > max_headroom)
308
max_headroom = dev_headroom;
309
}
310
311
return max_headroom;
312
}
313
314
static void update_headroom(struct net_bridge *br, int new_hr)
315
{
316
struct net_bridge_port *p;
317
318
list_for_each_entry(p, &br->port_list, list)
319
netdev_set_rx_headroom(p->dev, new_hr);
320
321
br->dev->needed_headroom = new_hr;
322
}
323
324
/* Delete port(interface) from bridge is done in two steps.
325
* via RCU. First step, marks device as down. That deletes
326
* all the timers and stops new packets from flowing through.
327
*
328
* Final cleanup doesn't occur until after all CPU's finished
329
* processing packets.
330
*
331
* Protected from multiple admin operations by RTNL mutex
332
*/
333
static void del_nbp(struct net_bridge_port *p)
334
{
335
struct net_bridge *br = p->br;
336
struct net_device *dev = p->dev;
337
338
sysfs_remove_link(br->ifobj, p->dev->name);
339
340
nbp_delete_promisc(p);
341
342
spin_lock_bh(&br->lock);
343
br_stp_disable_port(p);
344
spin_unlock_bh(&br->lock);
345
346
br_mrp_port_del(br, p);
347
br_cfm_port_del(br, p);
348
349
br_ifinfo_notify(RTM_DELLINK, NULL, p);
350
351
list_del_rcu(&p->list);
352
if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
353
update_headroom(br, get_max_headroom(br));
354
netdev_reset_rx_headroom(dev);
355
356
nbp_vlan_flush(p);
357
br_fdb_delete_by_port(br, p, 0, 1);
358
switchdev_deferred_process();
359
nbp_backup_clear(p);
360
361
nbp_update_port_count(br);
362
363
netdev_upper_dev_unlink(dev, br->dev);
364
365
dev->priv_flags &= ~IFF_BRIDGE_PORT;
366
367
netdev_rx_handler_unregister(dev);
368
369
br_multicast_del_port(p);
370
371
kobject_uevent(&p->kobj, KOBJ_REMOVE);
372
kobject_del(&p->kobj);
373
374
br_netpoll_disable(p);
375
376
call_rcu(&p->rcu, destroy_nbp_rcu);
377
}
378
379
/* Delete bridge device */
380
void br_dev_delete(struct net_device *dev, struct list_head *head)
381
{
382
struct net_bridge *br = netdev_priv(dev);
383
struct net_bridge_port *p, *n;
384
385
list_for_each_entry_safe(p, n, &br->port_list, list) {
386
del_nbp(p);
387
}
388
389
br_mst_uninit(br);
390
br_recalculate_neigh_suppress_enabled(br);
391
392
br_fdb_delete_by_port(br, NULL, 0, 1);
393
394
cancel_delayed_work_sync(&br->gc_work);
395
396
br_sysfs_delbr(br->dev);
397
unregister_netdevice_queue(br->dev, head);
398
}
399
400
/* find an available port number */
401
static int find_portno(struct net_bridge *br)
402
{
403
int index;
404
struct net_bridge_port *p;
405
unsigned long *inuse;
406
407
inuse = bitmap_zalloc(BR_MAX_PORTS, GFP_KERNEL);
408
if (!inuse)
409
return -ENOMEM;
410
411
__set_bit(0, inuse); /* zero is reserved */
412
list_for_each_entry(p, &br->port_list, list)
413
__set_bit(p->port_no, inuse);
414
415
index = find_first_zero_bit(inuse, BR_MAX_PORTS);
416
bitmap_free(inuse);
417
418
return (index >= BR_MAX_PORTS) ? -EXFULL : index;
419
}
420
421
/* called with RTNL but without bridge lock */
422
static struct net_bridge_port *new_nbp(struct net_bridge *br,
423
struct net_device *dev)
424
{
425
struct net_bridge_port *p;
426
int index, err;
427
428
index = find_portno(br);
429
if (index < 0)
430
return ERR_PTR(index);
431
432
p = kzalloc(sizeof(*p), GFP_KERNEL);
433
if (p == NULL)
434
return ERR_PTR(-ENOMEM);
435
436
p->br = br;
437
netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
438
p->dev = dev;
439
p->path_cost = port_cost(dev);
440
p->priority = 0x8000 >> BR_PORT_BITS;
441
p->port_no = index;
442
p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
443
br_init_port(p);
444
br_set_state(p, BR_STATE_DISABLED);
445
br_stp_port_timer_init(p);
446
err = br_multicast_add_port(p);
447
if (err) {
448
netdev_put(dev, &p->dev_tracker);
449
kfree(p);
450
p = ERR_PTR(err);
451
}
452
453
return p;
454
}
455
456
int br_add_bridge(struct net *net, const char *name)
457
{
458
struct net_device *dev;
459
int res;
460
461
dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
462
br_dev_setup);
463
464
if (!dev)
465
return -ENOMEM;
466
467
dev_net_set(dev, net);
468
dev->rtnl_link_ops = &br_link_ops;
469
470
res = register_netdevice(dev);
471
if (res)
472
free_netdev(dev);
473
return res;
474
}
475
476
int br_del_bridge(struct net *net, const char *name)
477
{
478
struct net_device *dev;
479
int ret = 0;
480
481
dev = __dev_get_by_name(net, name);
482
if (dev == NULL)
483
ret = -ENXIO; /* Could not find device */
484
485
else if (!netif_is_bridge_master(dev)) {
486
/* Attempt to delete non bridge device! */
487
ret = -EPERM;
488
}
489
490
else if (dev->flags & IFF_UP) {
491
/* Not shutdown yet. */
492
ret = -EBUSY;
493
}
494
495
else
496
br_dev_delete(dev, NULL);
497
498
return ret;
499
}
500
501
/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
502
static int br_mtu_min(const struct net_bridge *br)
503
{
504
const struct net_bridge_port *p;
505
int ret_mtu = 0;
506
507
list_for_each_entry(p, &br->port_list, list)
508
if (!ret_mtu || ret_mtu > p->dev->mtu)
509
ret_mtu = p->dev->mtu;
510
511
return ret_mtu ? ret_mtu : ETH_DATA_LEN;
512
}
513
514
void br_mtu_auto_adjust(struct net_bridge *br)
515
{
516
ASSERT_RTNL();
517
518
/* if the bridge MTU was manually configured don't mess with it */
519
if (br_opt_get(br, BROPT_MTU_SET_BY_USER))
520
return;
521
522
/* change to the minimum MTU and clear the flag which was set by
523
* the bridge ndo_change_mtu callback
524
*/
525
dev_set_mtu(br->dev, br_mtu_min(br));
526
br_opt_toggle(br, BROPT_MTU_SET_BY_USER, false);
527
}
528
529
/*
530
* Recomputes features using slave's features
531
*/
532
netdev_features_t br_features_recompute(struct net_bridge *br,
533
netdev_features_t features)
534
{
535
struct net_bridge_port *p;
536
netdev_features_t mask;
537
538
if (list_empty(&br->port_list))
539
return features;
540
541
mask = features;
542
features &= ~NETIF_F_ONE_FOR_ALL;
543
544
list_for_each_entry(p, &br->port_list, list) {
545
features = netdev_increment_features(features,
546
p->dev->features, mask);
547
}
548
features = netdev_add_tso_features(features, mask);
549
550
return features;
551
}
552
553
/* called with RTNL */
554
int br_add_if(struct net_bridge *br, struct net_device *dev,
555
struct netlink_ext_ack *extack)
556
{
557
struct net_bridge_port *p;
558
int err = 0;
559
unsigned br_hr, dev_hr;
560
bool changed_addr, fdb_synced = false;
561
562
/* Don't allow bridging non-ethernet like devices. */
563
if ((dev->flags & IFF_LOOPBACK) ||
564
dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
565
!is_valid_ether_addr(dev->dev_addr))
566
return -EINVAL;
567
568
/* No bridging of bridges */
569
if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
570
NL_SET_ERR_MSG(extack,
571
"Can not enslave a bridge to a bridge");
572
return -ELOOP;
573
}
574
575
/* Device has master upper dev */
576
if (netdev_master_upper_dev_get(dev))
577
return -EBUSY;
578
579
/* No bridging devices that dislike that (e.g. wireless) */
580
if (dev->priv_flags & IFF_DONT_BRIDGE) {
581
NL_SET_ERR_MSG(extack,
582
"Device does not allow enslaving to a bridge");
583
return -EOPNOTSUPP;
584
}
585
586
p = new_nbp(br, dev);
587
if (IS_ERR(p))
588
return PTR_ERR(p);
589
590
call_netdevice_notifiers(NETDEV_JOIN, dev);
591
592
err = dev_set_allmulti(dev, 1);
593
if (err) {
594
br_multicast_del_port(p);
595
netdev_put(dev, &p->dev_tracker);
596
kfree(p); /* kobject not yet init'd, manually free */
597
goto err1;
598
}
599
600
err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
601
SYSFS_BRIDGE_PORT_ATTR);
602
if (err)
603
goto err2;
604
605
err = br_sysfs_addif(p);
606
if (err)
607
goto err2;
608
609
err = br_netpoll_enable(p);
610
if (err)
611
goto err3;
612
613
err = netdev_rx_handler_register(dev, br_get_rx_handler(dev), p);
614
if (err)
615
goto err4;
616
617
dev->priv_flags |= IFF_BRIDGE_PORT;
618
619
err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
620
if (err)
621
goto err5;
622
623
dev_disable_lro(dev);
624
625
list_add_rcu(&p->list, &br->port_list);
626
627
nbp_update_port_count(br);
628
if (!br_promisc_port(p) && (p->dev->priv_flags & IFF_UNICAST_FLT)) {
629
/* When updating the port count we also update all ports'
630
* promiscuous mode.
631
* A port leaving promiscuous mode normally gets the bridge's
632
* fdb synced to the unicast filter (if supported), however,
633
* `br_port_clear_promisc` does not distinguish between
634
* non-promiscuous ports and *new* ports, so we need to
635
* sync explicitly here.
636
*/
637
fdb_synced = br_fdb_sync_static(br, p) == 0;
638
if (!fdb_synced)
639
netdev_err(dev, "failed to sync bridge static fdb addresses to this port\n");
640
}
641
642
br_hr = br->dev->needed_headroom;
643
dev_hr = netdev_get_fwd_headroom(dev);
644
if (br_hr < dev_hr)
645
update_headroom(br, dev_hr);
646
else
647
netdev_set_rx_headroom(dev, br_hr);
648
649
if (br_fdb_add_local(br, p, dev->dev_addr, 0))
650
netdev_err(dev, "failed insert local address bridge forwarding table\n");
651
652
if (br->dev->addr_assign_type != NET_ADDR_SET) {
653
/* Ask for permission to use this MAC address now, even if we
654
* don't end up choosing it below.
655
*/
656
err = netif_pre_changeaddr_notify(br->dev, dev->dev_addr,
657
extack);
658
if (err)
659
goto err6;
660
}
661
662
err = nbp_vlan_init(p, extack);
663
if (err) {
664
netdev_err(dev, "failed to initialize vlan filtering on this port\n");
665
goto err6;
666
}
667
668
spin_lock_bh(&br->lock);
669
changed_addr = br_stp_recalculate_bridge_id(br);
670
671
if (netif_running(dev) && netif_oper_up(dev) &&
672
(br->dev->flags & IFF_UP))
673
br_stp_enable_port(p);
674
spin_unlock_bh(&br->lock);
675
676
br_ifinfo_notify(RTM_NEWLINK, NULL, p);
677
678
if (changed_addr)
679
call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
680
681
br_mtu_auto_adjust(br);
682
683
netdev_compute_master_upper_features(br->dev, false);
684
685
kobject_uevent(&p->kobj, KOBJ_ADD);
686
687
return 0;
688
689
err6:
690
if (fdb_synced)
691
br_fdb_unsync_static(br, p);
692
list_del_rcu(&p->list);
693
br_fdb_delete_by_port(br, p, 0, 1);
694
nbp_update_port_count(br);
695
netdev_upper_dev_unlink(dev, br->dev);
696
err5:
697
dev->priv_flags &= ~IFF_BRIDGE_PORT;
698
netdev_rx_handler_unregister(dev);
699
err4:
700
br_netpoll_disable(p);
701
err3:
702
sysfs_remove_link(br->ifobj, p->dev->name);
703
err2:
704
br_multicast_del_port(p);
705
netdev_put(dev, &p->dev_tracker);
706
kobject_put(&p->kobj);
707
dev_set_allmulti(dev, -1);
708
err1:
709
return err;
710
}
711
712
/* called with RTNL */
713
int br_del_if(struct net_bridge *br, struct net_device *dev)
714
{
715
struct net_bridge_port *p;
716
bool changed_addr;
717
718
p = br_port_get_rtnl(dev);
719
if (!p || p->br != br)
720
return -EINVAL;
721
722
/* Since more than one interface can be attached to a bridge,
723
* there still maybe an alternate path for netconsole to use;
724
* therefore there is no reason for a NETDEV_RELEASE event.
725
*/
726
del_nbp(p);
727
728
br_mtu_auto_adjust(br);
729
730
spin_lock_bh(&br->lock);
731
changed_addr = br_stp_recalculate_bridge_id(br);
732
spin_unlock_bh(&br->lock);
733
734
if (changed_addr)
735
call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
736
737
netdev_compute_master_upper_features(br->dev, false);
738
739
return 0;
740
}
741
742
void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
743
{
744
struct net_bridge *br = p->br;
745
746
if (mask & BR_AUTO_MASK)
747
nbp_update_port_count(br);
748
749
if (mask & (BR_NEIGH_SUPPRESS | BR_NEIGH_VLAN_SUPPRESS))
750
br_recalculate_neigh_suppress_enabled(br);
751
}
752
753
bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
754
{
755
struct net_bridge_port *p;
756
757
p = br_port_get_rtnl_rcu(dev);
758
if (!p)
759
return false;
760
761
return p->flags & flag;
762
}
763
EXPORT_SYMBOL_GPL(br_port_flag_is_set);
764
765