Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/net/openvswitch/datapath.c
170831 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* Copyright (c) 2007-2014 Nicira, Inc.
4
*/
5
6
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8
#include <linux/init.h>
9
#include <linux/module.h>
10
#include <linux/if_arp.h>
11
#include <linux/if_vlan.h>
12
#include <linux/in.h>
13
#include <linux/ip.h>
14
#include <linux/jhash.h>
15
#include <linux/delay.h>
16
#include <linux/time.h>
17
#include <linux/etherdevice.h>
18
#include <linux/kernel.h>
19
#include <linux/kthread.h>
20
#include <linux/mutex.h>
21
#include <linux/percpu.h>
22
#include <linux/rcupdate.h>
23
#include <linux/tcp.h>
24
#include <linux/udp.h>
25
#include <linux/ethtool.h>
26
#include <linux/wait.h>
27
#include <asm/div64.h>
28
#include <linux/highmem.h>
29
#include <linux/netfilter_bridge.h>
30
#include <linux/netfilter_ipv4.h>
31
#include <linux/inetdevice.h>
32
#include <linux/list.h>
33
#include <linux/openvswitch.h>
34
#include <linux/rculist.h>
35
#include <linux/dmi.h>
36
#include <net/genetlink.h>
37
#include <net/gso.h>
38
#include <net/net_namespace.h>
39
#include <net/netns/generic.h>
40
#include <net/pkt_cls.h>
41
42
#include "datapath.h"
43
#include "drop.h"
44
#include "flow.h"
45
#include "flow_table.h"
46
#include "flow_netlink.h"
47
#include "meter.h"
48
#include "openvswitch_trace.h"
49
#include "vport-internal_dev.h"
50
#include "vport-netdev.h"
51
52
unsigned int ovs_net_id __read_mostly;
53
54
static struct genl_family dp_packet_genl_family;
55
static struct genl_family dp_flow_genl_family;
56
static struct genl_family dp_datapath_genl_family;
57
58
static const struct nla_policy flow_policy[];
59
60
static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
61
.name = OVS_FLOW_MCGROUP,
62
};
63
64
static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
65
.name = OVS_DATAPATH_MCGROUP,
66
};
67
68
static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
69
.name = OVS_VPORT_MCGROUP,
70
};
71
72
/* Check if need to build a reply message.
73
* OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
74
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
75
unsigned int group)
76
{
77
return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
78
genl_has_listeners(family, genl_info_net(info), group);
79
}
80
81
static void ovs_notify(struct genl_family *family,
82
struct sk_buff *skb, struct genl_info *info)
83
{
84
genl_notify(family, skb, info, 0, GFP_KERNEL);
85
}
86
87
/**
 * DOC: Locking:
 *
 * All writes, e.g. writes to device state (add/remove datapath, port, set
 * operations on vports, etc.) and writes to other state (flow table
 * modifications, set miscellaneous datapath parameters, etc.), are protected
 * by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
103
104
static DEFINE_MUTEX(ovs_mutex);
105
106
void ovs_lock(void)
107
{
108
mutex_lock(&ovs_mutex);
109
}
110
111
void ovs_unlock(void)
112
{
113
mutex_unlock(&ovs_mutex);
114
}
115
116
#ifdef CONFIG_LOCKDEP
/* Report whether ovs_mutex is held according to lockdep.
 * When debug_locks is off the answer is always 1.
 */
int lockdep_ovsl_is_held(void)
{
	if (!debug_locks)
		return 1;

	return lockdep_is_held(&ovs_mutex);
}
#endif
125
126
static struct vport *new_vport(const struct vport_parms *);
127
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
128
const struct sw_flow_key *,
129
const struct dp_upcall_info *,
130
uint32_t cutlen);
131
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
132
const struct sw_flow_key *,
133
const struct dp_upcall_info *,
134
uint32_t cutlen);
135
136
static void ovs_dp_masks_rebalance(struct work_struct *work);
137
138
static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
139
140
/* Must be called with rcu_read_lock or ovs_mutex. */
141
const char *ovs_dp_name(const struct datapath *dp)
142
{
143
struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
144
return ovs_vport_name(vport);
145
}
146
147
static int get_dpifindex(const struct datapath *dp)
148
{
149
struct vport *local;
150
int ifindex;
151
152
rcu_read_lock();
153
154
local = ovs_vport_rcu(dp, OVSP_LOCAL);
155
if (local)
156
ifindex = local->dev->ifindex;
157
else
158
ifindex = 0;
159
160
rcu_read_unlock();
161
162
return ifindex;
163
}
164
165
static void destroy_dp_rcu(struct rcu_head *rcu)
166
{
167
struct datapath *dp = container_of(rcu, struct datapath, rcu);
168
169
ovs_flow_tbl_destroy(&dp->table);
170
free_percpu(dp->stats_percpu);
171
kfree(dp->ports);
172
ovs_meters_exit(dp);
173
kfree(rcu_dereference_raw(dp->upcall_portids));
174
kfree(dp);
175
}
176
177
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
178
u16 port_no)
179
{
180
return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
181
}
182
183
/* Called with ovs_mutex or RCU read lock. */
184
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
185
{
186
struct vport *vport;
187
struct hlist_head *head;
188
189
head = vport_hash_bucket(dp, port_no);
190
hlist_for_each_entry_rcu(vport, head, dp_hash_node,
191
lockdep_ovsl_is_held()) {
192
if (vport->port_no == port_no)
193
return vport;
194
}
195
return NULL;
196
}
197
198
/* Called with ovs_mutex. */
199
static struct vport *new_vport(const struct vport_parms *parms)
200
{
201
struct vport *vport;
202
203
vport = ovs_vport_add(parms);
204
if (!IS_ERR(vport)) {
205
struct datapath *dp = parms->dp;
206
struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
207
208
hlist_add_head_rcu(&vport->dp_hash_node, head);
209
}
210
return vport;
211
}
212
213
static void ovs_vport_update_upcall_stats(struct sk_buff *skb,
214
const struct dp_upcall_info *upcall_info,
215
bool upcall_result)
216
{
217
struct vport *p = OVS_CB(skb)->input_vport;
218
struct vport_upcall_stats_percpu *stats;
219
220
if (upcall_info->cmd != OVS_PACKET_CMD_MISS &&
221
upcall_info->cmd != OVS_PACKET_CMD_ACTION)
222
return;
223
224
stats = this_cpu_ptr(p->upcall_stats);
225
u64_stats_update_begin(&stats->syncp);
226
if (upcall_result)
227
u64_stats_inc(&stats->n_success);
228
else
229
u64_stats_inc(&stats->n_fail);
230
u64_stats_update_end(&stats->syncp);
231
}
232
233
void ovs_dp_detach_port(struct vport *p)
234
{
235
ASSERT_OVSL();
236
237
/* First drop references to device. */
238
hlist_del_rcu(&p->dp_hash_node);
239
240
/* Then destroy it. */
241
ovs_vport_del(p);
242
}
243
244
/* Must be called with rcu_read_lock. */
245
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
246
{
247
struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
248
const struct vport *p = OVS_CB(skb)->input_vport;
249
struct datapath *dp = p->dp;
250
struct sw_flow *flow;
251
struct sw_flow_actions *sf_acts;
252
struct dp_stats_percpu *stats;
253
bool ovs_pcpu_locked = false;
254
u64 *stats_counter;
255
u32 n_mask_hit;
256
u32 n_cache_hit;
257
int error;
258
259
stats = this_cpu_ptr(dp->stats_percpu);
260
261
/* Look up flow. */
262
flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
263
&n_mask_hit, &n_cache_hit);
264
if (unlikely(!flow)) {
265
struct dp_upcall_info upcall;
266
267
memset(&upcall, 0, sizeof(upcall));
268
upcall.cmd = OVS_PACKET_CMD_MISS;
269
270
if (OVS_CB(skb)->upcall_pid)
271
upcall.portid = OVS_CB(skb)->upcall_pid;
272
else if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
273
upcall.portid =
274
ovs_dp_get_upcall_portid(dp, smp_processor_id());
275
else
276
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
277
278
upcall.mru = OVS_CB(skb)->mru;
279
error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
280
switch (error) {
281
case 0:
282
case -EAGAIN:
283
case -ERESTARTSYS:
284
case -EINTR:
285
consume_skb(skb);
286
break;
287
default:
288
kfree_skb(skb);
289
break;
290
}
291
stats_counter = &stats->n_missed;
292
goto out;
293
}
294
295
ovs_flow_stats_update(flow, key->tp.flags, skb);
296
sf_acts = rcu_dereference(flow->sf_acts);
297
/* This path can be invoked recursively: Use the current task to
298
* identify recursive invocation - the lock must be acquired only once.
299
* Even with disabled bottom halves this can be preempted on PREEMPT_RT.
300
* Limit the locking to RT to avoid assigning `owner' if it can be
301
* avoided.
302
*/
303
if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
304
local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
305
ovs_pcpu->owner = current;
306
ovs_pcpu_locked = true;
307
}
308
309
error = ovs_execute_actions(dp, skb, sf_acts, key);
310
if (unlikely(error))
311
net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
312
ovs_dp_name(dp), error);
313
if (ovs_pcpu_locked) {
314
ovs_pcpu->owner = NULL;
315
local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
316
}
317
318
stats_counter = &stats->n_hit;
319
320
out:
321
/* Update datapath statistics. */
322
u64_stats_update_begin(&stats->syncp);
323
(*stats_counter)++;
324
stats->n_mask_hit += n_mask_hit;
325
stats->n_cache_hit += n_cache_hit;
326
u64_stats_update_end(&stats->syncp);
327
}
328
329
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
330
const struct sw_flow_key *key,
331
const struct dp_upcall_info *upcall_info,
332
uint32_t cutlen)
333
{
334
struct dp_stats_percpu *stats;
335
int err;
336
337
if (trace_ovs_dp_upcall_enabled())
338
trace_ovs_dp_upcall(dp, skb, key, upcall_info);
339
340
if (upcall_info->portid == 0) {
341
err = -ENOTCONN;
342
goto err;
343
}
344
345
if (!skb_is_gso(skb))
346
err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
347
else
348
err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
349
350
ovs_vport_update_upcall_stats(skb, upcall_info, !err);
351
if (err)
352
goto err;
353
354
return 0;
355
356
err:
357
stats = this_cpu_ptr(dp->stats_percpu);
358
359
u64_stats_update_begin(&stats->syncp);
360
stats->n_lost++;
361
u64_stats_update_end(&stats->syncp);
362
363
return err;
364
}
365
366
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
367
const struct sw_flow_key *key,
368
const struct dp_upcall_info *upcall_info,
369
uint32_t cutlen)
370
{
371
unsigned int gso_type = skb_shinfo(skb)->gso_type;
372
struct sw_flow_key later_key;
373
struct sk_buff *segs, *nskb;
374
int err;
375
376
BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
377
segs = __skb_gso_segment(skb, NETIF_F_SG, false);
378
if (IS_ERR(segs))
379
return PTR_ERR(segs);
380
if (segs == NULL)
381
return -EINVAL;
382
383
if (gso_type & SKB_GSO_UDP) {
384
/* The initial flow key extracted by ovs_flow_key_extract()
385
* in this case is for a first fragment, so we need to
386
* properly mark later fragments.
387
*/
388
later_key = *key;
389
later_key.ip.frag = OVS_FRAG_TYPE_LATER;
390
}
391
392
/* Queue all of the segments. */
393
skb_list_walk_safe(segs, skb, nskb) {
394
if (gso_type & SKB_GSO_UDP && skb != segs)
395
key = &later_key;
396
397
err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
398
if (err)
399
break;
400
401
}
402
403
/* Free all of the segments. */
404
skb_list_walk_safe(segs, skb, nskb) {
405
if (err)
406
kfree_skb(skb);
407
else
408
consume_skb(skb);
409
}
410
return err;
411
}
412
413
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
414
unsigned int hdrlen, int actions_attrlen)
415
{
416
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
417
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
418
+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
419
+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
420
+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
421
422
/* OVS_PACKET_ATTR_USERDATA */
423
if (upcall_info->userdata)
424
size += NLA_ALIGN(upcall_info->userdata->nla_len);
425
426
/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
427
if (upcall_info->egress_tun_info)
428
size += nla_total_size(ovs_tun_key_attr_size());
429
430
/* OVS_PACKET_ATTR_ACTIONS */
431
if (upcall_info->actions_len)
432
size += nla_total_size(actions_attrlen);
433
434
/* OVS_PACKET_ATTR_MRU */
435
if (upcall_info->mru)
436
size += nla_total_size(sizeof(upcall_info->mru));
437
438
return size;
439
}
440
441
static void pad_packet(struct datapath *dp, struct sk_buff *skb)
442
{
443
if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
444
size_t plen = NLA_ALIGN(skb->len) - skb->len;
445
446
if (plen > 0)
447
skb_put_zero(skb, plen);
448
}
449
}
450
451
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
452
const struct sw_flow_key *key,
453
const struct dp_upcall_info *upcall_info,
454
uint32_t cutlen)
455
{
456
struct ovs_header *upcall;
457
struct sk_buff *nskb = NULL;
458
struct sk_buff *user_skb = NULL; /* to be queued to userspace */
459
struct nlattr *nla;
460
size_t len;
461
unsigned int hlen;
462
int err, dp_ifindex;
463
u64 hash;
464
465
dp_ifindex = get_dpifindex(dp);
466
if (!dp_ifindex)
467
return -ENODEV;
468
469
if (skb_vlan_tag_present(skb)) {
470
nskb = skb_clone(skb, GFP_ATOMIC);
471
if (!nskb)
472
return -ENOMEM;
473
474
nskb = __vlan_hwaccel_push_inside(nskb);
475
if (!nskb)
476
return -ENOMEM;
477
478
skb = nskb;
479
}
480
481
if (nla_attr_size(skb->len) > USHRT_MAX) {
482
err = -EFBIG;
483
goto out;
484
}
485
486
/* Complete checksum if needed */
487
if (skb->ip_summed == CHECKSUM_PARTIAL &&
488
(err = skb_csum_hwoffload_help(skb, 0)))
489
goto out;
490
491
/* Older versions of OVS user space enforce alignment of the last
492
* Netlink attribute to NLA_ALIGNTO which would require extensive
493
* padding logic. Only perform zerocopy if padding is not required.
494
*/
495
if (dp->user_features & OVS_DP_F_UNALIGNED)
496
hlen = skb_zerocopy_headlen(skb);
497
else
498
hlen = skb->len;
499
500
len = upcall_msg_size(upcall_info, hlen - cutlen,
501
OVS_CB(skb)->acts_origlen);
502
user_skb = genlmsg_new(len, GFP_ATOMIC);
503
if (!user_skb) {
504
err = -ENOMEM;
505
goto out;
506
}
507
508
upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
509
0, upcall_info->cmd);
510
if (!upcall) {
511
err = -EINVAL;
512
goto out;
513
}
514
upcall->dp_ifindex = dp_ifindex;
515
516
err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
517
if (err)
518
goto out;
519
520
if (upcall_info->userdata)
521
__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
522
nla_len(upcall_info->userdata),
523
nla_data(upcall_info->userdata));
524
525
if (upcall_info->egress_tun_info) {
526
nla = nla_nest_start_noflag(user_skb,
527
OVS_PACKET_ATTR_EGRESS_TUN_KEY);
528
if (!nla) {
529
err = -EMSGSIZE;
530
goto out;
531
}
532
err = ovs_nla_put_tunnel_info(user_skb,
533
upcall_info->egress_tun_info);
534
if (err)
535
goto out;
536
537
nla_nest_end(user_skb, nla);
538
}
539
540
if (upcall_info->actions_len) {
541
nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
542
if (!nla) {
543
err = -EMSGSIZE;
544
goto out;
545
}
546
err = ovs_nla_put_actions(upcall_info->actions,
547
upcall_info->actions_len,
548
user_skb);
549
if (!err)
550
nla_nest_end(user_skb, nla);
551
else
552
nla_nest_cancel(user_skb, nla);
553
}
554
555
/* Add OVS_PACKET_ATTR_MRU */
556
if (upcall_info->mru &&
557
nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
558
err = -ENOBUFS;
559
goto out;
560
}
561
562
/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
563
if (cutlen > 0 &&
564
nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
565
err = -ENOBUFS;
566
goto out;
567
}
568
569
/* Add OVS_PACKET_ATTR_HASH */
570
hash = skb_get_hash_raw(skb);
571
if (skb->sw_hash)
572
hash |= OVS_PACKET_HASH_SW_BIT;
573
574
if (skb->l4_hash)
575
hash |= OVS_PACKET_HASH_L4_BIT;
576
577
if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
578
err = -ENOBUFS;
579
goto out;
580
}
581
582
/* Only reserve room for attribute header, packet data is added
583
* in skb_zerocopy() */
584
if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
585
err = -ENOBUFS;
586
goto out;
587
}
588
nla->nla_len = nla_attr_size(skb->len - cutlen);
589
590
err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
591
if (err)
592
goto out;
593
594
/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
595
pad_packet(dp, user_skb);
596
597
((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
598
599
err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
600
user_skb = NULL;
601
out:
602
if (err)
603
skb_tx_error(skb);
604
consume_skb(user_skb);
605
consume_skb(nskb);
606
607
return err;
608
}
609
610
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
611
{
612
struct ovs_header *ovs_header = genl_info_userhdr(info);
613
struct net *net = sock_net(skb->sk);
614
struct nlattr **a = info->attrs;
615
struct sw_flow_actions *acts;
616
struct sk_buff *packet;
617
struct sw_flow *flow;
618
struct sw_flow_actions *sf_acts;
619
struct datapath *dp;
620
struct vport *input_vport;
621
u16 mru = 0;
622
u64 hash;
623
int len;
624
int err;
625
bool log = !a[OVS_PACKET_ATTR_PROBE];
626
627
err = -EINVAL;
628
if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
629
!a[OVS_PACKET_ATTR_ACTIONS])
630
goto err;
631
632
len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
633
packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
634
err = -ENOMEM;
635
if (!packet)
636
goto err;
637
skb_reserve(packet, NET_IP_ALIGN);
638
639
nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
640
641
/* Set packet's mru */
642
if (a[OVS_PACKET_ATTR_MRU]) {
643
mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
644
packet->ignore_df = 1;
645
}
646
OVS_CB(packet)->mru = mru;
647
648
if (a[OVS_PACKET_ATTR_HASH]) {
649
hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
650
651
__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
652
!!(hash & OVS_PACKET_HASH_SW_BIT),
653
!!(hash & OVS_PACKET_HASH_L4_BIT));
654
}
655
656
OVS_CB(packet)->upcall_pid =
657
nla_get_u32_default(a[OVS_PACKET_ATTR_UPCALL_PID], 0);
658
659
/* Build an sw_flow for sending this packet. */
660
flow = ovs_flow_alloc();
661
err = PTR_ERR(flow);
662
if (IS_ERR(flow))
663
goto err_kfree_skb;
664
665
err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
666
packet, &flow->key, log);
667
if (err)
668
goto err_flow_free;
669
670
err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
671
&flow->key, &acts, log);
672
if (err)
673
goto err_flow_free;
674
675
rcu_assign_pointer(flow->sf_acts, acts);
676
packet->priority = flow->key.phy.priority;
677
packet->mark = flow->key.phy.skb_mark;
678
679
rcu_read_lock();
680
dp = get_dp_rcu(net, ovs_header->dp_ifindex);
681
err = -ENODEV;
682
if (!dp)
683
goto err_unlock;
684
685
input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
686
if (!input_vport)
687
input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
688
689
if (!input_vport)
690
goto err_unlock;
691
692
packet->dev = input_vport->dev;
693
OVS_CB(packet)->input_vport = input_vport;
694
sf_acts = rcu_dereference(flow->sf_acts);
695
696
local_bh_disable();
697
local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
698
if (IS_ENABLED(CONFIG_PREEMPT_RT))
699
this_cpu_write(ovs_pcpu_storage->owner, current);
700
err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
701
if (IS_ENABLED(CONFIG_PREEMPT_RT))
702
this_cpu_write(ovs_pcpu_storage->owner, NULL);
703
local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
704
local_bh_enable();
705
rcu_read_unlock();
706
707
ovs_flow_free(flow, false);
708
return err;
709
710
err_unlock:
711
rcu_read_unlock();
712
err_flow_free:
713
ovs_flow_free(flow, false);
714
err_kfree_skb:
715
kfree_skb(packet);
716
err:
717
return err;
718
}
719
720
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
721
[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
722
[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
723
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
724
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
725
[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
726
[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
727
[OVS_PACKET_ATTR_UPCALL_PID] = { .type = NLA_U32 },
728
};
729
730
static const struct genl_small_ops dp_packet_genl_ops[] = {
731
{ .cmd = OVS_PACKET_CMD_EXECUTE,
732
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
733
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
734
.doit = ovs_packet_cmd_execute
735
}
736
};
737
738
/* Generic netlink family definition for OVS_PACKET_FAMILY. */
static struct genl_family dp_packet_genl_family __ro_after_init = {
	.hdrsize	= sizeof(struct ovs_header),
	.name		= OVS_PACKET_FAMILY,
	.version	= OVS_PACKET_VERSION,
	.maxattr	= OVS_PACKET_ATTR_MAX,
	.policy		= packet_policy,
	.netnsok	= true,
	.parallel_ops	= true,
	.small_ops	= dp_packet_genl_ops,
	.n_small_ops	= ARRAY_SIZE(dp_packet_genl_ops),
	.resv_start_op	= OVS_PACKET_CMD_EXECUTE + 1,
	.module		= THIS_MODULE,
};
751
752
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
753
struct ovs_dp_megaflow_stats *mega_stats)
754
{
755
int i;
756
757
memset(mega_stats, 0, sizeof(*mega_stats));
758
759
stats->n_flows = ovs_flow_tbl_count(&dp->table);
760
mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
761
762
stats->n_hit = stats->n_missed = stats->n_lost = 0;
763
764
for_each_possible_cpu(i) {
765
const struct dp_stats_percpu *percpu_stats;
766
struct dp_stats_percpu local_stats;
767
unsigned int start;
768
769
percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
770
771
do {
772
start = u64_stats_fetch_begin(&percpu_stats->syncp);
773
local_stats = *percpu_stats;
774
} while (u64_stats_fetch_retry(&percpu_stats->syncp, start));
775
776
stats->n_hit += local_stats.n_hit;
777
stats->n_missed += local_stats.n_missed;
778
stats->n_lost += local_stats.n_lost;
779
mega_stats->n_mask_hit += local_stats.n_mask_hit;
780
mega_stats->n_cache_hit += local_stats.n_cache_hit;
781
}
782
}
783
784
/* The key is reported only for flows identified by a UFID, and only when
 * OVS_UFID_F_OMIT_KEY is not set in @ufid_flags.
 */
static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
	if (!ovs_identifier_is_ufid(sfid))
		return false;

	return !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}
789
790
/* The mask is reported unless OVS_UFID_F_OMIT_MASK is set. */
static bool should_fill_mask(uint32_t ufid_flags)
{
	return (ufid_flags & OVS_UFID_F_OMIT_MASK) == 0;
}
794
795
/* The actions are reported unless OVS_UFID_F_OMIT_ACTIONS is set. */
static bool should_fill_actions(uint32_t ufid_flags)
{
	return (ufid_flags & OVS_UFID_F_OMIT_ACTIONS) == 0;
}
799
800
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
801
const struct sw_flow_id *sfid,
802
uint32_t ufid_flags)
803
{
804
size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
805
806
/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
807
* see ovs_nla_put_identifier()
808
*/
809
if (sfid && ovs_identifier_is_ufid(sfid))
810
len += nla_total_size(sfid->ufid_len);
811
else
812
len += nla_total_size(ovs_key_attr_size());
813
814
/* OVS_FLOW_ATTR_KEY */
815
if (!sfid || should_fill_key(sfid, ufid_flags))
816
len += nla_total_size(ovs_key_attr_size());
817
818
/* OVS_FLOW_ATTR_MASK */
819
if (should_fill_mask(ufid_flags))
820
len += nla_total_size(ovs_key_attr_size());
821
822
/* OVS_FLOW_ATTR_ACTIONS */
823
if (should_fill_actions(ufid_flags))
824
len += nla_total_size(acts->orig_len);
825
826
return len
827
+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
828
+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
829
+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
830
}
831
832
/* Called with ovs_mutex or RCU read lock. */
833
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
834
struct sk_buff *skb)
835
{
836
struct ovs_flow_stats stats;
837
__be16 tcp_flags;
838
unsigned long used;
839
840
ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
841
842
if (used &&
843
nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
844
OVS_FLOW_ATTR_PAD))
845
return -EMSGSIZE;
846
847
if (stats.n_packets &&
848
nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
849
sizeof(struct ovs_flow_stats), &stats,
850
OVS_FLOW_ATTR_PAD))
851
return -EMSGSIZE;
852
853
if ((u8)ntohs(tcp_flags) &&
854
nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
855
return -EMSGSIZE;
856
857
return 0;
858
}
859
860
/* Called with ovs_mutex or RCU read lock. */
861
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
862
struct sk_buff *skb, int skb_orig_len)
863
{
864
struct nlattr *start;
865
int err;
866
867
/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
868
* this is the first flow to be dumped into 'skb'. This is unusual for
869
* Netlink but individual action lists can be longer than
870
* NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
871
* The userspace caller can always fetch the actions separately if it
872
* really wants them. (Most userspace callers in fact don't care.)
873
*
874
* This can only fail for dump operations because the skb is always
875
* properly sized for single flows.
876
*/
877
start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
878
if (start) {
879
const struct sw_flow_actions *sf_acts;
880
881
sf_acts = rcu_dereference_ovsl(flow->sf_acts);
882
err = ovs_nla_put_actions(sf_acts->actions,
883
sf_acts->actions_len, skb);
884
885
if (!err)
886
nla_nest_end(skb, start);
887
else {
888
if (skb_orig_len)
889
return err;
890
891
nla_nest_cancel(skb, start);
892
}
893
} else if (skb_orig_len) {
894
return -EMSGSIZE;
895
}
896
897
return 0;
898
}
899
900
/* Called with ovs_mutex or RCU read lock. */
901
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
902
struct sk_buff *skb, u32 portid,
903
u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
904
{
905
const int skb_orig_len = skb->len;
906
struct ovs_header *ovs_header;
907
int err;
908
909
ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
910
flags, cmd);
911
if (!ovs_header)
912
return -EMSGSIZE;
913
914
ovs_header->dp_ifindex = dp_ifindex;
915
916
err = ovs_nla_put_identifier(flow, skb);
917
if (err)
918
goto error;
919
920
if (should_fill_key(&flow->id, ufid_flags)) {
921
err = ovs_nla_put_masked_key(flow, skb);
922
if (err)
923
goto error;
924
}
925
926
if (should_fill_mask(ufid_flags)) {
927
err = ovs_nla_put_mask(flow, skb);
928
if (err)
929
goto error;
930
}
931
932
err = ovs_flow_cmd_fill_stats(flow, skb);
933
if (err)
934
goto error;
935
936
if (should_fill_actions(ufid_flags)) {
937
err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
938
if (err)
939
goto error;
940
}
941
942
genlmsg_end(skb, ovs_header);
943
return 0;
944
945
error:
946
genlmsg_cancel(skb, ovs_header);
947
return err;
948
}
949
950
/* May not be called with RCU read lock. */
951
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
952
const struct sw_flow_id *sfid,
953
struct genl_info *info,
954
bool always,
955
uint32_t ufid_flags)
956
{
957
struct sk_buff *skb;
958
size_t len;
959
960
if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
961
return NULL;
962
963
len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
964
skb = genlmsg_new(len, GFP_KERNEL);
965
if (!skb)
966
return ERR_PTR(-ENOMEM);
967
968
return skb;
969
}
970
971
/* Called with ovs_mutex. */
972
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
973
int dp_ifindex,
974
struct genl_info *info, u8 cmd,
975
bool always, u32 ufid_flags)
976
{
977
struct sk_buff *skb;
978
int retval;
979
980
skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
981
&flow->id, info, always, ufid_flags);
982
if (IS_ERR_OR_NULL(skb))
983
return skb;
984
985
retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
986
info->snd_portid, info->snd_seq, 0,
987
cmd, ufid_flags);
988
if (WARN_ON_ONCE(retval < 0)) {
989
kfree_skb(skb);
990
skb = ERR_PTR(retval);
991
}
992
return skb;
993
}
994
995
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
996
{
997
struct net *net = sock_net(skb->sk);
998
struct nlattr **a = info->attrs;
999
struct ovs_header *ovs_header = genl_info_userhdr(info);
1000
struct sw_flow *flow = NULL, *new_flow;
1001
struct sw_flow_mask mask;
1002
struct sk_buff *reply;
1003
struct datapath *dp;
1004
struct sw_flow_key *key;
1005
struct sw_flow_actions *acts;
1006
struct sw_flow_match match;
1007
u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1008
int error;
1009
bool log = !a[OVS_FLOW_ATTR_PROBE];
1010
1011
/* Must have key and actions. */
1012
error = -EINVAL;
1013
if (!a[OVS_FLOW_ATTR_KEY]) {
1014
OVS_NLERR(log, "Flow key attr not present in new flow.");
1015
goto error;
1016
}
1017
if (!a[OVS_FLOW_ATTR_ACTIONS]) {
1018
OVS_NLERR(log, "Flow actions attr not present in new flow.");
1019
goto error;
1020
}
1021
1022
/* Most of the time we need to allocate a new flow, do it before
1023
* locking.
1024
*/
1025
new_flow = ovs_flow_alloc();
1026
if (IS_ERR(new_flow)) {
1027
error = PTR_ERR(new_flow);
1028
goto error;
1029
}
1030
1031
/* Extract key. */
1032
key = kzalloc_obj(*key);
1033
if (!key) {
1034
error = -ENOMEM;
1035
goto err_kfree_flow;
1036
}
1037
1038
ovs_match_init(&match, key, false, &mask);
1039
error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1040
a[OVS_FLOW_ATTR_MASK], log);
1041
if (error)
1042
goto err_kfree_key;
1043
1044
ovs_flow_mask_key(&new_flow->key, key, true, &mask);
1045
1046
/* Extract flow identifier. */
1047
error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
1048
key, log);
1049
if (error)
1050
goto err_kfree_key;
1051
1052
/* Validate actions. */
1053
error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
1054
&new_flow->key, &acts, log);
1055
if (error) {
1056
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
1057
goto err_kfree_key;
1058
}
1059
1060
reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
1061
ufid_flags);
1062
if (IS_ERR(reply)) {
1063
error = PTR_ERR(reply);
1064
goto err_kfree_acts;
1065
}
1066
1067
ovs_lock();
1068
dp = get_dp(net, ovs_header->dp_ifindex);
1069
if (unlikely(!dp)) {
1070
error = -ENODEV;
1071
goto err_unlock_ovs;
1072
}
1073
1074
/* Check if this is a duplicate flow */
1075
if (ovs_identifier_is_ufid(&new_flow->id))
1076
flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
1077
if (!flow)
1078
flow = ovs_flow_tbl_lookup(&dp->table, key);
1079
if (likely(!flow)) {
1080
rcu_assign_pointer(new_flow->sf_acts, acts);
1081
1082
/* Put flow in bucket. */
1083
error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
1084
if (unlikely(error)) {
1085
acts = NULL;
1086
goto err_unlock_ovs;
1087
}
1088
1089
if (unlikely(reply)) {
1090
error = ovs_flow_cmd_fill_info(new_flow,
1091
ovs_header->dp_ifindex,
1092
reply, info->snd_portid,
1093
info->snd_seq, 0,
1094
OVS_FLOW_CMD_NEW,
1095
ufid_flags);
1096
BUG_ON(error < 0);
1097
}
1098
ovs_unlock();
1099
} else {
1100
struct sw_flow_actions *old_acts;
1101
1102
/* Bail out if we're not allowed to modify an existing flow.
1103
* We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1104
* because Generic Netlink treats the latter as a dump
1105
* request. We also accept NLM_F_EXCL in case that bug ever
1106
* gets fixed.
1107
*/
1108
if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
1109
| NLM_F_EXCL))) {
1110
error = -EEXIST;
1111
goto err_unlock_ovs;
1112
}
1113
/* The flow identifier has to be the same for flow updates.
1114
* Look for any overlapping flow.
1115
*/
1116
if (unlikely(!ovs_flow_cmp(flow, &match))) {
1117
if (ovs_identifier_is_key(&flow->id))
1118
flow = ovs_flow_tbl_lookup_exact(&dp->table,
1119
&match);
1120
else /* UFID matches but key is different */
1121
flow = NULL;
1122
if (!flow) {
1123
error = -ENOENT;
1124
goto err_unlock_ovs;
1125
}
1126
}
1127
/* Update actions. */
1128
old_acts = ovsl_dereference(flow->sf_acts);
1129
rcu_assign_pointer(flow->sf_acts, acts);
1130
1131
if (unlikely(reply)) {
1132
error = ovs_flow_cmd_fill_info(flow,
1133
ovs_header->dp_ifindex,
1134
reply, info->snd_portid,
1135
info->snd_seq, 0,
1136
OVS_FLOW_CMD_NEW,
1137
ufid_flags);
1138
BUG_ON(error < 0);
1139
}
1140
ovs_unlock();
1141
1142
ovs_nla_free_flow_actions_rcu(old_acts);
1143
ovs_flow_free(new_flow, false);
1144
}
1145
1146
if (reply)
1147
ovs_notify(&dp_flow_genl_family, reply, info);
1148
1149
kfree(key);
1150
return 0;
1151
1152
err_unlock_ovs:
1153
ovs_unlock();
1154
kfree_skb(reply);
1155
err_kfree_acts:
1156
ovs_nla_free_flow_actions(acts);
1157
err_kfree_key:
1158
kfree(key);
1159
err_kfree_flow:
1160
ovs_flow_free(new_flow, false);
1161
error:
1162
return error;
1163
}
1164
1165
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
1166
static noinline_for_stack
1167
struct sw_flow_actions *get_flow_actions(struct net *net,
1168
const struct nlattr *a,
1169
const struct sw_flow_key *key,
1170
const struct sw_flow_mask *mask,
1171
bool log)
1172
{
1173
struct sw_flow_actions *acts;
1174
struct sw_flow_key masked_key;
1175
int error;
1176
1177
ovs_flow_mask_key(&masked_key, key, true, mask);
1178
error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
1179
if (error) {
1180
OVS_NLERR(log,
1181
"Actions may not be safe on all matching packets");
1182
return ERR_PTR(error);
1183
}
1184
1185
return acts;
1186
}
1187
1188
/* Factor out match-init and action-copy to avoid
1189
* "Wframe-larger-than=1024" warning. Because mask is only
1190
* used to get actions, we new a function to save some
1191
* stack space.
1192
*
1193
* If there are not key and action attrs, we return 0
1194
* directly. In the case, the caller will also not use the
1195
* match as before. If there is action attr, we try to get
1196
* actions and save them to *acts. Before returning from
1197
* the function, we reset the match->mask pointer. Because
1198
* we should not to return match object with dangling reference
1199
* to mask.
1200
* */
1201
static noinline_for_stack int
1202
ovs_nla_init_match_and_action(struct net *net,
1203
struct sw_flow_match *match,
1204
struct sw_flow_key *key,
1205
struct nlattr **a,
1206
struct sw_flow_actions **acts,
1207
bool log)
1208
{
1209
struct sw_flow_mask mask;
1210
int error = 0;
1211
1212
if (a[OVS_FLOW_ATTR_KEY]) {
1213
ovs_match_init(match, key, true, &mask);
1214
error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
1215
a[OVS_FLOW_ATTR_MASK], log);
1216
if (error)
1217
goto error;
1218
}
1219
1220
if (a[OVS_FLOW_ATTR_ACTIONS]) {
1221
if (!a[OVS_FLOW_ATTR_KEY]) {
1222
OVS_NLERR(log,
1223
"Flow key attribute not present in set flow.");
1224
error = -EINVAL;
1225
goto error;
1226
}
1227
1228
*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
1229
&mask, log);
1230
if (IS_ERR(*acts)) {
1231
error = PTR_ERR(*acts);
1232
goto error;
1233
}
1234
}
1235
1236
/* On success, error is 0. */
1237
error:
1238
match->mask = NULL;
1239
return error;
1240
}
1241
1242
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1243
{
1244
struct net *net = sock_net(skb->sk);
1245
struct nlattr **a = info->attrs;
1246
struct ovs_header *ovs_header = genl_info_userhdr(info);
1247
struct sw_flow_key key;
1248
struct sw_flow *flow;
1249
struct sk_buff *reply = NULL;
1250
struct datapath *dp;
1251
struct sw_flow_actions *old_acts = NULL, *acts = NULL;
1252
struct sw_flow_match match;
1253
struct sw_flow_id sfid;
1254
u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1255
int error = 0;
1256
bool log = !a[OVS_FLOW_ATTR_PROBE];
1257
bool ufid_present;
1258
1259
ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
1260
if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
1261
OVS_NLERR(log,
1262
"Flow set message rejected, Key attribute missing.");
1263
return -EINVAL;
1264
}
1265
1266
error = ovs_nla_init_match_and_action(net, &match, &key, a,
1267
&acts, log);
1268
if (error)
1269
goto error;
1270
1271
if (acts) {
1272
/* Can allocate before locking if have acts. */
1273
reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
1274
ufid_flags);
1275
if (IS_ERR(reply)) {
1276
error = PTR_ERR(reply);
1277
goto err_kfree_acts;
1278
}
1279
}
1280
1281
ovs_lock();
1282
dp = get_dp(net, ovs_header->dp_ifindex);
1283
if (unlikely(!dp)) {
1284
error = -ENODEV;
1285
goto err_unlock_ovs;
1286
}
1287
/* Check that the flow exists. */
1288
if (ufid_present)
1289
flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
1290
else
1291
flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1292
if (unlikely(!flow)) {
1293
error = -ENOENT;
1294
goto err_unlock_ovs;
1295
}
1296
1297
/* Update actions, if present. */
1298
if (likely(acts)) {
1299
old_acts = ovsl_dereference(flow->sf_acts);
1300
rcu_assign_pointer(flow->sf_acts, acts);
1301
1302
if (unlikely(reply)) {
1303
error = ovs_flow_cmd_fill_info(flow,
1304
ovs_header->dp_ifindex,
1305
reply, info->snd_portid,
1306
info->snd_seq, 0,
1307
OVS_FLOW_CMD_SET,
1308
ufid_flags);
1309
BUG_ON(error < 0);
1310
}
1311
} else {
1312
/* Could not alloc without acts before locking. */
1313
reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
1314
info, OVS_FLOW_CMD_SET, false,
1315
ufid_flags);
1316
1317
if (IS_ERR(reply)) {
1318
error = PTR_ERR(reply);
1319
goto err_unlock_ovs;
1320
}
1321
}
1322
1323
/* Clear stats. */
1324
if (a[OVS_FLOW_ATTR_CLEAR])
1325
ovs_flow_stats_clear(flow);
1326
ovs_unlock();
1327
1328
if (reply)
1329
ovs_notify(&dp_flow_genl_family, reply, info);
1330
if (old_acts)
1331
ovs_nla_free_flow_actions_rcu(old_acts);
1332
1333
return 0;
1334
1335
err_unlock_ovs:
1336
ovs_unlock();
1337
kfree_skb(reply);
1338
err_kfree_acts:
1339
ovs_nla_free_flow_actions(acts);
1340
error:
1341
return error;
1342
}
1343
1344
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1345
{
1346
struct nlattr **a = info->attrs;
1347
struct ovs_header *ovs_header = genl_info_userhdr(info);
1348
struct net *net = sock_net(skb->sk);
1349
struct sw_flow_key key;
1350
struct sk_buff *reply;
1351
struct sw_flow *flow;
1352
struct datapath *dp;
1353
struct sw_flow_match match;
1354
struct sw_flow_id ufid;
1355
u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1356
int err = 0;
1357
bool log = !a[OVS_FLOW_ATTR_PROBE];
1358
bool ufid_present;
1359
1360
ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1361
if (a[OVS_FLOW_ATTR_KEY]) {
1362
ovs_match_init(&match, &key, true, NULL);
1363
err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
1364
log);
1365
} else if (!ufid_present) {
1366
OVS_NLERR(log,
1367
"Flow get message rejected, Key attribute missing.");
1368
err = -EINVAL;
1369
}
1370
if (err)
1371
return err;
1372
1373
ovs_lock();
1374
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1375
if (!dp) {
1376
err = -ENODEV;
1377
goto unlock;
1378
}
1379
1380
if (ufid_present)
1381
flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1382
else
1383
flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1384
if (!flow) {
1385
err = -ENOENT;
1386
goto unlock;
1387
}
1388
1389
reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
1390
OVS_FLOW_CMD_GET, true, ufid_flags);
1391
if (IS_ERR(reply)) {
1392
err = PTR_ERR(reply);
1393
goto unlock;
1394
}
1395
1396
ovs_unlock();
1397
return genlmsg_reply(reply, info);
1398
unlock:
1399
ovs_unlock();
1400
return err;
1401
}
1402
1403
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1404
{
1405
struct nlattr **a = info->attrs;
1406
struct ovs_header *ovs_header = genl_info_userhdr(info);
1407
struct net *net = sock_net(skb->sk);
1408
struct sw_flow_key key;
1409
struct sk_buff *reply;
1410
struct sw_flow *flow = NULL;
1411
struct datapath *dp;
1412
struct sw_flow_match match;
1413
struct sw_flow_id ufid;
1414
u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1415
int err;
1416
bool log = !a[OVS_FLOW_ATTR_PROBE];
1417
bool ufid_present;
1418
1419
ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1420
if (a[OVS_FLOW_ATTR_KEY]) {
1421
ovs_match_init(&match, &key, true, NULL);
1422
err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1423
NULL, log);
1424
if (unlikely(err))
1425
return err;
1426
}
1427
1428
ovs_lock();
1429
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1430
if (unlikely(!dp)) {
1431
err = -ENODEV;
1432
goto unlock;
1433
}
1434
1435
if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
1436
err = ovs_flow_tbl_flush(&dp->table);
1437
goto unlock;
1438
}
1439
1440
if (ufid_present)
1441
flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1442
else
1443
flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1444
if (unlikely(!flow)) {
1445
err = -ENOENT;
1446
goto unlock;
1447
}
1448
1449
ovs_flow_tbl_remove(&dp->table, flow);
1450
ovs_unlock();
1451
1452
reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
1453
&flow->id, info, false, ufid_flags);
1454
if (likely(reply)) {
1455
if (!IS_ERR(reply)) {
1456
rcu_read_lock(); /*To keep RCU checker happy. */
1457
err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
1458
reply, info->snd_portid,
1459
info->snd_seq, 0,
1460
OVS_FLOW_CMD_DEL,
1461
ufid_flags);
1462
rcu_read_unlock();
1463
if (WARN_ON_ONCE(err < 0)) {
1464
kfree_skb(reply);
1465
goto out_free;
1466
}
1467
1468
ovs_notify(&dp_flow_genl_family, reply, info);
1469
} else {
1470
netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
1471
PTR_ERR(reply));
1472
}
1473
}
1474
1475
out_free:
1476
ovs_flow_free(flow, true);
1477
return 0;
1478
unlock:
1479
ovs_unlock();
1480
return err;
1481
}
1482
1483
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1484
{
1485
struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1486
struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1487
struct table_instance *ti;
1488
struct datapath *dp;
1489
u32 ufid_flags;
1490
int err;
1491
1492
err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
1493
OVS_FLOW_ATTR_MAX, flow_policy, NULL);
1494
if (err)
1495
return err;
1496
ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1497
1498
rcu_read_lock();
1499
dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1500
if (!dp) {
1501
rcu_read_unlock();
1502
return -ENODEV;
1503
}
1504
1505
ti = rcu_dereference(dp->table.ti);
1506
for (;;) {
1507
struct sw_flow *flow;
1508
u32 bucket, obj;
1509
1510
bucket = cb->args[0];
1511
obj = cb->args[1];
1512
flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1513
if (!flow)
1514
break;
1515
1516
if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1517
NETLINK_CB(cb->skb).portid,
1518
cb->nlh->nlmsg_seq, NLM_F_MULTI,
1519
OVS_FLOW_CMD_GET, ufid_flags) < 0)
1520
break;
1521
1522
cb->args[0] = bucket;
1523
cb->args[1] = obj;
1524
}
1525
rcu_read_unlock();
1526
return skb->len;
1527
}
1528
1529
static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1530
[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1531
[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1532
[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1533
[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1534
[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1535
[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1536
[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
1537
};
1538
1539
static const struct genl_small_ops dp_flow_genl_ops[] = {
1540
{ .cmd = OVS_FLOW_CMD_NEW,
1541
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1542
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1543
.doit = ovs_flow_cmd_new
1544
},
1545
{ .cmd = OVS_FLOW_CMD_DEL,
1546
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1547
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1548
.doit = ovs_flow_cmd_del
1549
},
1550
{ .cmd = OVS_FLOW_CMD_GET,
1551
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1552
.flags = 0, /* OK for unprivileged users. */
1553
.doit = ovs_flow_cmd_get,
1554
.dumpit = ovs_flow_cmd_dump
1555
},
1556
{ .cmd = OVS_FLOW_CMD_SET,
1557
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1558
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1559
.doit = ovs_flow_cmd_set,
1560
},
1561
};
1562
1563
/* Generic netlink family descriptor for OVS flows. */
static struct genl_family dp_flow_genl_family __ro_after_init = {
	/* Identity. */
	.name = OVS_FLOW_FAMILY,
	.version = OVS_FLOW_VERSION,
	.module = THIS_MODULE,

	/* Message layout and attribute validation. */
	.hdrsize = sizeof(struct ovs_header),
	.maxattr = OVS_FLOW_ATTR_MAX,
	.policy = flow_policy,

	/* Operations. */
	.small_ops = dp_flow_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
	.resv_start_op = OVS_FLOW_CMD_SET + 1,
	.parallel_ops = true,
	.netnsok = true,

	/* Multicast notifications. */
	.mcgrps = &ovs_dp_flow_multicast_group,
	.n_mcgrps = 1,
};
1578
1579
static size_t ovs_dp_cmd_msg_size(void)
1580
{
1581
size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1582
1583
msgsize += nla_total_size(IFNAMSIZ);
1584
msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1585
msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1586
msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1587
msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
1588
msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */
1589
1590
return msgsize;
1591
}
1592
1593
/* Called with ovs_mutex. */
1594
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1595
u32 portid, u32 seq, u32 flags, u8 cmd)
1596
{
1597
struct ovs_header *ovs_header;
1598
struct ovs_dp_stats dp_stats;
1599
struct ovs_dp_megaflow_stats dp_megaflow_stats;
1600
struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
1601
int err, pids_len;
1602
1603
ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1604
flags, cmd);
1605
if (!ovs_header)
1606
goto error;
1607
1608
ovs_header->dp_ifindex = get_dpifindex(dp);
1609
1610
err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1611
if (err)
1612
goto nla_put_failure;
1613
1614
get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1615
if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1616
&dp_stats, OVS_DP_ATTR_PAD))
1617
goto nla_put_failure;
1618
1619
if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1620
sizeof(struct ovs_dp_megaflow_stats),
1621
&dp_megaflow_stats, OVS_DP_ATTR_PAD))
1622
goto nla_put_failure;
1623
1624
if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1625
goto nla_put_failure;
1626
1627
if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
1628
ovs_flow_tbl_masks_cache_size(&dp->table)))
1629
goto nla_put_failure;
1630
1631
if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
1632
pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32);
1633
if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids))
1634
goto nla_put_failure;
1635
}
1636
1637
genlmsg_end(skb, ovs_header);
1638
return 0;
1639
1640
nla_put_failure:
1641
genlmsg_cancel(skb, ovs_header);
1642
error:
1643
return -EMSGSIZE;
1644
}
1645
1646
static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1647
{
1648
return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1649
}
1650
1651
/* Called with rcu_read_lock or ovs_mutex. */
1652
static struct datapath *lookup_datapath(struct net *net,
1653
const struct ovs_header *ovs_header,
1654
struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1655
{
1656
struct datapath *dp;
1657
1658
if (!a[OVS_DP_ATTR_NAME])
1659
dp = get_dp(net, ovs_header->dp_ifindex);
1660
else {
1661
struct vport *vport;
1662
1663
vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1664
dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1665
}
1666
return dp ? dp : ERR_PTR(-ENODEV);
1667
}
1668
1669
static void ovs_dp_reset_user_features(struct sk_buff *skb,
1670
struct genl_info *info)
1671
{
1672
struct datapath *dp;
1673
1674
dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
1675
info->attrs);
1676
if (IS_ERR(dp))
1677
return;
1678
1679
pr_warn("%s: Dropping previously announced user features\n",
1680
ovs_dp_name(dp));
1681
dp->user_features = 0;
1682
}
1683
1684
static int ovs_dp_set_upcall_portids(struct datapath *dp,
1685
const struct nlattr *ids)
1686
{
1687
struct dp_nlsk_pids *old, *dp_nlsk_pids;
1688
1689
if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
1690
return -EINVAL;
1691
1692
old = ovsl_dereference(dp->upcall_portids);
1693
1694
dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
1695
GFP_KERNEL);
1696
if (!dp_nlsk_pids)
1697
return -ENOMEM;
1698
1699
dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
1700
nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
1701
1702
rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
1703
1704
kfree_rcu(old, rcu);
1705
1706
return 0;
1707
}
1708
1709
/* Return the upcall netlink port ID to use for @cpu_id.
 *
 * Returns 0 when no port ID table is installed or the table is empty.
 * When @cpu_id is beyond the table, the lookup wraps around
 * (cpu_id % n_pids) and a rate-limited message is logged.
 */
u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
{
	struct dp_nlsk_pids *pids = rcu_dereference(dp->upcall_portids);

	if (!pids)
		return 0;

	if (cpu_id < pids->n_pids)
		return pids->pids[cpu_id];

	if (pids->n_pids > 0) {
		/* The number of netlink PIDs does not match the number of
		 * CPUs as seen by the kernel.  Log this and pick a socket
		 * by wrapping around, so that packets are not dropped.
		 */
		pr_info_ratelimited("cpu_id mismatch with handler threads");
		return pids->pids[cpu_id % pids->n_pids];
	}

	return 0;
}
1735
1736
static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1737
{
1738
u32 user_features = 0, old_features = dp->user_features;
1739
int err;
1740
1741
if (a[OVS_DP_ATTR_USER_FEATURES]) {
1742
user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1743
1744
if (user_features & ~(OVS_DP_F_VPORT_PIDS |
1745
OVS_DP_F_UNALIGNED |
1746
OVS_DP_F_TC_RECIRC_SHARING |
1747
OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
1748
return -EOPNOTSUPP;
1749
1750
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1751
if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
1752
return -EOPNOTSUPP;
1753
#endif
1754
}
1755
1756
if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
1757
int err;
1758
u32 cache_size;
1759
1760
cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
1761
err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
1762
if (err)
1763
return err;
1764
}
1765
1766
dp->user_features = user_features;
1767
1768
if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
1769
a[OVS_DP_ATTR_PER_CPU_PIDS]) {
1770
/* Upcall Netlink Port IDs have been updated */
1771
err = ovs_dp_set_upcall_portids(dp,
1772
a[OVS_DP_ATTR_PER_CPU_PIDS]);
1773
if (err)
1774
return err;
1775
}
1776
1777
if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1778
!(old_features & OVS_DP_F_TC_RECIRC_SHARING))
1779
tc_skb_ext_tc_enable();
1780
else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1781
(old_features & OVS_DP_F_TC_RECIRC_SHARING))
1782
tc_skb_ext_tc_disable();
1783
1784
return 0;
1785
}
1786
1787
static int ovs_dp_stats_init(struct datapath *dp)
1788
{
1789
dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1790
if (!dp->stats_percpu)
1791
return -ENOMEM;
1792
1793
return 0;
1794
}
1795
1796
static int ovs_dp_vport_init(struct datapath *dp)
1797
{
1798
int i;
1799
1800
dp->ports = kmalloc_objs(struct hlist_head, DP_VPORT_HASH_BUCKETS);
1801
if (!dp->ports)
1802
return -ENOMEM;
1803
1804
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1805
INIT_HLIST_HEAD(&dp->ports[i]);
1806
1807
return 0;
1808
}
1809
1810
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1811
{
1812
struct nlattr **a = info->attrs;
1813
struct vport_parms parms;
1814
struct sk_buff *reply;
1815
struct datapath *dp;
1816
struct vport *vport;
1817
struct ovs_net *ovs_net;
1818
int err;
1819
1820
err = -EINVAL;
1821
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1822
goto err;
1823
1824
reply = ovs_dp_cmd_alloc_info();
1825
if (!reply)
1826
return -ENOMEM;
1827
1828
err = -ENOMEM;
1829
dp = kzalloc_obj(*dp);
1830
if (dp == NULL)
1831
goto err_destroy_reply;
1832
1833
ovs_dp_set_net(dp, sock_net(skb->sk));
1834
1835
/* Allocate table. */
1836
err = ovs_flow_tbl_init(&dp->table);
1837
if (err)
1838
goto err_destroy_dp;
1839
1840
err = ovs_dp_stats_init(dp);
1841
if (err)
1842
goto err_destroy_table;
1843
1844
err = ovs_dp_vport_init(dp);
1845
if (err)
1846
goto err_destroy_stats;
1847
1848
err = ovs_meters_init(dp);
1849
if (err)
1850
goto err_destroy_ports;
1851
1852
/* Set up our datapath device. */
1853
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1854
parms.type = OVS_VPORT_TYPE_INTERNAL;
1855
parms.options = NULL;
1856
parms.dp = dp;
1857
parms.port_no = OVSP_LOCAL;
1858
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1859
parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0);
1860
1861
/* So far only local changes have been made, now need the lock. */
1862
ovs_lock();
1863
1864
err = ovs_dp_change(dp, a);
1865
if (err)
1866
goto err_unlock_and_destroy_meters;
1867
1868
vport = new_vport(&parms);
1869
if (IS_ERR(vport)) {
1870
err = PTR_ERR(vport);
1871
if (err == -EBUSY)
1872
err = -EEXIST;
1873
1874
if (err == -EEXIST) {
1875
/* An outdated user space instance that does not understand
1876
* the concept of user_features has attempted to create a new
1877
* datapath and is likely to reuse it. Drop all user features.
1878
*/
1879
if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1880
ovs_dp_reset_user_features(skb, info);
1881
}
1882
1883
goto err_destroy_portids;
1884
}
1885
1886
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1887
info->snd_seq, 0, OVS_DP_CMD_NEW);
1888
BUG_ON(err < 0);
1889
1890
ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1891
list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1892
1893
ovs_unlock();
1894
1895
ovs_notify(&dp_datapath_genl_family, reply, info);
1896
return 0;
1897
1898
err_destroy_portids:
1899
kfree(rcu_dereference_raw(dp->upcall_portids));
1900
err_unlock_and_destroy_meters:
1901
ovs_unlock();
1902
ovs_meters_exit(dp);
1903
err_destroy_ports:
1904
kfree(dp->ports);
1905
err_destroy_stats:
1906
free_percpu(dp->stats_percpu);
1907
err_destroy_table:
1908
ovs_flow_tbl_destroy(&dp->table);
1909
err_destroy_dp:
1910
kfree(dp);
1911
err_destroy_reply:
1912
kfree_skb(reply);
1913
err:
1914
return err;
1915
}
1916
1917
/* Called with ovs_mutex. */
1918
static void __dp_destroy(struct datapath *dp)
1919
{
1920
struct flow_table *table = &dp->table;
1921
int i;
1922
1923
if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
1924
tc_skb_ext_tc_disable();
1925
1926
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1927
struct vport *vport;
1928
struct hlist_node *n;
1929
1930
hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1931
if (vport->port_no != OVSP_LOCAL)
1932
ovs_dp_detach_port(vport);
1933
}
1934
1935
list_del_rcu(&dp->list_node);
1936
1937
/* OVSP_LOCAL is datapath internal port. We need to make sure that
1938
* all ports in datapath are destroyed first before freeing datapath.
1939
*/
1940
ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1941
1942
/* Flush sw_flow in the tables. RCU cb only releases resource
1943
* such as dp, ports and tables. That may avoid some issues
1944
* such as RCU usage warning.
1945
*/
1946
table_instance_flow_flush(table, ovsl_dereference(table->ti),
1947
ovsl_dereference(table->ufid_ti));
1948
1949
/* RCU destroy the ports, meters and flow tables. */
1950
call_rcu(&dp->rcu, destroy_dp_rcu);
1951
}
1952
1953
static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1954
{
1955
struct sk_buff *reply;
1956
struct datapath *dp;
1957
int err;
1958
1959
reply = ovs_dp_cmd_alloc_info();
1960
if (!reply)
1961
return -ENOMEM;
1962
1963
ovs_lock();
1964
dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
1965
info->attrs);
1966
err = PTR_ERR(dp);
1967
if (IS_ERR(dp))
1968
goto err_unlock_free;
1969
1970
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1971
info->snd_seq, 0, OVS_DP_CMD_DEL);
1972
BUG_ON(err < 0);
1973
1974
__dp_destroy(dp);
1975
ovs_unlock();
1976
1977
ovs_notify(&dp_datapath_genl_family, reply, info);
1978
1979
return 0;
1980
1981
err_unlock_free:
1982
ovs_unlock();
1983
kfree_skb(reply);
1984
return err;
1985
}
1986
1987
static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1988
{
1989
struct sk_buff *reply;
1990
struct datapath *dp;
1991
int err;
1992
1993
reply = ovs_dp_cmd_alloc_info();
1994
if (!reply)
1995
return -ENOMEM;
1996
1997
ovs_lock();
1998
dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
1999
info->attrs);
2000
err = PTR_ERR(dp);
2001
if (IS_ERR(dp))
2002
goto err_unlock_free;
2003
2004
err = ovs_dp_change(dp, info->attrs);
2005
if (err)
2006
goto err_unlock_free;
2007
2008
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
2009
info->snd_seq, 0, OVS_DP_CMD_SET);
2010
BUG_ON(err < 0);
2011
2012
ovs_unlock();
2013
ovs_notify(&dp_datapath_genl_family, reply, info);
2014
2015
return 0;
2016
2017
err_unlock_free:
2018
ovs_unlock();
2019
kfree_skb(reply);
2020
return err;
2021
}
2022
2023
static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
2024
{
2025
struct sk_buff *reply;
2026
struct datapath *dp;
2027
int err;
2028
2029
reply = ovs_dp_cmd_alloc_info();
2030
if (!reply)
2031
return -ENOMEM;
2032
2033
ovs_lock();
2034
dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
2035
info->attrs);
2036
if (IS_ERR(dp)) {
2037
err = PTR_ERR(dp);
2038
goto err_unlock_free;
2039
}
2040
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
2041
info->snd_seq, 0, OVS_DP_CMD_GET);
2042
BUG_ON(err < 0);
2043
ovs_unlock();
2044
2045
return genlmsg_reply(reply, info);
2046
2047
err_unlock_free:
2048
ovs_unlock();
2049
kfree_skb(reply);
2050
return err;
2051
}
2052
2053
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2054
{
2055
struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
2056
struct datapath *dp;
2057
int skip = cb->args[0];
2058
int i = 0;
2059
2060
ovs_lock();
2061
list_for_each_entry(dp, &ovs_net->dps, list_node) {
2062
if (i >= skip &&
2063
ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
2064
cb->nlh->nlmsg_seq, NLM_F_MULTI,
2065
OVS_DP_CMD_GET) < 0)
2066
break;
2067
i++;
2068
}
2069
ovs_unlock();
2070
2071
cb->args[0] = i;
2072
2073
return skb->len;
2074
}
2075
2076
static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
2077
[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2078
[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2079
[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
2080
[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
2081
PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
2082
[OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
2083
};
2084
2085
static const struct genl_small_ops dp_datapath_genl_ops[] = {
2086
{ .cmd = OVS_DP_CMD_NEW,
2087
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2088
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2089
.doit = ovs_dp_cmd_new
2090
},
2091
{ .cmd = OVS_DP_CMD_DEL,
2092
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2093
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2094
.doit = ovs_dp_cmd_del
2095
},
2096
{ .cmd = OVS_DP_CMD_GET,
2097
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2098
.flags = 0, /* OK for unprivileged users. */
2099
.doit = ovs_dp_cmd_get,
2100
.dumpit = ovs_dp_cmd_dump
2101
},
2102
{ .cmd = OVS_DP_CMD_SET,
2103
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2104
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2105
.doit = ovs_dp_cmd_set,
2106
},
2107
};
2108
2109
/* Generic netlink family descriptor for OVS datapaths. */
static struct genl_family dp_datapath_genl_family __ro_after_init = {
	/* Identity. */
	.name = OVS_DATAPATH_FAMILY,
	.version = OVS_DATAPATH_VERSION,
	.module = THIS_MODULE,

	/* Message layout and attribute validation. */
	.hdrsize = sizeof(struct ovs_header),
	.maxattr = OVS_DP_ATTR_MAX,
	.policy = datapath_policy,

	/* Operations. */
	.small_ops = dp_datapath_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
	.resv_start_op = OVS_DP_CMD_SET + 1,
	.parallel_ops = true,
	.netnsok = true,

	/* Multicast notifications. */
	.mcgrps = &ovs_dp_datapath_multicast_group,
	.n_mcgrps = 1,
};
2124
2125
/* Called with ovs_mutex or RCU read lock. */
2126
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
2127
struct net *net, u32 portid, u32 seq,
2128
u32 flags, u8 cmd, gfp_t gfp)
2129
{
2130
struct ovs_header *ovs_header;
2131
struct ovs_vport_stats vport_stats;
2132
struct net *net_vport;
2133
int err;
2134
2135
ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
2136
flags, cmd);
2137
if (!ovs_header)
2138
return -EMSGSIZE;
2139
2140
ovs_header->dp_ifindex = get_dpifindex(vport->dp);
2141
2142
if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
2143
nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
2144
nla_put_string(skb, OVS_VPORT_ATTR_NAME,
2145
ovs_vport_name(vport)) ||
2146
nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
2147
goto nla_put_failure;
2148
2149
rcu_read_lock();
2150
net_vport = dev_net_rcu(vport->dev);
2151
if (!net_eq(net, net_vport)) {
2152
int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC);
2153
2154
if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
2155
goto nla_put_failure_unlock;
2156
}
2157
rcu_read_unlock();
2158
2159
ovs_vport_get_stats(vport, &vport_stats);
2160
if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
2161
sizeof(struct ovs_vport_stats), &vport_stats,
2162
OVS_VPORT_ATTR_PAD))
2163
goto nla_put_failure;
2164
2165
if (ovs_vport_get_upcall_stats(vport, skb))
2166
goto nla_put_failure;
2167
2168
if (ovs_vport_get_upcall_portids(vport, skb))
2169
goto nla_put_failure;
2170
2171
err = ovs_vport_get_options(vport, skb);
2172
if (err == -EMSGSIZE)
2173
goto error;
2174
2175
genlmsg_end(skb, ovs_header);
2176
return 0;
2177
2178
nla_put_failure_unlock:
2179
rcu_read_unlock();
2180
nla_put_failure:
2181
err = -EMSGSIZE;
2182
error:
2183
genlmsg_cancel(skb, ovs_header);
2184
return err;
2185
}
2186
2187
static size_t ovs_vport_cmd_msg_size(void)
2188
{
2189
size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
2190
2191
msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_PORT_NO */
2192
msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_TYPE */
2193
msgsize += nla_total_size(IFNAMSIZ); /* OVS_VPORT_ATTR_NAME */
2194
msgsize += nla_total_size(sizeof(u32)); /* OVS_VPORT_ATTR_IFINDEX */
2195
msgsize += nla_total_size(sizeof(s32)); /* OVS_VPORT_ATTR_NETNSID */
2196
2197
/* OVS_VPORT_ATTR_STATS */
2198
msgsize += nla_total_size_64bit(sizeof(struct ovs_vport_stats));
2199
2200
/* OVS_VPORT_ATTR_UPCALL_STATS(OVS_VPORT_UPCALL_ATTR_SUCCESS +
2201
* OVS_VPORT_UPCALL_ATTR_FAIL)
2202
*/
2203
msgsize += nla_total_size(nla_total_size_64bit(sizeof(u64)) +
2204
nla_total_size_64bit(sizeof(u64)));
2205
2206
/* OVS_VPORT_ATTR_UPCALL_PID */
2207
msgsize += nla_total_size(nr_cpu_ids * sizeof(u32));
2208
2209
/* OVS_VPORT_ATTR_OPTIONS(OVS_TUNNEL_ATTR_DST_PORT +
2210
* OVS_TUNNEL_ATTR_EXTENSION(OVS_VXLAN_EXT_GBP))
2211
*/
2212
msgsize += nla_total_size(nla_total_size(sizeof(u16)) +
2213
nla_total_size(nla_total_size(0)));
2214
2215
return msgsize;
2216
}
2217
2218
static struct sk_buff *ovs_vport_cmd_alloc_info(void)
2219
{
2220
return genlmsg_new(ovs_vport_cmd_msg_size(), GFP_KERNEL);
2221
}
2222
2223
/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
2224
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
2225
u32 portid, u32 seq, u8 cmd)
2226
{
2227
struct sk_buff *skb;
2228
int retval;
2229
2230
skb = ovs_vport_cmd_alloc_info();
2231
if (!skb)
2232
return ERR_PTR(-ENOMEM);
2233
2234
retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
2235
GFP_KERNEL);
2236
BUG_ON(retval < 0);
2237
2238
return skb;
2239
}
2240
2241
/* Called with ovs_mutex or RCU read lock. */
2242
static struct vport *lookup_vport(struct net *net,
2243
const struct ovs_header *ovs_header,
2244
struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
2245
{
2246
struct datapath *dp;
2247
struct vport *vport;
2248
2249
if (a[OVS_VPORT_ATTR_IFINDEX])
2250
return ERR_PTR(-EOPNOTSUPP);
2251
if (a[OVS_VPORT_ATTR_NAME]) {
2252
vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
2253
if (!vport)
2254
return ERR_PTR(-ENODEV);
2255
if (ovs_header->dp_ifindex &&
2256
ovs_header->dp_ifindex != get_dpifindex(vport->dp))
2257
return ERR_PTR(-ENODEV);
2258
return vport;
2259
} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
2260
u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
2261
2262
if (port_no >= DP_MAX_PORTS)
2263
return ERR_PTR(-EFBIG);
2264
2265
dp = get_dp(net, ovs_header->dp_ifindex);
2266
if (!dp)
2267
return ERR_PTR(-ENODEV);
2268
2269
vport = ovs_vport_ovsl_rcu(dp, port_no);
2270
if (!vport)
2271
return ERR_PTR(-ENODEV);
2272
return vport;
2273
} else
2274
return ERR_PTR(-EINVAL);
2275
2276
}
2277
2278
static unsigned int ovs_get_max_headroom(struct datapath *dp)
2279
{
2280
unsigned int dev_headroom, max_headroom = 0;
2281
struct net_device *dev;
2282
struct vport *vport;
2283
int i;
2284
2285
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2286
hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2287
lockdep_ovsl_is_held()) {
2288
dev = vport->dev;
2289
dev_headroom = netdev_get_fwd_headroom(dev);
2290
if (dev_headroom > max_headroom)
2291
max_headroom = dev_headroom;
2292
}
2293
}
2294
2295
return max_headroom;
2296
}
2297
2298
/* Called with ovs_mutex */
2299
static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
2300
{
2301
struct vport *vport;
2302
int i;
2303
2304
dp->max_headroom = new_headroom;
2305
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2306
hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2307
lockdep_ovsl_is_held())
2308
netdev_set_rx_headroom(vport->dev, new_headroom);
2309
}
2310
}
2311
2312
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
2313
{
2314
struct nlattr **a = info->attrs;
2315
struct ovs_header *ovs_header = genl_info_userhdr(info);
2316
struct vport_parms parms;
2317
struct sk_buff *reply;
2318
struct vport *vport;
2319
struct datapath *dp;
2320
unsigned int new_headroom;
2321
u32 port_no;
2322
int err;
2323
2324
if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
2325
!a[OVS_VPORT_ATTR_UPCALL_PID])
2326
return -EINVAL;
2327
2328
parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2329
2330
if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
2331
return -EOPNOTSUPP;
2332
2333
port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0);
2334
if (port_no >= DP_MAX_PORTS)
2335
return -EFBIG;
2336
2337
reply = ovs_vport_cmd_alloc_info();
2338
if (!reply)
2339
return -ENOMEM;
2340
2341
ovs_lock();
2342
restart:
2343
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2344
err = -ENODEV;
2345
if (!dp)
2346
goto exit_unlock_free;
2347
2348
if (port_no) {
2349
vport = ovs_vport_ovsl(dp, port_no);
2350
err = -EBUSY;
2351
if (vport)
2352
goto exit_unlock_free;
2353
} else {
2354
for (port_no = 1; ; port_no++) {
2355
if (port_no >= DP_MAX_PORTS) {
2356
err = -EFBIG;
2357
goto exit_unlock_free;
2358
}
2359
vport = ovs_vport_ovsl(dp, port_no);
2360
if (!vport)
2361
break;
2362
}
2363
}
2364
2365
parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2366
parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2367
parms.dp = dp;
2368
parms.port_no = port_no;
2369
parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2370
parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX],
2371
0);
2372
2373
vport = new_vport(&parms);
2374
err = PTR_ERR(vport);
2375
if (IS_ERR(vport)) {
2376
if (err == -EAGAIN)
2377
goto restart;
2378
goto exit_unlock_free;
2379
}
2380
2381
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2382
info->snd_portid, info->snd_seq, 0,
2383
OVS_VPORT_CMD_NEW, GFP_KERNEL);
2384
2385
new_headroom = netdev_get_fwd_headroom(vport->dev);
2386
2387
if (new_headroom > dp->max_headroom)
2388
ovs_update_headroom(dp, new_headroom);
2389
else
2390
netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2391
2392
BUG_ON(err < 0);
2393
ovs_unlock();
2394
2395
ovs_notify(&dp_vport_genl_family, reply, info);
2396
return 0;
2397
2398
exit_unlock_free:
2399
ovs_unlock();
2400
kfree_skb(reply);
2401
return err;
2402
}
2403
2404
static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2405
{
2406
struct nlattr **a = info->attrs;
2407
struct sk_buff *reply;
2408
struct vport *vport;
2409
int err;
2410
2411
reply = ovs_vport_cmd_alloc_info();
2412
if (!reply)
2413
return -ENOMEM;
2414
2415
ovs_lock();
2416
vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
2417
err = PTR_ERR(vport);
2418
if (IS_ERR(vport))
2419
goto exit_unlock_free;
2420
2421
if (a[OVS_VPORT_ATTR_TYPE] &&
2422
nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2423
err = -EINVAL;
2424
goto exit_unlock_free;
2425
}
2426
2427
if (a[OVS_VPORT_ATTR_OPTIONS]) {
2428
err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2429
if (err)
2430
goto exit_unlock_free;
2431
}
2432
2433
2434
if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2435
struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2436
2437
err = ovs_vport_set_upcall_portids(vport, ids);
2438
if (err)
2439
goto exit_unlock_free;
2440
}
2441
2442
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2443
info->snd_portid, info->snd_seq, 0,
2444
OVS_VPORT_CMD_SET, GFP_KERNEL);
2445
BUG_ON(err < 0);
2446
2447
ovs_unlock();
2448
ovs_notify(&dp_vport_genl_family, reply, info);
2449
return 0;
2450
2451
exit_unlock_free:
2452
ovs_unlock();
2453
kfree_skb(reply);
2454
return err;
2455
}
2456
2457
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2458
{
2459
bool update_headroom = false;
2460
struct nlattr **a = info->attrs;
2461
struct sk_buff *reply;
2462
struct datapath *dp;
2463
struct vport *vport;
2464
unsigned int new_headroom;
2465
int err;
2466
2467
reply = ovs_vport_cmd_alloc_info();
2468
if (!reply)
2469
return -ENOMEM;
2470
2471
ovs_lock();
2472
vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
2473
err = PTR_ERR(vport);
2474
if (IS_ERR(vport))
2475
goto exit_unlock_free;
2476
2477
if (vport->port_no == OVSP_LOCAL) {
2478
err = -EINVAL;
2479
goto exit_unlock_free;
2480
}
2481
2482
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2483
info->snd_portid, info->snd_seq, 0,
2484
OVS_VPORT_CMD_DEL, GFP_KERNEL);
2485
BUG_ON(err < 0);
2486
2487
/* the vport deletion may trigger dp headroom update */
2488
dp = vport->dp;
2489
if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2490
update_headroom = true;
2491
2492
netdev_reset_rx_headroom(vport->dev);
2493
ovs_dp_detach_port(vport);
2494
2495
if (update_headroom) {
2496
new_headroom = ovs_get_max_headroom(dp);
2497
2498
if (new_headroom < dp->max_headroom)
2499
ovs_update_headroom(dp, new_headroom);
2500
}
2501
ovs_unlock();
2502
2503
ovs_notify(&dp_vport_genl_family, reply, info);
2504
return 0;
2505
2506
exit_unlock_free:
2507
ovs_unlock();
2508
kfree_skb(reply);
2509
return err;
2510
}
2511
2512
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2513
{
2514
struct nlattr **a = info->attrs;
2515
struct ovs_header *ovs_header = genl_info_userhdr(info);
2516
struct sk_buff *reply;
2517
struct vport *vport;
2518
int err;
2519
2520
reply = ovs_vport_cmd_alloc_info();
2521
if (!reply)
2522
return -ENOMEM;
2523
2524
rcu_read_lock();
2525
vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2526
err = PTR_ERR(vport);
2527
if (IS_ERR(vport))
2528
goto exit_unlock_free;
2529
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2530
info->snd_portid, info->snd_seq, 0,
2531
OVS_VPORT_CMD_GET, GFP_ATOMIC);
2532
BUG_ON(err < 0);
2533
rcu_read_unlock();
2534
2535
return genlmsg_reply(reply, info);
2536
2537
exit_unlock_free:
2538
rcu_read_unlock();
2539
kfree_skb(reply);
2540
return err;
2541
}
2542
2543
static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2544
{
2545
struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2546
struct datapath *dp;
2547
int bucket = cb->args[0], skip = cb->args[1];
2548
int i, j = 0;
2549
2550
rcu_read_lock();
2551
dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2552
if (!dp) {
2553
rcu_read_unlock();
2554
return -ENODEV;
2555
}
2556
for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2557
struct vport *vport;
2558
2559
j = 0;
2560
hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2561
if (j >= skip &&
2562
ovs_vport_cmd_fill_info(vport, skb,
2563
sock_net(skb->sk),
2564
NETLINK_CB(cb->skb).portid,
2565
cb->nlh->nlmsg_seq,
2566
NLM_F_MULTI,
2567
OVS_VPORT_CMD_GET,
2568
GFP_ATOMIC) < 0)
2569
goto out;
2570
2571
j++;
2572
}
2573
skip = 0;
2574
}
2575
out:
2576
rcu_read_unlock();
2577
2578
cb->args[0] = i;
2579
cb->args[1] = j;
2580
2581
return skb->len;
2582
}
2583
2584
static void ovs_dp_masks_rebalance(struct work_struct *work)
2585
{
2586
struct ovs_net *ovs_net = container_of(work, struct ovs_net,
2587
masks_rebalance.work);
2588
struct datapath *dp;
2589
2590
ovs_lock();
2591
2592
list_for_each_entry(dp, &ovs_net->dps, list_node)
2593
ovs_flow_masks_rebalance(&dp->table);
2594
2595
ovs_unlock();
2596
2597
schedule_delayed_work(&ovs_net->masks_rebalance,
2598
msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2599
}
2600
2601
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2602
[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2603
[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2604
[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2605
[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2606
[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
2607
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2608
[OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
2609
[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
2610
[OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
2611
};
2612
2613
static const struct genl_small_ops dp_vport_genl_ops[] = {
2614
{ .cmd = OVS_VPORT_CMD_NEW,
2615
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2616
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2617
.doit = ovs_vport_cmd_new
2618
},
2619
{ .cmd = OVS_VPORT_CMD_DEL,
2620
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2621
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2622
.doit = ovs_vport_cmd_del
2623
},
2624
{ .cmd = OVS_VPORT_CMD_GET,
2625
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2626
.flags = 0, /* OK for unprivileged users. */
2627
.doit = ovs_vport_cmd_get,
2628
.dumpit = ovs_vport_cmd_dump
2629
},
2630
{ .cmd = OVS_VPORT_CMD_SET,
2631
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2632
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2633
.doit = ovs_vport_cmd_set,
2634
},
2635
};
2636
2637
/* Generic netlink family for vport commands. */
struct genl_family dp_vport_genl_family __ro_after_init = {
	/* Identity */
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.module = THIS_MODULE,

	/* Message layout and validation */
	.hdrsize = sizeof(struct ovs_header),
	.maxattr = OVS_VPORT_ATTR_MAX,
	.policy = vport_policy,

	/* Operations */
	.small_ops = dp_vport_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
	.resv_start_op = OVS_VPORT_CMD_SET + 1,
	.netnsok = true,
	.parallel_ops = true,

	/* Multicast */
	.mcgrps = &ovs_dp_vport_multicast_group,
	.n_mcgrps = 1,
};
2652
2653
/* Every generic netlink family this module registers, in registration order.
 * dp_register_genl() walks this table forwards; dp_unregister_genl() is given
 * a count of leading entries to unregister.
 */
static struct genl_family * const dp_genl_families[] = {
	&dp_datapath_genl_family,
	&dp_vport_genl_family,
	&dp_flow_genl_family,
	&dp_packet_genl_family,
	&dp_meter_genl_family,
#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
	&dp_ct_limit_genl_family,
#endif
};
2663
2664
static void dp_unregister_genl(int n_families)
2665
{
2666
int i;
2667
2668
for (i = 0; i < n_families; i++)
2669
genl_unregister_family(dp_genl_families[i]);
2670
}
2671
2672
/* Register every family in dp_genl_families[]. On the first failure the
 * families registered so far are unregistered again and the error returned.
 *
 * Returns 0 on success or the error from genl_register_family().
 */
static int __init dp_register_genl(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
		int err = genl_register_family(dp_genl_families[i]);

		if (err) {
			/* Entries [0, i) were registered successfully. */
			dp_unregister_genl(i);
			return err;
		}
	}

	return 0;
}
2690
2691
/* Per-namespace init: set up the datapath list and work items, initialise
 * conntrack state via ovs_ct_init() and, if that succeeds, start the periodic
 * mask rebalancing.
 *
 * Returns 0 on success or the error from ovs_ct_init().
 */
static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	int err;

	INIT_LIST_HEAD(&ovs_net->dps);
	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
	INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);

	err = ovs_ct_init(net);
	if (!err)
		schedule_delayed_work(&ovs_net->masks_rebalance,
				      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));

	return err;
}
2708
2709
/* Walk every datapath of @net and add to @head (via vport->detach_list) each
 * OVS_VPORT_TYPE_INTERNAL vport whose device lives in namespace @dnet.
 */
static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
					    struct list_head *head)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	struct datapath *dp;

	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		int bucket;

		for (bucket = 0; bucket < DP_VPORT_HASH_BUCKETS; bucket++) {
			struct vport *port;

			hlist_for_each_entry(port, &dp->ports[bucket],
					     dp_hash_node) {
				if (port->ops->type == OVS_VPORT_TYPE_INTERNAL &&
				    dev_net(port->dev) == dnet)
					list_add(&port->detach_list, head);
			}
		}
	}
}
2731
2732
/* Per-namespace exit for @dnet: under ovs_lock, tear down conntrack state,
 * destroy the namespace's own datapaths, and detach internal vports of
 * datapaths in any namespace whose device lives in @dnet. Pending work items
 * are cancelled after the lock is released.
 */
static void __net_exit ovs_exit_net(struct net *dnet)
{
	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
	struct datapath *dp, *dp_tmp;
	struct vport *port, *port_tmp;
	struct net *net;
	LIST_HEAD(to_detach);

	ovs_lock();

	ovs_ct_exit(dnet);

	list_for_each_entry_safe(dp, dp_tmp, &ovs_net->dps, list_node) {
		__dp_destroy(dp);
	}

	/* Gather matching vports from all namespaces. */
	down_read(&net_rwsem);
	for_each_net(net) {
		list_vports_from_net(net, dnet, &to_detach);
	}
	up_read(&net_rwsem);

	/* Detach all vports from given namespace. */
	list_for_each_entry_safe(port, port_tmp, &to_detach, detach_list) {
		list_del(&port->detach_list);
		ovs_dp_detach_port(port);
	}

	ovs_unlock();

	cancel_delayed_work_sync(&ovs_net->masks_rebalance);
	cancel_work_sync(&ovs_net->dp_notify_work);
}
2763
2764
static struct pernet_operations ovs_net_ops = {
2765
.init = ovs_init_net,
2766
.exit = ovs_exit_net,
2767
.id = &ovs_net_id,
2768
.size = sizeof(struct ovs_net),
2769
};
2770
2771
static const char * const ovs_drop_reasons[] = {
2772
#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x),
2773
OVS_DROP_REASONS(S)
2774
#undef S
2775
};
2776
2777
static struct drop_reason_list drop_reason_list_ovs = {
2778
.reasons = ovs_drop_reasons,
2779
.n_reasons = ARRAY_SIZE(ovs_drop_reasons),
2780
};
2781
2782
/* Allocate the per-CPU ovs_pcpu_storage area and initialise the bh_lock of
 * every possible CPU's instance.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int __init ovs_alloc_percpu_storage(void)
{
	unsigned int cpu;

	ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
	if (!ovs_pcpu_storage)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct ovs_pcpu_storage *slot = per_cpu_ptr(ovs_pcpu_storage,
							    cpu);

		local_lock_init(&slot->bh_lock);
	}

	return 0;
}
2798
2799
static void ovs_free_percpu_storage(void)
2800
{
2801
free_percpu(ovs_pcpu_storage);
2802
}
2803
2804
/* Module entry point. Brings up, in order: per-CPU storage, the internal
 * device rtnl link type, the flow and vport subsystems, the pernet
 * operations, the netdevice notifier, netdev vport support, the generic
 * netlink families and finally the drop-reason subsystem. A failure at any
 * step unwinds the steps already completed, in reverse.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init dp_init(void)
{
	int err;

	/* struct ovs_skb_cb must fit in the skb control buffer. */
	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
		     sizeof_field(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath\n");

	err = ovs_alloc_percpu_storage();
	if (err)
		goto err_free_percpu;

	err = ovs_internal_dev_rtnl_link_register();
	if (err)
		goto err_free_percpu;

	err = ovs_flow_init();
	if (err)
		goto err_rtnl_link;

	err = ovs_vport_init();
	if (err)
		goto err_flow;

	err = register_pernet_device(&ovs_net_ops);
	if (err)
		goto err_vport;

	err = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (err)
		goto err_pernet;

	err = ovs_netdev_init();
	if (err)
		goto err_notifier;

	err = dp_register_genl();
	if (err < 0)
		goto err_netdev;

	drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
				     &drop_reason_list_ovs);

	return 0;

err_netdev:
	ovs_netdev_exit();
err_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
err_pernet:
	unregister_pernet_device(&ovs_net_ops);
err_vport:
	ovs_vport_exit();
err_flow:
	ovs_flow_exit();
err_rtnl_link:
	ovs_internal_dev_rtnl_link_unregister();
err_free_percpu:
	ovs_free_percpu_storage();
	return err;
}
2866
2867
static void dp_cleanup(void)
2868
{
2869
dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2870
ovs_netdev_exit();
2871
unregister_netdevice_notifier(&ovs_dp_device_notifier);
2872
unregister_pernet_device(&ovs_net_ops);
2873
drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
2874
rcu_barrier();
2875
ovs_vport_exit();
2876
ovs_flow_exit();
2877
ovs_internal_dev_rtnl_link_unregister();
2878
ovs_free_percpu_storage();
2879
}
2880
2881
module_init(dp_init);
2882
module_exit(dp_cleanup);
2883
2884
MODULE_DESCRIPTION("Open vSwitch switching datapath");
2885
MODULE_LICENSE("GPL");
2886
MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2887
MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2888
MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2889
MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2890
MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
2891
MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
2892
2893