// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2017 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>

#include <net/dst.h>
#include <net/gso.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>

#if IS_ENABLED(CONFIG_PSAMPLE)
#include <net/psample.h>
#endif

#include <net/sctp/checksum.h>

#include "datapath.h"
#include "drop.h"
#include "flow.h"
#include "conntrack.h"
#include "vport.h"
#include "flow_netlink.h"
#include "openvswitch_trace.h"

struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;

/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
 * space. Return NULL if out of key space.
 */
static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
{
	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
	struct action_flow_keys *keys = &ovs_pcpu->flow_keys;
	int level = ovs_pcpu->exec_level;
	struct sw_flow_key *key = NULL;

	if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
		key = &keys->key[level - 1];
		*key = *key_;
	}

	return key;
}

static void action_fifo_init(struct action_fifo *fifo)
{
	fifo->head = 0;
	fifo->tail = 0;
}

static bool action_fifo_is_empty(const struct action_fifo *fifo)
{
	return (fifo->head == fifo->tail);
}

static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
{
	if (action_fifo_is_empty(fifo))
		return NULL;

	return &fifo->fifo[fifo->tail++];
}

static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
{
	if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
		return NULL;

	return &fifo->fifo[fifo->head++];
}

/* Return the queued deferred action, or NULL if the fifo is full. */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
						    const struct sw_flow_key *key,
						    const struct nlattr *actions,
						    const int actions_len)
{
	struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
	struct deferred_action *da;

	da = action_fifo_put(fifo);
	if (da) {
		da->skb = skb;
		da->actions = actions;
		da->actions_len = actions_len;
		da->pkt_key = *key;
	}

	return da;
}
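
/* Deferral in a nutshell: each nested clone/sample/recirc execution bumps
 * the per-CPU 'exec_level'. While the level stays within
 * OVS_DEFERRED_ACTION_THRESHOLD, clone_key() hands out the pre-allocated
 * key slot for that level and the nested actions run immediately. Beyond
 * the threshold, add_deferred_actions() queues the (skb, key, actions)
 * triple in the per-CPU fifo above, which process_deferred_actions()
 * drains once the outermost invocation unwinds back to level 1.
 */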

static void invalidate_flow_key(struct sw_flow_key *key)
{
	key->mac_proto |= SW_FLOW_KEY_INVALID;
}

static bool is_flow_key_valid(const struct sw_flow_key *key)
{
	return !(key->mac_proto & SW_FLOW_KEY_INVALID);
}

static int clone_execute(struct datapath *dp, struct sk_buff *skb,
			 struct sw_flow_key *key,
			 u32 recirc_id,
			 const struct nlattr *actions, int len,
			 bool last, bool clone_flow_key);

static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len);

static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		     __be32 mpls_lse, __be16 mpls_ethertype, __u16 mac_len)
{
	int err;

	err = skb_mpls_push(skb, mpls_lse, mpls_ethertype, mac_len, !!mac_len);
	if (err)
		return err;

	if (!mac_len)
		key->mac_proto = MAC_PROTO_NONE;

	invalidate_flow_key(key);
	return 0;
}

static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		    const __be16 ethertype)
{
	int err;

	err = skb_mpls_pop(skb, ethertype, skb->mac_len,
			   ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET);
	if (err)
		return err;

	if (ethertype == htons(ETH_P_TEB))
		key->mac_proto = MAC_PROTO_ETHERNET;

	invalidate_flow_key(key);
	return 0;
}

static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const __be32 *mpls_lse, const __be32 *mask)
{
	struct mpls_shim_hdr *stack;
	__be32 lse;
	int err;

	if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
		return -ENOMEM;

	stack = mpls_hdr(skb);
	lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
	err = skb_mpls_update_lse(skb, lse);
	if (err)
		return err;

	flow_key->mpls.lse[0] = lse;
	return 0;
}

static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = skb_vlan_pop(skb);
	if (skb_vlan_tag_present(skb)) {
		invalidate_flow_key(key);
	} else {
		key->eth.vlan.tci = 0;
		key->eth.vlan.tpid = 0;
	}
	return err;
}

static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_vlan *vlan)
{
	int err;

	if (skb_vlan_tag_present(skb)) {
		invalidate_flow_key(key);
	} else {
		key->eth.vlan.tci = vlan->vlan_tci;
		key->eth.vlan.tpid = vlan->vlan_tpid;
	}
	err = skb_vlan_push(skb, vlan->vlan_tpid,
			    ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
	skb_reset_mac_len(skb);
	return err;
}

/* 'src' is already properly masked. */
static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
{
	u16 *dst = (u16 *)dst_;
	const u16 *src = (const u16 *)src_;
	const u16 *mask = (const u16 *)mask_;

	OVS_SET_MASKED(dst[0], src[0], mask[0]);
	OVS_SET_MASKED(dst[1], src[1], mask[1]);
	OVS_SET_MASKED(dst[2], src[2], mask[2]);
}
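
/* OVS_SET_MASKED(old, new, mask) keeps the bits of 'old' that the mask
 * clears and takes the (pre-masked) 'new' bits elsewhere; in effect
 * old = new | (old & ~mask). E.g. with old = 0xaabb, new = 0x1100 and
 * mask = 0xff00 the result is 0x11bb. The copy above applies this
 * u16-wise across the six bytes of a MAC address.
 */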

static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
			const struct ovs_key_ethernet *key,
			const struct ovs_key_ethernet *mask)
{
	int err;

	err = skb_ensure_writable(skb, ETH_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
			       mask->eth_src);
	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
			       mask->eth_dst);

	skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
	return 0;
}

/* pop_eth does not support VLAN packets as this action is never called
 * for them.
 */
static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = skb_eth_pop(skb);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_NONE;
	invalidate_flow_key(key);
	return 0;
}

static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
		    const struct ovs_action_push_eth *ethh)
{
	int err;

	err = skb_eth_push(skb, ethh->addresses.eth_dst,
			   ethh->addresses.eth_src);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_ETHERNET;
	invalidate_flow_key(key);
	return 0;
}

static noinline_for_stack int push_nsh(struct sk_buff *skb,
				       struct sw_flow_key *key,
				       const struct nlattr *a)
{
	u8 buffer[NSH_HDR_MAX_LEN];
	struct nshhdr *nh = (struct nshhdr *)buffer;
	int err;

	err = nsh_hdr_from_nlattr(a, nh, NSH_HDR_MAX_LEN);
	if (err)
		return err;

	err = nsh_push(skb, nh);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_NONE;
	invalidate_flow_key(key);
	return 0;
}

static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = nsh_pop(skb);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	if (skb->protocol == htons(ETH_P_TEB))
		key->mac_proto = MAC_PROTO_ETHERNET;
	else
		key->mac_proto = MAC_PROTO_NONE;
	invalidate_flow_key(key);
	return 0;
}

static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
				  __be32 addr, __be32 new_addr)
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (nh->frag_off & htons(IP_OFFSET))
		return;

	if (nh->protocol == IPPROTO_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
						 addr, new_addr, true);
	} else if (nh->protocol == IPPROTO_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace4(&uh->check, skb,
							 addr, new_addr, true);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	}
}
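
/* A UDP checksum of zero on IPv4 means "no checksum", so a datagram that
 * arrived without one is left alone above. When a checksum is updated and
 * happens to compute to zero, it is stored as CSUM_MANGLED_0 (0xffff),
 * which is equivalent in one's-complement arithmetic but does not read as
 * "absent".
 */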

static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
			__be32 *addr, __be32 new_addr)
{
	update_ip_l4_checksum(skb, nh, *addr, new_addr);
	csum_replace4(&nh->check, *addr, new_addr);
	skb_clear_hash(skb);
	ovs_ct_clear(skb, NULL);
	*addr = new_addr;
}

static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
				 __be32 addr[4], const __be32 new_addr[4])
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (l4_proto == NEXTHDR_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
						  addr, new_addr, true);
	} else if (l4_proto == NEXTHDR_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace16(&uh->check, skb,
							  addr, new_addr, true);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	} else if (l4_proto == NEXTHDR_ICMP) {
		if (likely(transport_len >= sizeof(struct icmp6hdr)))
			inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
						  skb, addr, new_addr, true);
	}
}

static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
			   const __be32 mask[4], __be32 masked[4])
{
	masked[0] = OVS_MASKED(old[0], addr[0], mask[0]);
	masked[1] = OVS_MASKED(old[1], addr[1], mask[1]);
	masked[2] = OVS_MASKED(old[2], addr[2], mask[2]);
	masked[3] = OVS_MASKED(old[3], addr[3], mask[3]);
}

static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
			  __be32 addr[4], const __be32 new_addr[4],
			  bool recalculate_csum)
{
	if (recalculate_csum)
		update_ipv6_checksum(skb, l4_proto, addr, new_addr);

	skb_clear_hash(skb);
	ovs_ct_clear(skb, NULL);
	memcpy(addr, new_addr, sizeof(__be32[4]));
}

static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask)
{
	u8 old_ipv6_tclass = ipv6_get_dsfield(nh);

	ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12),
			     (__force __wsum)(ipv6_tclass << 12));

	ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
}

static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask)
{
	u32 ofl;

	ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2];
	fl = OVS_MASKED(ofl, fl, mask);

	/* Bits 21-24 are always unmasked, so this retains their values. */
	nh->flow_lbl[0] = (u8)(fl >> 16);
	nh->flow_lbl[1] = (u8)(fl >> 8);
	nh->flow_lbl[2] = (u8)fl;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl));
}

static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask)
{
	new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8),
			     (__force __wsum)(new_ttl << 8));
	nh->hop_limit = new_ttl;
}

static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
		       u8 mask)
{
	new_ttl = OVS_MASKED(nh->ttl, new_ttl, mask);

	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
	nh->ttl = new_ttl;
}

static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv4 *key,
		    const struct ovs_key_ipv4 *mask)
{
	struct iphdr *nh;
	__be32 new_addr;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct iphdr));
	if (unlikely(err))
		return err;

	nh = ip_hdr(skb);

	/* Setting IP addresses is typically only a side effect of matching
	 * on them in the current userspace implementation, so it makes
	 * sense to check if the value actually changed.
	 */
	if (mask->ipv4_src) {
		new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);

		if (unlikely(new_addr != nh->saddr)) {
			set_ip_addr(skb, nh, &nh->saddr, new_addr);
			flow_key->ipv4.addr.src = new_addr;
		}
	}
	if (mask->ipv4_dst) {
		new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);

		if (unlikely(new_addr != nh->daddr)) {
			set_ip_addr(skb, nh, &nh->daddr, new_addr);
			flow_key->ipv4.addr.dst = new_addr;
		}
	}
	if (mask->ipv4_tos) {
		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
		flow_key->ip.tos = nh->tos;
	}
	if (mask->ipv4_ttl) {
		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
		flow_key->ip.ttl = nh->ttl;
	}

	return 0;
}

static bool is_ipv6_mask_nonzero(const __be32 addr[4])
{
	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
}

static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv6 *key,
		    const struct ovs_key_ipv6 *mask)
{
	struct ipv6hdr *nh;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct ipv6hdr));
	if (unlikely(err))
		return err;

	nh = ipv6_hdr(skb);

	/* Setting IP addresses is typically only a side effect of matching
	 * on them in the current userspace implementation, so it makes
	 * sense to check if the value actually changed.
	 */
	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
		__be32 *saddr = (__be32 *)&nh->saddr;
		__be32 masked[4];

		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);

		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
			set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
				      true);
			memcpy(&flow_key->ipv6.addr.src, masked,
			       sizeof(flow_key->ipv6.addr.src));
		}
	}
	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
		unsigned int offset = 0;
		int flags = IP6_FH_F_SKIP_RH;
		bool recalc_csum = true;
		__be32 *daddr = (__be32 *)&nh->daddr;
		__be32 masked[4];

		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);

		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
			if (ipv6_ext_hdr(nh->nexthdr))
				recalc_csum = (ipv6_find_hdr(skb, &offset,
							     NEXTHDR_ROUTING,
							     NULL, &flags)
					       != NEXTHDR_ROUTING);

			set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
				      recalc_csum);
			memcpy(&flow_key->ipv6.addr.dst, masked,
			       sizeof(flow_key->ipv6.addr.dst));
		}
	}
	if (mask->ipv6_tclass) {
		set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
		flow_key->ip.tos = ipv6_get_dsfield(nh);
	}
	if (mask->ipv6_label) {
		set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
			    ntohl(mask->ipv6_label));
		flow_key->ipv6.label =
			*(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
	}
	if (mask->ipv6_hlimit) {
		set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
		flow_key->ip.ttl = nh->hop_limit;
	}
	return 0;
}

/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
			__be16 new_port, __sum16 *check)
{
	ovs_ct_clear(skb, NULL);
	inet_proto_csum_replace2(check, skb, *port, new_port, false);
	*port = new_port;
}
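
/* The final argument to the inet_proto_csum_replace*() helpers used here
 * says whether the rewritten field is also covered by the L4 pseudo-header:
 * true for addresses, false for ports. The helper uses it to keep any
 * checksum-offload state (CHECKSUM_PARTIAL/CHECKSUM_COMPLETE) consistent
 * with the rewrite.
 */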

static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_udp *key,
		   const struct ovs_key_udp *mask)
{
	struct udphdr *uh;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct udphdr));
	if (unlikely(err))
		return err;

	uh = udp_hdr(skb);
	/* At least one of the masks is guaranteed non-zero, so do not
	 * bother checking them.
	 */
	src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src);
	dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst);

	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
		if (likely(src != uh->source)) {
			set_tp_port(skb, &uh->source, src, &uh->check);
			flow_key->tp.src = src;
		}
		if (likely(dst != uh->dest)) {
			set_tp_port(skb, &uh->dest, dst, &uh->check);
			flow_key->tp.dst = dst;
		}

		if (unlikely(!uh->check))
			uh->check = CSUM_MANGLED_0;
	} else {
		uh->source = src;
		uh->dest = dst;
		flow_key->tp.src = src;
		flow_key->tp.dst = dst;
		ovs_ct_clear(skb, NULL);
	}

	skb_clear_hash(skb);

	return 0;
}

static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_tcp *key,
		   const struct ovs_key_tcp *mask)
{
	struct tcphdr *th;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct tcphdr));
	if (unlikely(err))
		return err;

	th = tcp_hdr(skb);
	src = OVS_MASKED(th->source, key->tcp_src, mask->tcp_src);
	if (likely(src != th->source)) {
		set_tp_port(skb, &th->source, src, &th->check);
		flow_key->tp.src = src;
	}
	dst = OVS_MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
	if (likely(dst != th->dest)) {
		set_tp_port(skb, &th->dest, dst, &th->check);
		flow_key->tp.dst = dst;
	}
	skb_clear_hash(skb);

	return 0;
}

static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_sctp *key,
		    const struct ovs_key_sctp *mask)
{
	unsigned int sctphoff = skb_transport_offset(skb);
	struct sctphdr *sh;
	__le32 old_correct_csum, new_csum, old_csum;
	int err;

	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
	if (unlikely(err))
		return err;

	sh = sctp_hdr(skb);
	old_csum = sh->checksum;
	old_correct_csum = sctp_compute_cksum(skb, sctphoff);

	sh->source = OVS_MASKED(sh->source, key->sctp_src, mask->sctp_src);
	sh->dest = OVS_MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);

	new_csum = sctp_compute_cksum(skb, sctphoff);

	/* Carry any checksum errors through: the XOR cancels the two
	 * correct checksums, so a packet that arrived with a broken
	 * checksum leaves with the same error rather than being silently
	 * repaired.
	 */
	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

	skb_clear_hash(skb);
	ovs_ct_clear(skb, NULL);

	flow_key->tp.src = sh->source;
	flow_key->tp.dst = sh->dest;

	return 0;
}

static int ovs_vport_output(struct net *net, struct sock *sk,
			    struct sk_buff *skb)
{
	struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
	struct vport *vport = data->vport;

	if (skb_cow_head(skb, data->l2_len) < 0) {
		kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
		return -ENOMEM;
	}

	__skb_dst_copy(skb, data->dst);
	*OVS_CB(skb) = data->cb;
	skb->inner_protocol = data->inner_protocol;
	if (data->vlan_tci & VLAN_CFI_MASK)
		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci & ~VLAN_CFI_MASK);
	else
		__vlan_hwaccel_clear_tag(skb);

	/* Reconstruct the MAC header. */
	skb_push(skb, data->l2_len);
	memcpy(skb->data, &data->l2_data, data->l2_len);
	skb_postpush_rcsum(skb, skb->data, data->l2_len);
	skb_reset_mac_header(skb);

	if (eth_p_mpls(skb->protocol)) {
		skb->inner_network_header = skb->network_header;
		skb_set_network_header(skb, data->network_offset);
		skb_reset_mac_len(skb);
	}

	ovs_vport_send(vport, skb, data->mac_proto);
	return 0;
}

static unsigned int
ovs_dst_get_mtu(const struct dst_entry *dst)
{
	return dst->dev->mtu;
}

static struct dst_ops ovs_dst_ops = {
	.family = AF_UNSPEC,
	.mtu = ovs_dst_get_mtu,
};
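
/* The IPv4/IPv6 fragmentation paths expect a routing dst on the skb;
 * ovs_fragment() below satisfies them with a stack-allocated,
 * non-refcounted stub whose only meaningful callback is the MTU lookup
 * above. The real output context (vport, L2 header, VLAN state) travels
 * in the per-CPU ovs_frag_data that prepare_frag() fills in.
 */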

/* prepare_frag() is called once per (larger-than-MTU) frame; its
 * counterpart is ovs_vport_output(), which is called once per resulting
 * fragment.
 */
static void prepare_frag(struct vport *vport, struct sk_buff *skb,
			 u16 orig_network_offset, u8 mac_proto)
{
	unsigned int hlen = skb_network_offset(skb);
	struct ovs_frag_data *data;

	data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
	data->dst = skb->_skb_refdst;
	data->vport = vport;
	data->cb = *OVS_CB(skb);
	data->inner_protocol = skb->inner_protocol;
	data->network_offset = orig_network_offset;
	if (skb_vlan_tag_present(skb))
		data->vlan_tci = skb_vlan_tag_get(skb) | VLAN_CFI_MASK;
	else
		data->vlan_tci = 0;
	data->vlan_proto = skb->vlan_proto;
	data->mac_proto = mac_proto;
	data->l2_len = hlen;
	memcpy(&data->l2_data, skb->data, hlen);

	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
	skb_pull(skb, hlen);
}

static void ovs_fragment(struct net *net, struct vport *vport,
			 struct sk_buff *skb, u16 mru,
			 struct sw_flow_key *key)
{
	enum ovs_drop_reason reason;
	u16 orig_network_offset = 0;

	if (eth_p_mpls(skb->protocol)) {
		orig_network_offset = skb_network_offset(skb);
		skb->network_header = skb->inner_network_header;
	}

	if (skb_network_offset(skb) > MAX_L2_LEN) {
		OVS_NLERR(1, "L2 header too long to fragment");
		reason = OVS_DROP_FRAG_L2_TOO_LONG;
		goto err;
	}

	if (key->eth.type == htons(ETH_P_IP)) {
		struct rtable ovs_rt = { 0 };
		unsigned long orig_dst;

		prepare_frag(vport, skb, orig_network_offset,
			     ovs_key_mac_proto(key));
		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
		ovs_rt.dst.dev = vport->dev;

		orig_dst = skb->_skb_refdst;
		skb_dst_set_noref(skb, &ovs_rt.dst);
		IPCB(skb)->frag_max_size = mru;

		ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
		refdst_drop(orig_dst);
	} else if (key->eth.type == htons(ETH_P_IPV6)) {
		unsigned long orig_dst;
		struct rt6_info ovs_rt;

		prepare_frag(vport, skb, orig_network_offset,
			     ovs_key_mac_proto(key));
		memset(&ovs_rt, 0, sizeof(ovs_rt));
		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
		ovs_rt.dst.dev = vport->dev;

		orig_dst = skb->_skb_refdst;
		skb_dst_set_noref(skb, &ovs_rt.dst);
		IP6CB(skb)->frag_max_size = mru;

		ip6_fragment(net, skb->sk, skb, ovs_vport_output);
		refdst_drop(orig_dst);
	} else {
		WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
			  ovs_vport_name(vport), ntohs(key->eth.type), mru,
			  vport->dev->mtu);
		reason = OVS_DROP_FRAG_INVALID_PROTO;
		goto err;
	}

	return;
err:
	ovs_kfree_skb_reason(skb, reason);
}

static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
		      struct sw_flow_key *key)
{
	struct vport *vport = ovs_vport_rcu(dp, out_port);

	if (likely(vport &&
		   netif_running(vport->dev) &&
		   netif_carrier_ok(vport->dev))) {
		u16 mru = OVS_CB(skb)->mru;
		u32 cutlen = OVS_CB(skb)->cutlen;

		if (unlikely(cutlen > 0)) {
			if (skb->len - cutlen > ovs_mac_header_len(key))
				pskb_trim(skb, skb->len - cutlen);
			else
				pskb_trim(skb, ovs_mac_header_len(key));
		}

		if (likely(!mru ||
			   (skb->len <= mru + vport->dev->hard_header_len))) {
			ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
		} else if (mru <= vport->dev->mtu) {
			struct net *net = read_pnet(&dp->net);

			ovs_fragment(net, vport, skb, mru, key);
		} else {
			kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
		}
	} else {
		kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY);
	}
}

static int output_userspace(struct datapath *dp, struct sk_buff *skb,
			    struct sw_flow_key *key, const struct nlattr *attr,
			    const struct nlattr *actions, int actions_len,
			    uint32_t cutlen)
{
	struct dp_upcall_info upcall;
	const struct nlattr *a;
	int rem;

	memset(&upcall, 0, sizeof(upcall));
	upcall.cmd = OVS_PACKET_CMD_ACTION;
	upcall.mru = OVS_CB(skb)->mru;

	nla_for_each_nested(a, attr, rem) {
		switch (nla_type(a)) {
		case OVS_USERSPACE_ATTR_USERDATA:
			upcall.userdata = a;
			break;

		case OVS_USERSPACE_ATTR_PID:
			if (OVS_CB(skb)->upcall_pid)
				upcall.portid = OVS_CB(skb)->upcall_pid;
			else if (dp->user_features &
				 OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
				upcall.portid =
					ovs_dp_get_upcall_portid(dp,
								 smp_processor_id());
			else
				upcall.portid = nla_get_u32(a);
			break;

		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
			/* Get egress tunnel info. */
			struct vport *vport;

			vport = ovs_vport_rcu(dp, nla_get_u32(a));
			if (vport) {
				int err;

				err = dev_fill_metadata_dst(vport->dev, skb);
				if (!err)
					upcall.egress_tun_info = skb_tunnel_info(skb);
			}

			break;
		}

		case OVS_USERSPACE_ATTR_ACTIONS: {
			/* Include actions. */
			upcall.actions = actions;
			upcall.actions_len = actions_len;
			break;
		}

		} /* End of switch. */
	}

	return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
}

static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
				     struct sw_flow_key *key,
				     const struct nlattr *attr)
{
	/* The first attribute is always 'OVS_DEC_TTL_ATTR_ACTION'. */
	struct nlattr *actions = nla_data(attr);

	if (nla_len(actions))
		return clone_execute(dp, skb, key, 0, nla_data(actions),
				     nla_len(actions), true, false);

	ovs_kfree_skb_reason(skb, OVS_DROP_IP_TTL);
	return 0;
}

/* When 'last' is true, sample() should always consume the 'skb'.
 * Otherwise, sample() should keep 'skb' intact regardless of what
 * actions are executed within sample().
 */
static int sample(struct datapath *dp, struct sk_buff *skb,
		  struct sw_flow_key *key, const struct nlattr *attr,
		  bool last)
{
	struct nlattr *actions;
	struct nlattr *sample_arg;
	int rem = nla_len(attr);
	const struct sample_arg *arg;
	u32 init_probability;
	bool clone_flow_key;
	int err;

	/* The first attribute is always 'OVS_SAMPLE_ATTR_ARG'. */
	sample_arg = nla_data(attr);
	arg = nla_data(sample_arg);
	actions = nla_next(sample_arg, &rem);
	init_probability = OVS_CB(skb)->probability;

	if ((arg->probability != U32_MAX) &&
	    (!arg->probability || get_random_u32() > arg->probability)) {
		if (last)
			ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
		return 0;
	}

	OVS_CB(skb)->probability = arg->probability;

	clone_flow_key = !arg->exec;
	err = clone_execute(dp, skb, key, 0, actions, rem, last,
			    clone_flow_key);

	if (!last)
		OVS_CB(skb)->probability = init_probability;

	return err;
}

/* When 'last' is true, clone() should always consume the 'skb'.
 * Otherwise, clone() should keep 'skb' intact regardless of what
 * actions are executed within clone().
 */
static int clone(struct datapath *dp, struct sk_buff *skb,
		 struct sw_flow_key *key, const struct nlattr *attr,
		 bool last)
{
	struct nlattr *actions;
	struct nlattr *clone_arg;
	int rem = nla_len(attr);
	bool dont_clone_flow_key;

	/* The first attribute is always 'OVS_CLONE_ATTR_EXEC'. */
	clone_arg = nla_data(attr);
	dont_clone_flow_key = nla_get_u32(clone_arg);
	actions = nla_next(clone_arg, &rem);

	return clone_execute(dp, skb, key, 0, actions, rem, last,
			     !dont_clone_flow_key);
}

static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
			 const struct nlattr *attr)
{
	struct ovs_action_hash *hash_act = nla_data(attr);
	u32 hash = 0;

	if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
		/* OVS_HASH_ALG_L4 hashing type. */
		hash = skb_get_hash(skb);
	} else if (hash_act->hash_alg == OVS_HASH_ALG_SYM_L4) {
		/* OVS_HASH_ALG_SYM_L4 hashing type. NOTE: this doesn't
		 * extend past an encapsulated header.
		 */
		hash = __skb_get_hash_symmetric(skb);
	}

	hash = jhash_1word(hash, hash_act->hash_basis);
	if (!hash)
		hash = 0x1;

	key->ovs_flow_hash = hash;
}

static int execute_set_action(struct sk_buff *skb,
			      struct sw_flow_key *flow_key,
			      const struct nlattr *a)
{
	/* Only tunnel set execution is supported without a mask. */
	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
		struct ovs_tunnel_info *tun = nla_data(a);

		skb_dst_drop(skb);
		dst_hold((struct dst_entry *)tun->tun_dst);
		skb_dst_set(skb, (struct dst_entry *)tun->tun_dst);
		return 0;
	}

	return -EINVAL;
}

/* Mask is at the midpoint of the data. */
#define get_mask(a, type) ((const type)nla_data(a) + 1)
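
/* A masked set action carries the value and its mask back to back in a
 * single attribute, so for e.g. OVS_KEY_ATTR_IPV4 the payload is laid
 * out as:
 *
 *	struct ovs_key_ipv4 value;	// nla_data(a)
 *	struct ovs_key_ipv4 mask;	// get_mask(a, struct ovs_key_ipv4 *)
 *
 * The "+ 1" pointer arithmetic on the casted type lands exactly on the
 * mask half.
 */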

static int execute_masked_set_action(struct sk_buff *skb,
				     struct sw_flow_key *flow_key,
				     const struct nlattr *a)
{
	int err = 0;

	switch (nla_type(a)) {
	case OVS_KEY_ATTR_PRIORITY:
		OVS_SET_MASKED(skb->priority, nla_get_u32(a),
			       *get_mask(a, u32 *));
		flow_key->phy.priority = skb->priority;
		break;

	case OVS_KEY_ATTR_SKB_MARK:
		OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
		flow_key->phy.skb_mark = skb->mark;
		break;

	case OVS_KEY_ATTR_TUNNEL_INFO:
		/* Masked data not supported for tunnel. */
		err = -EINVAL;
		break;

	case OVS_KEY_ATTR_ETHERNET:
		err = set_eth_addr(skb, flow_key, nla_data(a),
				   get_mask(a, struct ovs_key_ethernet *));
		break;

	case OVS_KEY_ATTR_IPV4:
		err = set_ipv4(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv4 *));
		break;

	case OVS_KEY_ATTR_IPV6:
		err = set_ipv6(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv6 *));
		break;

	case OVS_KEY_ATTR_TCP:
		err = set_tcp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_tcp *));
		break;

	case OVS_KEY_ATTR_UDP:
		err = set_udp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_udp *));
		break;

	case OVS_KEY_ATTR_SCTP:
		err = set_sctp(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_sctp *));
		break;

	case OVS_KEY_ATTR_MPLS:
		err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
								    __be32 *));
		break;

	case OVS_KEY_ATTR_CT_STATE:
	case OVS_KEY_ATTR_CT_ZONE:
	case OVS_KEY_ATTR_CT_MARK:
	case OVS_KEY_ATTR_CT_LABELS:
	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
	case OVS_KEY_ATTR_NSH:
		err = -EINVAL;
		break;
	}

	return err;
}

static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
			  struct sw_flow_key *key,
			  const struct nlattr *a, bool last)
{
	u32 recirc_id;

	if (!is_flow_key_valid(key)) {
		int err;

		err = ovs_flow_key_update(skb, key);
		if (err)
			return err;
	}
	BUG_ON(!is_flow_key_valid(key));

	recirc_id = nla_get_u32(a);
	return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
}
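
/* Recirculation re-enters the packet into flow lookup: clone_execute()
 * stamps the (possibly cloned) key with 'recirc_id' and feeds the skb
 * back through ovs_dp_process_packet(), so a flow installed for that
 * recirc_id picks up processing where this action list left off.
 */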

static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
				 struct sw_flow_key *key,
				 const struct nlattr *attr, bool last)
{
	struct ovs_skb_cb *ovs_cb = OVS_CB(skb);
	const struct nlattr *actions, *cpl_arg;
	int len, max_len, rem = nla_len(attr);
	const struct check_pkt_len_arg *arg;
	bool clone_flow_key;

	/* The first netlink attribute in 'attr' is always
	 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
	 */
	cpl_arg = nla_data(attr);
	arg = nla_data(cpl_arg);

	len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len;
	max_len = arg->pkt_len;

	if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) ||
	    len <= max_len) {
		/* Second netlink attribute in 'attr' is always
		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
		 */
		actions = nla_next(cpl_arg, &rem);
		clone_flow_key = !arg->exec_for_lesser_equal;
	} else {
		/* Third netlink attribute in 'attr' is always
		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'.
		 */
		actions = nla_next(cpl_arg, &rem);
		actions = nla_next(actions, &rem);
		clone_flow_key = !arg->exec_for_greater;
	}

	return clone_execute(dp, skb, key, 0, nla_data(actions),
			     nla_len(actions), last, clone_flow_key);
}

static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	if (skb->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *nh;

		err = skb_ensure_writable(skb, skb_network_offset(skb) +
					  sizeof(*nh));
		if (unlikely(err))
			return err;

		nh = ipv6_hdr(skb);

		if (nh->hop_limit <= 1)
			return -EHOSTUNREACH;

		key->ip.ttl = --nh->hop_limit;
	} else if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *nh;
		u8 old_ttl;

		err = skb_ensure_writable(skb, skb_network_offset(skb) +
					  sizeof(*nh));
		if (unlikely(err))
			return err;

		nh = ip_hdr(skb);
		if (nh->ttl <= 1)
			return -EHOSTUNREACH;

		old_ttl = nh->ttl--;
		csum_replace2(&nh->check, htons(old_ttl << 8),
			      htons(nh->ttl << 8));
		key->ip.ttl = nh->ttl;
	}
	return 0;
}

#if IS_ENABLED(CONFIG_PSAMPLE)
static void execute_psample(struct datapath *dp, struct sk_buff *skb,
			    const struct nlattr *attr)
{
	struct psample_group psample_group = {};
	struct psample_metadata md = {};
	const struct nlattr *a;
	u32 rate;
	int rem;

	nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
		switch (nla_type(a)) {
		case OVS_PSAMPLE_ATTR_GROUP:
			psample_group.group_num = nla_get_u32(a);
			break;

		case OVS_PSAMPLE_ATTR_COOKIE:
			md.user_cookie = nla_data(a);
			md.user_cookie_len = nla_len(a);
			break;
		}
	}

	psample_group.net = ovs_dp_get_net(dp);
	md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex;
	md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
	md.rate_as_probability = 1;

	rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX;

	psample_sample_packet(&psample_group, skb, rate, &md);
}
#else
static void execute_psample(struct datapath *dp, struct sk_buff *skb,
			    const struct nlattr *attr)
{}
#endif
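
/* With rate_as_probability set, the 'rate' handed to psample is the
 * probability that this packet was sampled, as a fraction of U32_MAX:
 * OVS_CB(skb)->probability was recorded by sample() if a sample action
 * ran earlier, and U32_MAX (certainty) is reported otherwise.
 */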

/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len)
{
	const struct nlattr *a;
	int rem;

	for (a = attr, rem = len; rem > 0;
	     a = nla_next(a, &rem)) {
		int err = 0;

		if (trace_ovs_do_execute_action_enabled())
			trace_ovs_do_execute_action(dp, skb, key, a, rem);

		/* Actions that rightfully have to consume the skb should do
		 * so and return directly.
		 */
		switch (nla_type(a)) {
		case OVS_ACTION_ATTR_OUTPUT: {
			int port = nla_get_u32(a);
			struct sk_buff *clone;

			/* Every output action needs a separate clone
			 * of 'skb'. If the output action is the last
			 * action, the clone can be avoided.
			 */
			if (nla_is_last(a, rem)) {
				do_output(dp, skb, port, key);
				/* 'skb' has been used for output. */
				return 0;
			}

			clone = skb_clone(skb, GFP_ATOMIC);
			if (clone)
				do_output(dp, clone, port, key);
			OVS_CB(skb)->cutlen = 0;
			break;
		}

		case OVS_ACTION_ATTR_TRUNC: {
			struct ovs_action_trunc *trunc = nla_data(a);

			if (skb->len > trunc->max_len)
				OVS_CB(skb)->cutlen = skb->len - trunc->max_len;
			break;
		}

		case OVS_ACTION_ATTR_USERSPACE:
			output_userspace(dp, skb, key, a, attr,
					 len, OVS_CB(skb)->cutlen);
			OVS_CB(skb)->cutlen = 0;
			if (nla_is_last(a, rem)) {
				consume_skb(skb);
				return 0;
			}
			break;

		case OVS_ACTION_ATTR_HASH:
			execute_hash(skb, key, a);
			break;

		case OVS_ACTION_ATTR_PUSH_MPLS: {
			struct ovs_action_push_mpls *mpls = nla_data(a);

			err = push_mpls(skb, key, mpls->mpls_lse,
					mpls->mpls_ethertype, skb->mac_len);
			break;
		}
		case OVS_ACTION_ATTR_ADD_MPLS: {
			struct ovs_action_add_mpls *mpls = nla_data(a);
			__u16 mac_len = 0;

			if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK)
				mac_len = skb->mac_len;

			err = push_mpls(skb, key, mpls->mpls_lse,
					mpls->mpls_ethertype, mac_len);
			break;
		}
		case OVS_ACTION_ATTR_POP_MPLS:
			err = pop_mpls(skb, key, nla_get_be16(a));
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			err = push_vlan(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_VLAN:
			err = pop_vlan(skb, key);
			break;

		case OVS_ACTION_ATTR_RECIRC: {
			bool last = nla_is_last(a, rem);

			err = execute_recirc(dp, skb, key, a, last);
			if (last) {
				/* If this is the last action, the skb has
				 * been consumed or freed.
				 * Return immediately.
				 */
				return err;
			}
			break;
		}

		case OVS_ACTION_ATTR_SET:
			err = execute_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SET_MASKED:
		case OVS_ACTION_ATTR_SET_TO_MASKED:
			err = execute_masked_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SAMPLE: {
			bool last = nla_is_last(a, rem);

			err = sample(dp, skb, key, a, last);
			if (last)
				return err;

			break;
		}

		case OVS_ACTION_ATTR_CT:
			if (!is_flow_key_valid(key)) {
				err = ovs_flow_key_update(skb, key);
				if (err)
					return err;
			}

			err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
					     nla_data(a));

			/* Hide stolen IP fragments from user space. */
			if (err)
				return err == -EINPROGRESS ? 0 : err;
			break;

		case OVS_ACTION_ATTR_CT_CLEAR:
			err = ovs_ct_clear(skb, key);
			break;

		case OVS_ACTION_ATTR_PUSH_ETH:
			err = push_eth(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_ETH:
			err = pop_eth(skb, key);
			break;

		case OVS_ACTION_ATTR_PUSH_NSH:
			err = push_nsh(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_NSH:
			err = pop_nsh(skb, key);
			break;

		case OVS_ACTION_ATTR_METER:
			if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
				ovs_kfree_skb_reason(skb, OVS_DROP_METER);
				return 0;
			}
			break;

		case OVS_ACTION_ATTR_CLONE: {
			bool last = nla_is_last(a, rem);

			err = clone(dp, skb, key, a, last);
			if (last)
				return err;

			break;
		}

		case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
			bool last = nla_is_last(a, rem);

			err = execute_check_pkt_len(dp, skb, key, a, last);
			if (last)
				return err;

			break;
		}

		case OVS_ACTION_ATTR_DEC_TTL:
			err = execute_dec_ttl(skb, key);
			if (err == -EHOSTUNREACH)
				return dec_ttl_exception_handler(dp, skb,
								 key, a);
			break;

		case OVS_ACTION_ATTR_DROP: {
			enum ovs_drop_reason reason = nla_get_u32(a)
				? OVS_DROP_EXPLICIT_WITH_ERROR
				: OVS_DROP_EXPLICIT;

			ovs_kfree_skb_reason(skb, reason);
			return 0;
		}

		case OVS_ACTION_ATTR_PSAMPLE:
			execute_psample(dp, skb, a);
			OVS_CB(skb)->cutlen = 0;
			if (nla_is_last(a, rem)) {
				consume_skb(skb);
				return 0;
			}
			break;
		}

		if (unlikely(err)) {
			ovs_kfree_skb_reason(skb, OVS_DROP_ACTION_ERROR);
			return err;
		}
	}

	ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
	return 0;
}

/* Execute the actions on a clone of the packet. The execution affects
 * neither the original 'skb' nor the original 'key'.
 *
 * The execution may be deferred in case the actions cannot be executed
 * immediately.
 */
static int clone_execute(struct datapath *dp, struct sk_buff *skb,
			 struct sw_flow_key *key, u32 recirc_id,
			 const struct nlattr *actions, int len,
			 bool last, bool clone_flow_key)
{
	struct deferred_action *da;
	struct sw_flow_key *clone;

	skb = last ? skb : skb_clone(skb, GFP_ATOMIC);
	if (!skb) {
		/* Out of memory, skip this action. */
		return 0;
	}

	/* When clone_flow_key is false, the 'key' will not be changed by
	 * the actions, so it can be used directly.
	 * Otherwise, try to clone a key from the next recursion level of
	 * 'flow_keys'. If the clone succeeds, execute the actions
	 * without deferring.
	 */
	clone = clone_flow_key ? clone_key(key) : key;
	if (clone) {
		int err = 0;
		if (actions) { /* Sample action */
			if (clone_flow_key)
				__this_cpu_inc(ovs_pcpu_storage->exec_level);

			err = do_execute_actions(dp, skb, clone,
						 actions, len);

			if (clone_flow_key)
				__this_cpu_dec(ovs_pcpu_storage->exec_level);
		} else { /* Recirc action */
			clone->recirc_id = recirc_id;
			ovs_dp_process_packet(skb, clone);
		}
		return err;
	}

	/* Out of 'flow_keys' space. Defer actions. */
	da = add_deferred_actions(skb, key, actions, len);
	if (da) {
		if (!actions) { /* Recirc action */
			key = &da->pkt_key;
			key->recirc_id = recirc_id;
		}
	} else {
		/* Out of per-CPU action FIFO space. Drop the 'skb' and
		 * log an error.
		 */
		ovs_kfree_skb_reason(skb, OVS_DROP_DEFERRED_LIMIT);

		if (net_ratelimit()) {
			if (actions) { /* Sample action */
				pr_warn("%s: deferred action limit reached, drop sample action\n",
					ovs_dp_name(dp));
			} else { /* Recirc action */
				pr_warn("%s: deferred action limit reached, drop recirc action (recirc_id=%#x)\n",
					ovs_dp_name(dp), recirc_id);
			}
		}
	}
	return 0;
}

static void process_deferred_actions(struct datapath *dp)
{
	struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);

	/* Do not touch the FIFO if there are no deferred actions. */
	if (action_fifo_is_empty(fifo))
		return;

	/* Finish executing all deferred actions. */
	do {
		struct deferred_action *da = action_fifo_get(fifo);
		struct sk_buff *skb = da->skb;
		struct sw_flow_key *key = &da->pkt_key;
		const struct nlattr *actions = da->actions;
		int actions_len = da->actions_len;

		if (actions)
			do_execute_actions(dp, skb, key, actions, actions_len);
		else
			ovs_dp_process_packet(skb, key);
	} while (!action_fifo_is_empty(fifo));

	/* Reset FIFO for the next packet. */
	action_fifo_init(fifo);
}

/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
			const struct sw_flow_actions *acts,
			struct sw_flow_key *key)
{
	int err, level;

	level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level);
	if (unlikely(level > OVS_RECURSION_LIMIT)) {
		net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
				     ovs_dp_name(dp));
		ovs_kfree_skb_reason(skb, OVS_DROP_RECURSION_LIMIT);
		err = -ENETDOWN;
		goto out;
	}

	OVS_CB(skb)->acts_origlen = acts->orig_len;
	err = do_execute_actions(dp, skb, key,
				 acts->actions, acts->actions_len);

	if (level == 1)
		process_deferred_actions(dp);

out:
	__this_cpu_dec(ovs_pcpu_storage->exec_level);
	return err;
}