Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/net/if_geneve.c
266460 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2025-2026 Pouria Mousavizadeh Tehrani <[email protected]>
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
29
#include "opt_inet.h"
30
#include "opt_inet6.h"
31
32
#include <sys/param.h>
33
#include <sys/kernel.h>
34
#include <sys/lock.h>
35
#include <sys/hash.h>
36
#include <sys/malloc.h>
37
#include <sys/mbuf.h>
38
#include <sys/module.h>
39
#include <sys/refcount.h>
40
#include <sys/rmlock.h>
41
#include <sys/priv.h>
42
#include <sys/proc.h>
43
#include <sys/queue.h>
44
#include <sys/sdt.h>
45
#include <sys/socket.h>
46
#include <sys/socketvar.h>
47
#include <sys/sockio.h>
48
#include <sys/sx.h>
49
#include <sys/systm.h>
50
#include <sys/counter.h>
51
#include <sys/jail.h>
52
53
#include <net/bpf.h>
54
#include <net/ethernet.h>
55
#include <net/if.h>
56
#include <net/if_var.h>
57
#include <net/if_private.h>
58
#include <net/if_arp.h>
59
#include <net/if_clone.h>
60
#include <net/if_media.h>
61
#include <net/if_types.h>
62
#include <net/netisr.h>
63
#include <net/route.h>
64
#include <net/route/nhop.h>
65
66
#include <netinet/in.h>
67
#include <netinet/in_systm.h>
68
#include <netinet/in_var.h>
69
#include <netinet/in_pcb.h>
70
#include <netinet/ip.h>
71
#include <netinet/ip_var.h>
72
#include <netinet/ip6.h>
73
#include <netinet6/ip6_var.h>
74
#include <netinet6/in6_var.h>
75
#include <netinet6/scope6_var.h>
76
#include <netinet/udp.h>
77
#include <netinet/udp_var.h>
78
#include <netinet/in_fib.h>
79
#include <netinet6/in6_fib.h>
80
#include <netinet/ip_ecn.h>
81
#include <net/if_geneve.h>
82
83
#include <netlink/netlink.h>
84
#include <netlink/netlink_ctl.h>
85
#include <netlink/netlink_var.h>
86
#include <netlink/netlink_route.h>
87
#include <netlink/route/route_var.h>
88
89
#include <security/mac/mac_framework.h>
90
91
SDT_PROVIDER_DEFINE(if_geneve);
92
93
struct geneve_softc;
94
LIST_HEAD(geneve_softc_head, geneve_softc);
95
96
static struct sx geneve_sx;
97
SX_SYSINIT(geneve, &geneve_sx, "GENEVE global start/stop lock");
98
99
static unsigned geneve_osd_jail_slot;
100
101
union sockaddr_union {
102
struct sockaddr sa;
103
struct sockaddr_in sin;
104
struct sockaddr_in6 sin6;
105
};
106
107
struct geneve_socket_mc_info {
108
union sockaddr_union gnvsomc_saddr;
109
union sockaddr_union gnvsomc_gaddr;
110
int gnvsomc_ifidx;
111
int gnvsomc_users;
112
};
113
114
/*
 * The maximum MTU of an encapsulated geneve packet: the largest IP packet
 * minus the outer IPv4 (maximum length), UDP and geneve headers.
 */
#define GENEVE_MAX_L3MTU	(IP_MAXPACKET - \
				60 /* Maximum IPv4 header len */ - \
				sizeof(struct udphdr) - \
				sizeof(struct genevehdr))
/* Same limit, additionally less an inner Ethernet header with VLAN tag. */
#define GENEVE_MAX_MTU		(GENEVE_MAX_L3MTU - \
				ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN)

#define GENEVE_BASIC_IFCAPS	(IFCAP_LINKSTATE | IFCAP_JUMBO_MTU | IFCAP_NV)

#define GENEVE_VERSION		0
#define GENEVE_VNI_MASK		(GENEVE_VNI_MAX - 1)

#define GENEVE_HDR_VNI_SHIFT	8

/* Number of multicast slots per socket (size of gnvso_mc[]). */
#define GENEVE_SO_MC_MAX_GROUPS	32

/* Per-socket VNI hash: 1 << 6 buckets, selected by VNI modulo the size. */
#define GENEVE_SO_VNI_HASH_SHIFT	6
#define GENEVE_SO_VNI_HASH_SIZE		(1 << GENEVE_SO_VNI_HASH_SHIFT)
#define GENEVE_SO_VNI_HASH(_vni)	((_vni) % GENEVE_SO_VNI_HASH_SIZE)
134
135
struct geneve_socket {
136
struct socket *gnvso_sock;
137
struct rmlock gnvso_lock;
138
u_int gnvso_refcnt;
139
union sockaddr_union gnvso_laddr;
140
LIST_ENTRY(geneve_socket) gnvso_entry;
141
struct geneve_softc_head gnvso_vni_hash[GENEVE_SO_VNI_HASH_SIZE];
142
struct geneve_socket_mc_info gnvso_mc[GENEVE_SO_MC_MAX_GROUPS];
143
};
144
145
/* Read/write locking of a geneve socket; readers pass an rm_priotracker. */
#define GENEVE_SO_RLOCK(_gnvso, _p)	rm_rlock(&(_gnvso)->gnvso_lock, (_p))
#define GENEVE_SO_RUNLOCK(_gnvso, _p)	rm_runlock(&(_gnvso)->gnvso_lock, (_p))
#define GENEVE_SO_WLOCK(_gnvso)		rm_wlock(&(_gnvso)->gnvso_lock)
#define GENEVE_SO_WUNLOCK(_gnvso)	rm_wunlock(&(_gnvso)->gnvso_lock)
/* Assert the socket lock is held (any mode / write mode). */
#define GENEVE_SO_LOCK_ASSERT(_gnvso) \
	rm_assert(&(_gnvso)->gnvso_lock, RA_LOCKED)
#define GENEVE_SO_LOCK_WASSERT(_gnvso) \
	rm_assert(&(_gnvso)->gnvso_lock, RA_WLOCKED)

/* Reference counting of a geneve socket. */
#define GENEVE_SO_ACQUIRE(_gnvso)	refcount_acquire(&(_gnvso)->gnvso_refcnt)
#define GENEVE_SO_RELEASE(_gnvso)	refcount_release(&(_gnvso)->gnvso_refcnt)
156
157
struct gnv_ftable_entry {
158
LIST_ENTRY(gnv_ftable_entry) gnvfe_hash;
159
uint16_t gnvfe_flags;
160
uint8_t gnvfe_mac[ETHER_ADDR_LEN];
161
union sockaddr_union gnvfe_raddr;
162
time_t gnvfe_expire;
163
};
164
165
/* Values for gnvfe_flags. */
#define GENEVE_FE_FLAG_DYNAMIC		0x01
#define GENEVE_FE_FLAG_STATIC		0x02

/* Non-zero when the entry carries GENEVE_FE_FLAG_DYNAMIC. */
#define GENEVE_FE_IS_DYNAMIC(_fe) \
	((_fe)->gnvfe_flags & GENEVE_FE_FLAG_DYNAMIC)

/* Per-softc forwarding table: 1 << 9 buckets chosen by geneve_mac_hash(). */
#define GENEVE_SC_FTABLE_SHIFT		9
#define GENEVE_SC_FTABLE_SIZE		(1 << GENEVE_SC_FTABLE_SHIFT)
#define GENEVE_SC_FTABLE_MASK		(GENEVE_SC_FTABLE_SIZE - 1)
#define GENEVE_SC_FTABLE_HASH(_sc, _mac) \
	(geneve_mac_hash(_sc, _mac) % GENEVE_SC_FTABLE_SIZE)

LIST_HEAD(geneve_ftable_head, gnv_ftable_entry);
178
179
struct geneve_statistics {
180
uint32_t ftable_nospace;
181
uint32_t ftable_lock_upgrade_failed;
182
counter_u64_t txcsum;
183
counter_u64_t tso;
184
counter_u64_t rxcsum;
185
};
186
187
struct geneve_softc {
188
LIST_ENTRY(geneve_softc) gnv_entry;
189
190
struct ifnet *gnv_ifp;
191
uint32_t gnv_flags;
192
#define GENEVE_FLAG_INIT 0x0001
193
#define GENEVE_FLAG_RUNNING 0x0002
194
#define GENEVE_FLAG_TEARDOWN 0x0004
195
#define GENEVE_FLAG_LEARN 0x0008
196
#define GENEVE_FLAG_USER_MTU 0x0010
197
#define GENEVE_FLAG_TTL_INHERIT 0x0020
198
#define GENEVE_FLAG_DSCP_INHERIT 0x0040
199
#define GENEVE_FLAG_COLLECT_METADATA 0x0080
200
201
int gnv_reqcap;
202
int gnv_reqcap2;
203
struct geneve_socket *gnv_sock;
204
union sockaddr_union gnv_src_addr;
205
union sockaddr_union gnv_dst_addr;
206
uint32_t gnv_fibnum;
207
uint32_t gnv_vni;
208
uint32_t gnv_port_hash_key;
209
uint16_t gnv_proto;
210
uint16_t gnv_min_port;
211
uint16_t gnv_max_port;
212
uint8_t gnv_ttl;
213
enum ifla_geneve_df gnv_df;
214
215
/* Lookup table from MAC address to forwarding entry. */
216
uint32_t gnv_ftable_cnt;
217
uint32_t gnv_ftable_max;
218
uint32_t gnv_ftable_timeout;
219
uint32_t gnv_ftable_hash_key;
220
struct geneve_ftable_head *gnv_ftable;
221
222
/* Derived from gnv_dst_addr. */
223
struct gnv_ftable_entry gnv_default_fe;
224
225
struct ip_moptions *gnv_im4o;
226
struct ip6_moptions *gnv_im6o;
227
228
struct rmlock gnv_lock;
229
volatile u_int gnv_refcnt;
230
231
int gnv_so_mc_index;
232
struct geneve_statistics gnv_stats;
233
struct callout gnv_callout;
234
struct ether_addr gnv_hwaddr;
235
int gnv_mc_ifindex;
236
struct ifnet *gnv_mc_ifp;
237
struct ifmedia gnv_media;
238
char gnv_mc_ifname[IFNAMSIZ];
239
240
/* For rate limiting errors on the tx fast path. */
241
struct timeval err_time;
242
int err_pps;
243
};
244
245
/* Read/write locking of a softc; readers pass an rm_priotracker. */
#define GENEVE_RLOCK(_sc, _p)		rm_rlock(&(_sc)->gnv_lock, (_p))
#define GENEVE_RUNLOCK(_sc, _p)		rm_runlock(&(_sc)->gnv_lock, (_p))
#define GENEVE_WLOCK(_sc)		rm_wlock(&(_sc)->gnv_lock)
#define GENEVE_WUNLOCK(_sc)		rm_wunlock(&(_sc)->gnv_lock)
#define GENEVE_LOCK_WOWNED(_sc)		rm_wowned(&(_sc)->gnv_lock)
#define GENEVE_LOCK_ASSERT(_sc)		rm_assert(&(_sc)->gnv_lock, RA_LOCKED)
#define GENEVE_LOCK_WASSERT(_sc)	rm_assert(&(_sc)->gnv_lock, RA_WLOCKED)
/*
 * Drop the softc lock in whichever mode it is held: the write lock when
 * rm_wowned() reports it, otherwise the read lock tracked by _p.
 */
#define GENEVE_UNLOCK(_sc, _p) do {					\
	if (GENEVE_LOCK_WOWNED(_sc))					\
		GENEVE_WUNLOCK(_sc);					\
	else								\
		GENEVE_RUNLOCK(_sc, _p);				\
} while (0)

/* Reference counting of a softc. */
#define GENEVE_ACQUIRE(_sc)		refcount_acquire(&(_sc)->gnv_refcnt)
#define GENEVE_RELEASE(_sc)		refcount_release(&(_sc)->gnv_refcnt)

/* Const-preserving casts from a generic sockaddr pointer. */
#define SATOCONSTSIN(sa)		((const struct sockaddr_in *)(sa))
#define SATOCONSTSIN6(sa)		((const struct sockaddr_in6 *)(sa))
264
265
struct geneve_pkt_info {
266
u_int isr;
267
uint16_t ethertype;
268
uint8_t ecn;
269
uint8_t ttl;
270
};
271
272
struct nl_parsed_geneve {
273
/* essential */
274
uint32_t ifla_vni;
275
uint16_t ifla_proto;
276
struct sockaddr *ifla_local;
277
struct sockaddr *ifla_remote;
278
uint16_t ifla_local_port;
279
uint16_t ifla_remote_port;
280
281
/* optional */
282
struct ifla_geneve_port_range ifla_port_range;
283
enum ifla_geneve_df ifla_df;
284
uint8_t ifla_ttl;
285
bool ifla_ttl_inherit;
286
bool ifla_dscp_inherit;
287
bool ifla_external;
288
289
/* l2 specific */
290
bool ifla_ftable_learn;
291
bool ifla_ftable_flush;
292
uint32_t ifla_ftable_max;
293
uint32_t ifla_ftable_timeout;
294
uint32_t ifla_ftable_count; /* read-only */
295
296
/* multicast specific */
297
char *ifla_mc_ifname;
298
uint32_t ifla_mc_ifindex; /* read-only */
299
};
300
301
/* The multicast-based learning parts of the code are taken from if_vxlan */
302
static int geneve_ftable_addr_cmp(const uint8_t *, const uint8_t *);
303
static void geneve_ftable_init(struct geneve_softc *);
304
static void geneve_ftable_fini(struct geneve_softc *);
305
static void geneve_ftable_flush(struct geneve_softc *, int);
306
static void geneve_ftable_expire(struct geneve_softc *);
307
static int geneve_ftable_update_locked(struct geneve_softc *,
308
const union sockaddr_union *, const uint8_t *,
309
struct rm_priotracker *);
310
static int geneve_ftable_learn(struct geneve_softc *,
311
const struct sockaddr *, const uint8_t *);
312
313
static struct gnv_ftable_entry *
314
geneve_ftable_entry_alloc(void);
315
static void geneve_ftable_entry_free(struct gnv_ftable_entry *);
316
static void geneve_ftable_entry_init(struct geneve_softc *,
317
struct gnv_ftable_entry *, const uint8_t *,
318
const struct sockaddr *, uint32_t);
319
static void geneve_ftable_entry_destroy(struct geneve_softc *,
320
struct gnv_ftable_entry *);
321
static int geneve_ftable_entry_insert(struct geneve_softc *,
322
struct gnv_ftable_entry *);
323
static struct gnv_ftable_entry *
324
geneve_ftable_entry_lookup(struct geneve_softc *,
325
const uint8_t *);
326
327
static struct geneve_socket *
328
geneve_socket_alloc(union sockaddr_union *laddr);
329
static void geneve_socket_destroy(struct geneve_socket *);
330
static void geneve_socket_release(struct geneve_socket *);
331
static struct geneve_socket *
332
geneve_socket_lookup(union sockaddr_union *);
333
static void geneve_socket_insert(struct geneve_socket *);
334
static int geneve_socket_init(struct geneve_socket *, struct ifnet *);
335
static int geneve_socket_bind(struct geneve_socket *, struct ifnet *);
336
static int geneve_socket_create(struct ifnet *, int,
337
const union sockaddr_union *, struct geneve_socket **);
338
static int geneve_socket_set_df(struct geneve_socket *, bool);
339
340
static struct geneve_socket *
341
geneve_socket_mc_lookup(const union sockaddr_union *);
342
static int geneve_sockaddr_mc_info_match(
343
const struct geneve_socket_mc_info *,
344
const union sockaddr_union *,
345
const union sockaddr_union *, int);
346
static int geneve_socket_mc_join_group(struct geneve_socket *,
347
const union sockaddr_union *, const union sockaddr_union *,
348
int *, union sockaddr_union *);
349
static int geneve_socket_mc_leave_group(struct geneve_socket *,
350
const union sockaddr_union *,
351
const union sockaddr_union *, int);
352
static int geneve_socket_mc_add_group(struct geneve_socket *,
353
const union sockaddr_union *,
354
const union sockaddr_union *, int, int *);
355
static void geneve_socket_mc_release_group(struct geneve_socket *, int);
356
357
static struct geneve_softc *
358
geneve_socket_lookup_softc_locked(struct geneve_socket *,
359
uint32_t);
360
static struct geneve_softc *
361
geneve_socket_lookup_softc(struct geneve_socket *, uint32_t);
362
static int geneve_socket_insert_softc(struct geneve_socket *,
363
struct geneve_softc *);
364
static void geneve_socket_remove_softc(struct geneve_socket *,
365
struct geneve_softc *);
366
367
static struct ifnet *
368
geneve_multicast_if_ref(struct geneve_softc *, uint32_t);
369
static void geneve_free_multicast(struct geneve_softc *);
370
static int geneve_setup_multicast_interface(struct geneve_softc *);
371
372
static int geneve_setup_multicast(struct geneve_softc *);
373
static int geneve_setup_socket(struct geneve_softc *);
374
static void geneve_setup_interface_hdrlen(struct geneve_softc *);
375
static int geneve_valid_init_config(struct geneve_softc *);
376
static void geneve_init_complete(struct geneve_softc *);
377
static void geneve_init(void *);
378
static void geneve_release(struct geneve_softc *);
379
static void geneve_teardown_wait(struct geneve_softc *);
380
static void geneve_teardown_locked(struct geneve_softc *);
381
static void geneve_teardown(struct geneve_softc *);
382
static void geneve_timer(void *);
383
384
static int geneve_flush_ftable(struct geneve_softc *, bool);
385
static uint16_t geneve_get_local_port(struct geneve_softc *);
386
static uint16_t geneve_get_remote_port(struct geneve_softc *);
387
388
static int geneve_set_vni_nl(struct geneve_softc *, struct nl_pstate *,
389
uint32_t);
390
static int geneve_set_local_addr_nl(struct geneve_softc *, struct nl_pstate *,
391
struct sockaddr *);
392
static int geneve_set_remote_addr_nl(struct geneve_softc *, struct nl_pstate *,
393
struct sockaddr *);
394
static int geneve_set_local_port_nl(struct geneve_softc *, struct nl_pstate *,
395
uint16_t);
396
static int geneve_set_remote_port_nl(struct geneve_softc *, struct nl_pstate *,
397
uint16_t);
398
static int geneve_set_port_range_nl(struct geneve_softc *, struct nl_pstate *,
399
struct ifla_geneve_port_range);
400
static int geneve_set_df_nl(struct geneve_softc *, struct nl_pstate *,
401
enum ifla_geneve_df);
402
static int geneve_set_ttl_nl(struct geneve_softc *, struct nl_pstate *,
403
uint8_t);
404
static int geneve_set_ttl_inherit_nl(struct geneve_softc *, struct nl_pstate *,
405
bool);
406
static int geneve_set_dscp_inherit_nl(struct geneve_softc *, struct nl_pstate *,
407
bool);
408
static int geneve_set_collect_metadata_nl(struct geneve_softc *,
409
struct nl_pstate *, bool);
410
static int geneve_set_learn_nl(struct geneve_softc *, struct nl_pstate *,
411
bool);
412
static int geneve_set_ftable_max_nl(struct geneve_softc *, struct nl_pstate *,
413
uint32_t);
414
static int geneve_set_ftable_timeout_nl(struct geneve_softc *,
415
struct nl_pstate *, uint32_t);
416
static int geneve_set_mc_if_nl(struct geneve_softc *, struct nl_pstate *,
417
char *);
418
static int geneve_flush_ftable_nl(struct geneve_softc *, struct nl_pstate *,
419
bool);
420
static void geneve_get_local_addr_nl(struct geneve_softc *, struct nl_writer *);
421
static void geneve_get_remote_addr_nl(struct geneve_softc *, struct nl_writer *);
422
423
static int geneve_ioctl_ifflags(struct geneve_softc *);
424
static int geneve_ioctl(struct ifnet *, u_long, caddr_t);
425
426
static uint16_t geneve_pick_source_port(struct geneve_softc *, struct mbuf *);
427
static void geneve_encap_header(struct geneve_softc *, struct mbuf *,
428
int, uint16_t, uint16_t, uint16_t);
429
static uint16_t geneve_get_ethertype(struct mbuf *);
430
static int geneve_inherit_l3_hdr(struct mbuf *, struct geneve_softc *,
431
uint16_t, uint8_t *, uint8_t *, u_short *);
432
#ifdef INET
433
static int geneve_encap4(struct geneve_softc *,
434
const union sockaddr_union *, struct mbuf *);
435
#endif
436
#ifdef INET6
437
static int geneve_encap6(struct geneve_softc *,
438
const union sockaddr_union *, struct mbuf *);
439
#endif
440
static int geneve_transmit(struct ifnet *, struct mbuf *);
441
static void geneve_qflush(struct ifnet *);
442
static int geneve_output(struct ifnet *, struct mbuf *,
443
const struct sockaddr *, struct route *);
444
static uint32_t geneve_map_etype_to_af(uint32_t);
445
static bool geneve_udp_input(struct mbuf *, int, struct inpcb *,
446
const struct sockaddr *, void *);
447
static int geneve_input_ether(struct geneve_softc *, struct mbuf **,
448
const struct sockaddr *, struct geneve_pkt_info *);
449
static int geneve_input_inherit(struct geneve_softc *,
450
struct mbuf **, int, struct geneve_pkt_info *);
451
static int geneve_next_option(struct geneve_socket *, struct genevehdr *,
452
struct mbuf **);
453
static void geneve_input_csum(struct mbuf *m, struct ifnet *ifp,
454
counter_u64_t rxcsum);
455
456
static void geneve_stats_alloc(struct geneve_softc *);
457
static void geneve_stats_free(struct geneve_softc *);
458
static void geneve_set_default_config(struct geneve_softc *);
459
static int geneve_set_reqcap(struct geneve_softc *, struct ifnet *, int,
460
int);
461
static void geneve_set_hwcaps(struct geneve_softc *);
462
static int geneve_clone_create(struct if_clone *, char *, size_t,
463
struct ifc_data *, struct ifnet **);
464
static int geneve_clone_destroy(struct if_clone *, struct ifnet *,
465
uint32_t);
466
static int geneve_clone_create_nl(struct if_clone *, char *, size_t,
467
struct ifc_data_nl *);
468
static int geneve_clone_modify_nl(struct ifnet *, struct ifc_data_nl *);
469
static void geneve_clone_dump_nl(struct ifnet *, struct nl_writer *);
470
471
static uint32_t geneve_mac_hash(struct geneve_softc *, const uint8_t *);
472
static int geneve_media_change(struct ifnet *);
473
static void geneve_media_status(struct ifnet *, struct ifmediareq *);
474
475
static int geneve_sockaddr_cmp(const union sockaddr_union *,
476
const struct sockaddr *);
477
static void geneve_sockaddr_copy(union sockaddr_union *,
478
const struct sockaddr *);
479
static int geneve_sockaddr_in_equal(const union sockaddr_union *,
480
const struct sockaddr *);
481
static void geneve_sockaddr_in_copy(union sockaddr_union *,
482
const struct sockaddr *);
483
static int geneve_sockaddr_supported(const union sockaddr_union *, int);
484
static int geneve_sockaddr_in_any(const union sockaddr_union *);
485
486
static int geneve_can_change_config(struct geneve_softc *);
487
static int geneve_check_proto(uint16_t);
488
static int geneve_check_multicast_addr(const union sockaddr_union *);
489
static int geneve_check_sockaddr(const union sockaddr_union *, const int);
490
491
static int geneve_prison_remove(void *, void *);
492
static void vnet_geneve_load(void);
493
static void vnet_geneve_unload(void);
494
static void geneve_module_init(void);
495
static void geneve_module_deinit(void);
496
static int geneve_modevent(module_t, int, void *);
497
498
499
static const char geneve_name[] = "geneve";
500
static MALLOC_DEFINE(M_GENEVE, geneve_name,
501
"Generic Network Virtualization Encapsulation Interface");
502
#define MTAG_GENEVE_LOOP 0x93d66dc0 /* geneve mtag */
503
504
VNET_DEFINE_STATIC(struct if_clone *, geneve_cloner);
505
#define V_geneve_cloner VNET(geneve_cloner)
506
507
static struct mtx geneve_list_mtx;
508
#define GENEVE_LIST_LOCK() mtx_lock(&geneve_list_mtx)
509
#define GENEVE_LIST_UNLOCK() mtx_unlock(&geneve_list_mtx)
510
511
static LIST_HEAD(, geneve_socket) geneve_socket_list = LIST_HEAD_INITIALIZER(geneve_socket_list);
512
513
/* Default maximum number of addresses in the forwarding table. */
514
#define GENEVE_FTABLE_MAX 2000
515
516
/* Timeout (in seconds) of addresses learned in the forwarding table. */
517
#define GENEVE_FTABLE_TIMEOUT (20 * 60)
518
519
/* Maximum timeout (in seconds) of addresses learned in the forwarding table. */
520
#define GENEVE_FTABLE_MAX_TIMEOUT (60 * 60 * 24)
521
522
/* Number of seconds between pruning attempts of the forwarding table. */
523
#define GENEVE_FTABLE_PRUNE (5 * 60)
524
525
static int geneve_ftable_prune_period = GENEVE_FTABLE_PRUNE;
526
527
#define _OUT(_field) offsetof(struct nl_parsed_geneve, _field)
528
static const struct nlattr_parser nla_p_geneve_create[] = {
529
{ .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 },
530
};
531
#undef _OUT
532
NL_DECLARE_ATTR_PARSER(geneve_create_parser, nla_p_geneve_create);
533
534
#define _OUT(_field) offsetof(struct nl_parsed_geneve, _field)
535
static const struct nlattr_parser nla_p_geneve[] = {
536
{ .type = IFLA_GENEVE_ID, .off = _OUT(ifla_vni), .cb = nlattr_get_uint32 },
537
{ .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 },
538
{ .type = IFLA_GENEVE_LOCAL, .off = _OUT(ifla_local), .cb = nlattr_get_ip },
539
{ .type = IFLA_GENEVE_REMOTE, .off = _OUT(ifla_remote), .cb = nlattr_get_ip },
540
{ .type = IFLA_GENEVE_LOCAL_PORT, .off = _OUT(ifla_local_port), .cb = nlattr_get_uint16 },
541
{ .type = IFLA_GENEVE_PORT, .off = _OUT(ifla_remote_port), .cb = nlattr_get_uint16 },
542
{ .type = IFLA_GENEVE_PORT_RANGE, .off = _OUT(ifla_port_range),
543
.arg = (void *)sizeof(struct ifla_geneve_port_range), .cb = nlattr_get_bytes },
544
{ .type = IFLA_GENEVE_DF, .off = _OUT(ifla_df), .cb = nlattr_get_uint8 },
545
{ .type = IFLA_GENEVE_TTL, .off = _OUT(ifla_ttl), .cb = nlattr_get_uint8 },
546
{ .type = IFLA_GENEVE_TTL_INHERIT, .off = _OUT(ifla_ttl_inherit), .cb = nlattr_get_bool },
547
{ .type = IFLA_GENEVE_DSCP_INHERIT, .off = _OUT(ifla_dscp_inherit), .cb = nlattr_get_bool },
548
{ .type = IFLA_GENEVE_COLLECT_METADATA, .off = _OUT(ifla_external), .cb = nlattr_get_bool },
549
{ .type = IFLA_GENEVE_FTABLE_LEARN, .off = _OUT(ifla_ftable_learn), .cb = nlattr_get_bool },
550
{ .type = IFLA_GENEVE_FTABLE_FLUSH, .off = _OUT(ifla_ftable_flush), .cb = nlattr_get_bool },
551
{ .type = IFLA_GENEVE_FTABLE_MAX, .off = _OUT(ifla_ftable_max), .cb = nlattr_get_uint32 },
552
{ .type = IFLA_GENEVE_FTABLE_TIMEOUT, .off = _OUT(ifla_ftable_timeout), .cb = nlattr_get_uint32 },
553
{ .type = IFLA_GENEVE_MC_IFNAME, .off = _OUT(ifla_mc_ifname), .cb = nlattr_get_string },
554
};
555
#undef _OUT
556
NL_DECLARE_ATTR_PARSER(geneve_modify_parser, nla_p_geneve);
557
558
static const struct nlhdr_parser *all_parsers[] = {
559
&geneve_create_parser, &geneve_modify_parser,
560
};
561
562
/*
 * Compare two Ethernet addresses byte by byte.
 *
 * Returns 0 when all ETHER_ADDR_LEN bytes are equal; otherwise the signed
 * difference a[i] - b[i] at the first index where the addresses differ.
 */
static int
geneve_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int diff = 0;

	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		diff = (int)a[idx] - (int)b[idx];
		if (diff != 0)
			break;
	}

	return (diff);
}
572
573
static void
574
geneve_ftable_init(struct geneve_softc *sc)
575
{
576
int i;
577
578
sc->gnv_ftable = malloc(sizeof(struct geneve_ftable_head) *
579
GENEVE_SC_FTABLE_SIZE, M_GENEVE, M_ZERO | M_WAITOK);
580
581
for (i = 0; i < GENEVE_SC_FTABLE_SIZE; i++)
582
LIST_INIT(&sc->gnv_ftable[i]);
583
sc->gnv_ftable_hash_key = arc4random();
584
}
585
586
static void
587
geneve_ftable_fini(struct geneve_softc *sc)
588
{
589
int i;
590
591
for (i = 0; i < GENEVE_SC_FTABLE_SIZE; i++) {
592
KASSERT(LIST_EMPTY(&sc->gnv_ftable[i]),
593
("%s: geneve %p ftable[%d] not empty", __func__, sc, i));
594
}
595
MPASS(sc->gnv_ftable_cnt == 0);
596
597
free(sc->gnv_ftable, M_GENEVE);
598
sc->gnv_ftable = NULL;
599
}
600
601
static void
602
geneve_ftable_flush(struct geneve_softc *sc, int all)
603
{
604
struct gnv_ftable_entry *fe, *tfe;
605
606
for (int i = 0; i < GENEVE_SC_FTABLE_SIZE; i++) {
607
LIST_FOREACH_SAFE(fe, &sc->gnv_ftable[i], gnvfe_hash, tfe) {
608
if (all || GENEVE_FE_IS_DYNAMIC(fe))
609
geneve_ftable_entry_destroy(sc, fe);
610
}
611
}
612
}
613
614
static void
615
geneve_ftable_expire(struct geneve_softc *sc)
616
{
617
struct gnv_ftable_entry *fe, *tfe;
618
619
GENEVE_LOCK_WASSERT(sc);
620
621
for (int i = 0; i < GENEVE_SC_FTABLE_SIZE; i++) {
622
LIST_FOREACH_SAFE(fe, &sc->gnv_ftable[i], gnvfe_hash, tfe) {
623
if (GENEVE_FE_IS_DYNAMIC(fe) &&
624
time_uptime >= fe->gnvfe_expire)
625
geneve_ftable_entry_destroy(sc, fe);
626
}
627
}
628
}
629
630
static int
631
geneve_ftable_update_locked(struct geneve_softc *sc,
632
const union sockaddr_union *unsa, const uint8_t *mac,
633
struct rm_priotracker *tracker)
634
{
635
struct gnv_ftable_entry *fe;
636
int error;
637
638
GENEVE_LOCK_ASSERT(sc);
639
640
again:
641
/*
642
* A forwarding entry for this MAC address might already exist. If
643
* so, update it, otherwise create a new one. We may have to upgrade
644
* the lock if we have to change or create an entry.
645
*/
646
fe = geneve_ftable_entry_lookup(sc, mac);
647
if (fe != NULL) {
648
fe->gnvfe_expire = time_uptime + sc->gnv_ftable_timeout;
649
650
if (!GENEVE_FE_IS_DYNAMIC(fe) ||
651
geneve_sockaddr_in_equal(&fe->gnvfe_raddr, &unsa->sa))
652
return (0);
653
if (!GENEVE_LOCK_WOWNED(sc)) {
654
GENEVE_RUNLOCK(sc, tracker);
655
GENEVE_WLOCK(sc);
656
sc->gnv_stats.ftable_lock_upgrade_failed++;
657
goto again;
658
}
659
geneve_sockaddr_in_copy(&fe->gnvfe_raddr, &unsa->sa);
660
return (0);
661
}
662
663
if (!GENEVE_LOCK_WOWNED(sc)) {
664
GENEVE_RUNLOCK(sc, tracker);
665
GENEVE_WLOCK(sc);
666
sc->gnv_stats.ftable_lock_upgrade_failed++;
667
goto again;
668
}
669
670
if (sc->gnv_ftable_cnt >= sc->gnv_ftable_max) {
671
sc->gnv_stats.ftable_nospace++;
672
return (ENOSPC);
673
}
674
675
fe = geneve_ftable_entry_alloc();
676
if (fe == NULL)
677
return (ENOMEM);
678
679
geneve_ftable_entry_init(sc, fe, mac, &unsa->sa, GENEVE_FE_FLAG_DYNAMIC);
680
681
/* The prior lookup failed, so the insert should not. */
682
error = geneve_ftable_entry_insert(sc, fe);
683
MPASS(error == 0);
684
685
return (error);
686
}
687
688
static int
689
geneve_ftable_learn(struct geneve_softc *sc, const struct sockaddr *sa,
690
const uint8_t *mac)
691
{
692
struct rm_priotracker tracker;
693
union sockaddr_union unsa;
694
int error;
695
696
/*
697
* The source port may be randomly selected by the remote host, so
698
* use the port of the default destination address.
699
*/
700
geneve_sockaddr_copy(&unsa, sa);
701
unsa.sin.sin_port = sc->gnv_dst_addr.sin.sin_port;
702
703
#ifdef INET6
704
if (unsa.sa.sa_family == AF_INET6) {
705
error = sa6_embedscope(&unsa.sin6, V_ip6_use_defzone);
706
if (error)
707
return (error);
708
}
709
#endif
710
711
GENEVE_RLOCK(sc, &tracker);
712
error = geneve_ftable_update_locked(sc, &unsa, mac, &tracker);
713
GENEVE_UNLOCK(sc, &tracker);
714
715
return (error);
716
}
717
718
static struct gnv_ftable_entry *
719
geneve_ftable_entry_alloc(void)
720
{
721
struct gnv_ftable_entry *fe;
722
723
fe = malloc(sizeof(*fe), M_GENEVE, M_ZERO | M_NOWAIT);
724
725
return (fe);
726
}
727
728
static void
729
geneve_ftable_entry_free(struct gnv_ftable_entry *fe)
730
{
731
732
free(fe, M_GENEVE);
733
}
734
735
static void
736
geneve_ftable_entry_init(struct geneve_softc *sc, struct gnv_ftable_entry *fe,
737
const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
738
{
739
740
fe->gnvfe_flags = flags;
741
fe->gnvfe_expire = time_uptime + sc->gnv_ftable_timeout;
742
memcpy(fe->gnvfe_mac, mac, ETHER_ADDR_LEN);
743
geneve_sockaddr_copy(&fe->gnvfe_raddr, sa);
744
}
745
746
static void
747
geneve_ftable_entry_destroy(struct geneve_softc *sc,
748
struct gnv_ftable_entry *fe)
749
{
750
751
sc->gnv_ftable_cnt--;
752
LIST_REMOVE(fe, gnvfe_hash);
753
geneve_ftable_entry_free(fe);
754
}
755
756
static int
757
geneve_ftable_entry_insert(struct geneve_softc *sc,
758
struct gnv_ftable_entry *fe)
759
{
760
struct gnv_ftable_entry *lfe;
761
uint32_t hash;
762
int dir;
763
764
GENEVE_LOCK_WASSERT(sc);
765
hash = GENEVE_SC_FTABLE_HASH(sc, fe->gnvfe_mac);
766
767
lfe = LIST_FIRST(&sc->gnv_ftable[hash]);
768
if (lfe == NULL) {
769
LIST_INSERT_HEAD(&sc->gnv_ftable[hash], fe, gnvfe_hash);
770
goto out;
771
}
772
773
do {
774
dir = geneve_ftable_addr_cmp(fe->gnvfe_mac, lfe->gnvfe_mac);
775
if (dir == 0)
776
return (EEXIST);
777
if (dir > 0) {
778
LIST_INSERT_BEFORE(lfe, fe, gnvfe_hash);
779
goto out;
780
} else if (LIST_NEXT(lfe, gnvfe_hash) == NULL) {
781
LIST_INSERT_AFTER(lfe, fe, gnvfe_hash);
782
goto out;
783
} else
784
lfe = LIST_NEXT(lfe, gnvfe_hash);
785
} while (lfe != NULL);
786
787
out:
788
sc->gnv_ftable_cnt++;
789
790
return (0);
791
}
792
793
static struct gnv_ftable_entry *
794
geneve_ftable_entry_lookup(struct geneve_softc *sc, const uint8_t *mac)
795
{
796
struct gnv_ftable_entry *fe;
797
uint32_t hash;
798
int dir;
799
800
GENEVE_LOCK_ASSERT(sc);
801
802
hash = GENEVE_SC_FTABLE_HASH(sc, mac);
803
LIST_FOREACH(fe, &sc->gnv_ftable[hash], gnvfe_hash) {
804
dir = geneve_ftable_addr_cmp(mac, fe->gnvfe_mac);
805
if (dir == 0)
806
return (fe);
807
if (dir > 0)
808
break;
809
}
810
811
return (NULL);
812
}
813
814
static struct geneve_socket *
815
geneve_socket_alloc(union sockaddr_union *laddr)
816
{
817
struct geneve_socket *gnvso;
818
819
gnvso = malloc(sizeof(*gnvso), M_GENEVE, M_WAITOK | M_ZERO);
820
rm_init(&gnvso->gnvso_lock, "genevesorm");
821
refcount_init(&gnvso->gnvso_refcnt, 0);
822
for (int i = 0; i < GENEVE_SO_VNI_HASH_SIZE; i++)
823
LIST_INIT(&gnvso->gnvso_vni_hash[i]);
824
gnvso->gnvso_laddr = *laddr;
825
826
return (gnvso);
827
}
828
829
static void
830
geneve_socket_destroy(struct geneve_socket *gnvso)
831
{
832
struct socket *so;
833
834
so = gnvso->gnvso_sock;
835
if (so != NULL) {
836
gnvso->gnvso_sock = NULL;
837
soclose(so);
838
}
839
840
rm_destroy(&gnvso->gnvso_lock);
841
free(gnvso, M_GENEVE);
842
}
843
844
static void
845
geneve_socket_release(struct geneve_socket *gnvso)
846
{
847
int destroy;
848
849
GENEVE_LIST_LOCK();
850
destroy = GENEVE_SO_RELEASE(gnvso);
851
if (destroy != 0)
852
LIST_REMOVE(gnvso, gnvso_entry);
853
GENEVE_LIST_UNLOCK();
854
855
if (destroy != 0)
856
geneve_socket_destroy(gnvso);
857
}
858
859
static struct geneve_socket *
860
geneve_socket_lookup(union sockaddr_union *unsa)
861
{
862
struct geneve_socket *gnvso;
863
864
GENEVE_LIST_LOCK();
865
LIST_FOREACH(gnvso, &geneve_socket_list, gnvso_entry) {
866
if (geneve_sockaddr_cmp(&gnvso->gnvso_laddr, &unsa->sa) == 0) {
867
GENEVE_SO_ACQUIRE(gnvso);
868
break;
869
}
870
}
871
GENEVE_LIST_UNLOCK();
872
873
return (gnvso);
874
}
875
876
static void
877
geneve_socket_insert(struct geneve_socket *gnvso)
878
{
879
880
GENEVE_LIST_LOCK();
881
GENEVE_SO_ACQUIRE(gnvso);
882
LIST_INSERT_HEAD(&geneve_socket_list, gnvso, gnvso_entry);
883
GENEVE_LIST_UNLOCK();
884
}
885
886
static int
887
geneve_socket_init(struct geneve_socket *gnvso, struct ifnet *ifp)
888
{
889
struct thread *td;
890
int error;
891
892
td = curthread;
893
error = socreate(gnvso->gnvso_laddr.sa.sa_family, &gnvso->gnvso_sock,
894
SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
895
if (error) {
896
if_printf(ifp, "cannot create socket: %d\n", error);
897
return (error);
898
}
899
900
/*
901
* XXX: If Geneve traffic is shared with other UDP listeners on
902
* the same IP address, tunnel endpoints SHOULD implement a mechanism
903
* to ensure ICMP return traffic arising from network errors is
904
* directed to the correct listener. Unfortunately,
905
* udp_set_kernel_tunneling does not handle icmp errors from transit
906
* devices other than specified source.
907
*/
908
error = udp_set_kernel_tunneling(gnvso->gnvso_sock,
909
geneve_udp_input, NULL, gnvso);
910
if (error)
911
if_printf(ifp, "cannot set tunneling function: %d\n", error);
912
913
return (error);
914
}
915
916
static int
917
geneve_socket_bind(struct geneve_socket *gnvso, struct ifnet *ifp)
918
{
919
union sockaddr_union laddr;
920
int error;
921
922
laddr = gnvso->gnvso_laddr;
923
error = sobind(gnvso->gnvso_sock, &laddr.sa, curthread);
924
if (error)
925
return (error);
926
927
return (0);
928
}
929
930
static int
931
geneve_socket_create(struct ifnet *ifp, int multicast,
932
const union sockaddr_union *unsa, struct geneve_socket **xgnvso)
933
{
934
union sockaddr_union laddr;
935
struct geneve_socket *gnvso;
936
int error;
937
938
laddr = *unsa;
939
940
/*
941
* If this socket will be multicast, then only the local port
942
* must be specified when binding.
943
*/
944
if (multicast != 0) {
945
switch (laddr.sa.sa_family) {
946
#ifdef INET
947
case AF_INET:
948
laddr.sin.sin_addr.s_addr = INADDR_ANY;
949
break;
950
#endif
951
#ifdef INET6
952
case AF_INET6:
953
laddr.sin6.sin6_addr = in6addr_any;
954
break;
955
#endif
956
default:
957
return (EAFNOSUPPORT);
958
}
959
}
960
gnvso = geneve_socket_alloc(&laddr);
961
if (gnvso == NULL)
962
return (ENOMEM);
963
964
error = geneve_socket_init(gnvso, ifp);
965
if (error)
966
goto fail;
967
968
error = geneve_socket_bind(gnvso, ifp);
969
if (error)
970
goto fail;
971
972
/*
973
* There is a small window between the bind completing and
974
* inserting the socket, so that a concurrent create may fail.
975
* Let's not worry about that for now.
976
*/
977
if_printf(ifp, "new geneve socket inserted to socket list\n");
978
geneve_socket_insert(gnvso);
979
*xgnvso = gnvso;
980
981
return (0);
982
983
fail:
984
if_printf(ifp, "can't create new socket (error: %d)\n", error);
985
geneve_socket_destroy(gnvso);
986
987
return (error);
988
}
989
990
static struct geneve_socket *
991
geneve_socket_mc_lookup(const union sockaddr_union *unsa)
992
{
993
union sockaddr_union laddr;
994
995
laddr = *unsa;
996
997
switch (laddr.sa.sa_family) {
998
#ifdef INET
999
case AF_INET:
1000
laddr.sin.sin_addr.s_addr = INADDR_ANY;
1001
break;
1002
#endif
1003
#ifdef INET6
1004
case AF_INET6:
1005
laddr.sin6.sin6_addr = in6addr_any;
1006
break;
1007
#endif
1008
default:
1009
return (NULL);
1010
}
1011
1012
return (geneve_socket_lookup(&laddr));
1013
}
1014
1015
static int
1016
geneve_sockaddr_mc_info_match(const struct geneve_socket_mc_info *mc,
1017
const union sockaddr_union *group, const union sockaddr_union *local,
1018
int ifidx)
1019
{
1020
1021
if (!geneve_sockaddr_in_any(local) &&
1022
!geneve_sockaddr_in_equal(&mc->gnvsomc_saddr, &local->sa))
1023
return (0);
1024
if (!geneve_sockaddr_in_equal(&mc->gnvsomc_gaddr, &group->sa))
1025
return (0);
1026
if (ifidx != 0 && ifidx != mc->gnvsomc_ifidx)
1027
return (0);
1028
1029
return (1);
1030
}
1031
1032
static int
1033
geneve_socket_mc_join_group(struct geneve_socket *gnvso,
1034
const union sockaddr_union *group, const union sockaddr_union *local,
1035
int *ifidx, union sockaddr_union *source)
1036
{
1037
struct sockopt sopt;
1038
int error;
1039
1040
*source = *local;
1041
1042
if (group->sa.sa_family == AF_INET) {
1043
struct ip_mreq mreq;
1044
1045
mreq.imr_multiaddr = group->sin.sin_addr;
1046
mreq.imr_interface = local->sin.sin_addr;
1047
1048
memset(&sopt, 0, sizeof(sopt));
1049
sopt.sopt_dir = SOPT_SET;
1050
sopt.sopt_level = IPPROTO_IP;
1051
sopt.sopt_name = IP_ADD_MEMBERSHIP;
1052
sopt.sopt_val = &mreq;
1053
sopt.sopt_valsize = sizeof(mreq);
1054
error = sosetopt(gnvso->gnvso_sock, &sopt);
1055
if (error)
1056
return (error);
1057
1058
/*
1059
* BMV: Ideally, there would be a formal way for us to get
1060
* the local interface that was selected based on the
1061
* imr_interface address. We could then update *ifidx so
1062
* geneve_sockaddr_mc_info_match() would return a match for
1063
* later creates that explicitly set the multicast interface.
1064
*
1065
* If we really need to, we can of course look in the INP's
1066
* membership list:
1067
* sotoinpcb(gnvso->gnvso_sock)->inp_moptions->
1068
* imo_head[]->imf_inm->inm_ifp
1069
* similarly to imo_match_group().
1070
*/
1071
source->sin.sin_addr = local->sin.sin_addr;
1072
1073
} else if (group->sa.sa_family == AF_INET6) {
1074
struct ipv6_mreq mreq;
1075
1076
mreq.ipv6mr_multiaddr = group->sin6.sin6_addr;
1077
mreq.ipv6mr_interface = *ifidx;
1078
1079
memset(&sopt, 0, sizeof(sopt));
1080
sopt.sopt_dir = SOPT_SET;
1081
sopt.sopt_level = IPPROTO_IPV6;
1082
sopt.sopt_name = IPV6_JOIN_GROUP;
1083
sopt.sopt_val = &mreq;
1084
sopt.sopt_valsize = sizeof(mreq);
1085
error = sosetopt(gnvso->gnvso_sock, &sopt);
1086
1087
/*
1088
* BMV: As with IPv4, we would really like to know what
1089
* interface in6p_lookup_mcast_ifp() selected.
1090
*/
1091
} else
1092
error = EAFNOSUPPORT;
1093
1094
return (error);
1095
}
1096
1097
static int
1098
geneve_socket_mc_leave_group(struct geneve_socket *gnvso,
1099
const union sockaddr_union *group, const union sockaddr_union *source,
1100
int ifidx)
1101
{
1102
struct sockopt sopt;
1103
int error;
1104
1105
memset(&sopt, 0, sizeof(sopt));
1106
sopt.sopt_dir = SOPT_SET;
1107
1108
if (group->sa.sa_family == AF_INET) {
1109
struct ip_mreq mreq;
1110
1111
mreq.imr_multiaddr = group->sin.sin_addr;
1112
mreq.imr_interface = source->sin.sin_addr;
1113
1114
sopt.sopt_level = IPPROTO_IP;
1115
sopt.sopt_name = IP_DROP_MEMBERSHIP;
1116
sopt.sopt_val = &mreq;
1117
sopt.sopt_valsize = sizeof(mreq);
1118
error = sosetopt(gnvso->gnvso_sock, &sopt);
1119
} else if (group->sa.sa_family == AF_INET6) {
1120
struct ipv6_mreq mreq;
1121
1122
mreq.ipv6mr_multiaddr = group->sin6.sin6_addr;
1123
mreq.ipv6mr_interface = ifidx;
1124
1125
sopt.sopt_level = IPPROTO_IPV6;
1126
sopt.sopt_name = IPV6_LEAVE_GROUP;
1127
sopt.sopt_val = &mreq;
1128
sopt.sopt_valsize = sizeof(mreq);
1129
error = sosetopt(gnvso->gnvso_sock, &sopt);
1130
} else
1131
error = EAFNOSUPPORT;
1132
1133
return (error);
1134
}
1135
1136
static int
1137
geneve_socket_mc_add_group(struct geneve_socket *gnvso,
1138
const union sockaddr_union *group, const union sockaddr_union *local,
1139
int ifidx, int *idx)
1140
{
1141
union sockaddr_union source;
1142
struct geneve_socket_mc_info *mc;
1143
int i, empty, error;
1144
1145
/*
1146
* Within a socket, the same multicast group may be used by multiple
1147
* interfaces, each with a different network identifier. But a socket
1148
* may only join a multicast group once, so keep track of the users
1149
* here.
1150
*/
1151
1152
GENEVE_SO_WLOCK(gnvso);
1153
for (empty = 0, i = 0; i < GENEVE_SO_MC_MAX_GROUPS; i++) {
1154
mc = &gnvso->gnvso_mc[i];
1155
1156
if (mc->gnvsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1157
empty++;
1158
continue;
1159
}
1160
if (geneve_sockaddr_mc_info_match(mc, group, local, ifidx))
1161
goto out;
1162
}
1163
GENEVE_SO_WUNLOCK(gnvso);
1164
1165
if (empty == 0)
1166
return (ENOSPC);
1167
1168
error = geneve_socket_mc_join_group(gnvso, group, local, &ifidx, &source);
1169
if (error)
1170
return (error);
1171
1172
GENEVE_SO_WLOCK(gnvso);
1173
for (i = 0; i < GENEVE_SO_MC_MAX_GROUPS; i++) {
1174
mc = &gnvso->gnvso_mc[i];
1175
1176
if (mc->gnvsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1177
geneve_sockaddr_copy(&mc->gnvsomc_gaddr, &group->sa);
1178
geneve_sockaddr_copy(&mc->gnvsomc_saddr, &source.sa);
1179
mc->gnvsomc_ifidx = ifidx;
1180
goto out;
1181
}
1182
}
1183
GENEVE_SO_WUNLOCK(gnvso);
1184
1185
error = geneve_socket_mc_leave_group(gnvso, group, &source, ifidx);
1186
MPASS(error == 0);
1187
1188
return (ENOSPC);
1189
1190
out:
1191
mc->gnvsomc_users++;
1192
GENEVE_SO_WUNLOCK(gnvso);
1193
*idx = i;
1194
1195
return (0);
1196
}
1197
1198
static void
1199
geneve_socket_mc_release_group(struct geneve_socket *vso, int idx)
1200
{
1201
union sockaddr_union group, source;
1202
struct geneve_socket_mc_info *mc;
1203
int ifidx, leave;
1204
1205
KASSERT(idx >= 0 && idx < GENEVE_SO_MC_MAX_GROUPS,
1206
("%s: vso %p idx %d out of bounds", __func__, vso, idx));
1207
1208
leave = 0;
1209
mc = &vso->gnvso_mc[idx];
1210
1211
GENEVE_SO_WLOCK(vso);
1212
mc->gnvsomc_users--;
1213
if (mc->gnvsomc_users == 0) {
1214
group = mc->gnvsomc_gaddr;
1215
source = mc->gnvsomc_saddr;
1216
ifidx = mc->gnvsomc_ifidx;
1217
memset(mc, 0, sizeof(*mc));
1218
leave = 1;
1219
}
1220
GENEVE_SO_WUNLOCK(vso);
1221
1222
if (leave != 0) {
1223
/*
1224
* Our socket's membership in this group may have already
1225
* been removed if we joined through an interface that's
1226
* been detached.
1227
*/
1228
geneve_socket_mc_leave_group(vso, &group, &source, ifidx);
1229
}
1230
}
1231
1232
static struct geneve_softc *
1233
geneve_socket_lookup_softc_locked(struct geneve_socket *gnvso, uint32_t vni)
1234
{
1235
struct geneve_softc *sc;
1236
uint32_t hash;
1237
1238
GENEVE_SO_LOCK_ASSERT(gnvso);
1239
hash = GENEVE_SO_VNI_HASH(vni);
1240
1241
LIST_FOREACH(sc, &gnvso->gnvso_vni_hash[hash], gnv_entry) {
1242
if (sc->gnv_vni == vni) {
1243
GENEVE_ACQUIRE(sc);
1244
break;
1245
}
1246
}
1247
1248
return (sc);
1249
}
1250
1251
static struct geneve_softc *
1252
geneve_socket_lookup_softc(struct geneve_socket *gnvso, uint32_t vni)
1253
{
1254
struct rm_priotracker tracker;
1255
struct geneve_softc *sc;
1256
1257
GENEVE_SO_RLOCK(gnvso, &tracker);
1258
sc = geneve_socket_lookup_softc_locked(gnvso, vni);
1259
GENEVE_SO_RUNLOCK(gnvso, &tracker);
1260
1261
return (sc);
1262
}
1263
1264
static int
1265
geneve_socket_insert_softc(struct geneve_socket *gnvso, struct geneve_softc *sc)
1266
{
1267
struct geneve_softc *tsc;
1268
uint32_t vni, hash;
1269
1270
vni = sc->gnv_vni;
1271
hash = GENEVE_SO_VNI_HASH(vni);
1272
1273
GENEVE_SO_WLOCK(gnvso);
1274
tsc = geneve_socket_lookup_softc_locked(gnvso, vni);
1275
if (tsc != NULL) {
1276
GENEVE_SO_WUNLOCK(gnvso);
1277
geneve_release(tsc);
1278
return (EEXIST);
1279
}
1280
1281
GENEVE_ACQUIRE(sc);
1282
LIST_INSERT_HEAD(&gnvso->gnvso_vni_hash[hash], sc, gnv_entry);
1283
GENEVE_SO_WUNLOCK(gnvso);
1284
1285
return (0);
1286
}
1287
1288
static void
1289
geneve_socket_remove_softc(struct geneve_socket *gnvso, struct geneve_softc *sc)
1290
{
1291
1292
GENEVE_SO_WLOCK(gnvso);
1293
LIST_REMOVE(sc, gnv_entry);
1294
GENEVE_SO_WUNLOCK(gnvso);
1295
1296
geneve_release(sc);
1297
}
1298
1299
static struct ifnet *
1300
geneve_multicast_if_ref(struct geneve_softc *sc, uint32_t af)
1301
{
1302
struct ifnet *ifp;
1303
1304
GENEVE_LOCK_ASSERT(sc);
1305
1306
ifp = NULL;
1307
if (af == AF_INET && sc->gnv_im4o != NULL)
1308
ifp = sc->gnv_im4o->imo_multicast_ifp;
1309
else if (af == AF_INET6 && sc->gnv_im6o != NULL)
1310
ifp = sc->gnv_im6o->im6o_multicast_ifp;
1311
1312
if (ifp != NULL)
1313
if_ref(ifp);
1314
1315
return (ifp);
1316
}
1317
1318
static void
1319
geneve_free_multicast(struct geneve_softc *sc)
1320
{
1321
1322
if (sc->gnv_mc_ifp != NULL) {
1323
if_rele(sc->gnv_mc_ifp);
1324
sc->gnv_mc_ifp = NULL;
1325
sc->gnv_mc_ifindex = 0;
1326
}
1327
1328
if (sc->gnv_im4o != NULL) {
1329
free(sc->gnv_im4o, M_GENEVE);
1330
sc->gnv_im4o = NULL;
1331
}
1332
1333
if (sc->gnv_im6o != NULL) {
1334
free(sc->gnv_im6o, M_GENEVE);
1335
sc->gnv_im6o = NULL;
1336
}
1337
}
1338
1339
static int
1340
geneve_setup_multicast_interface(struct geneve_softc *sc)
1341
{
1342
struct ifnet *ifp;
1343
1344
ifp = ifunit_ref(sc->gnv_mc_ifname);
1345
if (ifp == NULL) {
1346
if_printf(sc->gnv_ifp, "multicast interface %s does not exist\n",
1347
sc->gnv_mc_ifname);
1348
return (ENOENT);
1349
}
1350
1351
if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1352
if_printf(sc->gnv_ifp, "interface %s does not support multicast\n",
1353
sc->gnv_mc_ifname);
1354
if_rele(ifp);
1355
return (ENOTSUP);
1356
}
1357
1358
sc->gnv_mc_ifp = ifp;
1359
sc->gnv_mc_ifindex = ifp->if_index;
1360
1361
return (0);
1362
}
1363
1364
static int
1365
geneve_setup_multicast(struct geneve_softc *sc)
1366
{
1367
const union sockaddr_union *group;
1368
int error;
1369
1370
group = &sc->gnv_dst_addr;
1371
error = 0;
1372
1373
if (sc->gnv_mc_ifname[0] != '\0') {
1374
error = geneve_setup_multicast_interface(sc);
1375
if (error)
1376
return (error);
1377
}
1378
1379
/*
1380
* Initialize an multicast options structure that is sufficiently
1381
* populated for use in the respective IP output routine. This
1382
* structure is typically stored in the socket, but our sockets
1383
* may be shared among multiple interfaces.
1384
*/
1385
if (group->sa.sa_family == AF_INET) {
1386
sc->gnv_im4o = malloc(sizeof(struct ip_moptions), M_GENEVE,
1387
M_ZERO | M_WAITOK);
1388
sc->gnv_im4o->imo_multicast_ifp = sc->gnv_mc_ifp;
1389
sc->gnv_im4o->imo_multicast_ttl = sc->gnv_ttl;
1390
sc->gnv_im4o->imo_multicast_vif = -1;
1391
} else if (group->sa.sa_family == AF_INET6) {
1392
sc->gnv_im6o = malloc(sizeof(struct ip6_moptions), M_GENEVE,
1393
M_ZERO | M_WAITOK);
1394
sc->gnv_im6o->im6o_multicast_ifp = sc->gnv_mc_ifp;
1395
sc->gnv_im6o->im6o_multicast_hlim = sc->gnv_ttl;
1396
}
1397
1398
return (error);
1399
}
1400
1401
static int
1402
geneve_setup_socket(struct geneve_softc *sc)
1403
{
1404
struct geneve_socket *gnvso;
1405
struct ifnet *ifp;
1406
union sockaddr_union *saddr, *daddr;
1407
int multicast, error;
1408
1409
gnvso = NULL;
1410
ifp = sc->gnv_ifp;
1411
saddr = &sc->gnv_src_addr;
1412
daddr = &sc->gnv_dst_addr;
1413
multicast = geneve_check_multicast_addr(daddr);
1414
MPASS(multicast != EINVAL);
1415
sc->gnv_so_mc_index = -1;
1416
1417
/* Try to create the socket. If that fails, attempt to use an existing one. */
1418
error = geneve_socket_create(ifp, multicast, saddr, &gnvso);
1419
if (error) {
1420
if (multicast != 0)
1421
gnvso = geneve_socket_mc_lookup(saddr);
1422
else
1423
gnvso = geneve_socket_lookup(saddr);
1424
1425
if (gnvso == NULL) {
1426
if_printf(ifp, "can't find existing socket\n");
1427
goto out;
1428
}
1429
}
1430
1431
if (sc->gnv_df == IFLA_GENEVE_DF_SET) {
1432
error = geneve_socket_set_df(gnvso, true);
1433
if (error)
1434
goto out;
1435
}
1436
1437
if (multicast != 0) {
1438
error = geneve_setup_multicast(sc);
1439
if (error)
1440
goto out;
1441
1442
error = geneve_socket_mc_add_group(gnvso, daddr, saddr,
1443
sc->gnv_mc_ifindex, &sc->gnv_so_mc_index);
1444
if (error)
1445
goto out;
1446
}
1447
1448
sc->gnv_sock = gnvso;
1449
error = geneve_socket_insert_softc(gnvso, sc);
1450
if (error) {
1451
sc->gnv_sock = NULL;
1452
if_printf(ifp, "network identifier %d already exists\n", sc->gnv_vni);
1453
goto out;
1454
}
1455
1456
return (0);
1457
1458
out:
1459
if (gnvso != NULL) {
1460
if (sc->gnv_so_mc_index != -1) {
1461
geneve_socket_mc_release_group(gnvso, sc->gnv_so_mc_index);
1462
sc->gnv_so_mc_index = -1;
1463
}
1464
if (multicast != 0)
1465
geneve_free_multicast(sc);
1466
geneve_socket_release(gnvso);
1467
}
1468
1469
return (error);
1470
}
1471
1472
static void
1473
geneve_setup_interface_hdrlen(struct geneve_softc *sc)
1474
{
1475
struct ifnet *ifp;
1476
1477
GENEVE_LOCK_WASSERT(sc);
1478
1479
ifp = sc->gnv_ifp;
1480
ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct geneveudphdr);
1481
if (sc->gnv_proto == GENEVE_PROTO_ETHER)
1482
ifp->if_hdrlen += ETHER_HDR_LEN;
1483
1484
if (sc->gnv_dst_addr.sa.sa_family == AF_INET)
1485
ifp->if_hdrlen += sizeof(struct ip);
1486
else
1487
ifp->if_hdrlen += sizeof(struct ip6_hdr);
1488
1489
if ((sc->gnv_flags & GENEVE_FLAG_USER_MTU) == 0)
1490
ifp->if_mtu = ETHERMTU - ifp->if_hdrlen;
1491
}
1492
1493
static int
1494
geneve_socket_set_df(struct geneve_socket *gnvso, bool df)
1495
{
1496
struct sockopt sopt;
1497
int optval;
1498
1499
memset(&sopt, 0, sizeof(sopt));
1500
sopt.sopt_dir = SOPT_SET;
1501
1502
switch (gnvso->gnvso_laddr.sa.sa_family) {
1503
case AF_INET:
1504
sopt.sopt_level = IPPROTO_IP;
1505
sopt.sopt_name = IP_DONTFRAG;
1506
break;
1507
1508
case AF_INET6:
1509
sopt.sopt_level = IPPROTO_IPV6;
1510
sopt.sopt_name = IPV6_DONTFRAG;
1511
break;
1512
1513
default:
1514
return (EAFNOSUPPORT);
1515
}
1516
1517
optval = df ? 1 : 0;
1518
sopt.sopt_val = &optval;
1519
sopt.sopt_valsize = sizeof(optval);
1520
1521
return (sosetopt(gnvso->gnvso_sock, &sopt));
1522
}
1523
1524
static int
1525
geneve_valid_init_config(struct geneve_softc *sc)
1526
{
1527
const char *reason;
1528
1529
if (sc->gnv_vni >= GENEVE_VNI_MAX) {
1530
if_printf(sc->gnv_ifp, "%u", sc->gnv_vni);
1531
reason = "invalid virtual network identifier specified";
1532
goto fail;
1533
}
1534
1535
if (geneve_sockaddr_supported(&sc->gnv_src_addr, 1) == 0) {
1536
reason = "source address type is not supported";
1537
goto fail;
1538
}
1539
1540
if (geneve_sockaddr_supported(&sc->gnv_dst_addr, 0) == 0) {
1541
reason = "destination address type is not supported";
1542
goto fail;
1543
}
1544
1545
if (geneve_sockaddr_in_any(&sc->gnv_dst_addr) != 0) {
1546
reason = "no valid destination address specified";
1547
goto fail;
1548
}
1549
1550
if (geneve_check_multicast_addr(&sc->gnv_dst_addr) == 0 &&
1551
sc->gnv_mc_ifname[0] != '\0') {
1552
reason = "can only specify interface with a group address";
1553
goto fail;
1554
}
1555
1556
if (geneve_sockaddr_in_any(&sc->gnv_src_addr) == 0) {
1557
if (&sc->gnv_src_addr.sa.sa_family ==
1558
&sc->gnv_dst_addr.sa.sa_family) {
1559
reason = "source and destination address must both be either IPv4 or IPv6";
1560
goto fail;
1561
}
1562
}
1563
1564
if (sc->gnv_src_addr.sin.sin_port == 0) {
1565
reason = "local port not specified";
1566
goto fail;
1567
}
1568
1569
if (sc->gnv_dst_addr.sin.sin_port == 0) {
1570
reason = "remote port not specified";
1571
goto fail;
1572
}
1573
1574
return (0);
1575
1576
fail:
1577
if_printf(sc->gnv_ifp, "cannot initialize interface: %s\n", reason);
1578
return (EINVAL);
1579
}
1580
1581
static void
1582
geneve_init_complete(struct geneve_softc *sc)
1583
{
1584
1585
GENEVE_WLOCK(sc);
1586
sc->gnv_flags |= GENEVE_FLAG_RUNNING;
1587
sc->gnv_flags &= ~GENEVE_FLAG_INIT;
1588
wakeup(sc);
1589
GENEVE_WUNLOCK(sc);
1590
}
1591
1592
static void
1593
geneve_init(void *xsc)
1594
{
1595
static const uint8_t empty_mac[ETHER_ADDR_LEN];
1596
struct geneve_softc *sc;
1597
struct ifnet *ifp;
1598
1599
sc = xsc;
1600
sx_xlock(&geneve_sx);
1601
GENEVE_WLOCK(sc);
1602
ifp = sc->gnv_ifp;
1603
if (sc->gnv_flags & GENEVE_FLAG_RUNNING) {
1604
GENEVE_WUNLOCK(sc);
1605
sx_xunlock(&geneve_sx);
1606
return;
1607
}
1608
sc->gnv_flags |= GENEVE_FLAG_INIT;
1609
GENEVE_WUNLOCK(sc);
1610
1611
if (geneve_valid_init_config(sc) != 0)
1612
goto out;
1613
1614
if (geneve_setup_socket(sc) != 0)
1615
goto out;
1616
1617
/* Initialize the default forwarding entry. */
1618
if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
1619
geneve_ftable_entry_init(sc, &sc->gnv_default_fe, empty_mac,
1620
&sc->gnv_dst_addr.sa, GENEVE_FE_FLAG_STATIC);
1621
1622
GENEVE_WLOCK(sc);
1623
callout_reset(&sc->gnv_callout, geneve_ftable_prune_period * hz,
1624
geneve_timer, sc);
1625
GENEVE_WUNLOCK(sc);
1626
}
1627
ifp->if_drv_flags |= IFF_DRV_RUNNING;
1628
if_link_state_change(ifp, LINK_STATE_UP);
1629
1630
out:
1631
geneve_init_complete(sc);
1632
sx_xunlock(&geneve_sx);
1633
}
1634
1635
/*
 * Drop a reference on sc via GENEVE_RELEASE() and, when that macro
 * reports a non-zero result, wake up threads sleeping on sc.
 */
static void
geneve_release(struct geneve_softc *sc)
{

	/*
	 * The softc may be destroyed as soon as we release our reference,
	 * so we cannot serialize the wakeup with the softc lock.  We use a
	 * timeout in our sleeps so a missed wakeup is unfortunate but not
	 * fatal.
	 */
	if (GENEVE_RELEASE(sc) == 0)
		return;

	wakeup(sc);
}
1647
1648
static void
1649
geneve_teardown_wait(struct geneve_softc *sc)
1650
{
1651
1652
GENEVE_LOCK_WASSERT(sc);
1653
while (sc->gnv_flags & GENEVE_FLAG_TEARDOWN)
1654
rm_sleep(sc, &sc->gnv_lock, 0, "gnvtrn", hz);
1655
}
1656
1657
static void
1658
geneve_teardown_locked(struct geneve_softc *sc)
1659
{
1660
struct ifnet *ifp;
1661
struct geneve_socket *gnvso;
1662
1663
sx_assert(&geneve_sx, SA_XLOCKED);
1664
GENEVE_LOCK_WASSERT(sc);
1665
MPASS(sc->gnv_flags & GENEVE_FLAG_TEARDOWN);
1666
1667
ifp = sc->gnv_ifp;
1668
ifp->if_flags &= ~IFF_UP;
1669
sc->gnv_flags &= ~GENEVE_FLAG_RUNNING;
1670
1671
if (sc->gnv_proto == GENEVE_PROTO_ETHER)
1672
callout_stop(&sc->gnv_callout);
1673
gnvso = sc->gnv_sock;
1674
sc->gnv_sock = NULL;
1675
1676
GENEVE_WUNLOCK(sc);
1677
if_link_state_change(ifp, LINK_STATE_DOWN);
1678
1679
if (gnvso != NULL) {
1680
geneve_socket_remove_softc(gnvso, sc);
1681
1682
if (sc->gnv_so_mc_index != -1) {
1683
geneve_socket_mc_release_group(gnvso, sc->gnv_so_mc_index);
1684
sc->gnv_so_mc_index = -1;
1685
}
1686
}
1687
1688
GENEVE_WLOCK(sc);
1689
while (sc->gnv_refcnt != 0)
1690
rm_sleep(sc, &sc->gnv_lock, 0, "gnvdrn", hz);
1691
GENEVE_WUNLOCK(sc);
1692
1693
if (sc->gnv_proto == GENEVE_PROTO_ETHER)
1694
callout_drain(&sc->gnv_callout);
1695
1696
geneve_free_multicast(sc);
1697
if (gnvso != NULL)
1698
geneve_socket_release(gnvso);
1699
1700
GENEVE_WLOCK(sc);
1701
sc->gnv_flags &= ~GENEVE_FLAG_TEARDOWN;
1702
wakeup(sc);
1703
GENEVE_WUNLOCK(sc);
1704
}
1705
1706
static void
1707
geneve_teardown(struct geneve_softc *sc)
1708
{
1709
1710
sx_xlock(&geneve_sx);
1711
GENEVE_WLOCK(sc);
1712
if (sc->gnv_flags & GENEVE_FLAG_TEARDOWN) {
1713
geneve_teardown_wait(sc);
1714
GENEVE_WUNLOCK(sc);
1715
sx_xunlock(&geneve_sx);
1716
return;
1717
}
1718
1719
sc->gnv_flags |= GENEVE_FLAG_TEARDOWN;
1720
geneve_teardown_locked(sc);
1721
sx_xunlock(&geneve_sx);
1722
}
1723
1724
static void
1725
geneve_timer(void *xsc)
1726
{
1727
struct geneve_softc *sc;
1728
1729
sc = xsc;
1730
GENEVE_LOCK_WASSERT(sc);
1731
1732
geneve_ftable_expire(sc);
1733
callout_schedule(&sc->gnv_callout, geneve_ftable_prune_period * hz);
1734
}
1735
1736
static int
1737
geneve_ioctl_ifflags(struct geneve_softc *sc)
1738
{
1739
struct ifnet *ifp;
1740
1741
ifp = sc->gnv_ifp;
1742
1743
if ((ifp->if_flags & IFF_UP) != 0) {
1744
if ((sc->gnv_flags & GENEVE_FLAG_RUNNING) == 0)
1745
geneve_init(sc);
1746
} else {
1747
if (sc->gnv_flags & GENEVE_FLAG_RUNNING)
1748
geneve_teardown(sc);
1749
}
1750
1751
return (0);
1752
}
1753
1754
/*
 * Flush the forwarding table of sc under the softc write lock.  The flush
 * argument is passed through to geneve_ftable_flush().  Always returns 0.
 */
static int
geneve_flush_ftable(struct geneve_softc *sc, bool flush)
{

	GENEVE_WLOCK(sc);
	geneve_ftable_flush(sc, flush);
	GENEVE_WUNLOCK(sc);

	return (0);
}
1764
1765
static uint16_t
1766
geneve_get_local_port(struct geneve_softc *sc)
1767
{
1768
uint16_t port = 0;
1769
1770
GENEVE_LOCK_ASSERT(sc);
1771
1772
switch (sc->gnv_src_addr.sa.sa_family) {
1773
case AF_INET:
1774
port = ntohs(sc->gnv_src_addr.sin.sin_port);
1775
break;
1776
case AF_INET6:
1777
port = ntohs(sc->gnv_src_addr.sin6.sin6_port);
1778
break;
1779
}
1780
1781
return (port);
1782
}
1783
1784
static uint16_t
1785
geneve_get_remote_port(struct geneve_softc *sc)
1786
{
1787
uint16_t port = 0;
1788
1789
GENEVE_LOCK_ASSERT(sc);
1790
1791
switch (sc->gnv_dst_addr.sa.sa_family) {
1792
case AF_INET:
1793
port = ntohs(sc->gnv_dst_addr.sin.sin_port);
1794
break;
1795
case AF_INET6:
1796
port = ntohs(sc->gnv_dst_addr.sin6.sin6_port);
1797
break;
1798
}
1799
1800
return (port);
1801
}
1802
1803
/* Netlink Helpers */
1804
static int
1805
geneve_set_vni_nl(struct geneve_softc *sc, struct nl_pstate *npt, uint32_t vni)
1806
{
1807
int error;
1808
1809
error = 0;
1810
if (vni >= GENEVE_VNI_MAX) {
1811
error = EINVAL;
1812
goto ret;
1813
}
1814
1815
GENEVE_WLOCK(sc);
1816
if (geneve_can_change_config(sc))
1817
sc->gnv_vni = vni;
1818
else
1819
error = EBUSY;
1820
GENEVE_WUNLOCK(sc);
1821
1822
ret:
1823
if (error == EINVAL)
1824
nlmsg_report_err_msg(npt, "geneve vni is invalid: %u", vni);
1825
1826
if (error == EBUSY)
1827
nlmsg_report_err_msg(npt, "geneve interface is busy.");
1828
1829
return (error);
1830
}
1831
1832
static int
1833
geneve_set_local_addr_nl(struct geneve_softc *sc, struct nl_pstate *npt,
1834
struct sockaddr *sa)
1835
{
1836
union sockaddr_union *unsa = (union sockaddr_union *)sa;
1837
int error;
1838
1839
error = geneve_check_sockaddr(unsa, sa->sa_len);
1840
if (error != 0)
1841
goto ret;
1842
1843
error = geneve_check_multicast_addr(unsa);
1844
if (error != 0)
1845
goto ret;
1846
1847
#ifdef INET6
1848
if (unsa->sa.sa_family == AF_INET6) {
1849
error = sa6_embedscope(&unsa->sin6, V_ip6_use_defzone);
1850
if (error != 0)
1851
goto ret;
1852
}
1853
#endif
1854
1855
GENEVE_WLOCK(sc);
1856
if (geneve_can_change_config(sc)) {
1857
geneve_sockaddr_in_copy(&sc->gnv_src_addr, &unsa->sa);
1858
geneve_set_hwcaps(sc);
1859
} else
1860
error = EBUSY;
1861
GENEVE_WUNLOCK(sc);
1862
1863
ret:
1864
if (error == EINVAL)
1865
nlmsg_report_err_msg(npt, "local address is invalid.");
1866
1867
if (error == EAFNOSUPPORT)
1868
nlmsg_report_err_msg(npt, "address family is not supported.");
1869
1870
if (error == EBUSY)
1871
nlmsg_report_err_msg(npt, "geneve interface is busy.");
1872
1873
return (error);
1874
}
1875
1876
static int
1877
geneve_set_remote_addr_nl(struct geneve_softc *sc, struct nl_pstate *npt,
1878
struct sockaddr *sa)
1879
{
1880
union sockaddr_union *unsa = (union sockaddr_union *)sa;
1881
int error;
1882
1883
error = geneve_check_sockaddr(unsa, sa->sa_len);
1884
if (error != 0)
1885
goto ret;
1886
1887
#ifdef INET6
1888
if (unsa->sa.sa_family == AF_INET6) {
1889
error = sa6_embedscope(&unsa->sin6, V_ip6_use_defzone);
1890
if (error != 0)
1891
goto ret;
1892
}
1893
#endif
1894
1895
GENEVE_WLOCK(sc);
1896
if (geneve_can_change_config(sc)) {
1897
geneve_sockaddr_in_copy(&sc->gnv_dst_addr, &unsa->sa);
1898
geneve_setup_interface_hdrlen(sc);
1899
} else
1900
error = EBUSY;
1901
GENEVE_WUNLOCK(sc);
1902
1903
ret:
1904
if (error == EINVAL)
1905
nlmsg_report_err_msg(npt, "remote address is invalid.");
1906
1907
if (error == EAFNOSUPPORT)
1908
nlmsg_report_err_msg(npt, "address family is not supported.");
1909
1910
if (error == EBUSY)
1911
nlmsg_report_err_msg(npt, "geneve interface is busy.");
1912
1913
return (error);
1914
}
1915
1916
static int
1917
geneve_set_local_port_nl(struct geneve_softc *sc, struct nl_pstate *npt, uint16_t port)
1918
{
1919
int error;
1920
1921
error = 0;
1922
if (port == 0 || port > UINT16_MAX) {
1923
error = EINVAL;
1924
goto ret;
1925
}
1926
1927
GENEVE_WLOCK(sc);
1928
if (geneve_can_change_config(sc) == 0) {
1929
GENEVE_WUNLOCK(sc);
1930
error = EBUSY;
1931
goto ret;
1932
}
1933
1934
switch (sc->gnv_src_addr.sa.sa_family) {
1935
case AF_INET:
1936
sc->gnv_src_addr.sin.sin_port = htons(port);
1937
break;
1938
case AF_INET6:
1939
sc->gnv_src_addr.sin6.sin6_port = htons(port);
1940
break;
1941
}
1942
GENEVE_WUNLOCK(sc);
1943
1944
ret:
1945
if (error == EINVAL)
1946
nlmsg_report_err_msg(npt, "local port is invalid: %u", port);
1947
1948
if (error == EBUSY)
1949
nlmsg_report_err_msg(npt, "geneve interface is busy.");
1950
1951
return (error);
1952
}
1953
1954
static int
1955
geneve_set_remote_port_nl(struct geneve_softc *sc, struct nl_pstate *npt, uint16_t port)
1956
{
1957
int error;
1958
1959
error = 0;
1960
if (port == 0 || port > UINT16_MAX) {
1961
error = EINVAL;
1962
goto ret;
1963
}
1964
1965
GENEVE_WLOCK(sc);
1966
if (geneve_can_change_config(sc) == 0) {
1967
GENEVE_WUNLOCK(sc);
1968
error = EBUSY;
1969
goto ret;
1970
}
1971
1972
switch (sc->gnv_dst_addr.sa.sa_family) {
1973
case AF_INET:
1974
sc->gnv_dst_addr.sin.sin_port = htons(port);
1975
break;
1976
case AF_INET6:
1977
sc->gnv_dst_addr.sin6.sin6_port = htons(port);
1978
break;
1979
}
1980
GENEVE_WUNLOCK(sc);
1981
1982
ret:
1983
if (error == EINVAL)
1984
nlmsg_report_err_msg(npt, "remote port is invalid: %u", port);
1985
1986
if (error == EBUSY)
1987
nlmsg_report_err_msg(npt, "geneve interface is busy.");
1988
1989
return (error);
1990
}
1991
1992
static int
1993
geneve_set_port_range_nl(struct geneve_softc *sc, struct nl_pstate *npt,
1994
struct ifla_geneve_port_range port_range)
1995
{
1996
int error;
1997
1998
error = 0;
1999
if (port_range.low <= 0 || port_range.high > UINT16_MAX ||
2000
port_range.high < port_range.low) {
2001
error = EINVAL;
2002
goto ret;
2003
}
2004
2005
GENEVE_WLOCK(sc);
2006
if (geneve_can_change_config(sc)) {
2007
sc->gnv_min_port = port_range.low;
2008
sc->gnv_max_port = port_range.high;
2009
} else
2010
error = EBUSY;
2011
GENEVE_WUNLOCK(sc);
2012
2013
ret:
2014
if (error == EINVAL)
2015
nlmsg_report_err_msg(npt, "port range is invalid: %u-%u",
2016
port_range.low, port_range.high);
2017
2018
if (error == EBUSY)
2019
nlmsg_report_err_msg(npt, "geneve interface is busy.");
2020
2021
return (error);
2022
}
2023
2024
static int
2025
geneve_set_df_nl(struct geneve_softc *sc, struct nl_pstate *npt,
2026
enum ifla_geneve_df df)
2027
{
2028
int error;
2029
2030
error = 0;
2031
GENEVE_WLOCK(sc);
2032
if (geneve_can_change_config(sc))
2033
sc->gnv_df = df;
2034
else
2035
error = EBUSY;
2036
GENEVE_WUNLOCK(sc);
2037
2038
if (error == EBUSY)
2039
nlmsg_report_err_msg(npt, "geneve interface is busy.");
2040
2041
return (error);
2042
}
2043
2044
static int
2045
geneve_set_ttl_nl(struct geneve_softc *sc, struct nl_pstate *npt __unused,
2046
uint8_t ttl)
2047
{
2048
2049
GENEVE_WLOCK(sc);
2050
sc->gnv_ttl = ttl;
2051
if (sc->gnv_im4o != NULL)
2052
sc->gnv_im4o->imo_multicast_ttl = sc->gnv_ttl;
2053
if (sc->gnv_im6o != NULL)
2054
sc->gnv_im6o->im6o_multicast_hlim = sc->gnv_ttl;
2055
GENEVE_WUNLOCK(sc);
2056
2057
return (0);
2058
}
2059
2060
static int
2061
geneve_set_ttl_inherit_nl(struct geneve_softc *sc,
2062
struct nl_pstate *npt __unused, bool inherit)
2063
{
2064
2065
GENEVE_WLOCK(sc);
2066
if (inherit)
2067
sc->gnv_flags |= GENEVE_FLAG_TTL_INHERIT;
2068
else
2069
sc->gnv_flags &= ~GENEVE_FLAG_TTL_INHERIT;
2070
GENEVE_WUNLOCK(sc);
2071
2072
return (0);
2073
}
2074
2075
static int
2076
geneve_set_dscp_inherit_nl(struct geneve_softc *sc,
2077
struct nl_pstate *npt __unused, bool inherit)
2078
{
2079
2080
GENEVE_WLOCK(sc);
2081
if (inherit)
2082
sc->gnv_flags |= GENEVE_FLAG_DSCP_INHERIT;
2083
else
2084
sc->gnv_flags &= ~GENEVE_FLAG_DSCP_INHERIT;
2085
GENEVE_WUNLOCK(sc);
2086
2087
return (0);
2088
}
2089
2090
static int
2091
geneve_set_collect_metadata_nl(struct geneve_softc *sc,
2092
struct nl_pstate *npt __unused, bool external)
2093
{
2094
2095
GENEVE_WLOCK(sc);
2096
if (external)
2097
sc->gnv_flags |= GENEVE_FLAG_COLLECT_METADATA;
2098
else
2099
sc->gnv_flags &= ~GENEVE_FLAG_COLLECT_METADATA;
2100
GENEVE_WUNLOCK(sc);
2101
2102
return (0);
2103
}
2104
2105
static int
2106
geneve_set_learn_nl(struct geneve_softc *sc, struct nl_pstate *npt,
2107
bool learn)
2108
{
2109
2110
GENEVE_WLOCK(sc);
2111
if (learn)
2112
sc->gnv_flags |= GENEVE_FLAG_LEARN;
2113
else
2114
sc->gnv_flags &= ~GENEVE_FLAG_LEARN;
2115
GENEVE_WUNLOCK(sc);
2116
2117
return (0);
2118
}
2119
2120
static int
2121
geneve_set_ftable_max_nl(struct geneve_softc *sc, struct nl_pstate *npt,
2122
uint32_t max)
2123
{
2124
int error;
2125
2126
error = 0;
2127
GENEVE_WLOCK(sc);
2128
if (max <= GENEVE_FTABLE_MAX)
2129
sc->gnv_ftable_max = max;
2130
else
2131
error = EINVAL;
2132
GENEVE_WUNLOCK(sc);
2133
2134
if (error == EINVAL)
2135
nlmsg_report_err_msg(npt,
2136
"maximum number of entries in the table can not be more than %u",
2137
GENEVE_FTABLE_MAX);
2138
2139
return (error);
2140
}
2141
2142
static int
2143
geneve_set_ftable_timeout_nl(struct geneve_softc *sc, struct nl_pstate *npt,
2144
uint32_t timeout)
2145
{
2146
int error;
2147
2148
error = 0;
2149
GENEVE_WLOCK(sc);
2150
if (timeout <= GENEVE_FTABLE_MAX_TIMEOUT)
2151
sc->gnv_ftable_timeout = timeout;
2152
else
2153
error = EINVAL;
2154
GENEVE_WUNLOCK(sc);
2155
2156
if (error == EINVAL)
2157
nlmsg_report_err_msg(npt,
2158
"maximum timeout for stale entries in the table can not be more than %u",
2159
GENEVE_FTABLE_MAX_TIMEOUT);
2160
2161
return (error);
2162
}
2163
2164
static int
2165
geneve_set_mc_if_nl(struct geneve_softc *sc, struct nl_pstate *npt,
2166
char *ifname)
2167
{
2168
int error;
2169
2170
error = 0;
2171
GENEVE_WLOCK(sc);
2172
if (geneve_can_change_config(sc)) {
2173
strlcpy(sc->gnv_mc_ifname, ifname, IFNAMSIZ);
2174
geneve_set_hwcaps(sc);
2175
} else
2176
error = EBUSY;
2177
GENEVE_WUNLOCK(sc);
2178
2179
if (error == EBUSY)
2180
nlmsg_report_err_msg(npt, "geneve interface is busy.");
2181
2182
return (error);
2183
}
2184
2185
/*
 * Netlink entry point for flushing the forwarding table.  Forwards to
 * geneve_flush_ftable() and returns its result; npt is not used.
 */
static int
geneve_flush_ftable_nl(struct geneve_softc *sc, struct nl_pstate *npt,
    bool flush)
{
	int error;

	error = geneve_flush_ftable(sc, flush);

	return (error);
}
2192
2193
static void
2194
geneve_get_local_addr_nl(struct geneve_softc *sc, struct nl_writer *nw)
2195
{
2196
struct sockaddr *sa;
2197
2198
GENEVE_LOCK_ASSERT(sc);
2199
2200
sa = &sc->gnv_src_addr.sa;
2201
if (sa->sa_family == AF_INET) {
2202
const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
2203
nlattr_add_in_addr(nw, IFLA_GENEVE_LOCAL, in4);
2204
} else if (sa->sa_family == AF_INET6) {
2205
const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
2206
nlattr_add_in6_addr(nw, IFLA_GENEVE_LOCAL, in6);
2207
}
2208
}
2209
2210
static void
2211
geneve_get_remote_addr_nl(struct geneve_softc *sc, struct nl_writer *nw)
2212
{
2213
struct sockaddr *sa;
2214
2215
GENEVE_LOCK_ASSERT(sc);
2216
2217
sa = &sc->gnv_dst_addr.sa;
2218
if (sa->sa_family == AF_INET) {
2219
const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
2220
nlattr_add_in_addr(nw, IFLA_GENEVE_REMOTE, in4);
2221
} else if (sa->sa_family == AF_INET6) {
2222
const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
2223
nlattr_add_in6_addr(nw, IFLA_GENEVE_REMOTE, in6);
2224
}
2225
}
2226
2227
static int
2228
geneve_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2229
{
2230
struct rm_priotracker tracker;
2231
struct geneve_softc *sc;
2232
struct siocsifcapnv_driver_data *drv_ioctl_data, drv_ioctl_data_d;
2233
struct ifreq *ifr;
2234
int max, error;
2235
2236
CURVNET_ASSERT_SET();
2237
2238
error = 0;
2239
sc = ifp->if_softc;
2240
ifr = (struct ifreq *)data;
2241
2242
switch (cmd) {
2243
case SIOCADDMULTI:
2244
case SIOCDELMULTI:
2245
break;
2246
2247
case SIOCGDRVSPEC:
2248
break;
2249
case SIOCSDRVSPEC:
2250
error = priv_check(curthread, PRIV_NET_GENEVE);
2251
if (error)
2252
return (error);
2253
break;
2254
}
2255
2256
switch (cmd) {
2257
case SIOCSIFFLAGS:
2258
error = geneve_ioctl_ifflags(sc);
2259
break;
2260
2261
case SIOCSIFMEDIA:
2262
case SIOCGIFMEDIA:
2263
if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2264
error = ifmedia_ioctl(ifp, ifr, &sc->gnv_media, cmd);
2265
else
2266
error = EINVAL;
2267
break;
2268
2269
case SIOCSIFMTU:
2270
if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2271
max = GENEVE_MAX_MTU;
2272
else
2273
max = GENEVE_MAX_L3MTU;
2274
2275
if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > max)
2276
error = EINVAL;
2277
else {
2278
GENEVE_WLOCK(sc);
2279
ifp->if_mtu = ifr->ifr_mtu;
2280
sc->gnv_flags |= GENEVE_FLAG_USER_MTU;
2281
GENEVE_WUNLOCK(sc);
2282
}
2283
break;
2284
2285
case SIOCGIFCAPNV:
2286
break;
2287
case SIOCSIFCAP:
2288
drv_ioctl_data = &drv_ioctl_data_d;
2289
drv_ioctl_data->reqcap = ifr->ifr_reqcap;
2290
drv_ioctl_data->reqcap2 = if_getcapenable2(ifp);
2291
drv_ioctl_data->nvcap = NULL;
2292
/* FALLTHROUGH */
2293
case SIOCSIFCAPNV:
2294
if (cmd == SIOCSIFCAPNV)
2295
drv_ioctl_data = (struct siocsifcapnv_driver_data *)data;
2296
2297
GENEVE_WLOCK(sc);
2298
error = geneve_set_reqcap(sc, ifp, drv_ioctl_data->reqcap,
2299
drv_ioctl_data->reqcap2);
2300
if (error == 0)
2301
geneve_set_hwcaps(sc);
2302
GENEVE_WUNLOCK(sc);
2303
break;
2304
2305
case SIOCGTUNFIB:
2306
GENEVE_RLOCK(sc, &tracker);
2307
ifr->ifr_fib = sc->gnv_fibnum;
2308
GENEVE_RUNLOCK(sc, &tracker);
2309
break;
2310
2311
case SIOCSTUNFIB:
2312
if ((error = priv_check(curthread, PRIV_NET_GENEVE)) != 0)
2313
break;
2314
2315
if (ifr->ifr_fib >= rt_numfibs)
2316
error = EINVAL;
2317
else {
2318
GENEVE_WLOCK(sc);
2319
sc->gnv_fibnum = ifr->ifr_fib;
2320
GENEVE_WUNLOCK(sc);
2321
}
2322
break;
2323
2324
case SIOCSIFADDR:
2325
ifp->if_flags |= IFF_UP;
2326
/* FALLTHROUGH */
2327
case SIOCGIFADDR:
2328
if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2329
error = ether_ioctl(ifp, cmd, data);
2330
break;
2331
2332
default:
2333
if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2334
error = ether_ioctl(ifp, cmd, data);
2335
else
2336
error = EINVAL;
2337
break;
2338
}
2339
2340
return (error);
2341
}
2342
2343
static uint16_t
2344
geneve_pick_source_port(struct geneve_softc *sc, struct mbuf *m)
2345
{
2346
int range;
2347
uint32_t hash;
2348
2349
range = sc->gnv_max_port - sc->gnv_min_port + 1;
2350
2351
/* RFC 8926 Section 3.3-2.2.1 */
2352
if (M_HASHTYPE_ISHASH(m))
2353
hash = m->m_pkthdr.flowid;
2354
else
2355
hash = jenkins_hash(m->m_data, ETHER_HDR_LEN, sc->gnv_port_hash_key);
2356
2357
return (sc->gnv_min_port + (hash % range));
2358
}
2359
2360
static void
2361
geneve_encap_header(struct geneve_softc *sc, struct mbuf *m, int ipoff,
2362
uint16_t srcport, uint16_t dstport, uint16_t proto)
2363
{
2364
struct geneveudphdr *hdr;
2365
struct udphdr *udph;
2366
struct genevehdr *gnvh;
2367
int len;
2368
2369
len = m->m_pkthdr.len - ipoff;
2370
MPASS(len >= sizeof(struct geneveudphdr));
2371
hdr = mtodo(m, ipoff);
2372
2373
udph = &hdr->geneve_udp;
2374
udph->uh_sport = srcport;
2375
udph->uh_dport = dstport;
2376
udph->uh_ulen = htons(len);
2377
udph->uh_sum = 0;
2378
2379
gnvh = &hdr->geneve_hdr;
2380
gnvh->geneve_ver = 0;
2381
gnvh->geneve_optlen = 0;
2382
gnvh->geneve_critical = 0;
2383
gnvh->geneve_control = 0;
2384
gnvh->geneve_flags = 0;
2385
gnvh->geneve_proto = proto;
2386
gnvh->geneve_vni = htonl(sc->gnv_vni << GENEVE_HDR_VNI_SHIFT);
2387
}
2388
2389
/* Return the CSUM_INNER_* equivalent of CSUM_* caps. */
2390
static uint32_t
2391
csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap)
2392
{
2393
uint32_t csum_flags = encap;
2394
const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP;
2395
2396
/*
2397
* csum_flags can request either v4 or v6 offload but not both.
2398
* tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO)
2399
* so those bits are no good to detect the IP version. Other bits are
2400
* always set with CSUM_TSO and we use those to figure out the IP
2401
* version.
2402
*/
2403
if (csum_flags_in & v4) {
2404
if (csum_flags_in & CSUM_IP)
2405
csum_flags |= CSUM_INNER_IP;
2406
if (csum_flags_in & CSUM_IP_UDP)
2407
csum_flags |= CSUM_INNER_IP_UDP;
2408
if (csum_flags_in & CSUM_IP_TCP)
2409
csum_flags |= CSUM_INNER_IP_TCP;
2410
if (csum_flags_in & CSUM_IP_TSO)
2411
csum_flags |= CSUM_INNER_IP_TSO;
2412
} else {
2413
#ifdef INVARIANTS
2414
const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP;
2415
MPASS((csum_flags_in & v6) != 0);
2416
#endif
2417
if (csum_flags_in & CSUM_IP6_UDP)
2418
csum_flags |= CSUM_INNER_IP6_UDP;
2419
if (csum_flags_in & CSUM_IP6_TCP)
2420
csum_flags |= CSUM_INNER_IP6_TCP;
2421
if (csum_flags_in & CSUM_IP6_TSO)
2422
csum_flags |= CSUM_INNER_IP6_TSO;
2423
}
2424
2425
return (csum_flags);
2426
}
2427
2428
static uint16_t
2429
geneve_get_ethertype(struct mbuf *m)
2430
{
2431
struct ip *ip;
2432
struct ip6_hdr *ip6;
2433
2434
/*
2435
* We should pullup, but we're only interested in the first byte, so
2436
* that'll always be contiguous.
2437
*/
2438
ip = mtod(m, struct ip *);
2439
if (ip->ip_v == IPVERSION)
2440
return (ETHERTYPE_IP);
2441
2442
ip6 = mtod(m, struct ip6_hdr *);
2443
if ((ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION)
2444
return (ETHERTYPE_IPV6);
2445
2446
return (0);
2447
}
2448
2449
/* RFC 8926 Section 4.4.2. DSCP, ECN, and TTL */
2450
static int
2451
geneve_inherit_l3_hdr(struct mbuf *m, struct geneve_softc *sc, uint16_t proto,
2452
uint8_t *tos, uint8_t *ttl, u_short *ip_off)
2453
{
2454
struct ether_header *eh;
2455
struct ip *ip_inner, iphdr;
2456
struct ip6_hdr *ip6_inner, ip6hdr;
2457
int offset;
2458
2459
*tos = 0;
2460
*ttl = sc->gnv_ttl;
2461
if (sc->gnv_df == IFLA_GENEVE_DF_SET)
2462
*ip_off = htons(IP_DF);
2463
else
2464
*ip_off = 0;
2465
2466
/* Set offset and address family if proto is ethernet */
2467
if (proto == GENEVE_PROTO_ETHER) {
2468
eh = mtod(m, struct ether_header *);
2469
if (eh->ether_type == htons(ETHERTYPE_IP)) {
2470
if (m->m_pkthdr.len < ETHER_HDR_LEN + sizeof(struct ip)) {
2471
m_freem(m);
2472
return (EINVAL);
2473
}
2474
proto = ETHERTYPE_IP;
2475
} else if (eh->ether_type == htons(ETHERTYPE_IPV6)) {
2476
if (m->m_pkthdr.len < ETHER_HDR_LEN + sizeof(struct ip6_hdr)) {
2477
m_freem(m);
2478
return (EINVAL);
2479
}
2480
proto = ETHERTYPE_IPV6;
2481
} else
2482
return (0);
2483
2484
offset = ETHER_HDR_LEN;
2485
} else
2486
offset = 0;
2487
2488
switch (proto) {
2489
case ETHERTYPE_IP:
2490
if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2491
m_copydata(m, offset, sizeof(struct ip), (caddr_t)&iphdr);
2492
ip_inner = &iphdr;
2493
} else
2494
ip_inner = mtodo(m, offset);
2495
2496
*tos = ip_inner->ip_tos;
2497
if (sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT)
2498
*ttl = ip_inner->ip_ttl;
2499
if (sc->gnv_df == IFLA_GENEVE_DF_INHERIT)
2500
*ip_off = ip_inner->ip_off;
2501
break;
2502
2503
case ETHERTYPE_IPV6:
2504
if (__predict_false(m->m_len < offset + sizeof(struct ip6_hdr))) {
2505
m_copydata(m, offset, sizeof(struct ip6_hdr), (caddr_t)&ip6hdr);
2506
ip6_inner = &ip6hdr;
2507
} else
2508
ip6_inner = mtodo(m, offset);
2509
2510
*tos = IPV6_TRAFFIC_CLASS(ip6_inner);
2511
if (sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT)
2512
*ttl = ip6_inner->ip6_hlim;
2513
break;
2514
}
2515
2516
return (0);
2517
}
2518
2519
#ifdef INET
2520
static int
2521
geneve_encap4(struct geneve_softc *sc, const union sockaddr_union *funsa,
2522
struct mbuf *m)
2523
{
2524
struct ifnet *ifp;
2525
struct ip *ip;
2526
struct in_addr srcaddr, dstaddr;
2527
struct route route, *ro;
2528
struct sockaddr_in *sin;
2529
int plen, error;
2530
uint32_t csum_flags;
2531
uint16_t srcport, dstport, proto;
2532
u_short ip_off;
2533
uint8_t tos, ecn, ttl;
2534
bool mcast;
2535
2536
NET_EPOCH_ASSERT();
2537
2538
ifp = sc->gnv_ifp;
2539
srcaddr = sc->gnv_src_addr.sin.sin_addr;
2540
srcport = htons(geneve_pick_source_port(sc, m));
2541
dstaddr = funsa->sin.sin_addr;
2542
dstport = funsa->sin.sin_port;
2543
plen = m->m_pkthdr.len;
2544
2545
if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2546
proto = sc->gnv_proto;
2547
else
2548
proto = geneve_get_ethertype(m);
2549
2550
error = geneve_inherit_l3_hdr(m, sc, proto, &tos, &ttl, &ip_off);
2551
if (error) {
2552
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2553
return (error);
2554
}
2555
2556
M_PREPEND(m, sizeof(struct ip) + sizeof(struct geneveudphdr), M_NOWAIT);
2557
if (m == NULL) {
2558
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2559
return (ENOBUFS);
2560
}
2561
ip = mtod(m, struct ip *);
2562
2563
ecn = (tos & IPTOS_ECN_MASK);
2564
ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &ecn);
2565
if (sc->gnv_flags & GENEVE_FLAG_DSCP_INHERIT)
2566
ip->ip_tos |= (tos & ~IPTOS_ECN_MASK);
2567
2568
ip->ip_len = htons(m->m_pkthdr.len);
2569
ip->ip_off = ip_off;
2570
ip->ip_ttl = ttl;
2571
ip->ip_p = IPPROTO_UDP;
2572
ip->ip_sum = 0;
2573
ip->ip_src = srcaddr;
2574
ip->ip_dst = dstaddr;
2575
2576
geneve_encap_header(sc, m, sizeof(struct ip), srcport, dstport, htons(proto));
2577
mcast = (m->m_flags & (M_MCAST | M_BCAST));
2578
m->m_flags &= ~(M_MCAST | M_BCAST);
2579
2580
m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
2581
if (m->m_pkthdr.csum_flags != 0) {
2582
/*
2583
* HW checksum (L3 and/or L4) or TSO has been requested.
2584
* Look up the ifnet for the outbound route and verify that the
2585
* outbound ifnet can perform the requested operation on the inner frame.
2586
*/
2587
memset(&route, 0, sizeof(route));
2588
ro = &route;
2589
sin = (struct sockaddr_in *)&ro->ro_dst;
2590
sin->sin_family = AF_INET;
2591
sin->sin_len = sizeof(*sin);
2592
sin->sin_addr = ip->ip_dst;
2593
ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE, 0);
2594
if (ro->ro_nh == NULL) {
2595
m_freem(m);
2596
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2597
return (EHOSTUNREACH);
2598
}
2599
2600
csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
2601
CSUM_ENCAP_GENEVE);
2602
if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != csum_flags) {
2603
if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
2604
const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;
2605
2606
if_printf(ifp, "interface %s is missing hwcaps "
2607
"0x%08x, csum_flags 0x%08x -> 0x%08x, "
2608
"hwassist 0x%08x\n", nh_ifp->if_xname,
2609
csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
2610
m->m_pkthdr.csum_flags, csum_flags,
2611
(uint32_t)nh_ifp->if_hwassist);
2612
}
2613
m_freem(m);
2614
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2615
return (ENXIO);
2616
}
2617
m->m_pkthdr.csum_flags = csum_flags;
2618
if (csum_flags & (CSUM_INNER_IP | CSUM_INNER_IP_UDP |
2619
CSUM_INNER_IP6_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
2620
counter_u64_add(sc->gnv_stats.txcsum, 1);
2621
if (csum_flags & CSUM_INNER_TSO)
2622
counter_u64_add(sc->gnv_stats.tso, 1);
2623
}
2624
} else
2625
ro = NULL;
2626
2627
error = ip_output(m, NULL, ro, 0, sc->gnv_im4o, NULL);
2628
if (error == 0) {
2629
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2630
if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
2631
if (mcast)
2632
if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2633
} else
2634
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2635
2636
return (error);
2637
}
2638
#endif
2639
2640
#ifdef INET6
2641
static int
2642
geneve_encap6(struct geneve_softc *sc, const union sockaddr_union *funsa,
2643
struct mbuf *m)
2644
{
2645
struct ifnet *ifp;
2646
struct ip6_hdr *ip6;
2647
struct ip6_pktopts opts;
2648
struct sockaddr_in6 *sin6;
2649
struct route_in6 route, *ro;
2650
const struct in6_addr *srcaddr, *dstaddr;
2651
int plen, error;
2652
uint32_t csum_flags;
2653
uint16_t srcport, dstport, proto;
2654
u_short ip6_df;
2655
uint8_t tos, ecn, etos, ttl;
2656
bool mcast;
2657
2658
NET_EPOCH_ASSERT();
2659
2660
ifp = sc->gnv_ifp;
2661
srcaddr = &sc->gnv_src_addr.sin6.sin6_addr;
2662
srcport = htons(geneve_pick_source_port(sc, m));
2663
dstaddr = &funsa->sin6.sin6_addr;
2664
dstport = funsa->sin6.sin6_port;
2665
plen = m->m_pkthdr.len;
2666
2667
if (sc->gnv_proto == GENEVE_PROTO_ETHER)
2668
proto = sc->gnv_proto;
2669
else
2670
proto = geneve_get_ethertype(m);
2671
2672
error = geneve_inherit_l3_hdr(m, sc, proto, &tos, &ttl, &ip6_df);
2673
if (error) {
2674
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2675
return (error);
2676
}
2677
2678
ip6_initpktopts(&opts);
2679
if (ip6_df)
2680
opts.ip6po_flags = IP6PO_DONTFRAG;
2681
2682
M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct geneveudphdr), M_NOWAIT);
2683
if (m == NULL) {
2684
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2685
return (ENOBUFS);
2686
}
2687
2688
ip6 = mtod(m, struct ip6_hdr *);
2689
ip6->ip6_flow = 0;
2690
ip6->ip6_vfc = IPV6_VERSION;
2691
2692
ecn = (tos & IPTOS_ECN_MASK);
2693
ip_ecn_ingress(ECN_ALLOWED, &etos, &ecn);
2694
ip6->ip6_flow |= htonl((u_int32_t)etos << IPV6_FLOWLABEL_LEN);
2695
if (sc->gnv_flags & GENEVE_FLAG_DSCP_INHERIT)
2696
ip6->ip6_flow |= htonl((u_int32_t)tos << IPV6_FLOWLABEL_LEN);
2697
2698
ip6->ip6_plen = 0;
2699
ip6->ip6_nxt = IPPROTO_UDP;
2700
ip6->ip6_hlim = ttl;
2701
ip6->ip6_src = *srcaddr;
2702
ip6->ip6_dst = *dstaddr;
2703
2704
geneve_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport,
2705
htons(proto));
2706
mcast = (m->m_flags & (M_MCAST | M_BCAST));
2707
m->m_flags &= ~(M_MCAST | M_BCAST);
2708
2709
ro = NULL;
2710
m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
2711
if (mcast || m->m_pkthdr.csum_flags != 0) {
2712
/*
2713
* HW checksum (L3 and/or L4) or TSO has been requested. Look
2714
* up the ifnet for the outbound route and verify that the
2715
* outbound ifnet can perform the requested operation on the
2716
* inner frame.
2717
* XXX: There's a rare scenario with ipv6 over multicast
2718
* underlay where, when mc_ifname is set, it causes panics
2719
* inside a jail. We'll force geneve to select its own outbound
2720
* interface to avoid this.
2721
*/
2722
memset(&route, 0, sizeof(route));
2723
ro = &route;
2724
sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
2725
sin6->sin6_family = AF_INET6;
2726
sin6->sin6_len = sizeof(*sin6);
2727
sin6->sin6_addr = ip6->ip6_dst;
2728
ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0, NHR_NONE, 0);
2729
if (ro->ro_nh == NULL) {
2730
m_freem(m);
2731
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2732
return (EHOSTUNREACH);
2733
}
2734
}
2735
if (m->m_pkthdr.csum_flags != 0) {
2736
csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
2737
CSUM_ENCAP_GENEVE);
2738
if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != csum_flags) {
2739
if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
2740
const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;
2741
2742
if_printf(ifp, "interface %s is missing hwcaps "
2743
"0x%08x, csum_flags 0x%08x -> 0x%08x, "
2744
"hwassist 0x%08x\n", nh_ifp->if_xname,
2745
csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
2746
m->m_pkthdr.csum_flags, csum_flags,
2747
(uint32_t)nh_ifp->if_hwassist);
2748
}
2749
m_freem(m);
2750
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2751
return (ENXIO);
2752
}
2753
m->m_pkthdr.csum_flags = csum_flags;
2754
if (csum_flags &
2755
(CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
2756
CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
2757
counter_u64_add(sc->gnv_stats.txcsum, 1);
2758
if (csum_flags & CSUM_INNER_TSO)
2759
counter_u64_add(sc->gnv_stats.tso, 1);
2760
}
2761
} else if (ntohs(dstport) != V_zero_checksum_port) {
2762
struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
2763
2764
hdr->uh_sum = in6_cksum_pseudo(ip6,
2765
m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
2766
m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
2767
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
2768
}
2769
error = ip6_output(m, &opts, ro, 0, sc->gnv_im6o, NULL, NULL);
2770
if (error == 0) {
2771
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2772
if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
2773
if (mcast)
2774
if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2775
} else
2776
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2777
2778
return (error);
2779
}
2780
#endif
2781
2782
static int
2783
geneve_transmit(struct ifnet *ifp, struct mbuf *m)
2784
{
2785
struct rm_priotracker tracker;
2786
union sockaddr_union unsa;
2787
struct geneve_softc *sc;
2788
struct gnv_ftable_entry *fe;
2789
struct ifnet *mcifp;
2790
struct ether_header *eh;
2791
uint32_t af;
2792
int error;
2793
2794
mcifp = NULL;
2795
sc = ifp->if_softc;
2796
GENEVE_RLOCK(sc, &tracker);
2797
M_SETFIB(m, sc->gnv_fibnum);
2798
2799
if ((sc->gnv_flags & GENEVE_FLAG_RUNNING) == 0) {
2800
GENEVE_RUNLOCK(sc, &tracker);
2801
m_freem(m);
2802
return (ENETDOWN);
2803
}
2804
if (__predict_false(if_tunnel_check_nesting(ifp, m,
2805
MTAG_GENEVE_LOOP, 1) != 0)) {
2806
GENEVE_RUNLOCK(sc, &tracker);
2807
m_freem(m);
2808
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2809
return (ELOOP);
2810
}
2811
2812
if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
2813
fe = NULL;
2814
eh = mtod(m, struct ether_header *);
2815
2816
ETHER_BPF_MTAP(ifp, m);
2817
if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
2818
fe = geneve_ftable_entry_lookup(sc, eh->ether_dhost);
2819
if (fe == NULL)
2820
fe = &sc->gnv_default_fe;
2821
geneve_sockaddr_copy(&unsa, &fe->gnvfe_raddr.sa);
2822
} else
2823
geneve_sockaddr_copy(&unsa, &sc->gnv_dst_addr.sa);
2824
2825
af = unsa.sa.sa_family;
2826
if (geneve_check_multicast_addr(&unsa) != 0)
2827
mcifp = geneve_multicast_if_ref(sc, af);
2828
2829
GENEVE_ACQUIRE(sc);
2830
GENEVE_RUNLOCK(sc, &tracker);
2831
2832
switch (af) {
2833
#ifdef INET
2834
case AF_INET:
2835
error = geneve_encap4(sc, &unsa, m);
2836
break;
2837
#endif
2838
#ifdef INET6
2839
case AF_INET6:
2840
error = geneve_encap6(sc, &unsa, m);
2841
break;
2842
#endif
2843
default:
2844
error = EAFNOSUPPORT;
2845
}
2846
2847
geneve_release(sc);
2848
if (mcifp != NULL)
2849
if_rele(mcifp);
2850
2851
return (error);
2852
}
2853
2854
static int
2855
geneve_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
2856
struct route *ro)
2857
{
2858
uint32_t af;
2859
int error;
2860
2861
#ifdef MAC
2862
error = mac_ifnet_check_transmit(ifp, m);
2863
if (error) {
2864
m_freem(m);
2865
return (error);
2866
}
2867
#endif
2868
2869
/* BPF writes need to be handled specially. */
2870
if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
2871
memmove(&af, dst->sa_data, sizeof(af));
2872
else
2873
af = RO_GET_FAMILY(ro, dst);
2874
2875
BPF_MTAP2(ifp, &af, sizeof(af), m);
2876
error = (ifp->if_transmit)(ifp, m);
2877
if (error)
2878
return (ENOBUFS);
2879
return (0);
2880
}
2881
2882
static int
2883
geneve_next_option(struct geneve_socket *gnvso, struct genevehdr *gnvh,
2884
struct mbuf **m0)
2885
{
2886
int optlen, error;
2887
2888
error = 0;
2889
/*
2890
* We MUST NOT forward the packet if control (O) bit is set
2891
* and currently there is not standard specification for it.
2892
* Therefore, we drop it.
2893
*/
2894
if (gnvh->geneve_control)
2895
return (EINVAL);
2896
2897
optlen = gnvh->geneve_optlen;
2898
if (optlen == 0)
2899
return (error);
2900
2901
/*
2902
* XXX: Geneve options processing
2903
* We MUST drop the packet if there are options to process
2904
* and we are not able to process it.
2905
*/
2906
if (gnvh->geneve_critical)
2907
error = EINVAL;
2908
2909
return (error);
2910
}
2911
2912
static void
2913
geneve_qflush(struct ifnet *ifp __unused)
2914
{
2915
}
2916
2917
static void
2918
geneve_input_csum(struct mbuf *m, struct ifnet *ifp, counter_u64_t rxcsum)
2919
{
2920
uint32_t csum_flags;
2921
2922
if ((((ifp->if_capenable & IFCAP_RXCSUM) != 0 &&
2923
(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) != 0) ||
2924
((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0 &&
2925
(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) == 0))) {
2926
csum_flags = 0;
2927
2928
if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)
2929
csum_flags |= CSUM_L3_CALC;
2930
if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID)
2931
csum_flags |= CSUM_L3_VALID;
2932
if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC)
2933
csum_flags |= CSUM_L4_CALC;
2934
if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID)
2935
csum_flags |= CSUM_L4_VALID;
2936
m->m_pkthdr.csum_flags = csum_flags;
2937
counter_u64_add(rxcsum, 1);
2938
} else {
2939
/* clear everything */
2940
m->m_pkthdr.csum_flags = 0;
2941
m->m_pkthdr.csum_data = 0;
2942
}
2943
}
2944
2945
static uint32_t
2946
geneve_map_etype_to_af(uint32_t ethertype)
2947
{
2948
2949
if (ethertype == ETHERTYPE_IP)
2950
return (AF_INET);
2951
if (ethertype == ETHERTYPE_IPV6)
2952
return (AF_INET6);
2953
if (ethertype == ETHERTYPE_ARP)
2954
return (AF_LINK);
2955
return (0);
2956
}
2957
2958
static bool
2959
geneve_udp_input(struct mbuf *m, int offset, struct inpcb *inpcb,
2960
const struct sockaddr *srcsa, void *xgnvso)
2961
{
2962
struct geneve_socket *gnvso;
2963
struct geneve_pkt_info info;
2964
struct genevehdr *gnvh, gnvhdr;
2965
struct geneve_softc *sc;
2966
struct ip *iphdr;
2967
struct ip6_hdr *ip6hdr;
2968
struct ifnet *ifp;
2969
int32_t plen, af;
2970
uint32_t vni;
2971
uint16_t optlen, proto;
2972
int error;
2973
2974
M_ASSERTPKTHDR(m);
2975
plen = m->m_pkthdr.len;
2976
gnvso = xgnvso;
2977
2978
if (m->m_pkthdr.len < offset + sizeof(struct geneveudphdr))
2979
return (false);
2980
2981
/* Get ECN and TTL values for future processing */
2982
memset(&info, 0, sizeof(info));
2983
info.ethertype = geneve_get_ethertype(m);
2984
if (info.ethertype == ETHERTYPE_IP) {
2985
iphdr = mtodo(m, offset - sizeof(struct ip));
2986
info.ecn = (iphdr->ip_tos & IPTOS_ECN_MASK);
2987
info.ttl = iphdr->ip_ttl;
2988
} else if (info.ethertype == ETHERTYPE_IPV6) {
2989
ip6hdr = mtodo(m, offset - sizeof(struct ip6_hdr));
2990
info.ecn = IPV6_ECN(ip6hdr);
2991
info.ttl = ip6hdr->ip6_hlim;
2992
}
2993
2994
/* Get geneve header */
2995
offset += sizeof(struct udphdr);
2996
if (__predict_false(m->m_len < offset + sizeof(struct genevehdr))) {
2997
m_copydata(m, offset, sizeof(struct genevehdr), (caddr_t)&gnvhdr);
2998
gnvh = &gnvhdr;
2999
} else
3000
gnvh = mtodo(m, offset);
3001
3002
/*
3003
* Drop if there is a reserved bit or unknown version set in the header.
3004
* As defined in RFC 8926 3.4
3005
*/
3006
if (gnvh->geneve_ver != htons(GENEVE_VERSION) ||
3007
gnvh->geneve_vni & ~GENEVE_VNI_MASK)
3008
return (false);
3009
3010
/*
3011
* The length of the option fields, expressed in 4-byte multiples, not
3012
* including the 8-byte fixed tunnel header.
3013
*/
3014
optlen = ntohs(gnvh->geneve_optlen) * 4;
3015
error = geneve_next_option(gnvso, gnvh, &m);
3016
if (error != 0)
3017
return (false);
3018
3019
vni = ntohl(gnvh->geneve_vni) >> GENEVE_HDR_VNI_SHIFT;
3020
sc = geneve_socket_lookup_softc(gnvso, vni);
3021
if (sc == NULL)
3022
return (false);
3023
3024
ifp = sc->gnv_ifp;
3025
if ((sc->gnv_flags & GENEVE_FLAG_RUNNING) == 0)
3026
goto out;
3027
3028
proto = ntohs(gnvh->geneve_proto);
3029
m_adj(m, offset + sizeof(struct genevehdr) + optlen);
3030
3031
/* if next protocol is ethernet, check its ethertype and learn it */
3032
if (proto == GENEVE_PROTO_ETHER) {
3033
offset = ETHER_HDR_LEN;
3034
error = geneve_input_ether(sc, &m, srcsa, &info);
3035
if (error != 0)
3036
goto out;
3037
} else {
3038
info.ethertype = proto;
3039
af = geneve_map_etype_to_af(info.ethertype);
3040
offset = 0;
3041
}
3042
3043
error = geneve_input_inherit(sc, &m, offset, &info);
3044
if (error != 0)
3045
goto out;
3046
3047
if (ifp == m->m_pkthdr.rcvif)
3048
/* XXX Does not catch more complex loops. */
3049
goto out;
3050
3051
m_clrprotoflags(m);
3052
m->m_pkthdr.rcvif = ifp;
3053
M_SETFIB(m, ifp->if_fib);
3054
geneve_input_csum(m, ifp, sc->gnv_stats.rxcsum);
3055
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
3056
if_inc_counter(ifp, IFCOUNTER_IBYTES, plen);
3057
if (sc->gnv_mc_ifp != NULL)
3058
if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
3059
3060
MPASS(m != NULL);
3061
3062
if (proto == GENEVE_PROTO_ETHER)
3063
(*ifp->if_input)(ifp, m);
3064
else {
3065
BPF_MTAP2(ifp, &af, sizeof(af), m);
3066
netisr_dispatch_src(info.isr, (uintptr_t)xgnvso, m);
3067
}
3068
3069
m = NULL;
3070
out:
3071
geneve_release(sc);
3072
if (m != NULL) {
3073
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
3074
m_freem(m);
3075
}
3076
3077
return (true);
3078
}
3079
3080
static int
3081
geneve_input_ether(struct geneve_softc *sc, struct mbuf **m0,
3082
const struct sockaddr *sa, struct geneve_pkt_info *info)
3083
{
3084
struct mbuf *m;
3085
struct ether_header *eh;
3086
3087
m = *m0;
3088
3089
if (sc->gnv_proto != GENEVE_PROTO_ETHER)
3090
return (EPROTOTYPE);
3091
3092
if (m->m_pkthdr.len < ETHER_HDR_LEN)
3093
return (EINVAL);
3094
3095
if (m->m_len < ETHER_HDR_LEN &&
3096
(m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
3097
*m0 = NULL;
3098
return (ENOBUFS);
3099
}
3100
3101
eh = mtod(m, struct ether_header *);
3102
info->ethertype = ntohs(eh->ether_type);
3103
if (sc->gnv_flags & GENEVE_FLAG_LEARN)
3104
geneve_ftable_learn(sc, sa, eh->ether_shost);
3105
3106
*m0 = m;
3107
return (0);
3108
}
3109
3110
static int
3111
geneve_input_inherit(struct geneve_softc *sc, struct mbuf **m0,
3112
int offset, struct geneve_pkt_info *info)
3113
{
3114
struct mbuf *m;
3115
struct ip *iphdr;
3116
struct ip6_hdr *ip6hdr;
3117
uint8_t itos;
3118
3119
m = *m0;
3120
3121
switch (info->ethertype) {
3122
case ETHERTYPE_IP:
3123
offset += sizeof(struct ip);
3124
if (m->m_pkthdr.len < offset)
3125
return (EINVAL);
3126
3127
if (m->m_len < offset &&
3128
(m = m_pullup(m, offset)) == NULL) {
3129
*m0 = NULL;
3130
return (ENOBUFS);
3131
}
3132
iphdr = mtodo(m, offset - sizeof(struct ip));
3133
3134
if (ip_ecn_egress(ECN_COMPLETE, &info->ecn, &iphdr->ip_tos) == 0) {
3135
*m0 = NULL;
3136
return (ENOBUFS);
3137
}
3138
3139
if ((sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT) != 0 && info->ttl > 0)
3140
iphdr->ip_ttl = info->ttl;
3141
3142
info->isr = NETISR_IP;
3143
break;
3144
3145
case ETHERTYPE_IPV6:
3146
offset += sizeof(struct ip6_hdr);
3147
if (m->m_pkthdr.len < offset)
3148
return (EINVAL);
3149
3150
if (m->m_len < offset &&
3151
(m = m_pullup(m, offset)) == NULL) {
3152
*m0 = NULL;
3153
return (ENOBUFS);
3154
}
3155
ip6hdr = mtodo(m, offset - sizeof(struct ip6_hdr));
3156
3157
itos = (ntohl(ip6hdr->ip6_flow) >> IPV6_FLOWLABEL_LEN) & 0xff;
3158
if (ip_ecn_egress(ECN_COMPLETE, &info->ecn, &itos) == 0) {
3159
*m0 = NULL;
3160
return (ENOBUFS);
3161
}
3162
ip6hdr->ip6_flow |= htonl((uint32_t)itos << IPV6_FLOWLABEL_LEN);
3163
3164
if ((sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT) && (info->ttl > 0))
3165
ip6hdr->ip6_hlim = info->ttl;
3166
3167
info->isr = NETISR_IPV6;
3168
break;
3169
3170
case ETHERTYPE_ARP:
3171
if (sc->gnv_proto == GENEVE_PROTO_INHERIT)
3172
return (EINVAL);
3173
3174
offset += sizeof(struct arphdr);
3175
if (m->m_pkthdr.len < offset)
3176
return (EINVAL);
3177
3178
if (m->m_len < offset &&
3179
(m = m_pullup(m, offset)) == NULL) {
3180
*m0 = NULL;
3181
return (ENOBUFS);
3182
}
3183
info->isr = NETISR_ARP;
3184
break;
3185
3186
default:
3187
if_inc_counter(sc->gnv_ifp, IFCOUNTER_NOPROTO, 1);
3188
return (EINVAL);
3189
}
3190
3191
*m0 = m;
3192
return (0);
3193
}
3194
3195
static void
3196
geneve_stats_alloc(struct geneve_softc *sc)
3197
{
3198
struct geneve_statistics *stats = &sc->gnv_stats;
3199
3200
stats->txcsum = counter_u64_alloc(M_WAITOK);
3201
stats->tso = counter_u64_alloc(M_WAITOK);
3202
stats->rxcsum = counter_u64_alloc(M_WAITOK);
3203
}
3204
3205
static void
3206
geneve_stats_free(struct geneve_softc *sc)
3207
{
3208
struct geneve_statistics *stats = &sc->gnv_stats;
3209
3210
counter_u64_free(stats->txcsum);
3211
counter_u64_free(stats->tso);
3212
counter_u64_free(stats->rxcsum);
3213
}
3214
3215
static void
3216
geneve_set_default_config(struct geneve_softc *sc)
3217
{
3218
3219
sc->gnv_flags |= GENEVE_FLAG_LEARN;
3220
3221
sc->gnv_vni = GENEVE_VNI_MAX;
3222
sc->gnv_ttl = V_ip_defttl;
3223
3224
sc->gnv_src_addr.sin.sin_port = htons(GENEVE_UDPPORT);
3225
sc->gnv_dst_addr.sin.sin_port = htons(GENEVE_UDPPORT);
3226
3227
/*
3228
* RFC 8926 Section 3.3, the entire 16-bit range MAY
3229
* be used to maximize entropy.
3230
*/
3231
sc->gnv_min_port = V_ipport_firstauto;
3232
sc->gnv_max_port = V_ipport_lastauto;
3233
3234
sc->gnv_proto = GENEVE_PROTO_ETHER;
3235
3236
sc->gnv_ftable_max = GENEVE_FTABLE_MAX;
3237
sc->gnv_ftable_timeout = GENEVE_FTABLE_TIMEOUT;
3238
}
3239
3240
static int
3241
geneve_set_reqcap(struct geneve_softc *sc, struct ifnet *ifp, int reqcap,
3242
int reqcap2)
3243
{
3244
int mask = reqcap ^ ifp->if_capenable;
3245
3246
/* Disable TSO if tx checksums are disabled. */
3247
if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) &&
3248
reqcap & IFCAP_TSO4) {
3249
reqcap &= ~IFCAP_TSO4;
3250
if_printf(ifp, "tso4 disabled due to -txcsum.\n");
3251
}
3252
if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) &&
3253
reqcap & IFCAP_TSO6) {
3254
reqcap &= ~IFCAP_TSO6;
3255
if_printf(ifp, "tso6 disabled due to -txcsum6.\n");
3256
}
3257
3258
/* Do not enable TSO if tx checksums are disabled. */
3259
if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 &&
3260
!(reqcap & IFCAP_TXCSUM)) {
3261
if_printf(ifp, "enable txcsum first.\n");
3262
return (EAGAIN);
3263
}
3264
if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 &&
3265
!(reqcap & IFCAP_TXCSUM_IPV6)) {
3266
if_printf(ifp, "enable txcsum6 first.\n");
3267
return (EAGAIN);
3268
}
3269
3270
sc->gnv_reqcap = reqcap;
3271
sc->gnv_reqcap2 = reqcap2;
3272
return (0);
3273
}
3274
3275
/*
3276
* A GENEVE interface inherits the capabilities of the genevedev or the interface
3277
* hosting the genevelocal address.
3278
*/
3279
static void
3280
geneve_set_hwcaps(struct geneve_softc *sc)
3281
{
3282
struct epoch_tracker et;
3283
struct ifnet *p, *ifp;
3284
struct ifaddr *ifa;
3285
u_long hwa;
3286
int cap, ena;
3287
bool rel;
3288
3289
/* reset caps */
3290
ifp = sc->gnv_ifp;
3291
ifp->if_capabilities &= GENEVE_BASIC_IFCAPS;
3292
ifp->if_capenable &= GENEVE_BASIC_IFCAPS;
3293
ifp->if_hwassist = 0;
3294
3295
NET_EPOCH_ENTER(et);
3296
CURVNET_SET(ifp->if_vnet);
3297
3298
p = NULL;
3299
rel = false;
3300
if (sc->gnv_mc_ifname[0] != '\0') {
3301
rel = true;
3302
p = ifunit_ref(sc->gnv_mc_ifname);
3303
} else if (geneve_sockaddr_in_any(&sc->gnv_src_addr) == 0) {
3304
if (sc->gnv_src_addr.sa.sa_family == AF_INET) {
3305
struct sockaddr_in in4 = sc->gnv_src_addr.sin;
3306
3307
in4.sin_port = 0;
3308
ifa = ifa_ifwithaddr((struct sockaddr *)&in4);
3309
if (ifa != NULL)
3310
p = ifa->ifa_ifp;
3311
} else if (sc->gnv_src_addr.sa.sa_family == AF_INET6) {
3312
struct sockaddr_in6 in6 = sc->gnv_src_addr.sin6;
3313
3314
in6.sin6_port = 0;
3315
ifa = ifa_ifwithaddr((struct sockaddr *)&in6);
3316
if (ifa != NULL)
3317
p = ifa->ifa_ifp;
3318
}
3319
}
3320
if (p == NULL) {
3321
CURVNET_RESTORE();
3322
NET_EPOCH_EXIT(et);
3323
return;
3324
}
3325
3326
cap = ena = hwa = 0;
3327
3328
/* checksum offload */
3329
if ((p->if_capabilities2 & IFCAP2_BIT(IFCAP2_GENEVE_HWCSUM)) != 0)
3330
cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
3331
if ((p->if_capenable2 & IFCAP2_BIT(IFCAP2_GENEVE_HWCSUM)) != 0) {
3332
ena |= sc->gnv_reqcap & p->if_capenable & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
3333
if (ena & IFCAP_TXCSUM) {
3334
if (p->if_hwassist & CSUM_INNER_IP)
3335
hwa |= CSUM_IP;
3336
if (p->if_hwassist & CSUM_INNER_IP_UDP)
3337
hwa |= CSUM_IP_UDP;
3338
if (p->if_hwassist & CSUM_INNER_IP_TCP)
3339
hwa |= CSUM_IP_TCP;
3340
}
3341
if (ena & IFCAP_TXCSUM_IPV6) {
3342
if (p->if_hwassist & CSUM_INNER_IP6_UDP)
3343
hwa |= CSUM_IP6_UDP;
3344
if (p->if_hwassist & CSUM_INNER_IP6_TCP)
3345
hwa |= CSUM_IP6_TCP;
3346
}
3347
}
3348
3349
/* hardware TSO */
3350
if ((p->if_capabilities2 & IFCAP2_BIT(IFCAP2_GENEVE_HWTSO)) != 0) {
3351
cap |= p->if_capabilities & IFCAP_TSO;
3352
if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen)
3353
ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen;
3354
else
3355
ifp->if_hw_tsomax = p->if_hw_tsomax;
3356
ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1;
3357
ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize;
3358
}
3359
if ((p->if_capenable2 & IFCAP2_BIT(IFCAP2_GENEVE_HWTSO)) != 0) {
3360
ena |= sc->gnv_reqcap & p->if_capenable & IFCAP_TSO;
3361
if (ena & IFCAP_TSO) {
3362
if (p->if_hwassist & CSUM_INNER_IP_TSO)
3363
hwa |= CSUM_IP_TSO;
3364
if (p->if_hwassist & CSUM_INNER_IP6_TSO)
3365
hwa |= CSUM_IP6_TSO;
3366
}
3367
}
3368
3369
ifp->if_capabilities |= cap;
3370
ifp->if_capenable |= ena;
3371
ifp->if_hwassist |= hwa;
3372
if (rel)
3373
if_rele(p);
3374
3375
CURVNET_RESTORE();
3376
NET_EPOCH_EXIT(et);
3377
}
3378
3379
static int
3380
geneve_clone_create_nl(struct if_clone *ifc, char *name, size_t len,
3381
struct ifc_data_nl *ifd)
3382
{
3383
struct nl_parsed_link *lattrs = ifd->lattrs;
3384
struct nl_pstate *npt = ifd->npt;
3385
struct nl_parsed_geneve attrs = {};
3386
int error;
3387
3388
if ((lattrs->ifla_idata == NULL) ||
3389
(!nl_has_attr(ifd->bm, IFLA_LINKINFO))) {
3390
nlmsg_report_err_msg(npt, "geneve protocol is required");
3391
return (ENOTSUP);
3392
}
3393
3394
error = nl_parse_nested(lattrs->ifla_idata, &geneve_create_parser, npt, &attrs);
3395
if (error != 0)
3396
return (error);
3397
if (geneve_check_proto(attrs.ifla_proto)) {
3398
nlmsg_report_err_msg(npt, "Unsupported ethertype: 0x%04X", attrs.ifla_proto);
3399
return (ENOTSUP);
3400
}
3401
3402
struct geneve_params gnvp = { .ifla_proto = attrs.ifla_proto };
3403
struct ifc_data ifd_new = {
3404
.flags = IFC_F_SYSSPACE,
3405
.unit = ifd->unit,
3406
.params = &gnvp
3407
};
3408
3409
return (geneve_clone_create(ifc, name, len, &ifd_new, &ifd->ifp));
3410
}
3411
3412
static int
3413
geneve_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd)
3414
{
3415
struct geneve_softc *sc = ifp->if_softc;
3416
struct nl_parsed_link *lattrs = ifd->lattrs;
3417
struct nl_pstate *npt = ifd->npt;
3418
struct nl_parsed_geneve params;
3419
struct nlattr *attrs = lattrs->ifla_idata;
3420
struct nlattr_bmask bm;
3421
int error = 0;
3422
3423
if ((attrs == NULL) ||
3424
(nl_has_attr(ifd->bm, IFLA_LINKINFO) == 0)) {
3425
error = nl_modify_ifp_generic(ifp, lattrs, ifd->bm, npt);
3426
return (error);
3427
}
3428
3429
error = priv_check(curthread, PRIV_NET_GENEVE);
3430
if (error)
3431
return (error);
3432
3433
/* make sure ignored attributes by nl_parse will not cause panics */
3434
memset(&params, 0, sizeof(params));
3435
3436
nl_get_attrs_bmask_raw(NLA_DATA(attrs), NLA_DATA_LEN(attrs), &bm);
3437
error = nl_parse_nested(attrs, &geneve_modify_parser, npt, &params);
3438
3439
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_ID))
3440
error = geneve_set_vni_nl(sc, npt, params.ifla_vni);
3441
3442
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_LOCAL))
3443
error = geneve_set_local_addr_nl(sc, npt, params.ifla_local);
3444
3445
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_REMOTE))
3446
error = geneve_set_remote_addr_nl(sc, npt, params.ifla_remote);
3447
3448
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_LOCAL_PORT))
3449
error = geneve_set_local_port_nl(sc, npt, params.ifla_local_port);
3450
3451
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_PORT))
3452
error = geneve_set_remote_port_nl(sc, npt, params.ifla_remote_port);
3453
3454
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_PORT_RANGE))
3455
error = geneve_set_port_range_nl(sc, npt, params.ifla_port_range);
3456
3457
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_DF))
3458
error = geneve_set_df_nl(sc, npt, params.ifla_df);
3459
3460
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_TTL))
3461
error = geneve_set_ttl_nl(sc, npt, params.ifla_ttl);
3462
3463
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_TTL_INHERIT))
3464
error = geneve_set_ttl_inherit_nl(sc, npt, params.ifla_ttl_inherit);
3465
3466
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_DSCP_INHERIT))
3467
error = geneve_set_dscp_inherit_nl(sc, npt, params.ifla_dscp_inherit);
3468
3469
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_COLLECT_METADATA))
3470
error = geneve_set_collect_metadata_nl(sc, npt, params.ifla_external);
3471
3472
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_LEARN))
3473
error = geneve_set_learn_nl(sc, npt, params.ifla_ftable_learn);
3474
3475
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_FLUSH))
3476
error = geneve_flush_ftable_nl(sc, npt, params.ifla_ftable_flush);
3477
3478
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_MAX))
3479
error = geneve_set_ftable_max_nl(sc, npt, params.ifla_ftable_max);
3480
3481
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_TIMEOUT))
3482
error = geneve_set_ftable_timeout_nl(sc, npt, params.ifla_ftable_timeout);
3483
3484
if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_MC_IFNAME))
3485
error = geneve_set_mc_if_nl(sc, npt, params.ifla_mc_ifname);
3486
3487
if (error == 0)
3488
error = nl_modify_ifp_generic(ifp, lattrs, ifd->bm, npt);
3489
3490
return (error);
3491
}
3492
3493
static void
3494
geneve_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw)
3495
{
3496
struct geneve_softc *sc;
3497
struct rm_priotracker tracker;
3498
int off, off2;
3499
3500
nlattr_add_u32(nw, IFLA_LINK, ifp->if_index);
3501
nlattr_add_string(nw, IFLA_IFNAME, ifp->if_xname);
3502
3503
off = nlattr_add_nested(nw, IFLA_LINKINFO);
3504
if (off == 0)
3505
return;
3506
3507
nlattr_add_string(nw, IFLA_INFO_KIND, "geneve");
3508
off2 = nlattr_add_nested(nw, IFLA_INFO_DATA);
3509
if (off2 == 0) {
3510
nlattr_set_len(nw, off);
3511
return;
3512
}
3513
3514
sc = ifp->if_softc;
3515
GENEVE_RLOCK(sc, &tracker);
3516
3517
nlattr_add_u32(nw, IFLA_GENEVE_ID, sc->gnv_vni);
3518
nlattr_add_u16(nw, IFLA_GENEVE_PROTOCOL, sc->gnv_proto);
3519
geneve_get_local_addr_nl(sc, nw);
3520
geneve_get_remote_addr_nl(sc, nw);
3521
nlattr_add_u16(nw, IFLA_GENEVE_LOCAL_PORT, geneve_get_local_port(sc));
3522
nlattr_add_u16(nw, IFLA_GENEVE_PORT, geneve_get_remote_port(sc));
3523
3524
const struct ifla_geneve_port_range port_range = {
3525
.low = sc->gnv_min_port,
3526
.high = sc->gnv_max_port
3527
};
3528
nlattr_add(nw, IFLA_GENEVE_PORT_RANGE, sizeof(port_range), &port_range);
3529
3530
nlattr_add_u8(nw, IFLA_GENEVE_DF, (uint8_t)sc->gnv_df);
3531
nlattr_add_u8(nw, IFLA_GENEVE_TTL, sc->gnv_ttl);
3532
nlattr_add_bool(nw, IFLA_GENEVE_TTL_INHERIT,
3533
sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT);
3534
nlattr_add_bool(nw, IFLA_GENEVE_DSCP_INHERIT,
3535
sc->gnv_flags & GENEVE_FLAG_DSCP_INHERIT);
3536
nlattr_add_bool(nw, IFLA_GENEVE_COLLECT_METADATA,
3537
sc->gnv_flags & GENEVE_FLAG_COLLECT_METADATA);
3538
3539
nlattr_add_bool(nw, IFLA_GENEVE_FTABLE_LEARN,
3540
sc->gnv_flags & GENEVE_FLAG_LEARN);
3541
nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_MAX, sc->gnv_ftable_max);
3542
nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_TIMEOUT, sc->gnv_ftable_timeout);
3543
nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_COUNT, sc->gnv_ftable_cnt);
3544
nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_NOSPACE_CNT, sc->gnv_stats.ftable_nospace);
3545
nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_LOCK_UP_FAIL_CNT,
3546
sc->gnv_stats.ftable_lock_upgrade_failed);
3547
3548
nlattr_add_string(nw, IFLA_GENEVE_MC_IFNAME, sc->gnv_mc_ifname);
3549
nlattr_add_u32(nw, IFLA_GENEVE_MC_IFINDEX, sc->gnv_mc_ifindex);
3550
3551
nlattr_add_u64(nw, IFLA_GENEVE_TXCSUM_CNT,
3552
counter_u64_fetch(sc->gnv_stats.txcsum));
3553
nlattr_add_u64(nw, IFLA_GENEVE_TSO_CNT,
3554
counter_u64_fetch(sc->gnv_stats.tso));
3555
nlattr_add_u64(nw, IFLA_GENEVE_RXCSUM_CNT,
3556
counter_u64_fetch(sc->gnv_stats.rxcsum));
3557
3558
nlattr_set_len(nw, off2);
3559
nlattr_set_len(nw, off);
3560
3561
GENEVE_RUNLOCK(sc, &tracker);
3562
}
3563
3564
static int
3565
geneve_clone_create(struct if_clone *ifc, char *name, size_t len,
3566
struct ifc_data *ifd, struct ifnet **ifpp)
3567
{
3568
struct geneve_softc *sc;
3569
struct geneve_params gnvp;
3570
struct ifnet *ifp;
3571
int error;
3572
3573
sc = malloc(sizeof(struct geneve_softc), M_GENEVE, M_WAITOK | M_ZERO);
3574
sc->gnv_fibnum = curthread->td_proc->p_fibnum;
3575
geneve_set_default_config(sc);
3576
3577
if (ifd != NULL) {
3578
error = ifc_copyin(ifd, &gnvp, sizeof(gnvp));
3579
if (error != 0 ||
3580
(error = geneve_check_proto(gnvp.ifla_proto)) != 0) {
3581
free(sc, M_GENEVE);
3582
return (error);
3583
}
3584
3585
sc->gnv_proto = gnvp.ifla_proto;
3586
}
3587
3588
if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
3589
ifp = if_alloc(IFT_ETHER);
3590
ifp->if_flags |= IFF_SIMPLEX | IFF_BROADCAST;
3591
geneve_ftable_init(sc);
3592
callout_init_rw(&sc->gnv_callout, &sc->gnv_lock, 0);
3593
} else if (sc->gnv_proto == GENEVE_PROTO_INHERIT) {
3594
ifp = if_alloc(IFT_TUNNEL);
3595
ifp->if_flags |= IFF_NOARP;
3596
} else {
3597
free(sc, M_GENEVE);
3598
return (EINVAL);
3599
}
3600
3601
geneve_stats_alloc(sc);
3602
sc->gnv_ifp = ifp;
3603
rm_init(&sc->gnv_lock, "geneverm");
3604
sc->gnv_port_hash_key = arc4random();
3605
3606
ifp->if_softc = sc;
3607
if_initname(ifp, geneve_name, ifd->unit);
3608
ifp->if_flags |= IFF_MULTICAST;
3609
ifp->if_init = geneve_init;
3610
ifp->if_ioctl = geneve_ioctl;
3611
ifp->if_transmit = geneve_transmit;
3612
ifp->if_qflush = geneve_qflush;
3613
ifp->if_capabilities = GENEVE_BASIC_IFCAPS;
3614
ifp->if_capenable = GENEVE_BASIC_IFCAPS;
3615
sc->gnv_reqcap = -1;
3616
geneve_set_hwcaps(sc);
3617
3618
if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
3619
ifmedia_init(&sc->gnv_media, 0, geneve_media_change, geneve_media_status);
3620
ifmedia_add(&sc->gnv_media, IFM_ETHER | IFM_AUTO, 0, NULL);
3621
ifmedia_set(&sc->gnv_media, IFM_ETHER | IFM_AUTO);
3622
3623
ether_gen_addr(ifp, &sc->gnv_hwaddr);
3624
ether_ifattach(ifp, sc->gnv_hwaddr.octet);
3625
3626
ifp->if_baudrate = 0;
3627
} else {
3628
ifp->if_output = geneve_output;
3629
3630
if_attach(ifp);
3631
bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
3632
}
3633
3634
GENEVE_WLOCK(sc);
3635
geneve_setup_interface_hdrlen(sc);
3636
GENEVE_WUNLOCK(sc);
3637
*ifpp = ifp;
3638
3639
return (0);
3640
}
3641
3642
static int
3643
geneve_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
3644
{
3645
struct geneve_softc *sc;
3646
3647
sc = if_getsoftc(ifp);
3648
geneve_teardown(sc);
3649
3650
if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
3651
geneve_ftable_flush(sc, 1);
3652
3653
ether_ifdetach(ifp);
3654
if_free(ifp);
3655
ifmedia_removeall(&sc->gnv_media);
3656
3657
geneve_ftable_fini(sc);
3658
} else {
3659
bpfdetach(ifp);
3660
if_detach(ifp);
3661
if_free(ifp);
3662
}
3663
3664
rm_destroy(&sc->gnv_lock);
3665
geneve_stats_free(sc);
3666
free(sc, M_GENEVE);
3667
3668
return (0);
3669
}
3670
3671
/* BMV: Taken from if_bridge. */
3672
static uint32_t
3673
geneve_mac_hash(struct geneve_softc *sc, const uint8_t *addr)
3674
{
3675
uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->gnv_ftable_hash_key;
3676
3677
b += addr[5] << 8;
3678
b += addr[4];
3679
a += addr[3] << 24;
3680
a += addr[2] << 16;
3681
a += addr[1] << 8;
3682
a += addr[0];
3683
3684
/*
3685
* The following hash function is adapted from "Hash Functions" by Bob Jenkins
3686
* ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3687
*/
3688
#define mix(a, b, c) \
3689
do { \
3690
a -= b; a -= c; a ^= (c >> 13); \
3691
b -= c; b -= a; b ^= (a << 8); \
3692
c -= a; c -= b; c ^= (b >> 13); \
3693
a -= b; a -= c; a ^= (c >> 12); \
3694
b -= c; b -= a; b ^= (a << 16); \
3695
c -= a; c -= b; c ^= (b >> 5); \
3696
a -= b; a -= c; a ^= (c >> 3); \
3697
b -= c; b -= a; b ^= (a << 10); \
3698
c -= a; c -= b; c ^= (b >> 15); \
3699
} while (0)
3700
3701
mix(a, b, c);
3702
3703
#undef mix
3704
3705
return (c);
3706
}
3707
3708
/*
 * ifmedia change callback.  Media change requests are accepted and
 * ignored, so this always reports success.
 */
static int
geneve_media_change(struct ifnet *ifp)
{

	return (0);
}
3715
3716
static void
3717
geneve_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3718
{
3719
3720
ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
3721
ifmr->ifm_active = IFM_ETHER | IFM_FDX;
3722
}
3723
3724
static int
3725
geneve_sockaddr_cmp(const union sockaddr_union *unsa,
3726
const struct sockaddr *sa)
3727
{
3728
3729
return (memcmp(&unsa->sa, sa, unsa->sa.sa_len));
3730
}
3731
3732
static void
3733
geneve_sockaddr_copy(union sockaddr_union *dst,
3734
const struct sockaddr *sa)
3735
{
3736
3737
MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
3738
memset(dst, 0, sizeof(*dst));
3739
3740
if (sa->sa_family == AF_INET) {
3741
dst->sin = *SATOCONSTSIN(sa);
3742
dst->sin.sin_len = sizeof(struct sockaddr_in);
3743
} else if (sa->sa_family == AF_INET6) {
3744
dst->sin6 = *SATOCONSTSIN6(sa);
3745
dst->sin6.sin6_len = sizeof(struct sockaddr_in6);
3746
}
3747
}
3748
3749
static int
3750
geneve_sockaddr_in_equal(const union sockaddr_union *unsa,
3751
const struct sockaddr *sa)
3752
{
3753
int equal;
3754
3755
if (sa->sa_family == AF_INET) {
3756
const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
3757
equal = in4->s_addr == unsa->sin.sin_addr.s_addr;
3758
} else if (sa->sa_family == AF_INET6) {
3759
const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
3760
equal = IN6_ARE_ADDR_EQUAL(in6, &unsa->sin6.sin6_addr);
3761
} else
3762
equal = 0;
3763
3764
return (equal);
3765
}
3766
3767
static void
3768
geneve_sockaddr_in_copy(union sockaddr_union *dst,
3769
const struct sockaddr *sa)
3770
{
3771
3772
MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
3773
3774
if (sa->sa_family == AF_INET) {
3775
const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
3776
dst->sin.sin_family = AF_INET;
3777
dst->sin.sin_len = sizeof(struct sockaddr_in);
3778
dst->sin.sin_addr = *in4;
3779
} else if (sa->sa_family == AF_INET6) {
3780
const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
3781
dst->sin6.sin6_family = AF_INET6;
3782
dst->sin6.sin6_len = sizeof(struct sockaddr_in6);
3783
dst->sin6.sin6_addr = *in6;
3784
}
3785
}
3786
3787
static int
3788
geneve_sockaddr_supported(const union sockaddr_union *gnvaddr, int unspec)
3789
{
3790
const struct sockaddr *sa;
3791
int supported;
3792
3793
sa = &gnvaddr->sa;
3794
supported = 0;
3795
3796
if (sa->sa_family == AF_UNSPEC && unspec != 0) {
3797
supported = 1;
3798
} else if (sa->sa_family == AF_INET) {
3799
supported = 1;
3800
} else if (sa->sa_family == AF_INET6) {
3801
supported = 1;
3802
}
3803
3804
return (supported);
3805
}
3806
3807
static int
3808
geneve_sockaddr_in_any(const union sockaddr_union *gnvaddr)
3809
{
3810
const struct sockaddr *sa;
3811
int any;
3812
3813
sa = &gnvaddr->sa;
3814
3815
if (sa->sa_family == AF_INET) {
3816
const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
3817
any = in4->s_addr == INADDR_ANY;
3818
} else if (sa->sa_family == AF_INET6) {
3819
const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
3820
any = IN6_IS_ADDR_UNSPECIFIED(in6);
3821
} else
3822
any = -1;
3823
3824
return (any);
3825
}
3826
3827
static int
3828
geneve_can_change_config(struct geneve_softc *sc)
3829
{
3830
3831
GENEVE_LOCK_ASSERT(sc);
3832
3833
if (sc->gnv_flags & GENEVE_FLAG_RUNNING)
3834
return (0);
3835
if (sc->gnv_flags & (GENEVE_FLAG_INIT | GENEVE_FLAG_TEARDOWN))
3836
return (0);
3837
if (sc->gnv_flags & GENEVE_FLAG_COLLECT_METADATA)
3838
return (0);
3839
3840
return (1);
3841
}
3842
3843
static int
3844
geneve_check_proto(uint16_t proto)
3845
{
3846
int error;
3847
3848
switch (proto) {
3849
case GENEVE_PROTO_ETHER:
3850
case GENEVE_PROTO_INHERIT:
3851
error = 0;
3852
break;
3853
3854
default:
3855
error = EAFNOSUPPORT;
3856
break;
3857
}
3858
3859
return (error);
3860
}
3861
3862
static int
3863
geneve_check_multicast_addr(const union sockaddr_union *sa)
3864
{
3865
int mc;
3866
3867
if (sa->sa.sa_family == AF_INET) {
3868
const struct in_addr *in4 = &SATOCONSTSIN(sa)->sin_addr;
3869
mc = IN_MULTICAST(ntohl(in4->s_addr));
3870
} else if (sa->sa.sa_family == AF_INET6) {
3871
const struct in6_addr *in6 = &SATOCONSTSIN6(sa)->sin6_addr;
3872
mc = IN6_IS_ADDR_MULTICAST(in6);
3873
} else
3874
mc = EINVAL;
3875
3876
return (mc);
3877
}
3878
3879
static int
3880
geneve_check_sockaddr(const union sockaddr_union *sa, const int len)
3881
{
3882
int error;
3883
3884
error = 0;
3885
switch (sa->sa.sa_family) {
3886
case AF_INET:
3887
case AF_INET6:
3888
if (len < sizeof(struct sockaddr))
3889
error = EINVAL;
3890
break;
3891
3892
default:
3893
error = EAFNOSUPPORT;
3894
}
3895
3896
return (error);
3897
}
3898
3899
static int
3900
geneve_prison_remove(void *obj, void *data __unused)
3901
{
3902
#ifdef VIMAGE
3903
struct prison *pr;
3904
3905
pr = obj;
3906
if (prison_owns_vnet(pr)) {
3907
CURVNET_SET(pr->pr_vnet);
3908
if (V_geneve_cloner != NULL) {
3909
ifc_detach_cloner(V_geneve_cloner);
3910
V_geneve_cloner = NULL;
3911
}
3912
CURVNET_RESTORE();
3913
}
3914
#endif
3915
return (0);
3916
}
3917
3918
static void
3919
vnet_geneve_load(void)
3920
{
3921
struct if_clone_addreq_v2 req = {
3922
.version = 2,
3923
.flags = IFC_F_AUTOUNIT,
3924
.match_f = NULL,
3925
.create_f = geneve_clone_create,
3926
.destroy_f = geneve_clone_destroy,
3927
.create_nl_f = geneve_clone_create_nl,
3928
.modify_nl_f = geneve_clone_modify_nl,
3929
.dump_nl_f = geneve_clone_dump_nl,
3930
};
3931
V_geneve_cloner = ifc_attach_cloner(geneve_name, (struct if_clone_addreq *)&req);
3932
}
3933
VNET_SYSINIT(vnet_geneve_load, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_geneve_load, NULL);
3934
3935
static void
3936
vnet_geneve_unload(void)
3937
{
3938
3939
if (V_geneve_cloner != NULL)
3940
ifc_detach_cloner(V_geneve_cloner);
3941
}
3942
VNET_SYSUNINIT(vnet_geneve_unload, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_geneve_unload, NULL);
3943
3944
static void
3945
geneve_module_init(void)
3946
{
3947
mtx_init(&geneve_list_mtx, "geneve list", NULL, MTX_DEF);
3948
osd_method_t methods[PR_MAXMETHOD] = {
3949
[PR_METHOD_REMOVE] = geneve_prison_remove,
3950
};
3951
3952
geneve_osd_jail_slot = osd_jail_register(NULL, methods);
3953
NL_VERIFY_PARSERS(all_parsers);
3954
}
3955
3956
static void
3957
geneve_module_deinit(void)
3958
{
3959
struct if_clone *clone;
3960
VNET_ITERATOR_DECL(vnet_iter);
3961
3962
VNET_LIST_RLOCK();
3963
VNET_FOREACH(vnet_iter) {
3964
clone = VNET_VNET(vnet_iter, geneve_cloner);
3965
if (clone != NULL) {
3966
ifc_detach_cloner(clone);
3967
VNET_VNET(vnet_iter, geneve_cloner) = NULL;
3968
}
3969
}
3970
VNET_LIST_RUNLOCK();
3971
NET_EPOCH_WAIT();
3972
MPASS(LIST_EMPTY(&geneve_socket_list));
3973
mtx_destroy(&geneve_list_mtx);
3974
if (geneve_osd_jail_slot != 0)
3975
osd_jail_deregister(geneve_osd_jail_slot);
3976
}
3977
3978
static int
3979
geneve_modevent(module_t mod, int type, void *unused)
3980
{
3981
int error;
3982
3983
error = 0;
3984
3985
switch (type) {
3986
case MOD_LOAD:
3987
geneve_module_init();
3988
break;
3989
3990
case MOD_UNLOAD:
3991
geneve_module_deinit();
3992
break;
3993
3994
default:
3995
error = ENOTSUP;
3996
break;
3997
}
3998
3999
return (error);
4000
}
4001
4002
static moduledata_t geneve_mod = {
4003
"if_geneve",
4004
geneve_modevent,
4005
0
4006
};
4007
4008
DECLARE_MODULE(if_geneve, geneve_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
4009
MODULE_VERSION(if_geneve, 1);
4010
4011