GitHub Repository: torvalds/linux
Path: blob/master/net/mctp/route.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Management Component Transport Protocol (MCTP) - routing
 * implementation.
 *
 * This is currently based on a simple routing table, with no dst cache. The
 * number of routes should stay fairly small, so the lookup cost is small.
 *
 * Copyright (c) 2021 Code Construct
 * Copyright (c) 2021 Google
 */

#include <linux/idr.h>
#include <linux/kconfig.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

#include <kunit/static_stub.h>

#include <uapi/linux/if_arp.h>

#include <net/mctp.h>
#include <net/mctpdevice.h>
#include <net/netlink.h>
#include <net/sock.h>

#include <trace/events/mctp.h>

static const unsigned int mctp_message_maxlen = 64 * 1024;
static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;
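/* For illustration: with CONFIG_HZ=250, mctp_key_lifetime works out to
 * 1500 jiffies, ie. a six-second window for reassembly and tag reuse.
 */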

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev);

/* route output callbacks */
static int mctp_dst_discard(struct mctp_dst *dst, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}

static struct mctp_sock *mctp_lookup_bind_details(struct net *net,
						  struct sk_buff *skb,
						  u8 type, u8 dest,
						  u8 src, bool allow_net_any)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct sock *sk;
	u8 hash;

	WARN_ON_ONCE(!rcu_read_lock_held());

	hash = mctp_bind_hash(type, dest, src);

	sk_for_each_rcu(sk, &net->mctp.binds[hash]) {
		struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

		if (!allow_net_any && msk->bind_net == MCTP_NET_ANY)
			continue;

		if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
			continue;

		if (msk->bind_type != type)
			continue;

		if (msk->bind_peer_set &&
		    !mctp_address_matches(msk->bind_peer_addr, src))
			continue;

		if (!mctp_address_matches(msk->bind_local_addr, dest))
			continue;

		return msk;
	}

	return NULL;
}

static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
	struct mctp_sock *msk;
	struct mctp_hdr *mh;
	u8 type;

	/* TODO: look up in skb->cb? */
	mh = mctp_hdr(skb);

	if (!skb_headlen(skb))
		return NULL;

	type = (*(u8 *)skb->data) & 0x7f;

	/* Look for binds in order of widening scope. A given destination or
	 * source address also implies matching on a particular network.
	 *
	 * - Matching destination and source
	 * - Matching destination
	 * - Matching source
	 * - Matching network, any address
	 * - Any network or address
	 */

	msk = mctp_lookup_bind_details(net, skb, type, mh->dest, mh->src,
				       false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY, mh->src,
				       false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, mh->dest, MCTP_ADDR_ANY,
				       false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY,
				       MCTP_ADDR_ANY, false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY,
				       MCTP_ADDR_ANY, true);
	if (msk)
		return msk;

	return NULL;
}
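
/* For example (illustrative values): a type-1 message from (net 1, EID 9)
 * to local EID 8 matches a socket bound to (type 1, local 8, peer 9) ahead
 * of one bound to (type 1, local MCTP_ADDR_ANY) on the same network; a
 * MCTP_NET_ANY bind is only reached as the final, widest step.
 */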

/* A note on the key allocations.
 *
 * struct net->mctp.keys contains our set of currently-allocated keys for
 * MCTP tag management. The lookup tuple for these is the peer EID,
 * local EID and MCTP tag.
 *
 * In some cases, the peer EID may be MCTP_EID_ANY: for example, when a
 * broadcast message is sent, we may receive responses from any peer EID.
 * Because the broadcast dest address is equivalent to ANY, we create
 * a key with (local = local-eid, peer = ANY). This allows a match on the
 * incoming broadcast responses from any peer.
 *
 * We perform lookups when packets are received, and when tags are allocated
 * in two scenarios:
 *
 * - when a packet is sent, with a locally-owned tag: we need to find an
 *   unused tag value for the (local, peer) EID pair.
 *
 * - when a tag is manually allocated: we need to find an unused tag value
 *   for the peer EID, but don't have a specific local EID at that stage.
 *
 * in the latter case, on successful allocation, we end up with a tag with
 * (local = ANY, peer = peer-eid).
 *
 * So, the key set allows both a local EID of ANY, as well as a peer EID of
 * ANY in the lookup tuple. Both may be ANY if we prealloc for a broadcast.
 * The matching (in mctp_key_match()) during lookup allows the match value to
 * be ANY in either the dest or source addresses.
 *
 * When allocating (+ inserting) a tag, we need to check for conflicts amongst
 * the existing tag set. This requires matching either exactly on the local
 * and peer addresses, or either being ANY.
 */
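
/* For example (illustrative values): a normal request from local EID 8 to
 * peer EID 9 with tag 2 yields a key of (local 8, peer 9, tag 2); a
 * broadcast with the same tag yields (local 8, peer ANY, tag 2); a manual
 * allocation for peer 9 yields (local ANY, peer 9, tag N).
 */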

static bool mctp_key_match(struct mctp_sk_key *key, unsigned int net,
			   mctp_eid_t local, mctp_eid_t peer, u8 tag)
{
	if (key->net != net)
		return false;

	if (!mctp_address_matches(key->local_addr, local))
		return false;

	if (!mctp_address_matches(key->peer_addr, peer))
		return false;

	if (key->tag != tag)
		return false;

	return true;
}

/* returns a key (with key->lock held, and refcounted), or NULL if no such
 * key exists.
 */
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
					   unsigned int netid, mctp_eid_t peer,
					   unsigned long *irqflags)
	__acquires(&key->lock)
{
	struct mctp_sk_key *key, *ret;
	unsigned long flags;
	struct mctp_hdr *mh;
	u8 tag;

	mh = mctp_hdr(skb);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	ret = NULL;
	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	hlist_for_each_entry(key, &net->mctp.keys, hlist) {
		if (!mctp_key_match(key, netid, mh->dest, peer, tag))
			continue;

		spin_lock(&key->lock);
		if (key->valid) {
			refcount_inc(&key->refs);
			ret = key;
			break;
		}
		spin_unlock(&key->lock);
	}

	if (ret) {
		spin_unlock(&net->mctp.keys_lock);
		*irqflags = flags;
	} else {
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	return ret;
}

static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
					  unsigned int net,
					  mctp_eid_t local, mctp_eid_t peer,
					  u8 tag, gfp_t gfp)
{
	struct mctp_sk_key *key;

	key = kzalloc(sizeof(*key), gfp);
	if (!key)
		return NULL;

	key->net = net;
	key->peer_addr = peer;
	key->local_addr = local;
	key->tag = tag;
	key->sk = &msk->sk;
	key->valid = true;
	spin_lock_init(&key->lock);
	refcount_set(&key->refs, 1);
	sock_hold(key->sk);

	return key;
}

void mctp_key_unref(struct mctp_sk_key *key)
{
	unsigned long flags;

	if (!refcount_dec_and_test(&key->refs))
		return;

	/* even though no refs exist here, the lock allows us to stay
	 * consistent with the locking requirement of mctp_dev_release_key
	 */
	spin_lock_irqsave(&key->lock, flags);
	mctp_dev_release_key(key->dev, key);
	spin_unlock_irqrestore(&key->lock, flags);

	sock_put(key->sk);
	kfree(key);
}

static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
	struct net *net = sock_net(&msk->sk);
	struct mctp_sk_key *tmp;
	unsigned long flags;
	int rc = 0;

	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	if (sock_flag(&msk->sk, SOCK_DEAD)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
		if (mctp_key_match(tmp, key->net, key->local_addr,
				   key->peer_addr, key->tag)) {
			spin_lock(&tmp->lock);
			if (tmp->valid)
				rc = -EEXIST;
			spin_unlock(&tmp->lock);
			if (rc)
				break;
		}
	}

	if (!rc) {
		refcount_inc(&key->refs);
		key->expiry = jiffies + mctp_key_lifetime;
		timer_reduce(&msk->key_expiry, key->expiry);

		hlist_add_head(&key->hlist, &net->mctp.keys);
		hlist_add_head(&key->sklist, &msk->keys);
	}

out_unlock:
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	return rc;
}

/* Helper for mctp_dst_input().
 * We're done with the key; unlock and unref the key.
 * For the usual case of automatic expiry we remove the key from lists.
 * In the case that manual allocation is set on a key we release the lock
 * and local ref, reset reassembly, but don't remove from lists.
 */
static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net,
			       unsigned long flags, unsigned long reason)
	__releases(&key->lock)
{
	struct sk_buff *skb;

	trace_mctp_key_release(key, reason);
	skb = key->reasm_head;
	key->reasm_head = NULL;

	if (!key->manual_alloc) {
		key->reasm_dead = true;
		key->valid = false;
		mctp_dev_release_key(key->dev, key);
	}
	spin_unlock_irqrestore(&key->lock, flags);

	if (!key->manual_alloc) {
		spin_lock_irqsave(&net->mctp.keys_lock, flags);
		if (!hlist_unhashed(&key->hlist)) {
			hlist_del_init(&key->hlist);
			hlist_del_init(&key->sklist);
			mctp_key_unref(key);
		}
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	/* and one for the local reference */
	mctp_key_unref(key);

	kfree_skb(skb);
}

#ifdef CONFIG_MCTP_FLOWS
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key)
{
	struct mctp_flow *flow;

	flow = skb_ext_add(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	refcount_inc(&key->refs);
	flow->key = key;
}

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
{
	struct mctp_sk_key *key;
	struct mctp_flow *flow;

	flow = skb_ext_find(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	key = flow->key;

	if (key->dev) {
		WARN_ON(key->dev != dev);
		return;
	}

	mctp_dev_set_key(dev, key);
}
#else
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {}
#endif

/* takes ownership of skb, both in success and failure cases */
static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
	struct mctp_hdr *hdr = mctp_hdr(skb);
	u8 exp_seq, this_seq;

	this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
		& MCTP_HDR_SEQ_MASK;

	if (!key->reasm_head) {
		/* Since we're manipulating the shared frag_list, ensure it
		 * isn't shared with any other SKBs. In the cloned case,
		 * this will free the skb; callers can no longer access it
		 * safely.
		 */
		key->reasm_head = skb_unshare(skb, GFP_ATOMIC);
		if (!key->reasm_head)
			return -ENOMEM;

		key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list);
		key->last_seq = this_seq;
		return 0;
	}

	exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;

	if (this_seq != exp_seq)
		goto err_free;

	if (key->reasm_head->len + skb->len > mctp_message_maxlen)
		goto err_free;

	skb->next = NULL;
	skb->sk = NULL;
	*key->reasm_tailp = skb;
	key->reasm_tailp = &skb->next;

	key->last_seq = this_seq;

	key->reasm_head->data_len += skb->len;
	key->reasm_head->len += skb->len;
	key->reasm_head->truesize += skb->truesize;

	return 0;

err_free:
	kfree_skb(skb);
	return -EINVAL;
}
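
/* The sequence number is two bits wide (MCTP_HDR_SEQ_MASK), so it wraps
 * within a message: seq 2, 3, 0, 1 is a valid run of fragments, while
 * seq 2, 0 is a gap and fails the exp_seq check above. (Worked example
 * for illustration.)
 */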

static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb)
{
	struct mctp_sk_key *key, *any_key = NULL;
	struct net *net = dev_net(skb->dev);
	struct mctp_sock *msk;
	struct mctp_hdr *mh;
	unsigned int netid;
	unsigned long f;
	u8 tag, flags;
	int rc;

	msk = NULL;
	rc = -EINVAL;

	/* We may be receiving a locally-routed packet; drop source sk
	 * accounting.
	 *
	 * From here, we will either queue the skb - either to a frag_queue, or
	 * to a receiving socket. When that succeeds, we clear the skb pointer;
	 * a non-NULL skb on exit will be otherwise unowned, and hence
	 * kfree_skb()-ed.
	 */
	skb_orphan(skb);

	if (skb->pkt_type == PACKET_OUTGOING)
		skb->pkt_type = PACKET_LOOPBACK;

	/* ensure we have enough data for a header and a type */
	if (skb->len < sizeof(struct mctp_hdr) + 1)
		goto out;

	/* grab header, advance data ptr */
	mh = mctp_hdr(skb);
	netid = mctp_cb(skb)->net;
	skb_pull(skb, sizeof(struct mctp_hdr));

	if (mh->ver != 1)
		goto out;

	flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	rcu_read_lock();

	/* lookup socket / reasm context, exactly matching (src,dest,tag).
	 * we hold a ref on the key, and key->lock held.
	 */
	key = mctp_lookup_key(net, skb, netid, mh->src, &f);

	if (flags & MCTP_HDR_FLAG_SOM) {
		if (key) {
			msk = container_of(key->sk, struct mctp_sock, sk);
		} else {
			/* first response to a broadcast? do a more general
			 * key lookup to find the socket, but don't use this
			 * key for reassembly - we'll create a more specific
			 * one for future packets if required (ie, !EOM).
			 *
			 * this lookup requires key->peer to be MCTP_ADDR_ANY,
			 * it doesn't match just any key->peer.
			 */
			any_key = mctp_lookup_key(net, skb, netid,
						  MCTP_ADDR_ANY, &f);
			if (any_key) {
				msk = container_of(any_key->sk,
						   struct mctp_sock, sk);
				spin_unlock_irqrestore(&any_key->lock, f);
			}
		}

		if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
			msk = mctp_lookup_bind(net, skb);

		if (!msk) {
			rc = -ENOENT;
			goto out_unlock;
		}

		/* single-packet message? deliver to socket, clean up any
		 * pending key.
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			rc = sock_queue_rcv_skb(&msk->sk, skb);
			if (!rc)
				skb = NULL;
			if (key) {
				/* we've hit a pending reassembly; not much we
				 * can do but drop it
				 */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_REPLIED);
				key = NULL;
			}
			goto out_unlock;
		}

		/* broadcast response or a bind() - create a key for further
		 * packets for this message
		 */
		if (!key) {
			key = mctp_key_alloc(msk, netid, mh->dest, mh->src,
					     tag, GFP_ATOMIC);
			if (!key) {
				rc = -ENOMEM;
				goto out_unlock;
			}

			/* we can queue without the key lock here, as the
			 * key isn't observable yet
			 */
			mctp_frag_queue(key, skb);
			skb = NULL;

			/* if the key_add fails, we've raced with another
			 * SOM packet with the same src, dest and tag. There's
			 * no way to distinguish future packets, so all we
			 * can do is drop.
			 */
			rc = mctp_key_add(key, msk);
			if (!rc)
				trace_mctp_key_acquire(key);

			/* we don't need to release key->lock on exit, so
			 * clean up here and suppress the unlock via
			 * setting to NULL
			 */
			mctp_key_unref(key);
			key = NULL;

		} else {
			if (key->reasm_head || key->reasm_dead) {
				/* duplicate start? drop everything */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_INVALIDATED);
				rc = -EEXIST;
				key = NULL;
			} else {
				rc = mctp_frag_queue(key, skb);
				skb = NULL;
			}
		}

	} else if (key) {
		/* this packet continues a previous message; reassemble
		 * using the message-specific key
		 */

		/* we need to be continuing an existing reassembly... */
		if (!key->reasm_head) {
			rc = -EINVAL;
		} else {
			rc = mctp_frag_queue(key, skb);
			skb = NULL;
		}

		if (rc)
			goto out_unlock;

		/* end of message? deliver to socket, and we're done with
		 * the reassembly/response key
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			rc = sock_queue_rcv_skb(key->sk, key->reasm_head);
			if (!rc)
				key->reasm_head = NULL;
			__mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
			key = NULL;
		}

	} else {
		/* not a start, no matching key */
		rc = -ENOENT;
	}

out_unlock:
	rcu_read_unlock();
	if (key) {
		spin_unlock_irqrestore(&key->lock, f);
		mctp_key_unref(key);
	}
	if (any_key)
		mctp_key_unref(any_key);
out:
	kfree_skb(skb);
	return rc;
}
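
/* In summary of the branches above: SOM|EOM is a single-packet message,
 * SOM alone starts a reassembly, neither flag continues one, and EOM
 * alone completes it and delivers the reassembled message to the socket.
 */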

static int mctp_dst_output(struct mctp_dst *dst, struct sk_buff *skb)
{
	char daddr_buf[MAX_ADDR_LEN];
	char *daddr = NULL;
	int rc;

	skb->protocol = htons(ETH_P_MCTP);
	skb->pkt_type = PACKET_OUTGOING;
	skb->dev = dst->dev->dev;

	if (skb->len > dst->mtu) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* direct route; use the hwaddr we stashed in sendmsg */
	if (dst->halen) {
		if (dst->halen != skb->dev->addr_len) {
			/* sanity check, sendmsg should have already caught this */
			kfree_skb(skb);
			return -EMSGSIZE;
		}
		daddr = dst->haddr;
	} else {
		/* If lookup fails let the device handle daddr==NULL */
		if (mctp_neigh_lookup(dst->dev, dst->nexthop, daddr_buf) == 0)
			daddr = daddr_buf;
	}

	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
			     daddr, skb->dev->dev_addr, skb->len);
	if (rc < 0) {
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}

	mctp_flow_prepare_output(skb, dst->dev);

	rc = dev_queue_xmit(skb);
	if (rc)
		rc = net_xmit_errno(rc);

	return rc;
}

/* route alloc/release */
static void mctp_route_release(struct mctp_route *rt)
{
	if (refcount_dec_and_test(&rt->refs)) {
		if (rt->dst_type == MCTP_ROUTE_DIRECT)
			mctp_dev_put(rt->dev);
		kfree_rcu(rt, rcu);
	}
}

/* returns a route with the refcount at 1 */
static struct mctp_route *mctp_route_alloc(void)
{
	struct mctp_route *rt;

	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
	if (!rt)
		return NULL;

	INIT_LIST_HEAD(&rt->list);
	refcount_set(&rt->refs, 1);
	rt->output = mctp_dst_discard;

	return rt;
}

unsigned int mctp_default_net(struct net *net)
{
	return READ_ONCE(net->mctp.default_net);
}

int mctp_default_net_set(struct net *net, unsigned int index)
{
	if (index == 0)
		return -EINVAL;
	WRITE_ONCE(net->mctp.default_net, index);
	return 0;
}

/* tag management */
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
			     struct mctp_sock *msk)
{
	struct netns_mctp *mns = &net->mctp;

	lockdep_assert_held(&mns->keys_lock);

	key->expiry = jiffies + mctp_key_lifetime;
	timer_reduce(&msk->key_expiry, key->expiry);

	/* we hold the net->mctp.keys_lock here, allowing updates to both
	 * the net and sk
	 */
	hlist_add_head_rcu(&key->hlist, &mns->keys);
	hlist_add_head_rcu(&key->sklist, &msk->keys);
	refcount_inc(&key->refs);
}

/* Allocate a locally-owned tag value for (local, peer), and reserve
 * it for the socket msk
 */
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
					 unsigned int netid,
					 mctp_eid_t local, mctp_eid_t peer,
					 bool manual, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;
	u8 tagbits;

	/* for NULL destination EIDs, we may get a response from any peer */
	if (peer == MCTP_ADDR_NULL)
		peer = MCTP_ADDR_ANY;

	/* be optimistic, alloc now */
	key = mctp_key_alloc(msk, netid, local, peer, 0, GFP_KERNEL);
	if (!key)
		return ERR_PTR(-ENOMEM);

	/* 8 possible tag values */
	tagbits = 0xff;

	spin_lock_irqsave(&mns->keys_lock, flags);

	/* Walk through the existing keys, looking for potential conflicting
	 * tags. If we find a conflict, clear that bit from tagbits
	 */
	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		/* We can check the lookup fields (*_addr, tag) without the
		 * lock held, they don't change over the lifetime of the key.
		 */

		/* tags are net-specific */
		if (tmp->net != netid)
			continue;

		/* if we don't own the tag, it can't conflict */
		if (tmp->tag & MCTP_HDR_FLAG_TO)
			continue;

		/* Since we're avoiding conflicting entries, match peer and
		 * local addresses, including with a wildcard on ANY. See
		 * 'A note on key allocations' for background.
		 */
		if (peer != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->peer_addr, peer))
			continue;

		if (local != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->local_addr, local))
			continue;

		spin_lock(&tmp->lock);
		/* key must still be valid. If we find a match, clear the
		 * potential tag value
		 */
		if (tmp->valid)
			tagbits &= ~(1 << tmp->tag);
		spin_unlock(&tmp->lock);

		if (!tagbits)
			break;
	}

	if (tagbits) {
		key->tag = __ffs(tagbits);
		mctp_reserve_tag(net, key, msk);
		trace_mctp_key_acquire(key);

		key->manual_alloc = manual;
		*tagp = key->tag;
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!tagbits) {
		mctp_key_unref(key);
		return ERR_PTR(-EBUSY);
	}

	return key;
}
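
/* Worked example: if valid locally-owned keys for this (net, local, peer)
 * already hold tags 0 and 2, tagbits ends up as 0xfa, and __ffs() selects
 * tag 1 as the lowest free value. (Illustrative values.)
 */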

static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
						    unsigned int netid,
						    mctp_eid_t daddr,
						    u8 req_tag, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;

	req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER);
	key = NULL;

	spin_lock_irqsave(&mns->keys_lock, flags);

	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		if (tmp->net != netid)
			continue;

		if (tmp->tag != req_tag)
			continue;

		if (!mctp_address_matches(tmp->peer_addr, daddr))
			continue;

		if (!tmp->manual_alloc)
			continue;

		spin_lock(&tmp->lock);
		if (tmp->valid) {
			key = tmp;
			refcount_inc(&key->refs);
			spin_unlock(&tmp->lock);
			break;
		}
		spin_unlock(&tmp->lock);
	}
	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!key)
		return ERR_PTR(-ENOENT);

	if (tagp)
		*tagp = key->tag;

	return key;
}
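
/* Manually-preallocated tags (the manual_alloc case above) are typically
 * created via the MCTP tag-allocation ioctls (SIOCMCTPALLOCTAG and
 * friends); since they survive __mctp_key_done_in(), a later send with
 * MCTP_TAG_PREALLOC set can find them here.
 */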

/* routing lookups */
static unsigned int mctp_route_netid(struct mctp_route *rt)
{
	return rt->dst_type == MCTP_ROUTE_DIRECT ?
		READ_ONCE(rt->dev->net) : rt->gateway.net;
}

static bool mctp_rt_match_eid(struct mctp_route *rt,
			      unsigned int net, mctp_eid_t eid)
{
	return mctp_route_netid(rt) == net &&
		rt->min <= eid && rt->max >= eid;
}

/* compares match, used for duplicate prevention */
static bool mctp_rt_compare_exact(struct mctp_route *rt1,
				  struct mctp_route *rt2)
{
	ASSERT_RTNL();
	return mctp_route_netid(rt1) == mctp_route_netid(rt2) &&
		rt1->min == rt2->min &&
		rt1->max == rt2->max;
}

/* must only be called on a direct route, as the final output hop */
static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid,
				unsigned int mtu, struct mctp_route *route)
{
	mctp_dev_hold(route->dev);
	dst->nexthop = eid;
	dst->dev = route->dev;
	dst->mtu = READ_ONCE(dst->dev->dev->mtu);
	if (mtu)
		dst->mtu = min(dst->mtu, mtu);
	dst->halen = 0;
	dst->output = route->output;
}

int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex,
			  unsigned char halen, const unsigned char *haddr)
{
	struct net_device *netdev;
	struct mctp_dev *dev;
	int rc = -ENOENT;

	if (halen > sizeof(dst->haddr))
		return -EINVAL;

	rcu_read_lock();

	netdev = dev_get_by_index_rcu(net, ifindex);
	if (!netdev)
		goto out_unlock;

	if (netdev->addr_len != halen) {
		rc = -EINVAL;
		goto out_unlock;
	}

	dev = __mctp_dev_get(netdev);
	if (!dev)
		goto out_unlock;

	dst->dev = dev;
	dst->mtu = READ_ONCE(netdev->mtu);
	dst->halen = halen;
	dst->output = mctp_dst_output;
	dst->nexthop = 0;
	memcpy(dst->haddr, haddr, halen);

	rc = 0;

out_unlock:
	rcu_read_unlock();
	return rc;
}

void mctp_dst_release(struct mctp_dst *dst)
{
	mctp_dev_put(dst->dev);
}

static struct mctp_route *mctp_route_lookup_single(struct net *net,
						   unsigned int dnet,
						   mctp_eid_t daddr)
{
	struct mctp_route *rt;

	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (mctp_rt_match_eid(rt, dnet, daddr))
			return rt;
	}

	return NULL;
}

/* populates *dst on successful lookup, if set */
int mctp_route_lookup(struct net *net, unsigned int dnet,
		      mctp_eid_t daddr, struct mctp_dst *dst)
{
	const unsigned int max_depth = 32;
	unsigned int depth, mtu = 0;
	int rc = -EHOSTUNREACH;

	rcu_read_lock();

	for (depth = 0; depth < max_depth; depth++) {
		struct mctp_route *rt;

		rt = mctp_route_lookup_single(net, dnet, daddr);
		if (!rt)
			break;

		/* clamp mtu to the smallest in the path, allowing 0
		 * to specify no restrictions
		 */
		if (mtu && rt->mtu)
			mtu = min(mtu, rt->mtu);
		else
			mtu = mtu ?: rt->mtu;

		if (rt->dst_type == MCTP_ROUTE_DIRECT) {
			if (dst)
				mctp_dst_from_route(dst, daddr, mtu, rt);
			rc = 0;
			break;

		} else if (rt->dst_type == MCTP_ROUTE_GATEWAY) {
			daddr = rt->gateway.eid;
		}
	}

	rcu_read_unlock();

	return rc;
}
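
/* For example (illustrative values): a gateway route for net 1, EIDs 32-47
 * via EID 9, plus a direct route for EID 9, resolves a lookup of EID 40 in
 * two iterations, with the resulting dst pointing at EID 9's device and
 * nexthop.
 */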

static int mctp_route_lookup_null(struct net *net, struct net_device *dev,
				  struct mctp_dst *dst)
{
	int rc = -EHOSTUNREACH;
	struct mctp_route *rt;

	rcu_read_lock();

	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (rt->dst_type != MCTP_ROUTE_DIRECT || rt->type != RTN_LOCAL)
			continue;

		if (rt->dev->dev != dev)
			continue;

		mctp_dst_from_route(dst, 0, 0, rt);
		rc = 0;
		break;
	}

	rcu_read_unlock();

	return rc;
}

static int mctp_do_fragment_route(struct mctp_dst *dst, struct sk_buff *skb,
				  unsigned int mtu, u8 tag)
{
	const unsigned int hlen = sizeof(struct mctp_hdr);
	struct mctp_hdr *hdr, *hdr2;
	unsigned int pos, size, headroom;
	struct sk_buff *skb2;
	int rc;
	u8 seq;

	hdr = mctp_hdr(skb);
	seq = 0;
	rc = 0;

	if (mtu < hlen + 1) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* keep same headroom as the original skb */
	headroom = skb_headroom(skb);

	/* we've got the header */
	skb_pull(skb, hlen);

	for (pos = 0; pos < skb->len;) {
		/* size of message payload */
		size = min(mtu - hlen, skb->len - pos);

		skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL);
		if (!skb2) {
			rc = -ENOMEM;
			break;
		}

		/* generic skb copy */
		skb2->protocol = skb->protocol;
		skb2->priority = skb->priority;
		skb2->dev = skb->dev;
		memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/* establish packet */
		skb_reserve(skb2, headroom);
		skb_reset_network_header(skb2);
		skb_put(skb2, hlen + size);
		skb2->transport_header = skb2->network_header + hlen;

		/* copy header fields, calculate SOM/EOM flags & seq */
		hdr2 = mctp_hdr(skb2);
		hdr2->ver = hdr->ver;
		hdr2->dest = hdr->dest;
		hdr2->src = hdr->src;
		hdr2->flags_seq_tag = tag &
			(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

		if (pos == 0)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;

		if (pos + size == skb->len)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;

		hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;

		/* copy message payload */
		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);

		/* we need to copy the extensions, for MCTP flow data */
		skb_ext_copy(skb2, skb);

		/* do route */
		rc = dst->output(dst, skb2);
		if (rc)
			break;

		seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
		pos += size;
	}

	consume_skb(skb);
	return rc;
}
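
/* Worked example: fragmenting a 150-byte message over a 68-byte MTU
 * (64 bytes of payload per fragment after the 4-byte MCTP header) yields
 * three packets: SOM + 64 bytes (seq 0), 64 bytes (seq 1), and
 * EOM + 22 bytes (seq 2). (Illustrative values.)
 */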

int mctp_local_output(struct sock *sk, struct mctp_dst *dst,
		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct mctp_sk_key *key;
	struct mctp_hdr *hdr;
	unsigned long flags;
	unsigned int netid;
	unsigned int mtu;
	mctp_eid_t saddr;
	int rc;
	u8 tag;

	KUNIT_STATIC_STUB_REDIRECT(mctp_local_output, sk, dst, skb, daddr,
				   req_tag);

	rc = -ENODEV;

	spin_lock_irqsave(&dst->dev->addrs_lock, flags);
	if (dst->dev->num_addrs == 0) {
		rc = -EHOSTUNREACH;
	} else {
		/* use the outbound interface's first address as our source */
		saddr = dst->dev->addrs[0];
		rc = 0;
	}
	spin_unlock_irqrestore(&dst->dev->addrs_lock, flags);
	netid = READ_ONCE(dst->dev->net);

	if (rc)
		goto out_release;

	if (req_tag & MCTP_TAG_OWNER) {
		if (req_tag & MCTP_TAG_PREALLOC)
			key = mctp_lookup_prealloc_tag(msk, netid, daddr,
						       req_tag, &tag);
		else
			key = mctp_alloc_local_tag(msk, netid, saddr, daddr,
						   false, &tag);

		if (IS_ERR(key)) {
			rc = PTR_ERR(key);
			goto out_release;
		}
		mctp_skb_set_flow(skb, key);
		/* done with the key in this scope */
		mctp_key_unref(key);
		tag |= MCTP_HDR_FLAG_TO;
	} else {
		key = NULL;
		tag = req_tag & MCTP_TAG_MASK;
	}

	skb->pkt_type = PACKET_OUTGOING;
	skb->protocol = htons(ETH_P_MCTP);
	skb->priority = 0;
	skb_reset_transport_header(skb);
	skb_push(skb, sizeof(struct mctp_hdr));
	skb_reset_network_header(skb);
	skb->dev = dst->dev->dev;

	/* set up common header fields */
	hdr = mctp_hdr(skb);
	hdr->ver = 1;
	hdr->dest = daddr;
	hdr->src = saddr;

	mtu = dst->mtu;

	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM |
			MCTP_HDR_FLAG_EOM | tag;
		rc = dst->output(dst, skb);
	} else {
		rc = mctp_do_fragment_route(dst, skb, mtu, tag);
	}

	/* route output functions consume the skb, even on error */
	skb = NULL;

out_release:
	kfree_skb(skb);
	return rc;
}

/* route management */

/* mctp_route_add(): Add the provided route, previously allocated via
 * mctp_route_alloc(). On success, takes ownership of @rt, which includes a
 * hold on rt->dev for usage in the route table. On failure a caller will want
 * to mctp_route_release().
 *
 * We expect that the caller has set rt->type, rt->dst_type, rt->min, rt->max,
 * rt->mtu and either rt->dev (with a reference held appropriately) or
 * rt->gateway. Other fields will be populated.
 */
static int mctp_route_add(struct net *net, struct mctp_route *rt)
{
	struct mctp_route *ert;

	if (!mctp_address_unicast(rt->min) || !mctp_address_unicast(rt->max))
		return -EINVAL;

	if (rt->dst_type == MCTP_ROUTE_DIRECT && !rt->dev)
		return -EINVAL;

	if (rt->dst_type == MCTP_ROUTE_GATEWAY && !rt->gateway.eid)
		return -EINVAL;

	switch (rt->type) {
	case RTN_LOCAL:
		rt->output = mctp_dst_input;
		break;
	case RTN_UNICAST:
		rt->output = mctp_dst_output;
		break;
	default:
		return -EINVAL;
	}

	ASSERT_RTNL();

	/* Prevent duplicate identical routes. */
	list_for_each_entry(ert, &net->mctp.routes, list) {
		if (mctp_rt_compare_exact(rt, ert)) {
			return -EEXIST;
		}
	}

	list_add_rcu(&rt->list, &net->mctp.routes);

	return 0;
}

static int mctp_route_remove(struct net *net, unsigned int netid,
			     mctp_eid_t daddr_start, unsigned int daddr_extent,
			     unsigned char type)
{
	struct mctp_route *rt, *tmp;
	mctp_eid_t daddr_end;
	bool dropped;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	daddr_end = daddr_start + daddr_extent;
	dropped = false;

	ASSERT_RTNL();

	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (mctp_route_netid(rt) == netid &&
		    rt->min == daddr_start && rt->max == daddr_end &&
		    rt->type == type) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
			dropped = true;
		}
	}

	return dropped ? 0 : -ENOENT;
}

int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	struct mctp_route *rt;
	int rc;

	rt = mctp_route_alloc();
	if (!rt)
		return -ENOMEM;

	rt->min = addr;
	rt->max = addr;
	rt->dst_type = MCTP_ROUTE_DIRECT;
	rt->dev = mdev;
	rt->type = RTN_LOCAL;

	mctp_dev_hold(rt->dev);

	rc = mctp_route_add(dev_net(mdev->dev), rt);
	if (rc)
		mctp_route_release(rt);

	return rc;
}

int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_remove(dev_net(mdev->dev), mdev->net,
				 addr, 0, RTN_LOCAL);
}

/* removes all entries for a given device */
void mctp_route_remove_dev(struct mctp_dev *mdev)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;

	ASSERT_RTNL();
	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dst_type == MCTP_ROUTE_DIRECT && rt->dev == mdev) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
		}
	}
}

/* Incoming packet-handling */

static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
				struct packet_type *pt,
				struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);
	struct mctp_dev *mdev;
	struct mctp_skb_cb *cb;
	struct mctp_dst dst;
	struct mctp_hdr *mh;
	int rc;

	rcu_read_lock();
	mdev = __mctp_dev_get(dev);
	rcu_read_unlock();
	if (!mdev) {
		/* basic non-data sanity checks */
		goto err_drop;
	}

	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
		goto err_drop;

	skb_reset_transport_header(skb);
	skb_reset_network_header(skb);

	/* We have enough for a header; decode and route */
	mh = mctp_hdr(skb);
	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
		goto err_drop;

	/* source must be valid unicast or null; drop reserved ranges and
	 * broadcast
	 */
	if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src)))
		goto err_drop;

	/* dest address: as above, but allow broadcast */
	if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) ||
	      mctp_address_broadcast(mh->dest)))
		goto err_drop;

	/* MCTP drivers must populate halen/haddr */
	if (dev->type == ARPHRD_MCTP) {
		cb = mctp_cb(skb);
	} else {
		cb = __mctp_cb(skb);
		cb->halen = 0;
	}
	cb->net = READ_ONCE(mdev->net);
	cb->ifindex = dev->ifindex;

	rc = mctp_route_lookup(net, cb->net, mh->dest, &dst);

	/* NULL EID, but addressed to our physical address */
	if (rc && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
		rc = mctp_route_lookup_null(net, dev, &dst);

	if (rc)
		goto err_drop;

	dst.output(&dst, skb);
	mctp_dst_release(&dst);
	mctp_dev_put(mdev);

	return NET_RX_SUCCESS;

err_drop:
	kfree_skb(skb);
	mctp_dev_put(mdev);
	return NET_RX_DROP;
}

static struct packet_type mctp_packet_type = {
	.type = cpu_to_be16(ETH_P_MCTP),
	.func = mctp_pkttype_receive,
};

/* netlink interface */

static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
	[RTA_DST] = { .type = NLA_U8 },
	[RTA_METRICS] = { .type = NLA_NESTED },
	[RTA_OIF] = { .type = NLA_U32 },
	[RTA_GATEWAY] = NLA_POLICY_EXACT_LEN(sizeof(struct mctp_fq_addr)),
};

static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
	[RTAX_MTU] = { .type = NLA_U32 },
};

/* base parsing; common to both _lookup and _populate variants.
 *
 * For gateway routes (which have a RTA_GATEWAY, and no RTA_OIF), we populate
 * *gatewayp. For direct routes (RTA_OIF, no RTA_GATEWAY), we populate *mdev.
 */
static int mctp_route_nlparse_common(struct net *net, struct nlmsghdr *nlh,
				     struct netlink_ext_ack *extack,
				     struct nlattr **tb, struct rtmsg **rtm,
				     struct mctp_dev **mdev,
				     struct mctp_fq_addr *gatewayp,
				     mctp_eid_t *daddr_start)
{
	struct mctp_fq_addr *gateway = NULL;
	unsigned int ifindex = 0;
	struct net_device *dev;
	int rc;

	rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
			 rta_mctp_policy, extack);
	if (rc < 0) {
		NL_SET_ERR_MSG(extack, "incorrect format");
		return rc;
	}

	if (!tb[RTA_DST]) {
		NL_SET_ERR_MSG(extack, "dst EID missing");
		return -EINVAL;
	}
	*daddr_start = nla_get_u8(tb[RTA_DST]);

	if (tb[RTA_OIF])
		ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_GATEWAY])
		gateway = nla_data(tb[RTA_GATEWAY]);

	if (ifindex && gateway) {
		NL_SET_ERR_MSG(extack,
			       "cannot specify both ifindex and gateway");
		return -EINVAL;

	} else if (ifindex) {
		dev = __dev_get_by_index(net, ifindex);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "bad ifindex");
			return -ENODEV;
		}
		*mdev = mctp_dev_get_rtnl(dev);
		if (!*mdev)
			return -ENODEV;
		gatewayp->eid = 0;

	} else if (gateway) {
		if (!mctp_address_unicast(gateway->eid)) {
			NL_SET_ERR_MSG(extack, "bad gateway");
			return -EINVAL;
		}

		gatewayp->eid = gateway->eid;
		gatewayp->net = gateway->net != MCTP_NET_ANY ?
			gateway->net :
			READ_ONCE(net->mctp.default_net);
		*mdev = NULL;

	} else {
		NL_SET_ERR_MSG(extack, "no route output provided");
		return -EINVAL;
	}

	*rtm = nlmsg_data(nlh);
	if ((*rtm)->rtm_family != AF_MCTP) {
		NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
		return -EINVAL;
	}

	if ((*rtm)->rtm_type != RTN_UNICAST) {
		NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
		return -EINVAL;
	}

	return 0;
}

/* Route parsing for lookup operations; we only need the "route target"
 * components (ie., network and dest-EID range).
 */
static int mctp_route_nlparse_lookup(struct net *net, struct nlmsghdr *nlh,
				     struct netlink_ext_ack *extack,
				     unsigned char *type, unsigned int *netid,
				     mctp_eid_t *daddr_start,
				     unsigned int *daddr_extent)
{
	struct nlattr *tb[RTA_MAX + 1];
	struct mctp_fq_addr gw;
	struct mctp_dev *mdev;
	struct rtmsg *rtm;
	int rc;

	rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm,
				       &mdev, &gw, daddr_start);
	if (rc)
		return rc;

	if (mdev) {
		*netid = mdev->net;
	} else if (gw.eid) {
		*netid = gw.net;
	} else {
		/* bug: _nlparse_common should not allow this */
		return -1;
	}

	*type = rtm->rtm_type;
	*daddr_extent = rtm->rtm_dst_len;

	return 0;
}

/* Full route parse for RTM_NEWROUTE: populate @rt. On success,
 * MCTP_ROUTE_DIRECT routes (ie, those with a direct dev) will hold a reference
 * to that dev.
 */
static int mctp_route_nlparse_populate(struct net *net, struct nlmsghdr *nlh,
				       struct netlink_ext_ack *extack,
				       struct mctp_route *rt)
{
	struct nlattr *tbx[RTAX_MAX + 1];
	struct nlattr *tb[RTA_MAX + 1];
	unsigned int daddr_extent;
	struct mctp_fq_addr gw;
	mctp_eid_t daddr_start;
	struct mctp_dev *dev;
	struct rtmsg *rtm;
	u32 mtu = 0;
	int rc;

	rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm,
				       &dev, &gw, &daddr_start);
	if (rc)
		return rc;

	daddr_extent = rtm->rtm_dst_len;

	if (daddr_extent > 0xff || daddr_extent + daddr_start >= 255) {
		NL_SET_ERR_MSG(extack, "invalid eid range");
		return -EINVAL;
	}

	if (tb[RTA_METRICS]) {
		rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
				      rta_metrics_policy, NULL);
		if (rc < 0) {
			NL_SET_ERR_MSG(extack, "incorrect RTA_METRICS format");
			return rc;
		}
		if (tbx[RTAX_MTU])
			mtu = nla_get_u32(tbx[RTAX_MTU]);
	}

	rt->type = rtm->rtm_type;
	rt->min = daddr_start;
	rt->max = daddr_start + daddr_extent;
	rt->mtu = mtu;
	if (gw.eid) {
		rt->dst_type = MCTP_ROUTE_GATEWAY;
		rt->gateway.eid = gw.eid;
		rt->gateway.net = gw.net;
	} else {
		rt->dst_type = MCTP_ROUTE_DIRECT;
		rt->dev = dev;
		mctp_dev_hold(rt->dev);
	}

	return 0;
}

static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct mctp_route *rt;
	int rc;

	rt = mctp_route_alloc();
	if (!rt)
		return -ENOMEM;

	rc = mctp_route_nlparse_populate(net, nlh, extack, rt);
	if (rc < 0)
		goto err_free;

	if (rt->dst_type == MCTP_ROUTE_DIRECT &&
	    rt->dev->dev->flags & IFF_LOOPBACK) {
		NL_SET_ERR_MSG(extack, "no routes to loopback");
		rc = -EINVAL;
		goto err_free;
	}

	rc = mctp_route_add(net, rt);
	if (!rc)
		return 0;

err_free:
	mctp_route_release(rt);
	return rc;
}
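
/* Example request (illustrative values): a direct route for EIDs 8-11 via
 * ifindex 3 carries rtm_family=AF_MCTP, rtm_type=RTN_UNICAST,
 * rtm_dst_len=3 (an EID count, not a prefix length), RTA_DST=8 and
 * RTA_OIF=3; a gateway route instead carries RTA_GATEWAY holding a
 * struct mctp_fq_addr.
 */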

static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	unsigned int netid, daddr_extent;
	unsigned char type = RTN_UNSPEC;
	mctp_eid_t daddr_start;
	int rc;

	rc = mctp_route_nlparse_lookup(net, nlh, extack, &type, &netid,
				       &daddr_start, &daddr_extent);
	if (rc < 0)
		return rc;

	/* we only have unicast routes */
	if (type != RTN_UNICAST)
		return -EINVAL;

	rc = mctp_route_remove(net, netid, daddr_start, daddr_extent, type);
	return rc;
}

static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *hdr;
	void *metrics;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->rtm_family = AF_MCTP;

	/* we use the _len fields as a number of EIDs, rather than
	 * a number of bits in the address
	 */
	hdr->rtm_dst_len = rt->max - rt->min;
	hdr->rtm_src_len = 0;
	hdr->rtm_tos = 0;
	hdr->rtm_table = RT_TABLE_DEFAULT;
	hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
	hdr->rtm_type = rt->type;

	if (nla_put_u8(skb, RTA_DST, rt->min))
		goto cancel;

	metrics = nla_nest_start_noflag(skb, RTA_METRICS);
	if (!metrics)
		goto cancel;

	if (rt->mtu) {
		if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
			goto cancel;
	}

	nla_nest_end(skb, metrics);

	if (rt->dst_type == MCTP_ROUTE_DIRECT) {
		hdr->rtm_scope = RT_SCOPE_LINK;
		if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
			goto cancel;
	} else if (rt->dst_type == MCTP_ROUTE_GATEWAY) {
		hdr->rtm_scope = RT_SCOPE_UNIVERSE;
		if (nla_put(skb, RTA_GATEWAY,
			    sizeof(rt->gateway), &rt->gateway))
			goto cancel;
	}

	nlmsg_end(skb, nlh);

	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mctp_route *rt;
	int s_idx, idx;

	/* TODO: allow filtering on route data, possibly under
	 * cb->strict_check
	 */

	/* TODO: change to struct overlay */
	s_idx = cb->args[0];
	idx = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (idx++ < s_idx)
			continue;
		if (mctp_fill_rtinfo(skb, rt,
				     NETLINK_CB(cb->skb).portid,
				     cb->nlh->nlmsg_seq,
				     RTM_NEWROUTE, NLM_F_MULTI) < 0)
			break;
	}

	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

/* net namespace implementation */
static int __net_init mctp_routes_net_init(struct net *net)
{
	struct netns_mctp *ns = &net->mctp;

	INIT_LIST_HEAD(&ns->routes);
	hash_init(ns->binds);
	mutex_init(&ns->bind_lock);
	INIT_HLIST_HEAD(&ns->keys);
	spin_lock_init(&ns->keys_lock);
	WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
	return 0;
}

static void __net_exit mctp_routes_net_exit(struct net *net)
{
	struct mctp_route *rt;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list)
		mctp_route_release(rt);
	rcu_read_unlock();
}

static struct pernet_operations mctp_net_ops = {
	.init = mctp_routes_net_init,
	.exit = mctp_routes_net_exit,
};

static const struct rtnl_msg_handler mctp_route_rtnl_msg_handlers[] = {
	{THIS_MODULE, PF_MCTP, RTM_NEWROUTE, mctp_newroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_DELROUTE, mctp_delroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_GETROUTE, NULL, mctp_dump_rtinfo, 0},
};

int __init mctp_routes_init(void)
{
	int err;

	dev_add_pack(&mctp_packet_type);

	err = register_pernet_subsys(&mctp_net_ops);
	if (err)
		goto err_pernet;

	err = rtnl_register_many(mctp_route_rtnl_msg_handlers);
	if (err)
		goto err_rtnl;

	return 0;

err_rtnl:
	unregister_pernet_subsys(&mctp_net_ops);
err_pernet:
	dev_remove_pack(&mctp_packet_type);
	return err;
}

void mctp_routes_exit(void)
{
	rtnl_unregister_many(mctp_route_rtnl_msg_handlers);
	unregister_pernet_subsys(&mctp_net_ops);
	dev_remove_pack(&mctp_packet_type);
}

#if IS_ENABLED(CONFIG_MCTP_TEST)
#include "test/route-test.c"
#endif