// SPDX-License-Identifier: GPL-2.0
/*
 * Management Component Transport Protocol (MCTP) - routing
 * implementation.
 *
 * This is currently based on a simple routing table, with no dst cache. The
 * number of routes should stay fairly small, so the lookup cost is small.
 *
 * Copyright (c) 2021 Code Construct
 * Copyright (c) 2021 Google
 */

#include <linux/idr.h>
#include <linux/kconfig.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

#include <kunit/static_stub.h>

#include <uapi/linux/if_arp.h>

#include <net/mctp.h>
#include <net/mctpdevice.h>
#include <net/netlink.h>
#include <net/sock.h>

#include <trace/events/mctp.h>

static const unsigned int mctp_message_maxlen = 64 * 1024;
static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev);

/* route output callbacks */
static int mctp_dst_discard(struct mctp_dst *dst, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}

static struct mctp_sock *mctp_lookup_bind_details(struct net *net,
						  struct sk_buff *skb,
						  u8 type, u8 dest,
						  u8 src, bool allow_net_any)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct sock *sk;
	u8 hash;

	WARN_ON_ONCE(!rcu_read_lock_held());

	hash = mctp_bind_hash(type, dest, src);

	sk_for_each_rcu(sk, &net->mctp.binds[hash]) {
		struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

		if (!allow_net_any && msk->bind_net == MCTP_NET_ANY)
			continue;

		if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
			continue;

		if (msk->bind_type != type)
			continue;

		if (msk->bind_peer_set &&
		    !mctp_address_matches(msk->bind_peer_addr, src))
			continue;

		if (!mctp_address_matches(msk->bind_local_addr, dest))
			continue;

		return msk;
	}

	return NULL;
}

static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
	struct mctp_sock *msk;
	struct mctp_hdr *mh;
	u8 type;

	/* TODO: look up in skb->cb? */
	mh = mctp_hdr(skb);

	if (!skb_headlen(skb))
		return NULL;

	type = (*(u8 *)skb->data) & 0x7f;

	/* Look for binds in order of widening scope. A given destination or
	 * source address also implies matching on a particular network.
	 *
	 * - Matching destination and source
	 * - Matching destination
	 * - Matching source
	 * - Matching network, any address
	 * - Any network or address
	 */
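
	/* Illustrative note (added commentary, not in the upstream file;
	 * the addresses are hypothetical): for an incoming type-1 packet
	 * with dest 8 and src 9 on net 1, a socket bound to
	 * (net 1, local 8, peer 9) is preferred over one bound to
	 * (net 1, local 8, any peer); a MCTP_NET_ANY bind is only
	 * considered in the final lookup pass below.
	 */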

	msk = mctp_lookup_bind_details(net, skb, type, mh->dest, mh->src,
				       false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY, mh->src,
				       false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, mh->dest, MCTP_ADDR_ANY,
				       false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY,
				       MCTP_ADDR_ANY, false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY,
				       MCTP_ADDR_ANY, true);
	if (msk)
		return msk;

	return NULL;
}

/* A note on the key allocations.
 *
 * struct net->mctp.keys contains our set of currently-allocated keys for
 * MCTP tag management. The lookup tuple for these is the peer EID,
 * local EID and MCTP tag.
 *
 * In some cases, the peer EID may be MCTP_EID_ANY: for example, when a
 * broadcast message is sent, we may receive responses from any peer EID.
 * Because the broadcast dest address is equivalent to ANY, we create
 * a key with (local = local-eid, peer = ANY). This allows a match on the
 * incoming broadcast responses from any peer.
 *
 * We perform lookups when packets are received, and when tags are allocated
 * in two scenarios:
 *
 * - when a packet is sent, with a locally-owned tag: we need to find an
 *   unused tag value for the (local, peer) EID pair.
 *
 * - when a tag is manually allocated: we need to find an unused tag value
 *   for the peer EID, but don't have a specific local EID at that stage.
 *
 * In the latter case, on successful allocation, we end up with a tag with
 * (local = ANY, peer = peer-eid).
 *
 * So, the key set allows both a local EID of ANY, as well as a peer EID of
 * ANY in the lookup tuple. Both may be ANY if we prealloc for a broadcast.
 * The matching (in mctp_key_match()) during lookup allows the match value to
 * be ANY in either the dest or source addresses.
 *
 * When allocating (+ inserting) a tag, we need to check for conflicts amongst
 * the existing tag set. This requires matching either exactly on the local
 * and peer addresses, or either being ANY.
 */
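
/* Illustrative note (added commentary, not in the upstream file; EIDs and
 * tag values are hypothetical): a send with a locally-owned tag from local
 * EID 8 to peer EID 9 might hold a key of (local = 8, peer = 9, tag = 1),
 * while a manual allocation for peer 9 gives (local = ANY, peer = 9,
 * tag = 1). Either key matches an incoming response with src = 9, dest = 8,
 * tag = 1 and the TO bit clear, since mctp_key_match() treats ANY as a
 * wildcard.
 */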

static bool mctp_key_match(struct mctp_sk_key *key, unsigned int net,
			   mctp_eid_t local, mctp_eid_t peer, u8 tag)
{
	if (key->net != net)
		return false;

	if (!mctp_address_matches(key->local_addr, local))
		return false;

	if (!mctp_address_matches(key->peer_addr, peer))
		return false;

	if (key->tag != tag)
		return false;

	return true;
}

/* returns a key (with key->lock held, and refcounted), or NULL if no such
 * key exists.
 */
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
					   unsigned int netid, mctp_eid_t peer,
					   unsigned long *irqflags)
	__acquires(&key->lock)
{
	struct mctp_sk_key *key, *ret;
	unsigned long flags;
	struct mctp_hdr *mh;
	u8 tag;

	mh = mctp_hdr(skb);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	ret = NULL;
	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	hlist_for_each_entry(key, &net->mctp.keys, hlist) {
		if (!mctp_key_match(key, netid, mh->dest, peer, tag))
			continue;

		spin_lock(&key->lock);
		if (key->valid) {
			refcount_inc(&key->refs);
			ret = key;
			break;
		}
		spin_unlock(&key->lock);
	}

	if (ret) {
		spin_unlock(&net->mctp.keys_lock);
		*irqflags = flags;
	} else {
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	return ret;
}

static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
					  unsigned int net,
					  mctp_eid_t local, mctp_eid_t peer,
					  u8 tag, gfp_t gfp)
{
	struct mctp_sk_key *key;

	key = kzalloc(sizeof(*key), gfp);
	if (!key)
		return NULL;

	key->net = net;
	key->peer_addr = peer;
	key->local_addr = local;
	key->tag = tag;
	key->sk = &msk->sk;
	key->valid = true;
	spin_lock_init(&key->lock);
	refcount_set(&key->refs, 1);
	sock_hold(key->sk);

	return key;
}

void mctp_key_unref(struct mctp_sk_key *key)
{
	unsigned long flags;

	if (!refcount_dec_and_test(&key->refs))
		return;

	/* even though no refs exist here, the lock allows us to stay
	 * consistent with the locking requirement of mctp_dev_release_key
	 */
	spin_lock_irqsave(&key->lock, flags);
	mctp_dev_release_key(key->dev, key);
	spin_unlock_irqrestore(&key->lock, flags);

	sock_put(key->sk);
	kfree(key);
}

static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
	struct net *net = sock_net(&msk->sk);
	struct mctp_sk_key *tmp;
	unsigned long flags;
	int rc = 0;

	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	if (sock_flag(&msk->sk, SOCK_DEAD)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
		if (mctp_key_match(tmp, key->net, key->local_addr,
				   key->peer_addr, key->tag)) {
			spin_lock(&tmp->lock);
			if (tmp->valid)
				rc = -EEXIST;
			spin_unlock(&tmp->lock);
			if (rc)
				break;
		}
	}

	if (!rc) {
		refcount_inc(&key->refs);
		key->expiry = jiffies + mctp_key_lifetime;
		timer_reduce(&msk->key_expiry, key->expiry);

		hlist_add_head(&key->hlist, &net->mctp.keys);
		hlist_add_head(&key->sklist, &msk->keys);
	}

out_unlock:
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	return rc;
}

/* Helper for mctp_route_input().
 * We're done with the key; unlock and unref the key.
 * For the usual case of automatic expiry we remove the key from lists.
 * In the case that manual allocation is set on a key we release the lock
 * and local ref, reset reassembly, but don't remove from lists.
 */
static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net,
			       unsigned long flags, unsigned long reason)
	__releases(&key->lock)
{
	struct sk_buff *skb;

	trace_mctp_key_release(key, reason);
	skb = key->reasm_head;
	key->reasm_head = NULL;

	if (!key->manual_alloc) {
		key->reasm_dead = true;
		key->valid = false;
		mctp_dev_release_key(key->dev, key);
	}
	spin_unlock_irqrestore(&key->lock, flags);

	if (!key->manual_alloc) {
		spin_lock_irqsave(&net->mctp.keys_lock, flags);
		if (!hlist_unhashed(&key->hlist)) {
			hlist_del_init(&key->hlist);
			hlist_del_init(&key->sklist);
			mctp_key_unref(key);
		}
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	/* and one for the local reference */
	mctp_key_unref(key);

	kfree_skb(skb);
}

#ifdef CONFIG_MCTP_FLOWS
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key)
{
	struct mctp_flow *flow;

	flow = skb_ext_add(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	refcount_inc(&key->refs);
	flow->key = key;
}

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
{
	struct mctp_sk_key *key;
	struct mctp_flow *flow;

	flow = skb_ext_find(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	key = flow->key;

	if (key->dev) {
		WARN_ON(key->dev != dev);
		return;
	}

	mctp_dev_set_key(dev, key);
}
#else
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {}
#endif

/* takes ownership of skb, both in success and failure cases */
static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
	struct mctp_hdr *hdr = mctp_hdr(skb);
	u8 exp_seq, this_seq;

	this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
		& MCTP_HDR_SEQ_MASK;

	if (!key->reasm_head) {
		/* Since we're manipulating the shared frag_list, ensure it
		 * isn't shared with any other SKBs. In the cloned case,
		 * this will free the skb; callers can no longer access it
		 * safely.
		 */
		key->reasm_head = skb_unshare(skb, GFP_ATOMIC);
		if (!key->reasm_head)
			return -ENOMEM;

		key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list);
		key->last_seq = this_seq;
		return 0;
	}

	exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;

	if (this_seq != exp_seq)
		goto err_free;

	if (key->reasm_head->len + skb->len > mctp_message_maxlen)
		goto err_free;

	skb->next = NULL;
	skb->sk = NULL;
	*key->reasm_tailp = skb;
	key->reasm_tailp = &skb->next;

	key->last_seq = this_seq;

	key->reasm_head->data_len += skb->len;
	key->reasm_head->len += skb->len;
	key->reasm_head->truesize += skb->truesize;

	return 0;

err_free:
	kfree_skb(skb);
	return -EINVAL;
}
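
/* Illustrative note (added commentary, not in the upstream file): the
 * sequence field is 2 bits wide, so the expected value wraps: with
 * key->last_seq == 3, exp_seq is (3 + 1) & MCTP_HDR_SEQ_MASK == 0. Any
 * other arriving sequence number aborts reassembly and drops the message.
 */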

static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb)
{
	struct mctp_sk_key *key, *any_key = NULL;
	struct net *net = dev_net(skb->dev);
	struct mctp_sock *msk;
	struct mctp_hdr *mh;
	unsigned int netid;
	unsigned long f;
	u8 tag, flags;
	int rc;

	msk = NULL;
	rc = -EINVAL;

	/* We may be receiving a locally-routed packet; drop source sk
	 * accounting.
	 *
	 * From here, we will either queue the skb - either to a frag_queue, or
	 * to a receiving socket. When that succeeds, we clear the skb pointer;
	 * a non-NULL skb on exit will be otherwise unowned, and hence
	 * kfree_skb()-ed.
	 */
	skb_orphan(skb);

	if (skb->pkt_type == PACKET_OUTGOING)
		skb->pkt_type = PACKET_LOOPBACK;

	/* ensure we have enough data for a header and a type */
	if (skb->len < sizeof(struct mctp_hdr) + 1)
		goto out;

	/* grab header, advance data ptr */
	mh = mctp_hdr(skb);
	netid = mctp_cb(skb)->net;
	skb_pull(skb, sizeof(struct mctp_hdr));

	if (mh->ver != 1)
		goto out;

	flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	rcu_read_lock();

	/* lookup socket / reasm context, exactly matching (src,dest,tag).
	 * we hold a ref on the key, and key->lock held.
	 */
	key = mctp_lookup_key(net, skb, netid, mh->src, &f);

	if (flags & MCTP_HDR_FLAG_SOM) {
		if (key) {
			msk = container_of(key->sk, struct mctp_sock, sk);
		} else {
			/* first response to a broadcast? do a more general
			 * key lookup to find the socket, but don't use this
			 * key for reassembly - we'll create a more specific
			 * one for future packets if required (ie, !EOM).
			 *
			 * this lookup requires key->peer to be MCTP_ADDR_ANY,
			 * it doesn't match just any key->peer.
			 */
			any_key = mctp_lookup_key(net, skb, netid,
						  MCTP_ADDR_ANY, &f);
			if (any_key) {
				msk = container_of(any_key->sk,
						   struct mctp_sock, sk);
				spin_unlock_irqrestore(&any_key->lock, f);
			}
		}

		if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
			msk = mctp_lookup_bind(net, skb);

		if (!msk) {
			rc = -ENOENT;
			goto out_unlock;
		}

		/* single-packet message? deliver to socket, clean up any
		 * pending key.
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			rc = sock_queue_rcv_skb(&msk->sk, skb);
			if (!rc)
				skb = NULL;
			if (key) {
				/* we've hit a pending reassembly; not much we
				 * can do but drop it
				 */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_REPLIED);
				key = NULL;
			}
			goto out_unlock;
		}

		/* broadcast response or a bind() - create a key for further
		 * packets for this message
		 */
		if (!key) {
			key = mctp_key_alloc(msk, netid, mh->dest, mh->src,
					     tag, GFP_ATOMIC);
			if (!key) {
				rc = -ENOMEM;
				goto out_unlock;
			}

			/* we can queue without the key lock here, as the
			 * key isn't observable yet
			 */
			mctp_frag_queue(key, skb);
			skb = NULL;

			/* if the key_add fails, we've raced with another
			 * SOM packet with the same src, dest and tag. There's
			 * no way to distinguish future packets, so all we
			 * can do is drop.
			 */
			rc = mctp_key_add(key, msk);
			if (!rc)
				trace_mctp_key_acquire(key);

			/* we don't need to release key->lock on exit, so
			 * clean up here and suppress the unlock via
			 * setting to NULL
			 */
			mctp_key_unref(key);
			key = NULL;

		} else {
			if (key->reasm_head || key->reasm_dead) {
				/* duplicate start? drop everything */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_INVALIDATED);
				rc = -EEXIST;
				key = NULL;
			} else {
				rc = mctp_frag_queue(key, skb);
				skb = NULL;
			}
		}

	} else if (key) {
		/* this packet continues a previous message; reassemble
		 * using the message-specific key
		 */

		/* we need to be continuing an existing reassembly... */
		if (!key->reasm_head) {
			rc = -EINVAL;
		} else {
			rc = mctp_frag_queue(key, skb);
			skb = NULL;
		}

		if (rc)
			goto out_unlock;

		/* end of message? deliver to socket, and we're done with
		 * the reassembly/response key
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			rc = sock_queue_rcv_skb(key->sk, key->reasm_head);
			if (!rc)
				key->reasm_head = NULL;
			__mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
			key = NULL;
		}

	} else {
		/* not a start, no matching key */
		rc = -ENOENT;
	}

out_unlock:
	rcu_read_unlock();
	if (key) {
		spin_unlock_irqrestore(&key->lock, f);
		mctp_key_unref(key);
	}
	if (any_key)
		mctp_key_unref(any_key);
out:
	kfree_skb(skb);
	return rc;
}

static int mctp_dst_output(struct mctp_dst *dst, struct sk_buff *skb)
{
	char daddr_buf[MAX_ADDR_LEN];
	char *daddr = NULL;
	int rc;

	skb->protocol = htons(ETH_P_MCTP);
	skb->pkt_type = PACKET_OUTGOING;

	if (skb->len > dst->mtu) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* direct route; use the hwaddr we stashed in sendmsg */
	if (dst->halen) {
		if (dst->halen != skb->dev->addr_len) {
			/* sanity check, sendmsg should have already caught this */
			kfree_skb(skb);
			return -EMSGSIZE;
		}
		daddr = dst->haddr;
	} else {
		/* If lookup fails let the device handle daddr==NULL */
		if (mctp_neigh_lookup(dst->dev, dst->nexthop, daddr_buf) == 0)
			daddr = daddr_buf;
	}

	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
			     daddr, skb->dev->dev_addr, skb->len);
	if (rc < 0) {
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}

	mctp_flow_prepare_output(skb, dst->dev);

	rc = dev_queue_xmit(skb);
	if (rc)
		rc = net_xmit_errno(rc);

	return rc;
}

/* route alloc/release */
static void mctp_route_release(struct mctp_route *rt)
{
	if (refcount_dec_and_test(&rt->refs)) {
		if (rt->dst_type == MCTP_ROUTE_DIRECT)
			mctp_dev_put(rt->dev);
		kfree_rcu(rt, rcu);
	}
}

/* returns a route with the refcount at 1 */
static struct mctp_route *mctp_route_alloc(void)
{
	struct mctp_route *rt;

	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
	if (!rt)
		return NULL;

	INIT_LIST_HEAD(&rt->list);
	refcount_set(&rt->refs, 1);
	rt->output = mctp_dst_discard;

	return rt;
}

unsigned int mctp_default_net(struct net *net)
{
	return READ_ONCE(net->mctp.default_net);
}

int mctp_default_net_set(struct net *net, unsigned int index)
{
	if (index == 0)
		return -EINVAL;
	WRITE_ONCE(net->mctp.default_net, index);
	return 0;
}

/* tag management */
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
			     struct mctp_sock *msk)
{
	struct netns_mctp *mns = &net->mctp;

	lockdep_assert_held(&mns->keys_lock);

	key->expiry = jiffies + mctp_key_lifetime;
	timer_reduce(&msk->key_expiry, key->expiry);

	/* we hold the net's keys_lock here, allowing updates to both
	 * the net and sk lists
	 */
	hlist_add_head_rcu(&key->hlist, &mns->keys);
	hlist_add_head_rcu(&key->sklist, &msk->keys);
	refcount_inc(&key->refs);
}

/* Allocate a locally-owned tag value for (local, peer), and reserve
 * it for the socket msk
 */
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
					 unsigned int netid,
					 mctp_eid_t local, mctp_eid_t peer,
					 bool manual, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;
	u8 tagbits;

	/* for NULL destination EIDs, we may get a response from any peer */
	if (peer == MCTP_ADDR_NULL)
		peer = MCTP_ADDR_ANY;

	/* be optimistic, alloc now */
	key = mctp_key_alloc(msk, netid, local, peer, 0, GFP_KERNEL);
	if (!key)
		return ERR_PTR(-ENOMEM);

	/* 8 possible tag values */
	tagbits = 0xff;

	spin_lock_irqsave(&mns->keys_lock, flags);

	/* Walk through the existing keys, looking for potential conflicting
	 * tags. If we find a conflict, clear that bit from tagbits
	 */
	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		/* We can check the lookup fields (*_addr, tag) without the
		 * lock held, they don't change over the lifetime of the key.
		 */

		/* tags are net-specific */
		if (tmp->net != netid)
			continue;

		/* if we don't own the tag, it can't conflict */
		if (tmp->tag & MCTP_HDR_FLAG_TO)
			continue;

		/* Since we're avoiding conflicting entries, match peer and
		 * local addresses, including with a wildcard on ANY. See
		 * 'A note on key allocations' for background.
		 */
		if (peer != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->peer_addr, peer))
			continue;

		if (local != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->local_addr, local))
			continue;

		spin_lock(&tmp->lock);
		/* key must still be valid. If we find a match, clear the
		 * potential tag value
		 */
		if (tmp->valid)
			tagbits &= ~(1 << tmp->tag);
		spin_unlock(&tmp->lock);

		if (!tagbits)
			break;
	}

	if (tagbits) {
		key->tag = __ffs(tagbits);
		mctp_reserve_tag(net, key, msk);
		trace_mctp_key_acquire(key);

		key->manual_alloc = manual;
		*tagp = key->tag;
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!tagbits) {
		mctp_key_unref(key);
		return ERR_PTR(-EBUSY);
	}

	return key;
}
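
/* Illustrative note (added commentary, not in the upstream file; tag values
 * are hypothetical): if valid keys for this (net, local, peer) already hold
 * tags 0 and 2, the walk clears those bits, leaving tagbits == 0xfa, and
 * __ffs(0xfa) selects tag 1, the lowest free value. With all eight tags in
 * use, tagbits reaches zero and the caller sees -EBUSY.
 */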

static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
						    unsigned int netid,
						    mctp_eid_t daddr,
						    u8 req_tag, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;

	req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER);
	key = NULL;

	spin_lock_irqsave(&mns->keys_lock, flags);

	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		if (tmp->net != netid)
			continue;

		if (tmp->tag != req_tag)
			continue;

		if (!mctp_address_matches(tmp->peer_addr, daddr))
			continue;

		if (!tmp->manual_alloc)
			continue;

		spin_lock(&tmp->lock);
		if (tmp->valid) {
			key = tmp;
			refcount_inc(&key->refs);
			spin_unlock(&tmp->lock);
			break;
		}
		spin_unlock(&tmp->lock);
	}
	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!key)
		return ERR_PTR(-ENOENT);

	if (tagp)
		*tagp = key->tag;

	return key;
}

/* routing lookups */
static unsigned int mctp_route_netid(struct mctp_route *rt)
{
	return rt->dst_type == MCTP_ROUTE_DIRECT ?
		READ_ONCE(rt->dev->net) : rt->gateway.net;
}

static bool mctp_rt_match_eid(struct mctp_route *rt,
			      unsigned int net, mctp_eid_t eid)
{
	return mctp_route_netid(rt) == net &&
		rt->min <= eid && rt->max >= eid;
}

/* compares match, used for duplicate prevention */
static bool mctp_rt_compare_exact(struct mctp_route *rt1,
				  struct mctp_route *rt2)
{
	ASSERT_RTNL();
	return mctp_route_netid(rt1) == mctp_route_netid(rt2) &&
		rt1->min == rt2->min &&
		rt1->max == rt2->max;
}

/* must only be called on a direct route, as the final output hop */
static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid,
				unsigned int mtu, struct mctp_route *route)
{
	mctp_dev_hold(route->dev);
	dst->nexthop = eid;
	dst->dev = route->dev;
	dst->mtu = READ_ONCE(dst->dev->dev->mtu);
	if (mtu)
		dst->mtu = min(dst->mtu, mtu);
	dst->halen = 0;
	dst->output = route->output;
}

int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex,
			  unsigned char halen, const unsigned char *haddr)
{
	struct net_device *netdev;
	struct mctp_dev *dev;
	int rc = -ENOENT;

	if (halen > sizeof(dst->haddr))
		return -EINVAL;

	rcu_read_lock();

	netdev = dev_get_by_index_rcu(net, ifindex);
	if (!netdev)
		goto out_unlock;

	if (netdev->addr_len != halen) {
		rc = -EINVAL;
		goto out_unlock;
	}

	dev = __mctp_dev_get(netdev);
	if (!dev)
		goto out_unlock;

	dst->dev = dev;
	dst->mtu = READ_ONCE(netdev->mtu);
	dst->halen = halen;
	dst->output = mctp_dst_output;
	dst->nexthop = 0;
	memcpy(dst->haddr, haddr, halen);

	rc = 0;

out_unlock:
	rcu_read_unlock();
	return rc;
}

void mctp_dst_release(struct mctp_dst *dst)
{
	mctp_dev_put(dst->dev);
}

static struct mctp_route *mctp_route_lookup_single(struct net *net,
						   unsigned int dnet,
						   mctp_eid_t daddr)
{
	struct mctp_route *rt;

	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (mctp_rt_match_eid(rt, dnet, daddr))
			return rt;
	}

	return NULL;
}

/* populates *dst on successful lookup, if set */
int mctp_route_lookup(struct net *net, unsigned int dnet,
		      mctp_eid_t daddr, struct mctp_dst *dst)
{
	const unsigned int max_depth = 32;
	unsigned int depth, mtu = 0;
	int rc = -EHOSTUNREACH;

	rcu_read_lock();

	for (depth = 0; depth < max_depth; depth++) {
		struct mctp_route *rt;

		rt = mctp_route_lookup_single(net, dnet, daddr);
		if (!rt)
			break;

		/* clamp mtu to the smallest in the path, allowing 0
		 * to specify no restrictions
		 */
		if (mtu && rt->mtu)
			mtu = min(mtu, rt->mtu);
		else
			mtu = mtu ?: rt->mtu;

		if (rt->dst_type == MCTP_ROUTE_DIRECT) {
			if (dst)
				mctp_dst_from_route(dst, daddr, mtu, rt);
			rc = 0;
			break;

		} else if (rt->dst_type == MCTP_ROUTE_GATEWAY) {
			daddr = rt->gateway.eid;
		}
	}

	rcu_read_unlock();

	return rc;
}
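
/* Illustrative note (added commentary, not in the upstream file; EIDs and
 * MTUs are hypothetical): a lookup for EID 20 might first match a gateway
 * route (20 -> gateway EID 9, rt->mtu 254), then a direct route covering
 * EID 9. The loop follows the gateway hop, keeps the smallest non-zero
 * rt->mtu, and mctp_dst_from_route() further clamps against the device MTU.
 * max_depth bounds the walk, so routing loops fail with -EHOSTUNREACH.
 */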

static int mctp_route_lookup_null(struct net *net, struct net_device *dev,
				  struct mctp_dst *dst)
{
	int rc = -EHOSTUNREACH;
	struct mctp_route *rt;

	rcu_read_lock();

	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (rt->dst_type != MCTP_ROUTE_DIRECT || rt->type != RTN_LOCAL)
			continue;

		if (rt->dev->dev != dev)
			continue;

		mctp_dst_from_route(dst, 0, 0, rt);
		rc = 0;
		break;
	}

	rcu_read_unlock();

	return rc;
}

static int mctp_do_fragment_route(struct mctp_dst *dst, struct sk_buff *skb,
				  unsigned int mtu, u8 tag)
{
	const unsigned int hlen = sizeof(struct mctp_hdr);
	struct mctp_hdr *hdr, *hdr2;
	unsigned int pos, size, headroom;
	struct sk_buff *skb2;
	int rc;
	u8 seq;

	hdr = mctp_hdr(skb);
	seq = 0;
	rc = 0;

	if (mtu < hlen + 1) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* keep same headroom as the original skb */
	headroom = skb_headroom(skb);

	/* we've got the header */
	skb_pull(skb, hlen);

	for (pos = 0; pos < skb->len;) {
		/* size of message payload */
		size = min(mtu - hlen, skb->len - pos);

		skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL);
		if (!skb2) {
			rc = -ENOMEM;
			break;
		}

		/* generic skb copy */
		skb2->protocol = skb->protocol;
		skb2->priority = skb->priority;
		skb2->dev = skb->dev;
		memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/* establish packet */
		skb_reserve(skb2, headroom);
		skb_reset_network_header(skb2);
		skb_put(skb2, hlen + size);
		skb2->transport_header = skb2->network_header + hlen;

		/* copy header fields, calculate SOM/EOM flags & seq */
		hdr2 = mctp_hdr(skb2);
		hdr2->ver = hdr->ver;
		hdr2->dest = hdr->dest;
		hdr2->src = hdr->src;
		hdr2->flags_seq_tag = tag &
			(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

		if (pos == 0)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;

		if (pos + size == skb->len)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;

		hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;

		/* copy message payload */
		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);

		/* we need to copy the extensions, for MCTP flow data */
		skb_ext_copy(skb2, skb);

		/* do route */
		rc = dst->output(dst, skb2);
		if (rc)
			break;

		seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
		pos += size;
	}

	consume_skb(skb);
	return rc;
}
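
/* Illustrative note (added commentary, not in the upstream file; sizes are
 * hypothetical): with mtu == 68 and hlen == 4, each fragment carries up to
 * 64 payload bytes, so a 150-byte message becomes fragments of 64, 64 and
 * 22 bytes, flagged SOM / (none) / EOM, with seq values 0, 1 and 2 under
 * the 2-bit sequence mask.
 */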

int mctp_local_output(struct sock *sk, struct mctp_dst *dst,
		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct mctp_sk_key *key;
	struct mctp_hdr *hdr;
	unsigned long flags;
	unsigned int netid;
	unsigned int mtu;
	mctp_eid_t saddr;
	int rc;
	u8 tag;

	KUNIT_STATIC_STUB_REDIRECT(mctp_local_output, sk, dst, skb, daddr,
				   req_tag);

	rc = -ENODEV;

	spin_lock_irqsave(&dst->dev->addrs_lock, flags);
	if (dst->dev->num_addrs == 0) {
		rc = -EHOSTUNREACH;
	} else {
		/* use the outbound interface's first address as our source */
		saddr = dst->dev->addrs[0];
		rc = 0;
	}
	spin_unlock_irqrestore(&dst->dev->addrs_lock, flags);
	netid = READ_ONCE(dst->dev->net);

	if (rc)
		goto out_release;

	if (req_tag & MCTP_TAG_OWNER) {
		if (req_tag & MCTP_TAG_PREALLOC)
			key = mctp_lookup_prealloc_tag(msk, netid, daddr,
						       req_tag, &tag);
		else
			key = mctp_alloc_local_tag(msk, netid, saddr, daddr,
						   false, &tag);

		if (IS_ERR(key)) {
			rc = PTR_ERR(key);
			goto out_release;
		}
		mctp_skb_set_flow(skb, key);
		/* done with the key in this scope */
		mctp_key_unref(key);
		tag |= MCTP_HDR_FLAG_TO;
	} else {
		key = NULL;
		tag = req_tag & MCTP_TAG_MASK;
	}

	skb->pkt_type = PACKET_OUTGOING;
	skb->protocol = htons(ETH_P_MCTP);
	skb->priority = 0;
	skb_reset_transport_header(skb);
	skb_push(skb, sizeof(struct mctp_hdr));
	skb_reset_network_header(skb);
	skb->dev = dst->dev->dev;

	/* set up common header fields */
	hdr = mctp_hdr(skb);
	hdr->ver = 1;
	hdr->dest = daddr;
	hdr->src = saddr;

	mtu = dst->mtu;

	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM |
			MCTP_HDR_FLAG_EOM | tag;
		rc = dst->output(dst, skb);
	} else {
		rc = mctp_do_fragment_route(dst, skb, mtu, tag);
	}

	/* route output functions consume the skb, even on error */
	skb = NULL;

out_release:
	kfree_skb(skb);
	return rc;
}

/* route management */

/* mctp_route_add(): Add the provided route, previously allocated via
 * mctp_route_alloc(). On success, takes ownership of @rt, which includes a
 * hold on rt->dev for usage in the route table. On failure a caller will want
 * to mctp_route_release().
 *
 * We expect that the caller has set rt->type, rt->dst_type, rt->min, rt->max,
 * rt->mtu and either rt->dev (with a reference held appropriately) or
 * rt->gateway. Other fields will be populated.
 */
static int mctp_route_add(struct net *net, struct mctp_route *rt)
{
	struct mctp_route *ert;

	if (!mctp_address_unicast(rt->min) || !mctp_address_unicast(rt->max))
		return -EINVAL;

	if (rt->dst_type == MCTP_ROUTE_DIRECT && !rt->dev)
		return -EINVAL;

	if (rt->dst_type == MCTP_ROUTE_GATEWAY && !rt->gateway.eid)
		return -EINVAL;

	switch (rt->type) {
	case RTN_LOCAL:
		rt->output = mctp_dst_input;
		break;
	case RTN_UNICAST:
		rt->output = mctp_dst_output;
		break;
	default:
		return -EINVAL;
	}

	ASSERT_RTNL();

	/* Prevent duplicate identical routes. */
	list_for_each_entry(ert, &net->mctp.routes, list) {
		if (mctp_rt_compare_exact(rt, ert)) {
			return -EEXIST;
		}
	}

	list_add_rcu(&rt->list, &net->mctp.routes);

	return 0;
}

static int mctp_route_remove(struct net *net, unsigned int netid,
			     mctp_eid_t daddr_start, unsigned int daddr_extent,
			     unsigned char type)
{
	struct mctp_route *rt, *tmp;
	mctp_eid_t daddr_end;
	bool dropped;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	daddr_end = daddr_start + daddr_extent;
	dropped = false;

	ASSERT_RTNL();

	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (mctp_route_netid(rt) == netid &&
		    rt->min == daddr_start && rt->max == daddr_end &&
		    rt->type == type) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
			dropped = true;
		}
	}

	return dropped ? 0 : -ENOENT;
}

int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	struct mctp_route *rt;
	int rc;

	rt = mctp_route_alloc();
	if (!rt)
		return -ENOMEM;

	rt->min = addr;
	rt->max = addr;
	rt->dst_type = MCTP_ROUTE_DIRECT;
	rt->dev = mdev;
	rt->type = RTN_LOCAL;

	mctp_dev_hold(rt->dev);

	rc = mctp_route_add(dev_net(mdev->dev), rt);
	if (rc)
		mctp_route_release(rt);

	return rc;
}

int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_remove(dev_net(mdev->dev), mdev->net,
				 addr, 0, RTN_LOCAL);
}

/* removes all entries for a given device */
void mctp_route_remove_dev(struct mctp_dev *mdev)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;

	ASSERT_RTNL();
	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dst_type == MCTP_ROUTE_DIRECT && rt->dev == mdev) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
		}
	}
}

/* Incoming packet-handling */

static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
				struct packet_type *pt,
				struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);
	struct mctp_dev *mdev;
	struct mctp_skb_cb *cb;
	struct mctp_dst dst;
	struct mctp_hdr *mh;
	int rc;

	rcu_read_lock();
	mdev = __mctp_dev_get(dev);
	rcu_read_unlock();
	if (!mdev) {
		/* basic non-data sanity checks */
		goto err_drop;
	}

	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
		goto err_drop;

	skb_reset_transport_header(skb);
	skb_reset_network_header(skb);

	/* We have enough for a header; decode and route */
	mh = mctp_hdr(skb);
	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
		goto err_drop;

	/* source must be valid unicast or null; drop reserved ranges and
	 * broadcast
	 */
	if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src)))
		goto err_drop;

	/* dest address: as above, but allow broadcast */
	if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) ||
	      mctp_address_broadcast(mh->dest)))
		goto err_drop;

	/* MCTP drivers must populate halen/haddr */
	if (dev->type == ARPHRD_MCTP) {
		cb = mctp_cb(skb);
	} else {
		cb = __mctp_cb(skb);
		cb->halen = 0;
	}
	cb->net = READ_ONCE(mdev->net);
	cb->ifindex = dev->ifindex;

	rc = mctp_route_lookup(net, cb->net, mh->dest, &dst);

	/* NULL EID, but addressed to our physical address */
	if (rc && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
		rc = mctp_route_lookup_null(net, dev, &dst);

	if (rc)
		goto err_drop;

	dst.output(&dst, skb);
	mctp_dst_release(&dst);
	mctp_dev_put(mdev);

	return NET_RX_SUCCESS;

err_drop:
	kfree_skb(skb);
	mctp_dev_put(mdev);
	return NET_RX_DROP;
}

static struct packet_type mctp_packet_type = {
	.type = cpu_to_be16(ETH_P_MCTP),
	.func = mctp_pkttype_receive,
};

/* netlink interface */

static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
	[RTA_DST]	= { .type = NLA_U8 },
	[RTA_METRICS]	= { .type = NLA_NESTED },
	[RTA_OIF]	= { .type = NLA_U32 },
	[RTA_GATEWAY]	= NLA_POLICY_EXACT_LEN(sizeof(struct mctp_fq_addr)),
};

static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
	[RTAX_MTU]	= { .type = NLA_U32 },
};

/* base parsing; common to both _lookup and _populate variants.
 *
 * For gateway routes (which have a RTA_GATEWAY, and no RTA_OIF), we populate
 * *gatewayp. For direct routes (RTA_OIF, no RTA_GATEWAY), we populate *mdev.
 */
static int mctp_route_nlparse_common(struct net *net, struct nlmsghdr *nlh,
				     struct netlink_ext_ack *extack,
				     struct nlattr **tb, struct rtmsg **rtm,
				     struct mctp_dev **mdev,
				     struct mctp_fq_addr *gatewayp,
				     mctp_eid_t *daddr_start)
{
	struct mctp_fq_addr *gateway = NULL;
	unsigned int ifindex = 0;
	struct net_device *dev;
	int rc;

	rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
			 rta_mctp_policy, extack);
	if (rc < 0) {
		NL_SET_ERR_MSG(extack, "incorrect format");
		return rc;
	}

	if (!tb[RTA_DST]) {
		NL_SET_ERR_MSG(extack, "dst EID missing");
		return -EINVAL;
	}
	*daddr_start = nla_get_u8(tb[RTA_DST]);

	if (tb[RTA_OIF])
		ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_GATEWAY])
		gateway = nla_data(tb[RTA_GATEWAY]);

	if (ifindex && gateway) {
		NL_SET_ERR_MSG(extack,
			       "cannot specify both ifindex and gateway");
		return -EINVAL;

	} else if (ifindex) {
		dev = __dev_get_by_index(net, ifindex);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "bad ifindex");
			return -ENODEV;
		}
		*mdev = mctp_dev_get_rtnl(dev);
		if (!*mdev)
			return -ENODEV;
		gatewayp->eid = 0;

	} else if (gateway) {
		if (!mctp_address_unicast(gateway->eid)) {
			NL_SET_ERR_MSG(extack, "bad gateway");
			return -EINVAL;
		}

		gatewayp->eid = gateway->eid;
		gatewayp->net = gateway->net != MCTP_NET_ANY ?
			gateway->net :
			READ_ONCE(net->mctp.default_net);
		*mdev = NULL;

	} else {
		NL_SET_ERR_MSG(extack, "no route output provided");
		return -EINVAL;
	}

	*rtm = nlmsg_data(nlh);
	if ((*rtm)->rtm_family != AF_MCTP) {
		NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
		return -EINVAL;
	}

	if ((*rtm)->rtm_type != RTN_UNICAST) {
		NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
		return -EINVAL;
	}

	return 0;
}

/* Route parsing for lookup operations; we only need the "route target"
 * components (ie., network and dest-EID range).
 */
static int mctp_route_nlparse_lookup(struct net *net, struct nlmsghdr *nlh,
				     struct netlink_ext_ack *extack,
				     unsigned char *type, unsigned int *netid,
				     mctp_eid_t *daddr_start,
				     unsigned int *daddr_extent)
{
	struct nlattr *tb[RTA_MAX + 1];
	struct mctp_fq_addr gw;
	struct mctp_dev *mdev;
	struct rtmsg *rtm;
	int rc;

	rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm,
				       &mdev, &gw, daddr_start);
	if (rc)
		return rc;

	if (mdev) {
		*netid = mdev->net;
	} else if (gw.eid) {
		*netid = gw.net;
	} else {
		/* bug: _nlparse_common should not allow this */
		return -1;
	}

	*type = rtm->rtm_type;
	*daddr_extent = rtm->rtm_dst_len;

	return 0;
}

/* Full route parse for RTM_NEWROUTE: populate @rt. On success,
 * MCTP_ROUTE_DIRECT routes (ie, those with a direct dev) will hold a reference
 * to that dev.
 */
static int mctp_route_nlparse_populate(struct net *net, struct nlmsghdr *nlh,
				       struct netlink_ext_ack *extack,
				       struct mctp_route *rt)
{
	struct nlattr *tbx[RTAX_MAX + 1];
	struct nlattr *tb[RTA_MAX + 1];
	unsigned int daddr_extent;
	struct mctp_fq_addr gw;
	mctp_eid_t daddr_start;
	struct mctp_dev *dev;
	struct rtmsg *rtm;
	u32 mtu = 0;
	int rc;

	rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm,
				       &dev, &gw, &daddr_start);
	if (rc)
		return rc;

	daddr_extent = rtm->rtm_dst_len;

	if (daddr_extent > 0xff || daddr_extent + daddr_start >= 255) {
		NL_SET_ERR_MSG(extack, "invalid eid range");
		return -EINVAL;
	}

	if (tb[RTA_METRICS]) {
		rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
				      rta_metrics_policy, NULL);
		if (rc < 0) {
			NL_SET_ERR_MSG(extack, "incorrect RTA_METRICS format");
			return rc;
		}
		if (tbx[RTAX_MTU])
			mtu = nla_get_u32(tbx[RTAX_MTU]);
	}

	rt->type = rtm->rtm_type;
	rt->min = daddr_start;
	rt->max = daddr_start + daddr_extent;
	rt->mtu = mtu;
	if (gw.eid) {
		rt->dst_type = MCTP_ROUTE_GATEWAY;
		rt->gateway.eid = gw.eid;
		rt->gateway.net = gw.net;
	} else {
		rt->dst_type = MCTP_ROUTE_DIRECT;
		rt->dev = dev;
		mctp_dev_hold(rt->dev);
	}

	return 0;
}

static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct mctp_route *rt;
	int rc;

	rt = mctp_route_alloc();
	if (!rt)
		return -ENOMEM;

	rc = mctp_route_nlparse_populate(net, nlh, extack, rt);
	if (rc < 0)
		goto err_free;

	if (rt->dst_type == MCTP_ROUTE_DIRECT &&
	    rt->dev->dev->flags & IFF_LOOPBACK) {
		NL_SET_ERR_MSG(extack, "no routes to loopback");
		rc = -EINVAL;
		goto err_free;
	}

	rc = mctp_route_add(net, rt);
	if (!rc)
		return 0;

err_free:
	mctp_route_release(rt);
	return rc;
}

static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	unsigned int netid, daddr_extent;
	unsigned char type = RTN_UNSPEC;
	mctp_eid_t daddr_start;
	int rc;

	rc = mctp_route_nlparse_lookup(net, nlh, extack, &type, &netid,
				       &daddr_start, &daddr_extent);
	if (rc < 0)
		return rc;

	/* we only have unicast routes */
	if (type != RTN_UNICAST)
		return -EINVAL;

	rc = mctp_route_remove(net, netid, daddr_start, daddr_extent, type);
	return rc;
}

static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *hdr;
	void *metrics;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->rtm_family = AF_MCTP;

	/* we use the _len fields as a number of EIDs, rather than
	 * a number of bits in the address
	 */
	hdr->rtm_dst_len = rt->max - rt->min;
	hdr->rtm_src_len = 0;
	hdr->rtm_tos = 0;
	hdr->rtm_table = RT_TABLE_DEFAULT;
	hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
	hdr->rtm_type = rt->type;

	if (nla_put_u8(skb, RTA_DST, rt->min))
		goto cancel;

	metrics = nla_nest_start_noflag(skb, RTA_METRICS);
	if (!metrics)
		goto cancel;

	if (rt->mtu) {
		if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
			goto cancel;
	}

	nla_nest_end(skb, metrics);

	if (rt->dst_type == MCTP_ROUTE_DIRECT) {
		hdr->rtm_scope = RT_SCOPE_LINK;
		if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
			goto cancel;
	} else if (rt->dst_type == MCTP_ROUTE_GATEWAY) {
		hdr->rtm_scope = RT_SCOPE_UNIVERSE;
		if (nla_put(skb, RTA_GATEWAY,
			    sizeof(rt->gateway), &rt->gateway))
			goto cancel;
	}

	nlmsg_end(skb, nlh);

	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mctp_route *rt;
	int s_idx, idx;

	/* TODO: allow filtering on route data, possibly under
	 * cb->strict_check
	 */

	/* TODO: change to struct overlay */
	s_idx = cb->args[0];
	idx = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (idx++ < s_idx)
			continue;
		if (mctp_fill_rtinfo(skb, rt,
				     NETLINK_CB(cb->skb).portid,
				     cb->nlh->nlmsg_seq,
				     RTM_NEWROUTE, NLM_F_MULTI) < 0)
			break;
	}

	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

/* net namespace implementation */
static int __net_init mctp_routes_net_init(struct net *net)
{
	struct netns_mctp *ns = &net->mctp;

	INIT_LIST_HEAD(&ns->routes);
	hash_init(ns->binds);
	mutex_init(&ns->bind_lock);
	INIT_HLIST_HEAD(&ns->keys);
	spin_lock_init(&ns->keys_lock);
	WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
	return 0;
}

static void __net_exit mctp_routes_net_exit(struct net *net)
{
	struct mctp_route *rt;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list)
		mctp_route_release(rt);
	rcu_read_unlock();
}

static struct pernet_operations mctp_net_ops = {
	.init = mctp_routes_net_init,
	.exit = mctp_routes_net_exit,
};

static const struct rtnl_msg_handler mctp_route_rtnl_msg_handlers[] = {
	{THIS_MODULE, PF_MCTP, RTM_NEWROUTE, mctp_newroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_DELROUTE, mctp_delroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_GETROUTE, NULL, mctp_dump_rtinfo, 0},
};

int __init mctp_routes_init(void)
{
	int err;

	dev_add_pack(&mctp_packet_type);

	err = register_pernet_subsys(&mctp_net_ops);
	if (err)
		goto err_pernet;

	err = rtnl_register_many(mctp_route_rtnl_msg_handlers);
	if (err)
		goto err_rtnl;

	return 0;

err_rtnl:
	unregister_pernet_subsys(&mctp_net_ops);
err_pernet:
	dev_remove_pack(&mctp_packet_type);
	return err;
}

void mctp_routes_exit(void)
{
	rtnl_unregister_many(mctp_route_rtnl_msg_handlers);
	unregister_pernet_subsys(&mctp_net_ops);
	dev_remove_pack(&mctp_packet_type);
}

#if IS_ENABLED(CONFIG_MCTP_TEST)
#include "test/route-test.c"
#endif