GitHub Repository: torvalds/linux
Path: blob/master/net/mptcp/pm_kernel.c
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2025, Matthieu Baerts.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <net/netns/generic.h>

#include "protocol.h"
#include "mib.h"
#include "mptcp_pm_gen.h"

static int pm_nl_pernet_id;

struct pm_nl_pernet {
	/* protects pernet updates */
	spinlock_t lock;
	struct list_head endp_list;
	u8 endpoints;
	u8 endp_signal_max;
	u8 endp_subflow_max;
	u8 endp_laminar_max;
	u8 endp_fullmesh_max;
	u8 limit_add_addr_accepted;
	u8 limit_extra_subflows;
	u8 next_id;
	DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
};

#define MPTCP_PM_ADDR_MAX	8

static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net)
{
	return net_generic(net, pm_nl_pernet_id);
}

static struct pm_nl_pernet *
pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk)
{
	return pm_nl_get_pernet(sock_net((struct sock *)msk));
}

static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info)
{
	return pm_nl_get_pernet(genl_info_net(info));
}

u8 mptcp_pm_get_endp_signal_max(const struct mptcp_sock *msk)
{
	const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->endp_signal_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_signal_max);

u8 mptcp_pm_get_endp_subflow_max(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->endp_subflow_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_subflow_max);

u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->endp_laminar_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_laminar_max);

u8 mptcp_pm_get_endp_fullmesh_max(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->endp_fullmesh_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_fullmesh_max);

u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->limit_add_addr_accepted);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_limit_add_addr_accepted);

u8 mptcp_pm_get_limit_extra_subflows(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->limit_extra_subflows);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_limit_extra_subflows);

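/* Illustrative sketch (not part of the original file): the accessors above
 * pair READ_ONCE() with the WRITE_ONCE() updates done under pernet->lock
 * later in this file, so lockless readers on the data path get tear-free
 * values. The example_* helpers below are hypothetical:
 */
static inline u8 example_read_limit(const struct pm_nl_pernet *pernet)
{
	/* lockless reader: a single, tear-free load */
	return READ_ONCE(pernet->limit_extra_subflows);
}

static inline void example_write_limit(struct pm_nl_pernet *pernet, u8 val)
{
	/* writer: serialized against other writers by pernet->lock */
	spin_lock_bh(&pernet->lock);
	WRITE_ONCE(pernet->limit_extra_subflows, val);
	spin_unlock_bh(&pernet->lock);
}
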
static bool lookup_subflow_by_daddr(const struct list_head *list,
				    const struct mptcp_addr_info *daddr)
{
	struct mptcp_subflow_context *subflow;
	struct mptcp_addr_info cur;

	list_for_each_entry(subflow, list, node) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		if (!((1 << inet_sk_state_load(ssk)) &
		      (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV)))
			continue;

		mptcp_remote_address((struct sock_common *)ssk, &cur);
		if (mptcp_addresses_equal(&cur, daddr, daddr->port))
			return true;
	}

	return false;
}

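/* Illustrative sketch (not part of the original file): the
 * "(1 << state) & (TCPF_...)" test above checks TCP state-set membership in
 * a single branch, since each TCPF_* flag is the bit of the corresponding
 * TCP_* state. Hypothetical standalone analogue:
 */
static inline bool example_state_in_set(int state, unsigned int tcpf_mask)
{
	return !!((1u << state) & tcpf_mask);
}
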
static bool
select_local_address(const struct pm_nl_pernet *pernet,
		     const struct mptcp_sock *msk,
		     struct mptcp_pm_local *new_local)
{
	struct mptcp_pm_addr_entry *entry;
	bool found = false;

	msk_owned_by_me(msk);

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &pernet->endp_list, list) {
		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
			continue;

		if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
			continue;

		new_local->addr = entry->addr;
		new_local->flags = entry->flags;
		new_local->ifindex = entry->ifindex;
		found = true;
		break;
	}
	rcu_read_unlock();

	return found;
}

static bool
select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk,
		      struct mptcp_pm_local *new_local)
{
	struct mptcp_pm_addr_entry *entry;
	bool found = false;

	rcu_read_lock();
	/* do not keep any additional per socket state, just signal
	 * the address list in order.
	 * Note: removal from the local address list during the msk life-cycle
	 * can lead to additional addresses not being announced.
	 */
	list_for_each_entry_rcu(entry, &pernet->endp_list, list) {
		if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
			continue;

		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
			continue;

		new_local->addr = entry->addr;
		new_local->flags = entry->flags;
		new_local->ifindex = entry->ifindex;
		found = true;
		break;
	}
	rcu_read_unlock();

	return found;
}

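/* Illustrative sketch (not part of the original file): both selectors above
 * follow the classic RCU read-side pattern, i.e. copy the entry out of the
 * list before rcu_read_unlock(), as the entry itself may be freed after a
 * grace period. Hypothetical example with a made-up entry type:
 */
struct example_entry {
	struct list_head list;
	int val;
};

static bool example_rcu_copy_first(struct list_head *head, int *out)
{
	struct example_entry *e;
	bool found = false;

	rcu_read_lock();
	list_for_each_entry_rcu(e, head, list) {
		*out = e->val;	/* copy the payload, not the pointer */
		found = true;
		break;
	}
	rcu_read_unlock();

	return found;
}
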
static unsigned int
fill_remote_addr(struct mptcp_sock *msk, struct mptcp_addr_info *local,
		 struct mptcp_addr_info *addrs)
{
	bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
	struct mptcp_addr_info remote = { 0 };
	struct sock *sk = (struct sock *)msk;

	if (deny_id0)
		return 0;

	mptcp_remote_address((struct sock_common *)sk, &remote);

	if (!mptcp_pm_addr_families_match(sk, local, &remote))
		return 0;

	msk->pm.extra_subflows++;
	*addrs = remote;

	return 1;
}

static unsigned int
fill_remote_addresses_fullmesh(struct mptcp_sock *msk,
			       struct mptcp_addr_info *local,
			       struct mptcp_addr_info *addrs)
{
	u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk);
	bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
	DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
	struct sock *sk = (struct sock *)msk, *ssk;
	struct mptcp_subflow_context *subflow;
	int i = 0;

	/* Forbid creation of new subflows matching existing ones, possibly
	 * already created by incoming ADD_ADDR
	 */
	bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
	mptcp_for_each_subflow(msk, subflow)
		if (READ_ONCE(subflow->local_id) == local->id)
			__set_bit(subflow->remote_id, unavail_id);

	mptcp_for_each_subflow(msk, subflow) {
		ssk = mptcp_subflow_tcp_sock(subflow);
		mptcp_remote_address((struct sock_common *)ssk, &addrs[i]);
		addrs[i].id = READ_ONCE(subflow->remote_id);
		if (deny_id0 && !addrs[i].id)
			continue;

		if (test_bit(addrs[i].id, unavail_id))
			continue;

		if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
			continue;

		/* forbid creating multiple subflows towards this id */
		__set_bit(addrs[i].id, unavail_id);
		msk->pm.extra_subflows++;
		i++;

		if (msk->pm.extra_subflows >= limit_extra_subflows)
			break;
	}

	return i;
}

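/* Illustrative sketch (not part of the original file):
 * fill_remote_addresses_fullmesh() dedups targets with an on-stack bitmap,
 * so at most one subflow per remote address ID is created in one pass.
 * Hypothetical helper showing the claim-once idiom:
 */
static inline bool example_claim_id(unsigned long *used_ids, u8 id)
{
	/* true only the first time a given id is claimed */
	if (test_bit(id, used_ids))
		return false;
	__set_bit(id, used_ids);
	return true;
}
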
/* Fill all the remote addresses into the array addrs[],
 * and return the array size.
 */
static unsigned int
fill_remote_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *local,
			  bool fullmesh, struct mptcp_addr_info *addrs)
{
	/* Non-fullmesh: fill in the single entry corresponding to the primary
	 * MPC subflow remote address, and return 1, corresponding to 1 entry.
	 */
	if (!fullmesh)
		return fill_remote_addr(msk, local, addrs);

	/* Fullmesh endpoint: fill all possible remote addresses */
	return fill_remote_addresses_fullmesh(msk, local, addrs);
}

static struct mptcp_pm_addr_entry *
__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
{
	struct mptcp_pm_addr_entry *entry;

	list_for_each_entry_rcu(entry, &pernet->endp_list, list,
				lockdep_is_held(&pernet->lock)) {
		if (entry->addr.id == id)
			return entry;
	}
	return NULL;
}

static struct mptcp_pm_addr_entry *
__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
{
	struct mptcp_pm_addr_entry *entry;

	list_for_each_entry_rcu(entry, &pernet->endp_list, list,
				lockdep_is_held(&pernet->lock)) {
		if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port))
			return entry;
	}
	return NULL;
}

static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk,
				  const struct mptcp_addr_info *addr)
{
	return msk->mpc_endpoint_id == addr->id ? 0 : addr->id;
}

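/* Worked example for mptcp_endp_get_local_id() (hypothetical values): the
 * initial subflow always uses address ID 0 on the wire, whatever endpoint ID
 * the netlink configuration assigned to it. So with msk->mpc_endpoint_id == 3,
 * an endpoint with id 3 maps to 0, while any other id, e.g. 5, is returned
 * unchanged.
 */
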
/* Set mpc_endpoint_id, and send MP_PRIO for ID0 if needed */
static void mptcp_mpc_endpoint_setup(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;
	struct mptcp_pm_addr_entry *entry;
	struct mptcp_addr_info mpc_addr;
	struct pm_nl_pernet *pernet;
	bool backup = false;

	/* do lazy endpoint usage accounting for the MPC subflows */
	if (likely(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED)) ||
	    !msk->first)
		return;

	subflow = mptcp_subflow_ctx(msk->first);
	pernet = pm_nl_get_pernet_from_msk(msk);

	mptcp_local_address((struct sock_common *)msk->first, &mpc_addr);
	rcu_read_lock();
	entry = __lookup_addr(pernet, &mpc_addr);
	if (entry) {
		__clear_bit(entry->addr.id, msk->pm.id_avail_bitmap);
		msk->mpc_endpoint_id = entry->addr.id;
		backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	}
	rcu_read_unlock();

	/* Send MP_PRIO */
	if (backup)
		mptcp_pm_send_ack(msk, subflow, true, backup);

	msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
}

static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
	u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk);
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
	u8 endp_subflow_max = mptcp_pm_get_endp_subflow_max(msk);
	u8 endp_signal_max = mptcp_pm_get_endp_signal_max(msk);
	struct sock *sk = (struct sock *)msk;
	bool signal_and_subflow = false;
	struct mptcp_pm_local local;

	mptcp_mpc_endpoint_setup(msk);
	if (!mptcp_is_fully_established(sk))
		return;

	pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
		 msk->pm.local_addr_used, endp_subflow_max,
		 msk->pm.add_addr_signaled, endp_signal_max,
		 msk->pm.extra_subflows, limit_extra_subflows);

	/* check first for announce */
	if (msk->pm.add_addr_signaled < endp_signal_max) {
		/* due to racing events on both ends we can reach here while
		 * the previous add address is still being processed: if we
		 * invoked mptcp_pm_announce_addr() now, it would fail and the
		 * corresponding id would be marked as used.
		 * Instead, let the PM machinery reschedule us when the
		 * current address announce is completed.
		 */
		if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
			return;

		if (!select_signal_address(pernet, msk, &local))
			goto subflow;

		/* If the allocation fails, we are under memory pressure: not
		 * worth continuing, nor trying to create subflows.
		 */
		if (!mptcp_pm_alloc_anno_list(msk, &local.addr))
			return;

		__clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
		msk->pm.add_addr_signaled++;

		/* Special case for ID0: set the correct ID */
		if (local.addr.id == msk->mpc_endpoint_id)
			local.addr.id = 0;

		mptcp_pm_announce_addr(msk, &local.addr, false);
		mptcp_pm_addr_send_ack(msk);

		if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
			signal_and_subflow = true;
	}

subflow:
	/* No need to try establishing subflows to remote id0 if not allowed */
	if (mptcp_pm_add_addr_c_flag_case(msk))
		goto exit;

	/* check if we should create a new subflow */
	while (msk->pm.local_addr_used < endp_subflow_max &&
	       msk->pm.extra_subflows < limit_extra_subflows) {
		struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
		bool fullmesh;
		int i, nr;

		if (signal_and_subflow)
			signal_and_subflow = false;
		else if (!select_local_address(pernet, msk, &local))
			break;

		fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH);

		__clear_bit(local.addr.id, msk->pm.id_avail_bitmap);

		/* Special case for ID0: set the correct ID */
		if (local.addr.id == msk->mpc_endpoint_id)
			local.addr.id = 0;
		else /* ID 0 is not accounted in local_addr_used */
			msk->pm.local_addr_used++;

		nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs);
		if (nr == 0)
			continue;

		spin_unlock_bh(&msk->pm.lock);
		for (i = 0; i < nr; i++)
			__mptcp_subflow_connect(sk, &local, &addrs[i]);
		spin_lock_bh(&msk->pm.lock);
	}

exit:
	mptcp_pm_nl_check_work_pending(msk);
}

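/* Illustrative sketch (not part of the original file): note the locking shape
 * at the end of the loop above. msk->pm.lock is a BH spinlock, so
 * __mptcp_subflow_connect(), which may sleep, runs with the lock dropped, and
 * the loop conditions are only re-evaluated once the lock is re-acquired.
 * Hypothetical skeleton of the pattern:
 */
static void example_connect_unlocked(struct mptcp_sock *msk, int nr)
{
	int i;

	spin_unlock_bh(&msk->pm.lock);	/* sleeping work: no BH lock held */
	for (i = 0; i < nr; i++)
		cond_resched();		/* stand-in for the blocking call */
	spin_lock_bh(&msk->pm.lock);	/* PM state may have changed meanwhile */
}
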
static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
{
	mptcp_pm_create_subflow_or_signal_addr(msk);
}

static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
{
	mptcp_pm_create_subflow_or_signal_addr(msk);
}

static unsigned int
fill_local_addresses_vec_fullmesh(struct mptcp_sock *msk,
				  struct mptcp_addr_info *remote,
				  struct mptcp_pm_local *locals,
				  bool c_flag_case)
{
	u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk);
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
	struct sock *sk = (struct sock *)msk;
	struct mptcp_pm_addr_entry *entry;
	struct mptcp_pm_local *local;
	int i = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &pernet->endp_list, list) {
		bool is_id0;

		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
			continue;

		if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote))
			continue;

		local = &locals[i];
		local->addr = entry->addr;
		local->flags = entry->flags;
		local->ifindex = entry->ifindex;

		is_id0 = local->addr.id == msk->mpc_endpoint_id;

		if (c_flag_case &&
		    (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) {
			__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);

			if (!is_id0)
				msk->pm.local_addr_used++;
		}

		/* Special case for ID0: set the correct ID */
		if (is_id0)
			local->addr.id = 0;

		msk->pm.extra_subflows++;
		i++;

		if (msk->pm.extra_subflows >= limit_extra_subflows)
			break;
	}
	rcu_read_unlock();

	return i;
}

static unsigned int
fill_local_laminar_endp(struct mptcp_sock *msk, struct mptcp_addr_info *remote,
			struct mptcp_pm_local *locals)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
	DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	struct mptcp_pm_addr_entry *entry;
	struct mptcp_pm_local *local;
	int found = 0;

	/* Forbid creation of new subflows matching existing ones, possibly
	 * already created by 'subflow' endpoints
	 */
	bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		if ((1 << inet_sk_state_load(ssk)) &
		    (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING |
		     TCPF_CLOSE))
			continue;

		__set_bit(subflow_get_local_id(subflow), unavail_id);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &pernet->endp_list, list) {
		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR))
			continue;

		if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote))
			continue;

		if (test_bit(mptcp_endp_get_local_id(msk, &entry->addr),
			     unavail_id))
			continue;

		local = &locals[0];
		local->addr = entry->addr;
		local->flags = entry->flags;
		local->ifindex = entry->ifindex;

		if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
			__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);

			if (local->addr.id != msk->mpc_endpoint_id)
				msk->pm.local_addr_used++;
		}

		msk->pm.extra_subflows++;
		found = 1;
		break;
	}
	rcu_read_unlock();

	return found;
}

static unsigned int
fill_local_addresses_vec_c_flag(struct mptcp_sock *msk,
				struct mptcp_addr_info *remote,
				struct mptcp_pm_local *locals)
{
	u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk);
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
	u8 endp_subflow_max = mptcp_pm_get_endp_subflow_max(msk);
	struct sock *sk = (struct sock *)msk;
	struct mptcp_pm_local *local;
	int i = 0;

	while (msk->pm.local_addr_used < endp_subflow_max) {
		local = &locals[i];

		if (!select_local_address(pernet, msk, local))
			break;

		__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);

		if (!mptcp_pm_addr_families_match(sk, &local->addr, remote))
			continue;

		if (local->addr.id == msk->mpc_endpoint_id)
			continue;

		msk->pm.local_addr_used++;
		msk->pm.extra_subflows++;
		i++;

		if (msk->pm.extra_subflows >= limit_extra_subflows)
			break;
	}

	return i;
}

static unsigned int
fill_local_address_any(struct mptcp_sock *msk, struct mptcp_addr_info *remote,
		       struct mptcp_pm_local *local)
{
	struct sock *sk = (struct sock *)msk;

	memset(local, 0, sizeof(*local));
	local->addr.family =
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		remote->family == AF_INET6 &&
		ipv6_addr_v4mapped(&remote->addr6) ? AF_INET :
#endif
		remote->family;

	if (!mptcp_pm_addr_families_match(sk, &local->addr, remote))
		return 0;

	msk->pm.extra_subflows++;

	return 1;
}

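/* Illustrative sketch (not part of the original file):
 * fill_local_address_any() keeps the wildcard local address and only pins the
 * family, treating a v4-mapped IPv6 peer (::ffff:a.b.c.d) as IPv4. The same
 * test, factored as a hypothetical helper:
 */
static inline bool example_needs_v4_local(const struct mptcp_addr_info *remote)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	return remote->family == AF_INET6 &&
	       ipv6_addr_v4mapped(&remote->addr6);
#else
	return false;
#endif
}
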
/* Fill all the local addresses into the array addrs[],
 * and return the array size.
 */
static unsigned int
fill_local_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *remote,
			 struct mptcp_pm_local *locals)
{
	bool c_flag_case = remote->id && mptcp_pm_add_addr_c_flag_case(msk);

	/* If there is at least one MPTCP endpoint with a fullmesh flag */
	if (mptcp_pm_get_endp_fullmesh_max(msk))
		return fill_local_addresses_vec_fullmesh(msk, remote, locals,
							 c_flag_case);

	/* If there is at least one MPTCP endpoint with a laminar flag */
	if (mptcp_pm_get_endp_laminar_max(msk))
		return fill_local_laminar_endp(msk, remote, locals);

	/* Special case: peer sets the C flag, accept one ADD_ADDR if default
	 * limits are used -- accepting no ADD_ADDR -- and use subflow endpoints
	 */
	if (c_flag_case)
		return fill_local_addresses_vec_c_flag(msk, remote, locals);

	/* No special case: fill in the single 'IPADDRANY' local address */
	return fill_local_address_any(msk, remote, &locals[0]);
}

static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
	u8 limit_add_addr_accepted = mptcp_pm_get_limit_add_addr_accepted(msk);
	u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk);
	struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX];
	struct sock *sk = (struct sock *)msk;
	struct mptcp_addr_info remote;
	bool sf_created = false;
	int i, nr;

	pr_debug("accepted %d:%d remote family %d\n",
		 msk->pm.add_addr_accepted, limit_add_addr_accepted,
		 msk->pm.remote.family);

	remote = msk->pm.remote;
	mptcp_pm_announce_addr(msk, &remote, true);
	mptcp_pm_addr_send_ack(msk);
	mptcp_mpc_endpoint_setup(msk);

	if (lookup_subflow_by_daddr(&msk->conn_list, &remote))
		return;

	/* no port provided with the remote address: use the ID 0 (initial
	 * subflow) destination port
	 */
	if (!remote.port)
		remote.port = sk->sk_dport;

	/* connect to the specified remote address, using whatever
	 * local address the routing configuration will pick.
	 */
	nr = fill_local_addresses_vec(msk, &remote, locals);
	if (nr == 0)
		return;

	spin_unlock_bh(&msk->pm.lock);
	for (i = 0; i < nr; i++)
		if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0)
			sf_created = true;
	spin_lock_bh(&msk->pm.lock);

	if (sf_created) {
		/* ID 0 is not accounted in add_addr_accepted */
		if (remote.id)
			msk->pm.add_addr_accepted++;
		if (msk->pm.add_addr_accepted >= limit_add_addr_accepted ||
		    msk->pm.extra_subflows >= limit_extra_subflows)
			WRITE_ONCE(msk->pm.accept_addr, false);
	}
}

void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id)
{
	if (rm_id && !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
		u8 limit_add_addr_accepted =
			mptcp_pm_get_limit_add_addr_accepted(msk);

		/* Note: if the subflow has been closed before, this
		 * add_addr_accepted counter will not be decremented.
		 */
		if (--msk->pm.add_addr_accepted < limit_add_addr_accepted)
			WRITE_ONCE(msk->pm.accept_addr, true);
	}
}

static bool address_use_port(struct mptcp_pm_addr_entry *entry)
{
	return (entry->flags &
		(MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) ==
		MPTCP_PM_ADDR_FLAG_SIGNAL;
}

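/* Worked example for address_use_port(): only pure 'signal' endpoints keep a
 * user-supplied port, as only those get a dedicated listening socket. With
 * the mask-and-compare above (hypothetical flag combinations):
 * SIGNAL -> true; SIGNAL|SUBFLOW -> false; SUBFLOW alone -> false.
 */
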
/* caller must ensure the RCU grace period is already elapsed */
static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
{
	if (entry->lsk)
		sock_release(entry->lsk);
	kfree(entry);
}

static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
					     struct mptcp_pm_addr_entry *entry,
					     bool needs_id, bool replace)
{
	struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
	int ret = -EINVAL;
	u8 addr_max;

	spin_lock_bh(&pernet->lock);
	/* to keep the code simple, don't do IDR-like allocation for address ID,
	 * just bail when we exceed limits
	 */
	if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
		pernet->next_id = 1;
	if (pernet->endpoints >= MPTCP_PM_ADDR_MAX) {
		ret = -ERANGE;
		goto out;
	}
	if (test_bit(entry->addr.id, pernet->id_bitmap)) {
		ret = -EBUSY;
		goto out;
	}

	/* do not insert duplicate addresses; differentiate on port only for
	 * 'signal' endpoints, the only ones keeping a port
	 */
	if (!address_use_port(entry))
		entry->addr.port = 0;
	list_for_each_entry(cur, &pernet->endp_list, list) {
		if (mptcp_addresses_equal(&cur->addr, &entry->addr,
					  cur->addr.port || entry->addr.port)) {
			/* allow replacing the existing endpoint only if such
			 * endpoint is an implicit one and the user-space
			 * did not provide an endpoint id
			 */
			if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) {
				ret = -EEXIST;
				goto out;
			}
			if (entry->addr.id)
				goto out;

			/* allow callers that only need to look up the local
			 * addr's id to skip replacement. This allows them to
			 * avoid calling synchronize_rcu in the packet recv
			 * path.
			 */
			if (!replace) {
				kfree(entry);
				ret = cur->addr.id;
				goto out;
			}

			pernet->endpoints--;
			entry->addr.id = cur->addr.id;
			list_del_rcu(&cur->list);
			del_entry = cur;
			break;
		}
	}

	if (!entry->addr.id && needs_id) {
find_next:
		entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
						    MPTCP_PM_MAX_ADDR_ID + 1,
						    pernet->next_id);
		if (!entry->addr.id && pernet->next_id != 1) {
			pernet->next_id = 1;
			goto find_next;
		}
	}

	if (!entry->addr.id && needs_id)
		goto out;

	__set_bit(entry->addr.id, pernet->id_bitmap);
	if (entry->addr.id > pernet->next_id)
		pernet->next_id = entry->addr.id;

	if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
		addr_max = pernet->endp_signal_max;
		WRITE_ONCE(pernet->endp_signal_max, addr_max + 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
		addr_max = pernet->endp_subflow_max;
		WRITE_ONCE(pernet->endp_subflow_max, addr_max + 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR) {
		addr_max = pernet->endp_laminar_max;
		WRITE_ONCE(pernet->endp_laminar_max, addr_max + 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
		addr_max = pernet->endp_fullmesh_max;
		WRITE_ONCE(pernet->endp_fullmesh_max, addr_max + 1);
	}

	pernet->endpoints++;
	if (!entry->addr.port)
		list_add_tail_rcu(&entry->list, &pernet->endp_list);
	else
		list_add_rcu(&entry->list, &pernet->endp_list);
	ret = entry->addr.id;

out:
	spin_unlock_bh(&pernet->lock);

	/* just replaced an existing entry, free it */
	if (del_entry) {
		synchronize_rcu();
		__mptcp_pm_release_addr_entry(del_entry);
	}
	return ret;
}

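/* Illustrative sketch (not part of the original file): ID allocation above is
 * a plain bitmap scan rather than an IDR, searching from next_id upwards and
 * wrapping around once from 1 (ID 0 is reserved for the initial subflow).
 * Hypothetical standalone form of the wrap logic:
 */
static inline unsigned int example_alloc_id(const unsigned long *id_bitmap,
					    unsigned int next_id)
{
	unsigned int id;

	id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, next_id);
	if (id > MPTCP_PM_MAX_ADDR_ID && next_id != 1)
		/* wrap around, skipping the reserved ID 0 */
		id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1);

	return id;	/* > MPTCP_PM_MAX_ADDR_ID means space exhausted */
}
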
static struct lock_class_key mptcp_slock_keys[2];
static struct lock_class_key mptcp_keys[2];

static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
					    struct mptcp_pm_addr_entry *entry)
{
	bool is_ipv6 = sk->sk_family == AF_INET6;
	int addrlen = sizeof(struct sockaddr_in);
	struct sockaddr_storage addr;
	struct sock *newsk, *ssk;
	int backlog = 1024;
	int err;

	err = sock_create_kern(sock_net(sk), entry->addr.family,
			       SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk);
	if (err)
		return err;

	newsk = entry->lsk->sk;
	if (!newsk)
		return -EINVAL;

	/* The subflow socket lock is acquired nested to the msk one in
	 * several places, even by the TCP stack, and this msk is a kernel
	 * socket: lockdep complains. Instead of propagating the _nested
	 * modifiers in several places, re-init the lock class for the msk
	 * socket to an mptcp specific one.
	 */
	sock_lock_init_class_and_name(newsk,
				      is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET",
				      &mptcp_slock_keys[is_ipv6],
				      is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET",
				      &mptcp_keys[is_ipv6]);

	lock_sock(newsk);
	ssk = __mptcp_nmpc_sk(mptcp_sk(newsk));
	release_sock(newsk);
	if (IS_ERR(ssk))
		return PTR_ERR(ssk);

	mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (entry->addr.family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	if (ssk->sk_family == AF_INET)
		err = inet_bind_sk(ssk, (struct sockaddr_unsized *)&addr, addrlen);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (ssk->sk_family == AF_INET6)
		err = inet6_bind_sk(ssk, (struct sockaddr_unsized *)&addr, addrlen);
#endif
	if (err)
		return err;

	/* We don't use mptcp_set_state() here because it needs to be called
	 * under the msk socket lock. For the moment, that will not bring
	 * anything more than only calling inet_sk_state_store(), because the
	 * old status is known (TCP_CLOSE).
	 */
	inet_sk_state_store(newsk, TCP_LISTEN);
	lock_sock(ssk);
	WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true);
	err = __inet_listen_sk(ssk, backlog);
	if (!err)
		mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
	release_sock(ssk);
	return err;
}

int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk,
			     struct mptcp_pm_addr_entry *skc)
{
	struct mptcp_pm_addr_entry *entry;
	struct pm_nl_pernet *pernet;
	int ret;

	pernet = pm_nl_get_pernet_from_msk(msk);

	rcu_read_lock();
	entry = __lookup_addr(pernet, &skc->addr);
	ret = entry ? entry->addr.id : -1;
	rcu_read_unlock();
	if (ret >= 0)
		return ret;

	/* address not found, add to local list */
	entry = kmemdup(skc, sizeof(*skc), GFP_ATOMIC);
	if (!entry)
		return -ENOMEM;

	entry->addr.port = 0;
	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false);
	if (ret < 0)
		kfree(entry);

	return ret;
}

bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
	struct mptcp_pm_addr_entry *entry;
	bool backup;

	rcu_read_lock();
	entry = __lookup_addr(pernet, skc);
	backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	rcu_read_unlock();

	return backup;
}

static int mptcp_nl_add_subflow_or_signal_addr(struct net *net,
					       struct mptcp_addr_info *addr)
{
	struct mptcp_sock *msk;
	long s_slot = 0, s_num = 0;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;
		struct mptcp_addr_info mpc_addr;

		if (!READ_ONCE(msk->fully_established) ||
		    mptcp_pm_is_userspace(msk))
			goto next;

		/* if the endp linked to the init sf is re-added with a != ID */
		mptcp_local_address((struct sock_common *)msk, &mpc_addr);

		lock_sock(sk);
		spin_lock_bh(&msk->pm.lock);
		if (mptcp_addresses_equal(addr, &mpc_addr, addr->port))
			msk->mpc_endpoint_id = addr->id;
		mptcp_pm_create_subflow_or_signal_addr(msk);
		spin_unlock_bh(&msk->pm.lock);
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}

	return 0;
}

static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr,
				      struct genl_info *info)
{
	struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];

	if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
					 mptcp_pm_address_nl_policy, info->extack) &&
	    tb[MPTCP_PM_ADDR_ATTR_ID])
		return true;
	return false;
}

/* Add an MPTCP endpoint */
int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct mptcp_pm_addr_entry addr, *entry;
	struct nlattr *attr;
	int ret;

	if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR))
		return -EINVAL;

	attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
	ret = mptcp_pm_parse_entry(attr, info, true, &addr);
	if (ret < 0)
		return ret;

	if (addr.addr.port && !address_use_port(&addr)) {
		NL_SET_ERR_MSG_ATTR(info->extack, attr,
				    "flags must have signal and not subflow when using port");
		return -EINVAL;
	}

	if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
	    addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
		NL_SET_ERR_MSG_ATTR(info->extack, attr,
				    "flags mustn't have both signal and fullmesh");
		return -EINVAL;
	}

	if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) {
		NL_SET_ERR_MSG_ATTR(info->extack, attr,
				    "can't create IMPLICIT endpoint");
		return -EINVAL;
	}

	entry = kmemdup(&addr, sizeof(addr), GFP_KERNEL_ACCOUNT);
	if (!entry) {
		GENL_SET_ERR_MSG(info, "can't allocate addr");
		return -ENOMEM;
	}

	if (entry->addr.port) {
		ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry);
		if (ret) {
			GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret);
			goto out_free;
		}
	}
	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry,
						!mptcp_pm_has_addr_attr_id(attr, info),
						true);
	if (ret < 0) {
		GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
		goto out_free;
	}

	mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr);
	return 0;

out_free:
	__mptcp_pm_release_addr_entry(entry);
	return ret;
}

static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
				      const struct mptcp_addr_info *addr,
				      bool force)
{
	struct mptcp_rm_list list = { .nr = 0 };
	bool ret;

	list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr);

	ret = mptcp_remove_anno_list_by_saddr(msk, addr);
	if (ret || force) {
		spin_lock_bh(&msk->pm.lock);
		if (ret) {
			__set_bit(addr->id, msk->pm.id_avail_bitmap);
			msk->pm.add_addr_signaled--;
		}
		mptcp_pm_remove_addr(msk, &list);
		spin_unlock_bh(&msk->pm.lock);
	}
	return ret;
}

static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id)
{
	/* If it was marked as used, and not ID 0, decrement local_addr_used */
	if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) &&
	    id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0))
		msk->pm.local_addr_used--;
}

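/* Illustrative sketch (not part of the original file):
 * __mark_subflow_endp_available() leans on __test_and_set_bit() returning the
 * previous bit value, so "mark as available" and "was it actually in use?"
 * are answered in one step. Hypothetical reduced form:
 */
static inline void example_release_slot(unsigned long *avail_bitmap, u8 id,
					u8 *used_counter)
{
	/* decrement only if the slot was really accounted as busy */
	if (!__test_and_set_bit(id, avail_bitmap) && *used_counter)
		(*used_counter)--;
}
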
static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
						   const struct mptcp_pm_addr_entry *entry)
{
	const struct mptcp_addr_info *addr = &entry->addr;
	struct mptcp_rm_list list = { .nr = 1 };
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;

	pr_debug("remove_id=%d\n", addr->id);

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;
		bool remove_subflow;

		if (mptcp_pm_is_userspace(msk))
			goto next;

		lock_sock(sk);
		remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr);
		mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
					  !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));

		list.ids[0] = mptcp_endp_get_local_id(msk, addr);
		if (remove_subflow) {
			spin_lock_bh(&msk->pm.lock);
			mptcp_pm_rm_subflow(msk, &list);
			spin_unlock_bh(&msk->pm.lock);
		}

		if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
			spin_lock_bh(&msk->pm.lock);
			__mark_subflow_endp_available(msk, list.ids[0]);
			spin_unlock_bh(&msk->pm.lock);
		}

		if (msk->mpc_endpoint_id == entry->addr.id)
			msk->mpc_endpoint_id = 0;
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}

	return 0;
}

static int mptcp_nl_remove_id_zero_address(struct net *net,
					   struct mptcp_addr_info *addr)
{
	struct mptcp_rm_list list = { .nr = 0 };
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;

	list.ids[list.nr++] = 0;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;
		struct mptcp_addr_info msk_local;

		if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
			goto next;

		mptcp_local_address((struct sock_common *)msk, &msk_local);
		if (!mptcp_addresses_equal(&msk_local, addr, addr->port))
			goto next;

		lock_sock(sk);
		spin_lock_bh(&msk->pm.lock);
		mptcp_pm_remove_addr(msk, &list);
		mptcp_pm_rm_subflow(msk, &list);
		__mark_subflow_endp_available(msk, 0);
		spin_unlock_bh(&msk->pm.lock);
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}

	return 0;
}

/* Remove an MPTCP endpoint */
int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct mptcp_pm_addr_entry addr, *entry;
	struct nlattr *attr;
	u8 addr_max;
	int ret;

	if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR))
		return -EINVAL;

	attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
	ret = mptcp_pm_parse_entry(attr, info, false, &addr);
	if (ret < 0)
		return ret;

	/* the zero id address is special: the first address used by the msk
	 * always gets such an id, so different subflows can have different zero
	 * id addresses. Additionally zero id is not accounted for in id_bitmap.
	 * Let's use an 'mptcp_rm_list' instead of the common remove code.
	 */
	if (addr.addr.id == 0)
		return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr);

	spin_lock_bh(&pernet->lock);
	entry = __lookup_addr_by_id(pernet, addr.addr.id);
	if (!entry) {
		NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found");
		spin_unlock_bh(&pernet->lock);
		return -EINVAL;
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
		addr_max = pernet->endp_signal_max;
		WRITE_ONCE(pernet->endp_signal_max, addr_max - 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
		addr_max = pernet->endp_subflow_max;
		WRITE_ONCE(pernet->endp_subflow_max, addr_max - 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR) {
		addr_max = pernet->endp_laminar_max;
		WRITE_ONCE(pernet->endp_laminar_max, addr_max - 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
		addr_max = pernet->endp_fullmesh_max;
		WRITE_ONCE(pernet->endp_fullmesh_max, addr_max - 1);
	}

	pernet->endpoints--;
	list_del_rcu(&entry->list);
	__clear_bit(entry->addr.id, pernet->id_bitmap);
	spin_unlock_bh(&pernet->lock);

	mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry);
	synchronize_rcu();
	__mptcp_pm_release_addr_entry(entry);

	return ret;
}

static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk,
					      struct list_head *rm_list)
{
	struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
	struct mptcp_pm_addr_entry *entry;

	list_for_each_entry(entry, rm_list, list) {
		if (slist.nr < MPTCP_RM_IDS_MAX &&
		    mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr))
			slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr);

		if (alist.nr < MPTCP_RM_IDS_MAX &&
		    mptcp_remove_anno_list_by_saddr(msk, &entry->addr))
			alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr);
	}

	spin_lock_bh(&msk->pm.lock);
	if (alist.nr) {
		msk->pm.add_addr_signaled -= alist.nr;
		mptcp_pm_remove_addr(msk, &alist);
	}
	if (slist.nr)
		mptcp_pm_rm_subflow(msk, &slist);
	/* Reset counters: maybe some subflows have been removed before */
	bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
	msk->pm.local_addr_used = 0;
	spin_unlock_bh(&msk->pm.lock);
}

static void mptcp_nl_flush_addrs_list(struct net *net,
				      struct list_head *rm_list)
{
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;

	if (list_empty(rm_list))
		return;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;

		if (!mptcp_pm_is_userspace(msk)) {
			lock_sock(sk);
			mptcp_pm_flush_addrs_and_subflows(msk, rm_list);
			release_sock(sk);
		}

		sock_put(sk);
		cond_resched();
	}
}

/* caller must ensure the RCU grace period is already elapsed */
static void __flush_addrs(struct list_head *list)
{
	while (!list_empty(list)) {
		struct mptcp_pm_addr_entry *cur;

		cur = list_entry(list->next,
				 struct mptcp_pm_addr_entry, list);
		list_del_rcu(&cur->list);
		__mptcp_pm_release_addr_entry(cur);
	}
}

static void __reset_counters(struct pm_nl_pernet *pernet)
{
	WRITE_ONCE(pernet->endp_signal_max, 0);
	WRITE_ONCE(pernet->endp_subflow_max, 0);
	WRITE_ONCE(pernet->endp_laminar_max, 0);
	pernet->endpoints = 0;
}

int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct list_head free_list;

	spin_lock_bh(&pernet->lock);
	free_list = pernet->endp_list;
	INIT_LIST_HEAD_RCU(&pernet->endp_list);
	__reset_counters(pernet);
	pernet->next_id = 1;
	bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
	spin_unlock_bh(&pernet->lock);

	if (free_list.next == &pernet->endp_list)
		return 0;

	synchronize_rcu();

	/* Adjust the pointers to free_list instead of pernet->endp_list */
	free_list.prev->next = &free_list;
	free_list.next->prev = &free_list;

	mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list);
	__flush_addrs(&free_list);
	return 0;
}

int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
			 struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct mptcp_pm_addr_entry *entry;
	int ret = -EINVAL;

	rcu_read_lock();
	entry = __lookup_addr_by_id(pernet, id);
	if (entry) {
		*addr = *entry;
		ret = 0;
	}
	rcu_read_unlock();

	return ret;
}

int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
			  struct netlink_callback *cb)
{
	struct net *net = sock_net(msg->sk);
	struct mptcp_pm_addr_entry *entry;
	struct pm_nl_pernet *pernet;
	int id = cb->args[0];
	int i;

	pernet = pm_nl_get_pernet(net);

	rcu_read_lock();
	for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
		if (test_bit(i, pernet->id_bitmap)) {
			entry = __lookup_addr_by_id(pernet, i);
			if (!entry)
				break;

			if (entry->addr.id <= id)
				continue;

			if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0)
				break;

			id = entry->addr.id;
		}
	}
	rcu_read_unlock();

	cb->args[0] = id;
	return msg->len;
}

static int parse_limit(struct genl_info *info, int id, unsigned int *limit)
{
	struct nlattr *attr = info->attrs[id];

	if (!attr)
		return 0;

	*limit = nla_get_u32(attr);
	if (*limit > MPTCP_PM_ADDR_MAX) {
		NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr,
					"limit greater than maximum (%u)",
					MPTCP_PM_ADDR_MAX);
		return -EINVAL;
	}
	return 0;
}

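/* Usage note for parse_limit(): when the attribute is absent, *limit is left
 * untouched, so a SET_LIMITS request may update one limit while preserving
 * the other; the caller below seeds each variable with the current value
 * before parsing. Illustrative call sequence (hypothetical values):
 *
 *	rcv_addrs = pernet->limit_add_addr_accepted;	// e.g. 0
 *	parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs);
 *	// attribute present: rcv_addrs = new value (<= MPTCP_PM_ADDR_MAX)
 *	// attribute absent:  rcv_addrs still holds the current limit
 */
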
int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	unsigned int rcv_addrs, subflows;
	int ret;

	spin_lock_bh(&pernet->lock);
	rcv_addrs = pernet->limit_add_addr_accepted;
	ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs);
	if (ret)
		goto unlock;

	subflows = pernet->limit_extra_subflows;
	ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows);
	if (ret)
		goto unlock;

	WRITE_ONCE(pernet->limit_add_addr_accepted, rcv_addrs);
	WRITE_ONCE(pernet->limit_extra_subflows, subflows);

unlock:
	spin_unlock_bh(&pernet->lock);
	return ret;
}

int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct sk_buff *msg;
	void *reply;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0,
				  MPTCP_PM_CMD_GET_LIMITS);
	if (!reply)
		goto fail;

	if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS,
			READ_ONCE(pernet->limit_add_addr_accepted)))
		goto fail;

	if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS,
			READ_ONCE(pernet->limit_extra_subflows)))
		goto fail;

	genlmsg_end(msg, reply);
	return genlmsg_reply(msg, info);

fail:
	GENL_SET_ERR_MSG(info, "not enough space in Netlink message");
	nlmsg_free(msg);
	return -EMSGSIZE;
}

static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
				 struct mptcp_addr_info *addr)
{
	struct mptcp_rm_list list = { .nr = 0 };

	list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr);

	spin_lock_bh(&msk->pm.lock);
	mptcp_pm_rm_subflow(msk, &list);
	__mark_subflow_endp_available(msk, list.ids[0]);
	mptcp_pm_create_subflow_or_signal_addr(msk);
	spin_unlock_bh(&msk->pm.lock);
}

static void mptcp_pm_nl_set_flags_all(struct net *net,
				      struct mptcp_pm_addr_entry *local,
				      u8 changed)
{
	u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW);
	u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;

	if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow)
		return;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;

		if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
			goto next;

		lock_sock(sk);
		if (changed & MPTCP_PM_ADDR_FLAG_BACKUP)
			mptcp_pm_mp_prio_send_ack(msk, &local->addr, NULL, bkup);
		/* Subflows will only be recreated if the SUBFLOW flag is set */
		if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH))
			mptcp_pm_nl_fullmesh(msk, &local->addr);
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}
}

int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local,
			  struct genl_info *info)
{
	struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
	u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
			   MPTCP_PM_ADDR_FLAG_FULLMESH;
	struct net *net = genl_info_net(info);
	struct mptcp_pm_addr_entry *entry;
	struct pm_nl_pernet *pernet;
	u8 lookup_by_id = 0;

	pernet = pm_nl_get_pernet(net);

	if (local->addr.family == AF_UNSPEC) {
		lookup_by_id = 1;
		if (!local->addr.id) {
			NL_SET_ERR_MSG_ATTR(info->extack, attr,
					    "missing address ID");
			return -EOPNOTSUPP;
		}
	}

	spin_lock_bh(&pernet->lock);
	entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) :
			       __lookup_addr(pernet, &local->addr);
	if (!entry) {
		spin_unlock_bh(&pernet->lock);
		NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found");
		return -EINVAL;
	}
	if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
	    (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL |
			     MPTCP_PM_ADDR_FLAG_IMPLICIT))) {
		spin_unlock_bh(&pernet->lock);
		NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags");
		return -EINVAL;
	}

	changed = (local->flags ^ entry->flags) & mask;
	entry->flags = (entry->flags & ~mask) | (local->flags & mask);
	*local = *entry;

	if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) {
		u8 addr_max = pernet->endp_fullmesh_max;

		if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)
			addr_max++;
		else
			addr_max--;

		WRITE_ONCE(pernet->endp_fullmesh_max, addr_max);
	}

	spin_unlock_bh(&pernet->lock);

	mptcp_pm_nl_set_flags_all(net, local, changed);
	return 0;
}

bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	if (msk->pm.extra_subflows == mptcp_pm_get_limit_extra_subflows(msk) ||
	    (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
			       MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) {
		WRITE_ONCE(msk->pm.work_pending, false);
		return false;
	}
	return true;
}

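/* Illustrative sketch (not part of the original file): the
 * find_next_and_bit() test above scans the AND of "configured endpoint IDs"
 * (pernet->id_bitmap) and "IDs still usable on this connection"
 * (msk->pm.id_avail_bitmap); hitting the end of the bitmap means no candidate
 * endpoint is left. Hypothetical boolean form:
 */
static inline bool example_has_candidate_id(const unsigned long *configured,
					    const unsigned long *avail)
{
	return find_next_and_bit(configured, avail,
				 MPTCP_PM_MAX_ADDR_ID + 1, 0) <=
	       MPTCP_PM_MAX_ADDR_ID;
}
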
/* Called under PM lock */
void __mptcp_pm_kernel_worker(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;

	if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
		pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
		mptcp_pm_nl_add_addr_received(msk);
	}
	if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
		pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
		mptcp_pm_nl_fully_established(msk);
	}
	if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
		pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
		mptcp_pm_nl_subflow_established(msk);
	}
}

static int __net_init pm_nl_init_net(struct net *net)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);

	INIT_LIST_HEAD_RCU(&pernet->endp_list);

	/* Cit. 2 subflows ought to be enough for anybody. */
	pernet->limit_extra_subflows = 2;
	pernet->next_id = 1;
	spin_lock_init(&pernet->lock);

	/* No need to initialize other pernet fields, the struct is zeroed at
	 * allocation time.
	 */

	return 0;
}

static void __net_exit pm_nl_exit_net(struct list_head *net_list)
{
	struct net *net;

	list_for_each_entry(net, net_list, exit_list) {
		struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);

		/* net is removed from namespace list, can't race with
		 * other modifiers, also netns core already waited for a
		 * RCU grace period.
		 */
		__flush_addrs(&pernet->endp_list);
	}
}

static struct pernet_operations mptcp_pm_pernet_ops = {
	.init = pm_nl_init_net,
	.exit_batch = pm_nl_exit_net,
	.id = &pm_nl_pernet_id,
	.size = sizeof(struct pm_nl_pernet),
};

struct mptcp_pm_ops mptcp_pm_kernel = {
	.name			= "kernel",
	.owner			= THIS_MODULE,
};

void __init mptcp_pm_kernel_register(void)
{
	if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0)
		panic("Failed to register MPTCP PM pernet subsystem.\n");

	mptcp_pm_register(&mptcp_pm_kernel);
}