GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
/*
 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
#include <linux/delay.h>
#include <linux/completion.h>
#include <linux/slab.h>

#include <net/dst.h>

#include "ipoib.h"

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
static int mcast_debug_level;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
                 "Enable multicast debug tracing if > 0");
#endif

static DEFINE_MUTEX(mcast_mutex);

struct ipoib_mcast_iter {
        struct net_device *dev;
        union ib_gid       mgid;
        unsigned long      created;
        unsigned int       queuelen;
        unsigned int       complete;
        unsigned int       send_only;
};

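/*
 * Free a multicast group entry: drop the AH reference held by each
 * neighbour on the group's list, release the group's own address
 * handle, and account any still-queued skbs as TX drops.
 */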
static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
        struct net_device *dev = mcast->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_neigh *neigh, *tmp;
        int tx_dropped = 0;

        ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
                        mcast->mcmember.mgid.raw);

        spin_lock_irq(&priv->lock);

        list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
                /*
                 * It's safe to call ipoib_put_ah() inside priv->lock
                 * here, because we know that mcast->ah will always
                 * hold one more reference, so ipoib_put_ah() will
                 * never do more than decrement the ref count.
                 */
                if (neigh->ah)
                        ipoib_put_ah(neigh->ah);
                ipoib_neigh_free(dev, neigh);
        }

        spin_unlock_irq(&priv->lock);

        if (mcast->ah)
                ipoib_put_ah(mcast->ah);

        while (!skb_queue_empty(&mcast->pkt_queue)) {
                ++tx_dropped;
                dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
        }

        netif_tx_lock_bh(dev);
        dev->stats.tx_dropped += tx_dropped;
        netif_tx_unlock_bh(dev);

        kfree(mcast);
}

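/* Allocate a multicast group entry and initialize its lists and skb queue. */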
static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
                                             int can_sleep)
{
        struct ipoib_mcast *mcast;

        mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
        if (!mcast)
                return NULL;

        mcast->dev = dev;
        mcast->created = jiffies;
        mcast->backoff = 1;

        INIT_LIST_HEAD(&mcast->list);
        INIT_LIST_HEAD(&mcast->neigh_list);
        skb_queue_head_init(&mcast->pkt_queue);

        return mcast;
}

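/* Look up a group by MGID in the per-device red-black tree (callers hold priv->lock). */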
static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct rb_node *n = priv->multicast_tree.rb_node;

        while (n) {
                struct ipoib_mcast *mcast;
                int ret;

                mcast = rb_entry(n, struct ipoib_mcast, rb_node);

                ret = memcmp(mgid, mcast->mcmember.mgid.raw,
                             sizeof (union ib_gid));
                if (ret < 0)
                        n = n->rb_left;
                else if (ret > 0)
                        n = n->rb_right;
                else
                        return mcast;
        }

        return NULL;
}

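/* Link a new group into the red-black tree; returns -EEXIST if the MGID is already present. */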
static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
{
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;

        while (*n) {
                struct ipoib_mcast *tmcast;
                int ret;

                pn = *n;
                tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);

                ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
                             sizeof (union ib_gid));
                if (ret < 0)
                        n = &pn->rb_left;
                else if (ret > 0)
                        n = &pn->rb_right;
                else
                        return -EEXIST;
        }

        rb_link_node(&mcast->rb_node, pn, n);
        rb_insert_color(&mcast->rb_node, &priv->multicast_tree);

        return 0;
}

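/*
 * Finish a successful join: record the member record, cache the Q_Key
 * if this is the broadcast group, attach the QP for non-send-only
 * groups, build the multicast address handle, and finally push any
 * packets that were queued while the join was in progress.
 */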
static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
                                   struct ib_sa_mcmember_rec *mcmember)
{
        struct net_device *dev = mcast->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_ah *ah;
        int ret;
        int set_qkey = 0;

        mcast->mcmember = *mcmember;

        /* Set the cached Q_Key before we attach if it's the broadcast group */
        if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
                    sizeof (union ib_gid))) {
                spin_lock_irq(&priv->lock);
                if (!priv->broadcast) {
                        spin_unlock_irq(&priv->lock);
                        return -EAGAIN;
                }
                priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
                spin_unlock_irq(&priv->lock);
                priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
                set_qkey = 1;
        }

        if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
                if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
                        ipoib_warn(priv, "multicast group %pI6 already attached\n",
                                   mcast->mcmember.mgid.raw);

                        return 0;
                }

                ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
                                         &mcast->mcmember.mgid, set_qkey);
                if (ret < 0) {
                        ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
                                   mcast->mcmember.mgid.raw);

                        clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
                        return ret;
                }
        }

        {
                struct ib_ah_attr av = {
                        .dlid          = be16_to_cpu(mcast->mcmember.mlid),
                        .port_num      = priv->port,
                        .sl            = mcast->mcmember.sl,
                        .ah_flags      = IB_AH_GRH,
                        .static_rate   = mcast->mcmember.rate,
                        .grh           = {
                                .flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
                                .hop_limit     = mcast->mcmember.hop_limit,
                                .sgid_index    = 0,
                                .traffic_class = mcast->mcmember.traffic_class
                        }
                };
                av.grh.dgid = mcast->mcmember.mgid;

                ah = ipoib_create_ah(dev, priv->pd, &av);
                if (!ah) {
                        ipoib_warn(priv, "ib_address_create failed\n");
                } else {
                        spin_lock_irq(&priv->lock);
                        mcast->ah = ah;
                        spin_unlock_irq(&priv->lock);

                        ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
                                        mcast->mcmember.mgid.raw,
                                        mcast->ah->ah,
                                        be16_to_cpu(mcast->mcmember.mlid),
                                        mcast->mcmember.sl);
                }
        }

        /* actually send any queued packets */
        netif_tx_lock_bh(dev);
        while (!skb_queue_empty(&mcast->pkt_queue)) {
                struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
                netif_tx_unlock_bh(dev);

                skb->dev = dev;

                if (!skb_dst(skb) || !skb_dst(skb)->neighbour) {
                        /* put pseudoheader back on for next time */
                        skb_push(skb, sizeof (struct ipoib_pseudoheader));
                }

                if (dev_queue_xmit(skb))
                        ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
                netif_tx_lock_bh(dev);
        }
        netif_tx_unlock_bh(dev);

        return 0;
}

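/*
 * Completion callback for send-only joins. -ENETRESET is ignored here
 * because port events are handled separately; on any other failure the
 * queued packets are dropped and the busy flag is cleared so a later
 * send can retry the join.
 */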
static int
ipoib_mcast_sendonly_join_complete(int status,
                                   struct ib_sa_multicast *multicast)
{
        struct ipoib_mcast *mcast = multicast->context;
        struct net_device *dev = mcast->dev;

        /* We trap for port events ourselves. */
        if (status == -ENETRESET)
                return 0;

        if (!status)
                status = ipoib_mcast_join_finish(mcast, &multicast->rec);

        if (status) {
                if (mcast->logcount++ < 20)
                        ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
                                        mcast->mcmember.mgid.raw, status);

                /* Flush out any queued packets */
                netif_tx_lock_bh(dev);
                while (!skb_queue_empty(&mcast->pkt_queue)) {
                        ++dev->stats.tx_dropped;
                        dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
                }
                netif_tx_unlock_bh(dev);

                /* Clear the busy flag so we try again */
                status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
                                            &mcast->flags);
        }
        return status;
}

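/*
 * Start a send-only SA join. A full-member join state is used for
 * now, since some subnet managers do not support send-only joins.
 */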
static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
{
        struct net_device *dev = mcast->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_sa_mcmember_rec rec = {
#if 0                           /* Some SMs don't support send-only yet */
                .join_state = 4
#else
                .join_state = 1
#endif
        };
        int ret = 0;

        if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
                ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
                return -ENODEV;
        }

        if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
                ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
                return -EBUSY;
        }

        rec.mgid     = mcast->mcmember.mgid;
        rec.port_gid = priv->local_gid;
        rec.pkey     = cpu_to_be16(priv->pkey);

        mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
                                         priv->port, &rec,
                                         IB_SA_MCMEMBER_REC_MGID |
                                         IB_SA_MCMEMBER_REC_PORT_GID |
                                         IB_SA_MCMEMBER_REC_PKEY |
                                         IB_SA_MCMEMBER_REC_JOIN_STATE,
                                         GFP_ATOMIC,
                                         ipoib_mcast_sendonly_join_complete,
                                         mcast);
        if (IS_ERR(mcast->mc)) {
                ret = PTR_ERR(mcast->mc);
                clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
                ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
                           ret);
        } else {
                ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
                                mcast->mcmember.mgid.raw);
        }

        return ret;
}

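/*
 * Deferred work that turns the net carrier on once the broadcast group
 * is joined, but only while the IB port is ACTIVE. Runs from the
 * workqueue so that taking rtnl_lock here cannot deadlock.
 */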
void ipoib_mcast_carrier_on_task(struct work_struct *work)
{
        struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
                                                   carrier_on_task);
        struct ib_port_attr attr;

        /*
         * Take rtnl_lock to avoid racing with ipoib_stop() and
         * turning the carrier back on while a device is being
         * removed.
         */
        if (ib_query_port(priv->ca, priv->port, &attr) ||
            attr.state != IB_PORT_ACTIVE) {
                ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
                return;
        }

        rtnl_lock();
        netif_carrier_on(priv->dev);
        rtnl_unlock();
}

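/*
 * Completion callback for normal (non-send-only) joins. On success,
 * reset the backoff, requeue the join task to pick up the next group,
 * and defer carrier-on if this was the broadcast group. On failure,
 * double the backoff and reschedule the join task to retry.
 */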
static int ipoib_mcast_join_complete(int status,
                                     struct ib_sa_multicast *multicast)
{
        struct ipoib_mcast *mcast = multicast->context;
        struct net_device *dev = mcast->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);

        ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
                        mcast->mcmember.mgid.raw, status);

        /* We trap for port events ourselves. */
        if (status == -ENETRESET)
                return 0;

        if (!status)
                status = ipoib_mcast_join_finish(mcast, &multicast->rec);

        if (!status) {
                mcast->backoff = 1;
                mutex_lock(&mcast_mutex);
                if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
                        queue_delayed_work(ipoib_workqueue,
                                           &priv->mcast_task, 0);
                mutex_unlock(&mcast_mutex);

                /*
                 * Defer carrier on work to ipoib_workqueue to avoid a
                 * deadlock on rtnl_lock here.
                 */
                if (mcast == priv->broadcast)
                        queue_work(ipoib_workqueue, &priv->carrier_on_task);

                return 0;
        }

        if (mcast->logcount++ < 20) {
                if (status == -ETIMEDOUT || status == -EAGAIN) {
                        ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
                                        mcast->mcmember.mgid.raw, status);
                } else {
                        ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
                                   mcast->mcmember.mgid.raw, status);
                }
        }

        mcast->backoff *= 2;
        if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
                mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;

        /* Clear the busy flag so we try again */
        status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);

        mutex_lock(&mcast_mutex);
        spin_lock_irq(&priv->lock);
        if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
                queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
                                   mcast->backoff * HZ);
        spin_unlock_irq(&priv->lock);
        mutex_unlock(&mcast_mutex);

        return status;
}

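/*
 * Start an SA join for one group. When @create is set, the request
 * also carries Q_Key, MTU, rate, SL, flow label, hop limit and traffic
 * class copied from the broadcast group, so the new group is created
 * with parameters matching the rest of the IPoIB subnet.
 */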
static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                             int create)
{
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_sa_mcmember_rec rec = {
                .join_state = 1
        };
        ib_sa_comp_mask comp_mask;
        int ret = 0;

        ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);

        rec.mgid     = mcast->mcmember.mgid;
        rec.port_gid = priv->local_gid;
        rec.pkey     = cpu_to_be16(priv->pkey);

        comp_mask =
                IB_SA_MCMEMBER_REC_MGID |
                IB_SA_MCMEMBER_REC_PORT_GID |
                IB_SA_MCMEMBER_REC_PKEY |
                IB_SA_MCMEMBER_REC_JOIN_STATE;

        if (create) {
                comp_mask |=
                        IB_SA_MCMEMBER_REC_QKEY |
                        IB_SA_MCMEMBER_REC_MTU_SELECTOR |
                        IB_SA_MCMEMBER_REC_MTU |
                        IB_SA_MCMEMBER_REC_TRAFFIC_CLASS |
                        IB_SA_MCMEMBER_REC_RATE_SELECTOR |
                        IB_SA_MCMEMBER_REC_RATE |
                        IB_SA_MCMEMBER_REC_SL |
                        IB_SA_MCMEMBER_REC_FLOW_LABEL |
                        IB_SA_MCMEMBER_REC_HOP_LIMIT;

                rec.qkey          = priv->broadcast->mcmember.qkey;
                rec.mtu_selector  = IB_SA_EQ;
                rec.mtu           = priv->broadcast->mcmember.mtu;
                rec.traffic_class = priv->broadcast->mcmember.traffic_class;
                rec.rate_selector = IB_SA_EQ;
                rec.rate          = priv->broadcast->mcmember.rate;
                rec.sl            = priv->broadcast->mcmember.sl;
                rec.flow_label    = priv->broadcast->mcmember.flow_label;
                rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
        }

        set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
        mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
                                         &rec, comp_mask, GFP_KERNEL,
                                         ipoib_mcast_join_complete, mcast);
        if (IS_ERR(mcast->mc)) {
                clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
                ret = PTR_ERR(mcast->mc);
                ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);

                mcast->backoff *= 2;
                if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
                        mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;

                mutex_lock(&mcast_mutex);
                if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
                        queue_delayed_work(ipoib_workqueue,
                                           &priv->mcast_task,
                                           mcast->backoff * HZ);
                mutex_unlock(&mcast_mutex);
        }
}

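/*
 * Delayed work that drives all joins: refresh the local GID and LID,
 * create and join the broadcast group first, then join the remaining
 * groups one at a time (rescheduling itself after each), and set the
 * multicast MTU once every group is attached.
 */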
void ipoib_mcast_join_task(struct work_struct *work)
{
        struct ipoib_dev_priv *priv =
                container_of(work, struct ipoib_dev_priv, mcast_task.work);
        struct net_device *dev = priv->dev;

        if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
                return;

        if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
                ipoib_warn(priv, "ib_query_gid() failed\n");
        else
                memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));

        {
                struct ib_port_attr attr;

                if (!ib_query_port(priv->ca, priv->port, &attr))
                        priv->local_lid = attr.lid;
                else
                        ipoib_warn(priv, "ib_query_port failed\n");
        }

        if (!priv->broadcast) {
                struct ipoib_mcast *broadcast;

                if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
                        return;

                broadcast = ipoib_mcast_alloc(dev, 1);
                if (!broadcast) {
                        ipoib_warn(priv, "failed to allocate broadcast group\n");
                        mutex_lock(&mcast_mutex);
                        if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
                                queue_delayed_work(ipoib_workqueue,
                                                   &priv->mcast_task, HZ);
                        mutex_unlock(&mcast_mutex);
                        return;
                }

                spin_lock_irq(&priv->lock);
                memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
                       sizeof (union ib_gid));
                priv->broadcast = broadcast;

                __ipoib_mcast_add(dev, priv->broadcast);
                spin_unlock_irq(&priv->lock);
        }

        if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
                if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
                        ipoib_mcast_join(dev, priv->broadcast, 0);
                return;
        }

        while (1) {
                struct ipoib_mcast *mcast = NULL;

                spin_lock_irq(&priv->lock);
                list_for_each_entry(mcast, &priv->multicast_list, list) {
                        if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
                            && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
                            && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
                                /* Found the next unjoined group */
                                break;
                        }
                }
                spin_unlock_irq(&priv->lock);

                if (&mcast->list == &priv->multicast_list) {
                        /* All done */
                        break;
                }

                ipoib_mcast_join(dev, mcast, 1);
                return;
        }

        priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));

        if (!ipoib_cm_admin_enabled(dev)) {
                rtnl_lock();
                dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
                rtnl_unlock();
        }

        ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");

        clear_bit(IPOIB_MCAST_RUN, &priv->flags);
}

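/* Arm the join machinery: set IPOIB_MCAST_RUN and queue the join task. */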
int ipoib_mcast_start_thread(struct net_device *dev)
{
        struct ipoib_dev_priv *priv = netdev_priv(dev);

        ipoib_dbg_mcast(priv, "starting multicast thread\n");

        mutex_lock(&mcast_mutex);
        if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
                queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
        mutex_unlock(&mcast_mutex);

        return 0;
}

int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
{
        struct ipoib_dev_priv *priv = netdev_priv(dev);

        ipoib_dbg_mcast(priv, "stopping multicast thread\n");

        mutex_lock(&mcast_mutex);
        clear_bit(IPOIB_MCAST_RUN, &priv->flags);
        cancel_delayed_work(&priv->mcast_task);
        mutex_unlock(&mcast_mutex);

        if (flush)
                flush_workqueue(ipoib_workqueue);

        return 0;
}

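/*
 * Leave a group: free the SA multicast membership if a join was
 * issued, and detach the QP if we were attached.
 */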
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int ret = 0;

        if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                ib_sa_free_multicast(mcast->mc);

        if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
                ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",
                                mcast->mcmember.mgid.raw);

                /* Remove ourselves from the multicast group */
                ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
                                      be16_to_cpu(mcast->mcmember.mlid));
                if (ret)
                        ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
        }

        return 0;
}

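/*
 * Transmit path entry point. Under priv->lock, find the group for
 * @mgid (creating a send-only group if needed); if no address handle
 * exists yet, queue the skb and kick off a join, otherwise optionally
 * bind a neighbour to the group's AH and hand the skb to ipoib_send().
 */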
void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
{
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_mcast *mcast;
        unsigned long flags;

        spin_lock_irqsave(&priv->lock, flags);

        if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) ||
            !priv->broadcast ||
            !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
                ++dev->stats.tx_dropped;
                dev_kfree_skb_any(skb);
                goto unlock;
        }

        mcast = __ipoib_mcast_find(dev, mgid);
        if (!mcast) {
                /* Let's create a new send only group now */
                ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
                                mgid);

                mcast = ipoib_mcast_alloc(dev, 0);
                if (!mcast) {
                        ipoib_warn(priv, "unable to allocate memory for "
                                   "multicast structure\n");
                        ++dev->stats.tx_dropped;
                        dev_kfree_skb_any(skb);
                        goto out;
                }

                set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
                memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
                __ipoib_mcast_add(dev, mcast);
                list_add_tail(&mcast->list, &priv->multicast_list);
        }

        if (!mcast->ah) {
                if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
                        skb_queue_tail(&mcast->pkt_queue, skb);
                else {
                        ++dev->stats.tx_dropped;
                        dev_kfree_skb_any(skb);
                }

                if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                        ipoib_dbg_mcast(priv, "no address vector, "
                                        "but multicast join already started\n");
                else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
                        ipoib_mcast_sendonly_join(mcast);

                /*
                 * If the join completes between here and out:, we don't
                 * want to send the packet twice.
                 */
                mcast = NULL;
        }

out:
        if (mcast && mcast->ah) {
                if (skb_dst(skb) &&
                    skb_dst(skb)->neighbour &&
                    !*to_ipoib_neigh(skb_dst(skb)->neighbour)) {
                        struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour,
                                                                      skb->dev);

                        if (neigh) {
                                kref_get(&mcast->ah->ref);
                                neigh->ah = mcast->ah;
                                list_add_tail(&neigh->list, &mcast->neigh_list);
                        }
                }

                spin_unlock_irqrestore(&priv->lock, flags);
                ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
                return;
        }

unlock:
        spin_unlock_irqrestore(&priv->lock, flags);
}

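/*
 * Drop every group, including broadcast: unlink them all under the
 * lock, then leave and free each one outside it.
 */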
void ipoib_mcast_dev_flush(struct net_device *dev)
{
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        LIST_HEAD(remove_list);
        struct ipoib_mcast *mcast, *tmcast;
        unsigned long flags;

        ipoib_dbg_mcast(priv, "flushing multicast list\n");

        spin_lock_irqsave(&priv->lock, flags);

        list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
                list_del(&mcast->list);
                rb_erase(&mcast->rb_node, &priv->multicast_tree);
                list_add_tail(&mcast->list, &remove_list);
        }

        if (priv->broadcast) {
                rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
                list_add_tail(&priv->broadcast->list, &remove_list);
                priv->broadcast = NULL;
        }

        spin_unlock_irqrestore(&priv->lock, flags);

        list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
                ipoib_mcast_leave(dev, mcast);
                ipoib_mcast_free(mcast);
        }
}

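/*
 * Check a hardware multicast address against the device broadcast
 * address: the reserved QPN, GID prefix and scope bytes and the lower
 * signature/P_Key bytes must match.
 */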
static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
{
        /* reserved QPN, prefix, scope */
        if (memcmp(addr, broadcast, 6))
                return 0;
        /* signature lower, pkey */
        if (memcmp(addr + 7, broadcast + 7, 3))
                return 0;
        return 1;
}

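/*
 * Reconcile our group list with the hardware multicast list from the
 * networking core: add entries for new addresses (replacing send-only
 * entries), remove groups that are no longer present, then restart the
 * join thread.
 */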
void ipoib_mcast_restart_task(struct work_struct *work)
{
        struct ipoib_dev_priv *priv =
                container_of(work, struct ipoib_dev_priv, restart_task);
        struct net_device *dev = priv->dev;
        struct netdev_hw_addr *ha;
        struct ipoib_mcast *mcast, *tmcast;
        LIST_HEAD(remove_list);
        unsigned long flags;
        struct ib_sa_mcmember_rec rec;

        ipoib_dbg_mcast(priv, "restarting multicast task\n");

        ipoib_mcast_stop_thread(dev, 0);

        local_irq_save(flags);
        netif_addr_lock(dev);
        spin_lock(&priv->lock);

        /*
         * Unfortunately, the networking core only gives us a list of all of
         * the multicast hardware addresses. We need to figure out which ones
         * are new and which ones have been removed.
         */

        /* Clear out the found flag */
        list_for_each_entry(mcast, &priv->multicast_list, list)
                clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);

        /* Mark all of the entries that are found or don't exist */
        netdev_for_each_mc_addr(ha, dev) {
                union ib_gid mgid;

                if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))
                        continue;

                memcpy(mgid.raw, ha->addr + 4, sizeof mgid);

                mcast = __ipoib_mcast_find(dev, &mgid);
                if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
                        struct ipoib_mcast *nmcast;

                        /* ignore groups that are directly joined by userspace */
                        if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
                            !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
                                ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n",
                                                mgid.raw);
                                continue;
                        }

                        /* Not found or send-only group, let's add a new entry */
                        ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n",
                                        mgid.raw);

                        nmcast = ipoib_mcast_alloc(dev, 0);
                        if (!nmcast) {
                                ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
                                continue;
                        }

                        set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);

                        nmcast->mcmember.mgid = mgid;

                        if (mcast) {
                                /* Destroy the send only entry */
                                list_move_tail(&mcast->list, &remove_list);

                                rb_replace_node(&mcast->rb_node,
                                                &nmcast->rb_node,
                                                &priv->multicast_tree);
                        } else
                                __ipoib_mcast_add(dev, nmcast);

                        list_add_tail(&nmcast->list, &priv->multicast_list);
                }

                if (mcast)
                        set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
        }

        /* Remove all of the entries that don't exist anymore */
        list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
                if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
                    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
                        ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n",
                                        mcast->mcmember.mgid.raw);

                        rb_erase(&mcast->rb_node, &priv->multicast_tree);

                        /* Move to the remove list */
                        list_move_tail(&mcast->list, &remove_list);
                }
        }

        spin_unlock(&priv->lock);
        netif_addr_unlock(dev);
        local_irq_restore(flags);

        /* We have to cancel outside of the spinlock */
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
                ipoib_mcast_leave(mcast->dev, mcast);
                ipoib_mcast_free(mcast);
        }

        if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
                ipoib_mcast_start_thread(dev);
}

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG

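/*
 * Debugfs iterator: walks the multicast tree in MGID order so the
 * debug code can snapshot each group's state.
 */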
struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
{
        struct ipoib_mcast_iter *iter;

        iter = kmalloc(sizeof *iter, GFP_KERNEL);
        if (!iter)
                return NULL;

        iter->dev = dev;
        memset(iter->mgid.raw, 0, 16);

        if (ipoib_mcast_iter_next(iter)) {
                kfree(iter);
                return NULL;
        }

        return iter;
}

int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
{
        struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
        struct rb_node *n;
        struct ipoib_mcast *mcast;
        int ret = 1;

        spin_lock_irq(&priv->lock);

        n = rb_first(&priv->multicast_tree);

        while (n) {
                mcast = rb_entry(n, struct ipoib_mcast, rb_node);

                if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
                           sizeof (union ib_gid)) < 0) {
                        iter->mgid = mcast->mcmember.mgid;
                        iter->created = mcast->created;
                        iter->queuelen = skb_queue_len(&mcast->pkt_queue);
                        iter->complete = !!mcast->ah;
                        iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));

                        ret = 0;

                        break;
                }

                n = rb_next(n);
        }

        spin_unlock_irq(&priv->lock);

        return ret;
}

void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
                           union ib_gid *mgid,
                           unsigned long *created,
                           unsigned int *queuelen,
                           unsigned int *complete,
                           unsigned int *send_only)
{
        *mgid      = iter->mgid;
        *created   = iter->created;
        *queuelen  = iter->queuelen;
        *complete  = iter->complete;
        *send_only = iter->send_only;
}

#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */