Path: blob/master/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
15112 views
/*1* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.2* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.3* Copyright (c) 2004 Voltaire, Inc. All rights reserved.4*5* This software is available to you under a choice of one of two6* licenses. You may choose to be licensed under the terms of the GNU7* General Public License (GPL) Version 2, available from the file8* COPYING in the main directory of this source tree, or the9* OpenIB.org BSD license below:10*11* Redistribution and use in source and binary forms, with or12* without modification, are permitted provided that the following13* conditions are met:14*15* - Redistributions of source code must retain the above16* copyright notice, this list of conditions and the following17* disclaimer.18*19* - Redistributions in binary form must reproduce the above20* copyright notice, this list of conditions and the following21* disclaimer in the documentation and/or other materials22* provided with the distribution.23*24* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,25* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF26* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND27* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS28* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN29* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN30* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE31* SOFTWARE.32*/3334#include <linux/skbuff.h>35#include <linux/rtnetlink.h>36#include <linux/ip.h>37#include <linux/in.h>38#include <linux/igmp.h>39#include <linux/inetdevice.h>40#include <linux/delay.h>41#include <linux/completion.h>42#include <linux/slab.h>4344#include <net/dst.h>4546#include "ipoib.h"4748#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG49static int mcast_debug_level;5051module_param(mcast_debug_level, int, 0644);52MODULE_PARM_DESC(mcast_debug_level,53"Enable multicast debug tracing if > 0");54#endif5556static DEFINE_MUTEX(mcast_mutex);5758struct ipoib_mcast_iter {59struct net_device *dev;60union ib_gid mgid;61unsigned long created;62unsigned int queuelen;63unsigned int complete;64unsigned int send_only;65};6667static void ipoib_mcast_free(struct ipoib_mcast *mcast)68{69struct net_device *dev = mcast->dev;70struct ipoib_dev_priv *priv = netdev_priv(dev);71struct ipoib_neigh *neigh, *tmp;72int tx_dropped = 0;7374ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",75mcast->mcmember.mgid.raw);7677spin_lock_irq(&priv->lock);7879list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {80/*81* It's safe to call ipoib_put_ah() inside priv->lock82* here, because we know that mcast->ah will always83* hold one more reference, so ipoib_put_ah() will84* never do more than decrement the ref count.85*/86if (neigh->ah)87ipoib_put_ah(neigh->ah);88ipoib_neigh_free(dev, neigh);89}9091spin_unlock_irq(&priv->lock);9293if (mcast->ah)94ipoib_put_ah(mcast->ah);9596while (!skb_queue_empty(&mcast->pkt_queue)) {97++tx_dropped;98dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));99}100101netif_tx_lock_bh(dev);102dev->stats.tx_dropped += tx_dropped;103netif_tx_unlock_bh(dev);104105kfree(mcast);106}107108static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,109int can_sleep)110{111struct ipoib_mcast *mcast;112113mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);114if (!mcast)115return NULL;116117mcast->dev = dev;118mcast->created = jiffies;119mcast->backoff = 1;120121INIT_LIST_HEAD(&mcast->list);122INIT_LIST_HEAD(&mcast->neigh_list);123skb_queue_head_init(&mcast->pkt_queue);124125return mcast;126}127128static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)129{130struct ipoib_dev_priv *priv = netdev_priv(dev);131struct rb_node *n = priv->multicast_tree.rb_node;132133while (n) {134struct ipoib_mcast *mcast;135int ret;136137mcast = rb_entry(n, struct ipoib_mcast, rb_node);138139ret = memcmp(mgid, mcast->mcmember.mgid.raw,140sizeof (union ib_gid));141if (ret < 0)142n = n->rb_left;143else if (ret > 0)144n = n->rb_right;145else146return mcast;147}148149return NULL;150}151152static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)153{154struct ipoib_dev_priv *priv = netdev_priv(dev);155struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;156157while (*n) {158struct ipoib_mcast *tmcast;159int ret;160161pn = *n;162tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);163164ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,165sizeof (union ib_gid));166if (ret < 0)167n = &pn->rb_left;168else if (ret > 0)169n = &pn->rb_right;170else171return -EEXIST;172}173174rb_link_node(&mcast->rb_node, pn, n);175rb_insert_color(&mcast->rb_node, &priv->multicast_tree);176177return 0;178}179180static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,181struct ib_sa_mcmember_rec *mcmember)182{183struct net_device *dev = mcast->dev;184struct ipoib_dev_priv *priv = netdev_priv(dev);185struct ipoib_ah *ah;186int ret;187int set_qkey = 0;188189mcast->mcmember = *mcmember;190191/* Set the cached Q_Key before we attach if it's the broadcast group */192if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,193sizeof (union ib_gid))) {194spin_lock_irq(&priv->lock);195if (!priv->broadcast) {196spin_unlock_irq(&priv->lock);197return -EAGAIN;198}199priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);200spin_unlock_irq(&priv->lock);201priv->tx_wr.wr.ud.remote_qkey = priv->qkey;202set_qkey = 1;203}204205if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {206if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {207ipoib_warn(priv, "multicast group %pI6 already attached\n",208mcast->mcmember.mgid.raw);209210return 0;211}212213ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),214&mcast->mcmember.mgid, set_qkey);215if (ret < 0) {216ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",217mcast->mcmember.mgid.raw);218219clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);220return ret;221}222}223224{225struct ib_ah_attr av = {226.dlid = be16_to_cpu(mcast->mcmember.mlid),227.port_num = priv->port,228.sl = mcast->mcmember.sl,229.ah_flags = IB_AH_GRH,230.static_rate = mcast->mcmember.rate,231.grh = {232.flow_label = be32_to_cpu(mcast->mcmember.flow_label),233.hop_limit = mcast->mcmember.hop_limit,234.sgid_index = 0,235.traffic_class = mcast->mcmember.traffic_class236}237};238av.grh.dgid = mcast->mcmember.mgid;239240ah = ipoib_create_ah(dev, priv->pd, &av);241if (!ah) {242ipoib_warn(priv, "ib_address_create failed\n");243} else {244spin_lock_irq(&priv->lock);245mcast->ah = ah;246spin_unlock_irq(&priv->lock);247248ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",249mcast->mcmember.mgid.raw,250mcast->ah->ah,251be16_to_cpu(mcast->mcmember.mlid),252mcast->mcmember.sl);253}254}255256/* actually send any queued packets */257netif_tx_lock_bh(dev);258while (!skb_queue_empty(&mcast->pkt_queue)) {259struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);260netif_tx_unlock_bh(dev);261262skb->dev = dev;263264if (!skb_dst(skb) || !skb_dst(skb)->neighbour) {265/* put pseudoheader back on for next time */266skb_push(skb, sizeof (struct ipoib_pseudoheader));267}268269if (dev_queue_xmit(skb))270ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");271netif_tx_lock_bh(dev);272}273netif_tx_unlock_bh(dev);274275return 0;276}277278static int279ipoib_mcast_sendonly_join_complete(int status,280struct ib_sa_multicast *multicast)281{282struct ipoib_mcast *mcast = multicast->context;283struct net_device *dev = mcast->dev;284285/* We trap for port events ourselves. */286if (status == -ENETRESET)287return 0;288289if (!status)290status = ipoib_mcast_join_finish(mcast, &multicast->rec);291292if (status) {293if (mcast->logcount++ < 20)294ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",295mcast->mcmember.mgid.raw, status);296297/* Flush out any queued packets */298netif_tx_lock_bh(dev);299while (!skb_queue_empty(&mcast->pkt_queue)) {300++dev->stats.tx_dropped;301dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));302}303netif_tx_unlock_bh(dev);304305/* Clear the busy flag so we try again */306status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,307&mcast->flags);308}309return status;310}311312static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)313{314struct net_device *dev = mcast->dev;315struct ipoib_dev_priv *priv = netdev_priv(dev);316struct ib_sa_mcmember_rec rec = {317#if 0 /* Some SMs don't support send-only yet */318.join_state = 4319#else320.join_state = 1321#endif322};323int ret = 0;324325if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {326ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");327return -ENODEV;328}329330if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {331ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");332return -EBUSY;333}334335rec.mgid = mcast->mcmember.mgid;336rec.port_gid = priv->local_gid;337rec.pkey = cpu_to_be16(priv->pkey);338339mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,340priv->port, &rec,341IB_SA_MCMEMBER_REC_MGID |342IB_SA_MCMEMBER_REC_PORT_GID |343IB_SA_MCMEMBER_REC_PKEY |344IB_SA_MCMEMBER_REC_JOIN_STATE,345GFP_ATOMIC,346ipoib_mcast_sendonly_join_complete,347mcast);348if (IS_ERR(mcast->mc)) {349ret = PTR_ERR(mcast->mc);350clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);351ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",352ret);353} else {354ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",355mcast->mcmember.mgid.raw);356}357358return ret;359}360361void ipoib_mcast_carrier_on_task(struct work_struct *work)362{363struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,364carrier_on_task);365struct ib_port_attr attr;366367/*368* Take rtnl_lock to avoid racing with ipoib_stop() and369* turning the carrier back on while a device is being370* removed.371*/372if (ib_query_port(priv->ca, priv->port, &attr) ||373attr.state != IB_PORT_ACTIVE) {374ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");375return;376}377378rtnl_lock();379netif_carrier_on(priv->dev);380rtnl_unlock();381}382383static int ipoib_mcast_join_complete(int status,384struct ib_sa_multicast *multicast)385{386struct ipoib_mcast *mcast = multicast->context;387struct net_device *dev = mcast->dev;388struct ipoib_dev_priv *priv = netdev_priv(dev);389390ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",391mcast->mcmember.mgid.raw, status);392393/* We trap for port events ourselves. */394if (status == -ENETRESET)395return 0;396397if (!status)398status = ipoib_mcast_join_finish(mcast, &multicast->rec);399400if (!status) {401mcast->backoff = 1;402mutex_lock(&mcast_mutex);403if (test_bit(IPOIB_MCAST_RUN, &priv->flags))404queue_delayed_work(ipoib_workqueue,405&priv->mcast_task, 0);406mutex_unlock(&mcast_mutex);407408/*409* Defer carrier on work to ipoib_workqueue to avoid a410* deadlock on rtnl_lock here.411*/412if (mcast == priv->broadcast)413queue_work(ipoib_workqueue, &priv->carrier_on_task);414415return 0;416}417418if (mcast->logcount++ < 20) {419if (status == -ETIMEDOUT || status == -EAGAIN) {420ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",421mcast->mcmember.mgid.raw, status);422} else {423ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",424mcast->mcmember.mgid.raw, status);425}426}427428mcast->backoff *= 2;429if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)430mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;431432/* Clear the busy flag so we try again */433status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);434435mutex_lock(&mcast_mutex);436spin_lock_irq(&priv->lock);437if (test_bit(IPOIB_MCAST_RUN, &priv->flags))438queue_delayed_work(ipoib_workqueue, &priv->mcast_task,439mcast->backoff * HZ);440spin_unlock_irq(&priv->lock);441mutex_unlock(&mcast_mutex);442443return status;444}445446static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,447int create)448{449struct ipoib_dev_priv *priv = netdev_priv(dev);450struct ib_sa_mcmember_rec rec = {451.join_state = 1452};453ib_sa_comp_mask comp_mask;454int ret = 0;455456ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);457458rec.mgid = mcast->mcmember.mgid;459rec.port_gid = priv->local_gid;460rec.pkey = cpu_to_be16(priv->pkey);461462comp_mask =463IB_SA_MCMEMBER_REC_MGID |464IB_SA_MCMEMBER_REC_PORT_GID |465IB_SA_MCMEMBER_REC_PKEY |466IB_SA_MCMEMBER_REC_JOIN_STATE;467468if (create) {469comp_mask |=470IB_SA_MCMEMBER_REC_QKEY |471IB_SA_MCMEMBER_REC_MTU_SELECTOR |472IB_SA_MCMEMBER_REC_MTU |473IB_SA_MCMEMBER_REC_TRAFFIC_CLASS |474IB_SA_MCMEMBER_REC_RATE_SELECTOR |475IB_SA_MCMEMBER_REC_RATE |476IB_SA_MCMEMBER_REC_SL |477IB_SA_MCMEMBER_REC_FLOW_LABEL |478IB_SA_MCMEMBER_REC_HOP_LIMIT;479480rec.qkey = priv->broadcast->mcmember.qkey;481rec.mtu_selector = IB_SA_EQ;482rec.mtu = priv->broadcast->mcmember.mtu;483rec.traffic_class = priv->broadcast->mcmember.traffic_class;484rec.rate_selector = IB_SA_EQ;485rec.rate = priv->broadcast->mcmember.rate;486rec.sl = priv->broadcast->mcmember.sl;487rec.flow_label = priv->broadcast->mcmember.flow_label;488rec.hop_limit = priv->broadcast->mcmember.hop_limit;489}490491set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);492mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,493&rec, comp_mask, GFP_KERNEL,494ipoib_mcast_join_complete, mcast);495if (IS_ERR(mcast->mc)) {496clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);497ret = PTR_ERR(mcast->mc);498ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);499500mcast->backoff *= 2;501if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)502mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;503504mutex_lock(&mcast_mutex);505if (test_bit(IPOIB_MCAST_RUN, &priv->flags))506queue_delayed_work(ipoib_workqueue,507&priv->mcast_task,508mcast->backoff * HZ);509mutex_unlock(&mcast_mutex);510}511}512513void ipoib_mcast_join_task(struct work_struct *work)514{515struct ipoib_dev_priv *priv =516container_of(work, struct ipoib_dev_priv, mcast_task.work);517struct net_device *dev = priv->dev;518519if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))520return;521522if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))523ipoib_warn(priv, "ib_query_gid() failed\n");524else525memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));526527{528struct ib_port_attr attr;529530if (!ib_query_port(priv->ca, priv->port, &attr))531priv->local_lid = attr.lid;532else533ipoib_warn(priv, "ib_query_port failed\n");534}535536if (!priv->broadcast) {537struct ipoib_mcast *broadcast;538539if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))540return;541542broadcast = ipoib_mcast_alloc(dev, 1);543if (!broadcast) {544ipoib_warn(priv, "failed to allocate broadcast group\n");545mutex_lock(&mcast_mutex);546if (test_bit(IPOIB_MCAST_RUN, &priv->flags))547queue_delayed_work(ipoib_workqueue,548&priv->mcast_task, HZ);549mutex_unlock(&mcast_mutex);550return;551}552553spin_lock_irq(&priv->lock);554memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,555sizeof (union ib_gid));556priv->broadcast = broadcast;557558__ipoib_mcast_add(dev, priv->broadcast);559spin_unlock_irq(&priv->lock);560}561562if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {563if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))564ipoib_mcast_join(dev, priv->broadcast, 0);565return;566}567568while (1) {569struct ipoib_mcast *mcast = NULL;570571spin_lock_irq(&priv->lock);572list_for_each_entry(mcast, &priv->multicast_list, list) {573if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)574&& !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)575&& !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {576/* Found the next unjoined group */577break;578}579}580spin_unlock_irq(&priv->lock);581582if (&mcast->list == &priv->multicast_list) {583/* All done */584break;585}586587ipoib_mcast_join(dev, mcast, 1);588return;589}590591priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));592593if (!ipoib_cm_admin_enabled(dev)) {594rtnl_lock();595dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));596rtnl_unlock();597}598599ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");600601clear_bit(IPOIB_MCAST_RUN, &priv->flags);602}603604int ipoib_mcast_start_thread(struct net_device *dev)605{606struct ipoib_dev_priv *priv = netdev_priv(dev);607608ipoib_dbg_mcast(priv, "starting multicast thread\n");609610mutex_lock(&mcast_mutex);611if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))612queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);613mutex_unlock(&mcast_mutex);614615return 0;616}617618int ipoib_mcast_stop_thread(struct net_device *dev, int flush)619{620struct ipoib_dev_priv *priv = netdev_priv(dev);621622ipoib_dbg_mcast(priv, "stopping multicast thread\n");623624mutex_lock(&mcast_mutex);625clear_bit(IPOIB_MCAST_RUN, &priv->flags);626cancel_delayed_work(&priv->mcast_task);627mutex_unlock(&mcast_mutex);628629if (flush)630flush_workqueue(ipoib_workqueue);631632return 0;633}634635static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)636{637struct ipoib_dev_priv *priv = netdev_priv(dev);638int ret = 0;639640if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))641ib_sa_free_multicast(mcast->mc);642643if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {644ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",645mcast->mcmember.mgid.raw);646647/* Remove ourselves from the multicast group */648ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,649be16_to_cpu(mcast->mcmember.mlid));650if (ret)651ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);652}653654return 0;655}656657void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)658{659struct ipoib_dev_priv *priv = netdev_priv(dev);660struct ipoib_mcast *mcast;661unsigned long flags;662663spin_lock_irqsave(&priv->lock, flags);664665if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) ||666!priv->broadcast ||667!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {668++dev->stats.tx_dropped;669dev_kfree_skb_any(skb);670goto unlock;671}672673mcast = __ipoib_mcast_find(dev, mgid);674if (!mcast) {675/* Let's create a new send only group now */676ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",677mgid);678679mcast = ipoib_mcast_alloc(dev, 0);680if (!mcast) {681ipoib_warn(priv, "unable to allocate memory for "682"multicast structure\n");683++dev->stats.tx_dropped;684dev_kfree_skb_any(skb);685goto out;686}687688set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);689memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));690__ipoib_mcast_add(dev, mcast);691list_add_tail(&mcast->list, &priv->multicast_list);692}693694if (!mcast->ah) {695if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)696skb_queue_tail(&mcast->pkt_queue, skb);697else {698++dev->stats.tx_dropped;699dev_kfree_skb_any(skb);700}701702if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))703ipoib_dbg_mcast(priv, "no address vector, "704"but multicast join already started\n");705else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))706ipoib_mcast_sendonly_join(mcast);707708/*709* If lookup completes between here and out:, don't710* want to send packet twice.711*/712mcast = NULL;713}714715out:716if (mcast && mcast->ah) {717if (skb_dst(skb) &&718skb_dst(skb)->neighbour &&719!*to_ipoib_neigh(skb_dst(skb)->neighbour)) {720struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour,721skb->dev);722723if (neigh) {724kref_get(&mcast->ah->ref);725neigh->ah = mcast->ah;726list_add_tail(&neigh->list, &mcast->neigh_list);727}728}729730spin_unlock_irqrestore(&priv->lock, flags);731ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);732return;733}734735unlock:736spin_unlock_irqrestore(&priv->lock, flags);737}738739void ipoib_mcast_dev_flush(struct net_device *dev)740{741struct ipoib_dev_priv *priv = netdev_priv(dev);742LIST_HEAD(remove_list);743struct ipoib_mcast *mcast, *tmcast;744unsigned long flags;745746ipoib_dbg_mcast(priv, "flushing multicast list\n");747748spin_lock_irqsave(&priv->lock, flags);749750list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {751list_del(&mcast->list);752rb_erase(&mcast->rb_node, &priv->multicast_tree);753list_add_tail(&mcast->list, &remove_list);754}755756if (priv->broadcast) {757rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);758list_add_tail(&priv->broadcast->list, &remove_list);759priv->broadcast = NULL;760}761762spin_unlock_irqrestore(&priv->lock, flags);763764list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {765ipoib_mcast_leave(dev, mcast);766ipoib_mcast_free(mcast);767}768}769770static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)771{772/* reserved QPN, prefix, scope */773if (memcmp(addr, broadcast, 6))774return 0;775/* signature lower, pkey */776if (memcmp(addr + 7, broadcast + 7, 3))777return 0;778return 1;779}780781void ipoib_mcast_restart_task(struct work_struct *work)782{783struct ipoib_dev_priv *priv =784container_of(work, struct ipoib_dev_priv, restart_task);785struct net_device *dev = priv->dev;786struct netdev_hw_addr *ha;787struct ipoib_mcast *mcast, *tmcast;788LIST_HEAD(remove_list);789unsigned long flags;790struct ib_sa_mcmember_rec rec;791792ipoib_dbg_mcast(priv, "restarting multicast task\n");793794ipoib_mcast_stop_thread(dev, 0);795796local_irq_save(flags);797netif_addr_lock(dev);798spin_lock(&priv->lock);799800/*801* Unfortunately, the networking core only gives us a list of all of802* the multicast hardware addresses. We need to figure out which ones803* are new and which ones have been removed804*/805806/* Clear out the found flag */807list_for_each_entry(mcast, &priv->multicast_list, list)808clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);809810/* Mark all of the entries that are found or don't exist */811netdev_for_each_mc_addr(ha, dev) {812union ib_gid mgid;813814if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))815continue;816817memcpy(mgid.raw, ha->addr + 4, sizeof mgid);818819mcast = __ipoib_mcast_find(dev, &mgid);820if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {821struct ipoib_mcast *nmcast;822823/* ignore group which is directly joined by userspace */824if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&825!ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {826ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n",827mgid.raw);828continue;829}830831/* Not found or send-only group, let's add a new entry */832ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n",833mgid.raw);834835nmcast = ipoib_mcast_alloc(dev, 0);836if (!nmcast) {837ipoib_warn(priv, "unable to allocate memory for multicast structure\n");838continue;839}840841set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);842843nmcast->mcmember.mgid = mgid;844845if (mcast) {846/* Destroy the send only entry */847list_move_tail(&mcast->list, &remove_list);848849rb_replace_node(&mcast->rb_node,850&nmcast->rb_node,851&priv->multicast_tree);852} else853__ipoib_mcast_add(dev, nmcast);854855list_add_tail(&nmcast->list, &priv->multicast_list);856}857858if (mcast)859set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);860}861862/* Remove all of the entries don't exist anymore */863list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {864if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&865!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {866ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n",867mcast->mcmember.mgid.raw);868869rb_erase(&mcast->rb_node, &priv->multicast_tree);870871/* Move to the remove list */872list_move_tail(&mcast->list, &remove_list);873}874}875876spin_unlock(&priv->lock);877netif_addr_unlock(dev);878local_irq_restore(flags);879880/* We have to cancel outside of the spinlock */881list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {882ipoib_mcast_leave(mcast->dev, mcast);883ipoib_mcast_free(mcast);884}885886if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))887ipoib_mcast_start_thread(dev);888}889890#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG891892struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)893{894struct ipoib_mcast_iter *iter;895896iter = kmalloc(sizeof *iter, GFP_KERNEL);897if (!iter)898return NULL;899900iter->dev = dev;901memset(iter->mgid.raw, 0, 16);902903if (ipoib_mcast_iter_next(iter)) {904kfree(iter);905return NULL;906}907908return iter;909}910911int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)912{913struct ipoib_dev_priv *priv = netdev_priv(iter->dev);914struct rb_node *n;915struct ipoib_mcast *mcast;916int ret = 1;917918spin_lock_irq(&priv->lock);919920n = rb_first(&priv->multicast_tree);921922while (n) {923mcast = rb_entry(n, struct ipoib_mcast, rb_node);924925if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,926sizeof (union ib_gid)) < 0) {927iter->mgid = mcast->mcmember.mgid;928iter->created = mcast->created;929iter->queuelen = skb_queue_len(&mcast->pkt_queue);930iter->complete = !!mcast->ah;931iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));932933ret = 0;934935break;936}937938n = rb_next(n);939}940941spin_unlock_irq(&priv->lock);942943return ret;944}945946void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,947union ib_gid *mgid,948unsigned long *created,949unsigned int *queuelen,950unsigned int *complete,951unsigned int *send_only)952{953*mgid = iter->mgid;954*created = iter->created;955*queuelen = iter->queuelen;956*complete = iter->complete;957*send_only = iter->send_only;958}959960#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */961962963