/*-1* Copyright (c) 1991-1997 Regents of the University of California.2* All rights reserved.3*4* Redistribution and use in source and binary forms, with or without5* modification, are permitted provided that the following conditions6* are met:7* 1. Redistributions of source code must retain the above copyright8* notice, this list of conditions and the following disclaimer.9* 2. Redistributions in binary form must reproduce the above copyright10* notice, this list of conditions and the following disclaimer in the11* documentation and/or other materials provided with the distribution.12* 3. All advertising materials mentioning features or use of this software13* must display the following acknowledgement:14* This product includes software developed by the Network Research15* Group at Lawrence Berkeley Laboratory.16* 4. Neither the name of the University nor of the Laboratory may be used17* to endorse or promote products derived from this software without18* specific prior written permission.19*20* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND21* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE22* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE23* ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE24* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL25* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS26* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)27* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT28* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY29* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF30* SUCH DAMAGE.31*32* LBL code modified by [email protected], May 1977.33* For questions and/or comments, please send mail to [email protected]34* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $35*/36#include "opt_altq.h"37#include "opt_inet.h"38#include "opt_inet6.h"39#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */4041#include <sys/param.h>42#include <sys/malloc.h>43#include <sys/mbuf.h>44#include <sys/socket.h>45#include <sys/systm.h>46#include <sys/errno.h>47#include <sys/time.h>4849#include <net/if.h>50#include <net/if_var.h>51#include <net/if_private.h>5253#include <net/altq/if_altq.h>54#include <net/altq/altq.h>55#include <net/altq/altq_codel.h>56#include <net/altq/altq_rmclass.h>57#include <net/altq/altq_rmclass_debug.h>58#include <net/altq/altq_red.h>59#include <net/altq/altq_rio.h>6061/*62* Local Macros63*/64#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; }6566/*67* Local routines.68*/6970static int rmc_satisfied(struct rm_class *, struct timeval *);71static void rmc_wrr_set_weights(struct rm_ifdat *);72static void rmc_depth_compute(struct rm_class *);73static void rmc_depth_recompute(rm_class_t *);7475static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);76static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int);7778static int _rmc_addq(rm_class_t *, mbuf_t *);79static void _rmc_dropq(rm_class_t *);80static mbuf_t *_rmc_getq(rm_class_t *);81static mbuf_t *_rmc_pollq(rm_class_t *);8283static int 
rmc_under_limit(struct rm_class *, struct timeval *);84static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);85static void rmc_drop_action(struct rm_class *);86static void rmc_restart(void *);87static void rmc_root_overlimit(struct rm_class *, struct rm_class *);8889#define BORROW_OFFTIME90/*91* BORROW_OFFTIME (experimental):92* borrow the offtime of the class borrowing from.93* the reason is that when its own offtime is set, the class is unable94* to borrow much, especially when cutoff is taking effect.95* but when the borrowed class is overloaded (advidle is close to minidle),96* use the borrowing class's offtime to avoid overload.97*/98#define ADJUST_CUTOFF99/*100* ADJUST_CUTOFF (experimental):101* if no underlimit class is found due to cutoff, increase cutoff and102* retry the scheduling loop.103* also, don't invoke delay_actions while cutoff is taking effect,104* since a sleeping class won't have a chance to be scheduled in the105* next loop.106*107* now heuristics for setting the top-level variable (cutoff_) becomes:108* 1. if a packet arrives for a not-overlimit class, set cutoff109* to the depth of the class.110* 2. if cutoff is i, and a packet arrives for an overlimit class111* with an underlimit ancestor at a lower level than i (say j),112* then set cutoff to j.113* 3. at scheduling a packet, if there is no underlimit class114* due to the current cutoff level, increase cutoff by 1 and115* then try to schedule again.116*/117118/*119* rm_class_t *120* rmc_newclass(...) - Create a new resource management class at priority121* 'pri' on the interface given by 'ifd'.122*123* nsecPerByte is the data rate of the interface in nanoseconds/byte.124* E.g., 800 for a 10Mb/s ethernet. If the class gets less125* than 100% of the bandwidth, this number should be the126* 'effective' rate for the class. Let f be the127* bandwidth fraction allocated to this class, and let128* nsPerByte be the data rate of the output link in129* nanoseconds/byte. 
Then nsecPerByte is set to130* nsPerByte / f. E.g., 1600 (= 800 / .5)131* for a class that gets 50% of an ethernet's bandwidth.132*133* action the routine to call when the class is over limit.134*135* maxq max allowable queue size for class (in packets).136*137* parent parent class pointer.138*139* borrow class to borrow from (should be either 'parent' or null).140*141* maxidle max value allowed for class 'idle' time estimate (this142* parameter determines how large an initial burst of packets143* can be before overlimit action is invoked.144*145* offtime how long 'delay' action will delay when class goes over146* limit (this parameter determines the steady-state burst147* size when a class is running over its limit).148*149* Maxidle and offtime have to be computed from the following: If the150* average packet size is s, the bandwidth fraction allocated to this151* class is f, we want to allow b packet bursts, and the gain of the152* averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:153*154* ptime = s * nsPerByte * (1 - f) / f155* maxidle = ptime * (1 - g^b) / g^b156* minidle = -ptime * (1 / (f - 1))157* offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1)158*159* Operationally, it's convenient to specify maxidle & offtime in units160* independent of the link bandwidth so the maxidle & offtime passed to161* this routine are the above values multiplied by 8*f/(1000*nsPerByte).162* (The constant factor is a scale factor needed to make the parameters163* integers. This scaling also means that the 'unscaled' values of164* maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,165* not nanoseconds.) Also note that the 'idle' filter computation keeps166* an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of167* maxidle also must be scaled upward by this value. 
Thus, the passed168* values for maxidle and offtime can be computed as follows:169*170* maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)171* offtime = offtime * 8 / (1000 * nsecPerByte)172*173* When USE_HRTIME is employed, then maxidle and offtime become:174* maxidle = maxilde * (8.0 / nsecPerByte);175* offtime = offtime * (8.0 / nsecPerByte);176*/177struct rm_class *178rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,179void (*action)(rm_class_t *, rm_class_t *), int maxq,180struct rm_class *parent, struct rm_class *borrow, u_int maxidle,181int minidle, u_int offtime, int pktsize, int flags)182{183struct rm_class *cl;184struct rm_class *peer;185int s;186187if (pri >= RM_MAXPRIO)188return (NULL);189#ifndef ALTQ_RED190if (flags & RMCF_RED) {191#ifdef ALTQ_DEBUG192printf("rmc_newclass: RED not configured for CBQ!\n");193#endif194return (NULL);195}196#endif197#ifndef ALTQ_RIO198if (flags & RMCF_RIO) {199#ifdef ALTQ_DEBUG200printf("rmc_newclass: RIO not configured for CBQ!\n");201#endif202return (NULL);203}204#endif205#ifndef ALTQ_CODEL206if (flags & RMCF_CODEL) {207#ifdef ALTQ_DEBUG208printf("rmc_newclass: CODEL not configured for CBQ!\n");209#endif210return (NULL);211}212#endif213214cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);215if (cl == NULL)216return (NULL);217CALLOUT_INIT(&cl->callout_);218cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);219if (cl->q_ == NULL) {220free(cl, M_DEVBUF);221return (NULL);222}223224/*225* Class initialization.226*/227cl->children_ = NULL;228cl->parent_ = parent;229cl->borrow_ = borrow;230cl->leaf_ = 1;231cl->ifdat_ = ifd;232cl->pri_ = pri;233cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */234cl->depth_ = 0;235cl->qthresh_ = 0;236cl->ns_per_byte_ = nsecPerByte;237238qlimit(cl->q_) = maxq;239qtype(cl->q_) = Q_DROPHEAD;240qlen(cl->q_) = 0;241cl->flags_ = flags;242243#if 1 /* minidle is also scaled in ALTQ */244cl->minidle_ = (minidle * (int)nsecPerByte) 
/ 8;245if (cl->minidle_ > 0)246cl->minidle_ = 0;247#else248cl->minidle_ = minidle;249#endif250cl->maxidle_ = (maxidle * nsecPerByte) / 8;251if (cl->maxidle_ == 0)252cl->maxidle_ = 1;253#if 1 /* offtime is also scaled in ALTQ */254cl->avgidle_ = cl->maxidle_;255cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;256if (cl->offtime_ == 0)257cl->offtime_ = 1;258#else259cl->avgidle_ = 0;260cl->offtime_ = (offtime * nsecPerByte) / 8;261#endif262cl->overlimit = action;263264#ifdef ALTQ_RED265if (flags & (RMCF_RED|RMCF_RIO)) {266int red_flags, red_pkttime;267268red_flags = 0;269if (flags & RMCF_ECN)270red_flags |= REDF_ECN;271if (flags & RMCF_FLOWVALVE)272red_flags |= REDF_FLOWVALVE;273#ifdef ALTQ_RIO274if (flags & RMCF_CLEARDSCP)275red_flags |= RIOF_CLEARDSCP;276#endif277red_pkttime = nsecPerByte * pktsize / 1000;278279if (flags & RMCF_RED) {280cl->red_ = red_alloc(0, 0,281qlimit(cl->q_) * 10/100,282qlimit(cl->q_) * 30/100,283red_flags, red_pkttime);284if (cl->red_ != NULL)285qtype(cl->q_) = Q_RED;286}287#ifdef ALTQ_RIO288else {289cl->red_ = (red_t *)rio_alloc(0, NULL,290red_flags, red_pkttime);291if (cl->red_ != NULL)292qtype(cl->q_) = Q_RIO;293}294#endif295}296#endif /* ALTQ_RED */297#ifdef ALTQ_CODEL298if (flags & RMCF_CODEL) {299cl->codel_ = codel_alloc(5, 100, 0);300if (cl->codel_ != NULL)301qtype(cl->q_) = Q_CODEL;302}303#endif304305/*306* put the class into the class tree307*/308s = splnet();309IFQ_LOCK(ifd->ifq_);310if ((peer = ifd->active_[pri]) != NULL) {311/* find the last class at this pri */312cl->peer_ = peer;313while (peer->peer_ != ifd->active_[pri])314peer = peer->peer_;315peer->peer_ = cl;316} else {317ifd->active_[pri] = cl;318cl->peer_ = cl;319}320321if (cl->parent_) {322cl->next_ = parent->children_;323parent->children_ = cl;324parent->leaf_ = 0;325}326327/*328* Compute the depth of this class and its ancestors in the class329* hierarchy.330*/331rmc_depth_compute(cl);332333/*334* If CBQ's WRR is enabled, then initialize the class WRR 
state.335*/336if (ifd->wrr_) {337ifd->num_[pri]++;338ifd->alloc_[pri] += cl->allotment_;339rmc_wrr_set_weights(ifd);340}341IFQ_UNLOCK(ifd->ifq_);342splx(s);343return (cl);344}345346int347rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,348int minidle, u_int offtime, int pktsize)349{350struct rm_ifdat *ifd;351u_int old_allotment;352int s;353354ifd = cl->ifdat_;355old_allotment = cl->allotment_;356357s = splnet();358IFQ_LOCK(ifd->ifq_);359cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */360cl->qthresh_ = 0;361cl->ns_per_byte_ = nsecPerByte;362363qlimit(cl->q_) = maxq;364365#if 1 /* minidle is also scaled in ALTQ */366cl->minidle_ = (minidle * nsecPerByte) / 8;367if (cl->minidle_ > 0)368cl->minidle_ = 0;369#else370cl->minidle_ = minidle;371#endif372cl->maxidle_ = (maxidle * nsecPerByte) / 8;373if (cl->maxidle_ == 0)374cl->maxidle_ = 1;375#if 1 /* offtime is also scaled in ALTQ */376cl->avgidle_ = cl->maxidle_;377cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;378if (cl->offtime_ == 0)379cl->offtime_ = 1;380#else381cl->avgidle_ = 0;382cl->offtime_ = (offtime * nsecPerByte) / 8;383#endif384385/*386* If CBQ's WRR is enabled, then initialize the class WRR state.387*/388if (ifd->wrr_) {389ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;390rmc_wrr_set_weights(ifd);391}392IFQ_UNLOCK(ifd->ifq_);393splx(s);394return (0);395}396397/*398* static void399* rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes400* the appropriate run robin weights for the CBQ weighted round robin401* algorithm.402*403* Returns: NONE404*/405406static void407rmc_wrr_set_weights(struct rm_ifdat *ifd)408{409int i;410struct rm_class *cl, *clh;411412for (i = 0; i < RM_MAXPRIO; i++) {413/*414* This is inverted from that of the simulator to415* maintain precision.416*/417if (ifd->num_[i] == 0)418ifd->M_[i] = 0;419else420ifd->M_[i] = ifd->alloc_[i] /421(ifd->num_[i] * ifd->maxpkt_);422/*423* Compute the weighted allotment for 
each class.
		 * This takes the expensive div instruction out
		 * of the main loop for the wrr scheduling path.
		 * These only get recomputed when a class comes or
		 * goes.
		 */
		if (ifd->active_[i] != NULL) {
			clh = cl = ifd->active_[i];
			do {
				/* safe-guard for slow link or alloc_ == 0 */
				if (ifd->M_[i] == 0)
					cl->w_allotment_ = 0;
				else
					cl->w_allotment_ = cl->allotment_ /
					    ifd->M_[i];
				cl = cl->peer_;
			} while ((cl != NULL) && (cl != clh));
		}
	}
}

/*
 * int
 * rmc_get_weight(struct rm_ifdat *ifd, int pri) - Return the WRR weight
 *	(M_) currently computed for priority level 'pri', or 0 if 'pri'
 *	is out of range.
 */
int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
	if ((pri >= 0) && (pri < RM_MAXPRIO))
		return (ifd->M_[pri]);
	else
		return (0);
}

/*
 * static void
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 *	appropriate depth of class 'cl' and its ancestors.
 *
 * Returns:	NONE
 */

static void
rmc_depth_compute(struct rm_class *cl)
{
	rm_class_t *t = cl, *p;

	/*
	 * Recompute the depth for the branch of the tree.
	 * Walk upward, bumping each parent's depth_ to child depth + 1;
	 * stop as soon as a parent is already deep enough.
	 */
	while (t != NULL) {
		p = t->parent_;
		if (p && (t->depth_ >= p->depth_)) {
			p->depth_ = t->depth_ + 1;
			t = p;
		} else
			t = NULL;
	}
}

/*
 * static void
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 *	the depth of the tree after a class has been deleted.
 *
 * Returns:	NONE
 */

static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
	rm_class_t *p, *t;

	/*
	 * Walk from 'cl' toward the root; at each node, set depth_ to
	 * 1 + max(children's depth_).  Stop early if a node's depth
	 * is already consistent (ancestors are then consistent too).
	 */
	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
#else
	rm_class_t *t;

	if (cl->depth_ >= 1) {
		if (cl->children_ == NULL) {
			cl->depth_ = 0;
		} else if ((t = cl->children_) != NULL) {
			while (t != NULL) {
				if (t->children_ != NULL)
					rmc_depth_recompute(t);
				t = t->next_;
			}
		} else
			rmc_depth_compute(cl);
	}
#endif
}

/*
 * void
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 *	function deletes a class from the link-sharing structure and frees
 *	all resources associated with the class.
 *
 * Returns:	NONE
 */

void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
	struct rm_class *p, *head, *previous;
	int s;

	/* only leaf classes may be deleted */
	ASSERT(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	s = splnet();
	IFQ_LOCK(ifd->ifq_);
	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 *	re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			/* cl is the only child */
			ASSERT(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list.
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			ASSERT(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
#if 1 /* ALTQ
 */
	rmc_depth_recompute(cl->parent_);
#else
	rmc_depth_recompute(ifd->root_);
#endif

	IFQ_UNLOCK(ifd->ifq_);
	splx(s);

	/*
	 * Free the class structure.
	 */
	if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->q_))
			rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->q_))
			red_destroy(cl->red_);
#endif
#ifdef ALTQ_CODEL
		if (q_is_codel(cl->q_))
			codel_destroy(cl->codel_);
#endif
	}
	free(cl->q_, M_DEVBUF);
	free(cl, M_DEVBUF);
}

/*
 * void
 * rmc_init(...) - Initialize the resource management data structures
 *	associated with the output portion of interface 'ifp'.  'ifd' is
 *	where the structures will be built (for backwards compatibility, the
 *	structures aren't kept in the ifnet struct).  'nsecPerByte'
 *	gives the link speed (inverse of bandwidth) in nanoseconds/byte.
 *	'restart' is the driver-specific routine that the generic 'delay
 *	until under limit' action will call to restart output.  `maxq'
 *	is the queue size of the 'link' & 'default' classes.  'maxqueued'
 *	is the maximum number of packets that the resource management
 *	code will allow to be queued 'downstream' (this is typically 1).
 *
 * Returns:	NONE
 */

void
rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
    int minidle, u_int offtime, int flags)
{
	int i, mtu;

	/*
	 * Initialize the CBQ tracing/debug facility.
	 */
	CBQTRACEINIT();

	bzero((char *)ifd, sizeof (*ifd));
	mtu = ifq->altq_ifp->if_mtu;
	ifd->ifq_ = ifq;
	ifd->restart = restart;
	ifd->maxqueued_ = maxqueued;
	ifd->ns_per_byte_ = nsecPerByte;
	ifd->maxpkt_ = mtu;
	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
	/* bound the estimated-completion-time drift to ~16 MTU times */
	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
	if (mtu * nsecPerByte > 10 * 1000000)
		ifd->maxiftime_ /= 4;
#endif

	reset_cutoff(ifd);
	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

	/*
	 * Initialize the CBQ's WRR state.
	 */
	for (i = 0; i < RM_MAXPRIO; i++) {
		ifd->alloc_[i] = 0;
		ifd->M_[i] = 0;
		ifd->num_[i] = 0;
		ifd->na_[i] = 0;
		ifd->active_[i] = NULL;
	}

	/*
	 * Initialize current packet state.
	 */
	ifd->qi_ = 0;
	ifd->qo_ = 0;
	for (i = 0; i < RM_MAXQUEUED; i++) {
		ifd->class_[i] = NULL;
		ifd->curlen_[i] = 0;
		ifd->borrowed_[i] = NULL;
	}

	/*
	 * Create the root class of the link-sharing structure.
	 */
	if ((ifd->root_ = rmc_newclass(0, ifd,
	    nsecPerByte,
	    rmc_root_overlimit, maxq, 0, 0,
	    maxidle, minidle, offtime,
	    0, 0)) == NULL) {
		printf("rmc_init: root class not allocated\n");
		return ;
	}
	ifd->root_->depth_ = 0;
}

/*
 * void
 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
 *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
 *	by a driver's if_output routine.  This routine must be called with
 *	output packet completion interrupts locked out (to avoid racing with
 *	rmc_dequeue_next).
 *
 * Returns:	0 on successful queueing
 *		-1 when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
	struct timeval now;
	struct rm_ifdat *ifd = cl->ifdat_;
	int cpri = cl->pri_;
	int is_empty = qempty(cl->q_);

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			/* class is underlimit: lower cutoff to its depth */
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			    borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
				    cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed */
		return (-1);

	if (is_empty) {
		/* queue went non-empty: one more active class at this pri */
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	classes to see if they are satisfied.  Sets cutoff_ to the depth of
 *	the first unsatisfied class found, or resets it if all are satisfied.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
	int i;
	rm_class_t *p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied. 
0, otherwise.
 */

static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
	rm_class_t *p;

	if (cl == NULL)
		return (1);
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		/* leaf class: satisfied unless awake with a deep queue */
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	/* interior class: satisfied only if every child is satisfied */
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t *p = cl;
	rm_class_t *top;
	struct rm_ifdat *ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TV_LT(now, &cl->undertime_))
			return (0);

		/* the delay period has expired; wake the class up */
		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	top = NULL;
	/* climb the borrow chain while this class is overlimit */
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/* cutoff is taking effect, just
				   return false without calling
				   the delay action. */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	/* record the ancestor actually borrowed from, if any */
	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
 *	Packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorites.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	struct rm_class *cl = NULL, *first = NULL;
	u_int deficit;
	int cpri;
	mbuf_t *m;
	struct timeval now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	}
	else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_])" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				}
				else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_PPOLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it. 
This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t *m;
	int cpri;
	struct rm_class *cl, *first = NULL;
	struct timeval now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				/* send if underlimit or able to borrow */
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link. 
If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to send.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 * Returns:	NULL, if a packet is not available or if all
 *		classes are overlimit.
 *
 *		Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int idle, avgidle, pktlen;
	int pkt_time, tidle;
	rm_class_t *cl, *borrowed;
	rm_class_t *borrows;
	struct timeval *nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1 /* use 
approximation */1281pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;1282pkt_time = NSEC_TO_USEC(pkt_time);1283#else1284pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;1285#endif1286#if 1 /* ALTQ4PPP */1287if (TV_LT(nowp, &ifd->ifnow_)) {1288int iftime;12891290/*1291* make sure the estimated completion time does not go1292* too far. it can happen when the link layer supports1293* data compression or the interface speed is set to1294* a much lower value.1295*/1296TV_DELTA(&ifd->ifnow_, nowp, iftime);1297if (iftime+pkt_time < ifd->maxiftime_) {1298TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);1299} else {1300TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);1301}1302} else {1303TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);1304}1305#else1306if (TV_LT(nowp, &ifd->ifnow_)) {1307TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);1308} else {1309TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);1310}1311#endif13121313while (cl != NULL) {1314TV_DELTA(&ifd->ifnow_, &cl->last_, idle);1315if (idle >= 2000000)1316/*1317* this class is idle enough, reset avgidle.1318* (TV_DELTA returns 2000000 us when delta is large.)1319*/1320cl->avgidle_ = cl->maxidle_;13211322/* get pkt_time (for class) in usec */1323#if 1 /* use approximation */1324pkt_time = pktlen * cl->ns_per_byte_;1325pkt_time = NSEC_TO_USEC(pkt_time);1326#else1327pkt_time = pktlen * cl->ns_per_byte_ / 1000;1328#endif1329idle -= pkt_time;13301331avgidle = cl->avgidle_;1332avgidle += idle - (avgidle >> RM_FILTER_GAIN);1333cl->avgidle_ = avgidle;13341335/* Are we overlimit ? 
*/1336if (avgidle <= 0) {1337CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);1338#if 1 /* ALTQ */1339/*1340* need some lower bound for avgidle, otherwise1341* a borrowing class gets unbounded penalty.1342*/1343if (avgidle < cl->minidle_)1344avgidle = cl->avgidle_ = cl->minidle_;1345#endif1346/* set next idle to make avgidle 0 */1347tidle = pkt_time +1348(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);1349TV_ADD_DELTA(nowp, tidle, &cl->undertime_);1350++cl->stats_.over;1351} else {1352cl->avgidle_ =1353(avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;1354cl->undertime_.tv_sec = 0;1355if (cl->sleeping_) {1356CALLOUT_STOP(&cl->callout_);1357cl->sleeping_ = 0;1358}1359}13601361if (borrows != NULL) {1362if (borrows != cl)1363++cl->stats_.borrows;1364else1365borrows = NULL;1366}1367cl->last_ = ifd->ifnow_;1368cl->last_pkttime_ = pkt_time;13691370#if 11371if (cl->parent_ == NULL) {1372/* take stats of root class */1373PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);1374}1375#endif13761377cl = cl->parent_;1378}13791380/*1381* Check to see if cutoff needs to set to a new level.1382*/1383cl = ifd->class_[ifd->qo_];1384if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {1385#if 1 /* ALTQ */1386if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {1387rmc_tl_satisfied(ifd, nowp);1388CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);1389} else {1390ifd->cutoff_ = borrowed->depth_;1391CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);1392}1393#else /* !ALTQ */1394if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {1395reset_cutoff(ifd);1396#ifdef notdef1397rmc_tl_satisfied(ifd, &now);1398#endif1399CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);1400} else {1401ifd->cutoff_ = borrowed->depth_;1402CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);1403}1404#endif /* !ALTQ */1405}14061407/*1408* Release class slot1409*/1410ifd->borrowed_[ifd->qo_] = NULL;1411ifd->class_[ifd->qo_] = NULL;1412ifd->qo_ = (ifd->qo_ + 1) % 
ifd->maxqueued_;1413ifd->queued_--;1414}14151416/*1417* void1418* rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)1419* over-limit action routines. These get invoked by rmc_under_limit()1420* if a class with packets to send if over its bandwidth limit & can't1421* borrow from a parent class.1422*1423* Returns: NONE1424*/14251426static void1427rmc_drop_action(struct rm_class *cl)1428{1429struct rm_ifdat *ifd = cl->ifdat_;14301431ASSERT(qlen(cl->q_) > 0);1432_rmc_dropq(cl);1433if (qempty(cl->q_))1434ifd->na_[cl->pri_]--;1435}14361437void rmc_dropall(struct rm_class *cl)1438{1439struct rm_ifdat *ifd = cl->ifdat_;14401441if (!qempty(cl->q_)) {1442_flushq(cl->q_);14431444ifd->na_[cl->pri_]--;1445}1446}14471448static int1449hzto(struct timeval *tv)1450{1451struct timeval t2;14521453getmicrotime(&t2);1454t2.tv_sec = tv->tv_sec - t2.tv_sec;1455t2.tv_usec = tv->tv_usec - t2.tv_usec;1456return (tvtohz(&t2));1457}14581459/*1460* void1461* rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ1462* delay action routine. 
It is invoked via rmc_under_limit when the1463* packet is discoverd to be overlimit.1464*1465* If the delay action is result of borrow class being overlimit, then1466* delay for the offtime of the borrowing class that is overlimit.1467*1468* Returns: NONE1469*/14701471void1472rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)1473{1474int delay, t, extradelay;14751476cl->stats_.overactions++;1477TV_DELTA(&cl->undertime_, &cl->overtime_, delay);1478#ifndef BORROW_OFFTIME1479delay += cl->offtime_;1480#endif14811482if (!cl->sleeping_) {1483CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);1484#ifdef BORROW_OFFTIME1485if (borrow != NULL)1486extradelay = borrow->offtime_;1487else1488#endif1489extradelay = cl->offtime_;14901491#ifdef ALTQ1492/*1493* XXX recalculate suspend time:1494* current undertime is (tidle + pkt_time) calculated1495* from the last transmission.1496* tidle: time required to bring avgidle back to 01497* pkt_time: target waiting time for this class1498* we need to replace pkt_time by offtime1499*/1500extradelay -= cl->last_pkttime_;1501#endif1502if (extradelay > 0) {1503TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);1504delay += extradelay;1505}15061507cl->sleeping_ = 1;1508cl->stats_.delays++;15091510/*1511* Since packets are phased randomly with respect to the1512* clock, 1 tick (the next clock tick) can be an arbitrarily1513* short time so we have to wait for at least two ticks.1514* NOTE: If there's no other traffic, we need the timer as1515* a 'backstop' to restart this class.1516*/1517if (delay > tick * 2) {1518/* FreeBSD rounds up the tick */1519t = hzto(&cl->undertime_);1520} else1521t = 2;1522CALLOUT_RESET(&cl->callout_, t, rmc_restart, cl);1523}1524}15251526/*1527* void1528* rmc_restart() - is just a helper routine for rmc_delay_action -- it is1529* called by the system timer code & is responsible checking if the1530* class is still sleeping (it might have been restarted as a side1531* effect of the queue scan on a 
packet arrival) and, if so, restarting1532* output for the class. Inspecting the class state & restarting output1533* require locking the class structure. In general the driver is1534* responsible for locking but this is the only routine that is not1535* called directly or indirectly from the interface driver so it has1536* know about system locking conventions. Under bsd, locking is done1537* by raising IPL to splimp so that's what's implemented here. On a1538* different system this would probably need to be changed.1539*1540* Returns: NONE1541*/15421543static void1544rmc_restart(void *arg)1545{1546struct rm_class *cl = arg;1547struct rm_ifdat *ifd = cl->ifdat_;1548struct epoch_tracker et;1549int s;15501551s = splnet();1552NET_EPOCH_ENTER(et);1553IFQ_LOCK(ifd->ifq_);1554CURVNET_SET(ifd->ifq_->altq_ifp->if_vnet);1555if (cl->sleeping_) {1556cl->sleeping_ = 0;1557cl->undertime_.tv_sec = 0;15581559if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {1560CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);1561(ifd->restart)(ifd->ifq_);1562}1563}1564CURVNET_RESTORE();1565IFQ_UNLOCK(ifd->ifq_);1566NET_EPOCH_EXIT(et);1567splx(s);1568}15691570/*1571* void1572* rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit1573* handling routine for the root class of the link sharing structure.1574*1575* Returns: NONE1576*/15771578static void1579rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)1580{1581panic("rmc_root_overlimit");1582}15831584/*1585* Packet Queue handling routines. 
Eventually, this is to localize the1586* effects on the code whether queues are red queues or droptail1587* queues.1588*/15891590static int1591_rmc_addq(rm_class_t *cl, mbuf_t *m)1592{1593#ifdef ALTQ_RIO1594if (q_is_rio(cl->q_))1595return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);1596#endif1597#ifdef ALTQ_RED1598if (q_is_red(cl->q_))1599return red_addq(cl->red_, cl->q_, m, cl->pktattr_);1600#endif /* ALTQ_RED */1601#ifdef ALTQ_CODEL1602if (q_is_codel(cl->q_))1603return codel_addq(cl->codel_, cl->q_, m);1604#endif16051606if (cl->flags_ & RMCF_CLEARDSCP)1607write_dsfield(m, cl->pktattr_, 0);16081609_addq(cl->q_, m);1610return (0);1611}16121613/* note: _rmc_dropq is not called for red */1614static void1615_rmc_dropq(rm_class_t *cl)1616{1617mbuf_t *m;16181619if ((m = _getq(cl->q_)) != NULL)1620m_freem(m);1621}16221623static mbuf_t *1624_rmc_getq(rm_class_t *cl)1625{1626#ifdef ALTQ_RIO1627if (q_is_rio(cl->q_))1628return rio_getq((rio_t *)cl->red_, cl->q_);1629#endif1630#ifdef ALTQ_RED1631if (q_is_red(cl->q_))1632return red_getq(cl->red_, cl->q_);1633#endif1634#ifdef ALTQ_CODEL1635if (q_is_codel(cl->q_))1636return codel_getq(cl->codel_, cl->q_);1637#endif1638return _getq(cl->q_);1639}16401641static mbuf_t *1642_rmc_pollq(rm_class_t *cl)1643{1644return qhead(cl->q_);1645}16461647#ifdef CBQ_TRACE16481649struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];1650struct cbqtrace *cbqtrace_ptr = NULL;1651int cbqtrace_count;16521653/*1654* DDB hook to trace cbq events:1655* the last 1024 events are held in a circular buffer.1656* use "call cbqtrace_dump(N)" to display 20 events from Nth event.1657*/1658void cbqtrace_dump(int);1659static char *rmc_funcname(void *);16601661static struct rmc_funcs {1662void *func;1663char *name;1664} rmc_funcs[] =1665{1666rmc_init, "rmc_init",1667rmc_queue_packet, "rmc_queue_packet",1668rmc_under_limit, "rmc_under_limit",1669rmc_update_class_util, "rmc_update_class_util",1670rmc_delay_action, "rmc_delay_action",1671rmc_restart, 
"rmc_restart",1672_rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next",1673NULL, NULL1674};16751676static char *rmc_funcname(void *func)1677{1678struct rmc_funcs *fp;16791680for (fp = rmc_funcs; fp->func != NULL; fp++)1681if (fp->func == func)1682return (fp->name);1683return ("unknown");1684}16851686void cbqtrace_dump(int counter)1687{1688int i, *p;1689char *cp;16901691counter = counter % NCBQTRACE;1692p = (int *)&cbqtrace_buffer[counter];16931694for (i=0; i<20; i++) {1695printf("[0x%x] ", *p++);1696printf("%s: ", rmc_funcname((void *)*p++));1697cp = (char *)p++;1698printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);1699printf("%d\n",*p++);17001701if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])1702p = (int *)cbqtrace_buffer;1703}1704}1705#endif /* CBQ_TRACE */1706#endif /* ALTQ_CBQ */17071708#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \1709defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL)1710#if !defined(__GNUC__) || defined(ALTQ_DEBUG)17111712void1713_addq(class_queue_t *q, mbuf_t *m)1714{1715mbuf_t *m0;17161717if ((m0 = qtail(q)) != NULL)1718m->m_nextpkt = m0->m_nextpkt;1719else1720m0 = m;1721m0->m_nextpkt = m;1722qtail(q) = m;1723qlen(q)++;1724}17251726mbuf_t *1727_getq(class_queue_t *q)1728{1729mbuf_t *m, *m0;17301731if ((m = qtail(q)) == NULL)1732return (NULL);1733if ((m0 = m->m_nextpkt) != m)1734m->m_nextpkt = m0->m_nextpkt;1735else {1736ASSERT(qlen(q) == 1);1737qtail(q) = NULL;1738}1739qlen(q)--;1740m0->m_nextpkt = NULL;1741return (m0);1742}17431744/* drop a packet at the tail of the queue */1745mbuf_t *1746_getq_tail(class_queue_t *q)1747{1748mbuf_t *m, *m0, *prev;17491750if ((m = m0 = qtail(q)) == NULL)1751return NULL;1752do {1753prev = m0;1754m0 = m0->m_nextpkt;1755} while (m0 != m);1756prev->m_nextpkt = m->m_nextpkt;1757if (prev == m) {1758ASSERT(qlen(q) == 1);1759qtail(q) = NULL;1760} else1761qtail(q) = prev;1762qlen(q)--;1763m->m_nextpkt = NULL;1764return (m);1765}17661767/* randomly select a packet in the queue */1768mbuf_t 
*1769_getq_random(class_queue_t *q)1770{1771struct mbuf *m;1772int i, n;17731774if ((m = qtail(q)) == NULL)1775return NULL;1776if (m->m_nextpkt == m) {1777ASSERT(qlen(q) == 1);1778qtail(q) = NULL;1779} else {1780struct mbuf *prev = NULL;17811782n = arc4random() % qlen(q) + 1;1783for (i = 0; i < n; i++) {1784prev = m;1785m = m->m_nextpkt;1786}1787prev->m_nextpkt = m->m_nextpkt;1788if (m == qtail(q))1789qtail(q) = prev;1790}1791qlen(q)--;1792m->m_nextpkt = NULL;1793return (m);1794}17951796void1797_removeq(class_queue_t *q, mbuf_t *m)1798{1799mbuf_t *m0, *prev;18001801m0 = qtail(q);1802do {1803prev = m0;1804m0 = m0->m_nextpkt;1805} while (m0 != m);1806prev->m_nextpkt = m->m_nextpkt;1807if (prev == m)1808qtail(q) = NULL;1809else if (qtail(q) == m)1810qtail(q) = prev;1811qlen(q)--;1812}18131814void1815_flushq(class_queue_t *q)1816{1817mbuf_t *m;18181819while ((m = _getq(q)) != NULL)1820m_freem(m);1821ASSERT(qlen(q) == 0);1822}18231824#endif /* !__GNUC__ || ALTQ_DEBUG */1825#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */182618271828