// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Hierarchical Budget Worst-case Fair Weighted Fair Queueing
 * (B-WF2Q+): hierarchical scheduling algorithm by which the BFQ I/O
 * scheduler schedules generic entities. The latter can represent
 * either single bfq queues (associated with processes) or groups of
 * bfq queues (associated with cgroups).
 */
#include "bfq-iosched.h"

/**
 * bfq_gt - compare two timestamps.
 * @a: first ts.
 * @b: second ts.
 *
 * Return @a > @b, dealing with wrapping correctly.
 */
static int bfq_gt(u64 a, u64 b)
{
	return (s64)(a - b) > 0;
}
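
/*
 * For illustration (numbers are arbitrary): thanks to the cast to s64,
 * the comparison above keeps working across u64 wraparound. With a = 2
 * and b = ULLONG_MAX - 1, the difference a - b wraps to 4, so
 * bfq_gt(a, b) correctly reports that @a is the more recent timestamp,
 * even though a < b numerically.
 */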

static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
{
	struct rb_node *node = tree->rb_node;

	return rb_entry(node, struct bfq_entity, rb_node);
}

static unsigned int bfq_class_idx(struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	return bfqq ? bfqq->ioprio_class - 1 :
		BFQ_DEFAULT_GRP_CLASS - 1;
}

unsigned int bfq_tot_busy_queues(struct bfq_data *bfqd)
{
	return bfqd->busy_queues[0] + bfqd->busy_queues[1] +
		bfqd->busy_queues[2];
}

static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
						 bool expiration);

static bool bfq_update_parent_budget(struct bfq_entity *next_in_service);

/**
 * bfq_update_next_in_service - update sd->next_in_service
 * @sd: sched_data for which to perform the update.
 * @new_entity: if not NULL, pointer to the entity whose activation,
 *		requeueing or repositioning triggered the invocation of
 *		this function.
 * @expiration: if true, this function is being invoked after the
 *		expiration of the in-service entity
 *
 * This function is called to update sd->next_in_service, which, in
 * its turn, may change as a consequence of the insertion or
 * extraction of an entity into/from one of the active trees of
 * sd. These insertions/extractions occur as a consequence of
 * activations/deactivations of entities, with some activations being
 * 'true' activations, and other activations being requeueings (i.e.,
 * implementing the second, requeueing phase of the mechanism used to
 * reposition an entity in its active tree; see comments on
 * __bfq_activate_entity and __bfq_requeue_entity for details). In
 * both the last two activation sub-cases, new_entity points to the
 * just activated or requeued entity.
 *
 * Returns true if sd->next_in_service changes in such a way that
 * entity->parent may become the next_in_service for its parent
 * entity.
 */
static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
				       struct bfq_entity *new_entity,
				       bool expiration)
{
	struct bfq_entity *next_in_service = sd->next_in_service;
	bool parent_sched_may_change = false;
	bool change_without_lookup = false;

	/*
	 * If this update is triggered by the activation, requeueing
	 * or repositioning of an entity that does not coincide with
	 * sd->next_in_service, then a full lookup in the active tree
	 * can be avoided. In fact, it is enough to check whether the
	 * just-modified entity has the same priority as
	 * sd->next_in_service, is eligible and has a lower virtual
	 * finish time than sd->next_in_service. If this compound
	 * condition holds, then the new entity becomes the new
	 * next_in_service. Otherwise no change is needed.
	 */
	if (new_entity && new_entity != sd->next_in_service) {
		/*
		 * Flag used to decide whether to replace
		 * sd->next_in_service with new_entity. Tentatively
		 * set to true, and left as true if
		 * sd->next_in_service is NULL.
		 */
		change_without_lookup = true;

		/*
		 * If there is already a next_in_service candidate
		 * entity, then compare timestamps to decide whether
		 * to replace sd->next_in_service with new_entity.
		 */
		if (next_in_service) {
			unsigned int new_entity_class_idx =
				bfq_class_idx(new_entity);
			struct bfq_service_tree *st =
				sd->service_tree + new_entity_class_idx;

			change_without_lookup =
				(new_entity_class_idx ==
				 bfq_class_idx(next_in_service)
				 &&
				 !bfq_gt(new_entity->start, st->vtime)
				 &&
				 bfq_gt(next_in_service->finish,
					new_entity->finish));
		}

		if (change_without_lookup)
			next_in_service = new_entity;
	}

	if (!change_without_lookup) /* lookup needed */
		next_in_service = bfq_lookup_next_entity(sd, expiration);

	if (next_in_service) {
		bool new_budget_triggers_change =
			bfq_update_parent_budget(next_in_service);

		parent_sched_may_change = !sd->next_in_service ||
			new_budget_triggers_change;
	}

	sd->next_in_service = next_in_service;

	return parent_sched_may_change;
}

#ifdef CONFIG_BFQ_GROUP_IOSCHED

/*
 * Returns true if this budget change may let next_in_service->parent
 * become the next_in_service entity for its parent entity.
 */
static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
{
	struct bfq_entity *bfqg_entity;
	struct bfq_group *bfqg;
	struct bfq_sched_data *group_sd;
	bool ret = false;

	group_sd = next_in_service->sched_data;

	bfqg = container_of(group_sd, struct bfq_group, sched_data);
	/*
	 * bfq_group's my_entity field is not NULL only if the group
	 * is not the root group. We must not touch the root entity
	 * as it must never become an in-service entity.
	 */
	bfqg_entity = bfqg->my_entity;
	if (bfqg_entity) {
		if (bfqg_entity->budget > next_in_service->budget)
			ret = true;
		bfqg_entity->budget = next_in_service->budget;
	}

	return ret;
}

/*
 * This function tells whether entity stops being a candidate for next
 * service, according to the restrictive definition of the field
 * next_in_service. In particular, this function is invoked for an
 * entity that is about to be set in service.
 *
 * If entity is a queue, then the entity is no longer a candidate for
 * next service according to that definition, because entity is
 * about to become the in-service queue. This function then returns
 * true if entity is a queue.
 *
 * In contrast, entity could still be a candidate for next service if
 * it is not a queue, and has more than one active child. In fact,
 * even if one of its children is about to be set in service, other
 * active children may still be the next to serve, for the parent
 * entity, even according to the above definition. As a consequence, a
 * non-queue entity is not a candidate for next-service only if it has
 * only one active child.
 * Only if this condition holds does this function return true for a
 * non-queue entity.
 */
static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
{
	struct bfq_group *bfqg;

	if (bfq_entity_to_bfqq(entity))
		return true;

	bfqg = container_of(entity, struct bfq_group, entity);

	/*
	 * The field active_entities does not always contain the
	 * actual number of active children entities: it happens to
	 * not account for the in-service entity in case the latter is
	 * removed from its active tree (which may get done after
	 * invoking the function bfq_no_longer_next_in_service in
	 * bfq_get_next_queue). Fortunately, here, i.e., while
	 * bfq_no_longer_next_in_service is not yet completed in
	 * bfq_get_next_queue, bfq_active_extract has not yet been
	 * invoked, and thus active_entities still coincides with the
	 * actual number of active entities.
	 */
	if (bfqg->active_entities == 1)
		return true;

	return false;
}

static void bfq_inc_active_entities(struct bfq_entity *entity)
{
	struct bfq_sched_data *sd = entity->sched_data;
	struct bfq_group *bfqg = container_of(sd, struct bfq_group, sched_data);

	if (bfqg != bfqg->bfqd->root_group)
		bfqg->active_entities++;
}

static void bfq_dec_active_entities(struct bfq_entity *entity)
{
	struct bfq_sched_data *sd = entity->sched_data;
	struct bfq_group *bfqg = container_of(sd, struct bfq_group, sched_data);

	if (bfqg != bfqg->bfqd->root_group)
		bfqg->active_entities--;
}

#else /* CONFIG_BFQ_GROUP_IOSCHED */

static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
{
	return false;
}

static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
{
	return true;
}

static void bfq_inc_active_entities(struct bfq_entity *entity)
{
}

static void bfq_dec_active_entities(struct bfq_entity *entity)
{
}

#endif /* CONFIG_BFQ_GROUP_IOSCHED */

/*
 * Shift for timestamp calculations. This actually limits the maximum
 * service allowed in one timestamp delta (small shift values increase it),
 * the maximum total weight that can be used for the queues in the system
 * (big shift values increase it), and the period of virtual time
 * wraparounds.
 */
#define WFQ_SERVICE_SHIFT	22

struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = NULL;

	if (!entity->my_sched_data)
		bfqq = container_of(entity, struct bfq_queue, entity);

	return bfqq;
}

/**
 * bfq_delta - map service into the virtual time domain.
 * @service: amount of service.
 * @weight: scale factor (weight of an entity or weight sum).
 */
static u64 bfq_delta(unsigned long service, unsigned long weight)
{
	return div64_ul((u64)service << WFQ_SERVICE_SHIFT, weight);
}
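
/*
 * For illustration (numbers are arbitrary): with WFQ_SERVICE_SHIFT == 22,
 * charging 1000 units of service to an entity of weight 10 corresponds
 * to a virtual-time delta of bfq_delta(1000, 10) = (1000 << 22) / 10 =
 * 419430400, while the same service charged at weight 20 yields half
 * that delta. The higher the weight, the more slowly virtual time grows
 * for a given amount of service, which is what lets heavier entities
 * receive a proportionally larger share.
 */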

/**
 * bfq_calc_finish - assign the finish time to an entity.
 * @entity: the entity to act upon.
 * @service: the service to be charged to the entity.
 */
static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->finish = entity->start +
		bfq_delta(service, entity->weight);

	if (bfqq) {
		bfq_log_bfqq(bfqq->bfqd, bfqq,
			"calc_finish: serv %lu, w %d",
			service, entity->weight);
		bfq_log_bfqq(bfqq->bfqd, bfqq,
			"calc_finish: start %llu, finish %llu, delta %llu",
			entity->start, entity->finish,
			bfq_delta(service, entity->weight));
	}
}

/**
 * bfq_entity_of - get an entity from a node.
 * @node: the node field of the entity.
 *
 * Convert a node pointer to the relative entity. This is used only
 * to simplify the logic of some functions and not as the generic
 * conversion mechanism because, e.g., in the tree walking functions,
 * the check for a %NULL value would be redundant.
 */
struct bfq_entity *bfq_entity_of(struct rb_node *node)
{
	struct bfq_entity *entity = NULL;

	if (node)
		entity = rb_entry(node, struct bfq_entity, rb_node);

	return entity;
}

/**
 * bfq_extract - remove an entity from a tree.
 * @root: the tree root.
 * @entity: the entity to remove.
 */
static void bfq_extract(struct rb_root *root, struct bfq_entity *entity)
{
	entity->tree = NULL;
	rb_erase(&entity->rb_node, root);
}

/**
 * bfq_idle_extract - extract an entity from the idle tree.
 * @st: the service tree of the owning @entity.
 * @entity: the entity being removed.
 */
static void bfq_idle_extract(struct bfq_service_tree *st,
			     struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct rb_node *next;

	if (entity == st->first_idle) {
		next = rb_next(&entity->rb_node);
		st->first_idle = bfq_entity_of(next);
	}

	if (entity == st->last_idle) {
		next = rb_prev(&entity->rb_node);
		st->last_idle = bfq_entity_of(next);
	}

	bfq_extract(&st->idle, entity);

	if (bfqq)
		list_del(&bfqq->bfqq_list);
}

/**
 * bfq_insert - generic tree insertion.
 * @root: tree root.
 * @entity: entity to insert.
 *
 * This is used for the idle and the active tree, since they are both
 * ordered by finish time.
 */
static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
{
	struct bfq_entity *entry;
	struct rb_node **node = &root->rb_node;
	struct rb_node *parent = NULL;

	while (*node) {
		parent = *node;
		entry = rb_entry(parent, struct bfq_entity, rb_node);

		if (bfq_gt(entry->finish, entity->finish))
			node = &parent->rb_left;
		else
			node = &parent->rb_right;
	}

	rb_link_node(&entity->rb_node, parent, node);
	rb_insert_color(&entity->rb_node, root);

	entity->tree = root;
}

/**
 * bfq_update_min - update the min_start field of an entity.
 * @entity: the entity to update.
 * @node: one of its children.
 *
 * This function is called when @entity may store an invalid value for
 * min_start due to updates to the active tree. The function assumes
 * that the subtree rooted at @node (which may be its left or its right
 * child) has a valid min_start value.
 */
static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node)
{
	struct bfq_entity *child;

	if (node) {
		child = rb_entry(node, struct bfq_entity, rb_node);
		if (bfq_gt(entity->min_start, child->min_start))
			entity->min_start = child->min_start;
	}
}

/**
 * bfq_update_active_node - recalculate min_start.
 * @node: the node to update.
 *
 * @node may have changed position or one of its children may have moved;
 * this function updates its min_start value. The left and right subtrees
 * are assumed to hold a correct min_start value.
 */
static void bfq_update_active_node(struct rb_node *node)
{
	struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);

	entity->min_start = entity->start;
	bfq_update_min(entity, node->rb_right);
	bfq_update_min(entity, node->rb_left);
}

/**
 * bfq_update_active_tree - update min_start for the whole active tree.
 * @node: the starting node.
 *
 * @node must be the deepest modified node after an update. This function
 * updates its min_start using the values held by its children, assuming
 * that they did not change, and then updates all the nodes that may have
 * changed in the path to the root. The only nodes that may have changed
 * are the ones in the path or their siblings.
 */
static void bfq_update_active_tree(struct rb_node *node)
{
	struct rb_node *parent;

up:
	bfq_update_active_node(node);

	parent = rb_parent(node);
	if (!parent)
		return;

	if (node == parent->rb_left && parent->rb_right)
		bfq_update_active_node(parent->rb_right);
	else if (parent->rb_left)
		bfq_update_active_node(parent->rb_left);

	node = parent;
	goto up;
}

/**
 * bfq_active_insert - insert an entity in the active tree of its
 *                     group/device.
 * @st: the service tree of the entity.
 * @entity: the entity being inserted.
 *
 * The active tree is ordered by finish time, but an extra key is kept
 * per each node, containing the minimum value for the start times of
 * its children (and the node itself), so it's possible to search for
 * the eligible node with the lowest finish time in logarithmic time.
 */
static void bfq_active_insert(struct bfq_service_tree *st,
			      struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct rb_node *node = &entity->rb_node;

	bfq_insert(&st->active, entity);

	if (node->rb_left)
		node = node->rb_left;
	else if (node->rb_right)
		node = node->rb_right;

	bfq_update_active_tree(node);

	if (bfqq)
		list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list[bfqq->actuator_idx]);

	bfq_inc_active_entities(entity);
}

/**
 * bfq_ioprio_to_weight - calc a weight from an ioprio.
 * @ioprio: the ioprio value to convert.
 */
unsigned short bfq_ioprio_to_weight(int ioprio)
{
	return (IOPRIO_NR_LEVELS - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
}
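
/*
 * For illustration (assuming the usual values IOPRIO_NR_LEVELS == 8 and
 * BFQ_WEIGHT_CONVERSION_COEFF == 10): ioprio 0, the highest priority,
 * maps to weight 80, ioprio 4 to weight 40, and ioprio 7, the lowest
 * priority, to weight 10. bfq_weight_to_ioprio() below inverts this
 * mapping, returning 0 for any weight of 80 or more.
 */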

/**
 * bfq_weight_to_ioprio - calc an ioprio from a weight.
 * @weight: the weight value to convert.
 *
 * To preserve as much as possible the old only-ioprio user interface,
 * 0 is used as an escape ioprio value for weights (numerically) equal to
 * or larger than IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF.
 */
static unsigned short bfq_weight_to_ioprio(int weight)
{
	return max_t(int, 0,
		     IOPRIO_NR_LEVELS - weight / BFQ_WEIGHT_CONVERSION_COEFF);
}

static void bfq_get_entity(struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	if (bfqq) {
		bfqq->ref++;
		bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
			     bfqq, bfqq->ref);
	}
}

/**
 * bfq_find_deepest - find the deepest node that an extraction can modify.
 * @node: the node being removed.
 *
 * Do the first step of an extraction in an rb tree, looking for the
 * node that will replace @node, and returning the deepest node that
 * the following modifications to the tree can touch. If @node is the
 * last node in the tree return %NULL.
 */
static struct rb_node *bfq_find_deepest(struct rb_node *node)
{
	struct rb_node *deepest;

	if (!node->rb_right && !node->rb_left)
		deepest = rb_parent(node);
	else if (!node->rb_right)
		deepest = node->rb_left;
	else if (!node->rb_left)
		deepest = node->rb_right;
	else {
		deepest = rb_next(node);
		if (deepest->rb_right)
			deepest = deepest->rb_right;
		else if (rb_parent(deepest) != node)
			deepest = rb_parent(deepest);
	}

	return deepest;
}

/**
 * bfq_active_extract - remove an entity from the active tree.
 * @st: the service_tree containing the tree.
 * @entity: the entity being removed.
 */
static void bfq_active_extract(struct bfq_service_tree *st,
			       struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct rb_node *node;

	node = bfq_find_deepest(&entity->rb_node);
	bfq_extract(&st->active, entity);

	if (node)
		bfq_update_active_tree(node);
	if (bfqq)
		list_del(&bfqq->bfqq_list);

	bfq_dec_active_entities(entity);
}

/**
 * bfq_idle_insert - insert an entity into the idle tree.
 * @st: the service tree containing the tree.
 * @entity: the entity to insert.
 */
static void bfq_idle_insert(struct bfq_service_tree *st,
			    struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct bfq_entity *first_idle = st->first_idle;
	struct bfq_entity *last_idle = st->last_idle;

	if (!first_idle || bfq_gt(first_idle->finish, entity->finish))
		st->first_idle = entity;
	if (!last_idle || bfq_gt(entity->finish, last_idle->finish))
		st->last_idle = entity;

	bfq_insert(&st->idle, entity);

	if (bfqq)
		list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
}

/**
 * bfq_forget_entity - do not consider entity any longer for scheduling
 * @st: the service tree.
 * @entity: the entity being removed.
 * @is_in_service: true if entity is currently the in-service entity.
 *
 * Forget everything about @entity. In addition, if entity represents
 * a queue, and the latter is not in service, then release the service
 * reference to the queue (the one taken through bfq_get_entity). In
 * fact, in this case, there is really no more service reference to
 * the queue, as the latter is also outside any service tree. If,
 * instead, the queue is in service, then __bfq_bfqd_reset_in_service
 * will take care of putting the reference when the queue finally
 * stops being served.
 */
static void bfq_forget_entity(struct bfq_service_tree *st,
			      struct bfq_entity *entity,
			      bool is_in_service)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->on_st_or_in_serv = false;
	st->wsum -= entity->weight;
	if (bfqq && !is_in_service)
		bfq_put_queue(bfqq);
}

/**
 * bfq_put_idle_entity - release the idle tree ref of an entity.
 * @st: service tree for the entity.
 * @entity: the entity being released.
 */
void bfq_put_idle_entity(struct bfq_service_tree *st, struct bfq_entity *entity)
{
	bfq_idle_extract(st, entity);
	bfq_forget_entity(st, entity,
			  entity == entity->sched_data->in_service_entity);
}

/**
 * bfq_forget_idle - update the idle tree if necessary.
 * @st: the service tree to act upon.
 *
 * To preserve the global O(log N) complexity we only remove one entry here;
 * as the idle tree will not grow indefinitely this can be done safely.
 */
static void bfq_forget_idle(struct bfq_service_tree *st)
{
	struct bfq_entity *first_idle = st->first_idle;
	struct bfq_entity *last_idle = st->last_idle;

	if (RB_EMPTY_ROOT(&st->active) && last_idle &&
	    !bfq_gt(last_idle->finish, st->vtime)) {
		/*
		 * Forget the whole idle tree, increasing the vtime past
		 * the last finish time of idle entities.
		 */
		st->vtime = last_idle->finish;
	}

	if (first_idle && !bfq_gt(first_idle->finish, st->vtime))
		bfq_put_idle_entity(st, first_idle);
}

struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity)
{
	struct bfq_sched_data *sched_data = entity->sched_data;
	unsigned int idx = bfq_class_idx(entity);

	return sched_data->service_tree + idx;
}
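
/*
 * For illustration (assuming the usual Linux I/O priority class values
 * IOPRIO_CLASS_RT == 1, IOPRIO_CLASS_BE == 2, IOPRIO_CLASS_IDLE == 3):
 * bfq_class_idx() subtracts 1, so an RT queue is scheduled on
 * sched_data->service_tree[0], a best-effort queue on service_tree[1],
 * and an idle-class queue on service_tree[2].
 */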

/*
 * Update weight and priority of entity. If update_class_too is true,
 * then update the ioprio_class of entity too.
 *
 * The reason why the update of ioprio_class is controlled through the
 * last parameter is as follows. Changing the ioprio class of an
 * entity implies changing the destination service trees for that
 * entity. If such a change occurs while the entity is already on one
 * of the service trees for its previous class, then the state of the
 * entity would become more complex: none of the new possible service
 * trees for the entity, according to bfq_entity_service_tree(), would
 * match any of the possible service trees on which the entity
 * is. Complex operations involving these trees, such as entity
 * activations and deactivations, should take into account this
 * additional complexity. To avoid this issue, this function is
 * invoked with update_class_too unset in the points in the code where
 * entity may happen to be on some tree.
 */
struct bfq_service_tree *
__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
				struct bfq_entity *entity,
				bool update_class_too)
{
	struct bfq_service_tree *new_st = old_st;

	if (entity->prio_changed) {
		struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
		unsigned int prev_weight, new_weight;

		/* Matches the smp_wmb() in bfq_group_set_weight. */
		smp_rmb();
		old_st->wsum -= entity->weight;

		if (entity->new_weight != entity->orig_weight) {
			if (entity->new_weight < BFQ_MIN_WEIGHT ||
			    entity->new_weight > BFQ_MAX_WEIGHT) {
				pr_crit("update_weight_prio: new_weight %d\n",
					entity->new_weight);
				if (entity->new_weight < BFQ_MIN_WEIGHT)
					entity->new_weight = BFQ_MIN_WEIGHT;
				else
					entity->new_weight = BFQ_MAX_WEIGHT;
			}
			entity->orig_weight = entity->new_weight;
			if (bfqq)
				bfqq->ioprio =
				  bfq_weight_to_ioprio(entity->orig_weight);
		}

		if (bfqq && update_class_too)
			bfqq->ioprio_class = bfqq->new_ioprio_class;

		/*
		 * Reset prio_changed only if the ioprio_class change
		 * is not pending any longer.
		 */
		if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class)
			entity->prio_changed = 0;

		/*
		 * NOTE: here we may be changing the weight too early,
		 * this will cause unfairness. The correct approach
		 * would have required additional complexity to defer
		 * weight changes to the proper time instants (i.e.,
		 * when entity->finish <= old_st->vtime).
		 */
		new_st = bfq_entity_service_tree(entity);

		prev_weight = entity->weight;
		new_weight = entity->orig_weight *
			     (bfqq ? bfqq->wr_coeff : 1);
		/*
		 * If the weight of the entity changes, and the entity is a
		 * queue, remove the entity from its old weight counter (if
		 * there is a counter associated with the entity).
		 */
		if (prev_weight != new_weight && bfqq)
			bfq_weights_tree_remove(bfqq);
		entity->weight = new_weight;
		/*
		 * Add the entity, if it is not a weight-raised queue,
		 * to the counter associated with its new weight.
		 */
		if (prev_weight != new_weight && bfqq && bfqq->wr_coeff == 1)
			bfq_weights_tree_add(bfqq);

		new_st->wsum += entity->weight;

		if (new_st != old_st)
			entity->start = new_st->vtime;
	}

	return new_st;
}

/**
 * bfq_bfqq_served - update the scheduler status after selection for
 *		     service.
 * @bfqq: the queue being served.
 * @served: bytes to transfer.
 *
 * NOTE: this can be optimized, as the timestamps of upper level entities
 * are synchronized every time a new bfqq is selected for service. By now,
 * we keep it to better check consistency.
 */
void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
{
	struct bfq_entity *entity = &bfqq->entity;
	struct bfq_service_tree *st;

	if (!bfqq->service_from_backlogged)
		bfqq->first_IO_time = jiffies;

	if (bfqq->wr_coeff > 1)
		bfqq->service_from_wr += served;

	bfqq->service_from_backlogged += served;
	for_each_entity(entity) {
		st = bfq_entity_service_tree(entity);

		entity->service += served;

		st->vtime += bfq_delta(served, st->wsum);
		bfq_forget_idle(st);
	}
	bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
}
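
/*
 * For illustration (numbers are arbitrary): if a service tree has a
 * weight sum st->wsum of 100 and the queue just served received 8
 * units of service, the loop above advances st->vtime by
 * bfq_delta(8, 100) = (8 << 22) / 100 = 335544: the system virtual
 * time grows in proportion to the service delivered, scaled down by
 * the total weight of the entities sharing the tree. Each entity on
 * the path to the root is charged the same 8 units of service, but
 * against the weight sum of its own level.
 */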

/**
 * bfq_bfqq_charge_time - charge an amount of service equivalent to the length
 *			  of the time interval during which bfqq has been in
 *			  service.
 * @bfqd: the device
 * @bfqq: the queue that needs a service update.
 * @time_ms: the amount of time during which the queue has received service
 *
 * If a queue does not consume its budget fast enough, then providing
 * the queue with service fairness may impair throughput, more or less
 * severely. For this reason, queues that consume their budget slowly
 * are provided with time fairness instead of service fairness. This
 * goal is achieved through the BFQ scheduling engine, even if such an
 * engine works in the service domain, and not in the time domain. The
 * trick is charging these queues with an inflated amount of service,
 * equal to the amount of service that they would have received during
 * their service slot if they had been fast, i.e., if their requests
 * had been dispatched at a rate equal to the estimated peak rate.
 *
 * It is worth noting that time fairness can cause important
 * distortions in terms of bandwidth distribution, on devices with
 * internal queueing. The reason is that I/O requests dispatched
 * during the service slot of a queue may be served after that service
 * slot is finished, and may have a total processing time loosely
 * correlated with the duration of the service slot. This is
 * especially true for short service slots.
 */
void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
			  unsigned long time_ms)
{
	struct bfq_entity *entity = &bfqq->entity;
	unsigned long timeout_ms = jiffies_to_msecs(bfq_timeout);
	unsigned long bounded_time_ms = min(time_ms, timeout_ms);
	int serv_to_charge_for_time =
		(bfqd->bfq_max_budget * bounded_time_ms) / timeout_ms;
	int tot_serv_to_charge = max(serv_to_charge_for_time, entity->service);

	/* Increase budget to avoid inconsistencies */
	if (tot_serv_to_charge > entity->budget)
		entity->budget = tot_serv_to_charge;

	bfq_bfqq_served(bfqq,
			max_t(int, 0, tot_serv_to_charge - entity->service));
}
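
/*
 * For illustration (numbers are arbitrary): with a budget timeout of
 * 120 ms and bfq_max_budget equal to 16384, a queue that held the
 * device for 30 ms is charged (16384 * 30) / 120 = 4096 units of
 * service, regardless of how little service it actually consumed. If
 * the queue has already been charged more than that through normal
 * accounting, the larger value is kept instead.
 */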

static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
					struct bfq_service_tree *st,
					bool backshifted)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	/*
	 * When this function is invoked, entity is not in any service
	 * tree, so it is safe to invoke the next function with the
	 * last parameter set (see the comments on the function).
	 */
	st = __bfq_entity_update_weight_prio(st, entity, true);
	bfq_calc_finish(entity, entity->budget);

	/*
	 * If some queues enjoy backshifting for a while, then their
	 * (virtual) finish timestamps may happen to become lower and
	 * lower than the system virtual time. In particular, if
	 * these queues often happen to be idle for short time
	 * periods, and during such time periods other queues with
	 * higher timestamps happen to be busy, then the backshifted
	 * timestamps of the former queues can become much lower than
	 * the system virtual time. In fact, to serve the queues with
	 * higher timestamps while the ones with lower timestamps are
	 * idle, the system virtual time may be pushed up to much
	 * higher values than the finish timestamps of the idle
	 * queues. As a consequence, the finish timestamps of all new
	 * or newly activated queues may end up being much larger than
	 * those of lucky queues with backshifted timestamps. The
	 * latter queues may then monopolize the device for a lot of
	 * time. This would simply break service guarantees.
	 *
	 * To reduce this problem, push up a little bit the
	 * backshifted timestamps of the queue associated with this
	 * entity (only a queue can happen to have the backshifted
	 * flag set): just enough to let the finish timestamp of the
	 * queue be equal to the current value of the system virtual
	 * time. This may introduce a little unfairness among queues
	 * with backshifted timestamps, but it does not break
	 * worst-case fairness guarantees.
	 *
	 * As a special case, if bfqq is weight-raised, push up
	 * timestamps much less, to keep very low the probability that
	 * this push up causes the backshifted finish timestamps of
	 * weight-raised queues to become higher than the backshifted
	 * finish timestamps of non weight-raised queues.
	 */
	if (backshifted && bfq_gt(st->vtime, entity->finish)) {
		unsigned long delta = st->vtime - entity->finish;

		if (bfqq)
			delta /= bfqq->wr_coeff;

		entity->start += delta;
		entity->finish += delta;
	}

	bfq_active_insert(st, entity);
}

/**
 * __bfq_activate_entity - handle activation of entity.
 * @entity: the entity being activated.
 * @non_blocking_wait_rq: true if entity was waiting for a request
 *
 * Called for a 'true' activation, i.e., if entity is not active and
 * one of its children receives a new request.
 *
 * Basically, this function updates the timestamps of entity and
 * inserts entity into its active tree, after possibly extracting it
 * from its idle tree.
 */
static void __bfq_activate_entity(struct bfq_entity *entity,
				  bool non_blocking_wait_rq)
{
	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
	bool backshifted = false;
	unsigned long long min_vstart;

	/* See comments on bfq_bfqq_update_budg_for_activation */
	if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) {
		backshifted = true;
		min_vstart = entity->finish;
	} else
		min_vstart = st->vtime;

	if (entity->tree == &st->idle) {
		/*
		 * Must be on the idle tree, bfq_idle_extract() will
		 * check for that.
		 */
		bfq_idle_extract(st, entity);
		entity->start = bfq_gt(min_vstart, entity->finish) ?
			min_vstart : entity->finish;
	} else {
		/*
		 * The finish time of the entity may be invalid, and
		 * it is in the past for sure, otherwise the queue
		 * would have been on the idle tree.
		 */
		entity->start = min_vstart;
		st->wsum += entity->weight;
		/*
		 * entity is about to be inserted into a service tree,
		 * and then set in service: get a reference to make
		 * sure entity does not disappear until it is no
		 * longer in service or scheduled for service.
		 */
		bfq_get_entity(entity);

		entity->on_st_or_in_serv = true;
	}

	bfq_update_fin_time_enqueue(entity, st, backshifted);
}

/**
 * __bfq_requeue_entity - handle requeueing or repositioning of an entity.
 * @entity: the entity being requeued or repositioned.
 *
 * Requeueing is needed if this entity stops being served, which
 * happens if a leaf descendant entity has expired. On the other hand,
 * repositioning is needed if the next_in_service entity for the child
 * entity has changed. See the comments inside the function for
 * details.
 *
 * Basically, this function: 1) removes entity from its active tree if
 * present there, 2) updates the timestamps of entity and 3) inserts
 * entity back into its active tree (in the new, right position for
 * the new values of the timestamps).
 */
static void __bfq_requeue_entity(struct bfq_entity *entity)
{
	struct bfq_sched_data *sd = entity->sched_data;
	struct bfq_service_tree *st = bfq_entity_service_tree(entity);

	if (entity == sd->in_service_entity) {
		/*
		 * We are requeueing the current in-service entity,
		 * which may have to be done for one of the following
		 * reasons:
		 * - entity represents the in-service queue, and the
		 *   in-service queue is being requeued after an
		 *   expiration;
		 * - entity represents a group, and its budget has
		 *   changed because one of its child entities has
		 *   just been either activated or requeued for some
		 *   reason; the timestamps of the entity need then to
		 *   be updated, and the entity needs to be enqueued
		 *   or repositioned accordingly.
		 *
		 * In particular, before requeueing, the start time of
		 * the entity must be moved forward to account for the
		 * service that the entity has received while in
		 * service. This is done by the next instructions. The
		 * finish time will then be updated according to this
		 * new value of the start time, and to the budget of
		 * the entity.
		 */
		bfq_calc_finish(entity, entity->service);
		entity->start = entity->finish;
		/*
		 * In addition, if the entity had more than one child
		 * when set in service, then it was not extracted from
		 * the active tree. This implies that the position of
		 * the entity in the active tree may need to be
		 * changed now, because we have just updated the start
		 * time of the entity, and we will update its finish
		 * time in a moment (the requeueing is then, more
		 * precisely, a repositioning in this case). To
		 * implement this repositioning, we: 1) dequeue the
		 * entity here, 2) update the finish time and requeue
		 * the entity according to the new timestamps below.
		 */
		if (entity->tree)
			bfq_active_extract(st, entity);
	} else { /* The entity is already active, and not in service */
		/*
		 * In this case, this function gets called only if the
		 * next_in_service entity below this entity has
		 * changed, and this change has caused the budget of
		 * this entity to change, which, finally, implies that
		 * the finish time of this entity must be
		 * updated. Such an update may cause the scheduling,
		 * i.e., the position in the active tree, of this
		 * entity to change. We handle this change by: 1)
		 * dequeueing the entity here, 2) updating the finish
		 * time and requeueing the entity according to the new
		 * timestamps below.
		 * This is the same approach as the
		 * non-extracted-entity sub-case above.
		 */
		bfq_active_extract(st, entity);
	}

	bfq_update_fin_time_enqueue(entity, st, false);
}

static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
					  bool non_blocking_wait_rq)
{
	struct bfq_service_tree *st = bfq_entity_service_tree(entity);

	if (entity->sched_data->in_service_entity == entity ||
	    entity->tree == &st->active)
		/*
		 * in service or already queued on the active tree,
		 * requeue or reposition
		 */
		__bfq_requeue_entity(entity);
	else
		/*
		 * Not in service and not queued on its active tree:
		 * the entity is idle and this is a true activation.
		 */
		__bfq_activate_entity(entity, non_blocking_wait_rq);
}

/**
 * bfq_activate_requeue_entity - activate or requeue an entity representing a
 *				 bfq_queue, and activate, requeue or reposition
 *				 all ancestors for which such an update becomes
 *				 necessary.
 * @entity: the entity to activate.
 * @non_blocking_wait_rq: true if this entity was waiting for a request
 * @requeue: true if this is a requeue, which implies that bfqq is
 *	     being expired; thus ALL its ancestors stop being served and must
 *	     therefore be requeued
 * @expiration: true if this function is being invoked in the expiration path
 *		of the in-service queue
 */
static void bfq_activate_requeue_entity(struct bfq_entity *entity,
					bool non_blocking_wait_rq,
					bool requeue, bool expiration)
{
	for_each_entity(entity) {
		__bfq_activate_requeue_entity(entity, non_blocking_wait_rq);
		if (!bfq_update_next_in_service(entity->sched_data, entity,
						expiration) && !requeue)
			break;
	}
}

/**
 * __bfq_deactivate_entity - update sched_data and service trees for
 * entity, so as to represent entity as inactive
 * @entity: the entity being deactivated.
 * @ins_into_idle_tree: if false, the entity will not be put into the
 *			idle tree.
 *
 * If necessary and allowed, puts entity into the idle tree. NOTE:
 * entity may be on no tree if in service.
 */
bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
{
	struct bfq_sched_data *sd = entity->sched_data;
	struct bfq_service_tree *st;
	bool is_in_service;

	if (!entity->on_st_or_in_serv) /*
					* entity never activated, or
					* already inactive
					*/
		return false;

	/*
	 * If we get here, then entity is active, which implies that
	 * bfq_group_set_parent has already been invoked for the group
	 * represented by entity. Therefore, the field
	 * entity->sched_data has been set, and we can safely use it.
	 */
	st = bfq_entity_service_tree(entity);
	is_in_service = entity == sd->in_service_entity;

	bfq_calc_finish(entity, entity->service);

	if (is_in_service)
		sd->in_service_entity = NULL;
	else
		/*
		 * Non in-service entity: nobody will take care of
		 * resetting its service counter on expiration. Do it
		 * now.
		 */
		entity->service = 0;

	if (entity->tree == &st->active)
		bfq_active_extract(st, entity);
	else if (!is_in_service && entity->tree == &st->idle)
		bfq_idle_extract(st, entity);

	if (!ins_into_idle_tree || !bfq_gt(entity->finish, st->vtime))
		bfq_forget_entity(st, entity, is_in_service);
	else
		bfq_idle_insert(st, entity);

	return true;
}

/**
 * bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
 * @entity: the entity to deactivate.
 * @ins_into_idle_tree: true if the entity can be put into the idle tree
 * @expiration: true if this function is being invoked in the expiration path
 *		of the in-service queue
 */
static void bfq_deactivate_entity(struct bfq_entity *entity,
				  bool ins_into_idle_tree,
				  bool expiration)
{
	struct bfq_sched_data *sd;
	struct bfq_entity *parent = NULL;

	for_each_entity_safe(entity, parent) {
		sd = entity->sched_data;

		if (!__bfq_deactivate_entity(entity, ins_into_idle_tree)) {
			/*
			 * entity is not in any tree any more, so
			 * this deactivation is a no-op, and there is
			 * nothing to change for upper-level entities
			 * (in case of expiration, this can never
			 * happen).
			 */
			return;
		}

		if (sd->next_in_service == entity)
			/*
			 * entity was the next_in_service entity;
			 * then, since entity has just been
			 * deactivated, a new one must be found.
			 */
			bfq_update_next_in_service(sd, NULL, expiration);

		if (sd->next_in_service || sd->in_service_entity) {
			/*
			 * The parent entity is still active, because
			 * either next_in_service or in_service_entity
			 * is not NULL. So, no further upwards
			 * deactivation must be performed. Yet,
			 * next_in_service has changed. So the
			 * schedule does need to be updated upwards.
			 *
			 * NOTE If in_service_entity is not NULL, then
			 * next_in_service may happen to be NULL,
			 * although the parent entity is evidently
			 * active. This happens if 1) the entity
			 * pointed by in_service_entity is the only
			 * active entity in the parent entity, and 2)
			 * according to the definition of
			 * next_in_service, the in_service_entity
			 * cannot be considered as
			 * next_in_service. See the comments on the
			 * definition of next_in_service for details.
			 */
			break;
		}

		/*
		 * If we get here, then the parent is no longer
		 * backlogged and we need to propagate the
		 * deactivation upwards. Thus let the loop go on.
		 */

		/*
		 * Also let parent be queued into the idle tree on
		 * deactivation, to preserve service guarantees, and
		 * assuming that the caller of this function does not
		 * need parent entities too to be removed completely.
		 */
		ins_into_idle_tree = true;
	}

	/*
	 * If the deactivation loop is fully executed, then there are
	 * no more entities to touch and the next loop is not executed
	 * at all. Otherwise, requeue remaining entities if they are
	 * about to stop receiving service, or reposition them if this
	 * is not the case.
	 */
	entity = parent;
	for_each_entity(entity) {
		/*
		 * Invoke __bfq_requeue_entity on entity, even if
		 * already active, to requeue/reposition it in the
		 * active tree (because sd->next_in_service has
		 * changed)
		 */
		__bfq_requeue_entity(entity);

		sd = entity->sched_data;
		if (!bfq_update_next_in_service(sd, entity, expiration) &&
		    !expiration)
			/*
			 * next_in_service unchanged or not causing
			 * any change in entity->parent->sd, and no
			 * requeueing needed for expiration: stop
			 * here.
			 */
			break;
	}
}

/**
 * bfq_calc_vtime_jump - compute the value to which the vtime should jump,
 *                       if needed, to have at least one entity eligible.
 * @st: the service tree to act upon.
 *
 * Assumes that st is not empty.
 */
static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st)
{
	struct bfq_entity *root_entity = bfq_root_active_entity(&st->active);

	if (bfq_gt(root_entity->min_start, st->vtime))
		return root_entity->min_start;

	return st->vtime;
}

static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value)
{
	if (new_value > st->vtime) {
		st->vtime = new_value;
		bfq_forget_idle(st);
	}
}
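
/*
 * For illustration (numbers are arbitrary): an entity is eligible when
 * its start timestamp is not after the tree's virtual time. If
 * st->vtime is 100 while the smallest start time among the active
 * entities (cached in root_entity->min_start) is 150, then no entity
 * is eligible; bfq_calc_vtime_jump() returns 150, and the callers
 * below use that value so the search for the next entity to serve
 * always finds an eligible candidate.
 */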

/**
 * bfq_first_active_entity - find the eligible entity with
 *                           the smallest finish time
 * @st: the service tree to select from.
 * @vtime: the system virtual time to use as a reference for eligibility
 *
 * This function searches for the first schedulable entity, starting from the
 * root of the tree and going on the left every time on this side there is
 * a subtree with at least one eligible (start <= vtime) entity. The path on
 * the right is followed only if a) the left subtree contains no eligible
 * entities and b) no eligible entity has been found yet.
 */
static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st,
						  u64 vtime)
{
	struct bfq_entity *entry, *first = NULL;
	struct rb_node *node = st->active.rb_node;

	while (node) {
		entry = rb_entry(node, struct bfq_entity, rb_node);
left:
		if (!bfq_gt(entry->start, vtime))
			first = entry;

		if (node->rb_left) {
			entry = rb_entry(node->rb_left,
					 struct bfq_entity, rb_node);
			if (!bfq_gt(entry->min_start, vtime)) {
				node = node->rb_left;
				goto left;
			}
		}
		if (first)
			break;
		node = node->rb_right;
	}

	return first;
}

/**
 * __bfq_lookup_next_entity - return the first eligible entity in @st.
 * @st: the service tree.
 * @in_service: whether or not there is an in-service entity for the sched_data
 *	        this active tree belongs to.
 *
 * If there is no in-service entity for the sched_data st belongs to,
 * then return the entity that will be set in service if:
 * 1) the parent entity this st belongs to is set in service;
 * 2) no entity belonging to such parent entity undergoes a state change
 * that would influence the timestamps of the entity (e.g., becomes idle,
 * becomes backlogged, changes its budget, ...).
 *
 * In this first case, update the virtual time in @st too (see the
 * comments on this update inside the function).
 *
 * In contrast, if there is an in-service entity, then return the
 * entity that would be set in service if not only the above
 * conditions, but also the next one held true: the currently
 * in-service entity, on expiration,
 * 1) gets a finish time equal to the current one, or
 * 2) is not eligible any more, or
 * 3) is idle.
 */
static struct bfq_entity *
__bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service)
{
	struct bfq_entity *entity;
	u64 new_vtime;

	if (RB_EMPTY_ROOT(&st->active))
		return NULL;

	/*
	 * Get the value of the system virtual time for which at
	 * least one entity is eligible.
	 */
	new_vtime = bfq_calc_vtime_jump(st);

	/*
	 * If there is no in-service entity for the sched_data this
	 * active tree belongs to, then push the system virtual time
	 * up to the value that guarantees that at least one entity is
	 * eligible. If, instead, there is an in-service entity, then
	 * do not make any such update, because there is already an
	 * eligible entity, namely the in-service one (even if the
	 * entity is not on st, because it was extracted when set in
	 * service).
	 */
	if (!in_service)
		bfq_update_vtime(st, new_vtime);

	entity = bfq_first_active_entity(st, new_vtime);

	return entity;
}

/**
 * bfq_lookup_next_entity - return the first eligible entity in @sd.
 * @sd: the sched_data.
 * @expiration: true if we are on the expiration path of the in-service queue
 *
 * This function is invoked when there has been a change in the trees
 * for sd, and we need to know what is the new next entity to serve
 * after this change.
 */
static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
						 bool expiration)
{
	struct bfq_service_tree *st = sd->service_tree;
	struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1);
	struct bfq_entity *entity = NULL;
	int class_idx = 0;

	/*
	 * Choose from idle class, if needed to guarantee a minimum
	 * bandwidth to this class (and if there is some active entity
	 * in idle class). This should also mitigate
	 * priority-inversion problems in case a low priority task is
	 * holding file system resources.
	 */
	if (time_is_before_jiffies(sd->bfq_class_idle_last_service +
				   BFQ_CL_IDLE_TIMEOUT)) {
		if (!RB_EMPTY_ROOT(&idle_class_st->active))
			class_idx = BFQ_IOPRIO_CLASSES - 1;
		/* About to be served if backlogged, or not yet backlogged */
		sd->bfq_class_idle_last_service = jiffies;
	}

	/*
	 * Find the next entity to serve for the highest-priority
	 * class, unless the idle class needs to be served.
	 */
	for (; class_idx < BFQ_IOPRIO_CLASSES; class_idx++) {
		/*
		 * If expiration is true, then bfq_lookup_next_entity
		 * is being invoked as a part of the expiration path
		 * of the in-service queue. In this case, even if
		 * sd->in_service_entity is not NULL,
		 * sd->in_service_entity at this point is actually not
		 * in service any more, and, if needed, has already
		 * been properly queued or requeued into the right
		 * tree. The reason why sd->in_service_entity is still
		 * not NULL here, even if expiration is true, is that
		 * sd->in_service_entity is reset as a last step in the
		 * expiration path. So, if expiration is true, tell
		 * __bfq_lookup_next_entity that there is no
		 * sd->in_service_entity.
		 */
		entity = __bfq_lookup_next_entity(st + class_idx,
						  sd->in_service_entity &&
						  !expiration);

		if (entity)
			break;
	}

	return entity;
}

bool next_queue_may_preempt(struct bfq_data *bfqd)
{
	struct bfq_sched_data *sd = &bfqd->root_group->sched_data;

	return sd->next_in_service != sd->in_service_entity;
}

/*
 * Get next queue for service.
 */
struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
{
	struct bfq_entity *entity = NULL;
	struct bfq_sched_data *sd;
	struct bfq_queue *bfqq;

	if (bfq_tot_busy_queues(bfqd) == 0)
		return NULL;

	/*
	 * Traverse the path from the root to the leaf entity to
	 * serve. Set in service all the entities visited along the
	 * way.
	 */
	sd = &bfqd->root_group->sched_data;
	for (; sd ; sd = entity->my_sched_data) {
		/*
		 * WARNING. We are about to set the in-service entity
		 * to sd->next_in_service, i.e., to the (cached) value
		 * returned by bfq_lookup_next_entity(sd) the last
		 * time it was invoked, i.e., the last time when the
		 * service order in sd changed as a consequence of the
		 * activation or deactivation of an entity. In this
		 * respect, if we execute bfq_lookup_next_entity(sd)
		 * at this very moment, it may, although with low
		 * probability, yield a different entity than that
		 * pointed to by sd->next_in_service. This rare event
		 * happens in case there was no CLASS_IDLE entity to
		 * serve for sd when bfq_lookup_next_entity(sd) was
		 * invoked for the last time, while there is now one
		 * such entity.
		 *
		 * If the above event happens, then the scheduling of
		 * such entity in CLASS_IDLE is postponed until the
		 * service of the sd->next_in_service entity
		 * finishes. In fact, when the latter is expired,
		 * bfq_lookup_next_entity(sd) gets called again,
		 * exactly to update sd->next_in_service.
		 */

		/* Make next_in_service entity become in_service_entity */
		entity = sd->next_in_service;
		sd->in_service_entity = entity;

		/*
		 * If entity is no longer a candidate for next
		 * service, then it must be extracted from its active
		 * tree, so as to make sure that it won't be
		 * considered when computing next_in_service. See the
		 * comments on the function
		 * bfq_no_longer_next_in_service() for details.
		 */
		if (bfq_no_longer_next_in_service(entity))
			bfq_active_extract(bfq_entity_service_tree(entity),
					   entity);

		/*
		 * Even if entity is not to be extracted according to
		 * the above check, a descendant entity may get
		 * extracted in one of the next iterations of this
		 * loop. Such an event could cause a change in
		 * next_in_service for the level of the descendant
		 * entity, and thus possibly back to this level.
		 *
		 * However, we cannot perform the resulting needed
		 * update of next_in_service for this level before the
		 * end of the whole loop, because, to know which is
		 * the correct next-to-serve candidate entity for each
		 * level, we need first to find the leaf entity to set
		 * in service. In fact, only after we know which is
		 * the next-to-serve leaf entity, we can discover
		 * whether the parent entity of the leaf entity
		 * becomes the next-to-serve, and so on.
		 */
	}

	bfqq = bfq_entity_to_bfqq(entity);

	/*
	 * We can finally update all next-to-serve entities along the
	 * path from the leaf entity just set in service to the root.
	 */
	for_each_entity(entity) {
		struct bfq_sched_data *sd = entity->sched_data;

		if (!bfq_update_next_in_service(sd, NULL, false))
			break;
	}

	return bfqq;
}

/* returns true if the in-service queue gets freed */
bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
{
	struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
	struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
	struct bfq_entity *entity = in_serv_entity;

	bfq_clear_bfqq_wait_request(in_serv_bfqq);
	hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
	bfqd->in_service_queue = NULL;

	/*
	 * When this function is called, all in-service entities have
	 * been properly deactivated or requeued, so we can safely
	 * execute the final step: reset in_service_entity along the
	 * path from entity to the root.
	 */
	for_each_entity(entity)
		entity->sched_data->in_service_entity = NULL;

	/*
	 * in_serv_entity is no longer in service, so, if it is in no
	 * service tree either, then release the service reference to
	 * the queue it represents (taken with bfq_get_entity).
	 */
	if (!in_serv_entity->on_st_or_in_serv) {
		/*
		 * If no process is referencing in_serv_bfqq any
		 * longer, then the service reference may be the only
		 * reference to the queue. If this is the case, then
		 * bfqq gets freed here.
		 */
		int ref = in_serv_bfqq->ref;
		bfq_put_queue(in_serv_bfqq);
		if (ref == 1)
			return true;
	}

	return false;
}

void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
			 bool ins_into_idle_tree, bool expiration)
{
	struct bfq_entity *entity = &bfqq->entity;

	bfq_deactivate_entity(entity, ins_into_idle_tree, expiration);
}

void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
	struct bfq_entity *entity = &bfqq->entity;

	bfq_activate_requeue_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq),
				    false, false);
	bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
}

void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		      bool expiration)
{
	struct bfq_entity *entity = &bfqq->entity;

	bfq_activate_requeue_entity(entity, false,
				    bfqq == bfqd->in_service_queue, expiration);
}

void bfq_add_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq)
{
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	struct bfq_entity *entity = &bfqq->entity;

	if (!entity->in_groups_with_pending_reqs) {
		entity->in_groups_with_pending_reqs = true;
		if (!(bfqq_group(bfqq)->num_queues_with_pending_reqs++))
			bfqq->bfqd->num_groups_with_pending_reqs++;
	}
#endif
}

void bfq_del_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq)
{
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	struct bfq_entity *entity = &bfqq->entity;

	if (entity->in_groups_with_pending_reqs) {
		entity->in_groups_with_pending_reqs = false;
		if (!(--bfqq_group(bfqq)->num_queues_with_pending_reqs))
			bfqq->bfqd->num_groups_with_pending_reqs--;
	}
#endif
}

/*
 * Called when the bfqq no longer has requests pending; remove it from
 * the service tree. As a special case, it can be invoked during an
 * expiration.
 */
void bfq_del_bfqq_busy(struct bfq_queue *bfqq, bool expiration)
{
	struct bfq_data *bfqd = bfqq->bfqd;

	bfq_log_bfqq(bfqd, bfqq, "del from busy");

	bfq_clear_bfqq_busy(bfqq);

	bfqd->busy_queues[bfqq->ioprio_class - 1]--;

	if (bfqq->wr_coeff > 1)
		bfqd->wr_busy_queues--;

	bfqg_stats_update_dequeue(bfqq_group(bfqq));

	bfq_deactivate_bfqq(bfqd, bfqq, true, expiration);

	if (!bfqq->dispatched) {
		bfq_del_bfqq_in_groups_with_pending_reqs(bfqq);
		/*
		 * Next function is invoked last, because it causes bfqq to be
		 * freed. DO NOT use bfqq after the next function invocation.
		 */
		bfq_weights_tree_remove(bfqq);
	}
}

/*
 * Called when an inactive queue receives a new request.
 */
void bfq_add_bfqq_busy(struct bfq_queue *bfqq)
{
	struct bfq_data *bfqd = bfqq->bfqd;

	bfq_log_bfqq(bfqd, bfqq, "add to busy");

	bfq_activate_bfqq(bfqd, bfqq);

	bfq_mark_bfqq_busy(bfqq);
	bfqd->busy_queues[bfqq->ioprio_class - 1]++;

	if (!bfqq->dispatched) {
		bfq_add_bfqq_in_groups_with_pending_reqs(bfqq);
		if (bfqq->wr_coeff == 1)
			bfq_weights_tree_add(bfqq);
	}

	if (bfqq->wr_coeff > 1)
		bfqd->wr_busy_queues++;

	/* Move bfqq to the head of the woken list of its waker */
	if (!hlist_unhashed(&bfqq->woken_list_node) &&
	    &bfqq->woken_list_node != bfqq->waker_bfqq->woken_list.first) {
		hlist_del_init(&bfqq->woken_list_node);
		hlist_add_head(&bfqq->woken_list_node,
			       &bfqq->waker_bfqq->woken_list);
	}
}