GitHub Repository: torvalds/linux
Path: blob/master/block/bfq-wf2q.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Hierarchical Budget Worst-case Fair Weighted Fair Queueing
 * (B-WF2Q+): hierarchical scheduling algorithm by which the BFQ I/O
 * scheduler schedules generic entities. The latter can represent
 * either single bfq queues (associated with processes) or groups of
 * bfq queues (associated with cgroups).
 */
#include "bfq-iosched.h"

/**
 * bfq_gt - compare two timestamps.
 * @a: first ts.
 * @b: second ts.
 *
 * Return @a > @b, dealing with wrapping correctly.
 */
static int bfq_gt(u64 a, u64 b)
{
	return (s64)(a - b) > 0;
}
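
/*
 * Example of the wraparound-safe comparison above: bfq_gt(1, ULLONG_MAX)
 * evaluates (s64)(1 - ULLONG_MAX) = (s64)2 > 0, i.e. true, so timestamp 1
 * is correctly considered later than ULLONG_MAX once the counter has
 * wrapped, whereas a plain 'a > b' comparison would return false here.
 */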

static struct bfq_entity *bfq_root_active_entity(struct rb_root *tree)
{
	struct rb_node *node = tree->rb_node;

	return rb_entry(node, struct bfq_entity, rb_node);
}

static unsigned int bfq_class_idx(struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	return bfqq ? bfqq->ioprio_class - 1 :
		BFQ_DEFAULT_GRP_CLASS - 1;
}
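
/*
 * Example: with the I/O priority classes defined in
 * include/linux/ioprio.h (IOPRIO_CLASS_RT = 1, IOPRIO_CLASS_BE = 2,
 * IOPRIO_CLASS_IDLE = 3), bfq_class_idx() maps an RT queue to index 0,
 * a BE queue to index 1 and an IDLE queue to index 2; a group entity
 * (bfqq == NULL) falls back to the BE index, since
 * BFQ_DEFAULT_GRP_CLASS is IOPRIO_CLASS_BE.
 */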

unsigned int bfq_tot_busy_queues(struct bfq_data *bfqd)
{
	return bfqd->busy_queues[0] + bfqd->busy_queues[1] +
		bfqd->busy_queues[2];
}

static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
						 bool expiration);

static bool bfq_update_parent_budget(struct bfq_entity *next_in_service);

/**
 * bfq_update_next_in_service - update sd->next_in_service
 * @sd: sched_data for which to perform the update.
 * @new_entity: if not NULL, pointer to the entity whose activation,
 *	requeueing or repositioning triggered the invocation of
 *	this function.
 * @expiration: if true, this function is being invoked after the
 *	expiration of the in-service entity
 *
 * This function is called to update sd->next_in_service, which, in
 * its turn, may change as a consequence of the insertion or
 * extraction of an entity into/from one of the active trees of
 * sd. These insertions/extractions occur as a consequence of
 * activations/deactivations of entities, with some activations being
 * 'true' activations, and other activations being requeueings (i.e.,
 * implementing the second, requeueing phase of the mechanism used to
 * reposition an entity in its active tree; see comments on
 * __bfq_activate_entity and __bfq_requeue_entity for details). In
 * both the last two activation sub-cases, new_entity points to the
 * just activated or requeued entity.
 *
 * Returns true if sd->next_in_service changes in such a way that
 * entity->parent may become the next_in_service for its parent
 * entity.
 */
static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
				       struct bfq_entity *new_entity,
				       bool expiration)
{
	struct bfq_entity *next_in_service = sd->next_in_service;
	bool parent_sched_may_change = false;
	bool change_without_lookup = false;

	/*
	 * If this update is triggered by the activation, requeueing
	 * or repositioning of an entity that does not coincide with
	 * sd->next_in_service, then a full lookup in the active tree
	 * can be avoided. In fact, it is enough to check whether the
	 * just-modified entity has the same priority as
	 * sd->next_in_service, is eligible and has a lower virtual
	 * finish time than sd->next_in_service. If this compound
	 * condition holds, then the new entity becomes the new
	 * next_in_service. Otherwise no change is needed.
	 */
	if (new_entity && new_entity != sd->next_in_service) {
		/*
		 * Flag used to decide whether to replace
		 * sd->next_in_service with new_entity. Tentatively
		 * set to true, and left as true if
		 * sd->next_in_service is NULL.
		 */
		change_without_lookup = true;

		/*
		 * If there is already a next_in_service candidate
		 * entity, then compare timestamps to decide whether
		 * to replace sd->next_in_service with new_entity.
		 */
		if (next_in_service) {
			unsigned int new_entity_class_idx =
				bfq_class_idx(new_entity);
			struct bfq_service_tree *st =
				sd->service_tree + new_entity_class_idx;

			change_without_lookup =
				(new_entity_class_idx ==
				 bfq_class_idx(next_in_service)
				 &&
				 !bfq_gt(new_entity->start, st->vtime)
				 &&
				 bfq_gt(next_in_service->finish,
					new_entity->finish));
		}

		if (change_without_lookup)
			next_in_service = new_entity;
	}

	if (!change_without_lookup) /* lookup needed */
		next_in_service = bfq_lookup_next_entity(sd, expiration);

	if (next_in_service) {
		bool new_budget_triggers_change =
			bfq_update_parent_budget(next_in_service);

		parent_sched_may_change = !sd->next_in_service ||
			new_budget_triggers_change;
	}

	sd->next_in_service = next_in_service;

	return parent_sched_may_change;
}

#ifdef CONFIG_BFQ_GROUP_IOSCHED

/*
 * Returns true if this budget change may let next_in_service->parent
 * become the next_in_service entity for its parent entity.
 */
static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
{
	struct bfq_entity *bfqg_entity;
	struct bfq_group *bfqg;
	struct bfq_sched_data *group_sd;
	bool ret = false;

	group_sd = next_in_service->sched_data;

	bfqg = container_of(group_sd, struct bfq_group, sched_data);
	/*
	 * bfq_group's my_entity field is not NULL only if the group
	 * is not the root group. We must not touch the root entity
	 * as it must never become an in-service entity.
	 */
	bfqg_entity = bfqg->my_entity;
	if (bfqg_entity) {
		if (bfqg_entity->budget > next_in_service->budget)
			ret = true;
		bfqg_entity->budget = next_in_service->budget;
	}

	return ret;
}

/*
 * This function tells whether entity stops being a candidate for next
 * service, according to the restrictive definition of the field
 * next_in_service. In particular, this function is invoked for an
 * entity that is about to be set in service.
 *
 * If entity is a queue, then the entity is no longer a candidate for
 * next service according to that definition, because entity is
 * about to become the in-service queue. This function then returns
 * true if entity is a queue.
 *
 * In contrast, entity could still be a candidate for next service if
 * it is not a queue, and has more than one active child. In fact,
 * even if one of its children is about to be set in service, other
 * active children may still be the next to serve, for the parent
 * entity, even according to the above definition. As a consequence, a
 * non-queue entity is not a candidate for next-service only if it has
 * only one active child. And only if this condition holds, then this
 * function returns true for a non-queue entity.
 */
static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
{
	struct bfq_group *bfqg;

	if (bfq_entity_to_bfqq(entity))
		return true;

	bfqg = container_of(entity, struct bfq_group, entity);

	/*
	 * The field active_entities does not always contain the
	 * actual number of active children entities: it happens to
	 * not account for the in-service entity in case the latter is
	 * removed from its active tree (which may get done after
	 * invoking the function bfq_no_longer_next_in_service in
	 * bfq_get_next_queue). Fortunately, here, i.e., while
	 * bfq_no_longer_next_in_service is not yet completed in
	 * bfq_get_next_queue, bfq_active_extract has not yet been
	 * invoked, and thus active_entities still coincides with the
	 * actual number of active entities.
	 */
	if (bfqg->active_entities == 1)
		return true;

	return false;
}

static void bfq_inc_active_entities(struct bfq_entity *entity)
{
	struct bfq_sched_data *sd = entity->sched_data;
	struct bfq_group *bfqg = container_of(sd, struct bfq_group, sched_data);

	if (bfqg != bfqg->bfqd->root_group)
		bfqg->active_entities++;
}

static void bfq_dec_active_entities(struct bfq_entity *entity)
{
	struct bfq_sched_data *sd = entity->sched_data;
	struct bfq_group *bfqg = container_of(sd, struct bfq_group, sched_data);

	if (bfqg != bfqg->bfqd->root_group)
		bfqg->active_entities--;
}

#else /* CONFIG_BFQ_GROUP_IOSCHED */

static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
{
	return false;
}

static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
{
	return true;
}

static void bfq_inc_active_entities(struct bfq_entity *entity)
{
}

static void bfq_dec_active_entities(struct bfq_entity *entity)
{
}

#endif /* CONFIG_BFQ_GROUP_IOSCHED */

/*
 * Shift for timestamp calculations. This actually limits the maximum
 * service allowed in one timestamp delta (small shift values increase it),
 * the maximum total weight that can be used for the queues in the system
 * (big shift values increase it), and the period of virtual time
 * wraparounds.
 */
#define WFQ_SERVICE_SHIFT	22

struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = NULL;

	if (!entity->my_sched_data)
		bfqq = container_of(entity, struct bfq_queue, entity);

	return bfqq;
}


/**
 * bfq_delta - map service into the virtual time domain.
 * @service: amount of service.
 * @weight: scale factor (weight of an entity or weight sum).
 */
static u64 bfq_delta(unsigned long service, unsigned long weight)
{
	return div64_ul((u64)service << WFQ_SERVICE_SHIFT, weight);
}
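
/*
 * Example of the service-to-vtime mapping: charging 8 KiB of service
 * (service = 8192) against a weight of 100 yields
 * bfq_delta(8192, 100) = (8192 << 22) / 100. The service is scaled by
 * 2^WFQ_SERVICE_SHIFT before the division, so integer division keeps
 * enough precision even for small service amounts and large weights.
 */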

/**
 * bfq_calc_finish - assign the finish time to an entity.
 * @entity: the entity to act upon.
 * @service: the service to be charged to the entity.
 */
static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->finish = entity->start +
		bfq_delta(service, entity->weight);

	if (bfqq) {
		bfq_log_bfqq(bfqq->bfqd, bfqq,
			"calc_finish: serv %lu, w %d",
			service, entity->weight);
		bfq_log_bfqq(bfqq->bfqd, bfqq,
			"calc_finish: start %llu, finish %llu, delta %llu",
			entity->start, entity->finish,
			bfq_delta(service, entity->weight));
	}
}

/**
 * bfq_entity_of - get an entity from a node.
 * @node: the node field of the entity.
 *
 * Convert a node pointer to the corresponding entity. This is used only
 * to simplify the logic of some functions and not as the generic
 * conversion mechanism because, e.g., in the tree walking functions,
 * the check for a %NULL value would be redundant.
 */
struct bfq_entity *bfq_entity_of(struct rb_node *node)
{
	struct bfq_entity *entity = NULL;

	if (node)
		entity = rb_entry(node, struct bfq_entity, rb_node);

	return entity;
}

/**
 * bfq_extract - remove an entity from a tree.
 * @root: the tree root.
 * @entity: the entity to remove.
 */
static void bfq_extract(struct rb_root *root, struct bfq_entity *entity)
{
	entity->tree = NULL;
	rb_erase(&entity->rb_node, root);
}

/**
 * bfq_idle_extract - extract an entity from the idle tree.
 * @st: the service tree of the owning @entity.
 * @entity: the entity being removed.
 */
static void bfq_idle_extract(struct bfq_service_tree *st,
			     struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct rb_node *next;

	if (entity == st->first_idle) {
		next = rb_next(&entity->rb_node);
		st->first_idle = bfq_entity_of(next);
	}

	if (entity == st->last_idle) {
		next = rb_prev(&entity->rb_node);
		st->last_idle = bfq_entity_of(next);
	}

	bfq_extract(&st->idle, entity);

	if (bfqq)
		list_del(&bfqq->bfqq_list);
}

/**
 * bfq_insert - generic tree insertion.
 * @root: tree root.
 * @entity: entity to insert.
 *
 * This is used for the idle and the active tree, since they are both
 * ordered by finish time.
 */
static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
{
	struct bfq_entity *entry;
	struct rb_node **node = &root->rb_node;
	struct rb_node *parent = NULL;

	while (*node) {
		parent = *node;
		entry = rb_entry(parent, struct bfq_entity, rb_node);

		if (bfq_gt(entry->finish, entity->finish))
			node = &parent->rb_left;
		else
			node = &parent->rb_right;
	}

	rb_link_node(&entity->rb_node, parent, node);
	rb_insert_color(&entity->rb_node, root);

	entity->tree = root;
}

/**
 * bfq_update_min - update the min_start field of an entity.
 * @entity: the entity to update.
 * @node: one of its children.
 *
 * This function is called when @entity may store an invalid value for
 * min_start due to updates to the active tree. The function assumes
 * that the subtree rooted at @node (which may be its left or its right
 * child) has a valid min_start value.
 */
static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node)
{
	struct bfq_entity *child;

	if (node) {
		child = rb_entry(node, struct bfq_entity, rb_node);
		if (bfq_gt(entity->min_start, child->min_start))
			entity->min_start = child->min_start;
	}
}

/**
 * bfq_update_active_node - recalculate min_start.
 * @node: the node to update.
 *
 * @node may have changed position or one of its children may have moved;
 * this function updates its min_start value. The left and right subtrees
 * are assumed to hold a correct min_start value.
 */
static void bfq_update_active_node(struct rb_node *node)
{
	struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);

	entity->min_start = entity->start;
	bfq_update_min(entity, node->rb_right);
	bfq_update_min(entity, node->rb_left);
}

/**
 * bfq_update_active_tree - update min_start for the whole active tree.
 * @node: the starting node.
 *
 * @node must be the deepest modified node after an update. This function
 * updates its min_start using the values held by its children, assuming
 * that they did not change, and then updates all the nodes that may have
 * changed in the path to the root. The only nodes that may have changed
 * are the ones in the path or their siblings.
 */
static void bfq_update_active_tree(struct rb_node *node)
{
	struct rb_node *parent;

up:
	bfq_update_active_node(node);

	parent = rb_parent(node);
	if (!parent)
		return;

	if (node == parent->rb_left && parent->rb_right)
		bfq_update_active_node(parent->rb_right);
	else if (parent->rb_left)
		bfq_update_active_node(parent->rb_left);

	node = parent;
	goto up;
}
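
/*
 * Example of the min_start propagation performed above: if a node with
 * start = 30 has a left subtree with min_start = 10 and a right subtree
 * with min_start = 20, bfq_update_active_node() leaves the node with
 * min_start = 10, the smallest start time in the whole subtree rooted
 * there. bfq_update_active_tree() repeats this from the deepest
 * modified node up to the root, so every ancestor stays consistent.
 */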

/**
 * bfq_active_insert - insert an entity in the active tree of its
 *                     group/device.
 * @st: the service tree of the entity.
 * @entity: the entity being inserted.
 *
 * The active tree is ordered by finish time, but an extra key is kept
 * for each node, containing the minimum value for the start times of
 * its children (and the node itself), so it's possible to search for
 * the eligible node with the lowest finish time in logarithmic time.
 */
static void bfq_active_insert(struct bfq_service_tree *st,
			      struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct rb_node *node = &entity->rb_node;

	bfq_insert(&st->active, entity);

	if (node->rb_left)
		node = node->rb_left;
	else if (node->rb_right)
		node = node->rb_right;

	bfq_update_active_tree(node);

	if (bfqq)
		list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list[bfqq->actuator_idx]);

	bfq_inc_active_entities(entity);
}

/**
 * bfq_ioprio_to_weight - calc a weight from an ioprio.
 * @ioprio: the ioprio value to convert.
 */
unsigned short bfq_ioprio_to_weight(int ioprio)
{
	return (IOPRIO_NR_LEVELS - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
}

/**
 * bfq_weight_to_ioprio - calc an ioprio from a weight.
 * @weight: the weight value to convert.
 *
 * To preserve as much as possible the old only-ioprio user interface,
 * 0 is used as an escape ioprio value for weights (numerically) equal to
 * or larger than IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF.
 */
static unsigned short bfq_weight_to_ioprio(int weight)
{
	return max_t(int, 0,
		     IOPRIO_NR_LEVELS - weight / BFQ_WEIGHT_CONVERSION_COEFF);
}
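
/*
 * Worked example of the two conversions, assuming the current constants
 * (IOPRIO_NR_LEVELS == 8, BFQ_WEIGHT_CONVERSION_COEFF == 10):
 * bfq_ioprio_to_weight(4) = (8 - 4) * 10 = 40, and
 * bfq_weight_to_ioprio(40) = 8 - 40 / 10 = 4, so plain priority levels
 * round-trip exactly; weights >= 80 escape to ioprio 0 as described
 * above.
 */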

static void bfq_get_entity(struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	if (bfqq) {
		bfqq->ref++;
		bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
			     bfqq, bfqq->ref);
	}
}

/**
 * bfq_find_deepest - find the deepest node that an extraction can modify.
 * @node: the node being removed.
 *
 * Do the first step of an extraction in an rb tree, looking for the
 * node that will replace @node, and returning the deepest node that
 * the following modifications to the tree can touch. If @node is the
 * last node in the tree return %NULL.
 */
static struct rb_node *bfq_find_deepest(struct rb_node *node)
{
	struct rb_node *deepest;

	if (!node->rb_right && !node->rb_left)
		deepest = rb_parent(node);
	else if (!node->rb_right)
		deepest = node->rb_left;
	else if (!node->rb_left)
		deepest = node->rb_right;
	else {
		deepest = rb_next(node);
		if (deepest->rb_right)
			deepest = deepest->rb_right;
		else if (rb_parent(deepest) != node)
			deepest = rb_parent(deepest);
	}

	return deepest;
}

/**
 * bfq_active_extract - remove an entity from the active tree.
 * @st: the service_tree containing the tree.
 * @entity: the entity being removed.
 */
static void bfq_active_extract(struct bfq_service_tree *st,
			       struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct rb_node *node;

	node = bfq_find_deepest(&entity->rb_node);
	bfq_extract(&st->active, entity);

	if (node)
		bfq_update_active_tree(node);
	if (bfqq)
		list_del(&bfqq->bfqq_list);

	bfq_dec_active_entities(entity);
}

/**
 * bfq_idle_insert - insert an entity into the idle tree.
 * @st: the service tree containing the tree.
 * @entity: the entity to insert.
 */
static void bfq_idle_insert(struct bfq_service_tree *st,
			    struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
	struct bfq_entity *first_idle = st->first_idle;
	struct bfq_entity *last_idle = st->last_idle;

	if (!first_idle || bfq_gt(first_idle->finish, entity->finish))
		st->first_idle = entity;
	if (!last_idle || bfq_gt(entity->finish, last_idle->finish))
		st->last_idle = entity;

	bfq_insert(&st->idle, entity);

	if (bfqq)
		list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
}

/**
 * bfq_forget_entity - do not consider entity any longer for scheduling
 * @st: the service tree.
 * @entity: the entity being removed.
 * @is_in_service: true if entity is currently the in-service entity.
 *
 * Forget everything about @entity. In addition, if entity represents
 * a queue, and the latter is not in service, then release the service
 * reference to the queue (the one taken through bfq_get_entity). In
 * fact, in this case, there is really no more service reference to
 * the queue, as the latter is also outside any service tree. If,
 * instead, the queue is in service, then __bfq_bfqd_reset_in_service
 * will take care of putting the reference when the queue finally
 * stops being served.
 */
static void bfq_forget_entity(struct bfq_service_tree *st,
			      struct bfq_entity *entity,
			      bool is_in_service)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->on_st_or_in_serv = false;
	st->wsum -= entity->weight;
	if (bfqq && !is_in_service)
		bfq_put_queue(bfqq);
}

/**
 * bfq_put_idle_entity - release the idle tree ref of an entity.
 * @st: service tree for the entity.
 * @entity: the entity being released.
 */
void bfq_put_idle_entity(struct bfq_service_tree *st, struct bfq_entity *entity)
{
	bfq_idle_extract(st, entity);
	bfq_forget_entity(st, entity,
			  entity == entity->sched_data->in_service_entity);
}

/**
 * bfq_forget_idle - update the idle tree if necessary.
 * @st: the service tree to act upon.
 *
 * To preserve the global O(log N) complexity we only remove one entry here;
 * as the idle tree will not grow indefinitely this can be done safely.
 */
static void bfq_forget_idle(struct bfq_service_tree *st)
{
	struct bfq_entity *first_idle = st->first_idle;
	struct bfq_entity *last_idle = st->last_idle;

	if (RB_EMPTY_ROOT(&st->active) && last_idle &&
	    !bfq_gt(last_idle->finish, st->vtime)) {
		/*
		 * Forget the whole idle tree, increasing the vtime past
		 * the last finish time of idle entities.
		 */
		st->vtime = last_idle->finish;
	}

	if (first_idle && !bfq_gt(first_idle->finish, st->vtime))
		bfq_put_idle_entity(st, first_idle);
}

struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity)
{
	struct bfq_sched_data *sched_data = entity->sched_data;
	unsigned int idx = bfq_class_idx(entity);

	return sched_data->service_tree + idx;
}

/*
 * Update weight and priority of entity. If update_class_too is true,
 * then update the ioprio_class of entity too.
 *
 * The reason why the update of ioprio_class is controlled through the
 * last parameter is as follows. Changing the ioprio class of an
 * entity implies changing the destination service trees for that
 * entity. If such a change occurred when the entity is already on one
 * of the service trees for its previous class, then the state of the
 * entity would become more complex: none of the new possible service
 * trees for the entity, according to bfq_entity_service_tree(), would
 * match any of the possible service trees on which the entity
 * is. Complex operations involving these trees, such as entity
 * activations and deactivations, should take into account this
 * additional complexity. To avoid this issue, this function is
 * invoked with update_class_too unset in the points in the code where
 * entity may happen to be on some tree.
 */
struct bfq_service_tree *
__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
				struct bfq_entity *entity,
				bool update_class_too)
{
	struct bfq_service_tree *new_st = old_st;

	if (entity->prio_changed) {
		struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
		unsigned int prev_weight, new_weight;

		/* Matches the smp_wmb() in bfq_group_set_weight. */
		smp_rmb();
		old_st->wsum -= entity->weight;

		if (entity->new_weight != entity->orig_weight) {
			if (entity->new_weight < BFQ_MIN_WEIGHT ||
			    entity->new_weight > BFQ_MAX_WEIGHT) {
				pr_crit("update_weight_prio: new_weight %d\n",
					entity->new_weight);
				if (entity->new_weight < BFQ_MIN_WEIGHT)
					entity->new_weight = BFQ_MIN_WEIGHT;
				else
					entity->new_weight = BFQ_MAX_WEIGHT;
			}
			entity->orig_weight = entity->new_weight;
			if (bfqq)
				bfqq->ioprio =
				  bfq_weight_to_ioprio(entity->orig_weight);
		}

		if (bfqq && update_class_too)
			bfqq->ioprio_class = bfqq->new_ioprio_class;

		/*
		 * Reset prio_changed only if the ioprio_class change
		 * is not pending any longer.
		 */
		if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class)
			entity->prio_changed = 0;

		/*
		 * NOTE: here we may be changing the weight too early;
		 * this will cause unfairness. The correct approach
		 * would have required additional complexity to defer
		 * weight changes to the proper time instants (i.e.,
		 * when entity->finish <= old_st->vtime).
		 */
		new_st = bfq_entity_service_tree(entity);

		prev_weight = entity->weight;
		new_weight = entity->orig_weight *
			     (bfqq ? bfqq->wr_coeff : 1);
		/*
		 * If the weight of the entity changes, and the entity is a
		 * queue, remove the entity from its old weight counter (if
		 * there is a counter associated with the entity).
		 */
		if (prev_weight != new_weight && bfqq)
			bfq_weights_tree_remove(bfqq);
		entity->weight = new_weight;
		/*
		 * Add the entity, if it is not a weight-raised queue,
		 * to the counter associated with its new weight.
		 */
		if (prev_weight != new_weight && bfqq && bfqq->wr_coeff == 1)
			bfq_weights_tree_add(bfqq);

		new_st->wsum += entity->weight;

		if (new_st != old_st)
			entity->start = new_st->vtime;
	}

	return new_st;
}

/**
 * bfq_bfqq_served - update the scheduler status after selection for
 *		     service.
 * @bfqq: the queue being served.
 * @served: bytes to transfer.
 *
 * NOTE: this can be optimized, as the timestamps of upper level entities
 * are synchronized every time a new bfqq is selected for service. For now,
 * we keep it to better check consistency.
 */
void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
{
	struct bfq_entity *entity = &bfqq->entity;
	struct bfq_service_tree *st;

	if (!bfqq->service_from_backlogged)
		bfqq->first_IO_time = jiffies;

	if (bfqq->wr_coeff > 1)
		bfqq->service_from_wr += served;

	bfqq->service_from_backlogged += served;
	for_each_entity(entity) {
		st = bfq_entity_service_tree(entity);

		entity->service += served;

		st->vtime += bfq_delta(served, st->wsum);
		bfq_forget_idle(st);
	}
	bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
}
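
/*
 * Example of the vtime update above: if st->wsum = 40 and served = 1000,
 * the tree's virtual time advances by bfq_delta(1000, 40), i.e. at rate
 * 1/wsum per unit of service, while each entity's finish timestamps
 * advance at rate 1/weight per unit of its own service (see
 * bfq_calc_finish). In steady state an entity therefore receives a
 * fraction weight/wsum of the total service, which is the WF2Q+
 * fairness property this scheduler implements.
 */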

/**
 * bfq_bfqq_charge_time - charge an amount of service equivalent to the length
 *			  of the time interval during which bfqq has been in
 *			  service.
 * @bfqd: the device
 * @bfqq: the queue that needs a service update.
 * @time_ms: the amount of time during which the queue has received service
 *
 * If a queue does not consume its budget fast enough, then providing
 * the queue with service fairness may impair throughput, more or less
 * severely. For this reason, queues that consume their budget slowly
 * are provided with time fairness instead of service fairness. This
 * goal is achieved through the BFQ scheduling engine, even if such an
 * engine works in the service domain, and not in the time domain. The
 * trick is charging these queues with an inflated amount of service,
 * equal to the amount of service that they would have received during
 * their service slot if they had been fast, i.e., if their requests
 * had been dispatched at a rate equal to the estimated peak rate.
 *
 * It is worth noting that time fairness can cause important
 * distortions in terms of bandwidth distribution, on devices with
 * internal queueing. The reason is that I/O requests dispatched
 * during the service slot of a queue may be served after that service
 * slot is finished, and may have a total processing time loosely
 * correlated with the duration of the service slot. This is
 * especially true for short service slots.
 */
void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
			  unsigned long time_ms)
{
	struct bfq_entity *entity = &bfqq->entity;
	unsigned long timeout_ms = jiffies_to_msecs(bfq_timeout);
	unsigned long bounded_time_ms = min(time_ms, timeout_ms);
	int serv_to_charge_for_time =
		(bfqd->bfq_max_budget * bounded_time_ms) / timeout_ms;
	int tot_serv_to_charge = max(serv_to_charge_for_time, entity->service);

	/* Increase budget to avoid inconsistencies */
	if (tot_serv_to_charge > entity->budget)
		entity->budget = tot_serv_to_charge;

	bfq_bfqq_served(bfqq,
			max_t(int, 0, tot_serv_to_charge - entity->service));
}
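
/*
 * Numeric example for the charging logic above, assuming a budget
 * timeout of 125 ms (the default bfq_timeout of HZ / 8) and
 * bfqd->bfq_max_budget = 16384: a queue that has been in service for
 * time_ms = 25 is charged at least (16384 * 25) / 125 = 3276 units of
 * service, even if it actually dispatched less. time_ms values beyond
 * the timeout are clamped first, so the time-based charge never
 * exceeds bfq_max_budget.
 */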

static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
					struct bfq_service_tree *st,
					bool backshifted)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	/*
	 * When this function is invoked, entity is not in any service
	 * tree, so it is safe to invoke the next function with the last
	 * parameter set (see the comments on the function).
	 */
	st = __bfq_entity_update_weight_prio(st, entity, true);
	bfq_calc_finish(entity, entity->budget);

	/*
	 * If some queues enjoy backshifting for a while, then their
	 * (virtual) finish timestamps may happen to become lower and
	 * lower than the system virtual time. In particular, if
	 * these queues often happen to be idle for short time
	 * periods, and during such time periods other queues with
	 * higher timestamps happen to be busy, then the backshifted
	 * timestamps of the former queues can become much lower than
	 * the system virtual time. In fact, to serve the queues with
	 * higher timestamps while the ones with lower timestamps are
	 * idle, the system virtual time may be pushed up to much
	 * higher values than the finish timestamps of the idle
	 * queues. As a consequence, the finish timestamps of all new
	 * or newly activated queues may end up being much larger than
	 * those of lucky queues with backshifted timestamps. The
	 * latter queues may then monopolize the device for a lot of
	 * time. This would simply break service guarantees.
	 *
	 * To reduce this problem, push up a little bit the
	 * backshifted timestamps of the queue associated with this
	 * entity (only a queue can happen to have the backshifted
	 * flag set): just enough to let the finish timestamp of the
	 * queue be equal to the current value of the system virtual
	 * time. This may introduce a little unfairness among queues
	 * with backshifted timestamps, but it does not break
	 * worst-case fairness guarantees.
	 *
	 * As a special case, if bfqq is weight-raised, push up
	 * timestamps much less, to keep very low the probability that
	 * this push up causes the backshifted finish timestamps of
	 * weight-raised queues to become higher than the backshifted
	 * finish timestamps of non weight-raised queues.
	 */
	if (backshifted && bfq_gt(st->vtime, entity->finish)) {
		unsigned long delta = st->vtime - entity->finish;

		if (bfqq)
			delta /= bfqq->wr_coeff;

		entity->start += delta;
		entity->finish += delta;
	}

	bfq_active_insert(st, entity);
}

/**
 * __bfq_activate_entity - handle activation of entity.
 * @entity: the entity being activated.
 * @non_blocking_wait_rq: true if entity was waiting for a request
 *
 * Called for a 'true' activation, i.e., if entity is not active and
 * one of its children receives a new request.
 *
 * Basically, this function updates the timestamps of entity and
 * inserts entity into its active tree, after possibly extracting it
 * from its idle tree.
 */
static void __bfq_activate_entity(struct bfq_entity *entity,
				  bool non_blocking_wait_rq)
{
	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
	bool backshifted = false;
	unsigned long long min_vstart;

	/* See comments on bfq_bfqq_update_budg_for_activation */
	if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) {
		backshifted = true;
		min_vstart = entity->finish;
	} else
		min_vstart = st->vtime;

	if (entity->tree == &st->idle) {
		/*
		 * Must be on the idle tree, bfq_idle_extract() will
		 * check for that.
		 */
		bfq_idle_extract(st, entity);
		entity->start = bfq_gt(min_vstart, entity->finish) ?
			min_vstart : entity->finish;
	} else {
		/*
		 * The finish time of the entity may be invalid, and
		 * it is in the past for sure, otherwise the queue
		 * would have been on the idle tree.
		 */
		entity->start = min_vstart;
		st->wsum += entity->weight;
		/*
		 * entity is about to be inserted into a service tree,
		 * and then set in service: get a reference to make
		 * sure entity does not disappear until it is no
		 * longer in service or scheduled for service.
		 */
		bfq_get_entity(entity);

		entity->on_st_or_in_serv = true;
	}

	bfq_update_fin_time_enqueue(entity, st, backshifted);
}

/**
 * __bfq_requeue_entity - handle requeueing or repositioning of an entity.
 * @entity: the entity being requeued or repositioned.
 *
 * Requeueing is needed if this entity stops being served, which
 * happens if a leaf descendant entity has expired. On the other hand,
 * repositioning is needed if the next_in_service entity for the child
 * entity has changed. See the comments inside the function for
 * details.
 *
 * Basically, this function: 1) removes entity from its active tree if
 * present there, 2) updates the timestamps of entity and 3) inserts
 * entity back into its active tree (in the new, right position for
 * the new values of the timestamps).
 */
static void __bfq_requeue_entity(struct bfq_entity *entity)
{
	struct bfq_sched_data *sd = entity->sched_data;
	struct bfq_service_tree *st = bfq_entity_service_tree(entity);

	if (entity == sd->in_service_entity) {
		/*
		 * We are requeueing the current in-service entity,
		 * which may have to be done for one of the following
		 * reasons:
		 * - entity represents the in-service queue, and the
		 *   in-service queue is being requeued after an
		 *   expiration;
		 * - entity represents a group, and its budget has
		 *   changed because one of its child entities has
		 *   just been either activated or requeued for some
		 *   reason; the timestamps of the entity need then to
		 *   be updated, and the entity needs to be enqueued
		 *   or repositioned accordingly.
		 *
		 * In particular, before requeueing, the start time of
		 * the entity must be moved forward to account for the
		 * service that the entity has received while in
		 * service. This is done by the next instructions. The
		 * finish time will then be updated according to this
		 * new value of the start time, and to the budget of
		 * the entity.
		 */
		bfq_calc_finish(entity, entity->service);
		entity->start = entity->finish;
		/*
		 * In addition, if the entity had more than one child
		 * when set in service, then it was not extracted from
		 * the active tree. This implies that the position of
		 * the entity in the active tree may need to be
		 * changed now, because we have just updated the start
		 * time of the entity, and we will update its finish
		 * time in a moment (the requeueing is then, more
		 * precisely, a repositioning in this case). To
		 * implement this repositioning, we: 1) dequeue the
		 * entity here, 2) update the finish time and requeue
		 * the entity according to the new timestamps below.
		 */
		if (entity->tree)
			bfq_active_extract(st, entity);
	} else { /* The entity is already active, and not in service */
		/*
		 * In this case, this function gets called only if the
		 * next_in_service entity below this entity has
		 * changed, and this change has caused the budget of
		 * this entity to change, which, finally implies that
		 * the finish time of this entity must be
		 * updated. Such an update may cause the scheduling,
		 * i.e., the position in the active tree, of this
		 * entity to change. We handle this change by: 1)
		 * dequeueing the entity here, 2) updating the finish
		 * time and requeueing the entity according to the new
		 * timestamps below. This is the same approach as the
		 * non-extracted-entity sub-case above.
		 */
		bfq_active_extract(st, entity);
	}

	bfq_update_fin_time_enqueue(entity, st, false);
}

static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
					  bool non_blocking_wait_rq)
{
	struct bfq_service_tree *st = bfq_entity_service_tree(entity);

	if (entity->sched_data->in_service_entity == entity ||
	    entity->tree == &st->active)
		/*
		 * in service or already queued on the active tree,
		 * requeue or reposition
		 */
		__bfq_requeue_entity(entity);
	else
		/*
		 * Not in service and not queued on its active tree:
		 * the entity is idle and this is a true activation.
		 */
		__bfq_activate_entity(entity, non_blocking_wait_rq);
}


/**
 * bfq_activate_requeue_entity - activate or requeue an entity representing a
 *				 bfq_queue, and activate, requeue or reposition
 *				 all ancestors for which such an update becomes
 *				 necessary.
 * @entity: the entity to activate.
 * @non_blocking_wait_rq: true if this entity was waiting for a request
 * @requeue: true if this is a requeue, which implies that bfqq is
 *	     being expired; thus ALL its ancestors stop being served and must
 *	     therefore be requeued
 * @expiration: true if this function is being invoked in the expiration path
 *	       of the in-service queue
 */
static void bfq_activate_requeue_entity(struct bfq_entity *entity,
					bool non_blocking_wait_rq,
					bool requeue, bool expiration)
{
	for_each_entity(entity) {
		__bfq_activate_requeue_entity(entity, non_blocking_wait_rq);
		if (!bfq_update_next_in_service(entity->sched_data, entity,
						expiration) && !requeue)
			break;
	}
}

/**
 * __bfq_deactivate_entity - update sched_data and service trees for
 * entity, so as to represent entity as inactive
 * @entity: the entity being deactivated.
 * @ins_into_idle_tree: if false, the entity will not be put into the
 *			idle tree.
 *
 * If necessary and allowed, puts entity into the idle tree. NOTE:
 * entity may be on no tree if in service.
 */
bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
{
	struct bfq_sched_data *sd = entity->sched_data;
	struct bfq_service_tree *st;
	bool is_in_service;

	if (!entity->on_st_or_in_serv) /*
					* entity never activated, or
					* already inactive
					*/
		return false;

	/*
	 * If we get here, then entity is active, which implies that
	 * bfq_group_set_parent has already been invoked for the group
	 * represented by entity. Therefore, the field
	 * entity->sched_data has been set, and we can safely use it.
	 */
	st = bfq_entity_service_tree(entity);
	is_in_service = entity == sd->in_service_entity;

	bfq_calc_finish(entity, entity->service);

	if (is_in_service)
		sd->in_service_entity = NULL;
	else
		/*
		 * Non in-service entity: nobody will take care of
		 * resetting its service counter on expiration. Do it
		 * now.
		 */
		entity->service = 0;

	if (entity->tree == &st->active)
		bfq_active_extract(st, entity);
	else if (!is_in_service && entity->tree == &st->idle)
		bfq_idle_extract(st, entity);

	if (!ins_into_idle_tree || !bfq_gt(entity->finish, st->vtime))
		bfq_forget_entity(st, entity, is_in_service);
	else
		bfq_idle_insert(st, entity);

	return true;
}

/**
 * bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
 * @entity: the entity to deactivate.
 * @ins_into_idle_tree: true if the entity can be put into the idle tree
 * @expiration: true if this function is being invoked in the expiration path
 *		of the in-service queue
 */
static void bfq_deactivate_entity(struct bfq_entity *entity,
				  bool ins_into_idle_tree,
				  bool expiration)
{
	struct bfq_sched_data *sd;
	struct bfq_entity *parent = NULL;

	for_each_entity_safe(entity, parent) {
		sd = entity->sched_data;

		if (!__bfq_deactivate_entity(entity, ins_into_idle_tree)) {
			/*
			 * entity is not in any tree any more, so
			 * this deactivation is a no-op, and there is
			 * nothing to change for upper-level entities
			 * (in case of expiration, this can never
			 * happen).
			 */
			return;
		}

		if (sd->next_in_service == entity)
			/*
			 * entity was the next_in_service entity,
			 * then, since entity has just been
			 * deactivated, a new one must be found.
			 */
			bfq_update_next_in_service(sd, NULL, expiration);

		if (sd->next_in_service || sd->in_service_entity) {
			/*
			 * The parent entity is still active, because
			 * either next_in_service or in_service_entity
			 * is not NULL. So, no further upwards
			 * deactivation must be performed. Yet,
			 * next_in_service has changed. Then the
			 * schedule does need to be updated upwards.
			 *
			 * NOTE If in_service_entity is not NULL, then
			 * next_in_service may happen to be NULL,
			 * although the parent entity is evidently
			 * active. This happens if 1) the entity
			 * pointed by in_service_entity is the only
			 * active entity in the parent entity, and 2)
			 * according to the definition of
			 * next_in_service, the in_service_entity
			 * cannot be considered as
			 * next_in_service. See the comments on the
			 * definition of next_in_service for details.
			 */
			break;
		}

		/*
		 * If we get here, then the parent is no longer
		 * backlogged and we need to propagate the
		 * deactivation upwards. Thus let the loop go on.
		 */

		/*
		 * Also let parent be queued into the idle tree on
		 * deactivation, to preserve service guarantees, and
		 * assuming that the caller of this function does not
		 * need parent entities to be removed completely as well.
		 */
		ins_into_idle_tree = true;
	}

	/*
	 * If the deactivation loop is fully executed, then there are
	 * no more entities to touch and the next loop is not executed
	 * at all. Otherwise, requeue remaining entities if they are
	 * about to stop receiving service, or reposition them if this
	 * is not the case.
	 */
	entity = parent;
	for_each_entity(entity) {
		/*
		 * Invoke __bfq_requeue_entity on entity, even if
		 * already active, to requeue/reposition it in the
		 * active tree (because sd->next_in_service has
		 * changed)
		 */
		__bfq_requeue_entity(entity);

		sd = entity->sched_data;
		if (!bfq_update_next_in_service(sd, entity, expiration) &&
		    !expiration)
			/*
			 * next_in_service unchanged or not causing
			 * any change in entity->parent->sd, and no
			 * requeueing needed for expiration: stop
			 * here.
			 */
			break;
	}
}

/**
 * bfq_calc_vtime_jump - compute the value to which the vtime should jump,
 *                       if needed, to have at least one entity eligible.
 * @st: the service tree to act upon.
 *
 * Assumes that st is not empty.
 */
static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st)
{
	struct bfq_entity *root_entity = bfq_root_active_entity(&st->active);

	if (bfq_gt(root_entity->min_start, st->vtime))
		return root_entity->min_start;

	return st->vtime;
}

static void bfq_update_vtime(struct bfq_service_tree *st, u64 new_value)
{
	if (new_value > st->vtime) {
		st->vtime = new_value;
		bfq_forget_idle(st);
	}
}

/**
 * bfq_first_active_entity - find the eligible entity with
 *                           the smallest finish time
 * @st: the service tree to select from.
 * @vtime: the system virtual time to use as a reference for eligibility
 *
 * This function searches for the first schedulable entity, starting from
 * the root of the tree and going on the left every time on this side there
 * is a subtree with at least one eligible (start <= vtime) entity. The path
 * on the right is followed only if a) the left subtree contains no eligible
 * entities and b) no eligible entity has been found yet.
 */
static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st,
						  u64 vtime)
{
	struct bfq_entity *entry, *first = NULL;
	struct rb_node *node = st->active.rb_node;

	while (node) {
		entry = rb_entry(node, struct bfq_entity, rb_node);
left:
		if (!bfq_gt(entry->start, vtime))
			first = entry;

		if (node->rb_left) {
			entry = rb_entry(node->rb_left,
					 struct bfq_entity, rb_node);
			if (!bfq_gt(entry->min_start, vtime)) {
				node = node->rb_left;
				goto left;
			}
		}
		if (first)
			break;
		node = node->rb_right;
	}

	return first;
}
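
/*
 * Example of why the min_start pruning above is safe: suppose vtime = 15
 * and the root has start = 20 (not eligible), while its left subtree has
 * min_start = 10. Since min_start <= vtime, the left subtree is
 * guaranteed to contain at least one eligible entity, and, the tree
 * being ordered by finish time, that subtree only holds entities with
 * smaller finish times than the root, so the search descends left. If
 * instead min_start were > vtime, no node in that subtree could be
 * eligible and the whole subtree could be skipped.
 */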

/**
 * __bfq_lookup_next_entity - return the first eligible entity in @st.
 * @st: the service tree.
 * @in_service: whether or not there is an in-service entity for the sched_data
 *	        this active tree belongs to.
 *
 * If there is no in-service entity for the sched_data st belongs to,
 * then return the entity that will be set in service if:
 * 1) the parent entity this st belongs to is set in service;
 * 2) no entity belonging to such parent entity undergoes a state change
 * that would influence the timestamps of the entity (e.g., becomes idle,
 * becomes backlogged, changes its budget, ...).
 *
 * In this first case, update the virtual time in @st too (see the
 * comments on this update inside the function).
 *
 * In contrast, if there is an in-service entity, then return the
 * entity that would be set in service if not only the above
 * conditions, but also the next one held true: the currently
 * in-service entity, on expiration,
 * 1) gets a finish time equal to the current one, or
 * 2) is not eligible any more, or
 * 3) is idle.
 */
static struct bfq_entity *
__bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service)
{
	struct bfq_entity *entity;
	u64 new_vtime;

	if (RB_EMPTY_ROOT(&st->active))
		return NULL;

	/*
	 * Get the value of the system virtual time for which at
	 * least one entity is eligible.
	 */
	new_vtime = bfq_calc_vtime_jump(st);

	/*
	 * If there is no in-service entity for the sched_data this
	 * active tree belongs to, then push the system virtual time
	 * up to the value that guarantees that at least one entity is
	 * eligible. If, instead, there is an in-service entity, then
	 * do not make any such update, because there is already an
	 * eligible entity, namely the in-service one (even if the
	 * entity is not on st, because it was extracted when set in
	 * service).
	 */
	if (!in_service)
		bfq_update_vtime(st, new_vtime);

	entity = bfq_first_active_entity(st, new_vtime);

	return entity;
}

/**
 * bfq_lookup_next_entity - return the first eligible entity in @sd.
 * @sd: the sched_data.
 * @expiration: true if we are on the expiration path of the in-service queue
 *
 * This function is invoked when there has been a change in the trees
 * for sd, and we need to know what is the new next entity to serve
 * after this change.
 */
static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
						 bool expiration)
{
	struct bfq_service_tree *st = sd->service_tree;
	struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1);
	struct bfq_entity *entity = NULL;
	int class_idx = 0;

	/*
	 * Choose from idle class, if needed to guarantee a minimum
	 * bandwidth to this class (and if there is some active entity
	 * in idle class). This should also mitigate
	 * priority-inversion problems in case a low priority task is
	 * holding file system resources.
	 */
	if (time_is_before_jiffies(sd->bfq_class_idle_last_service +
				   BFQ_CL_IDLE_TIMEOUT)) {
		if (!RB_EMPTY_ROOT(&idle_class_st->active))
			class_idx = BFQ_IOPRIO_CLASSES - 1;
		/* About to be served if backlogged, or not yet backlogged */
		sd->bfq_class_idle_last_service = jiffies;
	}

	/*
	 * Find the next entity to serve for the highest-priority
	 * class, unless the idle class needs to be served.
	 */
	for (; class_idx < BFQ_IOPRIO_CLASSES; class_idx++) {
		/*
		 * If expiration is true, then bfq_lookup_next_entity
		 * is being invoked as a part of the expiration path
		 * of the in-service queue. In this case, even if
		 * sd->in_service_entity is not NULL,
		 * sd->in_service_entity at this point is actually not
		 * in service any more, and, if needed, has already
		 * been properly queued or requeued into the right
		 * tree. The reason why sd->in_service_entity is still
		 * not NULL here, even if expiration is true, is that
		 * sd->in_service_entity is reset as a last step in the
		 * expiration path. So, if expiration is true, tell
		 * __bfq_lookup_next_entity that there is no
		 * sd->in_service_entity.
		 */
		entity = __bfq_lookup_next_entity(st + class_idx,
						  sd->in_service_entity &&
						  !expiration);

		if (entity)
			break;
	}

	return entity;
}

bool next_queue_may_preempt(struct bfq_data *bfqd)
{
	struct bfq_sched_data *sd = &bfqd->root_group->sched_data;

	return sd->next_in_service != sd->in_service_entity;
}

/*
 * Get next queue for service.
 */
struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
{
	struct bfq_entity *entity = NULL;
	struct bfq_sched_data *sd;
	struct bfq_queue *bfqq;

	if (bfq_tot_busy_queues(bfqd) == 0)
		return NULL;

	/*
	 * Traverse the path from the root to the leaf entity to
	 * serve. Set in service all the entities visited along the
	 * way.
	 */
	sd = &bfqd->root_group->sched_data;
	for (; sd ; sd = entity->my_sched_data) {
		/*
		 * WARNING. We are about to set the in-service entity
		 * to sd->next_in_service, i.e., to the (cached) value
		 * returned by bfq_lookup_next_entity(sd) the last
		 * time it was invoked, i.e., the last time when the
		 * service order in sd changed as a consequence of the
		 * activation or deactivation of an entity. In this
		 * respect, if we execute bfq_lookup_next_entity(sd)
		 * in this very moment, it may, although with low
		 * probability, yield a different entity than that
		 * pointed to by sd->next_in_service. This rare event
		 * happens in case there was no CLASS_IDLE entity to
		 * serve for sd when bfq_lookup_next_entity(sd) was
		 * invoked for the last time, while there is now one
		 * such entity.
		 *
		 * If the above event happens, then the scheduling of
		 * such entity in CLASS_IDLE is postponed until the
		 * service of the sd->next_in_service entity
		 * finishes. In fact, when the latter is expired,
		 * bfq_lookup_next_entity(sd) gets called again,
		 * exactly to update sd->next_in_service.
		 */

		/* Make next_in_service entity become in_service_entity */
		entity = sd->next_in_service;
		sd->in_service_entity = entity;

		/*
		 * If entity is no longer a candidate for next
		 * service, then it must be extracted from its active
		 * tree, so as to make sure that it won't be
		 * considered when computing next_in_service. See the
		 * comments on the function
		 * bfq_no_longer_next_in_service() for details.
		 */
		if (bfq_no_longer_next_in_service(entity))
			bfq_active_extract(bfq_entity_service_tree(entity),
					   entity);

		/*
		 * Even if entity is not to be extracted according to
		 * the above check, a descendant entity may get
		 * extracted in one of the next iterations of this
		 * loop. Such an event could cause a change in
		 * next_in_service for the level of the descendant
		 * entity, and thus possibly back to this level.
		 *
		 * However, we cannot perform the resulting needed
		 * update of next_in_service for this level before the
		 * end of the whole loop, because, to know which is
		 * the correct next-to-serve candidate entity for each
		 * level, we need first to find the leaf entity to set
		 * in service. In fact, only after we know which is
		 * the next-to-serve leaf entity, we can discover
		 * whether the parent entity of the leaf entity
		 * becomes the next-to-serve, and so on.
		 */
	}

	bfqq = bfq_entity_to_bfqq(entity);

	/*
	 * We can finally update all next-to-serve entities along the
	 * path from the leaf entity just set in service to the root.
	 */
	for_each_entity(entity) {
		struct bfq_sched_data *sd = entity->sched_data;

		if (!bfq_update_next_in_service(sd, NULL, false))
			break;
	}

	return bfqq;
}

/* returns true if the in-service queue gets freed */
bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
{
	struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
	struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
	struct bfq_entity *entity = in_serv_entity;

	bfq_clear_bfqq_wait_request(in_serv_bfqq);
	hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
	bfqd->in_service_queue = NULL;

	/*
	 * When this function is called, all in-service entities have
	 * been properly deactivated or requeued, so we can safely
	 * execute the final step: reset in_service_entity along the
	 * path from entity to the root.
	 */
	for_each_entity(entity)
		entity->sched_data->in_service_entity = NULL;

	/*
	 * in_serv_entity is no longer in service, so, if it is in no
	 * service tree either, then release the service reference to
	 * the queue it represents (taken with bfq_get_entity).
	 */
	if (!in_serv_entity->on_st_or_in_serv) {
		/*
		 * If no process is referencing in_serv_bfqq any
		 * longer, then the service reference may be the only
		 * reference to the queue. If this is the case, then
		 * bfqq gets freed here.
		 */
		int ref = in_serv_bfqq->ref;
		bfq_put_queue(in_serv_bfqq);
		if (ref == 1)
			return true;
	}

	return false;
}

void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
			 bool ins_into_idle_tree, bool expiration)
{
	struct bfq_entity *entity = &bfqq->entity;

	bfq_deactivate_entity(entity, ins_into_idle_tree, expiration);
}

void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
	struct bfq_entity *entity = &bfqq->entity;

	bfq_activate_requeue_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq),
				    false, false);
	bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
}

void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		      bool expiration)
{
	struct bfq_entity *entity = &bfqq->entity;

	bfq_activate_requeue_entity(entity, false,
				    bfqq == bfqd->in_service_queue, expiration);
}

void bfq_add_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq)
{
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	struct bfq_entity *entity = &bfqq->entity;

	if (!entity->in_groups_with_pending_reqs) {
		entity->in_groups_with_pending_reqs = true;
		if (!(bfqq_group(bfqq)->num_queues_with_pending_reqs++))
			bfqq->bfqd->num_groups_with_pending_reqs++;
	}
#endif
}

void bfq_del_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq)
{
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	struct bfq_entity *entity = &bfqq->entity;

	if (entity->in_groups_with_pending_reqs) {
		entity->in_groups_with_pending_reqs = false;
		if (!(--bfqq_group(bfqq)->num_queues_with_pending_reqs))
			bfqq->bfqd->num_groups_with_pending_reqs--;
	}
#endif
}

/*
 * Called when the bfqq no longer has requests pending; remove it from
 * the service tree. As a special case, it can be invoked during an
 * expiration.
 */
void bfq_del_bfqq_busy(struct bfq_queue *bfqq, bool expiration)
{
	struct bfq_data *bfqd = bfqq->bfqd;

	bfq_log_bfqq(bfqd, bfqq, "del from busy");

	bfq_clear_bfqq_busy(bfqq);

	bfqd->busy_queues[bfqq->ioprio_class - 1]--;

	if (bfqq->wr_coeff > 1)
		bfqd->wr_busy_queues--;

	bfqg_stats_update_dequeue(bfqq_group(bfqq));

	bfq_deactivate_bfqq(bfqd, bfqq, true, expiration);

	if (!bfqq->dispatched) {
		bfq_del_bfqq_in_groups_with_pending_reqs(bfqq);
		/*
		 * The next function is invoked last, because it causes bfqq
		 * to be freed. DO NOT use bfqq after the next function
		 * invocation.
		 */
		bfq_weights_tree_remove(bfqq);
	}
}

/*
 * Called when an inactive queue receives a new request.
 */
void bfq_add_bfqq_busy(struct bfq_queue *bfqq)
{
	struct bfq_data *bfqd = bfqq->bfqd;

	bfq_log_bfqq(bfqd, bfqq, "add to busy");

	bfq_activate_bfqq(bfqd, bfqq);

	bfq_mark_bfqq_busy(bfqq);
	bfqd->busy_queues[bfqq->ioprio_class - 1]++;

	if (!bfqq->dispatched) {
		bfq_add_bfqq_in_groups_with_pending_reqs(bfqq);
		if (bfqq->wr_coeff == 1)
			bfq_weights_tree_add(bfqq);
	}

	if (bfqq->wr_coeff > 1)
		bfqd->wr_busy_queues++;

	/* Move bfqq to the head of the woken list of its waker */
	if (!hlist_unhashed(&bfqq->woken_list_node) &&
	    &bfqq->woken_list_node != bfqq->waker_bfqq->woken_list.first) {
		hlist_del_init(&bfqq->woken_list_node);
		hlist_add_head(&bfqq->woken_list_node,
			       &bfqq->waker_bfqq->woken_list);
	}
}