// SPDX-License-Identifier: GPL-2.0

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/sizes.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "block-group.h"
#include "discard.h"
#include "free-space-cache.h"
#include "fs.h"

/*
 * This contains the logic to handle async discard.
 *
 * Async discard manages trimming of free space outside of transaction commit.
 * Discarding is done by managing the block_groups on an LRU list based on free
 * space recency. Two passes are used: the first prioritizes discarding extents,
 * and the second gives trimming of the bitmaps the best opportunity to
 * coalesce. The block_groups are maintained on multiple lists to allow for
 * multiple passes with different discard filter requirements. A delayed work
 * item is used to manage discarding with a timeout determined by the maximum
 * of the delay incurred by the iops rate limit, the byte rate limit, and the
 * max delay of BTRFS_DISCARD_MAX_DELAY_MSEC.
 *
 * Note, this only keeps track of block_groups that are explicitly for data.
 * Mixed block_groups are not supported.
 *
 * The first list is special to manage discarding of fully free block groups.
 * This is necessary because we issue a final trim for a full free block group
 * after forgetting it. When a block group becomes unused, instead of directly
 * being added to the unused_bgs list, we add it to this first list. Then
 * from there, if it becomes fully discarded, we place it onto the unused_bgs
 * list.
 *
 * The in-memory free space cache serves as the backing state for discard.
 * Consequently this means there is no persistence. We opt to load all the
 * block groups in as not discarded, so the mount case degenerates to the
 * crashing case.
 *
 * As the free space cache uses bitmaps, there exists a tradeoff between
 * ease/efficiency for find_free_extent() and the accuracy of discard state.
 * Here we opt to let untrimmed regions merge with everything while only
 * letting trimmed regions merge with other trimmed regions. This can cause
 * overtrimming, but the coalescing benefit seems to be worth it. Additionally,
 * bitmap state is tracked as a whole. If we're able to fully trim a bitmap,
 * the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in,
 * this resets the state and we will retry trimming the whole bitmap. This is a
 * tradeoff between discard state accuracy and the cost of accounting.
 */

/* This is an initial delay to give some chance for block reuse */
#define BTRFS_DISCARD_DELAY		(120ULL * NSEC_PER_SEC)
#define BTRFS_DISCARD_UNUSED_DELAY	(10ULL * NSEC_PER_SEC)

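/*
 * Bounds for the delay between discard work items; the base delay is
 * recomputed within these limits by btrfs_discard_calc_delay(). The iops
 * value is the default limit set in btrfs_discard_init().
 */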
#define BTRFS_DISCARD_MIN_DELAY_MSEC	(1UL)
#define BTRFS_DISCARD_MAX_DELAY_MSEC	(1000UL)
#define BTRFS_DISCARD_MAX_IOPS		(1000U)

/* Monotonically decreasing minimum length filters after index 0 */
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
	0,
	BTRFS_ASYNC_DISCARD_MAX_FILTER,
	BTRFS_ASYNC_DISCARD_MIN_FILTER
};

static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
					  const struct btrfs_block_group *block_group)
{
	return &discard_ctl->discard_list[block_group->discard_index];
}

/*
 * Determine if async discard should be running.
 *
 * @discard_ctl: discard control
 *
 * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
 */
static bool btrfs_run_discard_work(const struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_fs_info *fs_info = container_of(discard_ctl,
						     struct btrfs_fs_info,
						     discard_ctl);

	return (!(fs_info->sb->s_flags & SB_RDONLY) &&
		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}

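/*
 * Add a block group to the tail of its discard list (LRU order), taking a
 * reference only if it was not already queued. A block group that is newly
 * queued or coming off the unused list has its eligible time and cursor state
 * reset, and the unused index is bumped to BTRFS_DISCARD_INDEX_START. Callers
 * must hold discard_ctl->lock.
 */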
static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				  struct btrfs_block_group *block_group)
{
	lockdep_assert_held(&discard_ctl->lock);

	if (list_empty(&block_group->discard_list) ||
	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
		block_group->discard_eligible_time = (ktime_get_ns() +
						      BTRFS_DISCARD_DELAY);
		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	}
	if (list_empty(&block_group->discard_list))
		btrfs_get_block_group(block_group);

	list_move_tail(&block_group->discard_list,
		       get_discard_list(discard_ctl, block_group));
}

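/*
 * Locked wrapper around __add_to_discard_list(). Only data block groups are
 * queued, and only while discard is allowed to run.
 */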
static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				struct btrfs_block_group *block_group)
{
	if (!btrfs_is_block_group_data_only(block_group))
		return;

	if (!btrfs_run_discard_work(discard_ctl))
		return;

	spin_lock(&discard_ctl->lock);
	__add_to_discard_list(discard_ctl, block_group);
	spin_unlock(&discard_ctl->lock);
}

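/*
 * Queue a block group on the special unused list (BTRFS_DISCARD_INDEX_UNUSED)
 * with the shorter BTRFS_DISCARD_UNUSED_DELAY, so a fully freed block group
 * receives its final trim before being handed over to the unused_bgs path.
 */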
static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	bool queued;

	spin_lock(&discard_ctl->lock);

	queued = !list_empty(&block_group->discard_list);

	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	list_del_init(&block_group->discard_list);

	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
	block_group->discard_eligible_time = (ktime_get_ns() +
					      BTRFS_DISCARD_UNUSED_DELAY);
	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	if (!queued)
		btrfs_get_block_group(block_group);
	list_add_tail(&block_group->discard_list,
		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);

	spin_unlock(&discard_ctl->lock);
}

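/*
 * Take a block group off whichever discard list it is on, dropping the list's
 * reference if it was queued. Returns true if the block group was the one
 * currently being discarded, in which case discard_ctl->block_group is
 * cleared as well.
 */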
static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
				     struct btrfs_block_group *block_group)
{
	bool running = false;
	bool queued = false;

	spin_lock(&discard_ctl->lock);

	if (block_group == discard_ctl->block_group) {
		running = true;
		discard_ctl->block_group = NULL;
	}

	block_group->discard_eligible_time = 0;
	queued = !list_empty(&block_group->discard_list);
	list_del_init(&block_group->discard_list);
	if (queued)
		btrfs_put_block_group(block_group);

	spin_unlock(&discard_ctl->lock);

	return running;
}

/*
 * Find block_group that's up next for discarding.
 *
 * @discard_ctl: discard control
 * @now: current time
 *
 * Iterate over the discard lists to find the next block_group up for
 * discarding, checking the discard_eligible_time of the block_group.
 */
static struct btrfs_block_group *find_next_block_group(
					struct btrfs_discard_ctl *discard_ctl,
					u64 now)
{
	struct btrfs_block_group *ret_block_group = NULL, *block_group;
	int i;

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		struct list_head *discard_list = &discard_ctl->discard_list[i];

		if (!list_empty(discard_list)) {
			block_group = list_first_entry(discard_list,
						       struct btrfs_block_group,
						       discard_list);

			if (!ret_block_group)
				ret_block_group = block_group;

			if (ret_block_group->discard_eligible_time < now)
				break;

			if (ret_block_group->discard_eligible_time >
			    block_group->discard_eligible_time)
				ret_block_group = block_group;
		}
	}

	return ret_block_group;
}

/*
 * Look up next block group and set it for use.
 *
 * @discard_ctl: discard control
 * @discard_state: the discard_state of the block_group after state management
 * @discard_index: the discard_index of the block_group after state management
 * @now: time when discard was invoked, in ns
 *
 * Wrap find_next_block_group() and set the block_group to be in use.
 * @discard_state's control flow is managed here. Variables related to
 * @discard_state are reset here as needed (e.g. @discard_cursor). @discard_state
 * and @discard_index are remembered as they may change while we're discarding,
 * but we want the discard to execute in the context determined here.
 */
static struct btrfs_block_group *peek_discard_list(
					struct btrfs_discard_ctl *discard_ctl,
					enum btrfs_discard_state *discard_state,
					int *discard_index, u64 now)
{
	struct btrfs_block_group *block_group;

	spin_lock(&discard_ctl->lock);
again:
	block_group = find_next_block_group(discard_ctl, now);

	if (block_group && now >= block_group->discard_eligible_time) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
		    block_group->used != 0) {
			if (btrfs_is_block_group_data_only(block_group)) {
				__add_to_discard_list(discard_ctl, block_group);
				/*
				 * The block group must have been moved to
				 * another discard list even if discard was
				 * disabled in the meantime or a transaction
				 * abort happened, otherwise we can end up in
				 * an infinite loop, always jumping into the
				 * 'again' label and keep getting this block
				 * group over and over in case there are no
				 * other block groups in the discard lists.
				 */
				ASSERT(block_group->discard_index !=
				       BTRFS_DISCARD_INDEX_UNUSED,
				       "discard_index=%d",
				       block_group->discard_index);
			} else {
				list_del_init(&block_group->discard_list);
				btrfs_put_block_group(block_group);
			}
			goto again;
		}
		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
			block_group->discard_cursor = block_group->start;
			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
		}
	}
	if (block_group) {
		btrfs_get_block_group(block_group);
		discard_ctl->block_group = block_group;
		*discard_state = block_group->discard_state;
		*discard_index = block_group->discard_index;
	}
	spin_unlock(&discard_ctl->lock);

	return block_group;
}

/*
 * Update a block group's filters.
 *
 * @block_group: block group of interest
 * @bytes: recently freed region size after coalescing
 *
 * Async discard maintains multiple lists with progressively smaller filters
 * to prioritize discarding based on size. Should a free space that matches
 * a larger filter be returned to the free_space_cache, prioritize that discard
 * by moving @block_group to the proper filter.
 */
void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
				u64 bytes)
{
	struct btrfs_discard_ctl *discard_ctl;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	discard_ctl = &block_group->fs_info->discard_ctl;

	if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
	    bytes >= discard_minlen[block_group->discard_index - 1]) {
		int i;

		remove_from_discard_list(discard_ctl, block_group);

		for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
		     i++) {
			if (bytes >= discard_minlen[i]) {
				block_group->discard_index = i;
				add_to_discard_list(discard_ctl, block_group);
				break;
			}
		}
	}
}

/*
 * Move a block group along the discard lists.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Increment @block_group's discard_index. If it falls off the list, let it be.
 * Otherwise add it back to the appropriate list.
 */
static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	block_group->discard_index++;
	if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
		block_group->discard_index = 1;
		return;
	}

	add_to_discard_list(discard_ctl, block_group);
}

/*
 * Remove a block_group from the discard lists.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Remove @block_group from the discard lists. If necessary, wait on the
 * current work and then reschedule the delayed work.
 */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
			       struct btrfs_block_group *block_group)
{
	if (remove_from_discard_list(discard_ctl, block_group)) {
		cancel_delayed_work_sync(&discard_ctl->work);
		btrfs_discard_schedule_work(discard_ctl, true);
	}
}

/*
 * Handles queuing the block_groups.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Maintain the LRU order of the discard lists.
 */
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
			      struct btrfs_block_group *block_group)
{
	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	if (block_group->used == 0)
		add_to_discard_unused_list(discard_ctl, block_group);
	else
		add_to_discard_list(discard_ctl, block_group);

	if (!delayed_work_pending(&discard_ctl->work))
		btrfs_discard_schedule_work(discard_ctl, false);
}

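/*
 * Called with discard_ctl->lock held. Compute the delay until the next
 * discard: the base delay_ms, maxed with the kbps rate limit delay and the
 * block group's eligibility timeout, minus the time already elapsed when
 * overriding an armed timer, and (re)arm the delayed work with it.
 */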
static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
					  u64 now, bool override)
{
	struct btrfs_block_group *block_group;

	if (!btrfs_run_discard_work(discard_ctl))
		return;
	if (!override && delayed_work_pending(&discard_ctl->work))
		return;

	block_group = find_next_block_group(discard_ctl, now);
	if (block_group) {
		u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
		u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);

		/*
		 * A single delayed workqueue item is responsible for
		 * discarding, so we can manage the bytes rate limit by keeping
		 * track of the previous discard.
		 */
		if (kbps_limit && discard_ctl->prev_discard) {
			u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
						  NSEC_PER_SEC, bps_limit);

			delay = max(delay, bps_delay);
		}

		/*
		 * This timeout is to hopefully prevent immediate discarding
		 * in a recently allocated block group.
		 */
		if (now < block_group->discard_eligible_time) {
			u64 bg_timeout = block_group->discard_eligible_time - now;

			delay = max(delay, bg_timeout);
		}

		if (override && discard_ctl->prev_discard) {
			u64 elapsed = now - discard_ctl->prev_discard_time;

			if (delay > elapsed)
				delay -= elapsed;
			else
				delay = 0;
		}

		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, nsecs_to_jiffies(delay));
	}
}

/*
 * Responsible for scheduling the discard work.
 *
 * @discard_ctl: discard control
 * @override: override the current timer
 *
 * Discards are issued by a delayed workqueue item. @override is used to
 * update the current delay as the baseline delay interval is reevaluated on
 * transaction commit. This is also maxed with any other rate limit.
 */
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override)
{
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);
	__btrfs_discard_schedule_work(discard_ctl, now, override);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Determine next step of a block_group.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Determine the next step for a block group after it's finished going through
 * a pass on a discard list. If it is unused and fully trimmed, we can mark it
 * unused and send it to the unused_bgs path. Otherwise, pass it onto the
 * appropriate filter list or let it fall off.
 */
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
				      struct btrfs_block_group *block_group)
{
	remove_from_discard_list(discard_ctl, block_group);

	if (block_group->used == 0) {
		if (btrfs_is_free_space_trimmed(block_group))
			btrfs_mark_bg_unused(block_group);
		else
			add_to_discard_unused_list(discard_ctl, block_group);
	} else {
		btrfs_update_discard_index(discard_ctl, block_group);
	}
}

/*
 * Discard work queue callback
 *
 * @work: work
 *
 * Find the next block_group to start discarding and then discard a single
 * region. It does this in a two-pass fashion: first extents and second
 * bitmaps. Completely discarded block groups are sent to the unused_bgs path.
 */
static void btrfs_discard_workfn(struct work_struct *work)
{
	struct btrfs_discard_ctl *discard_ctl;
	struct btrfs_block_group *block_group;
	enum btrfs_discard_state discard_state;
	int discard_index = 0;
	u64 trimmed = 0;
	u64 minlen = 0;
	u64 now = ktime_get_ns();

	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index, now);
	if (!block_group)
		return;
	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_lock(&discard_ctl->lock);
		btrfs_put_block_group(block_group);
		discard_ctl->block_group = NULL;
		spin_unlock(&discard_ctl->lock);
		return;
	}
	if (now < block_group->discard_eligible_time) {
		spin_lock(&discard_ctl->lock);
		btrfs_put_block_group(block_group);
		discard_ctl->block_group = NULL;
		spin_unlock(&discard_ctl->lock);
		btrfs_discard_schedule_work(discard_ctl, false);
		return;
	}

	/* Perform discarding */
	minlen = discard_minlen[discard_index];

	if (discard_state == BTRFS_DISCARD_BITMAPS) {
		u64 maxlen = 0;

		/*
		 * Use the previous level's minimum discard length as the max
		 * length filter. In the case something is added to make a
		 * region go beyond the max filter, the entire bitmap is set
		 * back to BTRFS_TRIM_STATE_UNTRIMMED.
		 */
		if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
			maxlen = discard_minlen[discard_index - 1];

		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, maxlen, true);
		discard_ctl->discard_bitmap_bytes += trimmed;
	} else {
		btrfs_trim_block_group_extents(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, true);
		discard_ctl->discard_extent_bytes += trimmed;
	}

	/* Determine next steps for a block_group */
	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
		if (discard_state == BTRFS_DISCARD_BITMAPS) {
			btrfs_finish_discard_pass(discard_ctl, block_group);
		} else {
			block_group->discard_cursor = block_group->start;
			spin_lock(&discard_ctl->lock);
			if (block_group->discard_state !=
			    BTRFS_DISCARD_RESET_CURSOR)
				block_group->discard_state =
							BTRFS_DISCARD_BITMAPS;
			spin_unlock(&discard_ctl->lock);
		}
	}

	now = ktime_get_ns();
	spin_lock(&discard_ctl->lock);
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = now;
	btrfs_put_block_group(block_group);
	discard_ctl->block_group = NULL;
	__btrfs_discard_schedule_work(discard_ctl, now, false);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Recalculate the base delay.
 *
 * @discard_ctl: discard control
 *
 * Recalculate the base delay which is based off the total number of
 * discardable_extents. Clamp this between the lower_limit (iops_limit or 1ms)
 * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
 */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
{
	s32 discardable_extents;
	s64 discardable_bytes;
	u32 iops_limit;
	unsigned long min_delay = BTRFS_DISCARD_MIN_DELAY_MSEC;
	unsigned long delay;

	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
	if (!discardable_extents)
		return;

	spin_lock(&discard_ctl->lock);

	/*
	 * The following is to fix a potential -1 discrepancy that we're not
	 * sure how to reproduce. But given that this is the only place that
	 * utilizes these numbers and this is only called from
	 * btrfs_finish_extent_commit() which is synchronized, we can correct
	 * here.
	 */
	if (discardable_extents < 0)
		atomic_add(-discardable_extents,
			   &discard_ctl->discardable_extents);

	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
	if (discardable_bytes < 0)
		atomic64_add(-discardable_bytes,
			     &discard_ctl->discardable_bytes);

	if (discardable_extents <= 0) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	iops_limit = READ_ONCE(discard_ctl->iops_limit);

	if (iops_limit) {
		delay = MSEC_PER_SEC / iops_limit;
	} else {
		/*
		 * Unset iops_limit means go as fast as possible, so allow a
		 * delay of 0.
		 */
		delay = 0;
		min_delay = 0;
	}

	delay = clamp(delay, min_delay, BTRFS_DISCARD_MAX_DELAY_MSEC);
	discard_ctl->delay_ms = delay;

	spin_unlock(&discard_ctl->lock);
}

/*
 * Propagate discard counters.
 *
 * @block_group: block_group of interest
 *
 * Propagate deltas of counters up to the discard_ctl. It maintains a current
 * counter and a previous counter passing the delta up to the global stat.
 * Then the current counter value becomes the previous counter value.
 */
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
{
	struct btrfs_free_space_ctl *ctl;
	struct btrfs_discard_ctl *discard_ctl;
	s32 extents_delta;
	s64 bytes_delta;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
	    !btrfs_is_block_group_data_only(block_group))
		return;

	ctl = block_group->free_space_ctl;
	discard_ctl = &block_group->fs_info->discard_ctl;

	lockdep_assert_held(&ctl->tree_lock);
	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
			ctl->discardable_extents[BTRFS_STAT_PREV];
	if (extents_delta) {
		atomic_add(extents_delta, &discard_ctl->discardable_extents);
		ctl->discardable_extents[BTRFS_STAT_PREV] =
			ctl->discardable_extents[BTRFS_STAT_CURR];
	}

	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
		      ctl->discardable_bytes[BTRFS_STAT_PREV];
	if (bytes_delta) {
		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
		ctl->discardable_bytes[BTRFS_STAT_PREV] =
			ctl->discardable_bytes[BTRFS_STAT_CURR];
	}
}

/*
 * Punt unused_bgs list to discard lists.
 *
 * @fs_info: fs_info of interest
 *
 * The unused_bgs list needs to be punted to the discard lists because the
 * order of operations is changed. In the normal synchronous discard path, the
 * block groups are trimmed via a single large trim in transaction commit. This
 * is ultimately what we are trying to avoid with asynchronous discard. Thus,
 * it must be done before going down the unused_bgs path.
 */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group *block_group, *next;

	spin_lock(&fs_info->unused_bgs_lock);
	/* We enabled async discard, so punt all to the queue */
	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
				 bg_list) {
		list_del_init(&block_group->bg_list);
		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
		/*
		 * This put is for the get done by btrfs_mark_bg_unused.
		 * Queueing discard incremented it for discard's reference.
		 */
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}

/*
 * Purge discard lists.
 *
 * @discard_ctl: discard control
 *
 * If we are disabling async discard, we may have intercepted block groups that
 * are completely free and ready for the unused_bgs path. As discarding will
 * now happen in transaction commit or not at all, we can safely mark the
 * corresponding block groups as unused and they will be sent on their merry
 * way to the unused_bgs list.
 */
static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_block_group *block_group, *next;
	int i;

	spin_lock(&discard_ctl->lock);
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		list_for_each_entry_safe(block_group, next,
					 &discard_ctl->discard_list[i],
					 discard_list) {
			list_del_init(&block_group->discard_list);
			spin_unlock(&discard_ctl->lock);
			if (block_group->used == 0)
				btrfs_mark_bg_unused(block_group);
			spin_lock(&discard_ctl->lock);
			btrfs_put_block_group(block_group);
		}
	}
	spin_unlock(&discard_ctl->lock);
}

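/*
 * If async discard is enabled, pick up the block groups already sitting on
 * the unused_bgs list and allow the discard work to run; otherwise tear down
 * any leftover discard state.
 */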
void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_cleanup(fs_info);
		return;
	}

	btrfs_discard_punt_unused_bgs_list(fs_info);

	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

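/*
 * Clear the running flag so no new discard work is scheduled; already queued
 * work is cancelled separately in btrfs_discard_cleanup().
 */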
void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
{
	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

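/* Initialize the discard control structure with default limits and empty lists. */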
void btrfs_discard_init(struct btrfs_fs_info *fs_info)
{
	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
	int i;

	spin_lock_init(&discard_ctl->lock);
	INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);

	discard_ctl->prev_discard = 0;
	discard_ctl->prev_discard_time = 0;
	atomic_set(&discard_ctl->discardable_extents, 0);
	atomic64_set(&discard_ctl->discardable_bytes, 0);
	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
	discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
	discard_ctl->kbps_limit = 0;
	discard_ctl->discard_extent_bytes = 0;
	discard_ctl->discard_bitmap_bytes = 0;
	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
}

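/* Stop discard, cancel any pending work and purge the remaining block groups. */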
void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
{
	btrfs_discard_stop(fs_info);
	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
	btrfs_discard_purge_list(&fs_info->discard_ctl);
}