GitHub Repository: torvalds/linux
Path: blob/master/fs/btrfs/extent-io-tree.c
1
// SPDX-License-Identifier: GPL-2.0
2
3
#include <linux/slab.h>
4
#include <trace/events/btrfs.h>
5
#include "messages.h"
6
#include "ctree.h"
7
#include "extent_io.h"
8
#include "extent-io-tree.h"
9
#include "btrfs_inode.h"
10
11
static struct kmem_cache *extent_state_cache;
12
13
static inline bool extent_state_in_tree(const struct extent_state *state)
14
{
15
return !RB_EMPTY_NODE(&state->rb_node);
16
}
17
18
#ifdef CONFIG_BTRFS_DEBUG
19
static LIST_HEAD(states);
20
static DEFINE_SPINLOCK(leak_lock);
21
22
static inline void btrfs_leak_debug_add_state(struct extent_state *state)
23
{
24
unsigned long flags;
25
26
spin_lock_irqsave(&leak_lock, flags);
27
list_add(&state->leak_list, &states);
28
spin_unlock_irqrestore(&leak_lock, flags);
29
}
30
31
static inline void btrfs_leak_debug_del_state(struct extent_state *state)
32
{
33
unsigned long flags;
34
35
spin_lock_irqsave(&leak_lock, flags);
36
list_del(&state->leak_list);
37
spin_unlock_irqrestore(&leak_lock, flags);
38
}
39
40
static inline void btrfs_extent_state_leak_debug_check(void)
41
{
42
struct extent_state *state;
43
44
while (!list_empty(&states)) {
45
state = list_first_entry(&states, struct extent_state, leak_list);
46
btrfs_err(NULL,
47
"state leak: start %llu end %llu state %u in tree %d refs %d",
48
state->start, state->end, state->state,
49
extent_state_in_tree(state),
50
refcount_read(&state->refs));
51
list_del(&state->leak_list);
52
WARN_ON_ONCE(1);
53
kmem_cache_free(extent_state_cache, state);
54
}
55
}
56
57
#define btrfs_debug_check_extent_io_range(tree, start, end) \
58
__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
59
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
60
struct extent_io_tree *tree,
61
u64 start, u64 end)
62
{
63
const struct btrfs_inode *inode = tree->inode;
64
u64 isize;
65
66
if (tree->owner != IO_TREE_INODE_IO)
67
return;
68
69
isize = i_size_read(&inode->vfs_inode);
70
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
71
btrfs_debug_rl(inode->root->fs_info,
72
"%s: ino %llu isize %llu odd range [%llu,%llu]",
73
caller, btrfs_ino(inode), isize, start, end);
74
}
75
}
76
#else
77
#define btrfs_leak_debug_add_state(state) do {} while (0)
78
#define btrfs_leak_debug_del_state(state) do {} while (0)
79
#define btrfs_extent_state_leak_debug_check() do {} while (0)
80
#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
81
#endif
82
83
/* Read-only access to the inode. */
84
const struct btrfs_inode *btrfs_extent_io_tree_to_inode(const struct extent_io_tree *tree)
85
{
86
if (tree->owner == IO_TREE_INODE_IO)
87
return tree->inode;
88
return NULL;
89
}
90
91
/* For read-only access to fs_info. */
92
const struct btrfs_fs_info *btrfs_extent_io_tree_to_fs_info(const struct extent_io_tree *tree)
93
{
94
if (tree->owner == IO_TREE_INODE_IO)
95
return tree->inode->root->fs_info;
96
return tree->fs_info;
97
}
98
99
void btrfs_extent_io_tree_init(struct btrfs_fs_info *fs_info,
100
struct extent_io_tree *tree, unsigned int owner)
101
{
102
tree->state = RB_ROOT;
103
spin_lock_init(&tree->lock);
104
tree->fs_info = fs_info;
105
tree->owner = owner;
106
}
107
108
/*
109
* Empty an io tree, removing and freeing every extent state record from the
110
* tree. This should be called once we are sure no other task can access the
111
* tree anymore, so no tree updates happen after we empty the tree and there
112
* aren't any waiters on any extent state record (EXTENT_LOCK_BITS are never
113
* set on any extent state when calling this function).
114
*/
115
void btrfs_extent_io_tree_release(struct extent_io_tree *tree)
116
{
117
struct rb_root root;
118
struct extent_state *state;
119
struct extent_state *tmp;
120
121
spin_lock(&tree->lock);
122
root = tree->state;
123
tree->state = RB_ROOT;
124
rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) {
125
/* Clear node to keep free_extent_state() happy. */
126
RB_CLEAR_NODE(&state->rb_node);
127
ASSERT(!(state->state & EXTENT_LOCK_BITS));
128
/*
129
* No need for a memory barrier here, as we are holding the tree
130
* lock and we only change the waitqueue while holding that lock
131
* (see wait_extent_bit()).
132
*/
133
ASSERT(!waitqueue_active(&state->wq));
134
btrfs_free_extent_state(state);
135
cond_resched_lock(&tree->lock);
136
}
137
/*
138
* Should still be empty even after a reschedule, no other task should
139
* be accessing the tree anymore.
140
*/
141
ASSERT(RB_EMPTY_ROOT(&tree->state));
142
spin_unlock(&tree->lock);
143
}
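/*
 * Illustrative sketch (not part of the original file): the typical lifetime
 * of an io tree as implied by the two helpers above. The fs_info pointer and
 * the caller are placeholders; IO_TREE_INODE_IO is the owner constant used
 * elsewhere in this file.
 *
 *	struct extent_io_tree tree;
 *
 *	btrfs_extent_io_tree_init(fs_info, &tree, IO_TREE_INODE_IO);
 *	// ... set/clear bits on byte ranges while the owner is alive ...
 *	// Once no other task can touch the tree and no EXTENT_LOCK_BITS are
 *	// set on any state, drop every remaining extent state record:
 *	btrfs_extent_io_tree_release(&tree);
 */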
144
145
static struct extent_state *alloc_extent_state(gfp_t mask)
146
{
147
struct extent_state *state;
148
149
/*
150
* The given mask might be not appropriate for the slab allocator,
151
* drop the unsupported bits
152
*/
153
mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
154
state = kmem_cache_alloc(extent_state_cache, mask);
155
if (!state)
156
return state;
157
state->state = 0;
158
RB_CLEAR_NODE(&state->rb_node);
159
btrfs_leak_debug_add_state(state);
160
refcount_set(&state->refs, 1);
161
init_waitqueue_head(&state->wq);
162
trace_btrfs_alloc_extent_state(state, mask, _RET_IP_);
163
return state;
164
}
165
166
static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
167
{
168
if (!prealloc)
169
prealloc = alloc_extent_state(GFP_ATOMIC);
170
171
return prealloc;
172
}
173
174
void btrfs_free_extent_state(struct extent_state *state)
175
{
176
if (!state)
177
return;
178
if (refcount_dec_and_test(&state->refs)) {
179
WARN_ON(extent_state_in_tree(state));
180
btrfs_leak_debug_del_state(state);
181
trace_btrfs_free_extent_state(state, _RET_IP_);
182
kmem_cache_free(extent_state_cache, state);
183
}
184
}
185
186
static int add_extent_changeset(struct extent_state *state, u32 bits,
187
struct extent_changeset *changeset,
188
int set)
189
{
190
int ret;
191
192
if (!changeset)
193
return 0;
194
if (set && (state->state & bits) == bits)
195
return 0;
196
if (!set && (state->state & bits) == 0)
197
return 0;
198
changeset->bytes_changed += state->end - state->start + 1;
199
ret = ulist_add(&changeset->range_changed, state->start, state->end,
200
GFP_ATOMIC);
201
return ret;
202
}
203
204
static inline struct extent_state *next_state(struct extent_state *state)
205
{
206
struct rb_node *next = rb_next(&state->rb_node);
207
208
return rb_entry_safe(next, struct extent_state, rb_node);
209
}
210
211
static inline struct extent_state *prev_state(struct extent_state *state)
212
{
213
struct rb_node *next = rb_prev(&state->rb_node);
214
215
return rb_entry_safe(next, struct extent_state, rb_node);
216
}
217
218
/*
219
* Search @tree for an entry that contains @offset or if none exists for the
220
* first entry that starts and ends after that offset.
221
*
222
* @tree: the tree to search
223
* @offset: search offset
224
* @node_ret: pointer where new node should be anchored (used when inserting an
225
* entry in the tree)
226
* @parent_ret: points to entry which would have been the parent of the entry,
227
* containing @offset
228
*
229
* Return a pointer to the entry that contains @offset byte address.
230
*
231
* If no such entry exists, return the first entry that starts and ends after
232
* @offset if one exists, otherwise NULL.
233
*
234
* If the returned entry starts at @offset, then @node_ret and @parent_ret
235
* aren't changed.
236
*/
237
static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
238
u64 offset,
239
struct rb_node ***node_ret,
240
struct rb_node **parent_ret)
241
{
242
struct rb_root *root = &tree->state;
243
struct rb_node **node = &root->rb_node;
244
struct rb_node *prev = NULL;
245
struct extent_state *entry = NULL;
246
247
while (*node) {
248
prev = *node;
249
entry = rb_entry(prev, struct extent_state, rb_node);
250
251
if (offset < entry->start)
252
node = &(*node)->rb_left;
253
else if (offset > entry->end)
254
node = &(*node)->rb_right;
255
else
256
return entry;
257
}
258
259
if (node_ret)
260
*node_ret = node;
261
if (parent_ret)
262
*parent_ret = prev;
263
264
/*
265
* Return either the current entry if it contains offset (it ends after
266
* or at offset) or the first entry that starts and ends after offset if
267
* one exists, or NULL.
268
*/
269
while (entry && offset > entry->end)
270
entry = next_state(entry);
271
272
return entry;
273
}
274
275
/*
276
* Search offset in the tree or fill neighbor rbtree node pointers.
277
*
278
* @tree: the tree to search
279
* @offset: offset that should fall within an entry in @tree
280
* @next_ret: pointer to the first entry whose range ends after @offset
281
* @prev_ret: pointer to the first entry whose range begins before @offset
282
*
283
* Return a pointer to the entry that contains @offset byte address. If no
284
* such entry exists, then return NULL and fill @prev_ret and @next_ret.
285
* Otherwise return the found entry and other pointers are left untouched.
286
*/
287
static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
288
u64 offset,
289
struct extent_state **prev_ret,
290
struct extent_state **next_ret)
291
{
292
struct rb_root *root = &tree->state;
293
struct rb_node **node = &root->rb_node;
294
struct extent_state *orig_prev;
295
struct extent_state *entry = NULL;
296
297
ASSERT(prev_ret);
298
ASSERT(next_ret);
299
300
while (*node) {
301
entry = rb_entry(*node, struct extent_state, rb_node);
302
303
if (offset < entry->start)
304
node = &(*node)->rb_left;
305
else if (offset > entry->end)
306
node = &(*node)->rb_right;
307
else
308
return entry;
309
}
310
311
orig_prev = entry;
312
while (entry && offset > entry->end)
313
entry = next_state(entry);
314
*next_ret = entry;
315
entry = orig_prev;
316
317
while (entry && offset < entry->start)
318
entry = prev_state(entry);
319
*prev_ret = entry;
320
321
return NULL;
322
}
323
324
/*
325
* Inexact rb-tree search, return the next entry if @offset is not found
326
*/
327
static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
328
{
329
return tree_search_for_insert(tree, offset, NULL, NULL);
330
}
331
332
static void __cold extent_io_tree_panic(const struct extent_io_tree *tree,
333
const struct extent_state *state,
334
const char *opname,
335
int err)
336
{
337
btrfs_panic(btrfs_extent_io_tree_to_fs_info(tree), err,
338
"extent io tree error on %s state start %llu end %llu",
339
opname, state->start, state->end);
340
}
341
342
static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state)
343
{
344
struct extent_state *prev;
345
346
prev = prev_state(state);
347
if (prev && prev->end == state->start - 1 && prev->state == state->state) {
348
if (tree->owner == IO_TREE_INODE_IO)
349
btrfs_merge_delalloc_extent(tree->inode, state, prev);
350
state->start = prev->start;
351
rb_erase(&prev->rb_node, &tree->state);
352
RB_CLEAR_NODE(&prev->rb_node);
353
btrfs_free_extent_state(prev);
354
}
355
}
356
357
static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state)
358
{
359
struct extent_state *next;
360
361
next = next_state(state);
362
if (next && next->start == state->end + 1 && next->state == state->state) {
363
if (tree->owner == IO_TREE_INODE_IO)
364
btrfs_merge_delalloc_extent(tree->inode, state, next);
365
state->end = next->end;
366
rb_erase(&next->rb_node, &tree->state);
367
RB_CLEAR_NODE(&next->rb_node);
368
btrfs_free_extent_state(next);
369
}
370
}
371
372
/*
373
* Utility function to look for merge candidates inside a given range. Any
374
* extents with matching state are merged together into a single extent in the
375
* tree. Extents with EXTENT_IO in their state field are not merged because
376
* the end_io handlers need to be able to do operations on them without
377
* sleeping (or doing allocations/splits).
378
*
379
* This should be called with the tree lock held.
380
*/
381
static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
382
{
383
if (state->state & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY))
384
return;
385
386
merge_prev_state(tree, state);
387
merge_next_state(tree, state);
388
}
389
390
static void set_state_bits(struct extent_io_tree *tree,
391
struct extent_state *state,
392
u32 bits, struct extent_changeset *changeset)
393
{
394
u32 bits_to_set = bits & ~EXTENT_CTLBITS;
395
int ret;
396
397
if (tree->owner == IO_TREE_INODE_IO)
398
btrfs_set_delalloc_extent(tree->inode, state, bits);
399
400
ret = add_extent_changeset(state, bits_to_set, changeset, 1);
401
BUG_ON(ret < 0);
402
state->state |= bits_to_set;
403
}
404
405
/*
406
* Insert an extent_state struct into the tree. 'bits' are set on the
407
* struct before it is inserted.
408
*
409
* Returns a pointer to the struct extent_state record containing the range
410
* requested for insertion, which may be the same as the given struct or it
411
* may be an existing record in the tree that was expanded to accommodate the
412
* requested range. In case of an extent_state different from the one that was
413
* given, the latter can be freed or reused by the caller.
414
*
415
* On error it returns an error pointer.
416
*
417
* The tree lock is not taken internally. This is a utility function and
418
* probably isn't what you want to call (see set/clear_extent_bit).
419
*/
420
static struct extent_state *insert_state(struct extent_io_tree *tree,
421
struct extent_state *state,
422
u32 bits,
423
struct extent_changeset *changeset)
424
{
425
struct rb_node **node;
426
struct rb_node *parent = NULL;
427
const u64 start = state->start - 1;
428
const u64 end = state->end + 1;
429
const bool try_merge = !(bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY));
430
431
set_state_bits(tree, state, bits, changeset);
432
433
node = &tree->state.rb_node;
434
while (*node) {
435
struct extent_state *entry;
436
437
parent = *node;
438
entry = rb_entry(parent, struct extent_state, rb_node);
439
440
if (state->end < entry->start) {
441
if (try_merge && end == entry->start &&
442
state->state == entry->state) {
443
if (tree->owner == IO_TREE_INODE_IO)
444
btrfs_merge_delalloc_extent(tree->inode,
445
state, entry);
446
entry->start = state->start;
447
merge_prev_state(tree, entry);
448
state->state = 0;
449
return entry;
450
}
451
node = &(*node)->rb_left;
452
} else if (state->end > entry->end) {
453
if (try_merge && entry->end == start &&
454
state->state == entry->state) {
455
if (tree->owner == IO_TREE_INODE_IO)
456
btrfs_merge_delalloc_extent(tree->inode,
457
state, entry);
458
entry->end = state->end;
459
merge_next_state(tree, entry);
460
state->state = 0;
461
return entry;
462
}
463
node = &(*node)->rb_right;
464
} else {
465
return ERR_PTR(-EEXIST);
466
}
467
}
468
469
rb_link_node(&state->rb_node, parent, node);
470
rb_insert_color(&state->rb_node, &tree->state);
471
472
return state;
473
}
474
475
/*
476
* Insert state to @tree to the location given by @node and @parent.
477
*/
478
static void insert_state_fast(struct extent_io_tree *tree,
479
struct extent_state *state, struct rb_node **node,
480
struct rb_node *parent, unsigned bits,
481
struct extent_changeset *changeset)
482
{
483
set_state_bits(tree, state, bits, changeset);
484
rb_link_node(&state->rb_node, parent, node);
485
rb_insert_color(&state->rb_node, &tree->state);
486
merge_state(tree, state);
487
}
488
489
/*
490
* Split a given extent state struct in two, inserting the preallocated
491
* struct 'prealloc' as the newly created second half. 'split' indicates an
492
* offset inside 'orig' where it should be split.
493
*
494
* Before calling,
495
* the tree has 'orig' at [orig->start, orig->end]. After calling, there
496
* are two extent state structs in the tree:
497
* prealloc: [orig->start, split - 1]
498
* orig: [ split, orig->end ]
499
*
500
* The tree locks are not taken by this function. They need to be held
501
* by the caller.
502
*/
503
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
504
struct extent_state *prealloc, u64 split)
505
{
506
struct rb_node *parent = NULL;
507
struct rb_node **node;
508
509
if (tree->owner == IO_TREE_INODE_IO)
510
btrfs_split_delalloc_extent(tree->inode, orig, split);
511
512
prealloc->start = orig->start;
513
prealloc->end = split - 1;
514
prealloc->state = orig->state;
515
orig->start = split;
516
517
parent = &orig->rb_node;
518
node = &parent;
519
while (*node) {
520
struct extent_state *entry;
521
522
parent = *node;
523
entry = rb_entry(parent, struct extent_state, rb_node);
524
525
if (prealloc->end < entry->start) {
526
node = &(*node)->rb_left;
527
} else if (prealloc->end > entry->end) {
528
node = &(*node)->rb_right;
529
} else {
530
btrfs_free_extent_state(prealloc);
531
return -EEXIST;
532
}
533
}
534
535
rb_link_node(&prealloc->rb_node, parent, node);
536
rb_insert_color(&prealloc->rb_node, &tree->state);
537
538
return 0;
539
}
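/*
 * Illustrative sketch (hypothetical numbers, not from the original source):
 * a concrete example of what split_state() does. Suppose the tree holds a
 * single state covering [0, 8191] and a caller only needs [4096, 8191]:
 *
 *	split_state(tree, orig, prealloc, 4096);
 *
 * Afterwards the tree contains two records with the same state bits:
 *
 *	prealloc: [0, 4095]
 *	orig:     [4096, 8191]
 *
 * The caller still holds the tree lock and typically sets or clears bits on
 * 'orig' (the second half) next.
 */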
540
541
/*
542
* Use this during tree iteration to avoid doing next node searches when it's
543
* not needed (the current record ends at or after the target range's end).
544
*/
545
static inline struct extent_state *next_search_state(struct extent_state *state, u64 end)
546
{
547
if (state->end < end)
548
return next_state(state);
549
550
return NULL;
551
}
552
553
/*
554
* Utility function to clear some bits in an extent state struct. It will
555
* optionally wake up anyone waiting on this state (wake == 1).
556
*
557
* If no bits are set on the state struct after clearing things, the
558
* struct is freed and removed from the tree.
559
*/
560
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
561
struct extent_state *state,
562
u32 bits, int wake, u64 end,
563
struct extent_changeset *changeset)
564
{
565
struct extent_state *next;
566
u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
567
int ret;
568
569
if (tree->owner == IO_TREE_INODE_IO)
570
btrfs_clear_delalloc_extent(tree->inode, state, bits);
571
572
ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
573
BUG_ON(ret < 0);
574
state->state &= ~bits_to_clear;
575
if (wake)
576
wake_up(&state->wq);
577
if (state->state == 0) {
578
next = next_search_state(state, end);
579
if (extent_state_in_tree(state)) {
580
rb_erase(&state->rb_node, &tree->state);
581
RB_CLEAR_NODE(&state->rb_node);
582
btrfs_free_extent_state(state);
583
} else {
584
WARN_ON(1);
585
}
586
} else {
587
merge_state(tree, state);
588
next = next_search_state(state, end);
589
}
590
return next;
591
}
592
593
/*
594
* Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly,
595
* unset the EXTENT_NOWAIT bit.
596
*/
597
static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
598
{
599
*mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
600
*bits &= EXTENT_NOWAIT - 1;
601
}
602
603
/*
604
* Clear some bits on a range in the tree. This may require splitting or
605
* inserting elements in the tree, so the gfp mask is used to indicate which
606
* allocations or sleeping are allowed.
607
*
608
* The range [start, end] is inclusive.
609
*
610
* This takes the tree lock, and returns 0 on success and < 0 on error.
611
*/
612
int btrfs_clear_extent_bit_changeset(struct extent_io_tree *tree, u64 start, u64 end,
613
u32 bits, struct extent_state **cached_state,
614
struct extent_changeset *changeset)
615
{
616
struct extent_state *state;
617
struct extent_state *cached;
618
struct extent_state *prealloc = NULL;
619
u64 last_end;
620
int ret = 0;
621
bool clear;
622
bool wake;
623
const bool delete = (bits & EXTENT_CLEAR_ALL_BITS);
624
gfp_t mask;
625
626
set_gfp_mask_from_bits(&bits, &mask);
627
btrfs_debug_check_extent_io_range(tree, start, end);
628
trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
629
630
if (delete)
631
bits |= ~EXTENT_CTLBITS;
632
633
if (bits & EXTENT_DELALLOC)
634
bits |= EXTENT_NORESERVE;
635
636
wake = (bits & EXTENT_LOCK_BITS);
637
clear = (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY));
638
again:
639
if (!prealloc) {
640
/*
641
* Don't care for allocation failure here because we might end
642
* up not needing the pre-allocated extent state at all, which
643
* is the case if we only have in the tree extent states that
644
* cover our input range and don't cover any other range.
645
* If we end up needing a new extent state we allocate it later.
646
*/
647
prealloc = alloc_extent_state(mask);
648
}
649
650
spin_lock(&tree->lock);
651
if (cached_state) {
652
cached = *cached_state;
653
654
if (clear) {
655
*cached_state = NULL;
656
cached_state = NULL;
657
}
658
659
if (cached && extent_state_in_tree(cached) &&
660
cached->start <= start && cached->end > start) {
661
if (clear)
662
refcount_dec(&cached->refs);
663
state = cached;
664
goto hit_next;
665
}
666
if (clear)
667
btrfs_free_extent_state(cached);
668
}
669
670
/* This search will find the extents that end after our range starts. */
671
state = tree_search(tree, start);
672
if (!state)
673
goto out;
674
hit_next:
675
if (state->start > end)
676
goto out;
677
WARN_ON(state->end < start);
678
last_end = state->end;
679
680
/* The state doesn't have the wanted bits, go ahead. */
681
if (!(state->state & bits)) {
682
state = next_search_state(state, end);
683
goto next;
684
}
685
686
/*
687
* | ---- desired range ---- |
688
* | state | or
689
* | ------------- state -------------- |
690
*
691
* We need to split the extent we found, and may flip bits on second
692
* half.
693
*
694
* If the extent we found extends past our range, we just split and
695
* search again. It'll get split again the next time though.
696
*
697
* If the extent we found is inside our range, we clear the desired bit
698
* on it.
699
*/
700
701
if (state->start < start) {
702
prealloc = alloc_extent_state_atomic(prealloc);
703
if (!prealloc)
704
goto search_again;
705
ret = split_state(tree, state, prealloc, start);
706
prealloc = NULL;
707
if (ret) {
708
extent_io_tree_panic(tree, state, "split", ret);
709
goto out;
710
}
711
if (state->end <= end) {
712
state = clear_state_bit(tree, state, bits, wake, end,
713
changeset);
714
goto next;
715
}
716
if (need_resched())
717
goto search_again;
718
/*
719
* Fallthrough and try atomic extent state allocation if needed.
720
* If it fails we'll jump to 'search_again' to retry the allocation
721
* in non-atomic mode and start the search again.
722
*/
723
}
724
/*
725
* | ---- desired range ---- |
726
* | state |
727
* We need to split the extent, and clear the bit on the first half.
728
*/
729
if (state->start <= end && state->end > end) {
730
prealloc = alloc_extent_state_atomic(prealloc);
731
if (!prealloc)
732
goto search_again;
733
ret = split_state(tree, state, prealloc, end + 1);
734
if (ret) {
735
extent_io_tree_panic(tree, state, "split", ret);
736
prealloc = NULL;
737
goto out;
738
}
739
740
if (wake)
741
wake_up(&state->wq);
742
743
clear_state_bit(tree, prealloc, bits, wake, end, changeset);
744
745
prealloc = NULL;
746
goto out;
747
}
748
749
state = clear_state_bit(tree, state, bits, wake, end, changeset);
750
next:
751
if (last_end >= end)
752
goto out;
753
start = last_end + 1;
754
if (state && !need_resched())
755
goto hit_next;
756
757
search_again:
758
spin_unlock(&tree->lock);
759
if (gfpflags_allow_blocking(mask))
760
cond_resched();
761
goto again;
762
763
out:
764
spin_unlock(&tree->lock);
765
btrfs_free_extent_state(prealloc);
766
767
return ret;
768
769
}
770
771
/*
772
* Wait for one or more bits to clear on a range in the state tree.
773
* The range [start, end] is inclusive.
774
* The tree lock is taken by this function
775
*/
776
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
777
u32 bits, struct extent_state **cached_state)
778
{
779
struct extent_state *state;
780
781
btrfs_debug_check_extent_io_range(tree, start, end);
782
783
spin_lock(&tree->lock);
784
again:
785
/*
786
* Maintain cached_state, as we may not remove it from the tree if there
787
* are more bits than the bits we're waiting on set on this state.
788
*/
789
if (cached_state && *cached_state) {
790
state = *cached_state;
791
if (extent_state_in_tree(state) &&
792
state->start <= start && start < state->end)
793
goto process_node;
794
}
795
while (1) {
796
/*
797
* This search will find all the extents that end after our
798
* range starts.
799
*/
800
state = tree_search(tree, start);
801
process_node:
802
if (!state)
803
break;
804
if (state->start > end)
805
goto out;
806
807
if (state->state & bits) {
808
DEFINE_WAIT(wait);
809
810
start = state->start;
811
refcount_inc(&state->refs);
812
prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
813
spin_unlock(&tree->lock);
814
schedule();
815
spin_lock(&tree->lock);
816
finish_wait(&state->wq, &wait);
817
btrfs_free_extent_state(state);
818
goto again;
819
}
820
start = state->end + 1;
821
822
if (start > end)
823
break;
824
825
if (!cond_resched_lock(&tree->lock)) {
826
state = next_state(state);
827
goto process_node;
828
}
829
}
830
out:
831
/* This state is no longer useful, clear it and free it up. */
832
if (cached_state && *cached_state) {
833
state = *cached_state;
834
*cached_state = NULL;
835
btrfs_free_extent_state(state);
836
}
837
spin_unlock(&tree->lock);
838
}
839
840
static void cache_state_if_flags(struct extent_state *state,
841
struct extent_state **cached_ptr,
842
unsigned flags)
843
{
844
if (cached_ptr && !(*cached_ptr)) {
845
if (!flags || (state->state & flags)) {
846
*cached_ptr = state;
847
refcount_inc(&state->refs);
848
}
849
}
850
}
851
852
static void cache_state(struct extent_state *state,
853
struct extent_state **cached_ptr)
854
{
855
return cache_state_if_flags(state, cached_ptr, EXTENT_LOCK_BITS | EXTENT_BOUNDARY);
856
}
857
858
/*
859
* Find the first state struct with 'bits' set after 'start', and return it.
860
* tree->lock must be held. NULL will be returned if nothing was found after
861
* 'start'.
862
*/
863
static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
864
u64 start, u32 bits)
865
{
866
struct extent_state *state;
867
868
/*
869
* This search will find all the extents that end after our range
870
* starts.
871
*/
872
state = tree_search(tree, start);
873
while (state) {
874
if (state->state & bits)
875
return state;
876
state = next_state(state);
877
}
878
return NULL;
879
}
880
881
/*
882
* Find the first offset in the io tree with one or more @bits set.
883
*
884
* Note: If there are multiple bits set in @bits, any of them will match.
885
*
886
* Return true if we find something, and update @start_ret and @end_ret.
887
* Return false if we found nothing.
888
*/
889
bool btrfs_find_first_extent_bit(struct extent_io_tree *tree, u64 start,
890
u64 *start_ret, u64 *end_ret, u32 bits,
891
struct extent_state **cached_state)
892
{
893
struct extent_state *state;
894
bool ret = false;
895
896
spin_lock(&tree->lock);
897
if (cached_state && *cached_state) {
898
state = *cached_state;
899
if (state->end == start - 1 && extent_state_in_tree(state)) {
900
while ((state = next_state(state)) != NULL) {
901
if (state->state & bits)
902
break;
903
}
904
/*
905
* If we found the next extent state, clear cached_state
906
* so that we can cache the next extent state below and
907
* avoid future calls going over the same extent state
908
* again. If we haven't found any, clear as well since
909
* it's now useless.
910
*/
911
btrfs_free_extent_state(*cached_state);
912
*cached_state = NULL;
913
if (state)
914
goto got_it;
915
goto out;
916
}
917
btrfs_free_extent_state(*cached_state);
918
*cached_state = NULL;
919
}
920
921
state = find_first_extent_bit_state(tree, start, bits);
922
got_it:
923
if (state) {
924
cache_state_if_flags(state, cached_state, 0);
925
*start_ret = state->start;
926
*end_ret = state->end;
927
ret = true;
928
}
929
out:
930
spin_unlock(&tree->lock);
931
return ret;
932
}
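/*
 * Illustrative sketch (the tree variable and starting offset are
 * placeholders): walking all ranges that have a given bit set, reusing the
 * cached state so each call continues where the previous one ended.
 * EXTENT_DELALLOC is a bit used elsewhere in this file.
 *
 *	struct extent_state *cached = NULL;
 *	u64 start = 0, found_start, found_end;
 *
 *	while (btrfs_find_first_extent_bit(tree, start, &found_start,
 *					   &found_end, EXTENT_DELALLOC,
 *					   &cached)) {
 *		// [found_start, found_end] has EXTENT_DELALLOC set
 *		start = found_end + 1;
 *	}
 *	btrfs_free_extent_state(cached);
 */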
933
934
/*
935
* Find a contiguous area of bits
936
*
937
* @tree: io tree to check
938
* @start: offset to start the search from
939
* @start_ret: the first offset we found with the bits set
940
* @end_ret: the final contiguous range of the bits that were set
941
* @bits: bits to look for
942
*
943
* set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
944
* to set bits appropriately, and then merge them again. During this time it
945
* will drop the tree->lock, so use this helper if you want to find the actual
946
* contiguous area for given bits. We will search to the first bit we find, and
947
* then walk down the tree until we find a non-contiguous area. The area
948
* returned will be the full contiguous area with the bits set.
949
*
950
* Returns true if we found a range with the given bits set, in which case
951
* @start_ret and @end_ret are updated, or false if no range was found.
952
*/
953
bool btrfs_find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
954
u64 *start_ret, u64 *end_ret, u32 bits)
955
{
956
struct extent_state *state;
957
bool ret = false;
958
959
ASSERT(!btrfs_fs_incompat(btrfs_extent_io_tree_to_fs_info(tree), NO_HOLES));
960
961
spin_lock(&tree->lock);
962
state = find_first_extent_bit_state(tree, start, bits);
963
if (state) {
964
*start_ret = state->start;
965
*end_ret = state->end;
966
while ((state = next_state(state)) != NULL) {
967
if (state->start > (*end_ret + 1))
968
break;
969
*end_ret = state->end;
970
}
971
ret = true;
972
}
973
spin_unlock(&tree->lock);
974
return ret;
975
}
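/*
 * Illustrative sketch (tree and offset are placeholders): unlike the
 * record-by-record search above, this helper follows adjacent records, so a
 * single call reports the full contiguous run of set bits starting at or
 * after the given offset.
 *
 *	u64 range_start, range_end;
 *
 *	if (btrfs_find_contiguous_extent_bit(tree, offset, &range_start,
 *					     &range_end, EXTENT_DELALLOC)) {
 *		// [range_start, range_end] is the whole contiguous area,
 *		// even if it is stored as several extent_state records.
 *	}
 */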
976
977
/*
978
* Find a contiguous range of bytes in the file marked as delalloc, not more
979
* than 'max_bytes'. start and end are used to return the range.
980
*
981
* True is returned if we find something, false if nothing was in the tree.
982
*/
983
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
984
u64 *end, u64 max_bytes,
985
struct extent_state **cached_state)
986
{
987
struct extent_state *state;
988
u64 cur_start = *start;
989
bool found = false;
990
u64 total_bytes = 0;
991
992
spin_lock(&tree->lock);
993
994
/*
995
* This search will find all the extents that end after our range
996
* starts.
997
*/
998
state = tree_search(tree, cur_start);
999
if (!state) {
1000
*end = (u64)-1;
1001
goto out;
1002
}
1003
1004
while (state) {
1005
if (found && (state->start != cur_start ||
1006
(state->state & EXTENT_BOUNDARY))) {
1007
goto out;
1008
}
1009
if (!(state->state & EXTENT_DELALLOC)) {
1010
if (!found)
1011
*end = state->end;
1012
goto out;
1013
}
1014
if (!found) {
1015
*start = state->start;
1016
*cached_state = state;
1017
refcount_inc(&state->refs);
1018
}
1019
found = true;
1020
*end = state->end;
1021
cur_start = state->end + 1;
1022
total_bytes += state->end - state->start + 1;
1023
if (total_bytes >= max_bytes)
1024
break;
1025
state = next_state(state);
1026
}
1027
out:
1028
spin_unlock(&tree->lock);
1029
return found;
1030
}
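/*
 * Illustrative sketch (tree, page_offset and max_bytes are placeholders):
 * how a caller is expected to use btrfs_find_delalloc_range(). The start
 * argument is both an input (where to begin) and an output (start of the
 * delalloc range found), and the cached state it returns must be released
 * by the caller.
 *
 *	struct extent_state *cached = NULL;
 *	u64 delalloc_start = page_offset;
 *	u64 delalloc_end;
 *
 *	if (btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
 *				      max_bytes, &cached)) {
 *		// [delalloc_start, delalloc_end] is delalloc, capped at
 *		// roughly max_bytes worth of bytes.
 *	}
 *	btrfs_free_extent_state(cached);
 */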
1031
1032
/*
1033
* Set some bits on a range in the tree. This may require allocations or
1034
* sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
1035
* GFP_NOWAIT.
1036
*
1037
* If any of the exclusive bits are set, this will fail with -EEXIST if some
1038
* part of the range already has the desired bits set. The extent_state of the
1039
* existing range is returned in failed_state in this case, and the start of the
1040
* existing range is returned in failed_start. failed_state is used as an
1041
* optimization for wait_extent_bit, failed_start must be used as the source of
1042
* truth as failed_state may have changed since we returned.
1043
*
1044
* [start, end] is inclusive. This takes the tree lock.
1045
*/
1046
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1047
u32 bits, u64 *failed_start,
1048
struct extent_state **failed_state,
1049
struct extent_state **cached_state,
1050
struct extent_changeset *changeset)
1051
{
1052
struct extent_state *state;
1053
struct extent_state *prealloc = NULL;
1054
struct rb_node **p = NULL;
1055
struct rb_node *parent = NULL;
1056
int ret = 0;
1057
u64 last_start;
1058
u64 last_end;
1059
u32 exclusive_bits = (bits & EXTENT_LOCK_BITS);
1060
gfp_t mask;
1061
1062
set_gfp_mask_from_bits(&bits, &mask);
1063
btrfs_debug_check_extent_io_range(tree, start, end);
1064
trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
1065
1066
if (exclusive_bits)
1067
ASSERT(failed_start);
1068
else
1069
ASSERT(failed_start == NULL && failed_state == NULL);
1070
again:
1071
if (!prealloc) {
1072
/*
1073
* Don't care for allocation failure here because we might end
1074
* up not needing the pre-allocated extent state at all, which
1075
* is the case if we only have in the tree extent states that
1076
* cover our input range and don't cover too any other range.
1077
* If we end up needing a new extent state we allocate it later.
1078
*/
1079
prealloc = alloc_extent_state(mask);
1080
}
1081
/* Optimistically preallocate the extent changeset ulist node. */
1082
if (changeset)
1083
extent_changeset_prealloc(changeset, mask);
1084
1085
spin_lock(&tree->lock);
1086
if (cached_state && *cached_state) {
1087
state = *cached_state;
1088
if (state->start <= start && state->end > start &&
1089
extent_state_in_tree(state))
1090
goto hit_next;
1091
}
1092
/*
1093
* This search will find all the extents that end after our range
1094
* starts.
1095
*/
1096
state = tree_search_for_insert(tree, start, &p, &parent);
1097
if (!state) {
1098
prealloc = alloc_extent_state_atomic(prealloc);
1099
if (!prealloc)
1100
goto search_again;
1101
prealloc->start = start;
1102
prealloc->end = end;
1103
insert_state_fast(tree, prealloc, p, parent, bits, changeset);
1104
cache_state(prealloc, cached_state);
1105
prealloc = NULL;
1106
goto out;
1107
}
1108
hit_next:
1109
last_start = state->start;
1110
last_end = state->end;
1111
1112
/*
1113
* | ---- desired range ---- |
1114
* | state |
1115
*
1116
* Just lock what we found and keep going
1117
*/
1118
if (state->start == start && state->end <= end) {
1119
if (state->state & exclusive_bits) {
1120
*failed_start = state->start;
1121
cache_state(state, failed_state);
1122
ret = -EEXIST;
1123
goto out;
1124
}
1125
1126
set_state_bits(tree, state, bits, changeset);
1127
cache_state(state, cached_state);
1128
merge_state(tree, state);
1129
if (last_end >= end)
1130
goto out;
1131
start = last_end + 1;
1132
state = next_state(state);
1133
if (state && state->start == start && !need_resched())
1134
goto hit_next;
1135
goto search_again;
1136
}
1137
1138
/*
1139
* | ---- desired range ---- |
1140
* | state |
1141
* or
1142
* | ------------- state -------------- |
1143
*
1144
* We need to split the extent we found, and may flip bits on second
1145
* half.
1146
*
1147
* If the extent we found extends past our range, we just split and
1148
* search again. It'll get split again the next time though.
1149
*
1150
* If the extent we found is inside our range, we set the desired bit
1151
* on it.
1152
*/
1153
if (state->start < start) {
1154
if (state->state & exclusive_bits) {
1155
*failed_start = start;
1156
cache_state(state, failed_state);
1157
ret = -EEXIST;
1158
goto out;
1159
}
1160
1161
/*
1162
* If this extent already has all the bits we want set, then
1163
* skip it, not necessary to split it or do anything with it.
1164
*/
1165
if ((state->state & bits) == bits) {
1166
start = state->end + 1;
1167
cache_state(state, cached_state);
1168
goto search_again;
1169
}
1170
1171
prealloc = alloc_extent_state_atomic(prealloc);
1172
if (!prealloc)
1173
goto search_again;
1174
ret = split_state(tree, state, prealloc, start);
1175
if (ret)
1176
extent_io_tree_panic(tree, state, "split", ret);
1177
1178
prealloc = NULL;
1179
if (ret)
1180
goto out;
1181
if (state->end <= end) {
1182
set_state_bits(tree, state, bits, changeset);
1183
cache_state(state, cached_state);
1184
merge_state(tree, state);
1185
if (last_end >= end)
1186
goto out;
1187
start = last_end + 1;
1188
state = next_state(state);
1189
if (state && state->start == start && !need_resched())
1190
goto hit_next;
1191
}
1192
goto search_again;
1193
}
1194
/*
1195
* | ---- desired range ---- |
1196
* | state | or | state |
1197
*
1198
* There's a hole, we need to insert something in it and ignore the
1199
* extent we found.
1200
*/
1201
if (state->start > start) {
1202
struct extent_state *inserted_state;
1203
1204
prealloc = alloc_extent_state_atomic(prealloc);
1205
if (!prealloc)
1206
goto search_again;
1207
1208
/*
1209
* Avoid freeing 'prealloc' if it can be merged with the later
1210
* extent.
1211
*/
1212
prealloc->start = start;
1213
if (end < last_start)
1214
prealloc->end = end;
1215
else
1216
prealloc->end = last_start - 1;
1217
1218
inserted_state = insert_state(tree, prealloc, bits, changeset);
1219
if (IS_ERR(inserted_state)) {
1220
ret = PTR_ERR(inserted_state);
1221
extent_io_tree_panic(tree, prealloc, "insert", ret);
1222
goto out;
1223
}
1224
1225
cache_state(inserted_state, cached_state);
1226
if (inserted_state == prealloc)
1227
prealloc = NULL;
1228
start = inserted_state->end + 1;
1229
1230
/* Beyond target range, stop. */
1231
if (start > end)
1232
goto out;
1233
1234
if (need_resched())
1235
goto search_again;
1236
1237
state = next_search_state(inserted_state, end);
1238
/*
1239
* If there's a next state, whether contiguous or not, we don't
1240
* need to unlock and start search again. If it's not contiguous
1241
* we will end up here and try to allocate a prealloc state and insert.
1242
*/
1243
if (state)
1244
goto hit_next;
1245
goto search_again;
1246
}
1247
/*
1248
* | ---- desired range ---- |
1249
* | state |
1250
*
1251
* We need to split the extent, and set the bit on the first half
1252
*/
1253
if (state->start <= end && state->end > end) {
1254
if (state->state & exclusive_bits) {
1255
*failed_start = start;
1256
cache_state(state, failed_state);
1257
ret = -EEXIST;
1258
goto out;
1259
}
1260
1261
prealloc = alloc_extent_state_atomic(prealloc);
1262
if (!prealloc)
1263
goto search_again;
1264
ret = split_state(tree, state, prealloc, end + 1);
1265
if (ret) {
1266
extent_io_tree_panic(tree, state, "split", ret);
1267
prealloc = NULL;
1268
goto out;
1269
}
1270
1271
set_state_bits(tree, prealloc, bits, changeset);
1272
cache_state(prealloc, cached_state);
1273
merge_state(tree, prealloc);
1274
prealloc = NULL;
1275
goto out;
1276
}
1277
1278
search_again:
1279
if (start > end)
1280
goto out;
1281
spin_unlock(&tree->lock);
1282
if (gfpflags_allow_blocking(mask))
1283
cond_resched();
1284
goto again;
1285
1286
out:
1287
spin_unlock(&tree->lock);
1288
btrfs_free_extent_state(prealloc);
1289
1290
return ret;
1291
1292
}
1293
1294
int btrfs_set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1295
u32 bits, struct extent_state **cached_state)
1296
{
1297
return set_extent_bit(tree, start, end, bits, NULL, NULL, cached_state, NULL);
1298
}
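/*
 * Illustrative sketch (tree, start and end are placeholders): setting and
 * clearing bits on a range. By default allocations may sleep (GFP_NOFS);
 * passing EXTENT_NOWAIT in the bits switches them to GFP_NOWAIT, as handled
 * by set_gfp_mask_from_bits() above. btrfs_clear_extent_bit() is assumed to
 * be the clearing counterpart declared in the header; it is used further
 * below in this file.
 *
 *	struct extent_state *cached = NULL;
 *
 *	btrfs_set_extent_bit(tree, start, end, EXTENT_DELALLOC, &cached);
 *	// ... work on the range ...
 *	btrfs_clear_extent_bit(tree, start, end, EXTENT_DELALLOC, &cached);
 *	btrfs_free_extent_state(cached);
 *
 *	// Non-blocking allocations for contexts that must not sleep:
 *	btrfs_set_extent_bit(tree, start, end,
 *			     EXTENT_DELALLOC | EXTENT_NOWAIT, NULL);
 */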
1299
1300
/*
1301
* Convert all bits in a given range from one bit to another
1302
*
1303
* @tree: the io tree to search
1304
* @start: the start offset in bytes
1305
* @end: the end offset in bytes (inclusive)
1306
* @bits: the bits to set in this range
1307
* @clear_bits: the bits to clear in this range
1308
* @cached_state: state that we're going to cache
1309
*
1310
* This will go through and set bits for the given range. If any states exist
1311
* already in this range they are set with the given bit and cleared of the
1312
* clear_bits. This is only meant to be used by things that are mergeable, i.e.
1313
* converting from say DELALLOC to DIRTY. This is not meant to be used with
1314
* boundary bits like LOCK.
1315
*
1316
* All allocations are done with GFP_NOFS.
1317
*/
1318
int btrfs_convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1319
u32 bits, u32 clear_bits,
1320
struct extent_state **cached_state)
1321
{
1322
struct extent_state *state;
1323
struct extent_state *prealloc = NULL;
1324
struct rb_node **p = NULL;
1325
struct rb_node *parent = NULL;
1326
int ret = 0;
1327
u64 last_start;
1328
u64 last_end;
1329
bool first_iteration = true;
1330
1331
btrfs_debug_check_extent_io_range(tree, start, end);
1332
trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
1333
clear_bits);
1334
1335
again:
1336
if (!prealloc) {
1337
/*
1338
* Best effort, don't worry if extent state allocation fails
1339
* here for the first iteration. We might have a cached state
1340
* that matches exactly the target range, in which case no
1341
* extent state allocations are needed. We'll only know this
1342
* after locking the tree.
1343
*/
1344
prealloc = alloc_extent_state(GFP_NOFS);
1345
if (!prealloc && !first_iteration)
1346
return -ENOMEM;
1347
}
1348
1349
spin_lock(&tree->lock);
1350
if (cached_state && *cached_state) {
1351
state = *cached_state;
1352
if (state->start <= start && state->end > start &&
1353
extent_state_in_tree(state))
1354
goto hit_next;
1355
}
1356
1357
/*
1358
* This search will find all the extents that end after our range
1359
* starts.
1360
*/
1361
state = tree_search_for_insert(tree, start, &p, &parent);
1362
if (!state) {
1363
prealloc = alloc_extent_state_atomic(prealloc);
1364
if (!prealloc) {
1365
ret = -ENOMEM;
1366
goto out;
1367
}
1368
prealloc->start = start;
1369
prealloc->end = end;
1370
insert_state_fast(tree, prealloc, p, parent, bits, NULL);
1371
cache_state(prealloc, cached_state);
1372
prealloc = NULL;
1373
goto out;
1374
}
1375
hit_next:
1376
last_start = state->start;
1377
last_end = state->end;
1378
1379
/*
1380
* | ---- desired range ---- |
1381
* | state |
1382
*
1383
* Just lock what we found and keep going.
1384
*/
1385
if (state->start == start && state->end <= end) {
1386
set_state_bits(tree, state, bits, NULL);
1387
cache_state(state, cached_state);
1388
state = clear_state_bit(tree, state, clear_bits, 0, end, NULL);
1389
if (last_end >= end)
1390
goto out;
1391
start = last_end + 1;
1392
if (state && state->start == start && !need_resched())
1393
goto hit_next;
1394
goto search_again;
1395
}
1396
1397
/*
1398
* | ---- desired range ---- |
1399
* | state |
1400
* or
1401
* | ------------- state -------------- |
1402
*
1403
* We need to split the extent we found, and may flip bits on second
1404
* half.
1405
*
1406
* If the extent we found extends past our range, we just split and
1407
* search again. It'll get split again the next time though.
1408
*
1409
* If the extent we found is inside our range, we set the desired bit
1410
* on it.
1411
*/
1412
if (state->start < start) {
1413
prealloc = alloc_extent_state_atomic(prealloc);
1414
if (!prealloc) {
1415
ret = -ENOMEM;
1416
goto out;
1417
}
1418
ret = split_state(tree, state, prealloc, start);
1419
prealloc = NULL;
1420
if (ret) {
1421
extent_io_tree_panic(tree, state, "split", ret);
1422
goto out;
1423
}
1424
if (state->end <= end) {
1425
set_state_bits(tree, state, bits, NULL);
1426
cache_state(state, cached_state);
1427
state = clear_state_bit(tree, state, clear_bits, 0, end, NULL);
1428
if (last_end >= end)
1429
goto out;
1430
start = last_end + 1;
1431
if (state && state->start == start && !need_resched())
1432
goto hit_next;
1433
}
1434
goto search_again;
1435
}
1436
/*
1437
* | ---- desired range ---- |
1438
* | state | or | state |
1439
*
1440
* There's a hole, we need to insert something in it and ignore the
1441
* extent we found.
1442
*/
1443
if (state->start > start) {
1444
struct extent_state *inserted_state;
1445
1446
prealloc = alloc_extent_state_atomic(prealloc);
1447
if (!prealloc) {
1448
ret = -ENOMEM;
1449
goto out;
1450
}
1451
1452
/*
1453
* Avoid to free 'prealloc' if it can be merged with the later
1454
* extent.
1455
*/
1456
prealloc->start = start;
1457
if (end < last_start)
1458
prealloc->end = end;
1459
else
1460
prealloc->end = last_start - 1;
1461
1462
inserted_state = insert_state(tree, prealloc, bits, NULL);
1463
if (IS_ERR(inserted_state)) {
1464
ret = PTR_ERR(inserted_state);
1465
extent_io_tree_panic(tree, prealloc, "insert", ret);
1466
goto out;
1467
}
1468
cache_state(inserted_state, cached_state);
1469
if (inserted_state == prealloc)
1470
prealloc = NULL;
1471
start = inserted_state->end + 1;
1472
1473
/* Beyond target range, stop. */
1474
if (start > end)
1475
goto out;
1476
1477
if (need_resched())
1478
goto search_again;
1479
1480
state = next_search_state(inserted_state, end);
1481
/*
1482
* If there's a next state, whether contiguous or not, we don't
1483
* need to unlock and start search again. If it's not contiguous
1484
* we will end up here and try to allocate a prealloc state and insert.
1485
*/
1486
if (state)
1487
goto hit_next;
1488
goto search_again;
1489
}
1490
/*
1491
* | ---- desired range ---- |
1492
* | state |
1493
*
1494
* We need to split the extent, and set the bit on the first half.
1495
*/
1496
if (state->start <= end && state->end > end) {
1497
prealloc = alloc_extent_state_atomic(prealloc);
1498
if (!prealloc) {
1499
ret = -ENOMEM;
1500
goto out;
1501
}
1502
1503
ret = split_state(tree, state, prealloc, end + 1);
1504
if (ret) {
1505
extent_io_tree_panic(tree, state, "split", ret);
1506
prealloc = NULL;
1507
goto out;
1508
}
1509
1510
set_state_bits(tree, prealloc, bits, NULL);
1511
cache_state(prealloc, cached_state);
1512
clear_state_bit(tree, prealloc, clear_bits, 0, end, NULL);
1513
prealloc = NULL;
1514
goto out;
1515
}
1516
1517
search_again:
1518
if (start > end)
1519
goto out;
1520
spin_unlock(&tree->lock);
1521
cond_resched();
1522
first_iteration = false;
1523
goto again;
1524
1525
out:
1526
spin_unlock(&tree->lock);
1527
btrfs_free_extent_state(prealloc);
1528
1529
return ret;
1530
}
1531
1532
/*
1533
* Find the first range that has @bits not set. This range could start before
1534
* @start.
1535
*
1536
* @tree: the tree to search
1537
* @start: offset at/after which the found extent should start
1538
* @start_ret: records the beginning of the range
1539
* @end_ret: records the end of the range (inclusive)
1540
* @bits: the set of bits which must be unset
1541
*
1542
* Since unallocated range is also considered one which doesn't have the bits
1543
* set it's possible that @end_ret contains -1, this happens in case the range
1544
* spans (last_range_end, end of device]. In this case it's up to the caller to
1545
* trim @end_ret to the appropriate size.
1546
*/
1547
void btrfs_find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
1548
u64 *start_ret, u64 *end_ret, u32 bits)
1549
{
1550
struct extent_state *state;
1551
struct extent_state *prev = NULL, *next = NULL;
1552
1553
spin_lock(&tree->lock);
1554
1555
/* Find first extent with bits cleared */
1556
while (1) {
1557
state = tree_search_prev_next(tree, start, &prev, &next);
1558
if (!state && !next && !prev) {
1559
/*
1560
* Tree is completely empty, send full range and let
1561
* caller deal with it
1562
*/
1563
*start_ret = 0;
1564
*end_ret = -1;
1565
goto out;
1566
} else if (!state && !next) {
1567
/*
1568
* We are past the last allocated chunk, set start at
1569
* the end of the last extent.
1570
*/
1571
*start_ret = prev->end + 1;
1572
*end_ret = -1;
1573
goto out;
1574
} else if (!state) {
1575
state = next;
1576
}
1577
1578
/*
1579
* At this point 'state' either contains 'start' or start is
1580
* before 'state'
1581
*/
1582
if (in_range(start, state->start, state->end - state->start + 1)) {
1583
if (state->state & bits) {
1584
/*
1585
* |--range with bits sets--|
1586
* |
1587
* start
1588
*/
1589
start = state->end + 1;
1590
} else {
1591
/*
1592
* 'start' falls within a range that doesn't
1593
* have the bits set, so take its start as the
1594
* beginning of the desired range
1595
*
1596
* |--range with bits cleared----|
1597
* |
1598
* start
1599
*/
1600
*start_ret = state->start;
1601
break;
1602
}
1603
} else {
1604
/*
1605
* |---prev range---|---hole/unset---|---node range---|
1606
* |
1607
* start
1608
*
1609
* or
1610
*
1611
* |---hole/unset--||--first node--|
1612
* 0 |
1613
* start
1614
*/
1615
if (prev)
1616
*start_ret = prev->end + 1;
1617
else
1618
*start_ret = 0;
1619
break;
1620
}
1621
}
1622
1623
/*
1624
* Find the longest stretch from start until an entry which has the
1625
* bits set
1626
*/
1627
while (state) {
1628
if (state->end >= start && !(state->state & bits)) {
1629
*end_ret = state->end;
1630
} else {
1631
*end_ret = state->start - 1;
1632
break;
1633
}
1634
state = next_state(state);
1635
}
1636
out:
1637
spin_unlock(&tree->lock);
1638
}
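/*
 * Illustrative sketch (tree and offset are placeholders; EXTENT_DIRTY is
 * assumed from the header, this file only mentions DIRTY in a comment):
 * finding a "hole", i.e. the first range where the given bits are NOT set.
 * The result may start before the passed-in offset, and *end_ret can come
 * back as -1 when the hole extends past the last record, as documented above.
 *
 *	u64 hole_start, hole_end;
 *
 *	btrfs_find_first_clear_extent_bit(tree, offset, &hole_start,
 *					  &hole_end, EXTENT_DIRTY);
 *	// [hole_start, hole_end] has no EXTENT_DIRTY set; trim hole_end if
 *	// it came back as (u64)-1.
 */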
1639
1640
/*
1641
* Count the number of bytes in the tree that have a given bit(s) set for a
1642
* given range.
1643
*
1644
* @tree: The io tree to search.
1645
* @start: The start offset of the range. This value is updated to the
1646
* offset of the first byte found with the given bit(s), so it
1647
* can end up being bigger than the initial value.
1648
* @search_end: The end offset (inclusive value) of the search range.
1649
* @max_bytes: The maximum byte count we are interested in. The search stops
1650
* once it reaches this count.
1651
* @bits: The bits the range must have in order to be accounted for.
1652
* If multiple bits are set, then only subranges that have all
1653
* the bits set are accounted for.
1654
* @contig: Indicate if we should ignore holes in the range or not. If
1655
* this is true, then stop once we find a hole.
1656
* @cached_state: A cached state to be used across multiple calls to this
1657
* function in order to speed up searches. Use NULL if this is
1658
* called only once or if each call does not start where the
1659
* previous one ended.
1660
*
1661
* Returns the total number of bytes found within the given range that have
1662
* all given bits set. If the returned number of bytes is greater than zero
1663
* then @start is updated with the offset of the first byte with the bits set.
1664
*/
1665
u64 btrfs_count_range_bits(struct extent_io_tree *tree,
1666
u64 *start, u64 search_end, u64 max_bytes,
1667
u32 bits, int contig,
1668
struct extent_state **cached_state)
1669
{
1670
struct extent_state *state = NULL;
1671
struct extent_state *cached;
1672
u64 cur_start = *start;
1673
u64 total_bytes = 0;
1674
u64 last = 0;
1675
int found = 0;
1676
1677
if (WARN_ON(search_end < cur_start))
1678
return 0;
1679
1680
spin_lock(&tree->lock);
1681
1682
if (!cached_state || !*cached_state)
1683
goto search;
1684
1685
cached = *cached_state;
1686
1687
if (!extent_state_in_tree(cached))
1688
goto search;
1689
1690
if (cached->start <= cur_start && cur_start <= cached->end) {
1691
state = cached;
1692
} else if (cached->start > cur_start) {
1693
struct extent_state *prev;
1694
1695
/*
1696
* The cached state starts after our search range's start. Check
1697
* if the previous state record starts at or before the range we
1698
* are looking for, and if so, use it - this is a common case
1699
* when there are holes between records in the tree. If there is
1700
* no previous state record, we can start from our cached state.
1701
*/
1702
prev = prev_state(cached);
1703
if (!prev)
1704
state = cached;
1705
else if (prev->start <= cur_start && cur_start <= prev->end)
1706
state = prev;
1707
}
1708
1709
/*
1710
* This search will find all the extents that end after our range
1711
* starts.
1712
*/
1713
search:
1714
if (!state)
1715
state = tree_search(tree, cur_start);
1716
1717
while (state) {
1718
if (state->start > search_end)
1719
break;
1720
if (contig && found && state->start > last + 1)
1721
break;
1722
if (state->end >= cur_start && (state->state & bits) == bits) {
1723
total_bytes += min(search_end, state->end) + 1 -
1724
max(cur_start, state->start);
1725
if (total_bytes >= max_bytes)
1726
break;
1727
if (!found) {
1728
*start = max(cur_start, state->start);
1729
found = 1;
1730
}
1731
last = state->end;
1732
} else if (contig && found) {
1733
break;
1734
}
1735
state = next_state(state);
1736
}
1737
1738
if (cached_state) {
1739
btrfs_free_extent_state(*cached_state);
1740
*cached_state = state;
1741
if (state)
1742
refcount_inc(&state->refs);
1743
}
1744
1745
spin_unlock(&tree->lock);
1746
1747
return total_bytes;
1748
}
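/*
 * Illustrative sketch (tree, range_start and range_end are placeholders):
 * counting how many bytes in a range carry a bit. With contig == 1 the count
 * stops at the first hole; the cached state is only worth passing when
 * successive calls resume where the previous one stopped.
 *
 *	u64 found_start = range_start;
 *	u64 bytes;
 *
 *	bytes = btrfs_count_range_bits(tree, &found_start, range_end,
 *				       (u64)-1, EXTENT_DELALLOC, 0, NULL);
 *	// 'bytes' is the number of bytes with EXTENT_DELALLOC set inside
 *	// [range_start, range_end]; 'found_start' points at the first of them.
 */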
1749
1750
/*
1751
* Check if the single @bit exists in the given range.
1752
*/
1753
bool btrfs_test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit)
1754
{
1755
struct extent_state *state;
1756
bool bitset = false;
1757
1758
ASSERT(is_power_of_2(bit));
1759
1760
spin_lock(&tree->lock);
1761
state = tree_search(tree, start);
1762
while (state) {
1763
if (state->start > end)
1764
break;
1765
1766
if (state->state & bit) {
1767
bitset = true;
1768
break;
1769
}
1770
1771
if (state->end >= end)
1772
break;
1773
state = next_state(state);
1774
}
1775
spin_unlock(&tree->lock);
1776
return bitset;
1777
}
1778
1779
void btrfs_get_range_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 *bits,
1780
struct extent_state **cached_state)
1781
{
1782
struct extent_state *state;
1783
1784
/*
1785
* The cached state is currently mandatory and not used to start the
1786
* search, only to cache the first state record found in the range.
1787
*/
1788
ASSERT(cached_state != NULL);
1789
ASSERT(*cached_state == NULL);
1790
1791
*bits = 0;
1792
1793
spin_lock(&tree->lock);
1794
state = tree_search(tree, start);
1795
if (state && state->start < end) {
1796
*cached_state = state;
1797
refcount_inc(&state->refs);
1798
}
1799
while (state) {
1800
if (state->start > end)
1801
break;
1802
1803
*bits |= state->state;
1804
1805
if (state->end >= end)
1806
break;
1807
1808
state = next_state(state);
1809
}
1810
spin_unlock(&tree->lock);
1811
}
1812
1813
/*
1814
* Check if the whole range [@start, @end] contains the single @bit set.
1815
*/
1816
bool btrfs_test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit,
1817
struct extent_state *cached)
1818
{
1819
struct extent_state *state;
1820
bool bitset = true;
1821
1822
ASSERT(is_power_of_2(bit));
1823
ASSERT(start < end);
1824
1825
spin_lock(&tree->lock);
1826
if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1827
cached->end > start)
1828
state = cached;
1829
else
1830
state = tree_search(tree, start);
1831
while (state) {
1832
if (state->start > start) {
1833
bitset = false;
1834
break;
1835
}
1836
1837
if ((state->state & bit) == 0) {
1838
bitset = false;
1839
break;
1840
}
1841
1842
if (state->end >= end)
1843
break;
1844
1845
/* Next state must start where this one ends. */
1846
start = state->end + 1;
1847
state = next_state(state);
1848
}
1849
1850
/* We ran out of states and were still inside of our range. */
1851
if (!state)
1852
bitset = false;
1853
spin_unlock(&tree->lock);
1854
return bitset;
1855
}
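/*
 * Illustrative sketch (tree, start and end are placeholders): the difference
 * between the two predicates above. btrfs_test_range_bit_exists() answers
 * "is the bit set anywhere in [start, end]?", while btrfs_test_range_bit()
 * answers "is the bit set over every byte of the range, with no holes?".
 *
 *	if (btrfs_test_range_bit_exists(tree, start, end, EXTENT_DELALLOC)) {
 *		// at least part of the range is delalloc
 *	}
 *	if (btrfs_test_range_bit(tree, start, end, EXTENT_DELALLOC, NULL)) {
 *		// the whole range is delalloc
 *	}
 */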
1856
1857
/* Wrappers around set/clear extent bit */
1858
int btrfs_set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1859
u32 bits, struct extent_changeset *changeset)
1860
{
1861
/*
1862
* We don't support EXTENT_LOCK_BITS yet, as the current changeset will
1863
* record any bits changed, so for the EXTENT_LOCK_BITS case, it will either
1864
* fail with -EEXIST or the changeset will record the whole range.
1865
*/
1866
ASSERT(!(bits & EXTENT_LOCK_BITS));
1867
1868
return set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
1869
}
1870
1871
int btrfs_clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1872
u32 bits, struct extent_changeset *changeset)
1873
{
1874
/*
1875
* Don't support EXTENT_LOCK_BITS case, same reason as
1876
* set_record_extent_bits().
1877
*/
1878
ASSERT(!(bits & EXTENT_LOCK_BITS));
1879
1880
return btrfs_clear_extent_bit_changeset(tree, start, end, bits, NULL, changeset);
1881
}
1882
1883
bool btrfs_try_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1884
u32 bits, struct extent_state **cached)
1885
{
1886
int ret;
1887
u64 failed_start;
1888
1889
ret = set_extent_bit(tree, start, end, bits, &failed_start, NULL, cached, NULL);
1890
if (ret == -EEXIST) {
1891
if (failed_start > start)
1892
btrfs_clear_extent_bit(tree, start, failed_start - 1,
1893
bits, cached);
1894
return 0;
1895
}
1896
return 1;
1897
}
1898
1899
/*
1900
* Either insert or lock a state struct between start and end; the mask tells
1901
* us if waiting is desired.
1902
*/
1903
int btrfs_lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
1904
struct extent_state **cached_state)
1905
{
1906
struct extent_state *failed_state = NULL;
1907
int ret;
1908
u64 failed_start;
1909
1910
ret = set_extent_bit(tree, start, end, bits, &failed_start,
1911
&failed_state, cached_state, NULL);
1912
while (ret == -EEXIST) {
1913
if (failed_start != start)
1914
btrfs_clear_extent_bit(tree, start, failed_start - 1,
1915
bits, cached_state);
1916
1917
wait_extent_bit(tree, failed_start, end, bits, &failed_state);
1918
ret = set_extent_bit(tree, start, end, bits, &failed_start,
1919
&failed_state, cached_state, NULL);
1920
}
1921
return ret;
1922
}
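/*
 * Illustrative sketch (tree, start and end are placeholders; EXTENT_LOCKED
 * is assumed to be one of the EXTENT_LOCK_BITS from the header, since this
 * file only references the combined mask): the lock/try-lock pattern built
 * on top of set_extent_bit(). Unlocking is simply clearing the same bit,
 * here via btrfs_clear_extent_bit() as used elsewhere in this file.
 *
 *	struct extent_state *cached = NULL;
 *
 *	btrfs_lock_extent_bits(tree, start, end, EXTENT_LOCKED, &cached);
 *	// ... the range is exclusively held against other lockers ...
 *	btrfs_clear_extent_bit(tree, start, end, EXTENT_LOCKED, &cached);
 *
 *	// Non-blocking variant:
 *	if (btrfs_try_lock_extent_bits(tree, start, end, EXTENT_LOCKED,
 *				       &cached)) {
 *		// locked; unlock as above when done
 *	}
 */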
1923
1924
/*
1925
* Get the extent state that follows the given extent state.
1926
* This is meant to be used in a context where we know no other tasks can
1927
* concurrently modify the tree.
1928
*/
1929
struct extent_state *btrfs_next_extent_state(struct extent_io_tree *tree,
1930
struct extent_state *state)
1931
{
1932
struct extent_state *next;
1933
1934
spin_lock(&tree->lock);
1935
ASSERT(extent_state_in_tree(state));
1936
next = next_state(state);
1937
if (next)
1938
refcount_inc(&next->refs);
1939
spin_unlock(&tree->lock);
1940
1941
return next;
1942
}
1943
1944
void __cold btrfs_extent_state_free_cachep(void)
1945
{
1946
btrfs_extent_state_leak_debug_check();
1947
kmem_cache_destroy(extent_state_cache);
1948
}
1949
1950
int __init btrfs_extent_state_init_cachep(void)
1951
{
1952
extent_state_cache = kmem_cache_create("btrfs_extent_state",
1953
sizeof(struct extent_state), 0, 0,
1954
NULL);
1955
if (!extent_state_cache)
1956
return -ENOMEM;
1957
1958
return 0;
1959
}
1960
1961