GitHub Repository: awilliam/linux-vfio
Path: blob/master/fs/btrfs/ctree.c
/*
 * Copyright (C) 2007,2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "locking.h"

static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		   *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
		   *root, struct btrfs_key *ins_key,
		   struct btrfs_path *path, int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
			  struct extent_buffer *src, int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);

struct btrfs_path *btrfs_alloc_path(void)
{
	struct btrfs_path *path;
	path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
	return path;
}

/*
 * set all locked nodes in the path to blocking locks.  This should
 * be done before scheduling
 */
noinline void btrfs_set_path_blocking(struct btrfs_path *p)
{
	int i;
	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		if (p->nodes[i] && p->locks[i])
			btrfs_set_lock_blocking(p->nodes[i]);
	}
}

/*
 * reset all the locked nodes in the path to spinning locks.
 *
 * held is used to keep lockdep happy, when lockdep is enabled
 * we set held to a blocking lock before we go around and
 * retake all the spinlocks in the path.  You can safely use NULL
 * for held
 */
noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
					struct extent_buffer *held)
{
	int i;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* lockdep really cares that we take all of these spinlocks
	 * in the right order.  If any of the locks in the path are not
	 * currently blocking, it is going to complain.  So, make really
	 * really sure by forcing the path to blocking before we clear
	 * the path blocking.
	 */
	if (held)
		btrfs_set_lock_blocking(held);
	btrfs_set_path_blocking(p);
#endif

	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
		if (p->nodes[i] && p->locks[i])
			btrfs_clear_lock_blocking(p->nodes[i]);
	}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (held)
		btrfs_clear_lock_blocking(held);
#endif
}

/* this also releases the path */
void btrfs_free_path(struct btrfs_path *p)
{
	if (!p)
		return;
	btrfs_release_path(p);
	kmem_cache_free(btrfs_path_cachep, p);
}
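
/*
 * Editor's note (illustrative, not part of the original file): paths are
 * always used in an alloc/use/free cycle, and btrfs_free_path() calls
 * btrfs_release_path() first, so it is safe to free a path that still
 * holds locks and extent buffer references:
 *
 *	struct btrfs_path *path = btrfs_alloc_path();
 *	if (!path)
 *		return -ENOMEM;
 *	... search and use the path ...
 *	btrfs_free_path(path);
 */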

/*
 * path release drops references on the extent buffers in the path
 * and it drops any locks held by this path
 *
 * It is safe to call this on paths that have no locks or extent
 * buffers held.
 */
noinline void btrfs_release_path(struct btrfs_path *p)
{
	int i;

	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		p->slots[i] = 0;
		if (!p->nodes[i])
			continue;
		if (p->locks[i]) {
			btrfs_tree_unlock(p->nodes[i]);
			p->locks[i] = 0;
		}
		free_extent_buffer(p->nodes[i]);
		p->nodes[i] = NULL;
	}
}

/*
 * safely gets a reference on the root node of a tree.  A lock
 * is not taken, so a concurrent writer may put a different node
 * at the root of the tree.  See btrfs_lock_root_node for the
 * looping required.
 *
 * The extent buffer returned by this has a reference taken, so
 * it won't disappear.  It may stop being the root of the tree
 * at any time because there are no locks held.
 */
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	rcu_read_lock();
	eb = rcu_dereference(root->node);
	extent_buffer_get(eb);
	rcu_read_unlock();
	return eb;
}

/* loop around taking references on and locking the root node of the
 * tree until you end up with a lock on the root.  A locked buffer
 * is returned, with a reference held.
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/* cowonly roots (everything that is not a reference counted cow subvolume)
 * just get put onto a simple dirty list.  transaction.c walks this to
 * make sure they get properly updated on disk.
 */
static void add_root_to_dirty_list(struct btrfs_root *root)
{
	if (root->track_dirty && list_empty(&root->dirty_list)) {
		list_add(&root->dirty_list,
			 &root->fs_info->dirty_cowonly_roots);
	}
}

/*
 * used by snapshot creation to make a copy of a root for a tree with
 * a given objectid.  The buffer with the new root node is returned in
 * cow_ret, and this func returns zero on success or a negative error code.
 */
int btrfs_copy_root(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct extent_buffer *buf,
		      struct extent_buffer **cow_ret, u64 new_root_objectid)
{
	struct extent_buffer *cow;
	int ret = 0;
	int level;
	struct btrfs_disk_key disk_key;

	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);
	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
				     new_root_objectid, &disk_key, level,
				     buf->start, 0);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, new_root_objectid);

	write_extent_buffer(cow, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(cow),
			    BTRFS_FSID_SIZE);

	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		ret = btrfs_inc_ref(trans, root, cow, 1);
	else
		ret = btrfs_inc_ref(trans, root, cow, 0);

	if (ret)
		return ret;

	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

/*
 * check if the tree block can be shared by multiple trees
 */
int btrfs_block_can_be_shared(struct btrfs_root *root,
			      struct extent_buffer *buf)
{
	/*
	 * Tree blocks not in reference counted trees and tree roots
	 * are never shared.  If a block was allocated after the last
	 * snapshot and the block was not allocated by tree relocation,
	 * we know the block is not shared.
	 */
	if (root->ref_cows &&
	    buf != root->node && buf != root->commit_root &&
	    (btrfs_header_generation(buf) <=
	     btrfs_root_last_snapshot(&root->root_item) ||
	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
		return 1;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (root->ref_cows &&
	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
		return 1;
#endif
	return 0;
}

static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct extent_buffer *buf,
				       struct extent_buffer *cow,
				       int *last_ref)
{
	u64 refs;
	u64 owner;
	u64 flags;
	u64 new_flags = 0;
	int ret;

	/*
	 * Backrefs update rules:
	 *
	 * Always use full backrefs for extent pointers in tree blocks
	 * allocated by tree relocation.
	 *
	 * If a shared tree block is no longer referenced by its owner
	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
	 * use full backrefs for extent pointers in the tree block.
	 *
	 * If a tree block is being relocated
	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
	 * use full backrefs for extent pointers in the tree block.
	 * The reason for this is that some operations (such as drop
	 * tree) are only allowed for blocks that use full backrefs.
	 */

	if (btrfs_block_can_be_shared(root, buf)) {
		ret = btrfs_lookup_extent_info(trans, root, buf->start,
					       buf->len, &refs, &flags);
		BUG_ON(ret);
		BUG_ON(refs == 0);
	} else {
		refs = 1;
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
		else
			flags = 0;
	}

	owner = btrfs_header_owner(buf);
	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
	       !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));

	if (refs > 1) {
		if ((owner == root->root_key.objectid ||
		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
			ret = btrfs_inc_ref(trans, root, buf, 1);
			BUG_ON(ret);

			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID) {
				ret = btrfs_dec_ref(trans, root, buf, 0);
				BUG_ON(ret);
				ret = btrfs_inc_ref(trans, root, cow, 1);
				BUG_ON(ret);
			}
			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		} else {

			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			BUG_ON(ret);
		}
		if (new_flags != 0) {
			ret = btrfs_set_disk_extent_flags(trans, root,
							  buf->start,
							  buf->len,
							  new_flags, 0);
			BUG_ON(ret);
		}
	} else {
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			BUG_ON(ret);
			ret = btrfs_dec_ref(trans, root, buf, 1);
			BUG_ON(ret);
		}
		clean_tree_block(trans, root, buf);
		*last_ref = 1;
	}
	return 0;
}

/*
 * does the dirty work in cow of a single block.  The parent block (if
 * supplied) is updated to point to the new cow copy.  The new buffer is
 * marked dirty and returned locked.  If you modify the block it needs to
 * be marked dirty again.
 *
 * search_start -- an allocation hint for the new block
 *
 * empty_size -- a hint that you plan on doing more cow.  This is the size in
 * bytes the allocator should try to find free next to the block it returns.
 * This is just a hint and may be ignored by the allocator.
 */
static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct extent_buffer *buf,
			     struct extent_buffer *parent, int parent_slot,
			     struct extent_buffer **cow_ret,
			     u64 search_start, u64 empty_size)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *cow;
	int level;
	int last_ref = 0;
	int unlock_orig = 0;
	u64 parent_start;

	if (*cow_ret == buf)
		unlock_orig = 1;

	btrfs_assert_tree_locked(buf);

	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);

	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		if (parent)
			parent_start = parent->start;
		else
			parent_start = 0;
	} else
		parent_start = 0;

	cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
				     root->root_key.objectid, &disk_key,
				     level, search_start, empty_size);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	/* cow is set to blocking by btrfs_init_new_buffer */

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, root->root_key.objectid);

	write_extent_buffer(cow, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(cow),
			    BTRFS_FSID_SIZE);

	update_ref_for_cow(trans, root, buf, cow, &last_ref);

	if (root->ref_cows)
		btrfs_reloc_cow_block(trans, root, buf, cow);

	if (buf == root->node) {
		WARN_ON(parent && parent != buf);
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			parent_start = buf->start;
		else
			parent_start = 0;

		extent_buffer_get(cow);
		rcu_assign_pointer(root->node, cow);

		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
		free_extent_buffer(buf);
		add_root_to_dirty_list(root);
	} else {
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			parent_start = parent->start;
		else
			parent_start = 0;

		WARN_ON(trans->transid != btrfs_header_generation(parent));
		btrfs_set_node_blockptr(parent, parent_slot,
					cow->start);
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
		btrfs_mark_buffer_dirty(parent);
		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
	}
	if (unlock_orig)
		btrfs_tree_unlock(buf);
	free_extent_buffer(buf);
	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

static inline int should_cow_block(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct extent_buffer *buf)
{
	if (btrfs_header_generation(buf) == trans->transid &&
	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
		return 0;
	return 1;
}
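
/*
 * Editor's note (illustrative): read the test above as "skip the COW only
 * when all three conditions hold": the block was created in this
 * transaction, it has not been written out yet, and it is not a
 * relocation-flagged block touched from a non-relocation tree.  Any other
 * combination forces a fresh copy.
 */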

/*
 * cows a single block, see __btrfs_cow_block for the real work.
 * This version of it has extra checks so that a block isn't cow'd more than
 * once per transaction, as long as it hasn't been written yet
 */
noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
		    struct extent_buffer **cow_ret)
{
	u64 search_start;
	int ret;

	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %llu running %llu\n",
		       (unsigned long long)trans->transid,
		       (unsigned long long)
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %llu running %llu\n",
		       (unsigned long long)trans->transid,
		       (unsigned long long)root->fs_info->generation);
		WARN_ON(1);
	}

	if (!should_cow_block(trans, root, buf)) {
		*cow_ret = buf;
		return 0;
	}

	search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);

	if (parent)
		btrfs_set_lock_blocking(parent);
	btrfs_set_lock_blocking(buf);

	ret = __btrfs_cow_block(trans, root, buf, parent,
				parent_slot, cow_ret, search_start, 0);

	trace_btrfs_cow_block(root, buf, *cow_ret);

	return ret;
}
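
/*
 * Editor's note (illustrative): the mask in btrfs_cow_block rounds
 * buf->start down to the containing 1GiB boundary.  A block at byte
 * 3355443200, for example, yields search_start = 3221225472 (3 * 2^30),
 * hinting the allocator to keep the copy within the same 1GiB region.
 */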

/*
 * helper function for defrag to decide if two blocks pointed to by a
 * node are actually close by
 */
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
{
	if (blocknr < other && other - (blocknr + blocksize) < 32768)
		return 1;
	if (blocknr > other && blocknr - (other + blocksize) < 32768)
		return 1;
	return 0;
}
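
/*
 * Worked example (editor's addition, hypothetical numbers): with a 4096
 * byte blocksize, blocknr = 0 and other = 20480 leave a gap of
 * 20480 - (0 + 4096) = 16384 bytes, under the 32768 threshold, so the
 * blocks count as close; other = 131072 leaves a 126976 byte gap and
 * the pair is considered far apart.
 */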

/*
 * compare two keys in a memcmp fashion
 */
static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
{
	struct btrfs_key k1;

	btrfs_disk_key_to_cpu(&k1, disk);

	return btrfs_comp_cpu_keys(&k1, k2);
}

/*
 * same as comp_keys only with two btrfs_key's
 */
int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
{
	if (k1->objectid > k2->objectid)
		return 1;
	if (k1->objectid < k2->objectid)
		return -1;
	if (k1->type > k2->type)
		return 1;
	if (k1->type < k2->type)
		return -1;
	if (k1->offset > k2->offset)
		return 1;
	if (k1->offset < k2->offset)
		return -1;
	return 0;
}
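
/*
 * Editor's note (illustrative, hypothetical values): keys order by
 * (objectid, type, offset), compared field by field.  For
 * a = {256, 1, 0} and b = {256, 1, 4096}, btrfs_comp_cpu_keys(&a, &b)
 * returns -1 because the first two fields tie and a.offset < b.offset.
 */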

/*
 * this is used by the defrag code to go through all the
 * leaves pointed to by a node and reallocate them so that
 * disk order is close to key order
 */
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct extent_buffer *parent,
		       int start_slot, int cache_only, u64 *last_ret,
		       struct btrfs_key *progress)
{
	struct extent_buffer *cur;
	u64 blocknr;
	u64 gen;
	u64 search_start = *last_ret;
	u64 last_block = 0;
	u64 other;
	u32 parent_nritems;
	int end_slot;
	int i;
	int err = 0;
	int parent_level;
	int uptodate;
	u32 blocksize;
	int progress_passed = 0;
	struct btrfs_disk_key disk_key;

	parent_level = btrfs_header_level(parent);
	if (cache_only && parent_level != 1)
		return 0;

	if (trans->transaction != root->fs_info->running_transaction)
		WARN_ON(1);
	if (trans->transid != root->fs_info->generation)
		WARN_ON(1);

	parent_nritems = btrfs_header_nritems(parent);
	blocksize = btrfs_level_size(root, parent_level - 1);
	end_slot = parent_nritems;

	if (parent_nritems == 1)
		return 0;

	btrfs_set_lock_blocking(parent);

	for (i = start_slot; i < end_slot; i++) {
		int close = 1;

		if (!parent->map_token) {
			map_extent_buffer(parent,
					btrfs_node_key_ptr_offset(i),
					sizeof(struct btrfs_key_ptr),
					&parent->map_token, &parent->kaddr,
					&parent->map_start, &parent->map_len,
					KM_USER1);
		}
		btrfs_node_key(parent, &disk_key, i);
		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
			continue;

		progress_passed = 1;
		blocknr = btrfs_node_blockptr(parent, i);
		gen = btrfs_node_ptr_generation(parent, i);
		if (last_block == 0)
			last_block = blocknr;

		if (i > 0) {
			other = btrfs_node_blockptr(parent, i - 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (!close && i < end_slot - 2) {
			other = btrfs_node_blockptr(parent, i + 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (close) {
			last_block = blocknr;
			continue;
		}
		if (parent->map_token) {
			unmap_extent_buffer(parent, parent->map_token,
					    KM_USER1);
			parent->map_token = NULL;
		}

		cur = btrfs_find_tree_block(root, blocknr, blocksize);
		if (cur)
			uptodate = btrfs_buffer_uptodate(cur, gen);
		else
			uptodate = 0;
		if (!cur || !uptodate) {
			if (cache_only) {
				free_extent_buffer(cur);
				continue;
			}
			if (!cur) {
				cur = read_tree_block(root, blocknr,
							 blocksize, gen);
				if (!cur)
					return -EIO;
			} else if (!uptodate) {
				btrfs_read_buffer(cur, gen);
			}
		}
		if (search_start == 0)
			search_start = last_block;

		btrfs_tree_lock(cur);
		btrfs_set_lock_blocking(cur);
		err = __btrfs_cow_block(trans, root, cur, parent, i,
					&cur, search_start,
					min(16 * blocksize,
					    (end_slot - i) * blocksize));
		if (err) {
			btrfs_tree_unlock(cur);
			free_extent_buffer(cur);
			break;
		}
		search_start = cur->start;
		last_block = cur->start;
		*last_ret = search_start;
		btrfs_tree_unlock(cur);
		free_extent_buffer(cur);
	}
	if (parent->map_token) {
		unmap_extent_buffer(parent, parent->map_token,
				    KM_USER1);
		parent->map_token = NULL;
	}
	return err;
}

/*
 * The leaf data grows from end-to-front in the node.  This returns the
 * address of the start of the last item, which is the stop of the leaf
 * data stack.
 */
static inline unsigned int leaf_data_end(struct btrfs_root *root,
					 struct extent_buffer *leaf)
{
	u32 nr = btrfs_header_nritems(leaf);
	if (nr == 0)
		return BTRFS_LEAF_DATA_SIZE(root);
	return btrfs_item_offset_nr(leaf, nr - 1);
}
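
/*
 * Editor's note (illustrative, hypothetical numbers): item data fills the
 * leaf from the back, so with one item whose data occupies the final 100
 * bytes of a 3995 byte data area, btrfs_item_offset_nr(leaf, 0) == 3895
 * and leaf_data_end() returns 3895; an empty leaf returns the full
 * BTRFS_LEAF_DATA_SIZE(root).
 */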


/*
 * search for key in the extent_buffer.  The items start at offset p,
 * and they are item_size apart.  There are 'max' items in p.
 *
 * the slot in the array is returned via slot, and it points to
 * the place where you would insert key if it is not found in
 * the array.
 *
 * slot may point to max if the key is bigger than all of the keys
 */
static noinline int generic_bin_search(struct extent_buffer *eb,
				       unsigned long p,
				       int item_size, struct btrfs_key *key,
				       int max, int *slot)
{
	int low = 0;
	int high = max;
	int mid;
	int ret;
	struct btrfs_disk_key *tmp = NULL;
	struct btrfs_disk_key unaligned;
	unsigned long offset;
	char *map_token = NULL;
	char *kaddr = NULL;
	unsigned long map_start = 0;
	unsigned long map_len = 0;
	int err;

	while (low < high) {
		mid = (low + high) / 2;
		offset = p + mid * item_size;

		if (!map_token || offset < map_start ||
		    (offset + sizeof(struct btrfs_disk_key)) >
		    map_start + map_len) {
			if (map_token) {
				unmap_extent_buffer(eb, map_token, KM_USER0);
				map_token = NULL;
			}

			err = map_private_extent_buffer(eb, offset,
						sizeof(struct btrfs_disk_key),
						&map_token, &kaddr,
						&map_start, &map_len, KM_USER0);

			if (!err) {
				tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
			} else {
				read_extent_buffer(eb, &unaligned,
						   offset, sizeof(unaligned));
				tmp = &unaligned;
			}

		} else {
			tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
		}
		ret = comp_keys(tmp, key);

		if (ret < 0)
			low = mid + 1;
		else if (ret > 0)
			high = mid;
		else {
			*slot = mid;
			if (map_token)
				unmap_extent_buffer(eb, map_token, KM_USER0);
			return 0;
		}
	}
	*slot = low;
	if (map_token)
		unmap_extent_buffer(eb, map_token, KM_USER0);
	return 1;
}

/*
 * simple bin_search frontend that does the right thing for
 * leaves vs nodes
 */
static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		      int level, int *slot)
{
	if (level == 0) {
		return generic_bin_search(eb,
					  offsetof(struct btrfs_leaf, items),
					  sizeof(struct btrfs_item),
					  key, btrfs_header_nritems(eb),
					  slot);
	} else {
		return generic_bin_search(eb,
					  offsetof(struct btrfs_node, ptrs),
					  sizeof(struct btrfs_key_ptr),
					  key, btrfs_header_nritems(eb),
					  slot);
	}
	return -1;
}

int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		     int level, int *slot)
{
	return bin_search(eb, key, level, slot);
}
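
/*
 * Editor's note (illustrative): both search frontends return 0 with *slot
 * set to the matching index on an exact hit, and 1 with *slot set to the
 * insertion point otherwise.  *slot may equal the item count when the key
 * sorts after everything in the buffer.
 */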

static void root_add_used(struct btrfs_root *root, u32 size)
{
	spin_lock(&root->accounting_lock);
	btrfs_set_root_used(&root->root_item,
			    btrfs_root_used(&root->root_item) + size);
	spin_unlock(&root->accounting_lock);
}

static void root_sub_used(struct btrfs_root *root, u32 size)
{
	spin_lock(&root->accounting_lock);
	btrfs_set_root_used(&root->root_item,
			    btrfs_root_used(&root->root_item) - size);
	spin_unlock(&root->accounting_lock);
}

/* given a node and slot number, this reads the block it points to.  The
 * extent buffer is returned with a reference taken (but unlocked).
 * NULL is returned on error.
 */
static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
				   struct extent_buffer *parent, int slot)
{
	int level = btrfs_header_level(parent);
	if (slot < 0)
		return NULL;
	if (slot >= btrfs_header_nritems(parent))
		return NULL;

	BUG_ON(level == 0);

	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
		       btrfs_level_size(root, level - 1),
		       btrfs_node_ptr_generation(parent, slot));
}

/*
 * node level balancing, used to make sure nodes are in proper order for
 * item deletion.  We balance from the top down, so we have to make sure
 * that a deletion won't leave a node completely empty later on.
 */
static noinline int balance_level(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path, int level)
{
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
	u64 orig_ptr;

	if (level == 0)
		return 0;

	mid = path->nodes[level];

	WARN_ON(!path->locks[level]);
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

	orig_ptr = btrfs_node_blockptr(mid, orig_slot);

	if (level < BTRFS_MAX_LEVEL - 1)
		parent = path->nodes[level + 1];
	pslot = path->slots[level + 1];

	/*
	 * deal with the case where there is only one pointer in the root
	 * by promoting the node below to a root
	 */
	if (!parent) {
		struct extent_buffer *child;

		if (btrfs_header_nritems(mid) != 1)
			return 0;

		/* promote the child to a root */
		child = read_node_slot(root, mid, 0);
		BUG_ON(!child);
		btrfs_tree_lock(child);
		btrfs_set_lock_blocking(child);
		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
		if (ret) {
			btrfs_tree_unlock(child);
			free_extent_buffer(child);
			goto enospc;
		}

		rcu_assign_pointer(root->node, child);

		add_root_to_dirty_list(root);
		btrfs_tree_unlock(child);

		path->locks[level] = 0;
		path->nodes[level] = NULL;
		clean_tree_block(trans, root, mid);
		btrfs_tree_unlock(mid);
		/* once for the path */
		free_extent_buffer(mid);

		root_sub_used(root, mid->len);
		btrfs_free_tree_block(trans, root, mid, 0, 1);
		/* once for the root ptr */
		free_extent_buffer(mid);
		return 0;
	}
	if (btrfs_header_nritems(mid) >
	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
		return 0;

	btrfs_header_nritems(mid);

	left = read_node_slot(root, parent, pslot - 1);
	if (left) {
		btrfs_tree_lock(left);
		btrfs_set_lock_blocking(left);
		wret = btrfs_cow_block(trans, root, left,
				       parent, pslot - 1, &left);
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}
	right = read_node_slot(root, parent, pslot + 1);
	if (right) {
		btrfs_tree_lock(right);
		btrfs_set_lock_blocking(right);
		wret = btrfs_cow_block(trans, root, right,
				       parent, pslot + 1, &right);
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}

	/* first, try to make some room in the middle buffer */
	if (left) {
		orig_slot += btrfs_header_nritems(left);
		wret = push_node_left(trans, root, left, mid, 1);
		if (wret < 0)
			ret = wret;
		btrfs_header_nritems(mid);
	}

	/*
	 * then try to empty the right most buffer into the middle
	 */
	if (right) {
		wret = push_node_left(trans, root, mid, right, 1);
		if (wret < 0 && wret != -ENOSPC)
			ret = wret;
		if (btrfs_header_nritems(right) == 0) {
			clean_tree_block(trans, root, right);
			btrfs_tree_unlock(right);
			wret = del_ptr(trans, root, path, level + 1, pslot + 1);
			if (wret)
				ret = wret;
			root_sub_used(root, right->len);
			btrfs_free_tree_block(trans, root, right, 0, 1);
			free_extent_buffer(right);
			right = NULL;
		} else {
			struct btrfs_disk_key right_key;
			btrfs_node_key(right, &right_key, 0);
			btrfs_set_node_key(parent, &right_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);
		}
	}
	if (btrfs_header_nritems(mid) == 1) {
		/*
		 * we're not allowed to leave a node with one item in the
		 * tree during a delete.  A deletion from lower in the tree
		 * could try to delete the only pointer in this node.
		 * So, pull some keys from the left.
		 * There has to be a left pointer at this point because
		 * otherwise we would have pulled some pointers from the
		 * right
		 */
		BUG_ON(!left);
		wret = balance_node_right(trans, root, mid, left);
		if (wret < 0) {
			ret = wret;
			goto enospc;
		}
		if (wret == 1) {
			wret = push_node_left(trans, root, left, mid, 1);
			if (wret < 0)
				ret = wret;
		}
		BUG_ON(wret == 1);
	}
	if (btrfs_header_nritems(mid) == 0) {
		clean_tree_block(trans, root, mid);
		btrfs_tree_unlock(mid);
		wret = del_ptr(trans, root, path, level + 1, pslot);
		if (wret)
			ret = wret;
		root_sub_used(root, mid->len);
		btrfs_free_tree_block(trans, root, mid, 0, 1);
		free_extent_buffer(mid);
		mid = NULL;
	} else {
		/* update the parent key to reflect our changes */
		struct btrfs_disk_key mid_key;
		btrfs_node_key(mid, &mid_key, 0);
		btrfs_set_node_key(parent, &mid_key, pslot);
		btrfs_mark_buffer_dirty(parent);
	}

	/* update the path */
	if (left) {
		if (btrfs_header_nritems(left) > orig_slot) {
			extent_buffer_get(left);
			/* left was locked after cow */
			path->nodes[level] = left;
			path->slots[level + 1] -= 1;
			path->slots[level] = orig_slot;
			if (mid) {
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			}
		} else {
			orig_slot -= btrfs_header_nritems(left);
			path->slots[level] = orig_slot;
		}
	}
	/* double check we haven't messed things up */
	if (orig_ptr !=
	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
		BUG();
enospc:
	if (right) {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	if (left) {
		if (path->nodes[level] != left)
			btrfs_tree_unlock(left);
		free_extent_buffer(left);
	}
	return ret;
}

/* Node balancing for insertion.  Here we only split or push nodes around
 * when they are completely full.  This is also done top down, so we
 * have to be pessimistic.
 */
static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, int level)
{
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];

	if (level == 0)
		return 1;

	mid = path->nodes[level];
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

	if (level < BTRFS_MAX_LEVEL - 1)
		parent = path->nodes[level + 1];
	pslot = path->slots[level + 1];

	if (!parent)
		return 1;

	left = read_node_slot(root, parent, pslot - 1);

	/* first, try to make some room in the middle buffer */
	if (left) {
		u32 left_nr;

		btrfs_tree_lock(left);
		btrfs_set_lock_blocking(left);

		left_nr = btrfs_header_nritems(left);
		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, left, parent,
					      pslot - 1, &left);
			if (ret)
				wret = 1;
			else {
				wret = push_node_left(trans, root,
						      left, mid, 0);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			struct btrfs_disk_key disk_key;
			orig_slot += left_nr;
			btrfs_node_key(mid, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot);
			btrfs_mark_buffer_dirty(parent);
			if (btrfs_header_nritems(left) > orig_slot) {
				path->nodes[level] = left;
				path->slots[level + 1] -= 1;
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				orig_slot -=
					btrfs_header_nritems(left);
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(left);
				free_extent_buffer(left);
			}
			return 0;
		}
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
	}
	right = read_node_slot(root, parent, pslot + 1);

	/*
	 * then try to empty the right most buffer into the middle
	 */
	if (right) {
		u32 right_nr;

		btrfs_tree_lock(right);
		btrfs_set_lock_blocking(right);

		right_nr = btrfs_header_nritems(right);
		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, right,
					      parent, pslot + 1,
					      &right);
			if (ret)
				wret = 1;
			else {
				wret = balance_node_right(trans, root,
							  right, mid);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			struct btrfs_disk_key disk_key;

			btrfs_node_key(right, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);

			if (btrfs_header_nritems(mid) <= orig_slot) {
				path->nodes[level] = right;
				path->slots[level + 1] += 1;
				path->slots[level] = orig_slot -
					btrfs_header_nritems(mid);
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				btrfs_tree_unlock(right);
				free_extent_buffer(right);
			}
			return 0;
		}
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 1;
}

/*
 * readahead one full node of leaves, finding things that are close
 * to the block in 'slot', and triggering ra on them.
 */
static void reada_for_search(struct btrfs_root *root,
			     struct btrfs_path *path,
			     int level, int slot, u64 objectid)
{
	struct extent_buffer *node;
	struct btrfs_disk_key disk_key;
	u32 nritems;
	u64 search;
	u64 target;
	u64 nread = 0;
	u64 gen;
	int direction = path->reada;
	struct extent_buffer *eb;
	u32 nr;
	u32 blocksize;
	u32 nscan = 0;
	bool map = true;

	if (level != 1)
		return;

	if (!path->nodes[level])
		return;

	node = path->nodes[level];

	search = btrfs_node_blockptr(node, slot);
	blocksize = btrfs_level_size(root, level - 1);
	eb = btrfs_find_tree_block(root, search, blocksize);
	if (eb) {
		free_extent_buffer(eb);
		return;
	}

	target = search;

	nritems = btrfs_header_nritems(node);
	nr = slot;
	if (node->map_token || path->skip_locking)
		map = false;

	while (1) {
		if (map && !node->map_token) {
			unsigned long offset = btrfs_node_key_ptr_offset(nr);
			map_private_extent_buffer(node, offset,
						  sizeof(struct btrfs_key_ptr),
						  &node->map_token,
						  &node->kaddr,
						  &node->map_start,
						  &node->map_len, KM_USER1);
		}
		if (direction < 0) {
			if (nr == 0)
				break;
			nr--;
		} else if (direction > 0) {
			nr++;
			if (nr >= nritems)
				break;
		}
		if (path->reada < 0 && objectid) {
			btrfs_node_key(node, &disk_key, nr);
			if (btrfs_disk_key_objectid(&disk_key) != objectid)
				break;
		}
		search = btrfs_node_blockptr(node, nr);
		if ((search <= target && target - search <= 65536) ||
		    (search > target && search - target <= 65536)) {
			gen = btrfs_node_ptr_generation(node, nr);
			if (map && node->map_token) {
				unmap_extent_buffer(node, node->map_token,
						    KM_USER1);
				node->map_token = NULL;
			}
			readahead_tree_block(root, search, blocksize, gen);
			nread += blocksize;
		}
		nscan++;
		if ((nread > 65536 || nscan > 32))
			break;
	}
	if (map && node->map_token) {
		unmap_extent_buffer(node, node->map_token, KM_USER1);
		node->map_token = NULL;
	}
}

/*
 * returns -EAGAIN if it had to drop the path, or zero if everything was in
 * cache
 */
static noinline int reada_for_balance(struct btrfs_root *root,
				      struct btrfs_path *path, int level)
{
	int slot;
	int nritems;
	struct extent_buffer *parent;
	struct extent_buffer *eb;
	u64 gen;
	u64 block1 = 0;
	u64 block2 = 0;
	int ret = 0;
	int blocksize;

	parent = path->nodes[level + 1];
	if (!parent)
		return 0;

	nritems = btrfs_header_nritems(parent);
	slot = path->slots[level + 1];
	blocksize = btrfs_level_size(root, level);

	if (slot > 0) {
		block1 = btrfs_node_blockptr(parent, slot - 1);
		gen = btrfs_node_ptr_generation(parent, slot - 1);
		eb = btrfs_find_tree_block(root, block1, blocksize);
		if (eb && btrfs_buffer_uptodate(eb, gen))
			block1 = 0;
		free_extent_buffer(eb);
	}
	if (slot + 1 < nritems) {
		block2 = btrfs_node_blockptr(parent, slot + 1);
		gen = btrfs_node_ptr_generation(parent, slot + 1);
		eb = btrfs_find_tree_block(root, block2, blocksize);
		if (eb && btrfs_buffer_uptodate(eb, gen))
			block2 = 0;
		free_extent_buffer(eb);
	}
	if (block1 || block2) {
		ret = -EAGAIN;

		/* release the whole path */
		btrfs_release_path(path);

		/* read the blocks */
		if (block1)
			readahead_tree_block(root, block1, blocksize, 0);
		if (block2)
			readahead_tree_block(root, block2, blocksize, 0);

		if (block1) {
			eb = read_tree_block(root, block1, blocksize, 0);
			free_extent_buffer(eb);
		}
		if (block2) {
			eb = read_tree_block(root, block2, blocksize, 0);
			free_extent_buffer(eb);
		}
	}
	return ret;
}


/*
 * when we walk down the tree, it is usually safe to unlock the higher layers
 * in the tree.  The exceptions are when our path goes through slot 0, because
 * operations on the tree might require changing key pointers higher up in the
 * tree.
 *
 * callers might also have set path->keep_locks, which tells this code to keep
 * the lock if the path points to the last slot in the block.  This is part of
 * walking through the tree, and selecting the next slot in the higher block.
 *
 * lowest_unlock sets the lowest level in the tree we're allowed to unlock.  so
 * if lowest_unlock is 1, level 0 won't be unlocked
 */
static noinline void unlock_up(struct btrfs_path *path, int level,
			       int lowest_unlock)
{
	int i;
	int skip_level = level;
	int no_skips = 0;
	struct extent_buffer *t;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			break;
		if (!path->locks[i])
			break;
		if (!no_skips && path->slots[i] == 0) {
			skip_level = i + 1;
			continue;
		}
		if (!no_skips && path->keep_locks) {
			u32 nritems;
			t = path->nodes[i];
			nritems = btrfs_header_nritems(t);
			if (nritems < 1 || path->slots[i] >= nritems - 1) {
				skip_level = i + 1;
				continue;
			}
		}
		if (skip_level < i && i >= lowest_unlock)
			no_skips = 1;

		t = path->nodes[i];
		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
			btrfs_tree_unlock(t);
			path->locks[i] = 0;
		}
	}
}

/*
 * This releases any locks held in the path starting at level and
 * going all the way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few
 * corner cases, such as COW of the block at slot zero in the node.  This
 * ignores those rules, and it should only be called when there are no
 * more updates to be done higher up in the tree.
 */
noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock(path->nodes[i]);
		path->locks[i] = 0;
	}
}

/*
 * helper function for btrfs_search_slot.  The goal is to find a block
 * in cache without setting the path to blocking.  If we find the block
 * we return zero and the path is unchanged.
 *
 * If we can't find the block, we set the path blocking and do some
 * reada.  -EAGAIN is returned and the search must be repeated.
 */
static int
read_block_for_search(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *p,
		       struct extent_buffer **eb_ret, int level, int slot,
		       struct btrfs_key *key)
{
	u64 blocknr;
	u64 gen;
	u32 blocksize;
	struct extent_buffer *b = *eb_ret;
	struct extent_buffer *tmp;
	int ret;

	blocknr = btrfs_node_blockptr(b, slot);
	gen = btrfs_node_ptr_generation(b, slot);
	blocksize = btrfs_level_size(root, level - 1);

	tmp = btrfs_find_tree_block(root, blocknr, blocksize);
	if (tmp) {
		if (btrfs_buffer_uptodate(tmp, 0)) {
			if (btrfs_buffer_uptodate(tmp, gen)) {
				/*
				 * we found an up to date block without
				 * sleeping, return right away
				 */
				*eb_ret = tmp;
				return 0;
			}
			/* the pages were up to date, but we failed
			 * the generation number check.  Do a full
			 * read for the generation number that is correct.
			 * We must do this without dropping locks so
			 * we can trust our generation number
			 */
			free_extent_buffer(tmp);
			tmp = read_tree_block(root, blocknr, blocksize, gen);
			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
				*eb_ret = tmp;
				return 0;
			}
			free_extent_buffer(tmp);
			btrfs_release_path(p);
			return -EIO;
		}
	}

	/*
	 * reduce lock contention at high levels
	 * of the btree by dropping locks before
	 * we read.  Don't release the lock on the current
	 * level because we need to walk this node to figure
	 * out which blocks to read.
	 */
	btrfs_unlock_up_safe(p, level + 1);
	btrfs_set_path_blocking(p);

	free_extent_buffer(tmp);
	if (p->reada)
		reada_for_search(root, p, level, slot, key->objectid);

	btrfs_release_path(p);

	ret = -EAGAIN;
	tmp = read_tree_block(root, blocknr, blocksize, 0);
	if (tmp) {
		/*
		 * If the read above didn't mark this buffer up to date,
		 * it will never end up being up to date.  Set ret to EIO now
		 * and give up so that our caller doesn't loop forever
		 * on our EAGAINs.
		 */
		if (!btrfs_buffer_uptodate(tmp, 0))
			ret = -EIO;
		free_extent_buffer(tmp);
	}
	return ret;
}

/*
 * helper function for btrfs_search_slot.  This does all of the checks
 * for node-level blocks and does any balancing required based on
 * the ins_len.
 *
 * If no extra work was required, zero is returned.  If we had to
 * drop the path, -EAGAIN is returned and btrfs_search_slot must
 * start over
 */
static int
setup_nodes_for_search(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *p,
		       struct extent_buffer *b, int level, int ins_len)
{
	int ret;
	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
		int sret;

		sret = reada_for_balance(root, p, level);
		if (sret)
			goto again;

		btrfs_set_path_blocking(p);
		sret = split_node(trans, root, p, level);
		btrfs_clear_path_blocking(p, NULL);

		BUG_ON(sret > 0);
		if (sret) {
			ret = sret;
			goto done;
		}
		b = p->nodes[level];
	} else if (ins_len < 0 && btrfs_header_nritems(b) <
		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
		int sret;

		sret = reada_for_balance(root, p, level);
		if (sret)
			goto again;

		btrfs_set_path_blocking(p);
		sret = balance_level(trans, root, p, level);
		btrfs_clear_path_blocking(p, NULL);

		if (sret) {
			ret = sret;
			goto done;
		}
		b = p->nodes[level];
		if (!b) {
			btrfs_release_path(p);
			goto again;
		}
		BUG_ON(btrfs_header_nritems(b) == 1);
	}
	return 0;

again:
	ret = -EAGAIN;
done:
	return ret;
}

/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
 * be inserted, and 1 is returned.  If there are other errors during the
 * search a negative error number is returned.
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
 */
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, struct btrfs_path *p, int
		      ins_len, int cow)
{
	struct extent_buffer *b;
	int slot;
	int ret;
	int err;
	int level;
	int lowest_unlock = 1;
	u8 lowest_level = 0;

	lowest_level = p->lowest_level;
	WARN_ON(lowest_level && ins_len > 0);
	WARN_ON(p->nodes[0] != NULL);

	if (ins_len < 0)
		lowest_unlock = 2;

again:
	if (p->search_commit_root) {
		b = root->commit_root;
		extent_buffer_get(b);
		if (!p->skip_locking)
			btrfs_tree_lock(b);
	} else {
		if (p->skip_locking)
			b = btrfs_root_node(root);
		else
			b = btrfs_lock_root_node(root);
	}

	while (b) {
		level = btrfs_header_level(b);

		/*
		 * setup the path here so we can release it under lock
		 * contention with the cow code
		 */
		p->nodes[level] = b;
		if (!p->skip_locking)
			p->locks[level] = 1;

		if (cow) {
			/*
			 * if we don't really need to cow this block
			 * then we don't want to set the path blocking,
			 * so we test it here
			 */
			if (!should_cow_block(trans, root, b))
				goto cow_done;

			btrfs_set_path_blocking(p);

			err = btrfs_cow_block(trans, root, b,
					      p->nodes[level + 1],
					      p->slots[level + 1], &b);
			if (err) {
				ret = err;
				goto done;
			}
		}
cow_done:
		BUG_ON(!cow && ins_len);

		p->nodes[level] = b;
		if (!p->skip_locking)
			p->locks[level] = 1;

		btrfs_clear_path_blocking(p, NULL);

		/*
		 * we have a lock on b and as long as we aren't changing
		 * the tree, there is no way for the items in b to change.
		 * It is safe to drop the lock on our parent before we
		 * go through the expensive btree search on b.
		 *
		 * If cow is true, then we might be changing slot zero,
		 * which may require changing the parent.  So, we can't
		 * drop the lock until after we know which slot we're
		 * operating on.
		 */
		if (!cow)
			btrfs_unlock_up_safe(p, level + 1);

		ret = bin_search(b, key, level, &slot);

		if (level != 0) {
			int dec = 0;
			if (ret && slot > 0) {
				dec = 1;
				slot -= 1;
			}
			p->slots[level] = slot;
			err = setup_nodes_for_search(trans, root, p, b, level,
						     ins_len);
			if (err == -EAGAIN)
				goto again;
			if (err) {
				ret = err;
				goto done;
			}
			b = p->nodes[level];
			slot = p->slots[level];

			unlock_up(p, level, lowest_unlock);

			if (level == lowest_level) {
				if (dec)
					p->slots[level]++;
				goto done;
			}

			err = read_block_for_search(trans, root, p,
						    &b, level, slot, key);
			if (err == -EAGAIN)
				goto again;
			if (err) {
				ret = err;
				goto done;
			}

			if (!p->skip_locking) {
				btrfs_clear_path_blocking(p, NULL);
				err = btrfs_try_spin_lock(b);

				if (!err) {
					btrfs_set_path_blocking(p);
					btrfs_tree_lock(b);
					btrfs_clear_path_blocking(p, b);
				}
			}
		} else {
			p->slots[level] = slot;
			if (ins_len > 0 &&
			    btrfs_leaf_free_space(root, b) < ins_len) {
				btrfs_set_path_blocking(p);
				err = split_leaf(trans, root, key,
						 p, ins_len, ret == 0);
				btrfs_clear_path_blocking(p, NULL);

				BUG_ON(err > 0);
				if (err) {
					ret = err;
					goto done;
				}
			}
			if (!p->search_for_split)
				unlock_up(p, level, lowest_unlock);
			goto done;
		}
	}
	ret = 1;
done:
	/*
	 * we don't really know what they plan on doing with the path
	 * from here on, so for now just mark it as blocking
	 */
	if (!p->leave_spinning)
		btrfs_set_path_blocking(p);
	if (ret < 0)
		btrfs_release_path(p);
	return ret;
}
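
/*
 * Illustrative sketch (editor's addition): a typical read-only lookup via
 * btrfs_search_slot().  The key values here are hypothetical and error
 * handling is trimmed:
 *
 *	struct btrfs_path *path = btrfs_alloc_path();
 *	struct btrfs_key key;
 *	int ret;
 *
 *	if (!path)
 *		return -ENOMEM;
 *	key.objectid = 256;
 *	key.type = BTRFS_INODE_ITEM_KEY;
 *	key.offset = 0;
 *	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 *
 * ret == 0 means an exact match at path->nodes[0], path->slots[0];
 * ret == 1 means not found, with the slot marking the insertion point;
 * ret < 0 is an error.  Finish with btrfs_free_path(path).
 */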

/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 *
 * If this fails to write a tree block, it returns -1, but continues
 * fixing up the blocks in ram so the tree is consistent.
 */
static int fixup_low_keys(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct btrfs_path *path,
			  struct btrfs_disk_key *key, int level)
{
	int i;
	int ret = 0;
	struct extent_buffer *t;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		int tslot = path->slots[i];
		if (!path->nodes[i])
			break;
		t = path->nodes[i];
		btrfs_set_node_key(t, key, tslot);
		btrfs_mark_buffer_dirty(path->nodes[i]);
		if (tslot != 0)
			break;
	}
	return ret;
}

/*
 * update item key.
 *
 * This function isn't completely safe.  It's the caller's responsibility
 * that the new key won't break the order
 */
int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, struct btrfs_path *path,
			    struct btrfs_key *new_key)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *eb;
	int slot;

	eb = path->nodes[0];
	slot = path->slots[0];
	if (slot > 0) {
		btrfs_item_key(eb, &disk_key, slot - 1);
		if (comp_keys(&disk_key, new_key) >= 0)
			return -1;
	}
	if (slot < btrfs_header_nritems(eb) - 1) {
		btrfs_item_key(eb, &disk_key, slot + 1);
		if (comp_keys(&disk_key, new_key) <= 0)
			return -1;
	}

	btrfs_cpu_key_to_disk(&disk_key, new_key);
	btrfs_set_item_key(eb, &disk_key, slot);
	btrfs_mark_buffer_dirty(eb);
	if (slot == 0)
		fixup_low_keys(trans, root, path, &disk_key, 1);
	return 0;
}

/*
 * try to push data from one node into the next node left in the
 * tree.
 *
 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
 * error, and > 0 if there was no room in the left hand block.
 */
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
			  struct extent_buffer *src, int empty)
{
	int push_items = 0;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

	if (!empty && src_nritems <= 8)
		return 1;

	if (push_items <= 0)
		return 1;

	if (empty) {
		push_items = min(src_nritems, push_items);
		if (push_items < src_nritems) {
			/* leave at least 8 pointers in the node if
			 * we aren't going to empty it
			 */
			if (src_nritems - push_items < 8) {
				if (push_items <= 8)
					return 1;
				push_items -= 8;
			}
		}
	} else
		push_items = min(src_nritems - 8, push_items);

	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(dst_nritems),
			   btrfs_node_key_ptr_offset(0),
			   push_items * sizeof(struct btrfs_key_ptr));

	if (push_items < src_nritems) {
		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
				      btrfs_node_key_ptr_offset(push_items),
				      (src_nritems - push_items) *
				      sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);

	return ret;
}

/*
 * try to push data from one node into the next node right in the
 * tree.
 *
 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
 * error, and > 0 if there was no room in the right hand block.
 *
 * this will only push up to 1/2 the contents of the left node over
 */
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst,
			      struct extent_buffer *src)
{
	int push_items = 0;
	int max_push;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
	if (push_items <= 0)
		return 1;

	if (src_nritems < 4)
		return 1;

	max_push = src_nritems / 2 + 1;
	/* don't try to empty the node */
	if (max_push >= src_nritems)
		return 1;

	if (max_push < push_items)
		push_items = max_push;

	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
			      btrfs_node_key_ptr_offset(0),
			      (dst_nritems) *
			      sizeof(struct btrfs_key_ptr));

	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(src_nritems - push_items),
			   push_items * sizeof(struct btrfs_key_ptr));

	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);

	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);

	return ret;
}

/*
 * helper function to insert a new root level in the tree.
 * A new node is allocated, and a single item is inserted to
 * point to the existing root
 *
 * returns zero on success or < 0 on failure.
 */
static noinline int insert_new_root(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path, int level)
{
	u64 lower_gen;
	struct extent_buffer *lower;
	struct extent_buffer *c;
	struct extent_buffer *old;
	struct btrfs_disk_key lower_key;

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

	lower = path->nodes[level-1];
	if (level == 1)
		btrfs_item_key(lower, &lower_key, 0);
	else
		btrfs_node_key(lower, &lower_key, 0);

	c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
				   root->root_key.objectid, &lower_key,
				   level, root->node->start, 0);
	if (IS_ERR(c))
		return PTR_ERR(c);

	root_add_used(root, root->nodesize);

	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_nritems(c, 1);
	btrfs_set_header_level(c, level);
	btrfs_set_header_bytenr(c, c->start);
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(c),
			    BTRFS_FSID_SIZE);

	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(c),
			    BTRFS_UUID_SIZE);

	btrfs_set_node_key(c, &lower_key, 0);
	btrfs_set_node_blockptr(c, 0, lower->start);
	lower_gen = btrfs_header_generation(lower);
	WARN_ON(lower_gen != trans->transid);

	btrfs_set_node_ptr_generation(c, 0, lower_gen);

	btrfs_mark_buffer_dirty(c);

	old = root->node;
	rcu_assign_pointer(root->node, c);

	/* the super has an extra ref to root->node */
	free_extent_buffer(old);

	add_root_to_dirty_list(root);
	extent_buffer_get(c);
	path->nodes[level] = c;
	path->locks[level] = 1;
	path->slots[level] = 0;
	return 0;
}
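
/*
 * Example of the shift in insert_ptr() below: with nritems == 5 and
 * slot == 2, pointers 2..4 are moved up to slots 3..5 before the new
 * key/blockptr pair is written into slot 2, giving 6 items in total.
 */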

/*
 * worker function to insert a single pointer in a node.
 * the node should have enough room for the pointer already
 *
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
 *
 * returns zero on success and < 0 on any error
 */
static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, struct btrfs_disk_key
		      *key, u64 bytenr, int slot, int level)
{
	struct extent_buffer *lower;
	int nritems;

	BUG_ON(!path->nodes[level]);
	btrfs_assert_tree_locked(path->nodes[level]);
	lower = path->nodes[level];
	nritems = btrfs_header_nritems(lower);
	BUG_ON(slot > nritems);
	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
		BUG();
	if (slot != nritems) {
		memmove_extent_buffer(lower,
			      btrfs_node_key_ptr_offset(slot + 1),
			      btrfs_node_key_ptr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_node_key(lower, key, slot);
	btrfs_set_node_blockptr(lower, slot, bytenr);
	WARN_ON(trans->transid == 0);
	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
	btrfs_set_header_nritems(lower, nritems + 1);
	btrfs_mark_buffer_dirty(lower);
	return 0;
}
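
/*
 * In split_node() below the split point is mid = (c_nritems + 1) / 2.
 * For example, a node with 7 pointers keeps slots 0..3 and moves slots
 * 4..6 into the new block, so the left half is never smaller than the
 * right half.
 */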

/*
 * split the node at the specified level in path in two.
 * The path is corrected to point to the appropriate node after the split
 *
 * Before splitting this tries to make some room in the node by pushing
 * left and right, if either one works, it returns right away.
 *
 * returns 0 on success and < 0 on failure
 */
static noinline int split_node(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path, int level)
{
	struct extent_buffer *c;
	struct extent_buffer *split;
	struct btrfs_disk_key disk_key;
	int mid;
	int ret;
	int wret;
	u32 c_nritems;

	c = path->nodes[level];
	WARN_ON(btrfs_header_generation(c) != trans->transid);
	if (c == root->node) {
		/* trying to split the root, let's make a new one */
		ret = insert_new_root(trans, root, path, level + 1);
		if (ret)
			return ret;
	} else {
		ret = push_nodes_for_insert(trans, root, path, level);
		c = path->nodes[level];
		if (!ret && btrfs_header_nritems(c) <
		    BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
			return 0;
		if (ret < 0)
			return ret;
	}

	c_nritems = btrfs_header_nritems(c);
	mid = (c_nritems + 1) / 2;
	btrfs_node_key(c, &disk_key, mid);

	split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
					root->root_key.objectid,
					&disk_key, level, c->start, 0);
	if (IS_ERR(split))
		return PTR_ERR(split);

	root_add_used(root, root->nodesize);

	memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(split, btrfs_header_level(c));
	btrfs_set_header_bytenr(split, split->start);
	btrfs_set_header_generation(split, trans->transid);
	btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(split, root->root_key.objectid);
	write_extent_buffer(split, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(split),
			    BTRFS_FSID_SIZE);
	write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(split),
			    BTRFS_UUID_SIZE);


	copy_extent_buffer(split, c,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(mid),
			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
	btrfs_set_header_nritems(split, c_nritems - mid);
	btrfs_set_header_nritems(c, mid);
	ret = 0;

	btrfs_mark_buffer_dirty(c);
	btrfs_mark_buffer_dirty(split);

	wret = insert_ptr(trans, root, path, &disk_key, split->start,
			  path->slots[level + 1] + 1,
			  level + 1);
	if (wret)
		ret = wret;

	if (path->slots[level] >= mid) {
		path->slots[level] -= mid;
		btrfs_tree_unlock(c);
		free_extent_buffer(c);
		path->nodes[level] = split;
		path->slots[level + 1] += 1;
	} else {
		btrfs_tree_unlock(split);
		free_extent_buffer(split);
	}
	return ret;
}
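
/*
 * leaf_space_used() below computes, for items start..start+nr-1:
 * (end of the first item's data - offset of the last item's data) plus
 * nr struct btrfs_item headers.  Item data is laid out back to front,
 * so the first item has the highest data offset.
 */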

/*
 * how many bytes are required to store the items in a leaf. start
 * and nr indicate which items in the leaf to check. This totals up the
 * space used both by the item structs and the item data
 */
static int leaf_space_used(struct extent_buffer *l, int start, int nr)
{
	int data_len;
	int nritems = btrfs_header_nritems(l);
	int end = min(nritems, start + nr) - 1;

	if (!nr)
		return 0;
	data_len = btrfs_item_end_nr(l, start);
	data_len = data_len - btrfs_item_offset_nr(l, end);
	data_len += sizeof(struct btrfs_item) * nr;
	WARN_ON(data_len < 0);
	return data_len;
}

/*
 * The space between the end of the leaf items and
 * the start of the leaf data. IOW, how much room
 * the leaf has left for both items and data
 */
noinline int btrfs_leaf_free_space(struct btrfs_root *root,
				   struct extent_buffer *leaf)
{
	int nritems = btrfs_header_nritems(leaf);
	int ret;
	ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
	if (ret < 0) {
		printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
		       "used %d nritems %d\n",
		       ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
		       leaf_space_used(leaf, 0, nritems), nritems);
	}
	return ret;
}
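
/*
 * In the leaf push helpers below, remember the leaf layout: the array of
 * struct btrfs_item headers grows from the front of the block while the
 * item data grows backwards from the end.  "Making room" on the right
 * therefore means shifting the destination's data area down and its item
 * headers up by the pushed amount.
 */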

/*
 * min slot controls the lowest index we're willing to push to the
 * right. We'll push up to and including min_slot, but no lower
 */
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      int data_size, int empty,
				      struct extent_buffer *right,
				      int free_space, u32 left_nritems,
				      u32 min_slot)
{
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *upper = path->nodes[1];
	struct btrfs_disk_key disk_key;
	int slot;
	u32 i;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 nr;
	u32 right_nritems;
	u32 data_end;
	u32 this_item_size;

	if (empty)
		nr = 0;
	else
		nr = max_t(u32, 1, min_slot);

	if (path->slots[0] >= left_nritems)
		push_space += data_size;

	slot = path->slots[1];
	i = left_nritems - 1;
	while (i >= nr) {
		item = btrfs_item_nr(left, i);

		if (!empty && push_items > 0) {
			if (path->slots[0] > i)
				break;
			if (path->slots[0] == i) {
				int space = btrfs_leaf_free_space(root, left);
				if (space + push_space * 2 > free_space)
					break;
			}
		}

		if (path->slots[0] == i)
			push_space += data_size;

		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					sizeof(struct btrfs_item),
					&left->map_token, &left->kaddr,
					&left->map_start, &left->map_len,
					KM_USER1);
		}

		this_item_size = btrfs_item_size(left, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;

		push_items++;
		push_space += this_item_size + sizeof(*item);
		if (i == 0)
			break;
		i--;
	}
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
	}

	if (push_items == 0)
		goto out_unlock;

	if (!empty && push_items == left_nritems)
		WARN_ON(1);

	/* push left to right */
	right_nritems = btrfs_header_nritems(right);

	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
	push_space -= leaf_data_end(root, left);

	/* make room in the right data area */
	data_end = leaf_data_end(root, right);
	memmove_extent_buffer(right,
			      btrfs_leaf_data(right) + data_end - push_space,
			      btrfs_leaf_data(right) + data_end,
			      BTRFS_LEAF_DATA_SIZE(root) - data_end);

	/* copy from the left data area */
	copy_extent_buffer(right, left, btrfs_leaf_data(right) +
		     BTRFS_LEAF_DATA_SIZE(root) - push_space,
		     btrfs_leaf_data(left) + leaf_data_end(root, left),
		     push_space);

	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
			      btrfs_item_nr_offset(0),
			      right_nritems * sizeof(struct btrfs_item));

	/* copy the items from left to right */
	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
		   btrfs_item_nr_offset(left_nritems - push_items),
		   push_items * sizeof(struct btrfs_item));

	/* update the item pointers */
	right_nritems += push_items;
	btrfs_set_header_nritems(right, right_nritems);
	push_space = BTRFS_LEAF_DATA_SIZE(root);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}
		push_space -= btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}
	left_nritems -= push_items;
	btrfs_set_header_nritems(left, left_nritems);

	if (left_nritems)
		btrfs_mark_buffer_dirty(left);
	else
		clean_tree_block(trans, root, left);

	btrfs_mark_buffer_dirty(right);

	btrfs_item_key(right, &disk_key, 0);
	btrfs_set_node_key(upper, &disk_key, slot + 1);
	btrfs_mark_buffer_dirty(upper);

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] >= left_nritems) {
		path->slots[0] -= left_nritems;
		if (btrfs_header_nritems(path->nodes[0]) == 0)
			clean_tree_block(trans, root, path->nodes[0]);
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 0;

out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
}
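
/*
 * push_leaf_right() below measures free space, COWs the right leaf, and
 * then measures again on the returned buffer: btrfs_cow_block() may hand
 * back a different extent_buffer, so the first check only serves as a
 * cheap early-out before doing the copy-on-write.
 */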

/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes. returns zero if the push worked, nonzero otherwise
 *
 * returns 1 if the push failed because the other node didn't have enough
 * room, 0 if everything worked out and < 0 if there were major errors.
 *
 * this will push starting from min_slot to the end of the leaf. It won't
 * push any slot lower than min_slot
 */
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
			   *root, struct btrfs_path *path,
			   int min_data_size, int data_size,
			   int empty, u32 min_slot)
{
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
	struct extent_buffer *upper;
	int slot;
	int free_space;
	u32 left_nritems;
	int ret;

	if (!path->nodes[1])
		return 1;

	slot = path->slots[1];
	upper = path->nodes[1];
	if (slot >= btrfs_header_nritems(upper) - 1)
		return 1;

	btrfs_assert_tree_locked(path->nodes[1]);

	right = read_node_slot(root, upper, slot + 1);
	if (right == NULL)
		return 1;

	btrfs_tree_lock(right);
	btrfs_set_lock_blocking(right);

	free_space = btrfs_leaf_free_space(root, right);
	if (free_space < data_size)
		goto out_unlock;

	/* cow and double check */
	ret = btrfs_cow_block(trans, root, right, upper,
			      slot + 1, &right);
	if (ret)
		goto out_unlock;

	free_space = btrfs_leaf_free_space(root, right);
	if (free_space < data_size)
		goto out_unlock;

	left_nritems = btrfs_header_nritems(left);
	if (left_nritems == 0)
		goto out_unlock;

	return __push_leaf_right(trans, root, path, min_data_size, empty,
				right, free_space, left_nritems, min_slot);
out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
}

/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes. returns zero if the push worked, nonzero otherwise
 *
 * max_slot can put a limit on how far into the leaf we'll push items. The
 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
 * items
 */
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct btrfs_path *path, int data_size,
				     int empty, struct extent_buffer *left,
				     int free_space, u32 right_nritems,
				     u32 max_slot)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
	int i;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 old_left_nritems;
	u32 nr;
	int ret = 0;
	int wret;
	u32 this_item_size;
	u32 old_left_item_size;

	if (empty)
		nr = min(right_nritems, max_slot);
	else
		nr = min(right_nritems - 1, max_slot);

	for (i = 0; i < nr; i++) {
		item = btrfs_item_nr(right, i);
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		if (!empty && push_items > 0) {
			if (path->slots[0] < i)
				break;
			if (path->slots[0] == i) {
				int space = btrfs_leaf_free_space(root, right);
				if (space + push_space * 2 > free_space)
					break;
			}
		}

		if (path->slots[0] == i)
			push_space += data_size;

		this_item_size = btrfs_item_size(right, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;

		push_items++;
		push_space += this_item_size + sizeof(*item);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}

	if (push_items == 0) {
		ret = 1;
		goto out;
	}
	if (!empty && push_items == btrfs_header_nritems(right))
		WARN_ON(1);

	/* push data from right to left */
	copy_extent_buffer(left, right,
			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
			   btrfs_item_nr_offset(0),
			   push_items * sizeof(struct btrfs_item));

	push_space = BTRFS_LEAF_DATA_SIZE(root) -
		     btrfs_item_offset_nr(right, push_items - 1);

	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
		     leaf_data_end(root, left) - push_space,
		     btrfs_leaf_data(right) +
		     btrfs_item_offset_nr(right, push_items - 1),
		     push_space);
	old_left_nritems = btrfs_header_nritems(left);
	BUG_ON(old_left_nritems <= 0);

	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
		u32 ioff;

		item = btrfs_item_nr(left, i);
		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					sizeof(struct btrfs_item),
					&left->map_token, &left->kaddr,
					&left->map_start, &left->map_len,
					KM_USER1);
		}

		ioff = btrfs_item_offset(left, item);
		btrfs_set_item_offset(left, item,
		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
	}
	btrfs_set_header_nritems(left, old_left_nritems + push_items);
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
	}

	/* fixup right node */
	if (push_items > right_nritems) {
		printk(KERN_CRIT "push items %d nr %u\n", push_items,
		       right_nritems);
		WARN_ON(1);
	}

	if (push_items < right_nritems) {
		push_space = btrfs_item_offset_nr(right, push_items - 1) -
						  leaf_data_end(root, right);
		memmove_extent_buffer(right, btrfs_leaf_data(right) +
				      BTRFS_LEAF_DATA_SIZE(root) - push_space,
				      btrfs_leaf_data(right) +
				      leaf_data_end(root, right), push_space);

		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
			      btrfs_item_nr_offset(push_items),
			      (btrfs_header_nritems(right) - push_items) *
			      sizeof(struct btrfs_item));
	}
	right_nritems -= push_items;
	btrfs_set_header_nritems(right, right_nritems);
	push_space = BTRFS_LEAF_DATA_SIZE(root);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		push_space = push_space - btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}
	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}

	btrfs_mark_buffer_dirty(left);
	if (right_nritems)
		btrfs_mark_buffer_dirty(right);
	else
		clean_tree_block(trans, root, right);

	btrfs_item_key(right, &disk_key, 0);
	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
	if (wret)
		ret = wret;

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = left;
		path->slots[1] -= 1;
	} else {
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
		path->slots[0] -= push_items;
	}
	BUG_ON(path->slots[0] < 0);
	return ret;
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
}

/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes. returns zero if the push worked, nonzero otherwise
 *
 * max_slot can put a limit on how far into the leaf we'll push items. The
 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
 * items
 */
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int min_data_size,
			  int data_size, int empty, u32 max_slot)
{
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
	int slot;
	int free_space;
	u32 right_nritems;
	int ret = 0;

	slot = path->slots[1];
	if (slot == 0)
		return 1;
	if (!path->nodes[1])
		return 1;

	right_nritems = btrfs_header_nritems(right);
	if (right_nritems == 0)
		return 1;

	btrfs_assert_tree_locked(path->nodes[1]);

	left = read_node_slot(root, path->nodes[1], slot - 1);
	if (left == NULL)
		return 1;

	btrfs_tree_lock(left);
	btrfs_set_lock_blocking(left);

	free_space = btrfs_leaf_free_space(root, left);
	if (free_space < data_size) {
		ret = 1;
		goto out;
	}

	/* cow and double check */
	ret = btrfs_cow_block(trans, root, left,
			      path->nodes[1], slot - 1, &left);
	if (ret) {
		/* we hit -ENOSPC, but it isn't fatal here */
		ret = 1;
		goto out;
	}

	free_space = btrfs_leaf_free_space(root, left);
	if (free_space < data_size) {
		ret = 1;
		goto out;
	}

	return __push_leaf_left(trans, root, path, min_data_size,
			       empty, left, free_space, right_nritems,
			       max_slot);
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
}

/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 *
 * returns 0 if all went well and < 0 on failure.
 */
static noinline int copy_for_split(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct extent_buffer *l,
			       struct extent_buffer *right,
			       int slot, int mid, int nritems)
{
	int data_copy_size;
	int rt_data_off;
	int i;
	int ret = 0;
	int wret;
	struct btrfs_disk_key disk_key;

	nritems = nritems - mid;
	btrfs_set_header_nritems(right, nritems);
	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);

	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(mid),
			   nritems * sizeof(struct btrfs_item));

	copy_extent_buffer(right, l,
		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
		     data_copy_size, btrfs_leaf_data(l) +
		     leaf_data_end(root, l), data_copy_size);

	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
		      btrfs_item_end_nr(l, mid);

	for (i = 0; i < nritems; i++) {
		struct btrfs_item *item = btrfs_item_nr(right, i);
		u32 ioff;

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		ioff = btrfs_item_offset(right, item);
		btrfs_set_item_offset(right, item, ioff + rt_data_off);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}

	btrfs_set_header_nritems(l, mid);
	ret = 0;
	btrfs_item_key(right, &disk_key, 0);
	wret = insert_ptr(trans, root, path, &disk_key, right->start,
			  path->slots[1] + 1, 1);
	if (wret)
		ret = wret;

	btrfs_mark_buffer_dirty(right);
	btrfs_mark_buffer_dirty(l);
	BUG_ON(path->slots[0] != slot);

	if (mid <= slot) {
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[0] -= mid;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}

	BUG_ON(path->slots[0] < 0);

	return ret;
}

/*
 * double splits happen when we need to insert a big item in the middle
 * of a leaf. A double split can leave us with 3 mostly empty leaves:
 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
 *          A                 B                 C
 *
 * We avoid this by trying to push the items on either side of our target
 * into the adjacent leaves. If all goes well we can avoid the double split
 * completely.
 */
static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  int data_size)
{
	int ret;
	int progress = 0;
	int slot;
	u32 nritems;

	slot = path->slots[0];

	/*
	 * try to push all the items after our slot into the
	 * right leaf
	 */
	ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
	if (ret < 0)
		return ret;

	if (ret == 0)
		progress++;

	nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * our goal is to get our slot at the start or end of a leaf. If
	 * we've done so we're done
	 */
	if (path->slots[0] == 0 || path->slots[0] == nritems)
		return 0;

	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
		return 0;

	/* try to push all the items before our slot into the next leaf */
	slot = path->slots[0];
	ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
	if (ret < 0)
		return ret;

	if (ret == 0)
		progress++;

	if (progress)
		return 0;
	return 1;
}
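
/*
 * In split_leaf() below, 'split' selects the strategy: 0 means the new
 * item gets a fresh empty leaf to itself (no items are moved), 1 means a
 * normal split at 'mid', and 2 means even the chosen split point can't
 * make enough room, so a second split will be needed afterwards.
 */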

/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 *
 * returns 0 if all went well and < 0 on failure.
 */
static noinline int split_leaf(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_key *ins_key,
			       struct btrfs_path *path, int data_size,
			       int extend)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *l;
	u32 nritems;
	int mid;
	int slot;
	struct extent_buffer *right;
	int ret = 0;
	int wret;
	int split;
	int num_doubles = 0;
	int tried_avoid_double = 0;

	l = path->nodes[0];
	slot = path->slots[0];
	if (extend && data_size + btrfs_item_size_nr(l, slot) +
	    sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
		return -EOVERFLOW;

	/* first try to make some room by pushing left and right */
	if (data_size) {
		wret = push_leaf_right(trans, root, path, data_size,
				       data_size, 0, 0);
		if (wret < 0)
			return wret;
		if (wret) {
			wret = push_leaf_left(trans, root, path, data_size,
					      data_size, 0, (u32)-1);
			if (wret < 0)
				return wret;
		}
		l = path->nodes[0];

		/* did the pushes work? */
		if (btrfs_leaf_free_space(root, l) >= data_size)
			return 0;
	}

	if (!path->nodes[1]) {
		ret = insert_new_root(trans, root, path, 1);
		if (ret)
			return ret;
	}
again:
	split = 1;
	l = path->nodes[0];
	slot = path->slots[0];
	nritems = btrfs_header_nritems(l);
	mid = (nritems + 1) / 2;

	if (mid <= slot) {
		if (nritems == 1 ||
		    leaf_space_used(l, mid, nritems - mid) + data_size >
			BTRFS_LEAF_DATA_SIZE(root)) {
			if (slot >= nritems) {
				split = 0;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
					if (data_size && !tried_avoid_double)
						goto push_for_double;
					split = 2;
				}
			}
		}
	} else {
		if (leaf_space_used(l, 0, mid) + data_size >
			BTRFS_LEAF_DATA_SIZE(root)) {
			if (!extend && data_size && slot == 0) {
				split = 0;
			} else if ((extend || !data_size) && slot == 0) {
				mid = 1;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
					if (data_size && !tried_avoid_double)
						goto push_for_double;
					split = 2;
				}
			}
		}
	}

	if (split == 0)
		btrfs_cpu_key_to_disk(&disk_key, ins_key);
	else
		btrfs_item_key(l, &disk_key, mid);

	right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
					root->root_key.objectid,
					&disk_key, 0, l->start, 0);
	if (IS_ERR(right))
		return PTR_ERR(right);

	root_add_used(root, root->leafsize);

	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_bytenr(right, right->start);
	btrfs_set_header_generation(right, trans->transid);
	btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(right, root->root_key.objectid);
	btrfs_set_header_level(right, 0);
	write_extent_buffer(right, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(right),
			    BTRFS_FSID_SIZE);

	write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
			    BTRFS_UUID_SIZE);

	if (split == 0) {
		if (mid <= slot) {
			btrfs_set_header_nritems(right, 0);
			wret = insert_ptr(trans, root, path,
					  &disk_key, right->start,
					  path->slots[1] + 1, 1);
			if (wret)
				ret = wret;

			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
			path->slots[0] = 0;
			path->slots[1] += 1;
		} else {
			btrfs_set_header_nritems(right, 0);
			wret = insert_ptr(trans, root, path,
					  &disk_key,
					  right->start,
					  path->slots[1], 1);
			if (wret)
				ret = wret;
			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
			path->slots[0] = 0;
			if (path->slots[1] == 0) {
				wret = fixup_low_keys(trans, root,
						path, &disk_key, 1);
				if (wret)
					ret = wret;
			}
		}
		btrfs_mark_buffer_dirty(right);
		return ret;
	}

	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
	BUG_ON(ret);

	if (split == 2) {
		BUG_ON(num_doubles != 0);
		num_doubles++;
		goto again;
	}

	return ret;

push_for_double:
	push_for_double_split(trans, root, path, data_size);
	tried_avoid_double = 1;
	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
		return 0;
	goto again;
}

static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root,
					 struct btrfs_path *path, int ins_len)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	u64 extent_len = 0;
	u32 item_size;
	int ret;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
	       key.type != BTRFS_EXTENT_CSUM_KEY);

	if (btrfs_leaf_free_space(root, leaf) >= ins_len)
		return 0;

	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	if (key.type == BTRFS_EXTENT_DATA_KEY) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_len = btrfs_file_extent_num_bytes(leaf, fi);
	}
	btrfs_release_path(path);

	path->keep_locks = 1;
	path->search_for_split = 1;
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	path->search_for_split = 0;
	if (ret < 0)
		goto err;

	ret = -EAGAIN;
	leaf = path->nodes[0];
	/* if our item isn't there or got smaller, return now */
	if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
		goto err;

	/* the leaf has changed, it now has room. return now */
	if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
		goto err;

	if (key.type == BTRFS_EXTENT_DATA_KEY) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
			goto err;
	}

	btrfs_set_path_blocking(path);
	ret = split_leaf(trans, root, &key, path, ins_len, 1);
	if (ret)
		goto err;

	path->keep_locks = 0;
	btrfs_unlock_up_safe(path, 1);
	return 0;
err:
	path->keep_locks = 0;
	return ret;
}
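
/*
 * split_item() below carves one item in two without moving its data.
 * The original item's bytes [0, split_offset) stay with the old key and
 * bytes [split_offset, item_size) go to the new key: afterwards the old
 * item sits at orig_offset + item_size - split_offset with size
 * split_offset, and the new item sits at orig_offset with the remainder.
 */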

static noinline int split_item(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct btrfs_key *new_key,
			       unsigned long split_offset)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	struct btrfs_item *new_item;
	int slot;
	char *buf;
	u32 nritems;
	u32 item_size;
	u32 orig_offset;
	struct btrfs_disk_key disk_key;

	leaf = path->nodes[0];
	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));

	btrfs_set_path_blocking(path);

	item = btrfs_item_nr(leaf, path->slots[0]);
	orig_offset = btrfs_item_offset(leaf, item);
	item_size = btrfs_item_size(leaf, item);

	buf = kmalloc(item_size, GFP_NOFS);
	if (!buf)
		return -ENOMEM;

	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
			   path->slots[0]), item_size);

	slot = path->slots[0] + 1;
	nritems = btrfs_header_nritems(leaf);
	if (slot != nritems) {
		/* shift the items */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
				btrfs_item_nr_offset(slot),
				(nritems - slot) * sizeof(struct btrfs_item));
	}

	btrfs_cpu_key_to_disk(&disk_key, new_key);
	btrfs_set_item_key(leaf, &disk_key, slot);

	new_item = btrfs_item_nr(leaf, slot);

	btrfs_set_item_offset(leaf, new_item, orig_offset);
	btrfs_set_item_size(leaf, new_item, item_size - split_offset);

	btrfs_set_item_offset(leaf, item,
			      orig_offset + item_size - split_offset);
	btrfs_set_item_size(leaf, item, split_offset);

	btrfs_set_header_nritems(leaf, nritems + 1);

	/* write the data for the start of the original item */
	write_extent_buffer(leaf, buf,
			    btrfs_item_ptr_offset(leaf, path->slots[0]),
			    split_offset);

	/* write the data for the new item */
	write_extent_buffer(leaf, buf + split_offset,
			    btrfs_item_ptr_offset(leaf, slot),
			    item_size - split_offset);
	btrfs_mark_buffer_dirty(leaf);

	BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
	kfree(buf);
	return 0;
}

/*
 * This function splits a single item into two items,
 * giving 'new_key' to the new item and splitting the
 * old one at split_offset (from the start of the item).
 *
 * The path may be released by this operation. After
 * the split, the path is pointing to the old item. The
 * new item is going to be in the same node as the old one.
 *
 * Note, the item being split must be small enough to live alone on
 * a tree block with room for one extra struct btrfs_item
 *
 * This allows us to split the item in place, keeping a lock on the
 * leaf the entire time.
 */
int btrfs_split_item(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root,
		     struct btrfs_path *path,
		     struct btrfs_key *new_key,
		     unsigned long split_offset)
{
	int ret;
	ret = setup_leaf_for_split(trans, root, path,
				   sizeof(struct btrfs_item));
	if (ret)
		return ret;

	ret = split_item(trans, root, path, new_key, split_offset);
	return ret;
}

/*
 * This function duplicates an item, giving 'new_key' to the new item.
 * It guarantees both items live in the same tree leaf and the new item
 * is contiguous with the original item.
 *
 * This allows us to split a file extent in place, keeping a lock on the
 * leaf the entire time.
 */
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path,
			 struct btrfs_key *new_key)
{
	struct extent_buffer *leaf;
	int ret;
	u32 item_size;

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	ret = setup_leaf_for_split(trans, root, path,
				   item_size + sizeof(struct btrfs_item));
	if (ret)
		return ret;

	path->slots[0]++;
	ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
				     item_size, item_size +
				     sizeof(struct btrfs_item), 1);
	BUG_ON(ret);

	leaf = path->nodes[0];
	memcpy_extent_buffer(leaf,
			     btrfs_item_ptr_offset(leaf, path->slots[0]),
			     btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
			     item_size);
	return 0;
}

/*
 * make the item pointed to by the path smaller. new_size indicates
 * how small to make it, and from_end tells us if we just chop bytes
 * off the end of the item or if we shift the item to chop bytes off
 * the front.
 */
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_path *path,
			u32 new_size, int from_end)
{
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data_start;
	unsigned int old_size;
	unsigned int size_diff;
	int i;

	leaf = path->nodes[0];
	slot = path->slots[0];

	old_size = btrfs_item_size_nr(leaf, slot);
	if (old_size == new_size)
		return 0;

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	old_data_start = btrfs_item_offset_nr(leaf, slot);

	size_diff = old_size - new_size;

	BUG_ON(slot < 0);
	BUG_ON(slot >= nritems);

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(leaf, i);

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}

		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff + size_diff);
	}

	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

	/* shift the data */
	if (from_end) {
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start + new_size - data_end);
	} else {
		struct btrfs_disk_key disk_key;
		u64 offset;

		btrfs_item_key(leaf, &disk_key, slot);

		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
			unsigned long ptr;
			struct btrfs_file_extent_item *fi;

			fi = btrfs_item_ptr(leaf, slot,
					    struct btrfs_file_extent_item);
			fi = (struct btrfs_file_extent_item *)(
			     (unsigned long)fi - size_diff);

			if (btrfs_file_extent_type(leaf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE) {
				ptr = btrfs_item_ptr_offset(leaf, slot);
				memmove_extent_buffer(leaf, ptr,
				      (unsigned long)fi,
				      offsetof(struct btrfs_file_extent_item,
						 disk_bytenr));
			}
		}

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start - data_end);

		offset = btrfs_disk_key_offset(&disk_key);
		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
		btrfs_set_item_key(leaf, &disk_key, slot);
		if (slot == 0)
			fixup_low_keys(trans, root, path, &disk_key, 1);
	}

	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, new_size);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	return 0;
}
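
/*
 * btrfs_extend_item() below grows an item in place: every item from
 * 'slot' onwards has its data offset reduced by data_size, the data
 * between data_end and the old end of the item is shifted down by the
 * same amount, and the extra bytes land at the end of the item.
 */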

/*
 * make the item pointed to by the path bigger; data_size is the number
 * of bytes to grow it by.
 */
int btrfs_extend_item(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, struct btrfs_path *path,
		      u32 data_size)
{
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data;
	unsigned int old_size;
	int i;

	leaf = path->nodes[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	if (btrfs_leaf_free_space(root, leaf) < data_size) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	slot = path->slots[0];
	old_data = btrfs_item_end_nr(leaf, slot);

	BUG_ON(slot < 0);
	if (slot >= nritems) {
		btrfs_print_leaf(root, leaf);
		printk(KERN_CRIT "slot %d too large, nritems %d\n",
		       slot, nritems);
		BUG_ON(1);
	}

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(leaf, i);

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}
		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff - data_size);
	}

	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

	/* shift the data */
	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
		      data_end - data_size, btrfs_leaf_data(leaf) +
		      data_end, old_data - data_end);

	data_end = old_data;
	old_size = btrfs_item_size_nr(leaf, slot);
	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, old_size + data_size);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	return 0;
}

/*
 * Given a key and some data, insert items into the tree.
 * This does all the path init required, making room in the tree if needed.
 * Returns the number of keys that were inserted.
 */
int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    struct btrfs_path *path,
			    struct btrfs_key *cpu_key, u32 *data_size,
			    int nr)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	int ret = 0;
	int slot;
	int i;
	u32 nritems;
	u32 total_data = 0;
	u32 total_size = 0;
	unsigned int data_end;
	struct btrfs_disk_key disk_key;
	struct btrfs_key found_key;

	for (i = 0; i < nr; i++) {
		if (total_size + data_size[i] + sizeof(struct btrfs_item) >
		    BTRFS_LEAF_DATA_SIZE(root)) {
			nr = i;
			break;
		}
		total_data += data_size[i];
		total_size += data_size[i] + sizeof(struct btrfs_item);
	}
	BUG_ON(nr == 0);

	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
	if (ret == 0)
		return -EEXIST;
	if (ret < 0)
		goto out;

	leaf = path->nodes[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	if (btrfs_leaf_free_space(root, leaf) < total_size) {
		for (i = nr - 1; i >= 0; i--) {
			total_data -= data_size[i];
			total_size -= data_size[i] + sizeof(struct btrfs_item);
			if (total_size < btrfs_leaf_free_space(root, leaf))
				break;
		}
		nr = i;
	}

	slot = path->slots[0];
	BUG_ON(slot < 0);

	if (slot != nritems) {
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);

		item = btrfs_item_nr(leaf, slot);
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/* figure out how many keys we can insert in here */
		total_data = data_size[0];
		for (i = 1; i < nr; i++) {
			if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
				break;
			total_data += data_size[i];
		}
		nr = i;

		if (old_data < data_end) {
			btrfs_print_leaf(root, leaf);
			printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
			       slot, old_data, data_end);
			BUG_ON(1);
		}
		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
		WARN_ON(leaf->map_token);
		for (i = slot; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(leaf, i);
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}

			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff - total_data);
		}
		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}

		/* shift the items */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
			      btrfs_item_nr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_item));

		/* shift the data */
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end - total_data, btrfs_leaf_data(leaf) +
			      data_end, old_data - data_end);
		data_end = old_data;
	} else {
		/*
		 * this sucks but it has to be done, if we are inserting at
		 * the end of the leaf only insert 1 of the items, since we
		 * have no way of knowing what's on the next leaf and we'd
		 * have to drop our current locks to figure it out
		 */
		nr = 1;
	}

	/* setup the item for the new data */
	for (i = 0; i < nr; i++) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
		btrfs_set_item_key(leaf, &disk_key, slot + i);
		item = btrfs_item_nr(leaf, slot + i);
		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
		data_end -= data_size[i];
		btrfs_set_item_size(leaf, item, data_size[i]);
	}
	btrfs_set_header_nritems(leaf, nritems + nr);
	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
	if (slot == 0) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
	}

	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
out:
	if (!ret)
		ret = nr;
	return ret;
}

/*
 * this is a helper for btrfs_insert_empty_items, the main goal here is
 * to save stack depth by doing the bulk of the work in a function
 * that doesn't call btrfs_search_slot
 */
int setup_items_for_insert(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, struct btrfs_path *path,
			   struct btrfs_key *cpu_key, u32 *data_size,
			   u32 total_data, u32 total_size, int nr)
{
	struct btrfs_item *item;
	int i;
	u32 nritems;
	unsigned int data_end;
	struct btrfs_disk_key disk_key;
	int ret;
	struct extent_buffer *leaf;
	int slot;

	leaf = path->nodes[0];
	slot = path->slots[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	if (btrfs_leaf_free_space(root, leaf) < total_size) {
		btrfs_print_leaf(root, leaf);
		printk(KERN_CRIT "not enough freespace need %u have %d\n",
		       total_size, btrfs_leaf_free_space(root, leaf));
		BUG();
	}

	if (slot != nritems) {
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);

		if (old_data < data_end) {
			btrfs_print_leaf(root, leaf);
			printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
			       slot, old_data, data_end);
			BUG_ON(1);
		}
		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
		WARN_ON(leaf->map_token);
		for (i = slot; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(leaf, i);
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}

			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff - total_data);
		}
		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}

		/* shift the items */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
			      btrfs_item_nr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_item));

		/* shift the data */
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end - total_data, btrfs_leaf_data(leaf) +
			      data_end, old_data - data_end);
		data_end = old_data;
	}

	/* setup the item for the new data */
	for (i = 0; i < nr; i++) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
		btrfs_set_item_key(leaf, &disk_key, slot + i);
		item = btrfs_item_nr(leaf, slot + i);
		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
		data_end -= data_size[i];
		btrfs_set_item_size(leaf, item, data_size[i]);
	}

	btrfs_set_header_nritems(leaf, nritems + nr);

	ret = 0;
	if (slot == 0) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
	}
	btrfs_unlock_up_safe(path, 1);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	return ret;
}

/*
 * Given a key and some data, insert items into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct btrfs_key *cpu_key, u32 *data_size,
			     int nr)
{
	int ret = 0;
	int slot;
	int i;
	u32 total_size = 0;
	u32 total_data = 0;

	for (i = 0; i < nr; i++)
		total_data += data_size[i];

	total_size = total_data + (nr * sizeof(struct btrfs_item));
	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
	if (ret == 0)
		return -EEXIST;
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	BUG_ON(slot < 0);

	ret = setup_items_for_insert(trans, root, path, cpu_key, data_size,
			       total_data, total_size, nr);

out:
	return ret;
}

/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *cpu_key, void *data, u32
		      data_size)
{
	int ret = 0;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	unsigned long ptr;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
	if (!ret) {
		leaf = path->nodes[0];
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		write_extent_buffer(leaf, data, ptr, data_size);
		btrfs_mark_buffer_dirty(leaf);
	}
	btrfs_free_path(path);
	return ret;
}

/*
 * delete the pointer from a given node.
 *
 * the tree should have been previously balanced so the deletion does not
 * empty a node.
 */
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot)
{
	struct extent_buffer *parent = path->nodes[level];
	u32 nritems;
	int ret = 0;
	int wret;

	nritems = btrfs_header_nritems(parent);
	if (slot != nritems - 1) {
		memmove_extent_buffer(parent,
			      btrfs_node_key_ptr_offset(slot),
			      btrfs_node_key_ptr_offset(slot + 1),
			      sizeof(struct btrfs_key_ptr) *
			      (nritems - slot - 1));
	}
	nritems--;
	btrfs_set_header_nritems(parent, nritems);
	if (nritems == 0 && parent == root->node) {
		BUG_ON(btrfs_header_level(root->node) != 1);
		/* just turn the root into a leaf and break */
		btrfs_set_header_level(root->node, 0);
	} else if (slot == 0) {
		struct btrfs_disk_key disk_key;

		btrfs_node_key(parent, &disk_key, 0);
		wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
		if (wret)
			ret = wret;
	}
	btrfs_mark_buffer_dirty(parent);
	return ret;
}

/*
 * a helper function to delete the leaf pointed to by path->slots[1] and
 * path->nodes[1].
 *
 * This deletes the pointer in path->nodes[1] and frees the leaf
 * block extent. zero is returned if it all worked out, < 0 otherwise.
 *
 * The path must have already been setup for deleting the leaf, including
 * all the proper balancing. path->nodes[1] must be locked.
 */
static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct extent_buffer *leaf)
{
	int ret;

	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
	ret = del_ptr(trans, root, path, 1, path->slots[1]);
	if (ret)
		return ret;

	/*
	 * btrfs_free_extent is expensive, we want to make sure we
	 * aren't holding any locks when we call it
	 */
	btrfs_unlock_up_safe(path, 0);

	root_sub_used(root, leaf->len);

	btrfs_free_tree_block(trans, root, leaf, 0, 1);
	return 0;
}
/*
 * delete the item at the leaf level in path. If that empties
 * the leaf, remove it from the tree
 */
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		    struct btrfs_path *path, int slot, int nr)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	int last_off;
	int dsize = 0;
	int ret = 0;
	int wret;
	int i;
	u32 nritems;

	leaf = path->nodes[0];
	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);

	for (i = 0; i < nr; i++)
		dsize += btrfs_item_size_nr(leaf, slot + i);

	nritems = btrfs_header_nritems(leaf);

	if (slot + nr != nritems) {
		int data_end = leaf_data_end(root, leaf);

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + dsize,
			      btrfs_leaf_data(leaf) + data_end,
			      last_off - data_end);

		for (i = slot + nr; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(leaf, i);
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff + dsize);
		}

		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}

		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + nr),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - nr));
	}
	btrfs_set_header_nritems(leaf, nritems - nr);
	nritems -= nr;

	/* delete the leaf if we've emptied it */
	if (nritems == 0) {
		if (leaf == root->node) {
			btrfs_set_header_level(leaf, 0);
		} else {
			btrfs_set_path_blocking(path);
			clean_tree_block(trans, root, leaf);
			ret = btrfs_del_leaf(trans, root, path, leaf);
			BUG_ON(ret);
		}
	} else {
		int used = leaf_space_used(leaf, 0, nritems);
		if (slot == 0) {
			struct btrfs_disk_key disk_key;

			btrfs_item_key(leaf, &disk_key, 0);
			wret = fixup_low_keys(trans, root, path,
					      &disk_key, 1);
			if (wret)
				ret = wret;
		}

		/* delete the leaf if it is mostly empty */
		if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
			slot = path->slots[1];
			extent_buffer_get(leaf);

			btrfs_set_path_blocking(path);
			wret = push_leaf_left(trans, root, path, 1, 1,
					      1, (u32)-1);
			if (wret < 0 && wret != -ENOSPC)
				ret = wret;

			if (path->nodes[0] == leaf &&
			    btrfs_header_nritems(leaf)) {
				wret = push_leaf_right(trans, root, path, 1,
						       1, 1, 0);
				if (wret < 0 && wret != -ENOSPC)
					ret = wret;
			}

			if (btrfs_header_nritems(leaf) == 0) {
				path->slots[1] = slot;
				ret = btrfs_del_leaf(trans, root, path, leaf);
				BUG_ON(ret);
				free_extent_buffer(leaf);
			} else {
				/* if we're still in the path, make sure
				 * we're dirty. Otherwise, one of the
				 * push_leaf functions must have already
				 * dirtied this buffer
				 */
				if (path->nodes[0] == leaf)
					btrfs_mark_buffer_dirty(leaf);
				free_extent_buffer(leaf);
			}
		} else {
			btrfs_mark_buffer_dirty(leaf);
		}
	}
	return ret;
}
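
/*
 * btrfs_prev_leaf() below steps the search key back by one: it decrements
 * offset first, then type, then objectid, mirroring the precedence used
 * by key comparison (objectid, then type, then offset).  The comp_keys()
 * check afterwards confirms the search really landed on a smaller key.
 */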
3933
3934
/*
 * search the tree again to find a leaf with lesser keys
 * returns 0 if it found something or 1 if there are no lesser leaves.
 * returns < 0 on io errors.
 *
 * This may release the path, and so you may lose any locks held at the
 * time you call it.
 */
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_disk_key found_key;
	int ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, 0);

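	/*
	 * keys order by (objectid, type, offset); build a key strictly
	 * smaller than the first key in this leaf and search for it
	 */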
	if (key.offset > 0)
		key.offset--;
	else if (key.type > 0)
		key.type--;
	else if (key.objectid > 0)
		key.objectid--;
	else
		return 1;

	btrfs_release_path(path);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
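	/*
	 * only report success if the first key in the leaf we landed on
	 * is strictly smaller than the key we searched for
	 */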
	btrfs_item_key(path->nodes[0], &found_key, 0);
	ret = comp_keys(&found_key, &key);
	if (ret < 0)
		return 0;
	return 1;
}

/*
 * A helper function to walk down the tree starting at min_key, and looking
 * for nodes or leaves that are either in cache or have a minimum
 * transaction id.  This is used by the btree defrag code and by tree
 * logging.
 *
 * This does not cow, but it does stuff the starting key it finds back
 * into min_key, so you can call btrfs_search_slot with cow=1 on the
 * key and get a writable path.
 *
 * This does lock as it descends, and path->keep_locks should be set
 * to 1 by the caller.
 *
 * This honors path->lowest_level to prevent descent past a given level
 * of the tree.
 *
 * min_trans indicates the oldest transaction that you are interested
 * in walking through.  Any nodes or leaves older than min_trans are
 * skipped over (without reading them).
 *
 * returns zero if something useful was found, < 0 on error and 1 if there
 * was nothing in the tree that matched the search criteria.
 */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
			 struct btrfs_key *max_key,
			 struct btrfs_path *path, int cache_only,
			 u64 min_trans)
{
	struct extent_buffer *cur;
	struct btrfs_key found_key;
	int slot;
	int sret;
	u32 nritems;
	int level;
	int ret = 1;

	WARN_ON(!path->keep_locks);
again:
	cur = btrfs_lock_root_node(root);
	level = btrfs_header_level(cur);
	WARN_ON(path->nodes[level]);
	path->nodes[level] = cur;
	path->locks[level] = 1;

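	/*
	 * cow rewrites a node's parents along with the node, so parent
	 * generations are always >= child generations; an old root means
	 * nothing below it can be new enough to match min_trans
	 */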
	if (btrfs_header_generation(cur) < min_trans) {
		ret = 1;
		goto out;
	}
	while (1) {
		nritems = btrfs_header_nritems(cur);
		level = btrfs_header_level(cur);
		sret = bin_search(cur, min_key, level, &slot);

		/* at the lowest level, we're done, setup the path and exit */
		if (level == path->lowest_level) {
			if (slot >= nritems)
				goto find_next_key;
			ret = 0;
			path->slots[level] = slot;
			btrfs_item_key_to_cpu(cur, &found_key, slot);
			goto out;
		}
		if (sret && slot > 0)
			slot--;
		/*
		 * check this node pointer against the cache_only and
		 * min_trans parameters.  If it isn't in cache or is too
		 * old, skip to the next one.
		 */
		while (slot < nritems) {
			u64 blockptr;
			u64 gen;
			struct extent_buffer *tmp;
			struct btrfs_disk_key disk_key;

			blockptr = btrfs_node_blockptr(cur, slot);
			gen = btrfs_node_ptr_generation(cur, slot);
			if (gen < min_trans) {
				slot++;
				continue;
			}
			if (!cache_only)
				break;

			if (max_key) {
				btrfs_node_key(cur, &disk_key, slot);
				if (comp_keys(&disk_key, max_key) >= 0) {
					ret = 1;
					goto out;
				}
			}

			tmp = btrfs_find_tree_block(root, blockptr,
					    btrfs_level_size(root, level - 1));

			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
				free_extent_buffer(tmp);
				break;
			}
			if (tmp)
				free_extent_buffer(tmp);
			slot++;
		}
find_next_key:
		/*
		 * we didn't find a candidate key in this node, walk forward
		 * and find another one
		 */
		if (slot >= nritems) {
			path->slots[level] = slot;
			btrfs_set_path_blocking(path);
			sret = btrfs_find_next_key(root, path, min_key, level,
						  cache_only, min_trans);
			if (sret == 0) {
				btrfs_release_path(path);
				goto again;
			} else {
				goto out;
			}
		}
		/* save our key for returning back */
		btrfs_node_key_to_cpu(cur, &found_key, slot);
		path->slots[level] = slot;
		if (level == path->lowest_level) {
			ret = 0;
			unlock_up(path, level, 1);
			goto out;
		}
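		/*
		 * hand-over-hand locking: take the child's lock before
		 * dropping the locks above it
		 */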
		btrfs_set_path_blocking(path);
		cur = read_node_slot(root, cur, slot);
		BUG_ON(!cur);

		btrfs_tree_lock(cur);

		path->locks[level - 1] = 1;
		path->nodes[level - 1] = cur;
		unlock_up(path, level, 1);
		btrfs_clear_path_blocking(path, NULL);
	}
out:
	if (ret == 0)
		memcpy(min_key, &found_key, sizeof(found_key));
	btrfs_set_path_blocking(path);
	return ret;
}

/*
 * this is similar to btrfs_next_leaf, but does not try to preserve
 * and fixup the path.  It looks for and returns the next key in the
 * tree based on the current path and the cache_only and min_trans
 * parameters.
 *
 * 0 is returned if another key is found, < 0 if there are any errors
 * and 1 is returned if there are no higher keys in the tree
 *
 * path->keep_locks should be set to 1 on the search made before
 * calling this function.
 */
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
			struct btrfs_key *key, int level,
			int cache_only, u64 min_trans)
{
	int slot;
	struct extent_buffer *c;

	WARN_ON(!path->keep_locks);
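	/*
	 * walk up from the requested level until some node has a slot
	 * to the right of the current path position
	 */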
	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			return 1;

		slot = path->slots[level] + 1;
		c = path->nodes[level];
next:
		if (slot >= btrfs_header_nritems(c)) {
			int ret;
			int orig_lowest;
			struct btrfs_key cur_key;
			if (level + 1 >= BTRFS_MAX_LEVEL ||
			    !path->nodes[level + 1])
				return 1;

			if (path->locks[level + 1]) {
				level++;
				continue;
			}

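			/*
			 * we don't hold the lock on the parent, so we can't
			 * just step up; redo the search with lowest_level
			 * set to the level we need
			 */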
			slot = btrfs_header_nritems(c) - 1;
			if (level == 0)
				btrfs_item_key_to_cpu(c, &cur_key, slot);
			else
				btrfs_node_key_to_cpu(c, &cur_key, slot);

			orig_lowest = path->lowest_level;
			btrfs_release_path(path);
			path->lowest_level = level;
			ret = btrfs_search_slot(NULL, root, &cur_key, path,
						0, 0);
			path->lowest_level = orig_lowest;
			if (ret < 0)
				return ret;

			c = path->nodes[level];
			slot = path->slots[level];
			if (ret == 0)
				slot++;
			goto next;
		}

		if (level == 0)
			btrfs_item_key_to_cpu(c, key, slot);
		else {
			u64 blockptr = btrfs_node_blockptr(c, slot);
			u64 gen = btrfs_node_ptr_generation(c, slot);

			if (cache_only) {
				struct extent_buffer *cur;
				cur = btrfs_find_tree_block(root, blockptr,
					    btrfs_level_size(root, level - 1));
				if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
					slot++;
					if (cur)
						free_extent_buffer(cur);
					goto next;
				}
				free_extent_buffer(cur);
			}
			if (gen < min_trans) {
				slot++;
				goto next;
			}
			btrfs_node_key_to_cpu(c, key, slot);
		}
		return 0;
	}
	return 1;
}

/*
 * search the tree again to find a leaf with greater keys
 * returns 0 if it found something or 1 if there are no greater leaves.
 * returns < 0 on io errors.
 */
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
	int slot;
	int level;
	struct extent_buffer *c;
	struct extent_buffer *next;
	struct btrfs_key key;
	u32 nritems;
	int ret;
	int old_spinning = path->leave_spinning;
	int force_blocking = 0;

	nritems = btrfs_header_nritems(path->nodes[0]);
	if (nritems == 0)
		return 1;

	/*
	 * we take the blocks in an order that upsets lockdep.  Using
	 * blocking mode is the only way around it.
	 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	force_blocking = 1;
#endif

	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
again:
	level = 1;
	next = NULL;
	btrfs_release_path(path);

	path->keep_locks = 1;

	if (!force_blocking)
		path->leave_spinning = 1;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	path->keep_locks = 0;

	if (ret < 0)
		return ret;

	nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * by releasing the path above we dropped all our locks.  A balance
	 * could have added more items next to the key that used to be
	 * at the very end of the block.  So, check again here and
	 * advance the path if there are now more items available.
	 */
	if (nritems > 0 && path->slots[0] < nritems - 1) {
		if (ret == 0)
			path->slots[0]++;
		ret = 0;
		goto done;
	}

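	/*
	 * climb until some node has a pointer to the right of our
	 * current slot, then read that next block in
	 */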
	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level]) {
			ret = 1;
			goto done;
		}

		slot = path->slots[level] + 1;
		c = path->nodes[level];
		if (slot >= btrfs_header_nritems(c)) {
			level++;
			if (level == BTRFS_MAX_LEVEL) {
				ret = 1;
				goto done;
			}
			continue;
		}

		if (next) {
			btrfs_tree_unlock(next);
			free_extent_buffer(next);
		}

		next = c;
		ret = read_block_for_search(NULL, root, path, &next, level,
					    slot, &key);
		if (ret == -EAGAIN)
			goto again;

		if (ret < 0) {
			btrfs_release_path(path);
			goto done;
		}

		if (!path->skip_locking) {
			ret = btrfs_try_spin_lock(next);
			if (!ret) {
				btrfs_set_path_blocking(path);
				btrfs_tree_lock(next);
				if (!force_blocking)
					btrfs_clear_path_blocking(path, next);
			}
			if (force_blocking)
				btrfs_set_lock_blocking(next);
		}
		break;
	}
	path->slots[level] = slot;
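	/*
	 * now walk back down, following the left-most pointer at each
	 * level until we reach the leaf
	 */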
	while (1) {
		level--;
		c = path->nodes[level];
		if (path->locks[level])
			btrfs_tree_unlock(c);

		free_extent_buffer(c);
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!path->skip_locking)
			path->locks[level] = 1;

		if (!level)
			break;

		ret = read_block_for_search(NULL, root, path, &next, level,
					    0, &key);
		if (ret == -EAGAIN)
			goto again;

		if (ret < 0) {
			btrfs_release_path(path);
			goto done;
		}

		if (!path->skip_locking) {
			btrfs_assert_tree_locked(path->nodes[level]);
			ret = btrfs_try_spin_lock(next);
			if (!ret) {
				btrfs_set_path_blocking(path);
				btrfs_tree_lock(next);
				if (!force_blocking)
					btrfs_clear_path_blocking(path, next);
			}
			if (force_blocking)
				btrfs_set_lock_blocking(next);
		}
	}
	ret = 0;
done:
	unlock_up(path, 0, 1);
	path->leave_spinning = old_spinning;
	if (!old_spinning)
		btrfs_set_path_blocking(path);

	return ret;
}

/*
 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
 * searching until it gets past min_objectid or finds an item of 'type'
 *
 * returns 0 if something is found, 1 if nothing was found and < 0 on error
 */
int btrfs_previous_item(struct btrfs_root *root,
			struct btrfs_path *path, u64 min_objectid,
			int type)
{
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;

	while (1) {
		if (path->slots[0] == 0) {
			btrfs_set_path_blocking(path);
			ret = btrfs_prev_leaf(root, path);
			if (ret != 0)
				return ret;
		} else {
			path->slots[0]--;
		}
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (nritems == 0)
			return 1;
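		/*
		 * btrfs_prev_leaf can leave the slot pointing one past the
		 * last item; clamp it back onto the leaf
		 */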
		if (path->slots[0] == nritems)
			path->slots[0]--;

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid < min_objectid)
			break;
		if (found_key.type == type)
			return 0;
		if (found_key.objectid == min_objectid &&
		    found_key.type < type)
			break;
	}
	return 1;
}