GitHub Repository: awilliam/linux-vfio
Path: blob/master/fs/btrfs/inode-map.c

/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>

#include "ctree.h"
#include "disk-io.h"
#include "free-space-cache.h"
#include "inode-map.h"
#include "transaction.h"

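/*
 * Background worker: walk the inode items in this root's commit root and
 * record the gaps between used inode numbers in root->free_ino_ctl, so
 * they can be handed out again by btrfs_find_free_ino().
 */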
static int caching_kthread(void *data)
{
	struct btrfs_root *root = data;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	u64 last = (u64)-1;
	int slot;
	int ret;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Since the commit root is read-only, we can safely skip locking. */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = 2;

	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.offset = 0;
	key.type = BTRFS_INODE_ITEM_KEY;
again:
	/* need to make sure the commit_root doesn't disappear */
	mutex_lock(&root->fs_commit_mutex);

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		if (btrfs_fs_closing(fs_info))
			goto out;

		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			else if (ret > 0)
				break;

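			/*
			 * After stepping to a new leaf, yield if someone else
			 * needs the CPU or a transaction commit is waiting:
			 * remember how far we got, drop fs_commit_mutex so
			 * the commit can proceed, and restart the search
			 * from the saved key.
			 */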
			if (need_resched() ||
			    btrfs_transaction_in_commit(fs_info)) {
				leaf = path->nodes[0];

				if (btrfs_header_nritems(leaf) == 0) {
					WARN_ON(1);
					break;
				}

				/*
				 * Save the key so we can advance forward
				 * in the next search.
				 */
				btrfs_item_key_to_cpu(leaf, &key, 0);
				btrfs_release_path(path);
				root->cache_progress = last;
				mutex_unlock(&root->fs_commit_mutex);
				schedule_timeout(1);
				goto again;
			} else
				continue;
		}

		btrfs_item_key_to_cpu(leaf, &key, slot);

		if (key.type != BTRFS_INODE_ITEM_KEY)
			goto next;

		if (key.objectid >= root->highest_objectid)
			break;

		if (last != (u64)-1 && last + 1 != key.objectid) {
			__btrfs_add_free_space(ctl, last + 1,
					       key.objectid - last - 1);
			wake_up(&root->cache_wait);
		}

		last = key.objectid;
next:
		path->slots[0]++;
	}

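	/*
	 * Whatever lies between the last inode item we saw and the current
	 * highest objectid is also free.
	 */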
	if (last < root->highest_objectid - 1) {
		__btrfs_add_free_space(ctl, last + 1,
				       root->highest_objectid - last - 1);
	}

	spin_lock(&root->cache_lock);
	root->cached = BTRFS_CACHE_FINISHED;
	spin_unlock(&root->cache_lock);

	root->cache_progress = (u64)-1;
	btrfs_unpin_free_ino(root);
out:
	wake_up(&root->cache_wait);
	mutex_unlock(&root->fs_commit_mutex);

	btrfs_free_path(path);

	return ret;
}

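/*
 * Begin caching free inode numbers for this root: try to load a previously
 * written on-disk cache first; otherwise mark everything above the current
 * highest objectid as free right away and let caching_kthread() fill in the
 * holes below it.
 */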
static void start_caching(struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct task_struct *tsk;
	int ret;
	u64 objectid;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return;

	spin_lock(&root->cache_lock);
	if (root->cached != BTRFS_CACHE_NO) {
		spin_unlock(&root->cache_lock);
		return;
	}

	root->cached = BTRFS_CACHE_STARTED;
	spin_unlock(&root->cache_lock);

	ret = load_free_ino_cache(root->fs_info, root);
	if (ret == 1) {
		spin_lock(&root->cache_lock);
		root->cached = BTRFS_CACHE_FINISHED;
		spin_unlock(&root->cache_lock);
		return;
	}

	/*
	 * It can be quite time-consuming to fill the cache by searching
	 * through the extent tree, and this can keep the ino allocation
	 * path waiting. Therefore at the start we quickly find the highest
	 * inode number, so we know we can use the inode numbers which fall
	 * in [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
	 */
	ret = btrfs_find_free_objectid(root, &objectid);
	if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
		__btrfs_add_free_space(ctl, objectid,
				       BTRFS_LAST_FREE_OBJECTID - objectid + 1);
	}

	tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
			  root->root_key.objectid);
	BUG_ON(IS_ERR(tsk));
}

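/*
 * Pick an objectid for a new inode. Without the INODE_MAP_CACHE option this
 * simply hands out the next never-used objectid; with it, a cached number is
 * reused, starting (and waiting for) the caching work when necessary.
 */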
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
{
	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return btrfs_find_free_objectid(root, objectid);

again:
	*objectid = btrfs_find_ino_for_alloc(root);

	if (*objectid != 0)
		return 0;

	start_caching(root);

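	/*
	 * Wait until the caching thread finishes or produces some free
	 * numbers, then retry: another allocator may have raced us to
	 * whatever became available.
	 */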
	wait_event(root->cache_wait,
		   root->cached == BTRFS_CACHE_FINISHED ||
		   root->free_ino_ctl->free_space > 0);

	if (root->cached == BTRFS_CACHE_FINISHED &&
	    root->free_ino_ctl->free_space == 0)
		return -ENOSPC;
	else
		goto again;
}

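/*
 * Hand an inode number that is no longer needed back to the cache. While the
 * caching thread is still scanning, the number may have to be parked in the
 * pinned tree instead (see the comment below).
 */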
void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return;

again:
	if (root->cached == BTRFS_CACHE_FINISHED) {
		__btrfs_add_free_space(ctl, objectid, 1);
	} else {
		/*
		 * If we are in the process of caching free ino chunks, we
		 * leave the number in the pinned tree until a transaction is
		 * committed or the caching work is done, to avoid adding the
		 * same inode number to the free_ino tree twice across
		 * transactions.
		 */

		mutex_lock(&root->fs_commit_mutex);
		spin_lock(&root->cache_lock);
		if (root->cached == BTRFS_CACHE_FINISHED) {
			spin_unlock(&root->cache_lock);
			mutex_unlock(&root->fs_commit_mutex);
			goto again;
		}
		spin_unlock(&root->cache_lock);

		start_caching(root);

		if (objectid <= root->cache_progress ||
		    objectid > root->highest_objectid)
			__btrfs_add_free_space(ctl, objectid, 1);
		else
			__btrfs_add_free_space(pinned, objectid, 1);

		mutex_unlock(&root->fs_commit_mutex);
	}
}

/*
 * When a transaction is committed, we'll move those inode numbers which are
 * smaller than root->cache_progress from the pinned tree to the free_ino
 * tree, and the others will just be dropped, because the commit root we
 * were searching has changed.
 *
 * Must be called with root->fs_commit_mutex held.
 */
void btrfs_unpin_free_ino(struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 count;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return;

	while (1) {
		n = rb_first(rbroot);
		if (!n)
			break;

		info = rb_entry(n, struct btrfs_free_space, offset_index);
		BUG_ON(info->bitmap);

		if (info->offset > root->cache_progress)
			goto free;
		else if (info->offset + info->bytes > root->cache_progress)
			count = root->cache_progress - info->offset + 1;
		else
			count = info->bytes;

		__btrfs_add_free_space(ctl, info->offset, count);
free:
		rb_erase(&info->offset_index, rbroot);
		kfree(info);
	}
}

#define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space))
#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8)

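/*
 * INIT_THRESHOLD is the number of extent entries that fit in ~16K of
 * btrfs_free_space structs; INODES_PER_BITMAP is how many inode numbers a
 * page-sized bitmap can track, one bit per inode number.
 */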
/*
 * The goal is to keep the memory used by the free_ino tree from exceeding
 * what we would use if we tracked the same numbers with bitmaps only.
 */
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int max_ino;
	int max_bitmaps;

	n = rb_last(&ctl->free_space_offset);
	if (!n) {
		ctl->extents_thresh = INIT_THRESHOLD;
		return;
	}
	info = rb_entry(n, struct btrfs_free_space, offset_index);

	/*
	 * Find the maximum inode number in the filesystem. Note we
	 * ignore the fact that this can be a bitmap, because we are
	 * not doing precise calculation.
	 */
	max_ino = info->bytes - 1;

	max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
	if (max_bitmaps <= ctl->total_bitmaps) {
		ctl->extents_thresh = 0;
		return;
	}

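	/*
	 * Allow as many extent entries as would fit in the pages that the
	 * existing bitmaps have not already claimed.
	 */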
	ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
				PAGE_CACHE_SIZE / sizeof(*info);
}

/*
 * We don't fall back to bitmaps if we are below the extents threshold or
 * this chunk of inode numbers is a big one.
 */
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	if (ctl->free_extents < ctl->extents_thresh ||
	    info->bytes > INODES_PER_BITMAP / 10)
		return false;

	return true;
}

static struct btrfs_free_space_op free_ino_op = {
	.recalc_thresholds = recalculate_thresholds,
	.use_bitmap = use_bitmap,
};

static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
{
}

static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	/*
	 * We always use extents for two reasons:
	 *
	 * - The pinned tree is only used while the caching work is in
	 *   progress.
	 * - It keeps the code simpler; see btrfs_unpin_free_ino().
	 */
	return false;
}

static struct btrfs_free_space_op pinned_free_ino_op = {
	.recalc_thresholds = pinned_recalc_thresholds,
	.use_bitmap = pinned_use_bitmap,
};

void btrfs_init_free_ino_ctl(struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;

	spin_lock_init(&ctl->tree_lock);
	ctl->unit = 1;
	ctl->start = 0;
	ctl->private = NULL;
	ctl->op = &free_ino_op;

	/*
	 * Initially we allow 16K of RAM to cache chunks of inode numbers
	 * before we resort to bitmaps. This is somewhat arbitrary, but it
	 * will be adjusted at runtime.
	 */
	ctl->extents_thresh = INIT_THRESHOLD;

	spin_lock_init(&pinned->tree_lock);
	pinned->unit = 1;
	pinned->start = 0;
	pinned->private = NULL;
	pinned->extents_thresh = 0;
	pinned->op = &pinned_free_ino_op;
}

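/*
 * Write the in-memory free inode cache into its hidden cache inode (done as
 * part of committing a transaction) so that load_free_ino_cache() can
 * restore it later, preallocating enough file space for every extent entry
 * and bitmap we currently hold.
 */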
int btrfs_save_ino_cache(struct btrfs_root *root,
			 struct btrfs_trans_handle *trans)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	u64 alloc_hint = 0;
	int ret;
	int prealloc;
	bool retry = false;

	/* Only the fs tree and subvolumes/snapshots need the ino cache */
	if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
	    (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
	     root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
		return 0;

	/* Don't save the inode cache if we are deleting this root */
	if (btrfs_root_refs(&root->root_item) == 0 &&
	    root != root->fs_info->tree_root)
		return 0;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

again:
	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
		ret = PTR_ERR(inode);
		goto out;
	}

	if (IS_ERR(inode)) {
		BUG_ON(retry);
		retry = true;

		ret = create_free_ino_inode(root, trans, path);
		if (ret)
			goto out;
		goto again;
	}

	BTRFS_I(inode)->generation = 0;
	ret = btrfs_update_inode(trans, root, inode);
	WARN_ON(ret);

	if (i_size_read(inode) > 0) {
		ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
		if (ret)
			goto out_put;
	}

	spin_lock(&root->cache_lock);
	if (root->cached != BTRFS_CACHE_FINISHED) {
		ret = -1;
		spin_unlock(&root->cache_lock);
		goto out_put;
	}
	spin_unlock(&root->cache_lock);

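	/*
	 * Estimate how much file space the written-out cache will need: one
	 * btrfs_free_space entry per extent, rounded up to a whole page,
	 * plus one page per bitmap.
	 */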
	spin_lock(&ctl->tree_lock);
	prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
	prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
	prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
	spin_unlock(&ctl->tree_lock);

	/* Just to make sure we have enough space */
	prealloc += 8 * PAGE_CACHE_SIZE;

	ret = btrfs_check_data_free_space(inode, prealloc);
	if (ret)
		goto out_put;

	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
					      prealloc, prealloc, &alloc_hint);
	if (ret)
		goto out_put;
	btrfs_free_reserved_data_space(inode, prealloc);

out_put:
	iput(inode);
out:
	if (ret == 0)
		ret = btrfs_write_out_ino_cache(root, trans, path);

	btrfs_free_path(path);
	return ret;
}

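/*
 * Find the highest objectid currently used in this tree by searching for
 * the largest possible key and stepping back one slot.
 */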
static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
{
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *l;
	struct btrfs_key search_key;
	struct btrfs_key found_key;
	int slot;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
	search_key.type = -1;
	search_key.offset = (u64)-1;
	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
	if (ret < 0)
		goto error;
	BUG_ON(ret == 0);
	if (path->slots[0] > 0) {
		slot = path->slots[0] - 1;
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);
		*objectid = max_t(u64, found_key.objectid,
				  BTRFS_FIRST_FREE_OBJECTID - 1);
	} else {
		*objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}

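/*
 * Hand out the next never-used objectid, initializing
 * root->highest_objectid from the tree on first use.
 */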
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
{
	int ret;
	mutex_lock(&root->objectid_mutex);

	if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
		ret = btrfs_find_highest_objectid(root,
						  &root->highest_objectid);
		if (ret)
			goto out;
	}

	if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
		ret = -ENOSPC;
		goto out;
	}

	*objectid = ++root->highest_objectid;
	ret = 0;
out:
	mutex_unlock(&root->objectid_mutex);
	return ret;
}