GitHub Repository: torvalds/linux
Path: blob/master/fs/erofs/fscache.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;

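/*
 * State for a single in-flight fscache read: the cache resources, the
 * destination iterator, a completion callback and a refcount that pins
 * the owner (request or bio) until the async read terminates.
 */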
struct erofs_fscache_io {
	struct netfs_cache_resources cres;
	struct iov_iter iter;
	netfs_io_terminated_t end_io;
	void *private;
	refcount_t ref;
};

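/*
 * A read request against one mapping, carved into one or more I/O
 * slices; ->error records the first failure seen and the final
 * reference drop completes the request.
 */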
struct erofs_fscache_rq {
	struct address_space *mapping;	/* The mapping being accessed */
	loff_t start;			/* Start position */
	size_t len;			/* Length of the request */
	size_t submitted;		/* Length of submitted */
	short error;			/* 0 or error that occurred */
	refcount_t ref;
};

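/*
 * Drop a reference on an io; on the last put, end the cache operation
 * and free it. Returns true if the io was actually freed.
 */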
static bool erofs_fscache_io_put(struct erofs_fscache_io *io)
{
	if (!refcount_dec_and_test(&io->ref))
		return false;
	if (io->cres.ops)
		io->cres.ops->end_operation(&io->cres);
	kfree(io);
	return true;
}

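/*
 * Mark every folio covered by the request uptodate (unless an error
 * occurred) and unlock it, waking anyone blocked on the locked folios.
 */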
static void erofs_fscache_req_complete(struct erofs_fscache_rq *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

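/* Drop a reference on a request; the last put completes and frees it. */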
static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
{
	if (!refcount_dec_and_test(&req->ref))
		return;
	erofs_fscache_req_complete(req);
	kfree(req);
}

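/* Allocate a request covering [start, start + len) of @mapping. */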
static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping,
						loff_t start, size_t len)
{
	struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return NULL;
	req->mapping = mapping;
	req->start = start;
	req->len = len;
	refcount_set(&req->ref, 1);
	return req;
}

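/*
 * Drop the io reference and, if that was the last one, also drop the
 * request reference the io has been holding since allocation.
 */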
static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
{
	struct erofs_fscache_rq *req = io->private;

	if (erofs_fscache_io_put(io))
		erofs_fscache_req_put(req);
}

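/*
 * Completion callback for request-backed reads: record an error, if
 * any, in the owning request and release the io.
 */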
static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error)
{
	struct erofs_fscache_io *io = priv;
	struct erofs_fscache_rq *req = io->private;

	if (IS_ERR_VALUE(transferred_or_error))
		req->error = transferred_or_error;
	erofs_fscache_req_io_put(io);
}

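/* Allocate an io slice for @req, taking an extra reference on the request. */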
static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req)
{
	struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL);

	if (!io)
		return NULL;
	io->end_io = erofs_fscache_req_end_io;
	io->private = req;
	refcount_inc(&req->ref);
	refcount_set(&io->ref, 1);
	return io;
}

/*
 * Read data from fscache described by cookie at pstart physical address
 * offset, and fill the read data into buffer described by io->iter.
 */
static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
		loff_t pstart, struct erofs_fscache_io *io)
{
	enum netfs_io_source source;
	struct netfs_cache_resources *cres = &io->cres;
	struct iov_iter *iter = &io->iter;
	int ret;

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (iov_iter_count(iter)) {
		size_t orig_count = iov_iter_count(iter), len = orig_count;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				pstart, &len, LLONG_MAX, &flags, 0);
		if (WARN_ON(len == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source);
			return -EIO;
		}

		iov_iter_truncate(iter, len);
		refcount_inc(&io->ref);
		ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL,
				   io->end_io, io);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(NULL, "fscache_read failed (ret %d)", ret);
			return ret;
		}
		if (WARN_ON(iov_iter_count(iter)))
			return -EIO;

		iov_iter_reexpand(iter, orig_count - len);
		pstart += len;
	}
	return 0;
}

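/*
 * A bio-shaped io for the device I/O path: the embedded bdev-less bio
 * reuses bio_add_page()/bio_endio() while the data is actually served
 * from fscache. The io member must stay first, as checked by the
 * BUILD_BUG_ON() in erofs_fscache_bio_endio().
 */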
struct erofs_fscache_bio {
	struct erofs_fscache_io io;
	struct bio bio;		/* w/o bdev to share bio_add_page/endio() */
	struct bio_vec bvecs[BIO_MAX_VECS];
};

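/*
 * Completion callback for bio-backed reads: propagate any error into
 * the bio status, finish the bio and drop the embedded io reference.
 */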
static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error)
{
	struct erofs_fscache_bio *io = priv;

	if (IS_ERR_VALUE(transferred_or_error))
		io->bio.bi_status = errno_to_blk_status(transferred_or_error);
	bio_endio(&io->bio);
	BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
	erofs_fscache_io_put(&io->io);
}

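/* Allocate a read bio whose completion path goes through fscache I/O. */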
struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
{
	struct erofs_fscache_bio *io;

	io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
	bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
	io->io.private = mdev->m_dif->fscache->cookie;
	io->io.end_io = erofs_fscache_bio_endio;
	refcount_set(&io->io.ref, 1);
	return &io->bio;
}

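/*
 * Kick off an async fscache read covering the bio's bvecs; on immediate
 * failure, complete the bio with the mapped error status.
 */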
void erofs_fscache_submit_bio(struct bio *bio)
{
	struct erofs_fscache_bio *io = container_of(bio,
			struct erofs_fscache_bio, bio);
	int ret;

	iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt,
		      bio->bi_iter.bi_size);
	ret = erofs_fscache_read_io_async(io->io.private,
			bio->bi_iter.bi_sector << 9, &io->io);
	erofs_fscache_io_put(&io->io);
	if (!ret)
		return;
	bio->bi_status = errno_to_blk_status(ret);
	bio_endio(bio);
}

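/* read_folio() for the anonymous inodes backing blobs: one folio, one io. */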
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	struct erofs_fscache *ctx = folio->mapping->host->i_private;
	int ret = -ENOMEM;
	struct erofs_fscache_rq *req;
	struct erofs_fscache_io *io;

	req = erofs_fscache_req_alloc(folio->mapping,
			folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return ret;
	}

	io = erofs_fscache_req_io_alloc(req);
	if (!io) {
		req->error = ret;
		goto out;
	}
	iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages,
			folio_pos(folio), folio_size(folio));

	ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io);
	if (ret)
		req->error = ret;

	erofs_fscache_req_io_put(io);
out:
	erofs_fscache_req_put(req);
	return ret;
}

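/*
 * Submit the next slice of a data read: inline (tail-packed) data is
 * copied out of the metabuf, unmapped extents are zeroed, and mapped
 * extents are read asynchronously from the corresponding data blob.
 */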
static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
{
	struct address_space *mapping = req->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_io *io;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	loff_t pos = req->start + req->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		struct iov_iter iter;
		size_t size = map.m_llen;
		void *src;

		src = erofs_read_metabuf(&buf, sb, map.m_pa,
					 erofs_inode_in_metabox(inode));
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		req->submitted += PAGE_SIZE;
		return 0;
	}

	count = req->len - req->submitted;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		struct iov_iter iter;

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		req->submitted += count;
		return 0;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	io = erofs_fscache_req_io_alloc(req);
	if (!io)
		return -ENOMEM;
	iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
	ret = erofs_fscache_read_io_async(mdev.m_dif->fscache->cookie,
			mdev.m_pa + (pos - map.m_la), io);
	erofs_fscache_req_io_put(io);

	req->submitted += count;
	return ret;
}

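/* Iterate data_read_slice() until the whole request is submitted or fails. */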
static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);
	return ret;
}

static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_rq *req;
	int ret;

	req = erofs_fscache_req_alloc(folio->mapping,
			folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return -ENOMEM;
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}

static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_rq *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (!req)
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

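/*
 * Drop a domain reference; the last put removes the domain from the
 * global list, tears down the pseudo mount when no domain remains, and
 * relinquishes the backing fscache volume.
 */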
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		fscache_relinquish_volume(domain->volume, NULL, false);
		mutex_unlock(&erofs_domain_list_lock);
		kfree_sensitive(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

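/* Acquire the fscache volume "erofs,<domain_id or fsid>" for this sb. */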
static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

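/*
 * Create a new shared domain: register the volume, set up the global
 * pseudo mount on first use and link the domain into the global list.
 * Called with erofs_domain_list_lock held.
 */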
static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
		if (IS_ERR(mnt)) {
			err = PTR_ERR(mnt);
			goto out;
		}
		erofs_pseudo_mnt = mnt;
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree_sensitive(domain->domain_id);
	kfree(domain);
	return err;
}

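/* Join an existing domain with a matching domain_id, or create one. */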
static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}

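/*
 * Acquire a cookie for the blob @name and back it with an anonymous
 * inode whose page cache holds the blob's data.
 */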
static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	struct super_block *isb;
	struct inode *inode;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->node);
	refcount_set(&ctx->ref, 1);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}
	fscache_use_cookie(cookie, false);

	/*
	 * Allocate anonymous inode in global pseudo mount for shareable blobs,
	 * so that they are accessible among erofs fs instances.
	 */
	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
	inode = new_inode(isb);
	if (!inode) {
		erofs_err(sb, "failed to get anon inode for %s", name);
		ret = -ENOMEM;
		goto err_cookie;
	}

	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
	inode->i_private = ctx;

	ctx->cookie = cookie;
	ctx->inode = inode;
	return ctx;

err_cookie:
	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

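/*
 * Create a domain-shared cookie and add it to the global cookies list.
 * Called with erofs_domain_cookies_lock held.
 */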
static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		erofs_fscache_relinquish_cookie(ctx);
		return ERR_PTR(-ENOMEM);
	}

	refcount_inc(&domain->ref);
	ctx->domain = domain;
	list_add(&ctx->node, &erofs_domain_cookies_list);
	return ctx;
}

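/*
 * Look up @name among the domain's shared cookies and take a reference,
 * or create a fresh one; NEED_NOEXIST turns an existing cookie into an
 * -EEXIST error instead.
 */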
static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	flags |= EROFS_REG_COOKIE_SHARE;
	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
		if (ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			refcount_inc(&ctx->ref);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	ctx = erofs_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}

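/*
 * Release a cookie: non-shared cookies are relinquished directly, while
 * domain cookies are unhashed on the last reference and the domain
 * reference is dropped outside the lock.
 */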
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	struct erofs_domain *domain = NULL;

	if (!ctx)
		return;
	if (!ctx->domain)
		return erofs_fscache_relinquish_cookie(ctx);

	mutex_lock(&erofs_domain_cookies_lock);
	if (refcount_dec_and_test(&ctx->ref)) {
		domain = ctx->domain;
		list_del(&ctx->node);
		erofs_fscache_relinquish_cookie(ctx);
	}
	mutex_unlock(&erofs_domain_cookies_lock);
	if (domain)
		erofs_fscache_domain_put(domain);
}

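/* Set up fscache for a mount: volume or shared domain, then the fsid blob. */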
int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags = 0;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When shared domain is enabled, using NEED_NOEXIST to guarantee
	 * the primary data blob (aka fsid) is unique in the shared domain.
	 *
	 * For non-shared-domain case, fscache_acquire_volume() invoked by
	 * erofs_fscache_register_volume() has already guaranteed
	 * the uniqueness of primary data blob.
	 *
	 * Acquired domain/volume will be relinquished in kill_sb() on error.
	 */
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->dif0.fscache = fscache;
	return 0;
}

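/* Tear down everything erofs_fscache_register_fs() set up. */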
void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->dif0.fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->dif0.fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}