GitHub Repository: torvalds/linux
Path: blob/master/fs/erofs/data.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include "internal.h"
#include <linux/sched/mm.h>
#include <trace/events/erofs.h>

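/*
 * Metadata buffer helpers: a struct erofs_buf caches one metadata page
 * together with its kmap'ed address, so consecutive accesses within the
 * same page can skip extra page-cache lookups and remappings.
 */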
void erofs_unmap_metabuf(struct erofs_buf *buf)
{
	if (!buf->base)
		return;
	kunmap_local(buf->base);
	buf->base = NULL;
}

void erofs_put_metabuf(struct erofs_buf *buf)
{
	if (!buf->page)
		return;
	erofs_unmap_metabuf(buf);
	folio_put(page_folio(buf->page));
	buf->page = NULL;
}

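/*
 * Read metadata at byte @offset (plus the buffer's base offset buf->off)
 * of the buffer's mapping.  The previously cached page is reused if the
 * target index still falls into the current folio; otherwise it is dropped
 * and a new folio is read in.  Returns the mapped address at @offset if
 * @need_kmap, NULL if not, or ERR_PTR() on failure.
 */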
void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap)
{
	pgoff_t index = (buf->off + offset) >> PAGE_SHIFT;
	struct folio *folio = NULL;

	if (buf->page) {
		folio = page_folio(buf->page);
		if (folio_file_page(folio, index) != buf->page)
			erofs_unmap_metabuf(buf);
	}
	if (!folio || !folio_contains(folio, index)) {
		erofs_put_metabuf(buf);
		folio = read_mapping_folio(buf->mapping, index, buf->file);
		if (IS_ERR(folio))
			return folio;
	}
	buf->page = folio_file_page(folio, index);
	if (!need_kmap)
		return NULL;
	if (!buf->base)
		buf->base = kmap_local_page(buf->page);
	return buf->base + (offset & ~PAGE_MASK);
}

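/*
 * Bind @buf to the address_space that backs metadata I/O: the metabox
 * inode (if @in_metabox), the backing file in file-backed mounts, the
 * fscache pseudo inode in fscache mode, or the raw block device mapping
 * otherwise.  dif0.fsoff is recorded as the base offset for the non-metabox
 * cases.
 */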
int erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb,
		       bool in_metabox)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	buf->file = NULL;
	if (in_metabox) {
		if (unlikely(!sbi->metabox_inode))
			return -EFSCORRUPTED;
		buf->mapping = sbi->metabox_inode->i_mapping;
		return 0;
	}
	buf->off = sbi->dif0.fsoff;
	if (erofs_is_fileio_mode(sbi)) {
		buf->file = sbi->dif0.file;	/* some filesystems (e.g. FUSE) need it */
		buf->mapping = buf->file->f_mapping;
	} else if (erofs_is_fscache_mode(sb))
		buf->mapping = sbi->dif0.fscache->inode->i_mapping;
	else
		buf->mapping = sb->s_bdev->bd_mapping;
	return 0;
}

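/*
 * One-shot helper combining erofs_init_metabuf() and erofs_bread().
 * A typical call sequence looks like this (illustrative sketch, here
 * reading outside the metabox):
 *
 *	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
 *	void *ptr = erofs_read_metabuf(&buf, sb, pos, false);
 *
 *	if (IS_ERR(ptr))
 *		return PTR_ERR(ptr);
 *	(use the metadata at ptr...)
 *	erofs_put_metabuf(&buf);
 */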
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
			 erofs_off_t offset, bool in_metabox)
{
	int err;

	err = erofs_init_metabuf(buf, sb, in_metabox);
	if (err)
		return ERR_PTR(err);
	return erofs_bread(buf, offset, true);
}

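/*
 * Translate the logical extent starting at @map->m_la into its physical
 * location.  Flat inodes map straight onto consecutive blocks starting at
 * vi->startblk, with an optionally tail-packed inline part stored next to
 * the on-disk inode (flagged EROFS_MAP_META).  Chunk-based inodes look up
 * one block-map or chunk-index entry per chunk instead.  Holes are
 * reported by leaving EROFS_MAP_MAPPED unset.
 */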
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
{
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	struct super_block *sb = inode->i_sb;
	unsigned int unit, blksz = sb->s_blocksize;
	struct erofs_inode *vi = EROFS_I(inode);
	struct erofs_inode_chunk_index *idx;
	erofs_blk_t startblk, addrmask;
	bool tailpacking;
	erofs_off_t pos;
	u64 chunknr;
	int err = 0;

	trace_erofs_map_blocks_enter(inode, map, 0);
	map->m_deviceid = 0;
	map->m_flags = 0;
	if (map->m_la >= inode->i_size)
		goto out;

	if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
		tailpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
		if (!tailpacking && vi->startblk == EROFS_NULL_ADDR)
			goto out;
		pos = erofs_pos(sb, erofs_iblks(inode) - tailpacking);

		map->m_flags = EROFS_MAP_MAPPED;
		if (map->m_la < pos) {
			map->m_pa = erofs_pos(sb, vi->startblk) + map->m_la;
			map->m_llen = pos - map->m_la;
		} else {
			map->m_pa = erofs_iloc(inode) + vi->inode_isize +
				vi->xattr_isize + erofs_blkoff(sb, map->m_la);
			map->m_llen = inode->i_size - map->m_la;
			map->m_flags |= EROFS_MAP_META;
		}
		goto out;
	}

	if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
		unit = sizeof(*idx);	/* chunk index */
	else
		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;	/* block map */

	chunknr = map->m_la >> vi->chunkbits;
	pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
		    vi->xattr_isize, unit) + unit * chunknr;

	idx = erofs_read_metabuf(&buf, sb, pos, erofs_inode_in_metabox(inode));
	if (IS_ERR(idx)) {
		err = PTR_ERR(idx);
		goto out;
	}
	map->m_la = chunknr << vi->chunkbits;
	map->m_llen = min_t(erofs_off_t, 1UL << vi->chunkbits,
			    round_up(inode->i_size - map->m_la, blksz));
	if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) {
		addrmask = (vi->chunkformat & EROFS_CHUNK_FORMAT_48BIT) ?
			BIT_ULL(48) - 1 : BIT_ULL(32) - 1;
		startblk = (((u64)le16_to_cpu(idx->startblk_hi) << 32) |
			    le32_to_cpu(idx->startblk_lo)) & addrmask;
		if ((startblk ^ EROFS_NULL_ADDR) & addrmask) {
			map->m_deviceid = le16_to_cpu(idx->device_id) &
				EROFS_SB(sb)->device_id_mask;
			map->m_pa = erofs_pos(sb, startblk);
			map->m_flags = EROFS_MAP_MAPPED;
		}
	} else {
		startblk = le32_to_cpu(*(__le32 *)idx);
		if (startblk != (u32)EROFS_NULL_ADDR) {
			map->m_pa = erofs_pos(sb, startblk);
			map->m_flags = EROFS_MAP_MAPPED;
		}
	}
	erofs_put_metabuf(&buf);
out:
	if (!err) {
		map->m_plen = map->m_llen;
		/* inline data should be located in the same meta block */
		if ((map->m_flags & EROFS_MAP_META) &&
		    erofs_blkoff(sb, map->m_pa) + map->m_plen > blksz) {
			erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid);
			DBG_BUGON(1);
			return -EFSCORRUPTED;
		}
	}
	trace_erofs_map_blocks_exit(inode, map, 0, err);
	return err;
}

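/*
 * Prime @map with the information of @dif; m_bdev is set only when the
 * device is actually backed by a block device file.
 */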
static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
		struct super_block *sb, struct erofs_device_info *dif)
{
	map->m_sb = sb;
	map->m_dif = dif;
	map->m_bdev = NULL;
	if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
		map->m_bdev = file_bdev(dif->file);
}

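/*
 * Resolve (m_deviceid, m_pa) to a concrete device.  Device id 0 means the
 * primary device; in flatdev mode all extra devices share one contiguous
 * address space, so only m_pa needs adjusting by the device's uniaddr.
 */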
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
	struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
	struct erofs_device_info *dif;
	erofs_off_t startoff;
	int id;

	erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0);
	map->m_bdev = sb->s_bdev;	/* use s_bdev for the primary device */
	if (map->m_deviceid) {
		down_read(&devs->rwsem);
		dif = idr_find(&devs->tree, map->m_deviceid - 1);
		if (!dif) {
			up_read(&devs->rwsem);
			return -ENODEV;
		}
		if (devs->flatdev) {
			map->m_pa += erofs_pos(sb, dif->uniaddr);
			up_read(&devs->rwsem);
			return 0;
		}
		erofs_fill_from_devinfo(map, sb, dif);
		up_read(&devs->rwsem);
	} else if (devs->extra_devices && !devs->flatdev) {
		down_read(&devs->rwsem);
		idr_for_each_entry(&devs->tree, dif, id) {
			if (!dif->uniaddr)
				continue;

			startoff = erofs_pos(sb, dif->uniaddr);
			if (map->m_pa >= startoff &&
			    map->m_pa < startoff + erofs_pos(sb, dif->blocks)) {
				map->m_pa -= startoff;
				erofs_fill_from_devinfo(map, sb, dif);
				break;
			}
		}
		up_read(&devs->rwsem);
	}
	return 0;
}

/*
 * bit 30: I/O error occurred on this folio
 * bit 29: CPU has dirty data in D-cache (needs aliasing handling)
 * bit 0 - 28: remaining parts to complete this folio
 */
#define EROFS_ONLINEFOLIO_EIO		30
#define EROFS_ONLINEFOLIO_DIRTY		29

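/*
 * An "online" folio tracks its outstanding parts in folio->private while
 * it is locked for read: erofs_onlinefolio_split() adds a part and
 * erofs_onlinefolio_end() completes one, with the last completer flushing
 * the D-cache if needed and ending the folio read.
 */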
void erofs_onlinefolio_init(struct folio *folio)
{
	union {
		atomic_t o;
		void *v;
	} u = { .o = ATOMIC_INIT(1) };

	folio->private = u.v;	/* valid only if file-backed folio is locked */
}

void erofs_onlinefolio_split(struct folio *folio)
{
	atomic_inc((atomic_t *)&folio->private);
}

void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
{
	int orig, v;

	do {
		orig = atomic_read((atomic_t *)&folio->private);
		DBG_BUGON(orig <= 0);
		v = dirty << EROFS_ONLINEFOLIO_DIRTY;
		v |= (orig - 1) | (!!err << EROFS_ONLINEFOLIO_EIO);
	} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);

	if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1))
		return;
	folio->private = 0;
	if (v & BIT(EROFS_ONLINEFOLIO_DIRTY))
		flush_dcache_folio(folio);
	folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
}

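/*
 * Bridge erofs_map_blocks() results to the iomap infrastructure: unmapped
 * extents become IOMAP_HOLE, inline (tail-packed) extents are exposed as
 * IOMAP_INLINE backed by a pinned metabuf, and everything else is reported
 * as IOMAP_MAPPED on the resolved device.
 */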
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	int ret;
	struct super_block *sb = inode->i_sb;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;

	map.m_la = offset;
	map.m_llen = length;
	ret = erofs_map_blocks(inode, &map);
	if (ret < 0)
		return ret;

	iomap->offset = map.m_la;
	iomap->length = map.m_llen;
	iomap->flags = 0;
	iomap->private = NULL;
	iomap->addr = IOMAP_NULL_ADDR;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_HOLE;
		return 0;
	}

	if (!(map.m_flags & EROFS_MAP_META) || !erofs_inode_in_metabox(inode)) {
		mdev = (struct erofs_map_dev) {
			.m_deviceid = map.m_deviceid,
			.m_pa = map.m_pa,
		};
		ret = erofs_map_dev(sb, &mdev);
		if (ret)
			return ret;

		if (flags & IOMAP_DAX)
			iomap->dax_dev = mdev.m_dif->dax_dev;
		else
			iomap->bdev = mdev.m_bdev;
		iomap->addr = mdev.m_dif->fsoff + mdev.m_pa;
		if (flags & IOMAP_DAX)
			iomap->addr += mdev.m_dif->dax_part_off;
	}

	if (map.m_flags & EROFS_MAP_META) {
		void *ptr;
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

		iomap->type = IOMAP_INLINE;
		ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
					 erofs_inode_in_metabox(inode));
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);
		iomap->inline_data = ptr;
		iomap->private = buf.base;
	} else {
		iomap->type = IOMAP_MAPPED;
	}
	return 0;
}

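/*
 * Release the metabuf pinned by erofs_iomap_begin() for inline extents;
 * kmap_to_page() recovers the cached page from the kmap'ed pointer stashed
 * in iomap->private.
 */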
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned int flags, struct iomap *iomap)
{
	void *ptr = iomap->private;

	if (ptr) {
		struct erofs_buf buf = {
			.page = kmap_to_page(ptr),
			.base = ptr,
		};

		DBG_BUGON(iomap->type != IOMAP_INLINE);
		erofs_put_metabuf(&buf);
	} else {
		DBG_BUGON(iomap->type == IOMAP_INLINE);
	}
	return written;
}

static const struct iomap_ops erofs_iomap_ops = {
	.iomap_begin = erofs_iomap_begin,
	.iomap_end = erofs_iomap_end,
};

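/*
 * FIEMAP reporting: compressed inodes go through z_erofs_iomap_report_ops
 * (when CONFIG_EROFS_FS_ZIP is enabled), uncompressed ones reuse
 * erofs_iomap_ops above.
 */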
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len)
{
	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
		return iomap_fiemap(inode, fieinfo, start, len,
				    &z_erofs_iomap_report_ops);
#else
		return -EOPNOTSUPP;
#endif
	}
	return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops);
}

/*
 * Since there are no write or truncate flows, no inode locking needs to
 * be held at the moment.
 */
static int erofs_read_folio(struct file *file, struct folio *folio)
{
	trace_erofs_read_folio(folio, true);

	return iomap_read_folio(folio, &erofs_iomap_ops);
}

static void erofs_readahead(struct readahead_control *rac)
{
	trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
			      readahead_count(rac), true);

	return iomap_readahead(rac, &erofs_iomap_ops);
}

static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
	return iomap_bmap(mapping, block, &erofs_iomap_ops);
}

static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	/* no need to take the (shared) inode lock since it's a read-only fs */
	if (!iov_iter_count(to))
		return 0;

#ifdef CONFIG_FS_DAX
	if (IS_DAX(inode))
		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
				    NULL, 0, NULL, 0);
	return filemap_read(iocb, to, 0);
}

/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_aops = {
	.read_folio = erofs_read_folio,
	.readahead = erofs_readahead,
	.bmap = erofs_bmap,
	.direct_IO = noop_direct_IO,
	.release_folio = iomap_release_folio,
	.invalidate_folio = iomap_invalidate_folio,
};

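/*
 * DAX support: faults are served directly through the iomap ops above.
 * Since the filesystem is read-only, shared writable mappings are refused
 * in erofs_file_mmap_prepare() below.
 */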
#ifdef CONFIG_FS_DAX
static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
		unsigned int order)
{
	return dax_iomap_fault(vmf, order, NULL, NULL, &erofs_iomap_ops);
}

static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
{
	return erofs_dax_huge_fault(vmf, 0);
}

static const struct vm_operations_struct erofs_dax_vm_ops = {
	.fault = erofs_dax_fault,
	.huge_fault = erofs_dax_huge_fault,
};

static int erofs_file_mmap_prepare(struct vm_area_desc *desc)
{
	if (!IS_DAX(file_inode(desc->file)))
		return generic_file_readonly_mmap_prepare(desc);

	if ((desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE))
		return -EINVAL;

	desc->vm_ops = &erofs_dax_vm_ops;
	desc->vm_flags |= VM_HUGEPAGE;
	return 0;
}
#else
#define erofs_file_mmap_prepare	generic_file_readonly_mmap_prepare
#endif

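/*
 * SEEK_HOLE/SEEK_DATA are answered via iomap as well; compressed inodes
 * use the z_erofs report ops when available, and all other whence values
 * fall back to generic_file_llseek().
 */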
static loff_t erofs_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	const struct iomap_ops *ops = &erofs_iomap_ops;

	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout))
#ifdef CONFIG_EROFS_FS_ZIP
		ops = &z_erofs_iomap_report_ops;
#else
		return generic_file_llseek(file, offset, whence);
#endif

	if (whence == SEEK_HOLE)
		offset = iomap_seek_hole(inode, offset, ops);
	else if (whence == SEEK_DATA)
		offset = iomap_seek_data(inode, offset, ops);
	else
		return generic_file_llseek(file, offset, whence);

	if (offset < 0)
		return offset;
	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}

const struct file_operations erofs_file_fops = {
	.llseek = erofs_file_llseek,
	.read_iter = erofs_file_read_iter,
	.mmap_prepare = erofs_file_mmap_prepare,
	.get_unmapped_area = thp_get_unmapped_area,
	.splice_read = filemap_splice_read,
};