Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/ufs/ffs/ffs_rawread.c
39478 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2000-2003 Tor Egge
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
29
#include <sys/systm.h>
30
#include <sys/bio.h>
31
#include <sys/buf.h>
32
#include <sys/conf.h>
33
#include <sys/fcntl.h>
34
#include <sys/kernel.h>
35
#include <sys/limits.h>
36
#include <sys/mount.h>
37
#include <sys/namei.h>
38
#include <sys/proc.h>
39
#include <sys/rwlock.h>
40
#include <sys/stat.h>
41
#include <sys/sysctl.h>
42
#include <sys/vnode.h>
43
44
#include <ufs/ufs/extattr.h>
45
#include <ufs/ufs/quota.h>
46
#include <ufs/ufs/inode.h>
47
#include <ufs/ufs/ufsmount.h>
48
#include <ufs/ufs/ufs_extern.h>
49
#include <ufs/ffs/fs.h>
50
#include <ufs/ffs/ffs_extern.h>
51
52
#include <vm/vm.h>
53
#include <vm/vm_extern.h>
54
#include <vm/vm_object.h>
55
#include <vm/vnode_pager.h>
56
57
static int ffs_rawread_readahead(struct vnode *vp,
58
caddr_t udata,
59
off_t offset,
60
size_t len,
61
struct thread *td,
62
struct buf *bp);
63
static int ffs_rawread_main(struct vnode *vp,
64
struct uio *uio);
65
66
static int ffs_rawread_sync(struct vnode *vp);
67
68
int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
69
70
SYSCTL_DECL(_vfs_ffs);

/*
 * Zone of pbufs dedicated to raw reads; sized at boot by
 * ffs_rawread_setup().
 */
static uma_zone_t ffsraw_pbuf_zone;

/* vfs.ffs.allowrawread: global switch for the raw read fast path. */
static int allowrawread = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0,
	   "Flag to enable raw reads");

/* vfs.ffs.rawreadahead: allow a second in-flight pbuf for long reads. */
static int rawreadahead = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0,
	   "Flag to enable readahead for long raw reads");
81
82
static void
83
ffs_rawread_setup(void *arg __unused)
84
{
85
86
ffsraw_pbuf_zone = pbuf_zsecond_create("ffsrawpbuf",
87
(nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8);
88
}
89
SYSINIT(ffs_raw, SI_SUB_VM_CONF, SI_ORDER_ANY, ffs_rawread_setup, NULL);
90
91
/*
 * Ensure the vnode has no dirty state (mmap'ed pages, pending writes,
 * dirty buffers) before a raw read bypasses the buffer cache.  May
 * temporarily upgrade the vnode lock to exclusive; the lock mode seen
 * by the caller is restored before returning.  Returns 0 on success,
 * EIO if the vnode was reclaimed while unlocked, or a sync error.
 */
static int
ffs_rawread_sync(struct vnode *vp)
{
	int error;
	int upgraded;		/* 1 if we upgraded to LK_EXCLUSIVE here */
	struct bufobj *bo;
	struct mount *mp;
	vm_object_t obj;

	/* Check for dirty mmap, pending writes and dirty buffers */
	bo = &vp->v_bufobj;
	BO_LOCK(bo);
	VI_LOCK(vp);
	if (bo->bo_numoutput > 0 ||
	    bo->bo_dirty.bv_cnt > 0 ||
	    ((obj = vp->v_object) != NULL &&
	     vm_object_mightbedirty(obj))) {
		VI_UNLOCK(vp);
		BO_UNLOCK(bo);

		/*
		 * Become a writer on the mount.  If that would block, drop
		 * the vnode lock first to avoid a lock-order reversal, then
		 * wait and re-take the vnode lock exclusively.
		 */
		if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
				upgraded = 1;
			else
				upgraded = 0;
			VOP_UNLOCK(vp);
			(void) vn_start_write(vp, &mp, V_WAIT);
			VOP_LOCK(vp, LK_EXCLUSIVE);
		} else if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
			upgraded = 1;
			/* Upgrade to exclusive lock, this might block */
			VOP_LOCK(vp, LK_UPGRADE);
		} else
			upgraded = 0;

		VI_LOCK(vp);
		/* Check if vnode was reclaimed while unlocked. */
		if (VN_IS_DOOMED(vp)) {
			VI_UNLOCK(vp);
			if (upgraded != 0)
				VOP_LOCK(vp, LK_DOWNGRADE);
			vn_finished_write(mp);
			return (EIO);
		}
		VI_UNLOCK(vp);

		/* Attempt to msync mmap() regions to clean dirty mmap */
		vnode_pager_clean_sync(vp);

		/* Wait for pending writes to complete */
		BO_LOCK(bo);
		error = bufobj_wwait(&vp->v_bufobj, 0, 0);
		if (error != 0) {
			/* XXX: can't happen with a zero timeout ??? */
			BO_UNLOCK(bo);
			if (upgraded != 0)
				VOP_LOCK(vp, LK_DOWNGRADE);
			vn_finished_write(mp);
			return (error);
		}
		/* Flush dirty buffers */
		if (bo->bo_dirty.bv_cnt > 0) {
			BO_UNLOCK(bo);
			if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) {
				if (upgraded != 0)
					VOP_LOCK(vp, LK_DOWNGRADE);
				vn_finished_write(mp);
				return (error);
			}
			BO_LOCK(bo);
			/* After a MNT_WAIT sync nothing may remain dirty. */
			if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)
				panic("ffs_rawread_sync: dirty bufs");
		}
		BO_UNLOCK(bo);
		if (upgraded != 0)
			VOP_LOCK(vp, LK_DOWNGRADE);
		vn_finished_write(mp);
	} else {
		/* Nothing dirty and no writes in flight: fast path out. */
		VI_UNLOCK(vp);
		BO_UNLOCK(bo);
	}
	return 0;
}
175
176
/*
 * Set up and launch one asynchronous raw read of up to len bytes at
 * offset into the user buffer udata, using pbuf bp.  The transfer is
 * clamped to the pbuf's KVA size, to the contiguous on-disk extent
 * reported by ufs_bmaparray(), and (for holes) to one filesystem
 * block.  Holes are satisfied immediately by zero-filling and marking
 * the buffer B_DONE; otherwise the I/O is handed to the device via
 * BO_STRATEGY() and the caller must bwait() on bp.  Returns 0 on
 * success, EINVAL on block number overflow, EFAULT if the user pages
 * could not be wired, or a bmap error.
 */
static int
ffs_rawread_readahead(struct vnode *vp,
		      caddr_t udata,
		      off_t offset,
		      size_t len,
		      struct thread *td,
		      struct buf *bp)
{
	int error;
	uint64_t iolen;
	off_t blockno;
	int blockoff;
	int bsize;
	struct vnode *dp;		/* underlying device vnode */
	int bforwards;			/* contiguous blocks after blkno */
	struct inode *ip;
	ufs2_daddr_t blkno;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	ip = VTOI(vp);
	dp = ITODEVVP(ip);

	/*
	 * A misaligned user address consumes an extra page of buffer KVA;
	 * account for that when clamping the byte count below.
	 */
	iolen = ((vm_offset_t) udata) & PAGE_MASK;
	bp->b_bcount = len;
	if (bp->b_bcount + iolen > bp->b_kvasize) {
		bp->b_bcount = bp->b_kvasize;
		if (iolen != 0)
			bp->b_bcount -= PAGE_SIZE;
	}
	bp->b_flags = 0;	/* XXX necessary ? */
	bp->b_iocmd = BIO_READ;
	bp->b_iodone = bdone;	/* wake bwait() on completion */
	blockno = offset / bsize;
	blockoff = (offset % bsize) / DEV_BSIZE;
	if ((daddr_t) blockno != blockno) {
		return EINVAL; /* blockno overflow */
	}

	bp->b_lblkno = bp->b_blkno = blockno;

	error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, NULL, &bforwards, NULL);
	if (error != 0)
		return error;
	if (blkno == -1) {
		/* Fill holes with NULs to preserve semantics */

		/* A hole is at most one filesystem block. */
		if (bp->b_bcount + blockoff * DEV_BSIZE > bsize)
			bp->b_bcount = bsize - blockoff * DEV_BSIZE;

		if (vmapbuf(bp, udata, bp->b_bcount, 1) < 0)
			return EFAULT;

		maybe_yield();
		bzero(bp->b_data, bp->b_bufsize);

		/* Mark operation completed (similar to bufdone()) */

		bp->b_resid = 0;
		bp->b_flags |= B_DONE;
		return 0;
	}
	bp->b_blkno = blkno + blockoff;
	bp->b_offset = bp->b_iooffset = (blkno + blockoff) * DEV_BSIZE;

	/* Clamp to the contiguous extent so a single bio suffices. */
	if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards))
		bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE;

	if (vmapbuf(bp, udata, bp->b_bcount, 1) < 0)
		return EFAULT;

	/* Issue the read directly against the device's bufobj. */
	BO_STRATEGY(&dp->v_bufobj, bp);
	return 0;
}
250
251
/*
 * Main raw read loop.  Transfers uio's range using pbufs, keeping one
 * read (bp) in flight and, when rawreadahead is enabled and pbufs are
 * available, a second readahead pbuf (nbp) for the following chunk;
 * the two are swapped as each read completes.  Readahead errors are
 * deferred in nerror and only reported after the data read ahead of
 * them has been consumed.  On return, uio is advanced to reflect the
 * bytes actually transferred.
 */
static int
ffs_rawread_main(struct vnode *vp,
		 struct uio *uio)
{
	int error, nerror;	/* nerror: deferred readahead error */
	struct buf *bp, *nbp, *tbp;
	uint64_t iolen;
	caddr_t udata;
	long resid;
	off_t offset;
	struct thread *td;

	td = uio->uio_td ? uio->uio_td : curthread;
	udata = uio->uio_iov->iov_base;
	resid = uio->uio_resid;
	offset = uio->uio_offset;

	error = 0;
	nerror = 0;

	bp = NULL;
	nbp = NULL;

	while (resid > 0) {

		if (bp == NULL) { /* Setup first read */
			bp = uma_zalloc(ffsraw_pbuf_zone, M_WAITOK);
			pbgetvp(vp, bp);
			error = ffs_rawread_readahead(vp, udata, offset,
						      resid, td, bp);
			if (error != 0)
				break;

			if (resid > bp->b_bufsize) { /* Setup first readahead */
				/*
				 * M_NOWAIT: readahead is best-effort; fall
				 * back to single buffering if pbufs are
				 * scarce.
				 */
				if (rawreadahead != 0)
					nbp = uma_zalloc(ffsraw_pbuf_zone,
					    M_NOWAIT);
				else
					nbp = NULL;
				if (nbp != NULL) {
					pbgetvp(vp, nbp);

					nerror = ffs_rawread_readahead(vp,
								       udata +
								       bp->b_bufsize,
								       offset +
								       bp->b_bufsize,
								       resid -
								       bp->b_bufsize,
								       td,
								       nbp);
					if (nerror) {
						pbrelvp(nbp);
						uma_zfree(ffsraw_pbuf_zone,
						    nbp);
						nbp = NULL;
					}
				}
			}
		}

		/* Wait for the leading read to complete. */
		bwait(bp, PRIBIO, "rawrd");
		vunmapbuf(bp);

		iolen = bp->b_bcount - bp->b_resid;
		if (iolen == 0 && (bp->b_ioflags & BIO_ERROR) == 0) {
			nerror = 0;	/* Ignore possible beyond EOF error */
			break; /* EOF */
		}

		if ((bp->b_ioflags & BIO_ERROR) != 0) {
			error = bp->b_error;
			break;
		}
		resid -= iolen;
		udata += iolen;
		offset += iolen;
		if (iolen < bp->b_bufsize) {
			/* Incomplete read. Try to read remaining part */
			error = ffs_rawread_readahead(vp,
						      udata,
						      offset,
						      bp->b_bufsize - iolen,
						      td,
						      bp);
			if (error != 0)
				break;
		} else if (nbp != NULL) { /* Complete read with readahead */

			/* Swap buffers: readahead becomes the leader. */
			tbp = bp;
			bp = nbp;
			nbp = tbp;

			if (resid <= bp->b_bufsize) { /* No more readaheads */
				pbrelvp(nbp);
				uma_zfree(ffsraw_pbuf_zone, nbp);
				nbp = NULL;
			} else { /* Setup next readahead */
				nerror = ffs_rawread_readahead(vp,
							       udata +
							       bp->b_bufsize,
							       offset +
							       bp->b_bufsize,
							       resid -
							       bp->b_bufsize,
							       td,
							       nbp);
				if (nerror != 0) {
					pbrelvp(nbp);
					uma_zfree(ffsraw_pbuf_zone, nbp);
					nbp = NULL;
				}
			}
		} else if (nerror != 0) {/* Deferred Readahead error */
			break;
		} else if (resid > 0) { /* More to read, no readahead */
			error = ffs_rawread_readahead(vp, udata, offset,
						      resid, td, bp);
			if (error != 0)
				break;
		}
	}

	if (bp != NULL) {
		pbrelvp(bp);
		uma_zfree(ffsraw_pbuf_zone, bp);
	}
	if (nbp != NULL) {			/* Run down readahead buffer */
		bwait(nbp, PRIBIO, "rawrd");
		vunmapbuf(nbp);
		pbrelvp(nbp);
		uma_zfree(ffsraw_pbuf_zone, nbp);
	}

	if (error == 0)
		error = nerror;
	/* Report progress back through the caller's uio. */
	uio->uio_iov->iov_base = udata;
	uio->uio_resid = resid;
	uio->uio_offset = offset;
	return error;
}
392
393
/*
 * Raw read entry point.  Attempts a direct, cache-bypassing read into
 * the user's buffer when vfs.ffs.allowrawread is set, the request is a
 * single user-space iovec, and both offset and length are aligned to
 * the media sector size.  *workdone is set to 1 when the request was
 * fully handled here; when it is 0 the caller must perform (or finish)
 * the read through the buffer cache — in particular, any tail of the
 * request that lies in the file's last partial block is left for the
 * buffered path.  Returns 0 or an error from sync/raw read.
 */
int
ffs_rawread(struct vnode *vp,
	    struct uio *uio,
	    int *workdone)
{
	/*
	 * TDP_DEADLKTREAT threads are excluded; presumably to avoid
	 * deadlock-prone lock upgrades in ffs_rawread_sync() — confirm.
	 */
	if (allowrawread != 0 &&
	    uio->uio_iovcnt == 1 &&
	    uio->uio_segflg == UIO_USERSPACE &&
	    uio->uio_resid == uio->uio_iov->iov_len &&
	    (((uio->uio_td != NULL) ? uio->uio_td : curthread)->td_pflags &
	     TDP_DEADLKTREAT) == 0) {
		int secsize;		/* Media sector size */
		off_t filebytes;	/* Bytes left of file */
		int blockbytes;		/* Bytes left of file in full blocks */
		int partialbytes;	/* Bytes in last partial block */
		int skipbytes;		/* Bytes not to read in ffs_rawread */
		struct inode *ip;
		int error;

		/* Only handle sector aligned reads */
		ip = VTOI(vp);
		secsize = ITODEVVP(ip)->v_bufobj.bo_bsize;
		if ((uio->uio_offset & (secsize - 1)) == 0 &&
		    (uio->uio_resid & (secsize - 1)) == 0) {

			/* Sync dirty pages and buffers if needed */
			error = ffs_rawread_sync(vp);
			if (error != 0)
				return error;

			/* Check for end of file */
			if (ip->i_size > uio->uio_offset) {
				filebytes = ip->i_size - uio->uio_offset;

				/* No special eof handling needed ? */
				if (uio->uio_resid <= filebytes) {
					*workdone = 1;
					return ffs_rawread_main(vp, uio);
				}

				/*
				 * Request extends past EOF: raw-read only
				 * the full blocks, leave the partial tail
				 * to the buffered path.
				 */
				partialbytes = ((unsigned int) ip->i_size) %
				    ITOFS(ip)->fs_bsize;
				blockbytes = (int) filebytes - partialbytes;
				if (blockbytes > 0) {
					skipbytes = uio->uio_resid -
					    blockbytes;
					uio->uio_resid = blockbytes;
					error = ffs_rawread_main(vp, uio);
					uio->uio_resid += skipbytes;
					if (error != 0)
						return error;
					/* Read remaining part using buffer */
				}
			}
		}
	}
	*workdone = 0;
	return 0;
}
453
454