Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/fs/ext2fs/ext2_alloc.c
39536 views
1
/*-
2
* modified for Lites 1.1
3
*
4
* Aug 1995, Godmar Back ([email protected])
5
* University of Utah, Department of Computer Science
6
*/
7
/*-
8
* SPDX-License-Identifier: BSD-3-Clause
9
*
10
* Copyright (c) 1982, 1986, 1989, 1993
11
* The Regents of the University of California. All rights reserved.
12
*
13
* Redistribution and use in source and binary forms, with or without
14
* modification, are permitted provided that the following conditions
15
* are met:
16
* 1. Redistributions of source code must retain the above copyright
17
* notice, this list of conditions and the following disclaimer.
18
* 2. Redistributions in binary form must reproduce the above copyright
19
* notice, this list of conditions and the following disclaimer in the
20
* documentation and/or other materials provided with the distribution.
21
* 3. Neither the name of the University nor the names of its contributors
22
* may be used to endorse or promote products derived from this software
23
* without specific prior written permission.
24
*
25
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35
* SUCH DAMAGE.
36
*/
37
38
#include <sys/param.h>
39
#include <sys/systm.h>
40
#include <sys/conf.h>
41
#include <sys/vnode.h>
42
#include <sys/sdt.h>
43
#include <sys/stat.h>
44
#include <sys/mount.h>
45
#include <sys/sysctl.h>
46
#include <sys/syslog.h>
47
#include <sys/buf.h>
48
#include <sys/endian.h>
49
50
#include <fs/ext2fs/fs.h>
51
#include <fs/ext2fs/inode.h>
52
#include <fs/ext2fs/ext2_mount.h>
53
#include <fs/ext2fs/ext2fs.h>
54
#include <fs/ext2fs/ext2_extern.h>
55
56
SDT_PROVIDER_DEFINE(ext2fs);
57
/*
58
* ext2fs trace probe:
59
* arg0: verbosity. Higher numbers give more verbose messages
60
* arg1: Textual message
61
*/
62
SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*");
63
SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc,
64
"ino_t", "e2fs_lbn_t", "e2fs_lbn_t");
65
SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t");
66
SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t");
67
SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int");
68
SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted,
69
"int", "daddr_t", "char*");
70
SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t");
71
SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t");
72
73
static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int);
74
static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int);
75
static u_long ext2_dirpref(struct inode *);
76
static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int,
77
daddr_t (*)(struct inode *, int, daddr_t,
78
int));
79
static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int);
80
static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
81
82
/*
83
* Allocate a block in the filesystem.
84
*
85
* A preference may be optionally specified. If a preference is given
86
* the following hierarchy is used to allocate a block:
87
* 1) allocate the requested block.
88
* 2) allocate a rotationally optimal block in the same cylinder.
89
* 3) allocate a block in the same cylinder group.
90
* 4) quadratically rehash into other cylinder groups, until an
91
* available block is located.
92
* If no block preference is given the following hierarchy is used
93
* to allocate a block:
94
* 1) allocate a block in the cylinder group that contains the
95
* inode for the file.
96
* 2) quadratically rehash into other cylinder groups, until an
97
* available block is located.
98
*/
99
int
100
ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size,
101
struct ucred *cred, e4fs_daddr_t *bnp)
102
{
103
struct m_ext2fs *fs;
104
struct ext2mount *ump;
105
e4fs_daddr_t bno;
106
int cg;
107
108
*bnp = 0;
109
fs = ip->i_e2fs;
110
ump = ip->i_ump;
111
mtx_assert(EXT2_MTX(ump), MA_OWNED);
112
#ifdef INVARIANTS
113
if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
114
vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
115
(long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
116
panic("ext2_alloc: bad size");
117
}
118
if (cred == NOCRED)
119
panic("ext2_alloc: missing credential");
120
#endif /* INVARIANTS */
121
if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0)
122
goto nospace;
123
if (cred->cr_uid != 0 &&
124
fs->e2fs_fbcount < fs->e2fs_rbcount)
125
goto nospace;
126
if (bpref >= fs->e2fs_bcount)
127
bpref = 0;
128
if (bpref == 0)
129
cg = ino_to_cg(fs, ip->i_number);
130
else
131
cg = dtog(fs, bpref);
132
bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
133
ext2_alloccg);
134
if (bno > 0) {
135
/* set next_alloc fields as done in block_getblk */
136
ip->i_next_alloc_block = lbn;
137
ip->i_next_alloc_goal = bno;
138
139
ip->i_blocks += btodb(fs->e2fs_bsize);
140
ip->i_flag |= IN_CHANGE | IN_UPDATE;
141
*bnp = bno;
142
return (0);
143
}
144
nospace:
145
EXT2_UNLOCK(ump);
146
SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block");
147
return (ENOSPC);
148
}
149
150
/*
151
* Allocate EA's block for inode.
152
*/
153
e4fs_daddr_t
154
ext2_alloc_meta(struct inode *ip)
155
{
156
struct m_ext2fs *fs;
157
daddr_t blk;
158
159
fs = ip->i_e2fs;
160
161
EXT2_LOCK(ip->i_ump);
162
blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize,
163
ext2_alloccg);
164
if (0 == blk) {
165
EXT2_UNLOCK(ip->i_ump);
166
SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block");
167
}
168
169
return (blk);
170
}
171
172
/*
173
* Reallocate a sequence of blocks into a contiguous sequence of blocks.
174
*
175
* The vnode and an array of buffer pointers for a range of sequential
176
* logical blocks to be made contiguous is given. The allocator attempts
177
* to find a range of sequential blocks starting as close as possible to
178
* an fs_rotdelay offset from the end of the allocation for the logical
179
* block immediately preceding the current range. If successful, the
180
* physical block numbers in the buffer pointers and in the inode are
181
* changed to reflect the new allocation. If unsuccessful, the allocation
182
* is left unchanged. The success in doing the reallocation is returned.
183
* Note that the error return is not reflected back to the user. Rather
184
* the previous block allocation will be used.
185
*/
186
187
static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
188
"EXT2FS filesystem");
189
190
static int doasyncfree = 1;
191
192
SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
193
"Use asynchronous writes to update block pointers when freeing blocks");
194
195
static int doreallocblks = 0;
196
197
SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
198
199
int
200
ext2_reallocblks(struct vop_reallocblks_args *ap)
201
{
202
struct m_ext2fs *fs;
203
struct inode *ip;
204
struct vnode *vp;
205
struct buf *sbp, *ebp;
206
uint32_t *bap, *sbap, *ebap;
207
struct ext2mount *ump;
208
struct cluster_save *buflist;
209
struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp;
210
e2fs_lbn_t start_lbn, end_lbn;
211
int soff;
212
e2fs_daddr_t newblk, blkno;
213
int i, len, start_lvl, end_lvl, pref, ssize;
214
215
if (doreallocblks == 0)
216
return (ENOSPC);
217
218
vp = ap->a_vp;
219
ip = VTOI(vp);
220
fs = ip->i_e2fs;
221
ump = ip->i_ump;
222
223
if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS)
224
return (ENOSPC);
225
226
buflist = ap->a_buflist;
227
len = buflist->bs_nchildren;
228
start_lbn = buflist->bs_children[0]->b_lblkno;
229
end_lbn = start_lbn + len - 1;
230
#ifdef INVARIANTS
231
for (i = 1; i < len; i++)
232
if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
233
panic("ext2_reallocblks: non-cluster");
234
#endif
235
/*
236
* If the cluster crosses the boundary for the first indirect
237
* block, leave space for the indirect block. Indirect blocks
238
* are initially laid out in a position after the last direct
239
* block. Block reallocation would usually destroy locality by
240
* moving the indirect block out of the way to make room for
241
* data blocks if we didn't compensate here. We should also do
242
* this for other indirect block boundaries, but it is only
243
* important for the first one.
244
*/
245
if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR)
246
return (ENOSPC);
247
/*
248
* If the latest allocation is in a new cylinder group, assume that
249
* the filesystem has decided to move and do not force it back to
250
* the previous cylinder group.
251
*/
252
if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
253
dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
254
return (ENOSPC);
255
if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
256
ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
257
return (ENOSPC);
258
/*
259
* Get the starting offset and block map for the first block.
260
*/
261
if (start_lvl == 0) {
262
sbap = &ip->i_db[0];
263
soff = start_lbn;
264
} else {
265
idp = &start_ap[start_lvl - 1];
266
if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
267
brelse(sbp);
268
return (ENOSPC);
269
}
270
sbap = (u_int *)sbp->b_data;
271
soff = idp->in_off;
272
}
273
/*
274
* If the block range spans two block maps, get the second map.
275
*/
276
ebap = NULL;
277
if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
278
ssize = len;
279
} else {
280
#ifdef INVARIANTS
281
if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
282
panic("ext2_reallocblks: start == end");
283
#endif
284
ssize = len - (idp->in_off + 1);
285
if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp))
286
goto fail;
287
ebap = (u_int *)ebp->b_data;
288
}
289
/*
290
* Find the preferred location for the cluster.
291
*/
292
EXT2_LOCK(ump);
293
pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
294
/*
295
* Search the block map looking for an allocation of the desired size.
296
*/
297
if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
298
len, ext2_clusteralloc)) == 0) {
299
EXT2_UNLOCK(ump);
300
goto fail;
301
}
302
/*
303
* We have found a new contiguous block.
304
*
305
* First we have to replace the old block pointers with the new
306
* block pointers in the inode and indirect blocks associated
307
* with the file.
308
*/
309
SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc,
310
ip->i_number, start_lbn, end_lbn);
311
blkno = newblk;
312
for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
313
if (i == ssize) {
314
bap = ebap;
315
soff = -i;
316
}
317
#ifdef INVARIANTS
318
if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
319
panic("ext2_reallocblks: alloc mismatch");
320
#endif
321
SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap);
322
*bap++ = blkno;
323
}
324
/*
325
* Next we must write out the modified inode and indirect blocks.
326
* For strict correctness, the writes should be synchronous since
327
* the old block values may have been written to disk. In practise
328
* they are almost never written, but if we are concerned about
329
* strict correctness, the `doasyncfree' flag should be set to zero.
330
*
331
* The test on `doasyncfree' should be changed to test a flag
332
* that shows whether the associated buffers and inodes have
333
* been written. The flag should be set when the cluster is
334
* started and cleared whenever the buffer or inode is flushed.
335
* We can then check below to see if it is set, and do the
336
* synchronous write only when it has been cleared.
337
*/
338
if (sbap != &ip->i_db[0]) {
339
if (doasyncfree)
340
bdwrite(sbp);
341
else
342
bwrite(sbp);
343
} else {
344
ip->i_flag |= IN_CHANGE | IN_UPDATE;
345
if (!doasyncfree)
346
ext2_update(vp, 1);
347
}
348
if (ssize < len) {
349
if (doasyncfree)
350
bdwrite(ebp);
351
else
352
bwrite(ebp);
353
}
354
/*
355
* Last, free the old blocks and assign the new blocks to the buffers.
356
*/
357
for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
358
ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
359
fs->e2fs_bsize);
360
buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
361
SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno);
362
}
363
364
return (0);
365
366
fail:
367
if (ssize < len)
368
brelse(ebp);
369
if (sbap != &ip->i_db[0])
370
brelse(sbp);
371
return (ENOSPC);
372
}
373
374
/*
375
* Allocate an inode in the filesystem.
376
*
377
*/
378
int
379
ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp)
380
{
381
struct timespec ts;
382
struct m_ext2fs *fs;
383
struct ext2mount *ump;
384
struct inode *pip;
385
struct inode *ip;
386
struct vnode *vp;
387
struct thread *td;
388
ino_t ino, ipref;
389
int error, cg;
390
391
*vpp = NULL;
392
pip = VTOI(pvp);
393
fs = pip->i_e2fs;
394
ump = pip->i_ump;
395
396
EXT2_LOCK(ump);
397
if (fs->e2fs_ficount == 0)
398
goto noinodes;
399
/*
400
* If it is a directory then obtain a cylinder group based on
401
* ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
402
* always the next inode.
403
*/
404
if ((mode & IFMT) == IFDIR) {
405
cg = ext2_dirpref(pip);
406
if (fs->e2fs_contigdirs[cg] < 255)
407
fs->e2fs_contigdirs[cg]++;
408
} else {
409
cg = ino_to_cg(fs, pip->i_number);
410
if (fs->e2fs_contigdirs[cg] > 0)
411
fs->e2fs_contigdirs[cg]--;
412
}
413
ipref = cg * fs->e2fs_ipg + 1;
414
ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
415
if (ino == 0)
416
goto noinodes;
417
418
td = curthread;
419
error = vfs_hash_get(ump->um_mountp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL);
420
if (error || *vpp != NULL) {
421
return (error);
422
}
423
424
ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO);
425
426
/* Allocate a new vnode/inode. */
427
if ((error = getnewvnode("ext2fs", ump->um_mountp, &ext2_vnodeops, &vp)) != 0) {
428
free(ip, M_EXT2NODE);
429
return (error);
430
}
431
432
lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
433
vp->v_data = ip;
434
ip->i_vnode = vp;
435
ip->i_e2fs = fs = ump->um_e2fs;
436
ip->i_ump = ump;
437
ip->i_number = ino;
438
ip->i_block_group = ino_to_cg(fs, ino);
439
ip->i_next_alloc_block = 0;
440
ip->i_next_alloc_goal = 0;
441
442
error = insmntque(vp, ump->um_mountp);
443
if (error) {
444
free(ip, M_EXT2NODE);
445
return (error);
446
}
447
448
error = vfs_hash_insert(vp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL);
449
if (error || *vpp != NULL) {
450
*vpp = NULL;
451
free(ip, M_EXT2NODE);
452
return (error);
453
}
454
455
if ((error = ext2_vinit(ump->um_mountp, &ext2_fifoops, &vp)) != 0) {
456
vput(vp);
457
*vpp = NULL;
458
free(ip, M_EXT2NODE);
459
return (error);
460
}
461
462
if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS)
463
&& (S_ISREG(mode) || S_ISDIR(mode)))
464
ext4_ext_tree_init(ip);
465
else
466
memset(ip->i_data, 0, sizeof(ip->i_data));
467
468
/*
469
* Set up a new generation number for this inode.
470
* Avoid zero values.
471
*/
472
do {
473
ip->i_gen = arc4random();
474
} while (ip->i_gen == 0);
475
476
vfs_timestamp(&ts);
477
ip->i_birthtime = ts.tv_sec;
478
ip->i_birthnsec = ts.tv_nsec;
479
480
vn_set_state(vp, VSTATE_CONSTRUCTED);
481
*vpp = vp;
482
483
return (0);
484
485
noinodes:
486
EXT2_UNLOCK(ump);
487
SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes");
488
return (ENOSPC);
489
}
490
491
/*
492
* 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h
493
*/
494
uint64_t
495
e2fs_gd_get_b_bitmap(struct ext2_gd *gd)
496
{
497
498
return (((uint64_t)(le32toh(gd->ext4bgd_b_bitmap_hi)) << 32) |
499
le32toh(gd->ext2bgd_b_bitmap));
500
}
501
502
uint64_t
503
e2fs_gd_get_i_bitmap(struct ext2_gd *gd)
504
{
505
506
return (((uint64_t)(le32toh(gd->ext4bgd_i_bitmap_hi)) << 32) |
507
le32toh(gd->ext2bgd_i_bitmap));
508
}
509
510
uint64_t
511
e2fs_gd_get_i_tables(struct ext2_gd *gd)
512
{
513
514
return (((uint64_t)(le32toh(gd->ext4bgd_i_tables_hi)) << 32) |
515
le32toh(gd->ext2bgd_i_tables));
516
}
517
518
static uint32_t
519
e2fs_gd_get_nbfree(struct ext2_gd *gd)
520
{
521
522
return (((uint32_t)(le16toh(gd->ext4bgd_nbfree_hi)) << 16) |
523
le16toh(gd->ext2bgd_nbfree));
524
}
525
526
static void
527
e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val)
528
{
529
530
gd->ext2bgd_nbfree = htole16(val & 0xffff);
531
gd->ext4bgd_nbfree_hi = htole16(val >> 16);
532
}
533
534
static uint32_t
535
e2fs_gd_get_nifree(struct ext2_gd *gd)
536
{
537
538
return (((uint32_t)(le16toh(gd->ext4bgd_nifree_hi)) << 16) |
539
le16toh(gd->ext2bgd_nifree));
540
}
541
542
static void
543
e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val)
544
{
545
546
gd->ext2bgd_nifree = htole16(val & 0xffff);
547
gd->ext4bgd_nifree_hi = htole16(val >> 16);
548
}
549
550
uint32_t
551
e2fs_gd_get_ndirs(struct ext2_gd *gd)
552
{
553
554
return (((uint32_t)(le16toh(gd->ext4bgd_ndirs_hi)) << 16) |
555
le16toh(gd->ext2bgd_ndirs));
556
}
557
558
static void
559
e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val)
560
{
561
562
gd->ext2bgd_ndirs = htole16(val & 0xffff);
563
gd->ext4bgd_ndirs_hi = htole16(val >> 16);
564
}
565
566
static uint32_t
567
e2fs_gd_get_i_unused(struct ext2_gd *gd)
568
{
569
return ((uint32_t)(le16toh(gd->ext4bgd_i_unused_hi) << 16) |
570
le16toh(gd->ext4bgd_i_unused));
571
}
572
573
static void
574
e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val)
575
{
576
577
gd->ext4bgd_i_unused = htole16(val & 0xffff);
578
gd->ext4bgd_i_unused_hi = htole16(val >> 16);
579
}
580
581
/*
582
* Find a cylinder to place a directory.
583
*
584
* The policy implemented by this algorithm is to allocate a
585
* directory inode in the same cylinder group as its parent
586
* directory, but also to reserve space for its files inodes
587
* and data. Restrict the number of directories which may be
588
* allocated one after another in the same cylinder group
589
* without intervening allocation of files.
590
*
591
* If we allocate a first level directory then force allocation
592
* in another cylinder group.
593
*
594
*/
595
static u_long
596
ext2_dirpref(struct inode *pip)
597
{
598
struct m_ext2fs *fs;
599
int cg, prefcg, cgsize;
600
uint64_t avgbfree, minbfree;
601
u_int avgifree, avgndir, curdirsize;
602
u_int minifree, maxndir;
603
u_int mincg, minndir;
604
u_int dirsize, maxcontigdirs;
605
606
mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
607
fs = pip->i_e2fs;
608
609
avgifree = fs->e2fs_ficount / fs->e2fs_gcount;
610
avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount;
611
avgndir = fs->e2fs_total_dir / fs->e2fs_gcount;
612
613
/*
614
* Force allocation in another cg if creating a first level dir.
615
*/
616
ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
617
if (ITOV(pip)->v_vflag & VV_ROOT) {
618
prefcg = arc4random() % fs->e2fs_gcount;
619
mincg = prefcg;
620
minndir = fs->e2fs_ipg;
621
for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
622
if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir &&
623
e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree &&
624
e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) {
625
mincg = cg;
626
minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]);
627
}
628
for (cg = 0; cg < prefcg; cg++)
629
if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir &&
630
e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree &&
631
e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) {
632
mincg = cg;
633
minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]);
634
}
635
return (mincg);
636
}
637
/*
638
* Count various limits which used for
639
* optimal allocation of a directory inode.
640
*/
641
maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
642
minifree = avgifree - avgifree / 4;
643
if (minifree < 1)
644
minifree = 1;
645
minbfree = avgbfree - avgbfree / 4;
646
if (minbfree < 1)
647
minbfree = 1;
648
cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
649
dirsize = AVGDIRSIZE;
650
curdirsize = avgndir ?
651
(cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
652
if (dirsize < curdirsize)
653
dirsize = curdirsize;
654
maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
655
maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
656
if (maxcontigdirs == 0)
657
maxcontigdirs = 1;
658
659
/*
660
* Limit number of dirs in one cg and reserve space for
661
* regular files, but only if we have no deficit in
662
* inodes or space.
663
*/
664
prefcg = ino_to_cg(fs, pip->i_number);
665
for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
666
if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir &&
667
e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree &&
668
e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) {
669
if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
670
return (cg);
671
}
672
for (cg = 0; cg < prefcg; cg++)
673
if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir &&
674
e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree &&
675
e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) {
676
if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
677
return (cg);
678
}
679
/*
680
* This is a backstop when we have deficit in space.
681
*/
682
for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
683
if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree)
684
return (cg);
685
for (cg = 0; cg < prefcg; cg++)
686
if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree)
687
break;
688
return (cg);
689
}
690
691
/*
692
* Select the desired position for the next block in a file.
693
*
694
* we try to mimic what Remy does in inode_getblk/block_getblk
695
*
696
* we note: blocknr == 0 means that we're about to allocate either
697
* a direct block or a pointer block at the first level of indirection
698
* (In other words, stuff that will go in i_db[] or i_ib[])
699
*
700
* blocknr != 0 means that we're allocating a block that is none
701
* of the above. Then, blocknr tells us the number of the block
702
* that will hold the pointer
703
*/
704
e4fs_daddr_t
705
ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap,
706
e2fs_daddr_t blocknr)
707
{
708
struct m_ext2fs *fs;
709
int tmp;
710
711
fs = ip->i_e2fs;
712
713
mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
714
715
/*
716
* If the next block is actually what we thought it is, then set the
717
* goal to what we thought it should be.
718
*/
719
if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
720
return ip->i_next_alloc_goal;
721
722
/*
723
* Now check whether we were provided with an array that basically
724
* tells us previous blocks to which we want to stay close.
725
*/
726
if (bap)
727
for (tmp = indx - 1; tmp >= 0; tmp--)
728
if (bap[tmp])
729
return (le32toh(bap[tmp]));
730
731
/*
732
* Else lets fall back to the blocknr or, if there is none, follow
733
* the rule that a block should be allocated near its inode.
734
*/
735
return (blocknr ? blocknr :
736
(e2fs_daddr_t)(ip->i_block_group *
737
EXT2_BLOCKS_PER_GROUP(fs)) + le32toh(fs->e2fs->e2fs_first_dblock));
738
}
739
740
/*
741
* Implement the cylinder overflow algorithm.
742
*
743
* The policy implemented by this algorithm is:
744
* 1) allocate the block in its requested cylinder group.
745
* 2) quadratically rehash on the cylinder group number.
746
* 3) brute force search for a free block.
747
*/
748
static e4fs_daddr_t
749
ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
750
daddr_t (*allocator) (struct inode *, int, daddr_t, int))
751
{
752
struct m_ext2fs *fs;
753
e4fs_daddr_t result;
754
int i, icg = cg;
755
756
mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
757
fs = ip->i_e2fs;
758
/*
759
* 1: preferred cylinder group
760
*/
761
result = (*allocator)(ip, cg, pref, size);
762
if (result)
763
return (result);
764
/*
765
* 2: quadratic rehash
766
*/
767
for (i = 1; i < fs->e2fs_gcount; i *= 2) {
768
cg += i;
769
if (cg >= fs->e2fs_gcount)
770
cg -= fs->e2fs_gcount;
771
result = (*allocator)(ip, cg, 0, size);
772
if (result)
773
return (result);
774
}
775
/*
776
* 3: brute force search
777
* Note that we start at i == 2, since 0 was checked initially,
778
* and 1 is always checked in the quadratic rehash.
779
*/
780
cg = (icg + 2) % fs->e2fs_gcount;
781
for (i = 2; i < fs->e2fs_gcount; i++) {
782
result = (*allocator)(ip, cg, 0, size);
783
if (result)
784
return (result);
785
cg++;
786
if (cg == fs->e2fs_gcount)
787
cg = 0;
788
}
789
return (0);
790
}
791
792
static uint64_t
793
ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg)
794
{
795
796
if (!ext2_cg_has_sb(fs, cg))
797
return (0);
798
799
if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG))
800
return (le32toh(fs->e2fs->e3fs_first_meta_bg));
801
802
return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) /
803
EXT2_DESCS_PER_BLOCK(fs));
804
}
805
806
static uint64_t
807
ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg)
808
{
809
unsigned long metagroup;
810
int first, last;
811
812
metagroup = cg / EXT2_DESCS_PER_BLOCK(fs);
813
first = metagroup * EXT2_DESCS_PER_BLOCK(fs);
814
last = first + EXT2_DESCS_PER_BLOCK(fs) - 1;
815
816
if (cg == first || cg == first + 1 || cg == last)
817
return (1);
818
819
return (0);
820
}
821
822
uint64_t
823
ext2_cg_number_gdb(struct m_ext2fs *fs, int cg)
824
{
825
unsigned long first_meta_bg, metagroup;
826
827
first_meta_bg = le32toh(fs->e2fs->e3fs_first_meta_bg);
828
metagroup = cg / EXT2_DESCS_PER_BLOCK(fs);
829
830
if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
831
metagroup < first_meta_bg)
832
return (ext2_cg_number_gdb_nometa(fs, cg));
833
834
return ext2_cg_number_gdb_meta(fs, cg);
835
}
836
837
static int
838
ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg)
839
{
840
int number;
841
842
number = ext2_cg_has_sb(fs, cg);
843
844
if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
845
cg < le32toh(fs->e2fs->e3fs_first_meta_bg) *
846
EXT2_DESCS_PER_BLOCK(fs)) {
847
if (number) {
848
number += ext2_cg_number_gdb(fs, cg);
849
number += le16toh(fs->e2fs->e2fs_reserved_ngdb);
850
}
851
} else {
852
number += ext2_cg_number_gdb(fs, cg);
853
}
854
855
return (number);
856
}
857
858
static void
859
ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
860
{
861
int i;
862
863
if (start_bit >= end_bit)
864
return;
865
866
for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
867
setbit(bitmap, i);
868
if (i < end_bit)
869
memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
870
}
871
872
static int
873
ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block)
874
{
875
876
return ((block - le32toh(fs->e2fs->e2fs_first_dblock)) /
877
fs->e2fs_bsize);
878
}
879
880
static int
881
ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg)
882
{
883
884
return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0);
885
}
886
887
static int
888
ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp)
889
{
890
int bit, bit_max, inodes_per_block;
891
uint64_t start, tmp;
892
893
if (!(le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_BLOCK_UNINIT))
894
return (0);
895
896
memset(bp->b_data, 0, fs->e2fs_bsize);
897
898
bit_max = ext2_number_base_meta_blocks(fs, cg);
899
if ((bit_max >> 3) >= fs->e2fs_bsize)
900
return (EINVAL);
901
902
for (bit = 0; bit < bit_max; bit++)
903
setbit(bp->b_data, bit);
904
905
start = (uint64_t)cg * fs->e2fs_bpg +
906
le32toh(fs->e2fs->e2fs_first_dblock);
907
908
/* Set bits for block and inode bitmaps, and inode table. */
909
tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]);
910
if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
911
ext2_block_in_group(fs, tmp, cg))
912
setbit(bp->b_data, tmp - start);
913
914
tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]);
915
if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
916
ext2_block_in_group(fs, tmp, cg))
917
setbit(bp->b_data, tmp - start);
918
919
tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]);
920
inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs);
921
while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) +
922
fs->e2fs_ipg / inodes_per_block ) {
923
if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
924
ext2_block_in_group(fs, tmp, cg))
925
setbit(bp->b_data, tmp - start);
926
tmp++;
927
}
928
929
/*
930
* Also if the number of blocks within the group is less than
931
* the blocksize * 8 ( which is the size of bitmap ), set rest
932
* of the block bitmap to 1
933
*/
934
ext2_mark_bitmap_end(fs->e2fs_bpg, fs->e2fs_bsize * 8,
935
bp->b_data);
936
937
/* Clean the flag */
938
fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh(
939
fs->e2fs_gd[cg].ext4bgd_flags) & ~EXT2_BG_BLOCK_UNINIT);
940
941
return (0);
942
}
943
944
static int
945
ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg)
946
{
947
struct ext2_gd *gd;
948
uint64_t group_first_block;
949
unsigned int offset, max_bit;
950
951
if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) {
952
/*
953
* It is not possible to check block bitmap in case of this
954
* feature, because the inode and block bitmaps and inode table
955
* blocks may not be in the group at all.
956
* So, skip check in this case.
957
*/
958
return (0);
959
}
960
961
gd = &fs->e2fs_gd[cg];
962
max_bit = fs->e2fs_fpg;
963
group_first_block = ((uint64_t)cg) * fs->e2fs_fpg +
964
le32toh(fs->e2fs->e2fs_first_dblock);
965
966
/* Check block bitmap block number */
967
offset = e2fs_gd_get_b_bitmap(gd) - group_first_block;
968
if (offset >= max_bit || !isset(bp->b_data, offset)) {
969
SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
970
"bad block bitmap, group", cg);
971
return (EINVAL);
972
}
973
974
/* Check inode bitmap block number */
975
offset = e2fs_gd_get_i_bitmap(gd) - group_first_block;
976
if (offset >= max_bit || !isset(bp->b_data, offset)) {
977
SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
978
"bad inode bitmap", cg);
979
return (EINVAL);
980
}
981
982
/* Check inode table */
983
offset = e2fs_gd_get_i_tables(gd) - group_first_block;
984
if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) {
985
SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
986
"bad inode table, group", cg);
987
return (EINVAL);
988
}
989
990
return (0);
991
}
992
993
/*
994
* Determine whether a block can be allocated.
995
*
996
* Check to see if a block of the appropriate size is available,
997
* and if it is, allocate it.
998
*/
999
static daddr_t
1000
ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
1001
{
1002
struct m_ext2fs *fs;
1003
struct buf *bp;
1004
struct ext2mount *ump;
1005
daddr_t bno, runstart, runlen;
1006
int bit, loc, end, error, start;
1007
char *bbp;
1008
/* XXX ondisk32 */
1009
fs = ip->i_e2fs;
1010
ump = ip->i_ump;
1011
if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0)
1012
return (0);
1013
1014
EXT2_UNLOCK(ump);
1015
error = bread(ip->i_devvp, fsbtodb(fs,
1016
e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
1017
(int)fs->e2fs_bsize, NOCRED, &bp);
1018
if (error)
1019
goto fail;
1020
1021
if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
1022
EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
1023
error = ext2_cg_block_bitmap_init(fs, cg, bp);
1024
if (error)
1025
goto fail;
1026
1027
ext2_gd_b_bitmap_csum_set(fs, cg, bp);
1028
}
1029
error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp);
1030
if (error)
1031
goto fail;
1032
1033
error = ext2_b_bitmap_validate(fs,bp, cg);
1034
if (error)
1035
goto fail;
1036
1037
/*
1038
* Check, that another thread did not not allocate the last block in
1039
* this group while we were waiting for the buffer.
1040
*/
1041
if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0)
1042
goto fail;
1043
1044
bbp = (char *)bp->b_data;
1045
1046
if (dtog(fs, bpref) != cg)
1047
bpref = 0;
1048
if (bpref != 0) {
1049
bpref = dtogd(fs, bpref);
1050
/*
1051
* if the requested block is available, use it
1052
*/
1053
if (isclr(bbp, bpref)) {
1054
bno = bpref;
1055
goto gotit;
1056
}
1057
}
1058
/*
1059
* no blocks in the requested cylinder, so take next
1060
* available one in this cylinder group.
1061
* first try to get 8 contigous blocks, then fall back to a single
1062
* block.
1063
*/
1064
if (bpref)
1065
start = dtogd(fs, bpref) / NBBY;
1066
else
1067
start = 0;
1068
end = howmany(fs->e2fs_fpg, NBBY);
1069
retry:
1070
runlen = 0;
1071
runstart = 0;
1072
for (loc = start; loc < end; loc++) {
1073
if (bbp[loc] == (char)0xff) {
1074
runlen = 0;
1075
continue;
1076
}
1077
1078
/* Start of a run, find the number of high clear bits. */
1079
if (runlen == 0) {
1080
bit = fls(bbp[loc]);
1081
runlen = NBBY - bit;
1082
runstart = loc * NBBY + bit;
1083
} else if (bbp[loc] == 0) {
1084
/* Continue a run. */
1085
runlen += NBBY;
1086
} else {
1087
/*
1088
* Finish the current run. If it isn't long
1089
* enough, start a new one.
1090
*/
1091
bit = ffs(bbp[loc]) - 1;
1092
runlen += bit;
1093
if (runlen >= 8) {
1094
bno = runstart;
1095
goto gotit;
1096
}
1097
1098
/* Run was too short, start a new one. */
1099
bit = fls(bbp[loc]);
1100
runlen = NBBY - bit;
1101
runstart = loc * NBBY + bit;
1102
}
1103
1104
/* If the current run is long enough, use it. */
1105
if (runlen >= 8) {
1106
bno = runstart;
1107
goto gotit;
1108
}
1109
}
1110
if (start != 0) {
1111
end = start;
1112
start = 0;
1113
goto retry;
1114
}
1115
bno = ext2_mapsearch(fs, bbp, bpref);
1116
if (bno < 0)
1117
goto fail;
1118
1119
gotit:
1120
#ifdef INVARIANTS
1121
if (isset(bbp, bno)) {
1122
printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n",
1123
cg, (intmax_t)bno, fs->e2fs_fsmnt);
1124
panic("ext2fs_alloccg: dup alloc");
1125
}
1126
#endif
1127
setbit(bbp, bno);
1128
EXT2_LOCK(ump);
1129
ext2_clusteracct(fs, bbp, cg, bno, -1);
1130
fs->e2fs_fbcount--;
1131
e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
1132
e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1);
1133
fs->e2fs_fmod = 1;
1134
EXT2_UNLOCK(ump);
1135
ext2_gd_b_bitmap_csum_set(fs, cg, bp);
1136
bdwrite(bp);
1137
return (((uint64_t)cg) * fs->e2fs_fpg +
1138
le32toh(fs->e2fs->e2fs_first_dblock) + bno);
1139
1140
fail:
1141
brelse(bp);
1142
EXT2_LOCK(ump);
1143
return (0);
1144
}
1145
1146
/*
1147
* Determine whether a cluster can be allocated.
1148
*/
1149
static daddr_t
1150
ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
1151
{
1152
struct m_ext2fs *fs;
1153
struct ext2mount *ump;
1154
struct buf *bp;
1155
char *bbp;
1156
int bit, error, got, i, loc, run;
1157
int32_t *lp;
1158
daddr_t bno;
1159
1160
fs = ip->i_e2fs;
1161
ump = ip->i_ump;
1162
1163
if (fs->e2fs_maxcluster[cg] < len)
1164
return (0);
1165
1166
EXT2_UNLOCK(ump);
1167
error = bread(ip->i_devvp,
1168
fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
1169
(int)fs->e2fs_bsize, NOCRED, &bp);
1170
if (error)
1171
goto fail_lock;
1172
1173
bbp = (char *)bp->b_data;
1174
EXT2_LOCK(ump);
1175
/*
1176
* Check to see if a cluster of the needed size (or bigger) is
1177
* available in this cylinder group.
1178
*/
1179
lp = &fs->e2fs_clustersum[cg].cs_sum[len];
1180
for (i = len; i <= fs->e2fs_contigsumsize; i++)
1181
if (*lp++ > 0)
1182
break;
1183
if (i > fs->e2fs_contigsumsize) {
1184
/*
1185
* Update the cluster summary information to reflect
1186
* the true maximum-sized cluster so that future cluster
1187
* allocation requests can avoid reading the bitmap only
1188
* to find no cluster.
1189
*/
1190
lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
1191
for (i = len - 1; i > 0; i--)
1192
if (*lp-- > 0)
1193
break;
1194
fs->e2fs_maxcluster[cg] = i;
1195
goto fail;
1196
}
1197
EXT2_UNLOCK(ump);
1198
1199
/* Search the bitmap to find a big enough cluster like in FFS. */
1200
if (dtog(fs, bpref) != cg)
1201
bpref = 0;
1202
if (bpref != 0)
1203
bpref = dtogd(fs, bpref);
1204
loc = bpref / NBBY;
1205
bit = 1 << (bpref % NBBY);
1206
for (run = 0, got = bpref; got < fs->e2fs_fpg; got++) {
1207
if ((bbp[loc] & bit) != 0)
1208
run = 0;
1209
else {
1210
run++;
1211
if (run == len)
1212
break;
1213
}
1214
if ((got & (NBBY - 1)) != (NBBY - 1))
1215
bit <<= 1;
1216
else {
1217
loc++;
1218
bit = 1;
1219
}
1220
}
1221
1222
if (got >= fs->e2fs_fpg)
1223
goto fail_lock;
1224
1225
/* Allocate the cluster that we found. */
1226
for (i = 1; i < len; i++)
1227
if (!isclr(bbp, got - run + i))
1228
panic("ext2_clusteralloc: map mismatch");
1229
1230
bno = got - run + 1;
1231
if (bno >= fs->e2fs_fpg)
1232
panic("ext2_clusteralloc: allocated out of group");
1233
1234
EXT2_LOCK(ump);
1235
for (i = 0; i < len; i += fs->e2fs_fpb) {
1236
setbit(bbp, bno + i);
1237
ext2_clusteracct(fs, bbp, cg, bno + i, -1);
1238
fs->e2fs_fbcount--;
1239
e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
1240
e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1);
1241
}
1242
fs->e2fs_fmod = 1;
1243
EXT2_UNLOCK(ump);
1244
1245
bdwrite(bp);
1246
return (cg * fs->e2fs_fpg + le32toh(fs->e2fs->e2fs_first_dblock)
1247
+ bno);
1248
1249
fail_lock:
1250
EXT2_LOCK(ump);
1251
fail:
1252
brelse(bp);
1253
return (0);
1254
}
1255
1256
static int
1257
ext2_zero_inode_table(struct inode *ip, int cg)
1258
{
1259
struct m_ext2fs *fs;
1260
struct buf *bp;
1261
int i, all_blks, used_blks;
1262
1263
fs = ip->i_e2fs;
1264
1265
if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_INODE_ZEROED)
1266
return (0);
1267
1268
all_blks = le16toh(fs->e2fs->e2fs_inode_size) * fs->e2fs_ipg /
1269
fs->e2fs_bsize;
1270
1271
used_blks = howmany(fs->e2fs_ipg -
1272
e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]),
1273
fs->e2fs_bsize / EXT2_INODE_SIZE(fs));
1274
1275
for (i = 0; i < all_blks - used_blks; i++) {
1276
bp = getblk(ip->i_devvp, fsbtodb(fs,
1277
e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i),
1278
fs->e2fs_bsize, 0, 0, 0);
1279
if (!bp)
1280
return (EIO);
1281
1282
vfs_bio_bzero_buf(bp, 0, fs->e2fs_bsize);
1283
bawrite(bp);
1284
}
1285
1286
fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh(
1287
fs->e2fs_gd[cg].ext4bgd_flags) | EXT2_BG_INODE_ZEROED);
1288
1289
return (0);
1290
}
1291
1292
static void
1293
ext2_fix_bitmap_tail(unsigned char *bitmap, int first, int last)
1294
{
1295
int i;
1296
1297
for (i = first; i <= last; i++)
1298
bitmap[i] = 0xff;
1299
}
1300
1301
/*
1302
* Determine whether an inode can be allocated.
1303
*
1304
* Check to see if an inode is available, and if it is,
1305
* allocate it using tode in the specified cylinder group.
1306
*/
1307
static daddr_t
1308
ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
1309
{
1310
struct m_ext2fs *fs;
1311
struct buf *bp;
1312
struct ext2mount *ump;
1313
int error, start, len, ifree, ibytes;
1314
char *ibp, *loc;
1315
1316
ipref--; /* to avoid a lot of (ipref -1) */
1317
if (ipref == -1)
1318
ipref = 0;
1319
fs = ip->i_e2fs;
1320
ump = ip->i_ump;
1321
if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0)
1322
return (0);
1323
EXT2_UNLOCK(ump);
1324
error = bread(ip->i_devvp, fsbtodb(fs,
1325
e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])),
1326
(int)fs->e2fs_bsize, NOCRED, &bp);
1327
if (error) {
1328
EXT2_LOCK(ump);
1329
return (0);
1330
}
1331
if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
1332
EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
1333
if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) &
1334
EXT2_BG_INODE_UNINIT) {
1335
ibytes = fs->e2fs_ipg / 8;
1336
memset(bp->b_data, 0, ibytes - 1);
1337
ext2_fix_bitmap_tail(bp->b_data, ibytes,
1338
fs->e2fs_bsize - 1);
1339
fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh(
1340
fs->e2fs_gd[cg].ext4bgd_flags) &
1341
~EXT2_BG_INODE_UNINIT);
1342
}
1343
ext2_gd_i_bitmap_csum_set(fs, cg, bp);
1344
error = ext2_zero_inode_table(ip, cg);
1345
if (error) {
1346
brelse(bp);
1347
EXT2_LOCK(ump);
1348
return (0);
1349
}
1350
}
1351
error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp);
1352
if (error) {
1353
brelse(bp);
1354
EXT2_LOCK(ump);
1355
return (0);
1356
}
1357
if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) {
1358
/*
1359
* Another thread allocated the last i-node in this
1360
* group while we were waiting for the buffer.
1361
*/
1362
brelse(bp);
1363
EXT2_LOCK(ump);
1364
return (0);
1365
}
1366
ibp = (char *)bp->b_data;
1367
if (ipref) {
1368
ipref %= fs->e2fs_ipg;
1369
if (isclr(ibp, ipref))
1370
goto gotit;
1371
}
1372
start = ipref / NBBY;
1373
len = howmany(fs->e2fs_ipg - ipref, NBBY);
1374
loc = memcchr(&ibp[start], 0xff, len);
1375
if (loc == NULL) {
1376
len = start + 1;
1377
start = 0;
1378
loc = memcchr(&ibp[start], 0xff, len);
1379
if (loc == NULL) {
1380
SDT_PROBE3(ext2fs, , alloc,
1381
ext2_nodealloccg_bmap_corrupted, cg, ipref,
1382
fs->e2fs_fsmnt);
1383
brelse(bp);
1384
EXT2_LOCK(ump);
1385
return (0);
1386
}
1387
}
1388
ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1;
1389
gotit:
1390
setbit(ibp, ipref);
1391
EXT2_LOCK(ump);
1392
e2fs_gd_set_nifree(&fs->e2fs_gd[cg],
1393
e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1);
1394
if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
1395
EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
1396
ifree = fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]);
1397
if (ipref + 1 > ifree)
1398
e2fs_gd_set_i_unused(&fs->e2fs_gd[cg],
1399
fs->e2fs_ipg - (ipref + 1));
1400
}
1401
fs->e2fs_ficount--;
1402
fs->e2fs_fmod = 1;
1403
if ((mode & IFMT) == IFDIR) {
1404
e2fs_gd_set_ndirs(&fs->e2fs_gd[cg],
1405
e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1);
1406
fs->e2fs_total_dir++;
1407
}
1408
EXT2_UNLOCK(ump);
1409
ext2_gd_i_bitmap_csum_set(fs, cg, bp);
1410
bdwrite(bp);
1411
return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1);
1412
}
1413
1414
/*
1415
* Free a block or fragment.
1416
*
1417
*/
1418
void
1419
ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size)
1420
{
1421
struct m_ext2fs *fs;
1422
struct buf *bp;
1423
struct ext2mount *ump;
1424
int cg, error;
1425
char *bbp;
1426
1427
fs = ip->i_e2fs;
1428
ump = ip->i_ump;
1429
cg = dtog(fs, bno);
1430
if (bno >= fs->e2fs_bcount) {
1431
SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block,
1432
ip->i_number, bno);
1433
return;
1434
}
1435
error = bread(ip->i_devvp,
1436
fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
1437
(int)fs->e2fs_bsize, NOCRED, &bp);
1438
if (error) {
1439
return;
1440
}
1441
bbp = (char *)bp->b_data;
1442
bno = dtogd(fs, bno);
1443
if (isclr(bbp, bno)) {
1444
panic("ext2_blkfree: freeing free block %lld, fs=%s",
1445
(long long)bno, fs->e2fs_fsmnt);
1446
}
1447
clrbit(bbp, bno);
1448
EXT2_LOCK(ump);
1449
ext2_clusteracct(fs, bbp, cg, bno, 1);
1450
fs->e2fs_fbcount++;
1451
e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
1452
e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1);
1453
fs->e2fs_fmod = 1;
1454
EXT2_UNLOCK(ump);
1455
ext2_gd_b_bitmap_csum_set(fs, cg, bp);
1456
bdwrite(bp);
1457
}
1458
1459
/*
1460
* Free an inode.
1461
*
1462
*/
1463
int
1464
ext2_vfree(struct vnode *pvp, ino_t ino, int mode)
1465
{
1466
struct m_ext2fs *fs;
1467
struct inode *pip;
1468
struct buf *bp;
1469
struct ext2mount *ump;
1470
int error, cg;
1471
char *ibp;
1472
1473
pip = VTOI(pvp);
1474
fs = pip->i_e2fs;
1475
ump = pip->i_ump;
1476
if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
1477
panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s",
1478
pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt);
1479
1480
cg = ino_to_cg(fs, ino);
1481
error = bread(pip->i_devvp,
1482
fsbtodb(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])),
1483
(int)fs->e2fs_bsize, NOCRED, &bp);
1484
if (error) {
1485
return (0);
1486
}
1487
ibp = (char *)bp->b_data;
1488
ino = (ino - 1) % fs->e2fs_ipg;
1489
if (isclr(ibp, ino)) {
1490
SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree,
1491
fs->e2fs_fsmnt, ino);
1492
if (fs->e2fs_ronly == 0)
1493
panic("ext2_vfree: freeing free inode");
1494
}
1495
clrbit(ibp, ino);
1496
EXT2_LOCK(ump);
1497
fs->e2fs_ficount++;
1498
e2fs_gd_set_nifree(&fs->e2fs_gd[cg],
1499
e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1);
1500
if ((mode & IFMT) == IFDIR) {
1501
e2fs_gd_set_ndirs(&fs->e2fs_gd[cg],
1502
e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1);
1503
fs->e2fs_total_dir--;
1504
}
1505
fs->e2fs_fmod = 1;
1506
EXT2_UNLOCK(ump);
1507
ext2_gd_i_bitmap_csum_set(fs, cg, bp);
1508
bdwrite(bp);
1509
return (0);
1510
}
1511
1512
/*
1513
* Find a block in the specified cylinder group.
1514
*
1515
* It is a panic if a request is made to find a block if none are
1516
* available.
1517
*/
1518
static daddr_t
1519
ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
1520
{
1521
char *loc;
1522
int start, len;
1523
1524
/*
1525
* find the fragment by searching through the free block
1526
* map for an appropriate bit pattern
1527
*/
1528
if (bpref)
1529
start = dtogd(fs, bpref) / NBBY;
1530
else
1531
start = 0;
1532
len = howmany(fs->e2fs_fpg, NBBY) - start;
1533
loc = memcchr(&bbp[start], 0xff, len);
1534
if (loc == NULL) {
1535
len = start + 1;
1536
start = 0;
1537
loc = memcchr(&bbp[start], 0xff, len);
1538
if (loc == NULL) {
1539
panic("ext2_mapsearch: map corrupted: start=%d, len=%d,"
1540
"fs=%s", start, len, fs->e2fs_fsmnt);
1541
/* NOTREACHED */
1542
}
1543
}
1544
return ((loc - bbp) * NBBY + ffs(~*loc) - 1);
1545
}
1546
1547
int
1548
ext2_cg_has_sb(struct m_ext2fs *fs, int cg)
1549
{
1550
int a3, a5, a7;
1551
1552
if (cg == 0)
1553
return (1);
1554
1555
if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) {
1556
if (cg == le32toh(fs->e2fs->e4fs_backup_bgs[0]) ||
1557
cg == le32toh(fs->e2fs->e4fs_backup_bgs[1]))
1558
return (1);
1559
return (0);
1560
}
1561
1562
if ((cg <= 1) ||
1563
!EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER))
1564
return (1);
1565
1566
if (!(cg & 1))
1567
return (0);
1568
1569
for (a3 = 3, a5 = 5, a7 = 7;
1570
a3 <= cg || a5 <= cg || a7 <= cg;
1571
a3 *= 3, a5 *= 5, a7 *= 7)
1572
if (cg == a3 || cg == a5 || cg == a7)
1573
return (1);
1574
return (0);
1575
}
1576
1577