/*
 * Source: GitHub repository freebsd/freebsd-src, path sys/geom/geom_disk.c
 * (main branch), as rendered by the CoCalc file viewer.
 */
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 2002 Poul-Henning Kamp
5
* Copyright (c) 2002 Networks Associates Technology, Inc.
6
* All rights reserved.
7
*
8
* This software was developed for the FreeBSD Project by Poul-Henning Kamp
9
* and NAI Labs, the Security Research Division of Network Associates, Inc.
10
* under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
11
* DARPA CHATS research program.
12
*
13
* Redistribution and use in source and binary forms, with or without
14
* modification, are permitted provided that the following conditions
15
* are met:
16
* 1. Redistributions of source code must retain the above copyright
17
* notice, this list of conditions and the following disclaimer.
18
* 2. Redistributions in binary form must reproduce the above copyright
19
* notice, this list of conditions and the following disclaimer in the
20
* documentation and/or other materials provided with the distribution.
21
* 3. The names of the authors may not be used to endorse or promote
22
* products derived from this software without specific prior written
23
* permission.
24
*
25
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
26
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
29
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35
* SUCH DAMAGE.
36
*/
37
38
#include <sys/cdefs.h>
39
#include "opt_geom.h"
40
41
#include <sys/param.h>
42
#include <sys/systm.h>
43
#include <sys/kernel.h>
44
#include <sys/sysctl.h>
45
#include <sys/bio.h>
46
#include <sys/ctype.h>
47
#include <sys/devctl.h>
48
#include <sys/fcntl.h>
49
#include <sys/malloc.h>
50
#include <sys/msan.h>
51
#include <sys/sbuf.h>
52
#include <sys/devicestat.h>
53
54
#include <sys/lock.h>
55
#include <sys/mutex.h>
56
#include <geom/geom.h>
57
#include <geom/geom_disk.h>
58
#include <geom/geom_int.h>
59
60
#include <dev/led/led.h>
61
62
#include <machine/bus.h>
63
64
struct g_disk_softc {
65
struct disk *dp;
66
struct devstat *d_devstat;
67
struct sysctl_ctx_list sysctl_ctx;
68
struct sysctl_oid *sysctl_tree;
69
char led[64];
70
uint32_t state;
71
struct mtx done_mtx;
72
bool flush_notsup_succeed;
73
};
74
75
static g_access_t g_disk_access;
76
static g_start_t g_disk_start;
77
static g_ioctl_t g_disk_ioctl;
78
static g_dumpconf_t g_disk_dumpconf;
79
static g_provgone_t g_disk_providergone;
80
81
static int g_disk_sysctl_flags(SYSCTL_HANDLER_ARGS);
82
83
static struct g_class g_disk_class = {
84
.name = G_DISK_CLASS_NAME,
85
.version = G_VERSION,
86
.start = g_disk_start,
87
.access = g_disk_access,
88
.ioctl = g_disk_ioctl,
89
.providergone = g_disk_providergone,
90
.dumpconf = g_disk_dumpconf,
91
};
92
93
SYSCTL_DECL(_kern_geom);
94
static SYSCTL_NODE(_kern_geom, OID_AUTO, disk, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
95
"GEOM_DISK stuff");
96
97
DECLARE_GEOM_CLASS(g_disk_class, g_disk);
98
99
static int
100
g_disk_access(struct g_provider *pp, int r, int w, int e)
101
{
102
struct disk *dp;
103
struct g_disk_softc *sc;
104
int error;
105
106
g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)",
107
pp->name, r, w, e);
108
g_topology_assert();
109
sc = pp->private;
110
if ((dp = sc->dp) == NULL || dp->d_destroyed) {
111
/*
112
* Allow decreasing access count even if disk is not
113
* available anymore.
114
*/
115
if (r <= 0 && w <= 0 && e <= 0)
116
return (0);
117
return (ENXIO);
118
}
119
r += pp->acr;
120
w += pp->acw;
121
e += pp->ace;
122
error = 0;
123
if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
124
/*
125
* It would be better to defer this decision to d_open if
126
* it was able to take flags.
127
*/
128
if (w > 0 && (dp->d_flags & DISKFLAG_WRITE_PROTECT) != 0)
129
error = EROFS;
130
if (error == 0 && dp->d_open != NULL)
131
error = dp->d_open(dp);
132
if (bootverbose && error != 0)
133
printf("Opened disk %s -> %d\n", pp->name, error);
134
if (error != 0)
135
return (error);
136
pp->sectorsize = dp->d_sectorsize;
137
if (dp->d_maxsize == 0) {
138
printf("WARNING: Disk drive %s%d has no d_maxsize\n",
139
dp->d_name, dp->d_unit);
140
dp->d_maxsize = DFLTPHYS;
141
}
142
if (dp->d_delmaxsize == 0) {
143
if (bootverbose && dp->d_flags & DISKFLAG_CANDELETE) {
144
printf("WARNING: Disk drive %s%d has no "
145
"d_delmaxsize\n", dp->d_name, dp->d_unit);
146
}
147
dp->d_delmaxsize = dp->d_maxsize;
148
}
149
pp->stripeoffset = dp->d_stripeoffset;
150
pp->stripesize = dp->d_stripesize;
151
dp->d_flags |= DISKFLAG_OPEN;
152
/*
153
* Do not invoke resize event when initial size was zero.
154
* Some disks report its size only after first opening.
155
*/
156
if (pp->mediasize == 0)
157
pp->mediasize = dp->d_mediasize;
158
else
159
g_resize_provider(pp, dp->d_mediasize);
160
} else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
161
if (dp->d_close != NULL) {
162
error = dp->d_close(dp);
163
if (error != 0)
164
printf("Closed disk %s -> %d\n",
165
pp->name, error);
166
}
167
sc->state = G_STATE_ACTIVE;
168
if (sc->led[0] != 0)
169
led_set(sc->led, "0");
170
dp->d_flags &= ~DISKFLAG_OPEN;
171
}
172
return (error);
173
}
174
175
static void
176
g_disk_kerneldump(struct bio *bp, struct disk *dp)
177
{
178
struct g_kerneldump *gkd;
179
struct g_geom *gp;
180
181
gkd = (struct g_kerneldump*)bp->bio_data;
182
gp = bp->bio_to->geom;
183
g_trace(G_T_TOPOLOGY, "g_disk_kerneldump(%s, %jd, %jd)",
184
gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length);
185
if (dp->d_dump == NULL) {
186
g_io_deliver(bp, ENODEV);
187
return;
188
}
189
gkd->di.dumper = dp->d_dump;
190
gkd->di.priv = dp;
191
gkd->di.blocksize = dp->d_sectorsize;
192
gkd->di.maxiosize = dp->d_maxsize;
193
gkd->di.mediaoffset = gkd->offset;
194
if ((gkd->offset + gkd->length) > dp->d_mediasize)
195
gkd->length = dp->d_mediasize - gkd->offset;
196
gkd->di.mediasize = gkd->length;
197
g_io_deliver(bp, 0);
198
}
199
200
static void
201
g_disk_setstate(struct bio *bp, struct g_disk_softc *sc)
202
{
203
const char *cmd;
204
205
memcpy(&sc->state, bp->bio_data, sizeof(sc->state));
206
if (sc->led[0] != 0) {
207
switch (sc->state) {
208
case G_STATE_FAILED:
209
cmd = "1";
210
break;
211
case G_STATE_REBUILD:
212
cmd = "f5";
213
break;
214
case G_STATE_RESYNC:
215
cmd = "f1";
216
break;
217
default:
218
cmd = "0";
219
break;
220
}
221
led_set(sc->led, cmd);
222
}
223
g_io_deliver(bp, 0);
224
}
225
226
static void
227
g_disk_done(struct bio *bp)
228
{
229
struct bintime now;
230
struct bio *bp2;
231
struct g_disk_softc *sc;
232
233
/* See "notes" for why we need a mutex here */
234
sc = bp->bio_caller1;
235
bp2 = bp->bio_parent;
236
binuptime(&now);
237
mtx_lock(&sc->done_mtx);
238
if (bp2->bio_error == 0) {
239
if ((bp->bio_flags & BIO_EXTERR) != 0) {
240
bp2->bio_flags |= BIO_EXTERR;
241
bp2->bio_exterr = bp->bio_exterr;
242
} else {
243
bp2->bio_error = bp->bio_error;
244
}
245
}
246
bp2->bio_completed += bp->bio_length - bp->bio_resid;
247
248
if (bp->bio_cmd == BIO_READ)
249
kmsan_check(bp2->bio_data, bp2->bio_completed, "g_disk_done");
250
251
switch (bp->bio_cmd) {
252
case BIO_ZONE:
253
bcopy(&bp->bio_zone, &bp2->bio_zone, sizeof(bp->bio_zone));
254
/*FALLTHROUGH*/
255
case BIO_READ:
256
case BIO_WRITE:
257
case BIO_DELETE:
258
case BIO_FLUSH:
259
devstat_end_transaction_bio_bt(sc->d_devstat, bp, &now);
260
break;
261
default:
262
break;
263
}
264
bp2->bio_inbed++;
265
if (bp2->bio_children == bp2->bio_inbed) {
266
mtx_unlock(&sc->done_mtx);
267
bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed;
268
g_io_deliver(bp2, bp2->bio_error);
269
} else
270
mtx_unlock(&sc->done_mtx);
271
g_destroy_bio(bp);
272
}
273
274
static int
275
g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td)
276
{
277
struct disk *dp;
278
struct g_disk_softc *sc;
279
280
sc = pp->private;
281
dp = sc->dp;
282
KASSERT(dp != NULL && !dp->d_destroyed,
283
("g_disk_ioctl(%lx) on destroyed disk %s", cmd, pp->name));
284
285
if (dp->d_ioctl == NULL)
286
return (ENOIOCTL);
287
return (dp->d_ioctl(dp, cmd, data, fflag, td));
288
}
289
290
static off_t
291
g_disk_maxsize(struct disk *dp, struct bio *bp)
292
{
293
if (bp->bio_cmd == BIO_DELETE)
294
return (dp->d_delmaxsize);
295
return (dp->d_maxsize);
296
}
297
298
static int
299
g_disk_maxsegs(struct disk *dp, struct bio *bp)
300
{
301
return ((g_disk_maxsize(dp, bp) / PAGE_SIZE) + 1);
302
}
303
304
static void
305
g_disk_advance(struct disk *dp, struct bio *bp, off_t off)
306
{
307
308
bp->bio_offset += off;
309
bp->bio_length -= off;
310
311
if ((bp->bio_flags & BIO_VLIST) != 0) {
312
bus_dma_segment_t *seg, *end;
313
314
seg = (bus_dma_segment_t *)bp->bio_data;
315
end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
316
off += bp->bio_ma_offset;
317
while (off >= seg->ds_len) {
318
KASSERT((seg != end),
319
("vlist request runs off the end"));
320
off -= seg->ds_len;
321
seg++;
322
}
323
bp->bio_ma_offset = off;
324
bp->bio_ma_n = end - seg;
325
bp->bio_data = (void *)seg;
326
} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
327
bp->bio_ma += off / PAGE_SIZE;
328
bp->bio_ma_offset += off;
329
bp->bio_ma_offset %= PAGE_SIZE;
330
bp->bio_ma_n -= off / PAGE_SIZE;
331
} else {
332
bp->bio_data += off;
333
}
334
}
335
336
static void
337
g_disk_seg_limit(bus_dma_segment_t *seg, off_t *poffset,
338
off_t *plength, int *ppages)
339
{
340
uintptr_t seg_page_base;
341
uintptr_t seg_page_end;
342
off_t offset;
343
off_t length;
344
int seg_pages;
345
346
offset = *poffset;
347
length = *plength;
348
349
if (length > seg->ds_len - offset)
350
length = seg->ds_len - offset;
351
352
seg_page_base = trunc_page(seg->ds_addr + offset);
353
seg_page_end = round_page(seg->ds_addr + offset + length);
354
seg_pages = (seg_page_end - seg_page_base) >> PAGE_SHIFT;
355
356
if (seg_pages > *ppages) {
357
seg_pages = *ppages;
358
length = (seg_page_base + (seg_pages << PAGE_SHIFT)) -
359
(seg->ds_addr + offset);
360
}
361
362
*poffset = 0;
363
*plength -= length;
364
*ppages -= seg_pages;
365
}
366
367
static off_t
368
g_disk_vlist_limit(struct disk *dp, struct bio *bp, bus_dma_segment_t **pendseg)
369
{
370
bus_dma_segment_t *seg, *end __diagused;
371
off_t residual;
372
off_t offset;
373
int pages;
374
375
seg = (bus_dma_segment_t *)bp->bio_data;
376
end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
377
residual = bp->bio_length;
378
offset = bp->bio_ma_offset;
379
pages = g_disk_maxsegs(dp, bp);
380
while (residual != 0 && pages != 0) {
381
KASSERT((seg != end),
382
("vlist limit runs off the end"));
383
g_disk_seg_limit(seg, &offset, &residual, &pages);
384
seg++;
385
}
386
if (pendseg != NULL)
387
*pendseg = seg;
388
return (residual);
389
}
390
391
static bool
392
g_disk_limit(struct disk *dp, struct bio *bp)
393
{
394
bool limited = false;
395
off_t maxsz;
396
397
maxsz = g_disk_maxsize(dp, bp);
398
399
/*
400
* XXX: If we have a stripesize we should really use it here.
401
* Care should be taken in the delete case if this is done
402
* as deletes can be very sensitive to size given how they
403
* are processed.
404
*/
405
if (bp->bio_length > maxsz) {
406
bp->bio_length = maxsz;
407
limited = true;
408
}
409
410
if ((bp->bio_flags & BIO_VLIST) != 0) {
411
bus_dma_segment_t *firstseg, *endseg;
412
off_t residual;
413
414
firstseg = (bus_dma_segment_t*)bp->bio_data;
415
residual = g_disk_vlist_limit(dp, bp, &endseg);
416
if (residual != 0) {
417
bp->bio_ma_n = endseg - firstseg;
418
bp->bio_length -= residual;
419
limited = true;
420
}
421
} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
422
bp->bio_ma_n =
423
howmany(bp->bio_ma_offset + bp->bio_length, PAGE_SIZE);
424
}
425
426
return (limited);
427
}
428
429
static void
430
g_disk_start(struct bio *bp)
431
{
432
struct bio *bp2, *bp3;
433
struct disk *dp;
434
struct g_disk_softc *sc;
435
int error;
436
off_t off;
437
438
biotrack(bp, __func__);
439
440
sc = bp->bio_to->private;
441
dp = sc->dp;
442
KASSERT(dp != NULL && !dp->d_destroyed,
443
("g_disk_start(%p) on destroyed disk %s", bp, bp->bio_to->name));
444
error = EJUSTRETURN;
445
switch(bp->bio_cmd) {
446
case BIO_DELETE:
447
if (!(dp->d_flags & DISKFLAG_CANDELETE)) {
448
error = EOPNOTSUPP;
449
break;
450
}
451
/* fall-through */
452
case BIO_READ:
453
case BIO_WRITE:
454
KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0 ||
455
(bp->bio_flags & BIO_UNMAPPED) == 0,
456
("unmapped bio not supported by disk %s", dp->d_name));
457
458
if (bp->bio_cmd == BIO_WRITE)
459
kmsan_check_bio(bp, "g_disk_start");
460
461
off = 0;
462
bp3 = NULL;
463
bp2 = g_clone_bio(bp);
464
if (bp2 == NULL) {
465
error = ENOMEM;
466
break;
467
}
468
for (;;) {
469
if (g_disk_limit(dp, bp2)) {
470
off += bp2->bio_length;
471
472
/*
473
* To avoid a race, we need to grab the next bio
474
* before we schedule this one. See "notes".
475
*/
476
bp3 = g_clone_bio(bp);
477
if (bp3 == NULL)
478
bp->bio_error = ENOMEM;
479
}
480
bp2->bio_done = g_disk_done;
481
bp2->bio_caller1 = sc;
482
bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize;
483
bp2->bio_bcount = bp2->bio_length;
484
bp2->bio_disk = dp;
485
devstat_start_transaction_bio(dp->d_devstat, bp2);
486
dp->d_strategy(bp2);
487
488
if (bp3 == NULL)
489
break;
490
491
bp2 = bp3;
492
bp3 = NULL;
493
g_disk_advance(dp, bp2, off);
494
}
495
break;
496
case BIO_GETATTR:
497
/* Give the driver a chance to override */
498
if (dp->d_getattr != NULL) {
499
if (bp->bio_disk == NULL)
500
bp->bio_disk = dp;
501
error = dp->d_getattr(bp);
502
if (error != -1)
503
break;
504
error = EJUSTRETURN;
505
}
506
if (g_handleattr_int(bp, "GEOM::candelete",
507
(dp->d_flags & DISKFLAG_CANDELETE) != 0))
508
break;
509
else if (g_handleattr_int(bp, "GEOM::fwsectors",
510
dp->d_fwsectors))
511
break;
512
else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads))
513
break;
514
else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident))
515
break;
516
else if (g_handleattr_str(bp, "GEOM::descr", dp->d_descr))
517
break;
518
else if (g_handleattr_uint16_t(bp, "GEOM::hba_vendor",
519
dp->d_hba_vendor))
520
break;
521
else if (g_handleattr_uint16_t(bp, "GEOM::hba_device",
522
dp->d_hba_device))
523
break;
524
else if (g_handleattr_uint16_t(bp, "GEOM::hba_subvendor",
525
dp->d_hba_subvendor))
526
break;
527
else if (g_handleattr_uint16_t(bp, "GEOM::hba_subdevice",
528
dp->d_hba_subdevice))
529
break;
530
else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
531
g_disk_kerneldump(bp, dp);
532
else if (!strcmp(bp->bio_attribute, "GEOM::setstate"))
533
g_disk_setstate(bp, sc);
534
else if (g_handleattr_uint16_t(bp, "GEOM::rotation_rate",
535
dp->d_rotation_rate))
536
break;
537
else if (g_handleattr_str(bp, "GEOM::attachment",
538
dp->d_attachment))
539
break;
540
else
541
error = ENOIOCTL;
542
break;
543
case BIO_FLUSH:
544
g_trace(G_T_BIO, "g_disk_flushcache(%s)",
545
bp->bio_to->name);
546
if (!(dp->d_flags & DISKFLAG_CANFLUSHCACHE)) {
547
error = (sc->flush_notsup_succeed) ? 0 : EOPNOTSUPP;
548
break;
549
}
550
/*FALLTHROUGH*/
551
case BIO_ZONE:
552
if (bp->bio_cmd == BIO_ZONE) {
553
if (!(dp->d_flags & DISKFLAG_CANZONE)) {
554
error = EOPNOTSUPP;
555
break;
556
}
557
g_trace(G_T_BIO, "g_disk_zone(%s)",
558
bp->bio_to->name);
559
}
560
bp2 = g_clone_bio(bp);
561
if (bp2 == NULL) {
562
g_io_deliver(bp, ENOMEM);
563
return;
564
}
565
bp2->bio_done = g_disk_done;
566
bp2->bio_caller1 = sc;
567
bp2->bio_disk = dp;
568
devstat_start_transaction_bio(dp->d_devstat, bp2);
569
dp->d_strategy(bp2);
570
break;
571
case BIO_SPEEDUP:
572
bp2 = g_clone_bio(bp);
573
if (bp2 == NULL) {
574
g_io_deliver(bp, ENOMEM);
575
return;
576
}
577
bp2->bio_done = g_disk_done;
578
bp2->bio_caller1 = sc;
579
bp2->bio_disk = dp;
580
dp->d_strategy(bp2);
581
break;
582
default:
583
error = EOPNOTSUPP;
584
break;
585
}
586
if (error != EJUSTRETURN)
587
g_io_deliver(bp, error);
588
return;
589
}
590
591
static void
592
g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
593
{
594
struct bio *bp;
595
struct disk *dp;
596
struct g_disk_softc *sc;
597
char *buf;
598
int res = 0;
599
600
sc = gp->softc;
601
if (sc == NULL || (dp = sc->dp) == NULL)
602
return;
603
if (indent == NULL) {
604
sbuf_printf(sb, " hd %u", dp->d_fwheads);
605
sbuf_printf(sb, " sc %u", dp->d_fwsectors);
606
return;
607
}
608
if (pp != NULL) {
609
sbuf_printf(sb, "%s<fwheads>%u</fwheads>\n",
610
indent, dp->d_fwheads);
611
sbuf_printf(sb, "%s<fwsectors>%u</fwsectors>\n",
612
indent, dp->d_fwsectors);
613
614
/*
615
* "rotationrate" is a little complicated, because the value
616
* returned by the drive might not be the RPM; 0 and 1 are
617
* special cases, and there's also a valid range.
618
*/
619
sbuf_printf(sb, "%s<rotationrate>", indent);
620
if (dp->d_rotation_rate == DISK_RR_UNKNOWN) /* Old drives */
621
sbuf_cat(sb, "unknown"); /* don't report RPM. */
622
else if (dp->d_rotation_rate == DISK_RR_NON_ROTATING)
623
sbuf_cat(sb, "0");
624
else if ((dp->d_rotation_rate >= DISK_RR_MIN) &&
625
(dp->d_rotation_rate <= DISK_RR_MAX))
626
sbuf_printf(sb, "%u", dp->d_rotation_rate);
627
else
628
sbuf_cat(sb, "invalid");
629
sbuf_cat(sb, "</rotationrate>\n");
630
if (dp->d_getattr != NULL) {
631
buf = g_malloc(DISK_IDENT_SIZE, M_WAITOK);
632
bp = g_alloc_bio();
633
bp->bio_disk = dp;
634
bp->bio_attribute = "GEOM::ident";
635
bp->bio_length = DISK_IDENT_SIZE;
636
bp->bio_data = buf;
637
res = dp->d_getattr(bp);
638
sbuf_printf(sb, "%s<ident>", indent);
639
g_conf_cat_escaped(sb, res == 0 ? buf : dp->d_ident);
640
sbuf_cat(sb, "</ident>\n");
641
bp->bio_attribute = "GEOM::lunid";
642
bp->bio_length = DISK_IDENT_SIZE;
643
bp->bio_data = buf;
644
if (dp->d_getattr(bp) == 0) {
645
sbuf_printf(sb, "%s<lunid>", indent);
646
g_conf_cat_escaped(sb, buf);
647
sbuf_cat(sb, "</lunid>\n");
648
}
649
bp->bio_attribute = "GEOM::lunname";
650
bp->bio_length = DISK_IDENT_SIZE;
651
bp->bio_data = buf;
652
if (dp->d_getattr(bp) == 0) {
653
sbuf_printf(sb, "%s<lunname>", indent);
654
g_conf_cat_escaped(sb, buf);
655
sbuf_cat(sb, "</lunname>\n");
656
}
657
g_destroy_bio(bp);
658
g_free(buf);
659
} else {
660
sbuf_printf(sb, "%s<ident>", indent);
661
g_conf_cat_escaped(sb, dp->d_ident);
662
sbuf_cat(sb, "</ident>\n");
663
}
664
sbuf_printf(sb, "%s<descr>", indent);
665
g_conf_cat_escaped(sb, dp->d_descr);
666
sbuf_cat(sb, "</descr>\n");
667
}
668
}
669
670
static void
671
g_disk_resize(void *ptr, int flag)
672
{
673
struct disk *dp;
674
struct g_geom *gp;
675
struct g_provider *pp;
676
677
if (flag == EV_CANCEL)
678
return;
679
g_topology_assert();
680
681
dp = ptr;
682
gp = dp->d_geom;
683
684
if (dp->d_destroyed || gp == NULL)
685
return;
686
687
LIST_FOREACH(pp, &gp->provider, provider) {
688
if (pp->sectorsize != 0 &&
689
pp->sectorsize != dp->d_sectorsize)
690
g_wither_provider(pp, ENXIO);
691
else
692
g_resize_provider(pp, dp->d_mediasize);
693
}
694
}
695
696
static void
697
g_disk_create(void *arg, int flag)
698
{
699
struct g_geom *gp;
700
struct g_provider *pp;
701
struct disk *dp;
702
struct g_disk_softc *sc;
703
struct disk_alias *dap;
704
char tmpstr[80];
705
706
if (flag == EV_CANCEL)
707
return;
708
g_topology_assert();
709
dp = arg;
710
711
mtx_pool_lock(mtxpool_sleep, dp);
712
dp->d_init_level = DISK_INIT_START;
713
714
/*
715
* If the disk has already gone away, we can just stop here and
716
* call the user's callback to tell him we've cleaned things up.
717
*/
718
if (dp->d_goneflag != 0) {
719
mtx_pool_unlock(mtxpool_sleep, dp);
720
if (dp->d_gone != NULL)
721
dp->d_gone(dp);
722
return;
723
}
724
mtx_pool_unlock(mtxpool_sleep, dp);
725
726
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
727
mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF);
728
sc->dp = dp;
729
if (dp->d_devstat == NULL) {
730
dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit,
731
dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED,
732
DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
733
}
734
sc->d_devstat = dp->d_devstat;
735
gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit);
736
gp->softc = sc;
737
pp = g_new_providerf(gp, "%s", gp->name);
738
LIST_FOREACH(dap, &dp->d_aliases, da_next)
739
g_provider_add_alias(pp, "%s%d", dap->da_alias, dp->d_unit);
740
devstat_remove_entry(pp->stat);
741
pp->stat = NULL;
742
dp->d_devstat->id = pp;
743
pp->mediasize = dp->d_mediasize;
744
pp->sectorsize = dp->d_sectorsize;
745
pp->stripeoffset = dp->d_stripeoffset;
746
pp->stripesize = dp->d_stripesize;
747
if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0)
748
pp->flags |= G_PF_ACCEPT_UNMAPPED;
749
if ((dp->d_flags & DISKFLAG_DIRECT_COMPLETION) != 0)
750
pp->flags |= G_PF_DIRECT_SEND;
751
pp->flags |= G_PF_DIRECT_RECEIVE;
752
if (bootverbose)
753
printf("GEOM: new disk %s\n", gp->name);
754
sysctl_ctx_init(&sc->sysctl_ctx);
755
snprintf(tmpstr, sizeof(tmpstr), "GEOM disk %s", gp->name);
756
sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
757
SYSCTL_STATIC_CHILDREN(_kern_geom_disk), OID_AUTO, gp->name,
758
CTLFLAG_RD | CTLFLAG_MPSAFE, 0, tmpstr);
759
if (sc->sysctl_tree != NULL) {
760
SYSCTL_ADD_STRING(&sc->sysctl_ctx,
761
SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "led",
762
CTLFLAG_RWTUN, sc->led, sizeof(sc->led),
763
"LED name");
764
SYSCTL_ADD_PROC(&sc->sysctl_ctx,
765
SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "flags",
766
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, dp, 0,
767
g_disk_sysctl_flags, "A", "Report disk flags");
768
SYSCTL_ADD_BOOL(&sc->sysctl_ctx,
769
SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "flush_notsup_succeed",
770
CTLFLAG_RWTUN, &sc->flush_notsup_succeed, sizeof(sc->flush_notsup_succeed),
771
"Do not return EOPNOTSUPP if there is no cache to flush");
772
}
773
pp->private = sc;
774
dp->d_geom = gp;
775
g_error_provider(pp, 0);
776
777
mtx_pool_lock(mtxpool_sleep, dp);
778
dp->d_init_level = DISK_INIT_DONE;
779
780
/*
781
* If the disk has gone away at this stage, start the withering
782
* process for it.
783
*/
784
if (dp->d_goneflag != 0) {
785
mtx_pool_unlock(mtxpool_sleep, dp);
786
g_wither_provider(pp, ENXIO);
787
return;
788
}
789
mtx_pool_unlock(mtxpool_sleep, dp);
790
791
}
792
793
/*
794
* We get this callback after all of the consumers have gone away, and just
795
* before the provider is freed. If the disk driver provided a d_gone
796
* callback, let them know that it is okay to free resources -- they won't
797
* be getting any more accesses from GEOM.
798
*/
799
static void
800
g_disk_providergone(struct g_provider *pp)
801
{
802
struct disk *dp;
803
struct g_disk_softc *sc;
804
805
sc = (struct g_disk_softc *)pp->private;
806
dp = sc->dp;
807
if (dp != NULL && dp->d_gone != NULL)
808
dp->d_gone(dp);
809
if (sc->sysctl_tree != NULL) {
810
sysctl_ctx_free(&sc->sysctl_ctx);
811
sc->sysctl_tree = NULL;
812
}
813
if (sc->led[0] != 0) {
814
led_set(sc->led, "0");
815
sc->led[0] = 0;
816
}
817
pp->private = NULL;
818
pp->geom->softc = NULL;
819
mtx_destroy(&sc->done_mtx);
820
g_free(sc);
821
}
822
823
static void
824
g_disk_destroy(void *ptr, int flag)
825
{
826
struct disk *dp;
827
struct g_geom *gp;
828
struct g_disk_softc *sc;
829
struct disk_alias *dap, *daptmp;
830
831
g_topology_assert();
832
dp = ptr;
833
gp = dp->d_geom;
834
if (gp != NULL) {
835
sc = gp->softc;
836
if (sc != NULL)
837
sc->dp = NULL;
838
dp->d_geom = NULL;
839
g_wither_geom(gp, ENXIO);
840
}
841
LIST_FOREACH_SAFE(dap, &dp->d_aliases, da_next, daptmp)
842
g_free(dap);
843
844
g_free(dp);
845
}
846
847
/*
848
* We only allow printable characters in disk ident,
849
* the rest is converted to 'x<HH>'.
850
*/
851
static void
852
g_disk_ident_adjust(char *ident, size_t size)
853
{
854
char *p, tmp[4], newid[DISK_IDENT_SIZE];
855
856
newid[0] = '\0';
857
for (p = ident; *p != '\0'; p++) {
858
if (isprint(*p)) {
859
tmp[0] = *p;
860
tmp[1] = '\0';
861
} else {
862
snprintf(tmp, sizeof(tmp), "x%02hhx",
863
*(unsigned char *)p);
864
}
865
if (strlcat(newid, tmp, sizeof(newid)) >= sizeof(newid))
866
break;
867
}
868
bzero(ident, size);
869
strlcpy(ident, newid, size);
870
}
871
872
struct disk *
873
disk_alloc(void)
874
{
875
struct disk *dp;
876
877
dp = g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO);
878
LIST_INIT(&dp->d_aliases);
879
dp->d_init_level = DISK_INIT_NONE;
880
dp->d_cevent = g_alloc_event(M_WAITOK);
881
dp->d_devent = g_alloc_event(M_WAITOK);
882
return (dp);
883
}
884
885
void
886
disk_create(struct disk *dp, int version)
887
{
888
889
if (version != DISK_VERSION) {
890
printf("WARNING: Attempt to add disk %s%d %s",
891
dp->d_name, dp->d_unit,
892
" using incompatible ABI version of disk(9)\n");
893
printf("WARNING: Ignoring disk %s%d\n",
894
dp->d_name, dp->d_unit);
895
return;
896
}
897
if (dp->d_flags & DISKFLAG_RESERVED) {
898
printf("WARNING: Attempt to add non-MPSAFE disk %s%d\n",
899
dp->d_name, dp->d_unit);
900
printf("WARNING: Ignoring disk %s%d\n",
901
dp->d_name, dp->d_unit);
902
return;
903
}
904
KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy"));
905
KASSERT(dp->d_name != NULL, ("disk_create need d_name"));
906
KASSERT(*dp->d_name != 0, ("disk_create need d_name"));
907
KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long"));
908
g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident));
909
910
dp->d_init_level = DISK_INIT_CREATE;
911
912
KASSERT(dp->d_cevent != NULL,
913
("Disk create for %p with event NULL", dp));
914
g_post_event_ep(g_disk_create, dp, dp->d_cevent, dp, NULL);
915
}
916
917
void
918
disk_destroy(struct disk *dp)
919
{
920
struct disk_alias *dap, *daptmp;
921
922
/* If disk_create() was never called, just free the resources. */
923
if (dp->d_init_level < DISK_INIT_CREATE) {
924
if (dp->d_devstat != NULL)
925
devstat_remove_entry(dp->d_devstat);
926
LIST_FOREACH_SAFE(dap, &dp->d_aliases, da_next, daptmp)
927
g_free(dap);
928
g_free(dp->d_cevent);
929
g_free(dp->d_devent);
930
g_free(dp);
931
return;
932
}
933
934
KASSERT(dp->d_devent != NULL,
935
("Disk destroy for %p with event NULL", dp));
936
disk_gone(dp);
937
dp->d_destroyed = 1;
938
g_cancel_event(dp);
939
if (dp->d_devstat != NULL)
940
devstat_remove_entry(dp->d_devstat);
941
g_post_event_ep(g_disk_destroy, dp, dp->d_devent, NULL);
942
}
943
944
void
945
disk_add_alias(struct disk *dp, const char *name)
946
{
947
struct disk_alias *dap;
948
949
dap = (struct disk_alias *)g_malloc(
950
sizeof(struct disk_alias) + strlen(name) + 1, M_WAITOK);
951
strcpy((char *)(dap + 1), name);
952
dap->da_alias = (const char *)(dap + 1);
953
LIST_INSERT_HEAD(&dp->d_aliases, dap, da_next);
954
}
955
956
void
957
disk_gone(struct disk *dp)
958
{
959
struct g_geom *gp;
960
struct g_provider *pp;
961
962
mtx_pool_lock(mtxpool_sleep, dp);
963
964
/*
965
* Second wither call makes no sense, plus we can not access the list
966
* of providers without topology lock after calling wither once.
967
*/
968
if (dp->d_goneflag != 0) {
969
mtx_pool_unlock(mtxpool_sleep, dp);
970
return;
971
}
972
973
dp->d_goneflag = 1;
974
975
/*
976
* If we're still in the process of creating this disk (the
977
* g_disk_create() function is still queued, or is in
978
* progress), the init level will not yet be DISK_INIT_DONE.
979
*
980
* If that is the case, g_disk_create() will see d_goneflag
981
* and take care of cleaning things up.
982
*
983
* If the disk has already been created, we default to
984
* withering the provider as usual below.
985
*
986
* If the caller has not set a d_gone() callback, he will
987
* not be any worse off by returning here, because the geom
988
* has not been fully setup in any case.
989
*/
990
if (dp->d_init_level < DISK_INIT_DONE) {
991
mtx_pool_unlock(mtxpool_sleep, dp);
992
return;
993
}
994
mtx_pool_unlock(mtxpool_sleep, dp);
995
996
gp = dp->d_geom;
997
pp = LIST_FIRST(&gp->provider);
998
if (pp != NULL) {
999
KASSERT(LIST_NEXT(pp, provider) == NULL,
1000
("geom %p has more than one provider", gp));
1001
g_wither_provider(pp, ENXIO);
1002
}
1003
}
1004
1005
void
1006
disk_attr_changed(struct disk *dp, const char *attr, int flag)
1007
{
1008
struct g_geom *gp = dp->d_geom;
1009
struct g_provider *pp;
1010
char devnamebuf[128];
1011
1012
if (gp == NULL)
1013
return;
1014
LIST_FOREACH(pp, &gp->provider, provider)
1015
(void)g_attr_changed(pp, attr, flag);
1016
snprintf(devnamebuf, sizeof(devnamebuf), "devname=%s%d", dp->d_name,
1017
dp->d_unit);
1018
devctl_notify("GEOM", "disk", attr, devnamebuf);
1019
}
1020
1021
void
1022
disk_media_changed(struct disk *dp, int flag)
1023
{
1024
struct g_geom *gp = dp->d_geom;
1025
struct g_provider *pp;
1026
1027
if (gp == NULL)
1028
return;
1029
pp = LIST_FIRST(&gp->provider);
1030
if (pp != NULL) {
1031
KASSERT(LIST_NEXT(pp, provider) == NULL,
1032
("geom %p has more than one provider", gp));
1033
g_media_changed(pp, flag);
1034
}
1035
}
1036
1037
void
1038
disk_media_gone(struct disk *dp, int flag)
1039
{
1040
struct g_geom *gp = dp->d_geom;
1041
struct g_provider *pp;
1042
1043
if (gp == NULL)
1044
return;
1045
pp = LIST_FIRST(&gp->provider);
1046
if (pp != NULL) {
1047
KASSERT(LIST_NEXT(pp, provider) == NULL,
1048
("geom %p has more than one provider", gp));
1049
g_media_gone(pp, flag);
1050
}
1051
}
1052
1053
int
1054
disk_resize(struct disk *dp, int flag)
1055
{
1056
1057
if (dp->d_destroyed || dp->d_geom == NULL)
1058
return (0);
1059
1060
return (g_post_event(g_disk_resize, dp, flag, NULL));
1061
}
1062
1063
static void
1064
g_kern_disks(void *p, int flag __unused)
1065
{
1066
struct sbuf *sb;
1067
struct g_geom *gp;
1068
char *sp;
1069
1070
sb = p;
1071
sp = "";
1072
g_topology_assert();
1073
LIST_FOREACH(gp, &g_disk_class.geom, geom) {
1074
sbuf_printf(sb, "%s%s", sp, gp->name);
1075
sp = " ";
1076
}
1077
sbuf_finish(sb);
1078
}
1079
1080
static int
1081
g_disk_sysctl_flags(SYSCTL_HANDLER_ARGS)
1082
{
1083
struct disk *dp;
1084
struct sbuf *sb;
1085
int error;
1086
1087
sb = sbuf_new_auto();
1088
dp = (struct disk *)arg1;
1089
sbuf_printf(sb, "%b", dp->d_flags,
1090
"\20"
1091
"\2OPEN"
1092
"\3CANDELETE"
1093
"\4CANFLUSHCACHE"
1094
"\5UNMAPPEDBIO"
1095
"\6DIRECTCOMPLETION"
1096
"\10CANZONE"
1097
"\11WRITEPROTECT");
1098
1099
sbuf_finish(sb);
1100
error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
1101
sbuf_delete(sb);
1102
return (error);
1103
}
1104
1105
static int
1106
sysctl_disks(SYSCTL_HANDLER_ARGS)
1107
{
1108
int error;
1109
struct sbuf *sb;
1110
1111
sb = sbuf_new_auto();
1112
g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL);
1113
error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
1114
sbuf_delete(sb);
1115
return error;
1116
}
1117
1118
SYSCTL_PROC(_kern, OID_AUTO, disks,
1119
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1120
sysctl_disks, "A", "names of available disks");
1121
1122