GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/cam/ctl/ctl_backend_block.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2003 Silicon Graphics International Corp.
 * Copyright (c) 2009-2011 Spectra Logic Corporation
 * Copyright (c) 2012,2021 The FreeBSD Foundation
 * Copyright (c) 2014-2021 Alexander Motin <[email protected]>
 * All rights reserved.
 *
 * Portions of this software were developed by Edward Tomasz Napierala
 * under sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by Ka Ho Ng <[email protected]>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
 */
/*
 * CAM Target Layer driver backend for block devices.
 *
 * Author: Ken Merry <[email protected]>
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/kthread.h>
#include <sys/bio.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/endian.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/disk.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/proc.h>
#include <sys/pcpu.h>
#include <sys/module.h>
#include <sys/sdt.h>
#include <sys/devicestat.h>
#include <sys/sysctl.h>
#include <sys/nv.h>
#include <sys/dnv.h>
#include <sys/sx.h>
#include <sys/unistd.h>

#include <geom/geom.h>

#include <cam/cam.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_ioctl.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_private.h>
#include <cam/ctl/ctl_error.h>

/*
 * The idea here is to allocate enough S/G space to handle at least 1MB I/Os.
 * On systems with small maxphys that is 8 128KB segments; on large systems
 * it can be up to 8 1MB segments.  Larger I/Os will be split.
 */
#define	CTLBLK_MAX_SEGS		8
#define	CTLBLK_HALF_SEGS	(CTLBLK_MAX_SEGS / 2)
#define	CTLBLK_MIN_SEG		(128 * 1024)
#define	CTLBLK_MAX_SEG		MIN(1024 * 1024, MAX(CTLBLK_MIN_SEG, maxphys))
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_MAX_SEG * CTLBLK_MAX_SEGS)
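/*
 * Worked example: with maxphys of 128KB, CTLBLK_MAX_SEG is 128KB and
 * CTLBLK_MAX_IO_SIZE is 8 x 128KB = 1MB; once maxphys reaches 1MB,
 * CTLBLK_MAX_SEG caps at 1MB and CTLBLK_MAX_IO_SIZE grows to 8MB.
 */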

#ifdef CTLBLK_DEBUG
#define DPRINTF(fmt, args...) \
	printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)
#endif

#define PRIV(io)	\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define ARGS(io)	\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
#define	DSM_RANGE(io)	((io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].integer)

SDT_PROVIDER_DEFINE(cbb);

typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
} ctl_be_block_lun_flags;

typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;

struct ctl_be_block_filedata {
	struct ucred *cred;
};

union ctl_be_block_bedata {
	struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
			       struct ctl_be_block_io *beio);
typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
				  const char *attrname);

/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_be_lun cbe_lun;	/* Must be first element. */
	struct ctl_lun_create_params params;
	char *dev_path;
	ctl_be_block_type dev_type;
	struct vnode *vn;
	union ctl_be_block_bedata backend;
	cbb_dispatch_t dispatch;
	cbb_dispatch_t lun_flush;
	cbb_dispatch_t unmap;
	cbb_dispatch_t get_lba_status;
	cbb_getattr_t getattr;
	uint64_t size_blocks;
	uint64_t size_bytes;
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;
	ctl_be_block_lun_flags flags;
	SLIST_ENTRY(ctl_be_block_lun) links;
	struct taskqueue *io_taskqueue;
	struct task io_task;
	int num_threads;
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct mtx_padalign io_lock;
	struct mtx_padalign queue_lock;
};

/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct sx modify_lock;
	struct mtx lock;
	int num_luns;
	SLIST_HEAD(, ctl_be_block_lun) lun_list;
	uma_zone_t beio_zone;
	uma_zone_t bufmin_zone;
	uma_zone_t bufmax_zone;
};

static struct ctl_be_block_softc backend_block_softc;

/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
	union ctl_io *io;
	struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS];
	struct iovec xiovecs[CTLBLK_MAX_SEGS];
	int refcnt;
	int bio_cmd;
	int two_sglists;
	int num_segs;
	int num_bios_sent;
	int num_bios_done;
	int send_complete;
	int first_error;
	uint64_t first_error_offset;
	struct bintime ds_t0;
	devstat_tag_type ds_tag_type;
	devstat_trans_flags ds_trans_type;
	uint64_t io_len;
	uint64_t io_offset;
	int io_arg;
	struct ctl_be_block_softc *softc;
	struct ctl_be_block_lun *lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};

static int cbb_num_threads = 32;
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
    &cbb_num_threads, 0, "Number of threads per backing file");

static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
				       struct ctl_be_block_io *beio);
static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
					  const char *attrname);
static void ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
				      struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
					 const char *attrname);
static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
				     union ctl_io *io);
static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
				     union ctl_io *io);
static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
				  union ctl_io *io);
static void ctl_be_block_worker(void *context, int pending);
static int ctl_be_block_submit(union ctl_io *io);
static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
			      int flag, struct thread *td);
static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_lun_req *req);
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
				 struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
			     struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
static int ctl_be_block_config_write(union ctl_io *io);
static int ctl_be_block_config_read(union ctl_io *io);
static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
static int ctl_be_block_init(void);
static int ctl_be_block_shutdown(void);

static struct ctl_backend_driver ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.shutdown = ctl_be_block_shutdown,
	.data_submit = ctl_be_block_submit,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info,
	.lun_attr = ctl_be_block_lun_attr
};

MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);

static void
ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg,
    size_t len)
{

	if (len <= CTLBLK_MIN_SEG) {
		sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK);
	} else {
		KASSERT(len <= CTLBLK_MAX_SEG,
		    ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG));
		sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK);
	}
	sg->len = len;
}

static void
ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg)
{

	if (sg->len <= CTLBLK_MIN_SEG) {
		uma_zfree(softc->bufmin_zone, sg->addr);
	} else {
		KASSERT(sg->len <= CTLBLK_MAX_SEG,
		    ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG));
		uma_zfree(softc->bufmax_zone, sg->addr);
	}
}

static struct ctl_be_block_io *
ctl_alloc_beio(struct ctl_be_block_softc *softc)
{
	struct ctl_be_block_io *beio;

	beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
	beio->softc = softc;
	beio->refcnt = 1;
	return (beio);
}

static void
ctl_real_free_beio(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_softc *softc = beio->softc;
	int i;

	for (i = 0; i < beio->num_segs; i++) {
		ctl_free_seg(softc, &beio->sg_segs[i]);

		/* For compare we had two equal S/G lists. */
		if (beio->two_sglists) {
			ctl_free_seg(softc,
			    &beio->sg_segs[i + CTLBLK_HALF_SEGS]);
		}
	}

	uma_zfree(softc->beio_zone, beio);
}

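/*
 * A beio is reference counted: it is created with a count of one in
 * ctl_alloc_beio(), and ctl_refcnt_beio() is also registered as the
 * kern_data_ref callback in ctl_be_block_dispatch(), so the structure
 * is only freed once the last reference is dropped.
 */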
static void
ctl_refcnt_beio(void *arg, int diff)
{
	struct ctl_be_block_io *beio = arg;

	if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0)
		ctl_real_free_beio(beio);
}

static void
ctl_free_beio(struct ctl_be_block_io *beio)
{

	ctl_refcnt_beio(beio, -1);
}

static void
ctl_complete_beio(struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;

	if (beio->beio_cont != NULL) {
		beio->beio_cont(beio);
	} else {
		ctl_free_beio(beio);
		ctl_data_submit_done(io);
	}
}

static void
ctl_be_block_io_error(union ctl_io *io, int bio_cmd, uint16_t retry_count)
{
	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		if (bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 retry_count);
		} else {
			ctl_set_medium_error(&io->scsiio, bio_cmd == BIO_READ);
		}
		break;
	case CTL_IO_NVME:
		switch (bio_cmd) {
		case BIO_FLUSH:
		case BIO_WRITE:
			ctl_nvme_set_write_fault(&io->nvmeio);
			break;
		case BIO_READ:
			ctl_nvme_set_unrecoverable_read_error(&io->nvmeio);
			break;
		default:
			ctl_nvme_set_internal_error(&io->nvmeio);
			break;
		}
		break;
	default:
		__assert_unreachable();
	}
}

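/*
 * Compare two buffers byte by byte.  Returns the offset of the first
 * mismatching byte, or size if the buffers are identical.
 */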
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (a[i] != b[i])
			break;
	}
	return (i);
}

static void
ctl_be_block_compare(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	uint64_t off, res;
	int i;

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	off = 0;
	for (i = 0; i < beio->num_segs; i++) {
		res = cmp(beio->sg_segs[i].addr,
		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
		    beio->sg_segs[i].len);
		off += res;
		if (res < beio->sg_segs[i].len)
			break;
	}
	if (i < beio->num_segs) {
		ctl_io_set_compare_failure(io, off);
	} else
		ctl_io_set_success(io);
}

static int
ctl_be_block_move_done(union ctl_io *io, bool samethr)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	DPRINTF("entered\n");
	ctl_add_kern_rel_offset(io, ctl_kern_data_len(io));

	/*
	 * We set status at this point for read and compare commands.
	 */
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
	    (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
		lbalen = ARGS(io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_io_set_success(io);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			ctl_be_block_compare(io);
		}
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed successfully.
	 * If we were called synchronously in the original thread then just
	 * dispatch, otherwise we now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
	if (samethr) {
		be_lun->dispatch(be_lun, beio);
	} else {
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
	}
	return (0);
}

static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio = bio->bio_caller1;
	struct ctl_be_block_lun *be_lun = beio->lun;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	int error;

	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	if (error != 0 &&
	    (beio->first_error == 0 ||
	     bio->bio_offset < beio->first_error_offset)) {
		beio->first_error = error;
		beio->first_error_offset = bio->bio_offset;
	}

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	error = beio->first_error;
	if (error != 0) {
		if (error == EOPNOTSUPP) {
			ctl_io_set_invalid_opcode(io);
		} else if (error == ENOSPC || error == EDQUOT) {
			ctl_io_set_space_alloc_fail(io);
		} else if (error == EROFS || error == EACCES) {
			ctl_io_set_hw_write_protected(io);
		} else {
			ctl_be_block_io_error(io, beio->bio_cmd,
			    /*retry_count*/ 0xbad2);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_io_set_success(io);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_io_set_success(io);
			if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	vn_lock(be_lun->vn, vn_lktype_write(mountpoint, be_lun->vn) |
	    LK_RETRY);
	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
	    curthread);
	VOP_UNLOCK(be_lun->vn);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_io_set_success(io);
	else {
		ctl_be_block_io_error(io, BIO_FLUSH,
		    /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}

SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, file_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");

static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	size_t s;
	int error, flags, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		if (beio->beio_cont == NULL &&
		    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
			ctl_serseq_done(io);
		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 */
		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

		VOP_UNLOCK(be_lun->vn);
		SDT_PROBE0(cbb, , read, file_done);
		if (error == 0 && xuio.uio_resid > 0) {
			/*
			 * If we read less than requested (EOF), then
			 * we should zero the rest of the buffer.
			 */
			s = beio->io_len - xuio.uio_resid;
			for (i = 0; i < beio->num_segs; i++) {
				if (s >= beio->sg_segs[i].len) {
					s -= beio->sg_segs[i].len;
					continue;
				}
				bzero((uint8_t *)beio->sg_segs[i].addr + s,
				    beio->sg_segs[i].len - s);
				s = 0;
			}
		}
	} else {
		struct mount *mountpoint;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
		vn_lock(be_lun->vn, vn_lktype_write(mountpoint,
		    be_lun->vn) | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into the cache.)
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
		VOP_UNLOCK(be_lun->vn);

		vn_finished_write(mountpoint);
		SDT_PROBE0(cbb, , write, file_done);
	}

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_io_set_space_alloc_fail(io);
		} else if (error == EROFS || error == EACCES) {
			ctl_io_set_hw_write_protected(io);
		} else {
			ctl_be_block_io_error(io, beio->bio_cmd, 0);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_io_set_success(io);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_io_set_success(io);
			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

static void
ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
		      struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, status;

	DPRINTF("entered\n");

	CTL_IO_ASSERT(io, SCSI);

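	/*
	 * Implement GET LBA STATUS by probing the backing file with the
	 * FIOSEEKHOLE/FIOSEEKDATA ioctls to see whether the range at the
	 * requested LBA is mapped or deallocated.
	 */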
	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
	    0, curthread->td_ucred, curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
		    0, curthread->td_ucred, curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	VOP_UNLOCK(be_lun->vn);

	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}

static uint64_t
ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct vattr vattr;
	struct statfs statfs;
	uint64_t val;
	int error;

	val = UINT64_MAX;
	if (be_lun->vn == NULL)
		return (val);
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	if (strcmp(attrname, "blocksused") == 0) {
		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
		if (error == 0)
			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
	}
	if (strcmp(attrname, "blocksavail") == 0 &&
	    !VN_IS_DOOMED(be_lun->vn)) {
		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
		if (error == 0)
			val = statfs.f_bavail * statfs.f_bsize /
			    be_lun->cbe_lun.blocksize;
	}
	VOP_UNLOCK(be_lun->vn);
	return (val);
}

static void
ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	struct mount *mp;
	off_t off, len;
	int error;

	io = beio->io;
	file_data = &be_lun->backend.file;
	mp = NULL;
	error = 0;

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	(void)vn_start_write(be_lun->vn, &mp, V_WAIT);
	vn_lock(be_lun->vn, vn_lktype_write(mp, be_lun->vn) | LK_RETRY);
	if (beio->io_offset == -1) {
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)
		    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			off = (off_t)scsi_8btou64(buf->lba) *
			    be_lun->cbe_lun.blocksize;
			len = (off_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			error = vn_deallocate(be_lun->vn, &off, &len,
			    0, IO_NOMACCHECK | IO_NODELOCKED, file_data->cred,
			    NOCRED);
			if (error != 0)
				break;
		}
	} else {
		/* WRITE_SAME */
		off = beio->io_offset;
		len = beio->io_len;
		error = vn_deallocate(be_lun->vn, &off, &len, 0,
		    IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, NOCRED);
	}
	VOP_UNLOCK(be_lun->vn);
	vn_finished_write(mp);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	switch (error) {
	case 0:
		ctl_io_set_success(io);
		break;
	case ENOSPC:
	case EDQUOT:
		ctl_io_set_space_alloc_fail(io);
		break;
	case EROFS:
	case EACCES:
		ctl_io_set_hw_write_protected(io);
		break;
	default:
		ctl_be_block_io_error(io, BIO_DELETE, 0);
	}
	ctl_complete_beio(beio);
}

static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct uio xuio;
	struct iovec *xiovec;
	int error, flags, i, ref;

	DPRINTF("entered\n");

	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		if (beio->bio_cmd == BIO_READ) {
			if (beio->beio_cont == NULL &&
			    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
			error = csw->d_read(dev, &xuio, flags);
		} else
			error = csw->d_write(dev, &xuio, flags);
		dev_relthread(dev, ref);
	} else
		error = ENXIO;

	if (beio->bio_cmd == BIO_READ)
		SDT_PROBE0(cbb, , read, file_done);
	else
		SDT_PROBE0(cbb, , write, file_done);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_io_set_space_alloc_fail(io);
		} else if (error == EROFS || error == EACCES) {
			ctl_io_set_hw_write_protected(io);
		} else {
			ctl_be_block_io_error(io, beio->bio_cmd, 0);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_io_set_success(io);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_io_set_success(io);
			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
		      struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, ref, status;

	DPRINTF("entered\n");

	CTL_IO_ASSERT(io, SCSI);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL) {
		status = 0;	/* unknown up to the end */
		off = be_lun->size_bytes;
		goto done;
	}
	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
	    curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
		    curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	dev_relthread(dev, ref);

done:
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}

static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	DPRINTF("entered\n");

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	bio->bio_cmd = BIO_FLUSH;
	bio->bio_offset = 0;
	bio->bio_data = 0;
	bio->bio_done = ctl_be_block_biodone;
	bio->bio_caller1 = beio;
	bio->bio_pblkno = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
	 */
	beio->num_bios_sent = 1;
	beio->send_complete = 1;

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		bio->bio_dev = dev;
		csw->d_strategy(bio);
		dev_relthread(dev, ref);
	} else {
		bio->bio_error = ENXIO;
		ctl_be_block_biodone(bio);
	}
}

static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
			     struct ctl_be_block_io *beio,
			     uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	uint64_t maxlen;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
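	/*
	 * Split the range into bios no larger than LONG_MAX rounded down
	 * to a multiple of the block size, so each bio_length stays
	 * block-aligned while fitting in its type.
	 */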
	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd = BIO_DELETE;
		bio->bio_dev = dev;
		bio->bio_offset = off;
		bio->bio_length = MIN(len, maxlen);
		bio->bio_data = 0;
		bio->bio_done = ctl_be_block_biodone;
		bio->bio_caller1 = beio;
		bio->bio_pblkno = off / be_lun->cbe_lun.blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		if (csw) {
			csw->d_strategy(bio);
		} else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}

static void
ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	uint64_t len;

	io = beio->io;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	if (beio->io_offset == -1) {
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			len = (uint64_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			ctl_be_block_unmap_dev_range(be_lun, beio,
			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
			    len, (end - buf < 2) ? TRUE : FALSE);
		}
	} else
		ctl_be_block_unmap_dev_range(be_lun, beio,
		    beio->io_offset, beio->io_len, TRUE);
}

static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
			  struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	off_t cur_offset;
	int i, max_iosize, ref;

	DPRINTF("entered\n");
	csw = devvn_refthread(be_lun->vn, &dev, &ref);

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.
	 */
	if (csw) {
		max_iosize = dev->si_iosize_max;
		if (max_iosize <= 0)
			max_iosize = DFLTPHYS;
	} else
		max_iosize = maxphys;

	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_dev = dev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
	beio->send_complete = 1;
	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		if (csw)
			csw->d_strategy(bio);
		else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}

static uint64_t
ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct diocgattr_arg arg;
	struct cdevsw *csw;
	struct cdev *dev;
	int error, ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (UINT64_MAX);
	strlcpy(arg.name, attrname, sizeof(arg.name));
	arg.len = sizeof(arg.value.off);
	if (csw->d_ioctl) {
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
	} else
		error = ENODEV;
	dev_relthread(dev, ref);
	if (error != 0)
		return (UINT64_MAX);
	return (arg.value.off);
}

static void
ctl_be_block_namespace_data(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct nvme_namespace_data *nsdata;

	nsdata = (struct nvme_namespace_data *)io->nvmeio.kern_data_ptr;
	memset(nsdata, 0, sizeof(*nsdata));
	nsdata->nsze = htole64(be_lun->size_blocks);
	nsdata->ncap = nsdata->nsze;
	nsdata->nuse = nsdata->nsze;
	nsdata->nlbaf = 1 - 1;
	nsdata->dlfeat = NVMEM(NVME_NS_DATA_DLFEAT_DWZ) |
	    NVMEF(NVME_NS_DATA_DLFEAT_READ, NVME_NS_DATA_DLFEAT_READ_00);
	nsdata->flbas = NVMEF(NVME_NS_DATA_FLBAS_FORMAT, 0);
	nsdata->lbaf[0] = NVMEF(NVME_NS_DATA_LBAF_LBADS,
	    ffs(cbe_lun->blocksize) - 1);

	ctl_lun_nsdata_ids(cbe_lun, nsdata);
	ctl_config_read_done(io);
}

static void
ctl_be_block_nvme_ids(struct ctl_be_block_lun *be_lun,
		      union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;

	ctl_lun_nvme_ids(cbe_lun, io->nvmeio.kern_data_ptr);
	ctl_config_read_done(io);
}

static void
ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
			      union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;

	DPRINTF("entered\n");
	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	beio->io_len = lbalen->len * cbe_lun->blocksize;
	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
	beio->bio_cmd = BIO_FLUSH;
	beio->ds_trans_type = DEVSTAT_NO_DATA;
	DPRINTF("SYNC\n");
	be_lun->lun_flush(be_lun, beio);
}

static void
ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_config_write_done(io);
		return;
	}

	ctl_be_block_config_write(io);
}

static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_block_softc *softc = be_lun->softc;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	CTL_IO_ASSERT(io, SCSI);

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(io);

	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	    (uintmax_t)lbalen->lba, lbalen->len);

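	/*
	 * pb is the physical block size in bytes and pbo compensates for
	 * the physical block offset of LBA 0; each iteration below trims
	 * seglen so that chunks end on a physical block boundary when
	 * possible, else on a logical block boundary.
	 */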
	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
		/*
		 * Set up the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);

		DPRINTF("segment %d addr %p len %zd\n", i,
		    beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += cbe_lun->blocksize) {
			if (lbalen->flags & SWS_NDOB) {
				memset(buf, 0, cbe_lun->blocksize);
			} else {
				memcpy(buf, io->scsiio.kern_data_ptr,
				    cbe_lun->blocksize);
			}
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We cannot do it all in one run.  Correct and schedule a rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}

static void
ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
			       union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_ptr_len_flags *ptrlen;

	DPRINTF("entered\n");

	CTL_IO_ASSERT(io, SCSI);

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 0,
				      /*command*/ 1,
				      /*field*/ 0,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	beio->io_len = 0;
	beio->io_offset = -1;
	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;
	DPRINTF("UNMAP\n");
	be_lun->unmap(be_lun, beio);
}

static void
ctl_be_block_cw_dispatch_flush(struct ctl_be_block_lun *be_lun,
			       union ctl_io *io)
{
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");
	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	beio->io_len = be_lun->size_bytes;
	beio->io_offset = 0;
	beio->io_arg = 1;
	beio->bio_cmd = BIO_FLUSH;
	beio->ds_trans_type = DEVSTAT_NO_DATA;
	DPRINTF("FLUSH\n");
	be_lun->lun_flush(be_lun, beio);
}

static void
ctl_be_block_cw_dispatch_wu(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;

	CTL_IO_ASSERT(io, NVME);

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(io);

	/*
	 * XXX: Not quite right as reads will return zeroes rather
	 * than failing.
	 */
	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;

	be_lun->unmap(be_lun, beio);
}

static void
ctl_be_block_cw_dispatch_wz(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_block_softc *softc = be_lun->softc;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;

	DPRINTF("entered\n");

	CTL_IO_ASSERT(io, NVME);

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(io);

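	/*
	 * Per the NVMe spec, CDW12 bit 25 of WRITE ZEROES is the DEAC
	 * (deallocate) bit; if it is set and the backend can unmap,
	 * deallocate the range instead of writing zeroes.
	 */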
	if ((le32toh(io->nvmeio.cmd.cdw12) & (1U << 25)) != 0 &&
	    be_lun->unmap != NULL) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE ZEROES at LBA %jx len %u\n",
	    (uintmax_t)lbalen->lba, lbalen->len);

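	/* The same physical-block alignment logic as WRITE SAME above. */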
	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
		/*
		 * Set up the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);

		DPRINTF("segment %d addr %p len %zd\n", i,
		    beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		memset(beio->sg_segs[i].addr, 0, seglen);
		lba += seglen / cbe_lun->blocksize;
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We cannot do it all in one run.  Correct and schedule a rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}

static void
ctl_be_block_cw_dispatch_dsm(struct ctl_be_block_lun *be_lun,
			     union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct nvme_dsm_range *r;
	uint64_t lba;
	uint32_t num_blocks;
	u_int i, ranges;

	CTL_IO_ASSERT(io, NVME);

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	if (be_lun->unmap == NULL) {
		ctl_free_beio(beio);
		ctl_nvme_set_success(&io->nvmeio);
		ctl_config_write_done(io);
		return;
	}

	ranges = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
	r = (struct nvme_dsm_range *)io->nvmeio.kern_data_ptr;

	/* Find the next range to delete. */
	for (i = DSM_RANGE(io); i < ranges; i++) {
		if ((le32toh(r[i].attributes) & (1U << 2)) != 0)
			break;
	}

	/* If no range to delete, complete the operation. */
	if (i == ranges) {
		ctl_free_beio(beio);
		ctl_nvme_set_success(&io->nvmeio);
		ctl_config_write_done(io);
		return;
	}

	/* If this is not the last range, request a rerun after this range. */
	if (i + 1 < ranges) {
		DSM_RANGE(io) = i + 1;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	lba = le64toh(r[i].starting_lba);
	num_blocks = le32toh(r[i].length);

	beio->io_offset = lba * cbe_lun->blocksize;
	beio->io_len = (uint64_t)num_blocks * cbe_lun->blocksize;
	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;

	be_lun->unmap(be_lun, beio);
}

static void
ctl_be_block_scsi_cr_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_read_done(io);
}

static void
ctl_be_block_scsi_cr_dispatch(struct ctl_be_block_lun *be_lun,
			      union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_scsi_cr_done;
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
		beio->bio_cmd = -1;
		beio->ds_trans_type = DEVSTAT_NO_DATA;
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		beio->io_len = 0;
		if (be_lun->get_lba_status)
			be_lun->get_lba_status(be_lun, beio);
		else
			ctl_be_block_scsi_cr_done(beio);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

static void
ctl_be_block_nvme_cr_dispatch(struct ctl_be_block_lun *be_lun,
			      union ctl_io *io)
{
	uint8_t cns;

	DPRINTF("entered\n");

	MPASS(io->nvmeio.cmd.opc == NVME_OPC_IDENTIFY);

	cns = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
	switch (cns) {
	case 0:
		ctl_be_block_namespace_data(be_lun, io);
		break;
	case 3:
		ctl_be_block_nvme_ids(be_lun, io);
		break;
	default:
		__assert_unreachable();
	}
}

static void
ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		ctl_be_block_scsi_cr_dispatch(be_lun, io);
		break;
	case CTL_IO_NVME_ADMIN:
		ctl_be_block_nvme_cr_dispatch(be_lun, io);
		break;
	default:
		__assert_unreachable();
	}
}

static void
ctl_be_block_cw_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_write_done(io);
}

static void
ctl_be_block_scsi_cw_dispatch(struct ctl_be_block_lun *be_lun,
			      union ctl_io *io)
{
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
		ctl_be_block_cw_dispatch_sync(be_lun, io);
		break;
	case WRITE_SAME_10:
	case WRITE_SAME_16:
		ctl_be_block_cw_dispatch_ws(be_lun, io);
		break;
	case UNMAP:
		ctl_be_block_cw_dispatch_unmap(be_lun, io);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

static void
ctl_be_block_nvme_cw_dispatch(struct ctl_be_block_lun *be_lun,
			      union ctl_io *io)
{
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;

	switch (io->nvmeio.cmd.opc) {
	case NVME_OPC_FLUSH:
		ctl_be_block_cw_dispatch_flush(be_lun, io);
		break;
	case NVME_OPC_WRITE_UNCORRECTABLE:
		ctl_be_block_cw_dispatch_wu(be_lun, io);
		break;
	case NVME_OPC_WRITE_ZEROES:
		ctl_be_block_cw_dispatch_wz(be_lun, io);
		break;
	case NVME_OPC_DATASET_MANAGEMENT:
		ctl_be_block_cw_dispatch_dsm(be_lun, io);
		break;
	default:
		__assert_unreachable();
	}
}

static void
ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cw_done;
	PRIV(io)->ptr = (void *)beio;

	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		ctl_be_block_scsi_cw_dispatch(be_lun, io);
		break;
	case CTL_IO_NVME:
		ctl_be_block_nvme_cw_dispatch(be_lun, io);
		break;
	default:
		__assert_unreachable();
	}
}

SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");

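/*
 * Requeue a split I/O for its next pass: clear the status and put the
 * request back on the LUN's input queue so the worker thread continues
 * where the previous pass left off.
 */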
static void
ctl_be_block_next(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;

	io = beio->io;
	be_lun = beio->lun;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_data_submit_done(io);
		return;
	}

	io->io_hdr.status &= ~CTL_STATUS_MASK;
	io->io_hdr.status |= CTL_STATUS_NONE;

	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}

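/*
 * Main READ/WRITE dispatch.  A request is carved into chunks of at most
 * CTLBLK_MAX_IO_SIZE bytes; bptrlen->len tracks how many LBAs have been
 * issued so far, and ctl_be_block_next() above requeues the I/O until
 * the whole range is covered.  As an illustration (values assumed, not
 * taken from this file): with a 512-byte blocksize and a 1 MiB
 * CTLBLK_MAX_IO_SIZE, a 3 MiB READ would be issued as three 2048-LBA
 * chunks.  For COMPARE commands two S/G lists are needed, so each chunk
 * covers half as many LBAs and the second half of sg_segs receives the
 * data moved from the initiator.
 */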
static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
		      union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE0(cbb, , write, start);
	} else {
		SDT_PROBE0(cbb, , read, start);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		switch (io->scsiio.tag_type) {
		case CTL_TAG_ORDERED:
			beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
			break;
		case CTL_TAG_HEAD_OF_QUEUE:
			beio->ds_tag_type = DEVSTAT_TAG_HEAD;
			break;
		case CTL_TAG_UNTAGGED:
		case CTL_TAG_SIMPLE:
		case CTL_TAG_ACA:
		default:
			beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
			break;
		}
		break;
	case CTL_IO_NVME:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	default:
		__assert_unreachable();
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
	lbas = CTLBLK_MAX_IO_SIZE;
	if (lbalen->flags & CTL_LLF_COMPARE) {
		beio->two_sglists = 1;
		lbas /= 2;
	}
	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
	beio->io_len = lbas * cbe_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Set up the S/G entry for this chunk.
		 */
		ctl_alloc_seg(softc, &beio->sg_segs[i],
		    MIN(CTLBLK_MAX_SEG, len_left));

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (beio->two_sglists) {
			ctl_alloc_seg(softc,
			    &beio->sg_segs[i + CTLBLK_HALF_SEGS],
			    beio->sg_segs[i].len);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	ctl_set_be_move_done(io, ctl_be_block_move_done);
	/* For compare we have separate S/G lists for read and datamove. */
	if (beio->two_sglists)
		ctl_set_kern_data_ptr(io, &beio->sg_segs[CTLBLK_HALF_SEGS]);
	else
		ctl_set_kern_data_ptr(io, beio->sg_segs);
	ctl_set_kern_data_len(io, beio->io_len);
	ctl_set_kern_sg_entries(io, beio->num_segs);
	ctl_set_kern_data_ref(io, ctl_refcnt_beio);
	ctl_set_kern_data_arg(io, beio);
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, alloc_done);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE0(cbb, , write, alloc_done);
		ctl_datamove(io);
	}
}

static void
ctl_be_block_worker(void *context, int pending)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");
	/*
	 * Fetch and process I/Os from all queues.  If we detect the LUN
	 * in CTL_LUN_FLAG_NO_MEDIA state here, it is the result of a
	 * race, so make the response maximally opaque to avoid confusing
	 * the initiator.
	 */
	for (;;) {
		mtx_lock(&be_lun->queue_lock);
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
		if (io != NULL) {
			DPRINTF("datamove queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_io_set_busy(io);
				ctl_complete_beio(beio);
				continue;
			}
			be_lun->dispatch(be_lun, beio);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
		if (io != NULL) {
			DPRINTF("config write queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_io_set_busy(io);
				ctl_config_write_done(io);
				continue;
			}
			ctl_be_block_cw_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
		if (io != NULL) {
			DPRINTF("config read queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_io_set_busy(io);
				ctl_config_read_done(io);
				continue;
			}
			ctl_be_block_cr_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
		if (io != NULL) {
			DPRINTF("input queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->input_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_io_set_busy(io);
				ctl_data_submit_done(io);
				continue;
			}
			ctl_be_block_dispatch(be_lun, io);
			continue;
		}

		/*
		 * If we get here, there is no work left in the queues, so
		 * just break out and let the task queue go to sleep.
		 */
		mtx_unlock(&be_lun->queue_lock);
		break;
	}
}

/*
 * Entry point from CTL to the backend for I/O.  We queue everything to a
 * work thread, so this just puts the I/O on a queue and wakes up the
 * thread.
 */
static int
ctl_be_block_submit(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	CTL_IO_ASSERT(io, SCSI, NVME);

	PRIV(io)->len = 0;

	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (CTL_RETVAL_COMPLETE);
}

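/*
 * The ioctl handler below is reached through the CTL character device.
 * A minimal userland sketch of driving it (editorial illustration only;
 * the device path and the elided nvlist option marshalling are
 * assumptions, and real consumers should use ctladm(8)):
 *
 *	int fd = open("/dev/cam/ctl", O_RDWR);
 *	struct ctl_lun_req req;
 *
 *	memset(&req, 0, sizeof(req));
 *	strlcpy(req.backend, "block", sizeof(req.backend));
 *	req.reqtype = CTL_LUNREQ_CREATE;
 *	if (ioctl(fd, CTL_LUN_REQ, &req) == -1)
 *		err(1, "CTL_LUN_REQ");
 *	if (req.status != CTL_LUN_OK)
 *		errx(1, "%s", req.error_str);
 */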
static int
ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
		   int flag, struct thread *td)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;
	int error;

	error = 0;
	switch (cmd) {
	case CTL_LUN_REQ: {
		struct ctl_lun_req *lun_req;

		lun_req = (struct ctl_lun_req *)addr;

		switch (lun_req->reqtype) {
		case CTL_LUNREQ_CREATE:
			error = ctl_be_block_create(softc, lun_req);
			break;
		case CTL_LUNREQ_RM:
			error = ctl_be_block_rm(softc, lun_req);
			break;
		case CTL_LUNREQ_MODIFY:
			error = ctl_be_block_modify(softc, lun_req);
			break;
		default:
			lun_req->status = CTL_LUN_ERROR;
			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
				 "invalid LUN request type %d",
				 lun_req->reqtype);
			break;
		}
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_filedata *file_data;
	struct ctl_lun_create_params *params;
	const char *value;
	struct vattr vattr;
	off_t ps, pss, po, pos, us, uss, uo, uos;
	int error;
	long pconf;

	cbe_lun = &be_lun->cbe_lun;
	file_data = &be_lun->backend.file;
	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_FILE;
	be_lun->dispatch = ctl_be_block_dispatch_file;
	be_lun->lun_flush = ctl_be_block_flush_file;
	be_lun->get_lba_status = ctl_be_block_gls_file;
	be_lun->getattr = ctl_be_block_getattr_file;
	be_lun->unmap = ctl_be_block_unmap_file;
	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_GETATTR() for file %s",
			 be_lun->dev_path);
		return (error);
	}

	error = VOP_PATHCONF(be_lun->vn, _PC_DEALLOC_PRESENT, &pconf);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_PATHCONF() for file %s",
			 be_lun->dev_path);
		return (error);
	}
	if (pconf == 1)
		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;

	file_data->cred = crhold(curthread->td_ucred);
	if (params->lun_size_bytes != 0)
		be_lun->size_bytes = params->lun_size_bytes;
	else
		be_lun->size_bytes = vattr.va_size;

	/*
	 * For files we can use any logical block size.  Prefer 512 bytes
	 * for compatibility reasons.  If the file's vattr.va_blocksize
	 * (preferred I/O block size) is larger than and a multiple of the
	 * chosen logical block size, report it as the physical block size.
	 */
	if (params->blocksize_bytes != 0)
		cbe_lun->blocksize = params->blocksize_bytes;
	else if (cbe_lun->lun_type == T_CDROM)
		cbe_lun->blocksize = 2048;
	else
		cbe_lun->blocksize = 512;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	us = ps = vattr.va_blocksize;
	uo = po = 0;

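	/*
	 * Worked example of the exponent/offset math below (values are
	 * illustrative): with a 512-byte logical block size and a
	 * "pblocksize" option of 4096, pss = 4096 / 512 = 8, a power of
	 * two, so pblockexp = fls(8) - 1 = 3 (eight logical blocks per
	 * physical block); with a zero "pblockoffset", pblockoff =
	 * (8 - 0) % 8 = 0.
	 */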
	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	/*
	 * Sanity check.  The media size has to be at least one
	 * sector long.
	 */
	if (be_lun->size_bytes < cbe_lun->blocksize) {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "file %s size %ju < block size %u", be_lun->dev_path,
			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
	}

	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
	return (error);
}

static int
ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_lun_create_params *params;
	struct cdevsw *csw;
	struct cdev *dev;
	const char *value;
	int error, atomic, maxio, ref, unmap, tmp;
	off_t ps, pss, po, pos, us, uss, uo, uos, otmp;

	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_DEV;
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (ENXIO);
	if (strcmp(csw->d_name, "zvol") == 0) {
		be_lun->dispatch = ctl_be_block_dispatch_zvol;
		be_lun->get_lba_status = ctl_be_block_gls_zvol;
		atomic = maxio = CTLBLK_MAX_IO_SIZE;
	} else {
		be_lun->dispatch = ctl_be_block_dispatch_dev;
		be_lun->get_lba_status = NULL;
		atomic = 0;
		maxio = dev->si_iosize_max;
		if (maxio <= 0)
			maxio = DFLTPHYS;
		if (maxio > CTLBLK_MAX_SEG)
			maxio = CTLBLK_MAX_SEG;
	}
	be_lun->lun_flush = ctl_be_block_flush_dev;
	be_lun->getattr = ctl_be_block_getattr_dev;
	be_lun->unmap = ctl_be_block_unmap_dev;

	if (!csw->d_ioctl) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "no d_ioctl for device %s!", be_lun->dev_path);
		return (ENODEV);
	}

	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
			     curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGSECTORSIZE ioctl "
			 "on %s!", error, be_lun->dev_path);
		return (error);
	}

	/*
	 * If the user has asked for a blocksize that is greater than the
	 * backing device's blocksize, we can do it only if the blocksize
	 * the user is asking for is an even multiple of the underlying
	 * device's blocksize.
	 */
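	/*
	 * Worked example (illustrative values): a requested blocksize of
	 * 4096 on a 512-byte device is accepted, since 4096 % 512 == 0;
	 * 6144 on a 4096-byte device is rejected as not an even multiple;
	 * and 3072 on a 4096-byte device is rejected below as smaller
	 * than the backing device's blocksize.
	 */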
	if ((params->blocksize_bytes != 0) &&
	    (params->blocksize_bytes >= tmp)) {
		if (params->blocksize_bytes % tmp == 0) {
			cbe_lun->blocksize = params->blocksize_bytes;
		} else {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested blocksize %u is not an even "
				 "multiple of backing device blocksize %u",
				 params->blocksize_bytes, tmp);
			return (EINVAL);
		}
	} else if (params->blocksize_bytes != 0) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "requested blocksize %u < backing device "
			 "blocksize %u", params->blocksize_bytes, tmp);
		return (EINVAL);
	} else if (cbe_lun->lun_type == T_CDROM)
		cbe_lun->blocksize = MAX(tmp, 2048);
	else
		cbe_lun->blocksize = tmp;

	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
			     curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGMEDIASIZE "
			 "ioctl on %s!", error,
			 be_lun->dev_path);
		return (error);
	}

	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes > otmp) {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested LUN size %ju > backing device "
				 "size %ju",
				 (uintmax_t)params->lun_size_bytes,
				 (uintmax_t)otmp);
			return (EINVAL);
		}

		be_lun->size_bytes = params->lun_size_bytes;
	} else
		be_lun->size_bytes = otmp;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
			     curthread);
	if (error)
		ps = po = 0;
	else {
		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
				     FREAD, curthread);
		if (error)
			po = 0;
	}
	us = ps;
	uo = po;

	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;

	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
		unmap = 1;
	} else {
		struct diocgattr_arg arg;

		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
		arg.len = sizeof(arg.value.i);
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
		unmap = (error == 0) ? arg.value.i : 0;
	}
	value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
	if (value != NULL)
		unmap = (strcmp(value, "on") == 0);
	if (unmap)
		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	dev_relthread(dev, ref);
	return (0);
}

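/*
 * Close the backing vnode (if any) and release per-backend state such as
 * the file credentials held for VOP calls.
 */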
static int
ctl_be_block_close(struct ctl_be_block_lun *be_lun)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	int flags;

	if (be_lun->vn) {
		flags = FREAD;
		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
			flags |= FWRITE;
		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
		be_lun->vn = NULL;

		switch (be_lun->dev_type) {
		case CTL_BE_BLOCK_DEV:
			break;
		case CTL_BE_BLOCK_FILE:
			if (be_lun->backend.file.cred != NULL) {
				crfree(be_lun->backend.file.cred);
				be_lun->backend.file.cred = NULL;
			}
			break;
		case CTL_BE_BLOCK_NONE:
			break;
		default:
			panic("Unexpected backend type %d", be_lun->dev_type);
			break;
		}
		be_lun->dev_type = CTL_BE_BLOCK_NONE;
	}
	return (0);
}

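/*
 * Open the backing store named by the "file" option, falling back to a
 * read-only open if a writable one fails, then hand off to the device-
 * or file-specific open routine depending on the vnode type.
 */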
static int
ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct nameidata nd;
	const char *value;
	int error, flags;

	error = 0;
	if (rootvnode == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "Root filesystem is not mounted");
		return (1);
	}
	pwd_ensure_dirs();

	value = dnvlist_get_string(cbe_lun->options, "file", NULL);
	if (value == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "no file argument specified");
		return (1);
	}
	free(be_lun->dev_path, M_CTLBLK);
	be_lun->dev_path = strdup(value, M_CTLBLK);

	flags = FREAD;
	value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
	if (value != NULL) {
		if (strcmp(value, "on") != 0)
			flags |= FWRITE;
	} else if (cbe_lun->lun_type == T_DIRECT)
		flags |= FWRITE;

again:
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path);
	error = vn_open(&nd, &flags, 0, NULL);
	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
		flags &= ~FWRITE;
		goto again;
	}
	if (error) {
		/*
		 * If the user doesn't supply a fully qualified path,
		 * prepending /dev/ is the only reasonable guess we can
		 * make.  Users who want a plain file need to specify the
		 * full path.
		 */
		if (be_lun->dev_path[0] != '/') {
			char *dev_name;

			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
				be_lun->dev_path);
			free(be_lun->dev_path, M_CTLBLK);
			be_lun->dev_path = dev_name;
			goto again;
		}
		snprintf(req->error_str, sizeof(req->error_str),
		    "error opening %s: %d", be_lun->dev_path, error);
		return (error);
	}
	if (flags & FWRITE)
		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
	else
		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;

	NDFREE_PNBUF(&nd);
	be_lun->vn = nd.ni_vp;

	/* We only support disks and files. */
	if (vn_isdisk_error(be_lun->vn, &error)) {
		error = ctl_be_block_open_dev(be_lun, req);
	} else if (be_lun->vn->v_type == VREG) {
		error = ctl_be_block_open_file(be_lun, req);
	} else {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "%s is not a disk or plain file", be_lun->dev_path);
	}
	VOP_UNLOCK(be_lun->vn);

	if (error != 0)
		ctl_be_block_close(be_lun);
	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
	value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
	if (value != NULL && strcmp(value, "on") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
	else if (value != NULL && strcmp(value, "read") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	else if (value != NULL && strcmp(value, "soft") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
	else if (value != NULL && strcmp(value, "off") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	return (0);
}

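/*
 * Handle a CTL_LUNREQ_CREATE request: allocate and initialize the
 * backend LUN, open its backing store, spin up the worker taskqueue,
 * and register the LUN with CTL.
 */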
static int
ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lun_create_params *params;
	char tmpstr[32];
	const char *value;
	int retval, num_threads;
	int tmp_num_threads;

	params = &req->reqdata.create;
	retval = 0;
	req->status = CTL_LUN_OK;

	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
	cbe_lun = &be_lun->cbe_lun;
	be_lun->params = req->reqdata.create;
	be_lun->softc = softc;
	STAILQ_INIT(&be_lun->input_queue);
	STAILQ_INIT(&be_lun->config_read_queue);
	STAILQ_INIT(&be_lun->config_write_queue);
	STAILQ_INIT(&be_lun->datamove_queue);
	mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF);
	mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF);
	cbe_lun->options = nvlist_clone(req->args_nvl);

	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
		cbe_lun->lun_type = params->device_type;
	else
		cbe_lun->lun_type = T_DIRECT;
	be_lun->flags = 0;
	cbe_lun->flags = 0;
	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;

	if (cbe_lun->lun_type == T_DIRECT ||
	    cbe_lun->lun_type == T_CDROM) {
		be_lun->size_bytes = params->lun_size_bytes;
		if (params->blocksize_bytes != 0)
			cbe_lun->blocksize = params->blocksize_bytes;
		else if (cbe_lun->lun_type == T_CDROM)
			cbe_lun->blocksize = 2048;
		else
			cbe_lun->blocksize = 512;
		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
		    0 : (be_lun->size_blocks - 1);

		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
			retval = ctl_be_block_open(be_lun, req);
			if (retval != 0) {
				retval = 0;
				req->status = CTL_LUN_WARNING;
			}
		}
		num_threads = cbb_num_threads;
	} else {
		num_threads = 1;
	}

	value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
	if (value != NULL) {
		tmp_num_threads = strtol(value, NULL, 0);

		/*
		 * We don't let the user specify fewer than one
		 * thread, but hope they're clueful enough not to
		 * specify 1000 threads.
		 */
		if (tmp_num_threads < 1) {
			snprintf(req->error_str, sizeof(req->error_str),
				 "invalid number of threads %s",
				 value);
			goto bailout_error;
		}
		num_threads = tmp_num_threads;
	}

	if (be_lun->vn == NULL)
		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
	/* Tell the user the blocksize we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;
	params->blocksize_bytes = cbe_lun->blocksize;
	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
		cbe_lun->req_lun_id = params->req_lun_id;
		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
	} else
		cbe_lun->req_lun_id = 0;

	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
	cbe_lun->be = &ctl_be_block_driver;

	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
			 softc->num_luns);
		strncpy((char *)cbe_lun->serial_num, tmpstr,
			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));

		/* Tell the user what we used for a serial number */
		strncpy((char *)params->serial_num, tmpstr,
			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
	} else {
		strncpy((char *)cbe_lun->serial_num, params->serial_num,
			MIN(sizeof(cbe_lun->serial_num),
			sizeof(params->serial_num)));
	}
	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
		strncpy((char *)cbe_lun->device_id, tmpstr,
			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));

		/* Tell the user what we used for a device ID */
		strncpy((char *)params->device_id, tmpstr,
			MIN(sizeof(params->device_id), sizeof(tmpstr)));
	} else {
		strncpy((char *)cbe_lun->device_id, params->device_id,
			MIN(sizeof(cbe_lun->device_id),
			sizeof(params->device_id)));
	}

	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);

	be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK,
	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);

	if (be_lun->io_taskqueue == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "unable to create taskqueue");
		goto bailout_error;
	}

	/*
	 * Note that we start the same number of threads by default for
	 * both the file case and the block device case.  For the file
	 * case, we need multiple threads to allow concurrency, because the
	 * vnode interface is designed to be a blocking interface.  For the
	 * block device case, ZFS zvols at least will block the caller's
	 * context in many instances, and so we need multiple threads to
	 * overcome that problem.  Other block devices don't need as many
	 * threads, but they shouldn't cause too many problems.
	 *
	 * If the user wants to just have a single thread for a block
	 * device, he can specify that when the LUN is created, or change
	 * the tunable/sysctl to alter the default number of threads.
	 */
	retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
					 /*num threads*/num_threads,
					 /*priority*/PUSER,
					 /*proc*/control_softc->ctl_proc,
					 /*thread name*/"block");

	if (retval != 0)
		goto bailout_error;

	be_lun->num_threads = num_threads;

	retval = ctl_add_lun(&be_lun->cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "ctl_add_lun() returned error %d, see dmesg for "
			 "details", retval);
		retval = 0;
		goto bailout_error;
	}

	be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id,
					       cbe_lun->blocksize,
					       DEVSTAT_ALL_SUPPORTED,
					       cbe_lun->lun_type
					       | DEVSTAT_TYPE_IF_OTHER,
					       DEVSTAT_PRIORITY_OTHER);

	mtx_lock(&softc->lock);
	softc->num_luns++;
	SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links);
	mtx_unlock(&softc->lock);

	params->req_lun_id = cbe_lun->lun_id;

	return (retval);

bailout_error:
	req->status = CTL_LUN_ERROR;

	if (be_lun->io_taskqueue != NULL)
		taskqueue_free(be_lun->io_taskqueue);
	ctl_be_block_close(be_lun);
	if (be_lun->dev_path != NULL)
		free(be_lun->dev_path, M_CTLBLK);
	nvlist_destroy(cbe_lun->options);
	mtx_destroy(&be_lun->queue_lock);
	mtx_destroy(&be_lun->io_lock);
	free(be_lun, M_CTLBLK);

	return (retval);
}

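/*
 * Handle a CTL_LUNREQ_RM request: detach the LUN from the backend list,
 * drain outstanding I/O, and wait for CTL to finish tearing it down.
 */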
static int
ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_rm_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	params = &req->reqdata.rm;

	sx_xlock(&softc->modify_lock);
	mtx_lock(&softc->lock);
	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id) {
			SLIST_REMOVE(&softc->lun_list, be_lun,
			    ctl_be_block_lun, links);
			softc->num_luns--;
			break;
		}
	}
	mtx_unlock(&softc->lock);
	sx_xunlock(&softc->modify_lock);
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "LUN %u is not managed by the block backend",
			 params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	if (be_lun->vn != NULL) {
		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
		ctl_lun_no_media(cbe_lun);
		taskqueue_drain_all(be_lun->io_taskqueue);
		ctl_be_block_close(be_lun);
	}

	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
	mtx_unlock(&softc->lock);

	retval = ctl_remove_lun(cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned from ctl_remove_lun() for "
			 "LUN %d", retval, params->lun_id);
		mtx_lock(&softc->lock);
		be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
		mtx_unlock(&softc->lock);
		goto bailout_error;
	}

	mtx_lock(&softc->lock);
	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0);
		if (retval == EINTR)
			break;
	}
	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
	if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
		mtx_unlock(&softc->lock);
		free(be_lun, M_CTLBLK);
	} else {
		mtx_unlock(&softc->lock);
		return (EINTR);
	}

	req->status = CTL_LUN_OK;
	return (0);

bailout_error:
	req->status = CTL_LUN_ERROR;
	return (0);
}

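/*
 * Handle a CTL_LUNREQ_MODIFY request: refresh the LUN options, reopen or
 * close the backing store as the HA role dictates, and notify CTL if the
 * capacity or media state changed.
 */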
static int
ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_modify_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	const char *value;
	uint64_t oldsize;
	int error, wasprim;

	params = &req->reqdata.modify;

	sx_xlock(&softc->modify_lock);
	mtx_lock(&softc->lock);
	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "LUN %u is not managed by the block backend",
			 params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	if (params->lun_size_bytes != 0)
		be_lun->params.lun_size_bytes = params->lun_size_bytes;

	if (req->args_nvl != NULL) {
		nvlist_destroy(cbe_lun->options);
		cbe_lun->options = nvlist_clone(req->args_nvl);
	}

	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
		else
			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
			ctl_lun_primary(cbe_lun);
		else
			ctl_lun_secondary(cbe_lun);
	}

	oldsize = be_lun->size_blocks;
	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
		if (be_lun->vn == NULL)
			error = ctl_be_block_open(be_lun, req);
		else if (vn_isdisk_error(be_lun->vn, &error))
			error = ctl_be_block_open_dev(be_lun, req);
		else if (be_lun->vn->v_type == VREG) {
			vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
			error = ctl_be_block_open_file(be_lun, req);
			VOP_UNLOCK(be_lun->vn);
		} else
			error = EINVAL;
		if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
		    be_lun->vn != NULL) {
			cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_has_media(cbe_lun);
		} else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
		    be_lun->vn == NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
		}
		cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
	} else {
		if (be_lun->vn != NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
			taskqueue_drain_all(be_lun->io_taskqueue);
			error = ctl_be_block_close(be_lun);
		} else
			error = 0;
	}
	if (be_lun->size_blocks != oldsize)
		ctl_lun_capacity_changed(cbe_lun);

	/* Tell the user the exact size we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;

	sx_xunlock(&softc->modify_lock);
	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
	return (0);

bailout_error:
	sx_xunlock(&softc->modify_lock);
	req->status = CTL_LUN_ERROR;
	return (0);
}

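/*
 * Teardown callback registered as cbe_lun->lun_shutdown: drain and free
 * the taskqueue and per-LUN resources, then either wake the waiter in
 * ctl_be_block_rm() or free the structure outright.
 */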
static void
ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun;
	struct ctl_be_block_softc *softc = be_lun->softc;

	taskqueue_drain_all(be_lun->io_taskqueue);
	taskqueue_free(be_lun->io_taskqueue);
	if (be_lun->disk_stats != NULL)
		devstat_remove_entry(be_lun->disk_stats);
	nvlist_destroy(be_lun->cbe_lun.options);
	free(be_lun->dev_path, M_CTLBLK);
	mtx_destroy(&be_lun->queue_lock);
	mtx_destroy(&be_lun->io_lock);

	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
	if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING)
		wakeup(be_lun);
	else
		free(be_lun, M_CTLBLK);
	mtx_unlock(&softc->lock);
}

static int
ctl_be_block_scsi_config_write(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	DPRINTF("entered\n");

	cbe_lun = CTL_BACKEND_LUN(io);
	be_lun = (struct ctl_be_block_lun *)cbe_lun;

	retval = 0;
	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
	case WRITE_SAME_10:
	case WRITE_SAME_16:
	case UNMAP:
		/*
		 * The upper level CTL code will filter out any CDBs with
		 * the immediate bit set and return the proper error.
		 *
		 * We don't really need to worry about what LBA range the
		 * user asked to be synced out.  When they issue a sync
		 * cache command, we'll sync out the whole thing.
		 */
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
				   links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
		break;
	case START_STOP_UNIT: {
		struct scsi_start_stop_unit *cdb;
		struct ctl_lun_req req;

		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
		if ((cdb->how & SSS_PC_MASK) != 0) {
			ctl_set_success(&io->scsiio);
			ctl_config_write_done(io);
			break;
		}
		if (cdb->how & SSS_START) {
			if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
				retval = ctl_be_block_open(be_lun, &req);
				cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
				if (retval == 0) {
					cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
					ctl_lun_has_media(cbe_lun);
				} else {
					cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
					ctl_lun_no_media(cbe_lun);
				}
			}
			ctl_start_lun(cbe_lun);
		} else {
			ctl_stop_lun(cbe_lun);
			if (cdb->how & SSS_LOEJ) {
				cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
				cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
				ctl_lun_ejected(cbe_lun);
				if (be_lun->vn != NULL)
					ctl_be_block_close(be_lun);
			}
		}

		ctl_set_success(&io->scsiio);
		ctl_config_write_done(io);
		break;
	}
	case PREVENT_ALLOW:
		ctl_set_success(&io->scsiio);
		ctl_config_write_done(io);
		break;
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_write_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}

static int
ctl_be_block_nvme_config_write(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	switch (io->nvmeio.cmd.opc) {
	case NVME_OPC_DATASET_MANAGEMENT:
		DSM_RANGE(io) = 0;
		/* FALLTHROUGH */
	case NVME_OPC_FLUSH:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_WRITE_ZEROES:
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
				   links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
		break;
	default:
		ctl_nvme_set_invalid_opcode(&io->nvmeio);
		ctl_config_write_done(io);
		break;
	}
	return (CTL_RETVAL_COMPLETE);
}

static int
ctl_be_block_config_write(union ctl_io *io)
{
	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		return (ctl_be_block_scsi_config_write(io));
	case CTL_IO_NVME:
		return (ctl_be_block_nvme_config_write(io));
	default:
		__assert_unreachable();
	}
}

static int
ctl_be_block_scsi_config_read(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	int retval = 0;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	switch (io->scsiio.cdb[0]) {
	case SERVICE_ACTION_IN:
		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
			mtx_lock(&be_lun->queue_lock);
			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
			    &io->io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			taskqueue_enqueue(be_lun->io_taskqueue,
			    &be_lun->io_task);
			retval = CTL_RETVAL_QUEUED;
			break;
		}
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 1,
				      /*bit*/ 4);
		ctl_config_read_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_read_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}

static int
ctl_be_block_nvme_config_read(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	switch (io->nvmeio.cmd.opc) {
	case NVME_OPC_IDENTIFY:
	{
		uint8_t cns;

		cns = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
		switch (cns) {
		case 0:
		case 3:
			mtx_lock(&be_lun->queue_lock);
			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
			    &io->io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			taskqueue_enqueue(be_lun->io_taskqueue,
			    &be_lun->io_task);
			return (CTL_RETVAL_QUEUED);
		default:
			ctl_nvme_set_invalid_field(&io->nvmeio);
			ctl_config_read_done(io);
			break;
		}
		break;
	}
	default:
		ctl_nvme_set_invalid_opcode(&io->nvmeio);
		ctl_config_read_done(io);
		break;
	}
	return (CTL_RETVAL_COMPLETE);
}

static int
ctl_be_block_config_read(union ctl_io *io)
{
	switch (io->io_hdr.io_type) {
	case CTL_IO_SCSI:
		return (ctl_be_block_scsi_config_read(io));
	case CTL_IO_NVME_ADMIN:
		return (ctl_be_block_nvme_config_read(io));
	default:
		__assert_unreachable();
	}
}

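/*
 * Emit backend-specific LUN information as an XML fragment when CTL
 * renders the LUN list; e.g. a LUN created with 14 worker threads
 * yields "\t<num_threads>14</num_threads>\n".
 */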
static int
ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb)
{
	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
	int retval;

	retval = sbuf_cat(sb, "\t<num_threads>");
	if (retval != 0)
		goto bailout;
	retval = sbuf_printf(sb, "%d", lun->num_threads);
	if (retval != 0)
		goto bailout;
	retval = sbuf_cat(sb, "</num_threads>\n");

bailout:
	return (retval);
}

static uint64_t
ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
{
	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;

	if (lun->getattr == NULL)
		return (UINT64_MAX);
	return (lun->getattr(lun, attrname));
}

static int
ctl_be_block_init(void)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;

	sx_init(&softc->modify_lock, "ctlblock modify");
	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
	softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG,
	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
		softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG,
		    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
	SLIST_INIT(&softc->lun_list);
	return (0);
}

static int
ctl_be_block_shutdown(void)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;
	struct ctl_be_block_lun *lun;

	mtx_lock(&softc->lock);
	while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
		SLIST_REMOVE_HEAD(&softc->lun_list, links);
		softc->num_luns--;
		/*
		 * Drop our lock here.  Since ctl_remove_lun() can call
		 * back into us, this could potentially lead to a recursive
		 * lock of the same mutex, which would cause a hang.
		 */
		mtx_unlock(&softc->lock);
		ctl_remove_lun(&lun->cbe_lun);
		mtx_lock(&softc->lock);
	}
	mtx_unlock(&softc->lock);
	uma_zdestroy(softc->bufmin_zone);
	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
		uma_zdestroy(softc->bufmax_zone);
	uma_zdestroy(softc->beio_zone);
	mtx_destroy(&softc->lock);
	sx_destroy(&softc->modify_lock);
	return (0);
}