// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2025, Christoph Hellwig.
 * Copyright (c) 2025, Western Digital Corporation or its affiliates.
 *
 * Zoned Loop Device driver - exports a zoned block device using one file per
 * zone as backing storage.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/blk-mq.h>
#include <linux/blkzoned.h>
#include <linux/pagemap.h>
#include <linux/miscdevice.h>
#include <linux/falloc.h>
#include <linux/mutex.h>
#include <linux/parser.h>
#include <linux/seq_file.h>

/*
 * Options for adding (and removing) a device.
 */
enum {
        ZLOOP_OPT_ERR                   = 0,
        ZLOOP_OPT_ID                    = (1 << 0),
        ZLOOP_OPT_CAPACITY              = (1 << 1),
        ZLOOP_OPT_ZONE_SIZE             = (1 << 2),
        ZLOOP_OPT_ZONE_CAPACITY         = (1 << 3),
        ZLOOP_OPT_NR_CONV_ZONES         = (1 << 4),
        ZLOOP_OPT_BASE_DIR              = (1 << 5),
        ZLOOP_OPT_NR_QUEUES             = (1 << 6),
        ZLOOP_OPT_QUEUE_DEPTH           = (1 << 7),
        ZLOOP_OPT_BUFFERED_IO           = (1 << 8),
        ZLOOP_OPT_ZONE_APPEND           = (1 << 9),
        ZLOOP_OPT_ORDERED_ZONE_APPEND   = (1 << 10),
};

static const match_table_t zloop_opt_tokens = {
        { ZLOOP_OPT_ID,                 "id=%d" },
        { ZLOOP_OPT_CAPACITY,           "capacity_mb=%u" },
        { ZLOOP_OPT_ZONE_SIZE,          "zone_size_mb=%u" },
        { ZLOOP_OPT_ZONE_CAPACITY,      "zone_capacity_mb=%u" },
        { ZLOOP_OPT_NR_CONV_ZONES,      "conv_zones=%u" },
        { ZLOOP_OPT_BASE_DIR,           "base_dir=%s" },
        { ZLOOP_OPT_NR_QUEUES,          "nr_queues=%u" },
        { ZLOOP_OPT_QUEUE_DEPTH,        "queue_depth=%u" },
        { ZLOOP_OPT_BUFFERED_IO,        "buffered_io" },
        { ZLOOP_OPT_ZONE_APPEND,        "zone_append=%u" },
        { ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append" },
        { ZLOOP_OPT_ERR,                NULL }
};

/* Default values for the "add" operation. */
#define ZLOOP_DEF_ID                    -1
#define ZLOOP_DEF_ZONE_SIZE             ((256ULL * SZ_1M) >> SECTOR_SHIFT)
#define ZLOOP_DEF_NR_ZONES              64
#define ZLOOP_DEF_NR_CONV_ZONES         8
#define ZLOOP_DEF_BASE_DIR              "/var/local/zloop"
#define ZLOOP_DEF_NR_QUEUES             1
#define ZLOOP_DEF_QUEUE_DEPTH           128
#define ZLOOP_DEF_BUFFERED_IO           false
#define ZLOOP_DEF_ZONE_APPEND           true
#define ZLOOP_DEF_ORDERED_ZONE_APPEND   false

/* Arbitrary limit on the zone size (16GB). */
#define ZLOOP_MAX_ZONE_SIZE_MB          16384

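/*
 * Illustrative usage (added note, not part of the upstream file): devices are
 * added and removed by writing to the "zloop-control" misc device defined at
 * the bottom of this file, with the option syntax given by zloop_opt_tokens
 * above. A minimal sketch, assuming the control node is /dev/zloop-control:
 *
 *   echo "add id=0,capacity_mb=16384,zone_size_mb=256,conv_zones=8" \
 *       > /dev/zloop-control
 *   echo "remove id=0" > /dev/zloop-control
 *
 * Options that are not given fall back to the ZLOOP_DEF_* defaults above.
 */
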
struct zloop_options {
        unsigned int mask;
        int id;
        sector_t capacity;
        sector_t zone_size;
        sector_t zone_capacity;
        unsigned int nr_conv_zones;
        char *base_dir;
        unsigned int nr_queues;
        unsigned int queue_depth;
        bool buffered_io;
        bool zone_append;
        bool ordered_zone_append;
};

/*
 * Device states.
 */
enum {
        Zlo_creating = 0,
        Zlo_live,
        Zlo_deleting,
};

enum zloop_zone_flags {
        ZLOOP_ZONE_CONV = 0,
        ZLOOP_ZONE_SEQ_ERROR,
};

struct zloop_zone {
        struct file *file;

        unsigned long flags;
        struct mutex lock;
        spinlock_t wp_lock;
        enum blk_zone_cond cond;
        sector_t start;
        sector_t wp;

        gfp_t old_gfp_mask;
};

struct zloop_device {
        unsigned int id;
        unsigned int state;

        struct blk_mq_tag_set tag_set;
        struct gendisk *disk;

        struct workqueue_struct *workqueue;
        bool buffered_io;
        bool zone_append;
        bool ordered_zone_append;

        const char *base_dir;
        struct file *data_dir;

        unsigned int zone_shift;
        sector_t zone_size;
        sector_t zone_capacity;
        unsigned int nr_zones;
        unsigned int nr_conv_zones;
        unsigned int block_size;

        struct zloop_zone zones[] __counted_by(nr_zones);
};

struct zloop_cmd {
        struct work_struct work;
        atomic_t ref;
        sector_t sector;
        sector_t nr_sectors;
        long ret;
        struct kiocb iocb;
        struct bio_vec *bvec;
};

static DEFINE_IDR(zloop_index_idr);
static DEFINE_MUTEX(zloop_ctl_mutex);

static unsigned int rq_zone_no(struct request *rq)
{
        struct zloop_device *zlo = rq->q->queuedata;

        return blk_rq_pos(rq) >> zlo->zone_shift;
}

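/*
 * Note (added for clarity): the size of a sequential zone's backing file
 * encodes its state. For example, with the default 256 MiB zone capacity
 * (524288 sectors), an empty file maps to BLK_ZONE_COND_EMPTY with the write
 * pointer at the zone start, a full-capacity file maps to BLK_ZONE_COND_FULL,
 * and any size in between maps to BLK_ZONE_COND_CLOSED with the write pointer
 * at zone start + file size in sectors, as implemented below.
 */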
static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
{
        struct zloop_zone *zone = &zlo->zones[zone_no];
        struct kstat stat;
        sector_t file_sectors;
        unsigned long flags;
        int ret;

        lockdep_assert_held(&zone->lock);

        ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
        if (ret < 0) {
                pr_err("Failed to get zone %u file stat (err=%d)\n",
                       zone_no, ret);
                set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
                return ret;
        }

        file_sectors = stat.size >> SECTOR_SHIFT;
        if (file_sectors > zlo->zone_capacity) {
                pr_err("Zone %u file too large (%llu sectors > %llu)\n",
                       zone_no, file_sectors, zlo->zone_capacity);
                return -EINVAL;
        }

        if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
                pr_err("Zone %u file size not aligned to block size %u\n",
                       zone_no, zlo->block_size);
                return -EINVAL;
        }

        spin_lock_irqsave(&zone->wp_lock, flags);
        if (!file_sectors) {
                zone->cond = BLK_ZONE_COND_EMPTY;
                zone->wp = zone->start;
        } else if (file_sectors == zlo->zone_capacity) {
                zone->cond = BLK_ZONE_COND_FULL;
                zone->wp = ULLONG_MAX;
        } else {
                zone->cond = BLK_ZONE_COND_CLOSED;
                zone->wp = zone->start + file_sectors;
        }
        spin_unlock_irqrestore(&zone->wp_lock, flags);

        return 0;
}

static int zloop_open_zone(struct zloop_device *zlo, unsigned int zone_no)
{
        struct zloop_zone *zone = &zlo->zones[zone_no];
        int ret = 0;

        if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
                return -EIO;

        mutex_lock(&zone->lock);

        if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
                ret = zloop_update_seq_zone(zlo, zone_no);
                if (ret)
                        goto unlock;
        }

        switch (zone->cond) {
        case BLK_ZONE_COND_EXP_OPEN:
                break;
        case BLK_ZONE_COND_EMPTY:
        case BLK_ZONE_COND_CLOSED:
        case BLK_ZONE_COND_IMP_OPEN:
                zone->cond = BLK_ZONE_COND_EXP_OPEN;
                break;
        case BLK_ZONE_COND_FULL:
        default:
                ret = -EIO;
                break;
        }

unlock:
        mutex_unlock(&zone->lock);

        return ret;
}

static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
{
        struct zloop_zone *zone = &zlo->zones[zone_no];
        unsigned long flags;
        int ret = 0;

        if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
                return -EIO;

        mutex_lock(&zone->lock);

        if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
                ret = zloop_update_seq_zone(zlo, zone_no);
                if (ret)
                        goto unlock;
        }

        switch (zone->cond) {
        case BLK_ZONE_COND_CLOSED:
                break;
        case BLK_ZONE_COND_IMP_OPEN:
        case BLK_ZONE_COND_EXP_OPEN:
                spin_lock_irqsave(&zone->wp_lock, flags);
                if (zone->wp == zone->start)
                        zone->cond = BLK_ZONE_COND_EMPTY;
                else
                        zone->cond = BLK_ZONE_COND_CLOSED;
                spin_unlock_irqrestore(&zone->wp_lock, flags);
                break;
        case BLK_ZONE_COND_EMPTY:
        case BLK_ZONE_COND_FULL:
        default:
                ret = -EIO;
                break;
        }

unlock:
        mutex_unlock(&zone->lock);

        return ret;
}

static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
{
        struct zloop_zone *zone = &zlo->zones[zone_no];
        unsigned long flags;
        int ret = 0;

        if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
                return -EIO;

        mutex_lock(&zone->lock);

        if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
            zone->cond == BLK_ZONE_COND_EMPTY)
                goto unlock;

        if (vfs_truncate(&zone->file->f_path, 0)) {
                set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
                ret = -EIO;
                goto unlock;
        }

        spin_lock_irqsave(&zone->wp_lock, flags);
        zone->cond = BLK_ZONE_COND_EMPTY;
        zone->wp = zone->start;
        clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
        spin_unlock_irqrestore(&zone->wp_lock, flags);

unlock:
        mutex_unlock(&zone->lock);

        return ret;
}

static int zloop_reset_all_zones(struct zloop_device *zlo)
{
        unsigned int i;
        int ret;

        for (i = zlo->nr_conv_zones; i < zlo->nr_zones; i++) {
                ret = zloop_reset_zone(zlo, i);
                if (ret)
                        return ret;
        }

        return 0;
}

static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
{
        struct zloop_zone *zone = &zlo->zones[zone_no];
        unsigned long flags;
        int ret = 0;

        if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
                return -EIO;

        mutex_lock(&zone->lock);

        if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
            zone->cond == BLK_ZONE_COND_FULL)
                goto unlock;

        if (vfs_truncate(&zone->file->f_path, zlo->zone_size << SECTOR_SHIFT)) {
                set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
                ret = -EIO;
                goto unlock;
        }

        spin_lock_irqsave(&zone->wp_lock, flags);
        zone->cond = BLK_ZONE_COND_FULL;
        zone->wp = ULLONG_MAX;
        clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
        spin_unlock_irqrestore(&zone->wp_lock, flags);

unlock:
        mutex_unlock(&zone->lock);

        return ret;
}

static void zloop_put_cmd(struct zloop_cmd *cmd)
{
        struct request *rq = blk_mq_rq_from_pdu(cmd);

        if (!atomic_dec_and_test(&cmd->ref))
                return;
        kfree(cmd->bvec);
        cmd->bvec = NULL;
        if (likely(!blk_should_fake_timeout(rq->q)))
                blk_mq_complete_request(rq);
}

static void zloop_rw_complete(struct kiocb *iocb, long ret)
{
        struct zloop_cmd *cmd = container_of(iocb, struct zloop_cmd, iocb);

        cmd->ret = ret;
        zloop_put_cmd(cmd);
}

static void zloop_rw(struct zloop_cmd *cmd)
{
        struct request *rq = blk_mq_rq_from_pdu(cmd);
        struct zloop_device *zlo = rq->q->queuedata;
        unsigned int zone_no = rq_zone_no(rq);
        sector_t sector = blk_rq_pos(rq);
        sector_t nr_sectors = blk_rq_sectors(rq);
        bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
        bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
        int rw = is_write ? ITER_SOURCE : ITER_DEST;
        struct req_iterator rq_iter;
        struct zloop_zone *zone;
        struct iov_iter iter;
        struct bio_vec tmp;
        unsigned long flags;
        sector_t zone_end;
        unsigned int nr_bvec;
        int ret;

        atomic_set(&cmd->ref, 2);
        cmd->sector = sector;
        cmd->nr_sectors = nr_sectors;
        cmd->ret = 0;

        if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
                ret = -EIO;
                goto out;
        }

        /* We should never get an I/O beyond the device capacity. */
        if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
                ret = -EIO;
                goto out;
        }
        zone = &zlo->zones[zone_no];
        zone_end = zone->start + zlo->zone_capacity;

        /*
         * The block layer should never send requests that are not fully
         * contained within the zone.
         */
        if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
                ret = -EIO;
                goto out;
        }

        if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
                mutex_lock(&zone->lock);
                ret = zloop_update_seq_zone(zlo, zone_no);
                mutex_unlock(&zone->lock);
                if (ret)
                        goto out;
        }

        if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
                mutex_lock(&zone->lock);

                spin_lock_irqsave(&zone->wp_lock, flags);

                /*
                 * Zone append operations always go at the current write
                 * pointer, but regular write operations must already be
                 * aligned to the write pointer when submitted.
                 */
                if (is_append) {
                        /*
                         * If ordered zone append is in use, we already checked
                         * and set the target sector in zloop_queue_rq().
                         */
                        if (!zlo->ordered_zone_append) {
                                if (zone->cond == BLK_ZONE_COND_FULL ||
                                    zone->wp + nr_sectors > zone_end) {
                                        spin_unlock_irqrestore(&zone->wp_lock,
                                                               flags);
                                        ret = -EIO;
                                        goto unlock;
                                }
                                sector = zone->wp;
                        }
                        cmd->sector = sector;
                } else if (sector != zone->wp) {
                        spin_unlock_irqrestore(&zone->wp_lock, flags);
                        pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
                               zone_no, sector, zone->wp);
                        ret = -EIO;
                        goto unlock;
                }

                /* Implicitly open the target zone. */
                if (zone->cond == BLK_ZONE_COND_CLOSED ||
                    zone->cond == BLK_ZONE_COND_EMPTY)
                        zone->cond = BLK_ZONE_COND_IMP_OPEN;

                /*
                 * Advance the write pointer, unless ordered zone append is in
                 * use. If the write fails, the write pointer position will be
                 * corrected when the next I/O starts execution.
                 */
                if (!is_append || !zlo->ordered_zone_append) {
                        zone->wp += nr_sectors;
                        if (zone->wp == zone_end) {
                                zone->cond = BLK_ZONE_COND_FULL;
                                zone->wp = ULLONG_MAX;
                        }
                }

                spin_unlock_irqrestore(&zone->wp_lock, flags);
        }

        nr_bvec = blk_rq_nr_bvec(rq);

        if (rq->bio != rq->biotail) {
                struct bio_vec *bvec;

                cmd->bvec = kmalloc_array(nr_bvec, sizeof(*cmd->bvec), GFP_NOIO);
                if (!cmd->bvec) {
                        ret = -EIO;
                        goto unlock;
                }

                /*
                 * The bios of the request may be started from the middle of
                 * the 'bvec' because of bio splitting, so we can't directly
                 * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
                 * API will take care of all details for us.
                 */
                bvec = cmd->bvec;
                rq_for_each_bvec(tmp, rq, rq_iter) {
                        *bvec = tmp;
                        bvec++;
                }
                iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq));
        } else {
                /*
                 * Same here, this bio may be started from the middle of the
                 * 'bvec' because of bio splitting, so offset from the bvec
                 * must be passed to iov iterator
                 */
                iov_iter_bvec(&iter, rw,
                        __bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter),
                        nr_bvec, blk_rq_bytes(rq));
                iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
        }

        cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
        cmd->iocb.ki_filp = zone->file;
        cmd->iocb.ki_complete = zloop_rw_complete;
        if (!zlo->buffered_io)
                cmd->iocb.ki_flags = IOCB_DIRECT;
        cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);

        if (rw == ITER_SOURCE)
                ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
        else
                ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
unlock:
        if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
                mutex_unlock(&zone->lock);
out:
        if (ret != -EIOCBQUEUED)
                zloop_rw_complete(&cmd->iocb, ret);
        zloop_put_cmd(cmd);
}

static void zloop_handle_cmd(struct zloop_cmd *cmd)
{
        struct request *rq = blk_mq_rq_from_pdu(cmd);
        struct zloop_device *zlo = rq->q->queuedata;

        /* We can block in this context, so ignore REQ_NOWAIT. */
        if (rq->cmd_flags & REQ_NOWAIT)
                rq->cmd_flags &= ~REQ_NOWAIT;

        switch (req_op(rq)) {
        case REQ_OP_READ:
        case REQ_OP_WRITE:
        case REQ_OP_ZONE_APPEND:
                /*
                 * zloop_rw() always executes asynchronously or completes
                 * directly.
                 */
                zloop_rw(cmd);
                return;
        case REQ_OP_FLUSH:
                /*
                 * Sync the entire FS containing the zone files instead of
                 * walking all files
                 */
                cmd->ret = sync_filesystem(file_inode(zlo->data_dir)->i_sb);
                break;
        case REQ_OP_ZONE_RESET:
                cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq));
                break;
        case REQ_OP_ZONE_RESET_ALL:
                cmd->ret = zloop_reset_all_zones(zlo);
                break;
        case REQ_OP_ZONE_FINISH:
                cmd->ret = zloop_finish_zone(zlo, rq_zone_no(rq));
                break;
        case REQ_OP_ZONE_OPEN:
                cmd->ret = zloop_open_zone(zlo, rq_zone_no(rq));
                break;
        case REQ_OP_ZONE_CLOSE:
                cmd->ret = zloop_close_zone(zlo, rq_zone_no(rq));
                break;
        default:
                WARN_ON_ONCE(1);
                pr_err("Unsupported operation %d\n", req_op(rq));
                cmd->ret = -EOPNOTSUPP;
                break;
        }

        blk_mq_complete_request(rq);
}

static void zloop_cmd_workfn(struct work_struct *work)
{
        struct zloop_cmd *cmd = container_of(work, struct zloop_cmd, work);
        int orig_flags = current->flags;

        current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
        zloop_handle_cmd(cmd);
        current->flags = orig_flags;
}

static void zloop_complete_rq(struct request *rq)
{
        struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
        struct zloop_device *zlo = rq->q->queuedata;
        unsigned int zone_no = cmd->sector >> zlo->zone_shift;
        struct zloop_zone *zone = &zlo->zones[zone_no];
        blk_status_t sts = BLK_STS_OK;

        switch (req_op(rq)) {
        case REQ_OP_READ:
                if (cmd->ret < 0)
                        pr_err("Zone %u: failed read sector %llu, %llu sectors\n",
                               zone_no, cmd->sector, cmd->nr_sectors);

                if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
                        /* short read */
                        struct bio *bio;

                        __rq_for_each_bio(bio, rq)
                                zero_fill_bio(bio);
                }
                break;
        case REQ_OP_WRITE:
        case REQ_OP_ZONE_APPEND:
                if (cmd->ret < 0)
                        pr_err("Zone %u: failed %swrite sector %llu, %llu sectors\n",
                               zone_no,
                               req_op(rq) == REQ_OP_WRITE ? "" : "append ",
                               cmd->sector, cmd->nr_sectors);

                if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
                        pr_err("Zone %u: partial write %ld/%u B\n",
                               zone_no, cmd->ret, blk_rq_bytes(rq));
                        cmd->ret = -EIO;
                }

                if (cmd->ret < 0 && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
                        /*
                         * A write to a sequential zone file failed: mark the
                         * zone as having an error. This will be corrected and
                         * cleared when the next IO is submitted.
                         */
                        set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
                        break;
                }
                if (req_op(rq) == REQ_OP_ZONE_APPEND)
                        rq->__sector = cmd->sector;

                break;
        default:
                break;
        }

        if (cmd->ret < 0)
                sts = errno_to_blk_status(cmd->ret);
        blk_mq_end_request(rq, sts);
}

static bool zloop_set_zone_append_sector(struct request *rq)
{
        struct zloop_device *zlo = rq->q->queuedata;
        unsigned int zone_no = rq_zone_no(rq);
        struct zloop_zone *zone = &zlo->zones[zone_no];
        sector_t zone_end = zone->start + zlo->zone_capacity;
        sector_t nr_sectors = blk_rq_sectors(rq);
        unsigned long flags;

        spin_lock_irqsave(&zone->wp_lock, flags);

        if (zone->cond == BLK_ZONE_COND_FULL ||
            zone->wp + nr_sectors > zone_end) {
                spin_unlock_irqrestore(&zone->wp_lock, flags);
                return false;
        }

        rq->__sector = zone->wp;
        zone->wp += blk_rq_sectors(rq);
        if (zone->wp >= zone_end) {
                zone->cond = BLK_ZONE_COND_FULL;
                zone->wp = ULLONG_MAX;
        }

        spin_unlock_irqrestore(&zone->wp_lock, flags);

        return true;
}

static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
                                   const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;
        struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
        struct zloop_device *zlo = rq->q->queuedata;

        if (data_race(READ_ONCE(zlo->state)) == Zlo_deleting)
                return BLK_STS_IOERR;

        /*
         * If we need to strongly order zone append operations, set the request
         * sector to the zone write pointer location now instead of when the
         * command work runs.
         */
        if (zlo->ordered_zone_append && req_op(rq) == REQ_OP_ZONE_APPEND) {
                if (!zloop_set_zone_append_sector(rq))
                        return BLK_STS_IOERR;
        }

        blk_mq_start_request(rq);

        INIT_WORK(&cmd->work, zloop_cmd_workfn);
        queue_work(zlo->workqueue, &cmd->work);

        return BLK_STS_OK;
}

static const struct blk_mq_ops zloop_mq_ops = {
        .queue_rq       = zloop_queue_rq,
        .complete       = zloop_complete_rq,
};

static int zloop_open(struct gendisk *disk, blk_mode_t mode)
{
        struct zloop_device *zlo = disk->private_data;
        int ret;

        ret = mutex_lock_killable(&zloop_ctl_mutex);
        if (ret)
                return ret;

        if (zlo->state != Zlo_live)
                ret = -ENXIO;
        mutex_unlock(&zloop_ctl_mutex);
        return ret;
}

static int zloop_report_zones(struct gendisk *disk, sector_t sector,
                unsigned int nr_zones, struct blk_report_zones_args *args)
{
        struct zloop_device *zlo = disk->private_data;
        struct blk_zone blkz = {};
        unsigned int first, i;
        unsigned long flags;
        int ret;

        first = disk_zone_no(disk, sector);
        if (first >= zlo->nr_zones)
                return 0;
        nr_zones = min(nr_zones, zlo->nr_zones - first);

        for (i = 0; i < nr_zones; i++) {
                unsigned int zone_no = first + i;
                struct zloop_zone *zone = &zlo->zones[zone_no];

                mutex_lock(&zone->lock);

                if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
                        ret = zloop_update_seq_zone(zlo, zone_no);
                        if (ret) {
                                mutex_unlock(&zone->lock);
                                return ret;
                        }
                }

                blkz.start = zone->start;
                blkz.len = zlo->zone_size;
                spin_lock_irqsave(&zone->wp_lock, flags);
                blkz.wp = zone->wp;
                spin_unlock_irqrestore(&zone->wp_lock, flags);
                blkz.cond = zone->cond;
                if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
                        blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
                        blkz.capacity = zlo->zone_size;
                } else {
                        blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
                        blkz.capacity = zlo->zone_capacity;
                }

                mutex_unlock(&zone->lock);

                ret = disk_report_zone(disk, &blkz, i, args);
                if (ret)
                        return ret;
        }

        return nr_zones;
}

static void zloop_free_disk(struct gendisk *disk)
{
        struct zloop_device *zlo = disk->private_data;
        unsigned int i;

        blk_mq_free_tag_set(&zlo->tag_set);

        for (i = 0; i < zlo->nr_zones; i++) {
                struct zloop_zone *zone = &zlo->zones[i];

                mapping_set_gfp_mask(zone->file->f_mapping,
                                     zone->old_gfp_mask);
                fput(zone->file);
        }

        fput(zlo->data_dir);
        destroy_workqueue(zlo->workqueue);
        kfree(zlo->base_dir);
        kvfree(zlo);
}

static const struct block_device_operations zloop_fops = {
        .owner          = THIS_MODULE,
        .open           = zloop_open,
        .report_zones   = zloop_report_zones,
        .free_disk      = zloop_free_disk,
};

__printf(3, 4)
static struct file *zloop_filp_open_fmt(int oflags, umode_t mode,
                                         const char *fmt, ...)
{
        struct file *file;
        va_list ap;
        char *p;

        va_start(ap, fmt);
        p = kvasprintf(GFP_KERNEL, fmt, ap);
        va_end(ap);

        if (!p)
                return ERR_PTR(-ENOMEM);
        file = filp_open(p, oflags, mode);
        kfree(p);
        return file;
}

static int zloop_get_block_size(struct zloop_device *zlo,
                                struct zloop_zone *zone)
{
        struct block_device *sb_bdev = zone->file->f_mapping->host->i_sb->s_bdev;
        struct kstat st;

        /*
         * If the FS block size is lower than or equal to 4K, use that as the
         * device block size. Otherwise, fallback to the FS direct IO alignment
         * constraint if that is provided, and to the FS underlying device
         * physical block size if the direct IO alignment is unknown.
         */
        if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K)
                zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize;
        else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) &&
                 (st.result_mask & STATX_DIOALIGN))
                zlo->block_size = st.dio_offset_align;
        else if (sb_bdev)
                zlo->block_size = bdev_physical_block_size(sb_bdev);
        else
                zlo->block_size = SECTOR_SIZE;

        if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
                pr_err("Zone capacity is not aligned to block size %u\n",
                       zlo->block_size);
                return -EINVAL;
        }

        return 0;
}

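/*
 * Backing-store layout (added note, derived from the open calls below): each
 * zone is one file under <base_dir>/<id>/, named cnv-%06u for conventional
 * zones and seq-%06u for sequential zones. With the defaults, device 0 would
 * use e.g. /var/local/zloop/0/cnv-000000 for the first conventional zone and
 * /var/local/zloop/0/seq-000008 for the first sequential zone.
 */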
static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts,
                           unsigned int zone_no, bool restore)
{
        struct zloop_zone *zone = &zlo->zones[zone_no];
        int oflags = O_RDWR;
        struct kstat stat;
        sector_t file_sectors;
        int ret;

        mutex_init(&zone->lock);
        spin_lock_init(&zone->wp_lock);
        zone->start = (sector_t)zone_no << zlo->zone_shift;

        if (!restore)
                oflags |= O_CREAT;

        if (!opts->buffered_io)
                oflags |= O_DIRECT;

        if (zone_no < zlo->nr_conv_zones) {
                /* Conventional zone file. */
                set_bit(ZLOOP_ZONE_CONV, &zone->flags);
                zone->cond = BLK_ZONE_COND_NOT_WP;
                zone->wp = U64_MAX;

                zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/cnv-%06u",
                                zlo->base_dir, zlo->id, zone_no);
                if (IS_ERR(zone->file)) {
                        pr_err("Failed to open zone %u file %s/%u/cnv-%06u (err=%ld)",
                               zone_no, zlo->base_dir, zlo->id, zone_no,
                               PTR_ERR(zone->file));
                        return PTR_ERR(zone->file);
                }

                if (!zlo->block_size) {
                        ret = zloop_get_block_size(zlo, zone);
                        if (ret)
                                return ret;
                }

                ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
                if (ret < 0) {
                        pr_err("Failed to get zone %u file stat\n", zone_no);
                        return ret;
                }
                file_sectors = stat.size >> SECTOR_SHIFT;

                if (restore && file_sectors != zlo->zone_size) {
                        pr_err("Invalid conventional zone %u file size (%llu sectors != %llu)\n",
                               zone_no, file_sectors, zlo->zone_capacity);
                        return ret;
                }

                ret = vfs_truncate(&zone->file->f_path,
                                   zlo->zone_size << SECTOR_SHIFT);
                if (ret < 0) {
                        pr_err("Failed to truncate zone %u file (err=%d)\n",
                               zone_no, ret);
                        return ret;
                }

                return 0;
        }

        /* Sequential zone file. */
        zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/seq-%06u",
                                         zlo->base_dir, zlo->id, zone_no);
        if (IS_ERR(zone->file)) {
                pr_err("Failed to open zone %u file %s/%u/seq-%06u (err=%ld)",
                       zone_no, zlo->base_dir, zlo->id, zone_no,
                       PTR_ERR(zone->file));
                return PTR_ERR(zone->file);
        }

        if (!zlo->block_size) {
                ret = zloop_get_block_size(zlo, zone);
                if (ret)
                        return ret;
        }

        mutex_lock(&zone->lock);
        ret = zloop_update_seq_zone(zlo, zone_no);
        mutex_unlock(&zone->lock);

        return ret;
}

static bool zloop_dev_exists(struct zloop_device *zlo)
{
        struct file *cnv, *seq;
        bool exists;

        cnv = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/cnv-%06u",
                                  zlo->base_dir, zlo->id, 0);
        seq = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/seq-%06u",
                                  zlo->base_dir, zlo->id, 0);
        exists = !IS_ERR(cnv) || !IS_ERR(seq);

        if (!IS_ERR(cnv))
                fput(cnv);
        if (!IS_ERR(seq))
                fput(seq);

        return exists;
}

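/*
 * Note (added for clarity): zloop_ctl_add() below opens the per-device data
 * directory "<base_dir>/<id>" with O_DIRECTORY and warns and fails if it does
 * not exist, so it must be created before the "add" operation. A minimal
 * sketch, assuming the default base directory and device id 0:
 *
 *   mkdir -p /var/local/zloop/0
 *   echo "add id=0" > /dev/zloop-control
 */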
static int zloop_ctl_add(struct zloop_options *opts)
{
        struct queue_limits lim = {
                .max_hw_sectors         = SZ_1M >> SECTOR_SHIFT,
                .chunk_sectors          = opts->zone_size,
                .features               = BLK_FEAT_ZONED,
        };
        unsigned int nr_zones, i, j;
        struct zloop_device *zlo;
        int ret = -EINVAL;
        bool restore;

        __module_get(THIS_MODULE);

        nr_zones = opts->capacity >> ilog2(opts->zone_size);
        if (opts->nr_conv_zones >= nr_zones) {
                pr_err("Invalid number of conventional zones %u\n",
                       opts->nr_conv_zones);
                goto out;
        }

        zlo = kvzalloc(struct_size(zlo, zones, nr_zones), GFP_KERNEL);
        if (!zlo) {
                ret = -ENOMEM;
                goto out;
        }
        WRITE_ONCE(zlo->state, Zlo_creating);

        ret = mutex_lock_killable(&zloop_ctl_mutex);
        if (ret)
                goto out_free_dev;

        /* Allocate id, if @opts->id >= 0, we're requesting that specific id */
        if (opts->id >= 0) {
                ret = idr_alloc(&zloop_index_idr, zlo,
                                opts->id, opts->id + 1, GFP_KERNEL);
                if (ret == -ENOSPC)
                        ret = -EEXIST;
        } else {
                ret = idr_alloc(&zloop_index_idr, zlo, 0, 0, GFP_KERNEL);
        }
        mutex_unlock(&zloop_ctl_mutex);
        if (ret < 0)
                goto out_free_dev;

        zlo->id = ret;
        zlo->zone_shift = ilog2(opts->zone_size);
        zlo->zone_size = opts->zone_size;
        if (opts->zone_capacity)
                zlo->zone_capacity = opts->zone_capacity;
        else
                zlo->zone_capacity = zlo->zone_size;
        zlo->nr_zones = nr_zones;
        zlo->nr_conv_zones = opts->nr_conv_zones;
        zlo->buffered_io = opts->buffered_io;
        zlo->zone_append = opts->zone_append;
        if (zlo->zone_append)
                zlo->ordered_zone_append = opts->ordered_zone_append;

        zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
                                         opts->nr_queues * opts->queue_depth, zlo->id);
        if (!zlo->workqueue) {
                ret = -ENOMEM;
                goto out_free_idr;
        }

        if (opts->base_dir)
                zlo->base_dir = kstrdup(opts->base_dir, GFP_KERNEL);
        else
                zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL);
        if (!zlo->base_dir) {
                ret = -ENOMEM;
                goto out_destroy_workqueue;
        }

        zlo->data_dir = zloop_filp_open_fmt(O_RDONLY | O_DIRECTORY, 0, "%s/%u",
                                            zlo->base_dir, zlo->id);
        if (IS_ERR(zlo->data_dir)) {
                ret = PTR_ERR(zlo->data_dir);
                pr_warn("Failed to open directory %s/%u (err=%d)\n",
                        zlo->base_dir, zlo->id, ret);
                goto out_free_base_dir;
        }

        /*
         * If we already have zone files, we are restoring a device created by a
         * previous add operation. In this case, zloop_init_zone() will check
         * that the zone files are consistent with the zone configuration given.
         */
        restore = zloop_dev_exists(zlo);
        for (i = 0; i < nr_zones; i++) {
                ret = zloop_init_zone(zlo, opts, i, restore);
                if (ret)
                        goto out_close_files;
        }

        lim.physical_block_size = zlo->block_size;
        lim.logical_block_size = zlo->block_size;
        if (zlo->zone_append)
                lim.max_hw_zone_append_sectors = lim.max_hw_sectors;

        zlo->tag_set.ops = &zloop_mq_ops;
        zlo->tag_set.nr_hw_queues = opts->nr_queues;
        zlo->tag_set.queue_depth = opts->queue_depth;
        zlo->tag_set.numa_node = NUMA_NO_NODE;
        zlo->tag_set.cmd_size = sizeof(struct zloop_cmd);
        zlo->tag_set.driver_data = zlo;

        ret = blk_mq_alloc_tag_set(&zlo->tag_set);
        if (ret) {
                pr_err("blk_mq_alloc_tag_set failed (err=%d)\n", ret);
                goto out_close_files;
        }

        zlo->disk = blk_mq_alloc_disk(&zlo->tag_set, &lim, zlo);
        if (IS_ERR(zlo->disk)) {
                pr_err("blk_mq_alloc_disk failed (err=%d)\n", ret);
                ret = PTR_ERR(zlo->disk);
                goto out_cleanup_tags;
        }
        zlo->disk->flags = GENHD_FL_NO_PART;
        zlo->disk->fops = &zloop_fops;
        zlo->disk->private_data = zlo;
        sprintf(zlo->disk->disk_name, "zloop%d", zlo->id);
        set_capacity(zlo->disk, (u64)lim.chunk_sectors * zlo->nr_zones);

        ret = blk_revalidate_disk_zones(zlo->disk);
        if (ret)
                goto out_cleanup_disk;

        ret = add_disk(zlo->disk);
        if (ret) {
                pr_err("add_disk failed (err=%d)\n", ret);
                goto out_cleanup_disk;
        }

        mutex_lock(&zloop_ctl_mutex);
        WRITE_ONCE(zlo->state, Zlo_live);
        mutex_unlock(&zloop_ctl_mutex);

        pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n",
                zlo->id, zlo->nr_zones,
                ((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20,
                zlo->block_size);
        pr_info("zloop%d: using %s%s zone append\n",
                zlo->id,
                zlo->ordered_zone_append ? "ordered " : "",
                zlo->zone_append ? "native" : "emulated");

        return 0;

out_cleanup_disk:
        put_disk(zlo->disk);
out_cleanup_tags:
        blk_mq_free_tag_set(&zlo->tag_set);
out_close_files:
        for (j = 0; j < i; j++) {
                struct zloop_zone *zone = &zlo->zones[j];

                if (!IS_ERR_OR_NULL(zone->file))
                        fput(zone->file);
        }
        fput(zlo->data_dir);
out_free_base_dir:
        kfree(zlo->base_dir);
out_destroy_workqueue:
        destroy_workqueue(zlo->workqueue);
out_free_idr:
        mutex_lock(&zloop_ctl_mutex);
        idr_remove(&zloop_index_idr, zlo->id);
        mutex_unlock(&zloop_ctl_mutex);
out_free_dev:
        kvfree(zlo);
out:
        module_put(THIS_MODULE);
        if (ret == -ENOENT)
                ret = -EINVAL;
        return ret;
}

static int zloop_ctl_remove(struct zloop_options *opts)
{
        struct zloop_device *zlo;
        int ret;

        if (!(opts->mask & ZLOOP_OPT_ID)) {
                pr_err("No ID specified\n");
                return -EINVAL;
        }

        ret = mutex_lock_killable(&zloop_ctl_mutex);
        if (ret)
                return ret;

        zlo = idr_find(&zloop_index_idr, opts->id);
        if (!zlo || zlo->state == Zlo_creating) {
                ret = -ENODEV;
        } else if (zlo->state == Zlo_deleting) {
                ret = -EINVAL;
        } else {
                idr_remove(&zloop_index_idr, zlo->id);
                WRITE_ONCE(zlo->state, Zlo_deleting);
        }

        mutex_unlock(&zloop_ctl_mutex);
        if (ret)
                return ret;

        del_gendisk(zlo->disk);
        put_disk(zlo->disk);

        pr_info("Removed device %d\n", opts->id);

        module_put(THIS_MODULE);

        return 0;
}

static int zloop_parse_options(struct zloop_options *opts, const char *buf)
{
        substring_t args[MAX_OPT_ARGS];
        char *options, *o, *p;
        unsigned int token;
        int ret = 0;

        /* Set defaults. */
        opts->mask = 0;
        opts->id = ZLOOP_DEF_ID;
        opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES;
        opts->zone_size = ZLOOP_DEF_ZONE_SIZE;
        opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES;
        opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
        opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
        opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;
        opts->zone_append = ZLOOP_DEF_ZONE_APPEND;
        opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND;

        if (!buf)
                return 0;

        /* Skip leading spaces before the options. */
        while (isspace(*buf))
                buf++;

        options = o = kstrdup(buf, GFP_KERNEL);
        if (!options)
                return -ENOMEM;

        /* Parse the options, doing only some light invalid value checks. */
        while ((p = strsep(&o, ",\n")) != NULL) {
                if (!*p)
                        continue;

                token = match_token(p, zloop_opt_tokens, args);
                opts->mask |= token;
                switch (token) {
                case ZLOOP_OPT_ID:
                        if (match_int(args, &opts->id)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        break;
                case ZLOOP_OPT_CAPACITY:
                        if (match_uint(args, &token)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        if (!token) {
                                pr_err("Invalid capacity\n");
                                ret = -EINVAL;
                                goto out;
                        }
                        opts->capacity =
                                ((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
                        break;
                case ZLOOP_OPT_ZONE_SIZE:
                        if (match_uint(args, &token)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        if (!token || token > ZLOOP_MAX_ZONE_SIZE_MB ||
                            !is_power_of_2(token)) {
                                pr_err("Invalid zone size %u\n", token);
                                ret = -EINVAL;
                                goto out;
                        }
                        opts->zone_size =
                                ((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
                        break;
                case ZLOOP_OPT_ZONE_CAPACITY:
                        if (match_uint(args, &token)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        if (!token) {
                                pr_err("Invalid zone capacity\n");
                                ret = -EINVAL;
                                goto out;
                        }
                        opts->zone_capacity =
                                ((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
                        break;
                case ZLOOP_OPT_NR_CONV_ZONES:
                        if (match_uint(args, &token)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        opts->nr_conv_zones = token;
                        break;
                case ZLOOP_OPT_BASE_DIR:
                        p = match_strdup(args);
                        if (!p) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        kfree(opts->base_dir);
                        opts->base_dir = p;
                        break;
                case ZLOOP_OPT_NR_QUEUES:
                        if (match_uint(args, &token)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        if (!token) {
                                pr_err("Invalid number of queues\n");
                                ret = -EINVAL;
                                goto out;
                        }
                        opts->nr_queues = min(token, num_online_cpus());
                        break;
                case ZLOOP_OPT_QUEUE_DEPTH:
                        if (match_uint(args, &token)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        if (!token) {
                                pr_err("Invalid queue depth\n");
                                ret = -EINVAL;
                                goto out;
                        }
                        opts->queue_depth = token;
                        break;
                case ZLOOP_OPT_BUFFERED_IO:
                        opts->buffered_io = true;
                        break;
                case ZLOOP_OPT_ZONE_APPEND:
                        if (match_uint(args, &token)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        if (token != 0 && token != 1) {
                                pr_err("Invalid zone_append value\n");
                                ret = -EINVAL;
                                goto out;
                        }
                        opts->zone_append = token;
                        break;
                case ZLOOP_OPT_ORDERED_ZONE_APPEND:
                        opts->ordered_zone_append = true;
                        break;
                case ZLOOP_OPT_ERR:
                default:
                        pr_warn("unknown parameter or missing value '%s'\n", p);
                        ret = -EINVAL;
                        goto out;
                }
        }

        ret = -EINVAL;
        if (opts->capacity <= opts->zone_size) {
                pr_err("Invalid capacity\n");
                goto out;
        }

        if (opts->zone_capacity > opts->zone_size) {
                pr_err("Invalid zone capacity\n");
                goto out;
        }

        ret = 0;
out:
        kfree(options);
        return ret;
}

enum {
        ZLOOP_CTL_ADD,
        ZLOOP_CTL_REMOVE,
};

static struct zloop_ctl_op {
        int code;
        const char *name;
} zloop_ctl_ops[] = {
        { ZLOOP_CTL_ADD,        "add" },
        { ZLOOP_CTL_REMOVE,     "remove" },
        { -1,                   NULL },
};

static ssize_t zloop_ctl_write(struct file *file, const char __user *ubuf,
                               size_t count, loff_t *pos)
{
        struct zloop_options opts = { };
        struct zloop_ctl_op *op;
        const char *buf, *opts_buf;
        int i, ret;

        if (count > PAGE_SIZE)
                return -ENOMEM;

        buf = memdup_user_nul(ubuf, count);
        if (IS_ERR(buf))
                return PTR_ERR(buf);

        for (i = 0; i < ARRAY_SIZE(zloop_ctl_ops); i++) {
                op = &zloop_ctl_ops[i];
                if (!op->name) {
                        pr_err("Invalid operation\n");
                        ret = -EINVAL;
                        goto out;
                }
                if (!strncmp(buf, op->name, strlen(op->name)))
                        break;
        }

        if (count <= strlen(op->name))
                opts_buf = NULL;
        else
                opts_buf = buf + strlen(op->name);

        ret = zloop_parse_options(&opts, opts_buf);
        if (ret) {
                pr_err("Failed to parse options\n");
                goto out;
        }

        switch (op->code) {
        case ZLOOP_CTL_ADD:
                ret = zloop_ctl_add(&opts);
                break;
        case ZLOOP_CTL_REMOVE:
                ret = zloop_ctl_remove(&opts);
                break;
        default:
                pr_err("Invalid operation\n");
                ret = -EINVAL;
                goto out;
        }

out:
        kfree(opts.base_dir);
        kfree(buf);
        return ret ? ret : count;
}

static int zloop_ctl_show(struct seq_file *seq_file, void *private)
{
        const struct match_token *tok;
        int i;

        /* Add operation */
        seq_printf(seq_file, "%s ", zloop_ctl_ops[0].name);
        for (i = 0; i < ARRAY_SIZE(zloop_opt_tokens); i++) {
                tok = &zloop_opt_tokens[i];
                if (!tok->pattern)
                        break;
                if (i)
                        seq_putc(seq_file, ',');
                seq_puts(seq_file, tok->pattern);
        }
        seq_putc(seq_file, '\n');

        /* Remove operation */
        seq_puts(seq_file, zloop_ctl_ops[1].name);
        seq_puts(seq_file, " id=%d\n");

        return 0;
}

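/*
 * For reference (added note, derived from zloop_ctl_show() above rather than
 * captured output): reading the control device prints the accepted syntax,
 * roughly:
 *
 *   add id=%d,capacity_mb=%u,zone_size_mb=%u,zone_capacity_mb=%u,conv_zones=%u,base_dir=%s,nr_queues=%u,queue_depth=%u,buffered_io,zone_append=%u,ordered_zone_append
 *   remove id=%d
 */
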
static int zloop_ctl_open(struct inode *inode, struct file *file)
{
        file->private_data = NULL;
        return single_open(file, zloop_ctl_show, NULL);
}

static int zloop_ctl_release(struct inode *inode, struct file *file)
{
        return single_release(inode, file);
}

static const struct file_operations zloop_ctl_fops = {
        .owner          = THIS_MODULE,
        .open           = zloop_ctl_open,
        .release        = zloop_ctl_release,
        .write          = zloop_ctl_write,
        .read           = seq_read,
};

static struct miscdevice zloop_misc = {
        .minor          = MISC_DYNAMIC_MINOR,
        .name           = "zloop-control",
        .fops           = &zloop_ctl_fops,
};

static int __init zloop_init(void)
{
        int ret;

        ret = misc_register(&zloop_misc);
        if (ret) {
                pr_err("Failed to register misc device: %d\n", ret);
                return ret;
        }
        pr_info("Module loaded\n");

        return 0;
}

static void __exit zloop_exit(void)
{
        misc_deregister(&zloop_misc);
        idr_destroy(&zloop_index_idr);
}

module_init(zloop_init);
module_exit(zloop_exit);

MODULE_DESCRIPTION("Zoned loopback device");
MODULE_LICENSE("GPL");