Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/block/osdblk.c
15111 views
1
2
/*
   osdblk.c -- Export a single SCSI OSD object as a Linux block device


   Copyright 2009 Red Hat, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.


   Instructions for use
   --------------------

   1) Map a Linux block device to an existing OSD object.

      In this example, we will use partition id 1234, object id 5678,
      OSD device /dev/osd1.

      $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add


   2) List all active blkdev<->object mappings.

      In this example, we have performed step #1 twice, creating two blkdevs,
      mapped to two separate OSD objects.

      $ cat /sys/class/osdblk/list
      0 174 1234 5678 /dev/osd1
      1 179 1994 897123 /dev/osd0

      The columns, in order, are:
      - blkdev unique id
      - blkdev assigned major
      - OSD object partition id
      - OSD object id
      - OSD device


   3) Remove an active blkdev<->object mapping.

      In this example, we remove the mapping with blkdev unique id 1.

      $ echo 1 > /sys/class/osdblk/remove


   NOTE:  The actual creation and deletion of OSD objects is outside the scope
   of this driver.

 */
61
62
#include <linux/kernel.h>
63
#include <linux/device.h>
64
#include <linux/module.h>
65
#include <linux/fs.h>
66
#include <linux/slab.h>
67
#include <scsi/osd_initiator.h>
68
#include <scsi/osd_attributes.h>
69
#include <scsi/osd_sec.h>
70
#include <scsi/scsi_device.h>
71
72
/* Driver name; block devices are named DRV_NAME followed by their id. */
#define DRV_NAME "osdblk"
/* Prefix for messages printed by this driver. */
#define PFX DRV_NAME ": "

/*
 * OSDBLK_DEBUG() - printk-style debug trace.
 *
 * With _OSDBLK_DEBUG defined, messages are printed at KERN_NOTICE and tagged
 * with the calling function and line.  Otherwise the call sits behind
 * "if (0)": nothing is printed, but the arguments are still compiled.
 */
/* #define _OSDBLK_DEBUG */
#ifndef _OSDBLK_DEBUG
#define OSDBLK_DEBUG(fmt, a...) \
	do { if (0) printk(fmt, ##a); } while (0)
#else
#define OSDBLK_DEBUG(fmt, a...) \
	printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a)
#endif
83
84
/*
 * Module metadata.
 *
 * NOTE(review): the author address reads "[email protected]", which looks like
 * an e-mail redaction artifact rather than a real address -- restore it from
 * the upstream file.
 */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko");
MODULE_AUTHOR("Jeff Garzik <[email protected]>");

/* Defined below; struct osdblk_request holds a pointer back to it. */
struct osdblk_device;
89
90
/* Driver-wide limits. */
enum {
	/* minor count passed to alloc_disk() for every mapped device */
	OSDBLK_MINORS_PER_MAJOR	= 256,

	/* queue tag depth, and number of slots in osdblk_device.req[] */
	OSDBLK_MAX_REQ		= 32,

	/* timeout handed to synchronous OSD operations */
	OSDBLK_OP_TIMEOUT	= 4 * 60,
};
95
96
/*
 * struct osdblk_request - per-tag bookkeeping for one in-flight request.
 *
 * One slot exists per queue tag (see osdblk_device.req[]); the slot is
 * passed to the OSD layer as the completion context.
 */
struct osdblk_request {
	/* request handed to us by the block layer */
	struct request		*rq;
	/* clone of rq's bio chain given to the OSD request; NULL for a flush */
	struct bio		*bio;
	/* device this slot belongs to */
	struct osdblk_device	*osdev;
};
101
102
struct osdblk_device {
103
int id; /* blkdev unique id */
104
105
int major; /* blkdev assigned major */
106
struct gendisk *disk; /* blkdev's gendisk and rq */
107
struct request_queue *q;
108
109
struct osd_dev *osd; /* associated OSD */
110
111
char name[32]; /* blkdev name, e.g. osdblk34 */
112
113
spinlock_t lock; /* queue lock */
114
115
struct osd_obj_id obj; /* OSD partition, obj id */
116
uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */
117
118
struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */
119
120
struct list_head node;
121
122
char osd_path[0]; /* OSD device path */
123
};
124
125
static struct class *class_osdblk; /* /sys/class/osdblk */
126
static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
127
static LIST_HEAD(osdblkdev_list);
128
129
static const struct block_device_operations osdblk_bd_ops = {
130
.owner = THIS_MODULE,
131
};
132
133
static const struct osd_attr g_attr_logical_length = ATTR_DEF(
134
OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
135
136
/*
 * osdblk_make_credential - fill in the credential used for all commands
 * against one object.
 * @cred_a: output buffer of OSD_CAP_LEN bytes
 * @obj:    partition/object id the credential is built for
 *
 * Thin wrapper around osd_sec_init_nosec_doall_caps(); the two boolean
 * arguments are passed as false and true respectively.
 */
static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN],
				   const struct osd_obj_id *obj)
{
	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
}
141
142
/* copied from exofs; move to libosd? */
143
/*
144
* Perform a synchronous OSD operation. copied from exofs; move to libosd?
145
*/
146
static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
147
{
148
int ret;
149
150
or->timeout = timeout;
151
ret = osd_finalize_request(or, 0, credential, NULL);
152
if (ret)
153
return ret;
154
155
ret = osd_execute_request(or);
156
157
/* osd_req_decode_sense(or, ret); */
158
return ret;
159
}
160
161
/*
162
* Perform an asynchronous OSD operation. copied from exofs; move to libosd?
163
*/
164
static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done,
165
void *caller_context, u8 *cred)
166
{
167
int ret;
168
169
ret = osd_finalize_request(or, 0, cred, NULL);
170
if (ret)
171
return ret;
172
173
ret = osd_execute_request_async(or, async_done, caller_context);
174
175
return ret;
176
}
177
178
/* copied from exofs; move to libosd? */
179
static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
180
{
181
struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
182
void *iter = NULL;
183
int nelem;
184
185
do {
186
nelem = 1;
187
osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
188
if ((cur_attr.attr_page == attr->attr_page) &&
189
(cur_attr.attr_id == attr->attr_id)) {
190
attr->len = cur_attr.len;
191
attr->val_ptr = cur_attr.val_ptr;
192
return 0;
193
}
194
} while (iter);
195
196
return -EIO;
197
}
198
199
/*
 * osdblk_get_obj_size - ask the OSD for the logical length of our object.
 * @osdev:    device whose object (osdev->obj) is queried
 * @size_out: receives the length decoded from the reply, on success only
 *
 * Issues a synchronous get-attributes request for g_attr_logical_length and
 * decodes the reply as a big-endian 64-bit value.
 *
 * Returns 0 on success, -ENOMEM if no OSD request could be allocated, -EIO
 * if the attribute is missing or shorter than 8 bytes, or the error from
 * osd_sync_op().
 */
static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out)
{
	struct osd_request *or;
	struct osd_attr attr;
	int ret;

	/* start request */
	or = osd_start_request(osdev->osd, GFP_KERNEL);
	if (!or)
		return -ENOMEM;

	/* create a get-attributes(length) request */
	osd_req_get_attributes(or, &osdev->obj);

	osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);

	/* execute op synchronously */
	ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred);
	if (ret)
		goto out;

	/* extract length from returned attribute info */
	attr = g_attr_logical_length;
	ret = extract_attr_from_req(or, &attr);
	if (ret)
		goto out;

	/*
	 * The reply comes from the device: make sure a full 64-bit value is
	 * actually there before reading 8 bytes through val_ptr.
	 */
	if (!attr.val_ptr || attr.len < sizeof(u64)) {
		ret = -EIO;
		goto out;
	}

	*size_out = get_unaligned_be64(attr.val_ptr);

out:
	osd_end_request(or);
	return ret;
}
233
234
static void osdblk_osd_complete(struct osd_request *or, void *private)
235
{
236
struct osdblk_request *orq = private;
237
struct osd_sense_info osi;
238
int ret = osd_req_decode_sense(or, &osi);
239
240
if (ret) {
241
ret = -EIO;
242
OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret);
243
}
244
245
/* complete OSD request */
246
osd_end_request(or);
247
248
/* complete request passed to osdblk by block layer */
249
__blk_end_request_all(orq->rq, ret);
250
}
251
252
static void bio_chain_put(struct bio *chain)
253
{
254
struct bio *tmp;
255
256
while (chain) {
257
tmp = chain;
258
chain = chain->bi_next;
259
260
bio_put(tmp);
261
}
262
}
263
264
/*
 * bio_chain_clone - duplicate a bi_next-linked chain of bios.
 * @old_chain: chain to copy; may be NULL
 * @gfpmask:   allocation flags for the first clone; __GFP_WAIT is cleared
 *             for every clone after the first
 *
 * Each clone has bi_bdev reset to NULL and is linked in the same order as
 * the original.
 *
 * Returns the head of the new chain, or NULL if @old_chain was empty or an
 * allocation failed (clones made so far are released).
 */
static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
{
	struct bio *head = NULL;
	struct bio **link = &head;	/* where the next clone gets hooked in */
	struct bio *src;

	for (src = old_chain; src; src = src->bi_next) {
		struct bio *copy = bio_kmalloc(gfpmask, src->bi_max_vecs);

		if (!copy) {
			OSDBLK_DEBUG("bio_chain_clone with err\n");
			bio_chain_put(head);
			return NULL;
		}

		__bio_clone(copy, src);
		copy->bi_bdev = NULL;
		copy->bi_next = NULL;

		/* allocations after the first one must not wait */
		gfpmask &= ~__GFP_WAIT;

		*link = copy;
		link = &copy->bi_next;
	}

	return head;
}
295
296
static void osdblk_rq_fn(struct request_queue *q)
297
{
298
struct osdblk_device *osdev = q->queuedata;
299
300
while (1) {
301
struct request *rq;
302
struct osdblk_request *orq;
303
struct osd_request *or;
304
struct bio *bio;
305
bool do_write, do_flush;
306
307
/* peek at request from block layer */
308
rq = blk_fetch_request(q);
309
if (!rq)
310
break;
311
312
/* filter out block requests we don't understand */
313
if (rq->cmd_type != REQ_TYPE_FS) {
314
blk_end_request_all(rq, 0);
315
continue;
316
}
317
318
/* deduce our operation (read, write, flush) */
319
/* I wish the block layer simplified cmd_type/cmd_flags/cmd[]
320
* into a clearly defined set of RPC commands:
321
* read, write, flush, scsi command, power mgmt req,
322
* driver-specific, etc.
323
*/
324
325
do_flush = rq->cmd_flags & REQ_FLUSH;
326
do_write = (rq_data_dir(rq) == WRITE);
327
328
if (!do_flush) { /* osd_flush does not use a bio */
329
/* a bio clone to be passed down to OSD request */
330
bio = bio_chain_clone(rq->bio, GFP_ATOMIC);
331
if (!bio)
332
break;
333
} else
334
bio = NULL;
335
336
/* alloc internal OSD request, for OSD command execution */
337
or = osd_start_request(osdev->osd, GFP_ATOMIC);
338
if (!or) {
339
bio_chain_put(bio);
340
OSDBLK_DEBUG("osd_start_request with err\n");
341
break;
342
}
343
344
orq = &osdev->req[rq->tag];
345
orq->rq = rq;
346
orq->bio = bio;
347
orq->osdev = osdev;
348
349
/* init OSD command: flush, write or read */
350
if (do_flush)
351
osd_req_flush_object(or, &osdev->obj,
352
OSD_CDB_FLUSH_ALL, 0, 0);
353
else if (do_write)
354
osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
355
bio, blk_rq_bytes(rq));
356
else
357
osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
358
bio, blk_rq_bytes(rq));
359
360
OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n",
361
do_flush ? "flush" : do_write ?
362
"write" : "read", blk_rq_bytes(rq),
363
blk_rq_pos(rq) * 512ULL);
364
365
/* begin OSD command execution */
366
if (osd_async_op(or, osdblk_osd_complete, orq,
367
osdev->obj_cred)) {
368
osd_end_request(or);
369
blk_requeue_request(q, rq);
370
bio_chain_put(bio);
371
OSDBLK_DEBUG("osd_execute_request_async with err\n");
372
break;
373
}
374
375
/* remove the special 'flush' marker, now that the command
376
* is executing
377
*/
378
rq->special = NULL;
379
}
380
}
381
382
static void osdblk_free_disk(struct osdblk_device *osdev)
383
{
384
struct gendisk *disk = osdev->disk;
385
386
if (!disk)
387
return;
388
389
if (disk->flags & GENHD_FL_UP)
390
del_gendisk(disk);
391
if (disk->queue)
392
blk_cleanup_queue(disk->queue);
393
put_disk(disk);
394
}
395
396
static int osdblk_init_disk(struct osdblk_device *osdev)
397
{
398
struct gendisk *disk;
399
struct request_queue *q;
400
int rc;
401
u64 obj_size = 0;
402
403
/* contact OSD, request size info about the object being mapped */
404
rc = osdblk_get_obj_size(osdev, &obj_size);
405
if (rc)
406
return rc;
407
408
/* create gendisk info */
409
disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR);
410
if (!disk)
411
return -ENOMEM;
412
413
sprintf(disk->disk_name, DRV_NAME "%d", osdev->id);
414
disk->major = osdev->major;
415
disk->first_minor = 0;
416
disk->fops = &osdblk_bd_ops;
417
disk->private_data = osdev;
418
419
/* init rq */
420
q = blk_init_queue(osdblk_rq_fn, &osdev->lock);
421
if (!q) {
422
put_disk(disk);
423
return -ENOMEM;
424
}
425
426
/* switch queue to TCQ mode; allocate tag map */
427
rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL);
428
if (rc) {
429
blk_cleanup_queue(q);
430
put_disk(disk);
431
return rc;
432
}
433
434
/* Set our limits to the lower device limits, because osdblk cannot
435
* sleep when allocating a lower-request and therefore cannot be
436
* bouncing.
437
*/
438
blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
439
440
blk_queue_prep_rq(q, blk_queue_start_tag);
441
blk_queue_flush(q, REQ_FLUSH);
442
443
disk->queue = q;
444
445
q->queuedata = osdev;
446
447
osdev->disk = disk;
448
osdev->q = q;
449
450
/* finally, announce the disk to the world */
451
set_capacity(disk, obj_size / 512ULL);
452
add_disk(disk);
453
454
printk(KERN_INFO "%s: Added of size 0x%llx\n",
455
disk->disk_name, (unsigned long long)obj_size);
456
457
return 0;
458
}
459
460
/********************************************************************
 * /sys/class/osdblk/
 *                    add	map OSD object to blkdev
 *                    remove	unmap OSD object
 *                    list	show mappings
 *******************************************************************/
466
467
/*
 * class_osdblk_release - class release callback.
 * @cls: the class structure allocated in osdblk_sysfs_init()
 *
 * Frees the class structure itself.
 */
static void class_osdblk_release(struct class *cls)
{
	kfree(cls);
}
471
472
static ssize_t class_osdblk_list(struct class *c,
473
struct class_attribute *attr,
474
char *data)
475
{
476
int n = 0;
477
struct list_head *tmp;
478
479
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
480
481
list_for_each(tmp, &osdblkdev_list) {
482
struct osdblk_device *osdev;
483
484
osdev = list_entry(tmp, struct osdblk_device, node);
485
486
n += sprintf(data+n, "%d %d %llu %llu %s\n",
487
osdev->id,
488
osdev->major,
489
osdev->obj.partition,
490
osdev->obj.id,
491
osdev->osd_path);
492
}
493
494
mutex_unlock(&ctl_mutex);
495
return n;
496
}
497
498
/*
 * class_osdblk_add - store handler for /sys/class/osdblk/add.
 * @c:     class (unused)
 * @attr:  attribute (unused)
 * @buf:   "<partition id> <object id> <OSD device path>"
 * @count: number of bytes in @buf
 *
 * Allocates a new osdblk_device, gives it an id one above the highest id
 * currently on osdblkdev_list, looks up the OSD, registers a block major
 * and publishes the disk.  A module reference is held for the lifetime of
 * the mapping and dropped again by class_osdblk_remove().
 *
 * Returns @count on success.  On failure everything acquired so far is
 * released and a negative errno is returned: -ENODEV (module going away),
 * -ENOMEM, -EINVAL (unparsable command), or the error from the OSD lookup,
 * register_blkdev() or osdblk_init_disk().
 */
static ssize_t class_osdblk_add(struct class *c,
				struct class_attribute *attr,
				const char *buf, size_t count)
{
	struct osdblk_device *osdev, *cur;
	ssize_t rc;
	int irc, new_id = 0;

	if (!try_module_get(THIS_MODULE))
		return -ENODEV;

	/* new osdblk_device object; the path can be at most strlen(buf) long */
	osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL);
	if (!osdev) {
		rc = -ENOMEM;
		goto err_out_mod;
	}

	/* static osdblk_device initialization */
	spin_lock_init(&osdev->lock);
	INIT_LIST_HEAD(&osdev->node);

	/* generate unique id: find highest unique id, add one */

	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);

	list_for_each_entry(cur, &osdblkdev_list, node) {
		/* ">=" matters: with ">" an existing id 0 is handed out again */
		if (cur->id >= new_id)
			new_id = cur->id + 1;
	}

	osdev->id = new_id;

	/* add to global list */
	list_add_tail(&osdev->node, &osdblkdev_list);

	mutex_unlock(&ctl_mutex);

	/* parse add command */
	if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id,
		   osdev->osd_path) != 3) {
		rc = -EINVAL;
		goto err_out_slot;
	}

	/* initialize rest of new object */
	sprintf(osdev->name, DRV_NAME "%d", osdev->id);

	/* contact requested OSD */
	osdev->osd = osduld_path_lookup(osdev->osd_path);
	if (IS_ERR(osdev->osd)) {
		rc = PTR_ERR(osdev->osd);
		goto err_out_slot;
	}

	/* build OSD credential */
	osdblk_make_credential(osdev->obj_cred, &osdev->obj);

	/* register our block device */
	irc = register_blkdev(0, osdev->name);
	if (irc < 0) {
		rc = irc;
		goto err_out_osd;
	}

	osdev->major = irc;

	/* set up and announce blkdev mapping */
	rc = osdblk_init_disk(osdev);
	if (rc)
		goto err_out_blkdev;

	return count;

err_out_blkdev:
	unregister_blkdev(osdev->major, osdev->name);
err_out_osd:
	osduld_put_device(osdev->osd);
err_out_slot:
	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
	list_del_init(&osdev->node);
	mutex_unlock(&ctl_mutex);

	kfree(osdev);
err_out_mod:
	OSDBLK_DEBUG("Error adding device %s\n", buf);
	module_put(THIS_MODULE);
	return rc;
}
591
592
static ssize_t class_osdblk_remove(struct class *c,
593
struct class_attribute *attr,
594
const char *buf,
595
size_t count)
596
{
597
struct osdblk_device *osdev = NULL;
598
int target_id, rc;
599
unsigned long ul;
600
struct list_head *tmp;
601
602
rc = strict_strtoul(buf, 10, &ul);
603
if (rc)
604
return rc;
605
606
/* convert to int; abort if we lost anything in the conversion */
607
target_id = (int) ul;
608
if (target_id != ul)
609
return -EINVAL;
610
611
/* remove object from list immediately */
612
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
613
614
list_for_each(tmp, &osdblkdev_list) {
615
osdev = list_entry(tmp, struct osdblk_device, node);
616
if (osdev->id == target_id) {
617
list_del_init(&osdev->node);
618
break;
619
}
620
osdev = NULL;
621
}
622
623
mutex_unlock(&ctl_mutex);
624
625
if (!osdev)
626
return -ENOENT;
627
628
/* clean up and free blkdev and associated OSD connection */
629
osdblk_free_disk(osdev);
630
unregister_blkdev(osdev->major, osdev->name);
631
osduld_put_device(osdev->osd);
632
kfree(osdev);
633
634
/* release module ref */
635
module_put(THIS_MODULE);
636
637
return count;
638
}
639
640
static struct class_attribute class_osdblk_attrs[] = {
641
__ATTR(add, 0200, NULL, class_osdblk_add),
642
__ATTR(remove, 0200, NULL, class_osdblk_remove),
643
__ATTR(list, 0444, class_osdblk_list, NULL),
644
__ATTR_NULL
645
};
646
647
static int osdblk_sysfs_init(void)
648
{
649
int ret = 0;
650
651
/*
652
* create control files in sysfs
653
* /sys/class/osdblk/...
654
*/
655
class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL);
656
if (!class_osdblk)
657
return -ENOMEM;
658
659
class_osdblk->name = DRV_NAME;
660
class_osdblk->owner = THIS_MODULE;
661
class_osdblk->class_release = class_osdblk_release;
662
class_osdblk->class_attrs = class_osdblk_attrs;
663
664
ret = class_register(class_osdblk);
665
if (ret) {
666
kfree(class_osdblk);
667
class_osdblk = NULL;
668
printk(PFX "failed to create class osdblk\n");
669
return ret;
670
}
671
672
return 0;
673
}
674
675
static void osdblk_sysfs_cleanup(void)
676
{
677
if (class_osdblk)
678
class_destroy(class_osdblk);
679
class_osdblk = NULL;
680
}
681
682
/*
 * osdblk_init - module entry point.
 *
 * The only setup is the sysfs control interface; its result is returned
 * unchanged (0 on success, negative errno on failure).
 */
static int __init osdblk_init(void)
{
	return osdblk_sysfs_init();
}
692
693
/*
 * osdblk_exit - module exit point; removes the sysfs control interface.
 */
static void __exit osdblk_exit(void)
{
	osdblk_sysfs_cleanup();
}

/* Hook the entry and exit points into module load/unload. */
module_init(osdblk_init);
module_exit(osdblk_exit);
700
701
702