Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/block/virtio_blk.c
15109 views
1
//#define DEBUG
2
#include <linux/spinlock.h>
3
#include <linux/slab.h>
4
#include <linux/blkdev.h>
5
#include <linux/hdreg.h>
6
#include <linux/virtio.h>
7
#include <linux/virtio_blk.h>
8
#include <linux/scatterlist.h>
9
#include <linux/string_helpers.h>
10
#include <scsi/scsi_cmnd.h>
11
12
/* Shift applied to a disk index to obtain its first minor number. */
#define PART_BITS 4

/* Block major number; filled in from register_blkdev() at module init. */
static int major;

/* Index of the next disk to be probed; selects its name and first minor. */
static int index;

/* Workqueue onto which config-change work items are queued. */
struct workqueue_struct *virtblk_wq;
16
17
struct virtio_blk
18
{
19
spinlock_t lock;
20
21
struct virtio_device *vdev;
22
struct virtqueue *vq;
23
24
/* The disk structure for the kernel. */
25
struct gendisk *disk;
26
27
/* Request tracking. */
28
struct list_head reqs;
29
30
mempool_t *pool;
31
32
/* Process context for config space updates */
33
struct work_struct config_work;
34
35
/* What host tells us, plus 2 for header & tailer. */
36
unsigned int sg_elems;
37
38
/* Scatterlist: can be too big for stack. */
39
struct scatterlist sg[/*sg_elems*/];
40
};
41
42
struct virtblk_req
43
{
44
struct list_head list;
45
struct request *req;
46
struct virtio_blk_outhdr out_hdr;
47
struct virtio_scsi_inhdr in_hdr;
48
u8 status;
49
};
50
51
static void blk_done(struct virtqueue *vq)
52
{
53
struct virtio_blk *vblk = vq->vdev->priv;
54
struct virtblk_req *vbr;
55
unsigned int len;
56
unsigned long flags;
57
58
spin_lock_irqsave(&vblk->lock, flags);
59
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
60
int error;
61
62
switch (vbr->status) {
63
case VIRTIO_BLK_S_OK:
64
error = 0;
65
break;
66
case VIRTIO_BLK_S_UNSUPP:
67
error = -ENOTTY;
68
break;
69
default:
70
error = -EIO;
71
break;
72
}
73
74
switch (vbr->req->cmd_type) {
75
case REQ_TYPE_BLOCK_PC:
76
vbr->req->resid_len = vbr->in_hdr.residual;
77
vbr->req->sense_len = vbr->in_hdr.sense_len;
78
vbr->req->errors = vbr->in_hdr.errors;
79
break;
80
case REQ_TYPE_SPECIAL:
81
vbr->req->errors = (error != 0);
82
break;
83
default:
84
break;
85
}
86
87
__blk_end_request_all(vbr->req, error);
88
list_del(&vbr->list);
89
mempool_free(vbr, vblk->pool);
90
}
91
/* In case queue is stopped waiting for more buffers. */
92
blk_start_queue(vblk->disk->queue);
93
spin_unlock_irqrestore(&vblk->lock, flags);
94
}
95
96
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
97
struct request *req)
98
{
99
unsigned long num, out = 0, in = 0;
100
struct virtblk_req *vbr;
101
102
vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
103
if (!vbr)
104
/* When another request finishes we'll try again. */
105
return false;
106
107
vbr->req = req;
108
109
if (req->cmd_flags & REQ_FLUSH) {
110
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
111
vbr->out_hdr.sector = 0;
112
vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
113
} else {
114
switch (req->cmd_type) {
115
case REQ_TYPE_FS:
116
vbr->out_hdr.type = 0;
117
vbr->out_hdr.sector = blk_rq_pos(vbr->req);
118
vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
119
break;
120
case REQ_TYPE_BLOCK_PC:
121
vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
122
vbr->out_hdr.sector = 0;
123
vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
124
break;
125
case REQ_TYPE_SPECIAL:
126
vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
127
vbr->out_hdr.sector = 0;
128
vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
129
break;
130
default:
131
/* We don't put anything else in the queue. */
132
BUG();
133
}
134
}
135
136
sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
137
138
/*
139
* If this is a packet command we need a couple of additional headers.
140
* Behind the normal outhdr we put a segment with the scsi command
141
* block, and before the normal inhdr we put the sense data and the
142
* inhdr with additional status information before the normal inhdr.
143
*/
144
if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
145
sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
146
147
num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
148
149
if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
150
sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
151
sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
152
sizeof(vbr->in_hdr));
153
}
154
155
sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
156
sizeof(vbr->status));
157
158
if (num) {
159
if (rq_data_dir(vbr->req) == WRITE) {
160
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
161
out += num;
162
} else {
163
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
164
in += num;
165
}
166
}
167
168
if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
169
mempool_free(vbr, vblk->pool);
170
return false;
171
}
172
173
list_add_tail(&vbr->list, &vblk->reqs);
174
return true;
175
}
176
177
static void do_virtblk_request(struct request_queue *q)
178
{
179
struct virtio_blk *vblk = q->queuedata;
180
struct request *req;
181
unsigned int issued = 0;
182
183
while ((req = blk_peek_request(q)) != NULL) {
184
BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
185
186
/* If this request fails, stop queue and wait for something to
187
finish to restart it. */
188
if (!do_req(q, vblk, req)) {
189
blk_stop_queue(q);
190
break;
191
}
192
blk_start_request(req);
193
issued++;
194
}
195
196
if (issued)
197
virtqueue_kick(vblk->vq);
198
}
199
200
/* return id (s/n) string for *disk to *id_str
201
*/
202
static int virtblk_get_id(struct gendisk *disk, char *id_str)
203
{
204
struct virtio_blk *vblk = disk->private_data;
205
struct request *req;
206
struct bio *bio;
207
int err;
208
209
bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
210
GFP_KERNEL);
211
if (IS_ERR(bio))
212
return PTR_ERR(bio);
213
214
req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
215
if (IS_ERR(req)) {
216
bio_put(bio);
217
return PTR_ERR(req);
218
}
219
220
req->cmd_type = REQ_TYPE_SPECIAL;
221
err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
222
blk_put_request(req);
223
224
return err;
225
}
226
227
/*
 * ioctl entry point: forward to the generic SCSI ioctl handler when the
 * host offers VIRTIO_BLK_F_SCSI, otherwise report -ENOTTY.
 */
static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
			 unsigned int cmd, unsigned long data)
{
	struct gendisk *disk = bdev->bd_disk;
	struct virtio_blk *vblk = disk->private_data;

	/* Only allow the generic SCSI ioctls if the host can support it. */
	if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
		return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
				      (void __user *)data);

	return -ENOTTY;
}
242
243
/* We provide getgeo only to please some old bootloader/partitioning tools */
244
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
245
{
246
struct virtio_blk *vblk = bd->bd_disk->private_data;
247
struct virtio_blk_geometry vgeo;
248
int err;
249
250
/* see if the host passed in geometry config */
251
err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
252
offsetof(struct virtio_blk_config, geometry),
253
&vgeo);
254
255
if (!err) {
256
geo->heads = vgeo.heads;
257
geo->sectors = vgeo.sectors;
258
geo->cylinders = vgeo.cylinders;
259
} else {
260
/* some standard values, similar to sd */
261
geo->heads = 1 << 6;
262
geo->sectors = 1 << 5;
263
geo->cylinders = get_capacity(bd->bd_disk) >> 11;
264
}
265
return 0;
266
}
267
268
static const struct block_device_operations virtblk_fops = {
269
.ioctl = virtblk_ioctl,
270
.owner = THIS_MODULE,
271
.getgeo = virtblk_getgeo,
272
};
273
274
static int index_to_minor(int index)
275
{
276
return index << PART_BITS;
277
}
278
279
static ssize_t virtblk_serial_show(struct device *dev,
280
struct device_attribute *attr, char *buf)
281
{
282
struct gendisk *disk = dev_to_disk(dev);
283
int err;
284
285
/* sysfs gives us a PAGE_SIZE buffer */
286
BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
287
288
buf[VIRTIO_BLK_ID_BYTES] = '\0';
289
err = virtblk_get_id(disk, buf);
290
if (!err)
291
return strlen(buf);
292
293
if (err == -EIO) /* Unsupported? Make it empty. */
294
return 0;
295
296
return err;
297
}
298
DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
299
300
static void virtblk_config_changed_work(struct work_struct *work)
301
{
302
struct virtio_blk *vblk =
303
container_of(work, struct virtio_blk, config_work);
304
struct virtio_device *vdev = vblk->vdev;
305
struct request_queue *q = vblk->disk->queue;
306
char cap_str_2[10], cap_str_10[10];
307
u64 capacity, size;
308
309
/* Host must always specify the capacity. */
310
vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
311
&capacity, sizeof(capacity));
312
313
/* If capacity is too big, truncate with warning. */
314
if ((sector_t)capacity != capacity) {
315
dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
316
(unsigned long long)capacity);
317
capacity = (sector_t)-1;
318
}
319
320
size = capacity * queue_logical_block_size(q);
321
string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
322
string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
323
324
dev_notice(&vdev->dev,
325
"new size: %llu %d-byte logical blocks (%s/%s)\n",
326
(unsigned long long)capacity,
327
queue_logical_block_size(q),
328
cap_str_10, cap_str_2);
329
330
set_capacity(vblk->disk, capacity);
331
}
332
333
static void virtblk_config_changed(struct virtio_device *vdev)
334
{
335
struct virtio_blk *vblk = vdev->priv;
336
337
queue_work(virtblk_wq, &vblk->config_work);
338
}
339
340
static int __devinit virtblk_probe(struct virtio_device *vdev)
341
{
342
struct virtio_blk *vblk;
343
struct request_queue *q;
344
int err;
345
u64 cap;
346
u32 v, blk_size, sg_elems, opt_io_size;
347
u16 min_io_size;
348
u8 physical_block_exp, alignment_offset;
349
350
if (index_to_minor(index) >= 1 << MINORBITS)
351
return -ENOSPC;
352
353
/* We need to know how many segments before we allocate. */
354
err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
355
offsetof(struct virtio_blk_config, seg_max),
356
&sg_elems);
357
358
/* We need at least one SG element, whatever they say. */
359
if (err || !sg_elems)
360
sg_elems = 1;
361
362
/* We need an extra sg elements at head and tail. */
363
sg_elems += 2;
364
vdev->priv = vblk = kmalloc(sizeof(*vblk) +
365
sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
366
if (!vblk) {
367
err = -ENOMEM;
368
goto out;
369
}
370
371
INIT_LIST_HEAD(&vblk->reqs);
372
spin_lock_init(&vblk->lock);
373
vblk->vdev = vdev;
374
vblk->sg_elems = sg_elems;
375
sg_init_table(vblk->sg, vblk->sg_elems);
376
INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
377
378
/* We expect one virtqueue, for output. */
379
vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
380
if (IS_ERR(vblk->vq)) {
381
err = PTR_ERR(vblk->vq);
382
goto out_free_vblk;
383
}
384
385
vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
386
if (!vblk->pool) {
387
err = -ENOMEM;
388
goto out_free_vq;
389
}
390
391
/* FIXME: How many partitions? How long is a piece of string? */
392
vblk->disk = alloc_disk(1 << PART_BITS);
393
if (!vblk->disk) {
394
err = -ENOMEM;
395
goto out_mempool;
396
}
397
398
q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
399
if (!q) {
400
err = -ENOMEM;
401
goto out_put_disk;
402
}
403
404
q->queuedata = vblk;
405
406
if (index < 26) {
407
sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
408
} else if (index < (26 + 1) * 26) {
409
sprintf(vblk->disk->disk_name, "vd%c%c",
410
'a' + index / 26 - 1, 'a' + index % 26);
411
} else {
412
const unsigned int m1 = (index / 26 - 1) / 26 - 1;
413
const unsigned int m2 = (index / 26 - 1) % 26;
414
const unsigned int m3 = index % 26;
415
sprintf(vblk->disk->disk_name, "vd%c%c%c",
416
'a' + m1, 'a' + m2, 'a' + m3);
417
}
418
419
vblk->disk->major = major;
420
vblk->disk->first_minor = index_to_minor(index);
421
vblk->disk->private_data = vblk;
422
vblk->disk->fops = &virtblk_fops;
423
vblk->disk->driverfs_dev = &vdev->dev;
424
index++;
425
426
/* configure queue flush support */
427
if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
428
blk_queue_flush(q, REQ_FLUSH);
429
430
/* If disk is read-only in the host, the guest should obey */
431
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
432
set_disk_ro(vblk->disk, 1);
433
434
/* Host must always specify the capacity. */
435
vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
436
&cap, sizeof(cap));
437
438
/* If capacity is too big, truncate with warning. */
439
if ((sector_t)cap != cap) {
440
dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
441
(unsigned long long)cap);
442
cap = (sector_t)-1;
443
}
444
set_capacity(vblk->disk, cap);
445
446
/* We can handle whatever the host told us to handle. */
447
blk_queue_max_segments(q, vblk->sg_elems-2);
448
449
/* No need to bounce any requests */
450
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
451
452
/* No real sector limit. */
453
blk_queue_max_hw_sectors(q, -1U);
454
455
/* Host can optionally specify maximum segment size and number of
456
* segments. */
457
err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
458
offsetof(struct virtio_blk_config, size_max),
459
&v);
460
if (!err)
461
blk_queue_max_segment_size(q, v);
462
else
463
blk_queue_max_segment_size(q, -1U);
464
465
/* Host can optionally specify the block size of the device */
466
err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
467
offsetof(struct virtio_blk_config, blk_size),
468
&blk_size);
469
if (!err)
470
blk_queue_logical_block_size(q, blk_size);
471
else
472
blk_size = queue_logical_block_size(q);
473
474
/* Use topology information if available */
475
err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
476
offsetof(struct virtio_blk_config, physical_block_exp),
477
&physical_block_exp);
478
if (!err && physical_block_exp)
479
blk_queue_physical_block_size(q,
480
blk_size * (1 << physical_block_exp));
481
482
err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
483
offsetof(struct virtio_blk_config, alignment_offset),
484
&alignment_offset);
485
if (!err && alignment_offset)
486
blk_queue_alignment_offset(q, blk_size * alignment_offset);
487
488
err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
489
offsetof(struct virtio_blk_config, min_io_size),
490
&min_io_size);
491
if (!err && min_io_size)
492
blk_queue_io_min(q, blk_size * min_io_size);
493
494
err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
495
offsetof(struct virtio_blk_config, opt_io_size),
496
&opt_io_size);
497
if (!err && opt_io_size)
498
blk_queue_io_opt(q, blk_size * opt_io_size);
499
500
501
add_disk(vblk->disk);
502
err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
503
if (err)
504
goto out_del_disk;
505
506
return 0;
507
508
out_del_disk:
509
del_gendisk(vblk->disk);
510
blk_cleanup_queue(vblk->disk->queue);
511
out_put_disk:
512
put_disk(vblk->disk);
513
out_mempool:
514
mempool_destroy(vblk->pool);
515
out_free_vq:
516
vdev->config->del_vqs(vdev);
517
out_free_vblk:
518
kfree(vblk);
519
out:
520
return err;
521
}
522
523
/*
 * Tear down one virtio block device, undoing virtblk_probe().
 *
 * The disk is deleted while the device is still operational: the "serial"
 * sysfs attribute issues a request that only the host can complete, so
 * resetting the device first could leave such a reader, and with it
 * del_gendisk(), waiting indefinitely.
 */
static void __devexit virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	flush_work(&vblk->config_work);

	/* Remove the disk before the host stops answering requests. */
	del_gendisk(vblk->disk);

	/* Nothing should be pending. */
	BUG_ON(!list_empty(&vblk->reqs));

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	blk_cleanup_queue(vblk->disk->queue);
	put_disk(vblk->disk);
	mempool_destroy(vblk->pool);
	vdev->config->del_vqs(vdev);
	kfree(vblk);
}
542
543
static const struct virtio_device_id id_table[] = {
544
{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
545
{ 0 },
546
};
547
548
static unsigned int features[] = {
549
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
550
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
551
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
552
};
553
554
/*
555
* virtio_blk causes spurious section mismatch warning by
556
* simultaneously referring to a __devinit and a __devexit function.
557
* Use __refdata to avoid this warning.
558
*/
559
static struct virtio_driver __refdata virtio_blk = {
560
.feature_table = features,
561
.feature_table_size = ARRAY_SIZE(features),
562
.driver.name = KBUILD_MODNAME,
563
.driver.owner = THIS_MODULE,
564
.id_table = id_table,
565
.probe = virtblk_probe,
566
.remove = __devexit_p(virtblk_remove),
567
.config_changed = virtblk_config_changed,
568
};
569
570
/*
 * Module init: create the config-change workqueue, obtain a dynamic block
 * major and register the virtio driver.  Each failure undoes the steps
 * that already succeeded and returns the error.
 */
static int __init init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (virtblk_wq == NULL)
		return -ENOMEM;

	/* Passing 0 asks for a dynamically assigned major. */
	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		destroy_workqueue(virtblk_wq);
		return error;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error != 0) {
		unregister_blkdev(major, "virtblk");
		destroy_workqueue(virtblk_wq);
		return error;
	}

	return 0;
}
595
596
/*
 * Module exit: undo init() in reverse order.
 *
 * The virtio driver is unregistered first so that every device is removed
 * (and its disk deleted) while the block major those disks were created
 * under is still registered; only then is the major released and the
 * workqueue destroyed.
 */
static void __exit fini(void)
{
	unregister_virtio_driver(&virtio_blk);
	unregister_blkdev(major, "virtblk");
	destroy_workqueue(virtblk_wq);
}
602
module_init(init);
603
module_exit(fini);
604
605
MODULE_DEVICE_TABLE(virtio, id_table);
606
MODULE_DESCRIPTION("Virtio block driver");
607
MODULE_LICENSE("GPL");
608
609