Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/block/genhd.c
15109 views
1
/*
2
* gendisk handling
3
*/
4
5
#include <linux/module.h>
6
#include <linux/fs.h>
7
#include <linux/genhd.h>
8
#include <linux/kdev_t.h>
9
#include <linux/kernel.h>
10
#include <linux/blkdev.h>
11
#include <linux/init.h>
12
#include <linux/spinlock.h>
13
#include <linux/proc_fs.h>
14
#include <linux/seq_file.h>
15
#include <linux/slab.h>
16
#include <linux/kmod.h>
17
#include <linux/kobj_map.h>
18
#include <linux/buffer_head.h>
19
#include <linux/mutex.h>
20
#include <linux/idr.h>
21
#include <linux/log2.h>
22
23
#include "blk.h"
24
25
static DEFINE_MUTEX(block_class_lock);
26
struct kobject *block_depr;
27
28
/* for extended dynamic devt allocation, currently only one major is used */
29
#define MAX_EXT_DEVT (1 << MINORBITS)
30
31
/* For extended devt allocation. ext_devt_mutex prevents look up
32
* results from going away underneath its user.
33
*/
34
static DEFINE_MUTEX(ext_devt_mutex);
35
static DEFINE_IDR(ext_devt_idr);
36
37
static struct device_type disk_type;
38
39
static void disk_add_events(struct gendisk *disk);
40
static void disk_del_events(struct gendisk *disk);
41
static void disk_release_events(struct gendisk *disk);
42
43
/**
44
* disk_get_part - get partition
45
* @disk: disk to look partition from
46
* @partno: partition number
47
*
48
* Look for partition @partno from @disk. If found, increment
49
* reference count and return it.
50
*
51
* CONTEXT:
52
* Don't care.
53
*
54
* RETURNS:
55
* Pointer to the found partition on success, NULL if not found.
56
*/
57
struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
58
{
59
struct hd_struct *part = NULL;
60
struct disk_part_tbl *ptbl;
61
62
if (unlikely(partno < 0))
63
return NULL;
64
65
rcu_read_lock();
66
67
ptbl = rcu_dereference(disk->part_tbl);
68
if (likely(partno < ptbl->len)) {
69
part = rcu_dereference(ptbl->part[partno]);
70
if (part)
71
get_device(part_to_dev(part));
72
}
73
74
rcu_read_unlock();
75
76
return part;
77
}
78
EXPORT_SYMBOL_GPL(disk_get_part);
79
80
/**
81
* disk_part_iter_init - initialize partition iterator
82
* @piter: iterator to initialize
83
* @disk: disk to iterate over
84
* @flags: DISK_PITER_* flags
85
*
86
* Initialize @piter so that it iterates over partitions of @disk.
87
*
88
* CONTEXT:
89
* Don't care.
90
*/
91
void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
92
unsigned int flags)
93
{
94
struct disk_part_tbl *ptbl;
95
96
rcu_read_lock();
97
ptbl = rcu_dereference(disk->part_tbl);
98
99
piter->disk = disk;
100
piter->part = NULL;
101
102
if (flags & DISK_PITER_REVERSE)
103
piter->idx = ptbl->len - 1;
104
else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
105
piter->idx = 0;
106
else
107
piter->idx = 1;
108
109
piter->flags = flags;
110
111
rcu_read_unlock();
112
}
113
EXPORT_SYMBOL_GPL(disk_part_iter_init);
114
115
/**
116
* disk_part_iter_next - proceed iterator to the next partition and return it
117
* @piter: iterator of interest
118
*
119
* Proceed @piter to the next partition and return it.
120
*
121
* CONTEXT:
122
* Don't care.
123
*/
124
struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
125
{
126
struct disk_part_tbl *ptbl;
127
int inc, end;
128
129
/* put the last partition */
130
disk_put_part(piter->part);
131
piter->part = NULL;
132
133
/* get part_tbl */
134
rcu_read_lock();
135
ptbl = rcu_dereference(piter->disk->part_tbl);
136
137
/* determine iteration parameters */
138
if (piter->flags & DISK_PITER_REVERSE) {
139
inc = -1;
140
if (piter->flags & (DISK_PITER_INCL_PART0 |
141
DISK_PITER_INCL_EMPTY_PART0))
142
end = -1;
143
else
144
end = 0;
145
} else {
146
inc = 1;
147
end = ptbl->len;
148
}
149
150
/* iterate to the next partition */
151
for (; piter->idx != end; piter->idx += inc) {
152
struct hd_struct *part;
153
154
part = rcu_dereference(ptbl->part[piter->idx]);
155
if (!part)
156
continue;
157
if (!part->nr_sects &&
158
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
159
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
160
piter->idx == 0))
161
continue;
162
163
get_device(part_to_dev(part));
164
piter->part = part;
165
piter->idx += inc;
166
break;
167
}
168
169
rcu_read_unlock();
170
171
return piter->part;
172
}
173
EXPORT_SYMBOL_GPL(disk_part_iter_next);
174
175
/**
176
* disk_part_iter_exit - finish up partition iteration
177
* @piter: iter of interest
178
*
179
* Called when iteration is over. Cleans up @piter.
180
*
181
* CONTEXT:
182
* Don't care.
183
*/
184
void disk_part_iter_exit(struct disk_part_iter *piter)
185
{
186
disk_put_part(piter->part);
187
piter->part = NULL;
188
}
189
EXPORT_SYMBOL_GPL(disk_part_iter_exit);
190
191
/*
 * Return non-zero when @sector lies inside @part, i.e. in the half-open
 * range [start_sect, start_sect + nr_sects).
 */
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
{
	if (sector < part->start_sect)
		return 0;

	return sector < part->start_sect + part->nr_sects;
}
196
197
/**
198
* disk_map_sector_rcu - map sector to partition
199
* @disk: gendisk of interest
200
* @sector: sector to map
201
*
202
* Find out which partition @sector maps to on @disk. This is
203
* primarily used for stats accounting.
204
*
205
* CONTEXT:
206
* RCU read locked. The returned partition pointer is valid only
207
* while preemption is disabled.
208
*
209
* RETURNS:
210
* Found partition on success, part0 is returned if no partition matches
211
*/
212
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
213
{
214
struct disk_part_tbl *ptbl;
215
struct hd_struct *part;
216
int i;
217
218
ptbl = rcu_dereference(disk->part_tbl);
219
220
part = rcu_dereference(ptbl->last_lookup);
221
if (part && sector_in_part(part, sector))
222
return part;
223
224
for (i = 1; i < ptbl->len; i++) {
225
part = rcu_dereference(ptbl->part[i]);
226
227
if (part && sector_in_part(part, sector)) {
228
rcu_assign_pointer(ptbl->last_lookup, part);
229
return part;
230
}
231
}
232
return &disk->part0;
233
}
234
EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
235
236
/*
 * Registry of block majors: a hash table of singly linked chains, one
 * entry per registered major.
 *
 * Can be deleted altogether.  Later.
 */
struct blk_major_name {
	struct blk_major_name *next;	/* next entry in the same bucket */
	int major;			/* registered major number */
	char name[16];			/* driver-supplied name, truncated to fit */
};

static struct blk_major_name *major_names[BLKDEV_MAJOR_HASH_SIZE];
245
246
/* index in the above - for now: assume no multimajor ranges */
247
static inline int major_to_index(unsigned major)
248
{
249
return major % BLKDEV_MAJOR_HASH_SIZE;
250
}
251
252
#ifdef CONFIG_PROC_FS
/*
 * Print every registered major found in bucket @offset of major_names[]
 * to @seqf, one "major name" line per entry.  Offsets at or beyond the
 * table size print nothing.
 */
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *entry;

	if (offset >= BLKDEV_MAJOR_HASH_SIZE)
		return;

	mutex_lock(&block_class_lock);
	entry = major_names[offset];
	while (entry) {
		seq_printf(seqf, "%3d %s\n", entry->major, entry->name);
		entry = entry->next;
	}
	mutex_unlock(&block_class_lock);
}
#endif /* CONFIG_PROC_FS */
265
266
/**
267
* register_blkdev - register a new block device
268
*
269
* @major: the requested major device number [1..255]. If @major=0, try to
270
* allocate any unused major number.
271
* @name: the name of the new block device as a zero terminated string
272
*
273
* The @name must be unique within the system.
274
*
275
* The return value depends on the @major input parameter.
276
* - if a major device number was requested in range [1..255] then the
277
* function returns zero on success, or a negative error code
278
* - if any unused major number was requested with @major=0 parameter
279
* then the return value is the allocated major number in range
280
* [1..255] or a negative error code otherwise
281
*/
282
int register_blkdev(unsigned int major, const char *name)
283
{
284
struct blk_major_name **n, *p;
285
int index, ret = 0;
286
287
mutex_lock(&block_class_lock);
288
289
/* temporary */
290
if (major == 0) {
291
for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
292
if (major_names[index] == NULL)
293
break;
294
}
295
296
if (index == 0) {
297
printk("register_blkdev: failed to get major for %s\n",
298
name);
299
ret = -EBUSY;
300
goto out;
301
}
302
major = index;
303
ret = major;
304
}
305
306
p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
307
if (p == NULL) {
308
ret = -ENOMEM;
309
goto out;
310
}
311
312
p->major = major;
313
strlcpy(p->name, name, sizeof(p->name));
314
p->next = NULL;
315
index = major_to_index(major);
316
317
for (n = &major_names[index]; *n; n = &(*n)->next) {
318
if ((*n)->major == major)
319
break;
320
}
321
if (!*n)
322
*n = p;
323
else
324
ret = -EBUSY;
325
326
if (ret < 0) {
327
printk("register_blkdev: cannot get major %d for %s\n",
328
major, name);
329
kfree(p);
330
}
331
out:
332
mutex_unlock(&block_class_lock);
333
return ret;
334
}
335
336
EXPORT_SYMBOL(register_blkdev);
337
338
void unregister_blkdev(unsigned int major, const char *name)
339
{
340
struct blk_major_name **n;
341
struct blk_major_name *p = NULL;
342
int index = major_to_index(major);
343
344
mutex_lock(&block_class_lock);
345
for (n = &major_names[index]; *n; n = &(*n)->next)
346
if ((*n)->major == major)
347
break;
348
if (!*n || strcmp((*n)->name, name)) {
349
WARN_ON(1);
350
} else {
351
p = *n;
352
*n = p->next;
353
}
354
mutex_unlock(&block_class_lock);
355
kfree(p);
356
}
357
358
EXPORT_SYMBOL(unregister_blkdev);
359
360
/* dev_t -> kobject map for block devices; set up in genhd_device_init() */
static struct kobj_map *bdev_map;
361
362
/**
 * blk_mangle_minor - scatter minor numbers apart
 * @minor: minor number to mangle
 *
 * When CONFIG_DEBUG_BLOCK_EXT_DEVT is enabled, mirror the low
 * MINORBITS bits of @minor (bit i trades places with bit
 * MINORBITS - 1 - i) so consecutively allocated minors end up far
 * apart.  Mangling twice gives the original value.  Without that
 * option @minor is returned unchanged.
 *
 * RETURNS:
 * Mangled value.
 *
 * CONTEXT:
 * Don't care.
 */
static int blk_mangle_minor(int minor)
{
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
	int lo_pos;

	for (lo_pos = 0; lo_pos < MINORBITS / 2; lo_pos++) {
		int hi_pos = MINORBITS - 1 - lo_pos;
		int span = hi_pos - lo_pos;
		int lo_bit = minor & (1 << lo_pos);
		int hi_bit = minor & (1 << hi_pos);

		/* drop both bits, then put each one back at the other's position */
		minor &= ~(lo_bit | hi_bit);
		minor |= (lo_bit << span) | (hi_bit >> span);
	}
#endif
	return minor;
}
393
394
/**
395
* blk_alloc_devt - allocate a dev_t for a partition
396
* @part: partition to allocate dev_t for
397
* @devt: out parameter for resulting dev_t
398
*
399
* Allocate a dev_t for block device.
400
*
401
* RETURNS:
402
* 0 on success, allocated dev_t is returned in *@devt. -errno on
403
* failure.
404
*
405
* CONTEXT:
406
* Might sleep.
407
*/
408
int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
409
{
410
struct gendisk *disk = part_to_disk(part);
411
int idx, rc;
412
413
/* in consecutive minor range? */
414
if (part->partno < disk->minors) {
415
*devt = MKDEV(disk->major, disk->first_minor + part->partno);
416
return 0;
417
}
418
419
/* allocate ext devt */
420
do {
421
if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
422
return -ENOMEM;
423
rc = idr_get_new(&ext_devt_idr, part, &idx);
424
} while (rc == -EAGAIN);
425
426
if (rc)
427
return rc;
428
429
if (idx > MAX_EXT_DEVT) {
430
idr_remove(&ext_devt_idr, idx);
431
return -EBUSY;
432
}
433
434
*devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
435
return 0;
436
}
437
438
/**
439
* blk_free_devt - free a dev_t
440
* @devt: dev_t to free
441
*
442
* Free @devt which was allocated using blk_alloc_devt().
443
*
444
* CONTEXT:
445
* Might sleep.
446
*/
447
void blk_free_devt(dev_t devt)
448
{
449
might_sleep();
450
451
if (devt == MKDEV(0, 0))
452
return;
453
454
if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
455
mutex_lock(&ext_devt_mutex);
456
idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
457
mutex_unlock(&ext_devt_mutex);
458
}
459
}
460
461
/*
 * Format @devt as hex into @buf (at most BDEVT_SIZE bytes) and return
 * @buf.  When both major and minor fit in one byte the short "MMmm"
 * form is used, left-justified to nine columns; otherwise the long
 * "MMM:mmmmm" form is used.
 */
static char *bdevt_str(dev_t devt, char *buf)
{
	if (MAJOR(devt) > 0xff || MINOR(devt) > 0xff) {
		snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
	} else {
		char tbuf[BDEVT_SIZE];

		snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
		snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
	}

	return buf;
}
472
473
/*
474
* Register device numbers dev..(dev+range-1)
475
* range must be nonzero
476
* The hash chain is sorted on range, so that subranges can override.
477
*/
478
void blk_register_region(dev_t devt, unsigned long range, struct module *module,
479
struct kobject *(*probe)(dev_t, int *, void *),
480
int (*lock)(dev_t, void *), void *data)
481
{
482
kobj_map(bdev_map, devt, range, module, probe, lock, data);
483
}
484
485
EXPORT_SYMBOL(blk_register_region);
486
487
/*
 * Remove device numbers @devt .. (@devt + @range - 1) from bdev_map;
 * the counterpart of blk_register_region().
 */
void blk_unregister_region(dev_t devt, unsigned long range)
{
	kobj_unmap(bdev_map, devt, range);
}

EXPORT_SYMBOL(blk_unregister_region);
493
494
/*
 * Probe callback registered by add_disk(): @data is the gendisk, and the
 * kobject embedded in its device is returned.  @devt and @partno are
 * not used.
 */
static struct kobject *exact_match(dev_t devt, int *partno, void *data)
{
	struct gendisk *disk = data;
	struct device *ddev = disk_to_dev(disk);

	return &ddev->kobj;
}
500
501
/*
 * Lock callback registered by add_disk(): @data is the gendisk.  Takes a
 * reference through get_disk(); returns 0 when that succeeds and -1 when
 * it does not.
 */
static int exact_lock(dev_t devt, void *data)
{
	struct gendisk *disk = data;

	return get_disk(disk) ? 0 : -1;
}
509
510
void register_disk(struct gendisk *disk)
511
{
512
struct device *ddev = disk_to_dev(disk);
513
struct block_device *bdev;
514
struct disk_part_iter piter;
515
struct hd_struct *part;
516
int err;
517
518
ddev->parent = disk->driverfs_dev;
519
520
dev_set_name(ddev, disk->disk_name);
521
522
/* delay uevents, until we scanned partition table */
523
dev_set_uevent_suppress(ddev, 1);
524
525
if (device_add(ddev))
526
return;
527
if (!sysfs_deprecated) {
528
err = sysfs_create_link(block_depr, &ddev->kobj,
529
kobject_name(&ddev->kobj));
530
if (err) {
531
device_del(ddev);
532
return;
533
}
534
}
535
disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
536
disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
537
538
/* No minors to use for partitions */
539
if (!disk_partitionable(disk))
540
goto exit;
541
542
/* No such device (e.g., media were just removed) */
543
if (!get_capacity(disk))
544
goto exit;
545
546
bdev = bdget_disk(disk, 0);
547
if (!bdev)
548
goto exit;
549
550
bdev->bd_invalidated = 1;
551
err = blkdev_get(bdev, FMODE_READ, NULL);
552
if (err < 0)
553
goto exit;
554
blkdev_put(bdev, FMODE_READ);
555
556
exit:
557
/* announce disk after possible partitions are created */
558
dev_set_uevent_suppress(ddev, 0);
559
kobject_uevent(&ddev->kobj, KOBJ_ADD);
560
561
/* announce possible partitions */
562
disk_part_iter_init(&piter, disk, 0);
563
while ((part = disk_part_iter_next(&piter)))
564
kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
565
disk_part_iter_exit(&piter);
566
}
567
568
/**
569
* add_disk - add partitioning information to kernel list
570
* @disk: per-device partitioning information
571
*
572
* This function registers the partitioning information in @disk
573
* with the kernel.
574
*
575
* FIXME: error handling
576
*/
577
void add_disk(struct gendisk *disk)
578
{
579
struct backing_dev_info *bdi;
580
dev_t devt;
581
int retval;
582
583
/* minors == 0 indicates to use ext devt from part0 and should
584
* be accompanied with EXT_DEVT flag. Make sure all
585
* parameters make sense.
586
*/
587
WARN_ON(disk->minors && !(disk->major || disk->first_minor));
588
WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
589
590
disk->flags |= GENHD_FL_UP;
591
592
retval = blk_alloc_devt(&disk->part0, &devt);
593
if (retval) {
594
WARN_ON(1);
595
return;
596
}
597
disk_to_dev(disk)->devt = devt;
598
599
/* ->major and ->first_minor aren't supposed to be
600
* dereferenced from here on, but set them just in case.
601
*/
602
disk->major = MAJOR(devt);
603
disk->first_minor = MINOR(devt);
604
605
/* Register BDI before referencing it from bdev */
606
bdi = &disk->queue->backing_dev_info;
607
bdi_register_dev(bdi, disk_devt(disk));
608
609
blk_register_region(disk_devt(disk), disk->minors, NULL,
610
exact_match, exact_lock, disk);
611
register_disk(disk);
612
blk_register_queue(disk);
613
614
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
615
"bdi");
616
WARN_ON(retval);
617
618
disk_add_events(disk);
619
}
620
EXPORT_SYMBOL(add_disk);
621
622
void del_gendisk(struct gendisk *disk)
623
{
624
struct disk_part_iter piter;
625
struct hd_struct *part;
626
627
disk_del_events(disk);
628
629
/* invalidate stuff */
630
disk_part_iter_init(&piter, disk,
631
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
632
while ((part = disk_part_iter_next(&piter))) {
633
invalidate_partition(disk, part->partno);
634
delete_partition(disk, part->partno);
635
}
636
disk_part_iter_exit(&piter);
637
638
invalidate_partition(disk, 0);
639
blk_free_devt(disk_to_dev(disk)->devt);
640
set_capacity(disk, 0);
641
disk->flags &= ~GENHD_FL_UP;
642
643
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
644
bdi_unregister(&disk->queue->backing_dev_info);
645
blk_unregister_queue(disk);
646
blk_unregister_region(disk_devt(disk), disk->minors);
647
648
part_stat_set_all(&disk->part0, 0);
649
disk->part0.stamp = 0;
650
651
kobject_put(disk->part0.holder_dir);
652
kobject_put(disk->slave_dir);
653
disk->driverfs_dev = NULL;
654
if (!sysfs_deprecated)
655
sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
656
device_del(disk_to_dev(disk));
657
}
658
EXPORT_SYMBOL(del_gendisk);
659
660
/**
661
* get_gendisk - get partitioning information for a given device
662
* @devt: device to get partitioning information for
663
* @partno: returned partition index
664
*
665
* This function gets the structure containing partitioning
666
* information for the given device @devt.
667
*/
668
struct gendisk *get_gendisk(dev_t devt, int *partno)
669
{
670
struct gendisk *disk = NULL;
671
672
if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
673
struct kobject *kobj;
674
675
kobj = kobj_lookup(bdev_map, devt, partno);
676
if (kobj)
677
disk = dev_to_disk(kobj_to_dev(kobj));
678
} else {
679
struct hd_struct *part;
680
681
mutex_lock(&ext_devt_mutex);
682
part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
683
if (part && get_disk(part_to_disk(part))) {
684
*partno = part->partno;
685
disk = part_to_disk(part);
686
}
687
mutex_unlock(&ext_devt_mutex);
688
}
689
690
return disk;
691
}
692
EXPORT_SYMBOL(get_gendisk);
693
694
/**
695
* bdget_disk - do bdget() by gendisk and partition number
696
* @disk: gendisk of interest
697
* @partno: partition number
698
*
699
* Find partition @partno from @disk, do bdget() on it.
700
*
701
* CONTEXT:
702
* Don't care.
703
*
704
* RETURNS:
705
* Resulting block_device on success, NULL on failure.
706
*/
707
struct block_device *bdget_disk(struct gendisk *disk, int partno)
708
{
709
struct hd_struct *part;
710
struct block_device *bdev = NULL;
711
712
part = disk_get_part(disk, partno);
713
if (part)
714
bdev = bdget(part_devt(part));
715
disk_put_part(part);
716
717
return bdev;
718
}
719
EXPORT_SYMBOL(bdget_disk);
720
721
/*
722
* print a full list of all partitions - intended for places where the root
723
* filesystem can't be mounted and thus to give the victim some idea of what
724
* went wrong
725
*/
726
void __init printk_all_partitions(void)
727
{
728
struct class_dev_iter iter;
729
struct device *dev;
730
731
class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
732
while ((dev = class_dev_iter_next(&iter))) {
733
struct gendisk *disk = dev_to_disk(dev);
734
struct disk_part_iter piter;
735
struct hd_struct *part;
736
char name_buf[BDEVNAME_SIZE];
737
char devt_buf[BDEVT_SIZE];
738
u8 uuid[PARTITION_META_INFO_UUIDLTH * 2 + 1];
739
740
/*
741
* Don't show empty devices or things that have been
742
* suppressed
743
*/
744
if (get_capacity(disk) == 0 ||
745
(disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
746
continue;
747
748
/*
749
* Note, unlike /proc/partitions, I am showing the
750
* numbers in hex - the same format as the root=
751
* option takes.
752
*/
753
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
754
while ((part = disk_part_iter_next(&piter))) {
755
bool is_part0 = part == &disk->part0;
756
757
uuid[0] = 0;
758
if (part->info)
759
part_unpack_uuid(part->info->uuid, uuid);
760
761
printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
762
bdevt_str(part_devt(part), devt_buf),
763
(unsigned long long)part->nr_sects >> 1,
764
disk_name(disk, part->partno, name_buf), uuid);
765
if (is_part0) {
766
if (disk->driverfs_dev != NULL &&
767
disk->driverfs_dev->driver != NULL)
768
printk(" driver: %s\n",
769
disk->driverfs_dev->driver->name);
770
else
771
printk(" (driver?)\n");
772
} else
773
printk("\n");
774
}
775
disk_part_iter_exit(&piter);
776
}
777
class_dev_iter_exit(&iter);
778
}
779
780
#ifdef CONFIG_PROC_FS
781
/* iterator */
782
/*
 * seq_file ->start for the disk listings.  Allocates a class device
 * iterator, stores it in seqf->private and advances it to the disk at
 * position *pos.
 *
 * Returns the gendisk at that position, NULL when there are fewer disks
 * than that, or ERR_PTR(-ENOMEM) when the iterator cannot be allocated.
 * The iterator is released by disk_seqf_stop().
 */
static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
{
	struct class_dev_iter *iter;
	struct device *dev;
	loff_t skip = *pos;

	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return ERR_PTR(-ENOMEM);

	seqf->private = iter;
	class_dev_iter_init(iter, &block_class, NULL, &disk_type);

	/* fetch skip + 1 devices; the last one fetched is the result */
	for (;;) {
		dev = class_dev_iter_next(iter);
		if (!dev)
			return NULL;
		if (!skip--)
			break;
	}

	return dev_to_disk(dev);
}
802
803
/*
 * seq_file ->next for the disk listings: bump *pos and return the next
 * gendisk from the iterator kept in seqf->private, or NULL at the end.
 */
static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
{
	struct device *dev;

	++*pos;
	dev = class_dev_iter_next(seqf->private);
	if (!dev)
		return NULL;

	return dev_to_disk(dev);
}
814
815
static void disk_seqf_stop(struct seq_file *seqf, void *v)
816
{
817
struct class_dev_iter *iter = seqf->private;
818
819
/* stop is called even after start failed :-( */
820
if (iter) {
821
class_dev_iter_exit(iter);
822
kfree(iter);
823
}
824
}
825
826
/*
 * seq_file ->start for /proc/partitions: same as disk_seqf_start(), plus
 * the column header when reading starts at position 0 and there is at
 * least one disk.
 *
 * The result is kept in an ordinary automatic variable; a function-level
 * static would be shared between concurrent readers of the file.
 */
static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
{
	void *p;

	p = disk_seqf_start(seqf, pos);
	if (!IS_ERR_OR_NULL(p) && !*pos)
		seq_puts(seqf, "major minor #blocks name\n\n");
	return p;
}
835
836
static int show_partition(struct seq_file *seqf, void *v)
837
{
838
struct gendisk *sgp = v;
839
struct disk_part_iter piter;
840
struct hd_struct *part;
841
char buf[BDEVNAME_SIZE];
842
843
/* Don't show non-partitionable removeable devices or empty devices */
844
if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
845
(sgp->flags & GENHD_FL_REMOVABLE)))
846
return 0;
847
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
848
return 0;
849
850
/* show the full disk and all non-0 size partitions of it */
851
disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
852
while ((part = disk_part_iter_next(&piter)))
853
seq_printf(seqf, "%4d %7d %10llu %s\n",
854
MAJOR(part_devt(part)), MINOR(part_devt(part)),
855
(unsigned long long)part->nr_sects >> 1,
856
disk_name(sgp, part->partno, buf));
857
disk_part_iter_exit(&piter);
858
859
return 0;
860
}
861
862
static const struct seq_operations partitions_op = {
863
.start = show_partition_start,
864
.next = disk_seqf_next,
865
.stop = disk_seqf_stop,
866
.show = show_partition
867
};
868
869
static int partitions_open(struct inode *inode, struct file *file)
870
{
871
return seq_open(file, &partitions_op);
872
}
873
874
static const struct file_operations proc_partitions_operations = {
875
.open = partitions_open,
876
.read = seq_read,
877
.llseek = seq_lseek,
878
.release = seq_release,
879
};
880
#endif
881
882
883
/*
 * Default probe handed to kobj_map_init(): ask for a module that may
 * provide @devt.  The major-only alias is requested only when the
 * major-minor request returns a positive value.  Always returns NULL.
 */
static struct kobject *base_probe(dev_t devt, int *partno, void *data)
{
	int rc;

	rc = request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt));
	if (rc > 0) {
		/* Make old-style 2.4 aliases work */
		request_module("block-major-%d", MAJOR(devt));
	}

	return NULL;
}
890
891
/*
 * Subsystem init: register block_class, create bdev_map, run
 * blk_dev_init(), reserve the "blkext" major and, unless
 * sysfs_deprecated is set, create the top-level "block" kobject.
 *
 * Only a class_register() failure is reported to the caller; the other
 * results are not checked.
 */
static int __init genhd_device_init(void)
{
	int rc;

	block_class.dev_kobj = sysfs_dev_block_kobj;

	rc = class_register(&block_class);
	if (unlikely(rc))
		return rc;

	bdev_map = kobj_map_init(base_probe, &block_class_lock);
	blk_dev_init();

	register_blkdev(BLOCK_EXT_MAJOR, "blkext");

	/* create top-level block dir */
	if (!sysfs_deprecated)
		block_depr = kobject_create_and_add("block", NULL);

	return 0;
}

subsys_initcall(genhd_device_init);
911
912
static ssize_t disk_range_show(struct device *dev,
913
struct device_attribute *attr, char *buf)
914
{
915
struct gendisk *disk = dev_to_disk(dev);
916
917
return sprintf(buf, "%d\n", disk->minors);
918
}
919
920
/* sysfs "ext_range": disk_max_parts() of the disk in decimal */
static ssize_t disk_ext_range_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int max_parts = disk_max_parts(disk);

	return sprintf(buf, "%d\n", max_parts);
}
927
928
static ssize_t disk_removable_show(struct device *dev,
929
struct device_attribute *attr, char *buf)
930
{
931
struct gendisk *disk = dev_to_disk(dev);
932
933
return sprintf(buf, "%d\n",
934
(disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
935
}
936
937
/* sysfs "ro": 1 when get_disk_ro() reports the disk read-only, else 0 */
static ssize_t disk_ro_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int ro = 0;

	if (get_disk_ro(disk))
		ro = 1;

	return sprintf(buf, "%d\n", ro);
}
944
945
static ssize_t disk_capability_show(struct device *dev,
946
struct device_attribute *attr, char *buf)
947
{
948
struct gendisk *disk = dev_to_disk(dev);
949
950
return sprintf(buf, "%x\n", disk->flags);
951
}
952
953
static ssize_t disk_alignment_offset_show(struct device *dev,
954
struct device_attribute *attr,
955
char *buf)
956
{
957
struct gendisk *disk = dev_to_disk(dev);
958
959
return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
960
}
961
962
static ssize_t disk_discard_alignment_show(struct device *dev,
963
struct device_attribute *attr,
964
char *buf)
965
{
966
struct gendisk *disk = dev_to_disk(dev);
967
968
return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
969
}
970
971
/*
 * Whole-disk sysfs attributes.  All are read-only for everyone; the two
 * fault-injection knobs are additionally writable by the owner and only
 * exist when the matching config option is enabled.
 */
static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
static DEVICE_ATTR(alignment_offset, S_IRUGO,
		   disk_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, S_IRUGO,
		   disk_discard_alignment_show, NULL);
static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
/* name contains dashes, so DEVICE_ATTR() cannot be used */
static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
static struct device_attribute dev_attr_fail_timeout =
	__ATTR(io-timeout-fail, S_IRUGO|S_IWUSR,
	       part_timeout_show, part_timeout_store);
#endif
991
992
static struct attribute *disk_attrs[] = {
993
&dev_attr_range.attr,
994
&dev_attr_ext_range.attr,
995
&dev_attr_removable.attr,
996
&dev_attr_ro.attr,
997
&dev_attr_size.attr,
998
&dev_attr_alignment_offset.attr,
999
&dev_attr_discard_alignment.attr,
1000
&dev_attr_capability.attr,
1001
&dev_attr_stat.attr,
1002
&dev_attr_inflight.attr,
1003
#ifdef CONFIG_FAIL_MAKE_REQUEST
1004
&dev_attr_fail.attr,
1005
#endif
1006
#ifdef CONFIG_FAIL_IO_TIMEOUT
1007
&dev_attr_fail_timeout.attr,
1008
#endif
1009
NULL
1010
};
1011
1012
static struct attribute_group disk_attr_group = {
1013
.attrs = disk_attrs,
1014
};
1015
1016
static const struct attribute_group *disk_attr_groups[] = {
1017
&disk_attr_group,
1018
NULL
1019
};
1020
1021
static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
1022
{
1023
struct disk_part_tbl *ptbl =
1024
container_of(head, struct disk_part_tbl, rcu_head);
1025
1026
kfree(ptbl);
1027
}
1028
1029
/**
1030
* disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
1031
* @disk: disk to replace part_tbl for
1032
* @new_ptbl: new part_tbl to install
1033
*
1034
* Replace disk->part_tbl with @new_ptbl in RCU-safe way. The
1035
* original ptbl is freed using RCU callback.
1036
*
1037
* LOCKING:
1038
* Matching bd_mutx locked.
1039
*/
1040
static void disk_replace_part_tbl(struct gendisk *disk,
1041
struct disk_part_tbl *new_ptbl)
1042
{
1043
struct disk_part_tbl *old_ptbl = disk->part_tbl;
1044
1045
rcu_assign_pointer(disk->part_tbl, new_ptbl);
1046
1047
if (old_ptbl) {
1048
rcu_assign_pointer(old_ptbl->last_lookup, NULL);
1049
call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
1050
}
1051
}
1052
1053
/**
1054
* disk_expand_part_tbl - expand disk->part_tbl
1055
* @disk: disk to expand part_tbl for
1056
* @partno: expand such that this partno can fit in
1057
*
1058
* Expand disk->part_tbl such that @partno can fit in. disk->part_tbl
1059
* uses RCU to allow unlocked dereferencing for stats and other stuff.
1060
*
1061
* LOCKING:
1062
* Matching bd_mutex locked, might sleep.
1063
*
1064
* RETURNS:
1065
* 0 on success, -errno on failure.
1066
*/
1067
int disk_expand_part_tbl(struct gendisk *disk, int partno)
1068
{
1069
struct disk_part_tbl *old_ptbl = disk->part_tbl;
1070
struct disk_part_tbl *new_ptbl;
1071
int len = old_ptbl ? old_ptbl->len : 0;
1072
int target = partno + 1;
1073
size_t size;
1074
int i;
1075
1076
/* disk_max_parts() is zero during initialization, ignore if so */
1077
if (disk_max_parts(disk) && target > disk_max_parts(disk))
1078
return -EINVAL;
1079
1080
if (target <= len)
1081
return 0;
1082
1083
size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
1084
new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
1085
if (!new_ptbl)
1086
return -ENOMEM;
1087
1088
new_ptbl->len = target;
1089
1090
for (i = 0; i < len; i++)
1091
rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
1092
1093
disk_replace_part_tbl(disk, new_ptbl);
1094
return 0;
1095
}
1096
1097
static void disk_release(struct device *dev)
1098
{
1099
struct gendisk *disk = dev_to_disk(dev);
1100
1101
disk_release_events(disk);
1102
kfree(disk->random);
1103
disk_replace_part_tbl(disk, NULL);
1104
free_part_stats(&disk->part0);
1105
free_part_info(&disk->part0);
1106
kfree(disk);
1107
}
1108
struct class block_class = {
1109
.name = "block",
1110
};
1111
1112
/*
 * ->devnode of disk_type: defer to the disk's own ->devnode hook when it
 * has one, otherwise return NULL.
 */
static char *block_devnode(struct device *dev, mode_t *mode)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (!disk->devnode)
		return NULL;

	return disk->devnode(disk, mode);
}
1120
1121
static struct device_type disk_type = {
1122
.name = "disk",
1123
.groups = disk_attr_groups,
1124
.release = disk_release,
1125
.devnode = block_devnode,
1126
};
1127
1128
#ifdef CONFIG_PROC_FS
1129
/*
1130
* aggregate disk stat collector. Uses the same stats that the sysfs
1131
* entries do, above, but makes them available through one seq_file.
1132
*
1133
* The output looks suspiciously like /proc/partitions with a bunch of
1134
* extra fields.
1135
*/
1136
static int diskstats_show(struct seq_file *seqf, void *v)
1137
{
1138
struct gendisk *gp = v;
1139
struct disk_part_iter piter;
1140
struct hd_struct *hd;
1141
char buf[BDEVNAME_SIZE];
1142
int cpu;
1143
1144
/*
1145
if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1146
seq_puts(seqf, "major minor name"
1147
" rio rmerge rsect ruse wio wmerge "
1148
"wsect wuse running use aveq"
1149
"\n\n");
1150
*/
1151
1152
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
1153
while ((hd = disk_part_iter_next(&piter))) {
1154
cpu = part_stat_lock();
1155
part_round_stats(cpu, hd);
1156
part_stat_unlock();
1157
seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
1158
"%u %lu %lu %llu %u %u %u %u\n",
1159
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
1160
disk_name(gp, hd->partno, buf),
1161
part_stat_read(hd, ios[READ]),
1162
part_stat_read(hd, merges[READ]),
1163
(unsigned long long)part_stat_read(hd, sectors[READ]),
1164
jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
1165
part_stat_read(hd, ios[WRITE]),
1166
part_stat_read(hd, merges[WRITE]),
1167
(unsigned long long)part_stat_read(hd, sectors[WRITE]),
1168
jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
1169
part_in_flight(hd),
1170
jiffies_to_msecs(part_stat_read(hd, io_ticks)),
1171
jiffies_to_msecs(part_stat_read(hd, time_in_queue))
1172
);
1173
}
1174
disk_part_iter_exit(&piter);
1175
1176
return 0;
1177
}
1178
1179
static const struct seq_operations diskstats_op = {
1180
.start = disk_seqf_start,
1181
.next = disk_seqf_next,
1182
.stop = disk_seqf_stop,
1183
.show = diskstats_show
1184
};
1185
1186
static int diskstats_open(struct inode *inode, struct file *file)
1187
{
1188
return seq_open(file, &diskstats_op);
1189
}
1190
1191
static const struct file_operations proc_diskstats_operations = {
1192
.open = diskstats_open,
1193
.read = seq_read,
1194
.llseek = seq_lseek,
1195
.release = seq_release,
1196
};
1197
1198
/*
 * Create the "diskstats" and "partitions" proc entries.  The results of
 * proc_create() are not checked; the function always returns 0.
 */
static int __init proc_genhd_init(void)
{
	proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
	proc_create("partitions", 0, NULL, &proc_partitions_operations);

	return 0;
}
module_init(proc_genhd_init);
1205
#endif /* CONFIG_PROC_FS */
1206
1207
dev_t blk_lookup_devt(const char *name, int partno)
1208
{
1209
dev_t devt = MKDEV(0, 0);
1210
struct class_dev_iter iter;
1211
struct device *dev;
1212
1213
class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1214
while ((dev = class_dev_iter_next(&iter))) {
1215
struct gendisk *disk = dev_to_disk(dev);
1216
struct hd_struct *part;
1217
1218
if (strcmp(dev_name(dev), name))
1219
continue;
1220
1221
if (partno < disk->minors) {
1222
/* We need to return the right devno, even
1223
* if the partition doesn't exist yet.
1224
*/
1225
devt = MKDEV(MAJOR(dev->devt),
1226
MINOR(dev->devt) + partno);
1227
break;
1228
}
1229
part = disk_get_part(disk, partno);
1230
if (part) {
1231
devt = part_devt(part);
1232
disk_put_part(part);
1233
break;
1234
}
1235
disk_put_part(part);
1236
}
1237
class_dev_iter_exit(&iter);
1238
return devt;
1239
}
1240
EXPORT_SYMBOL(blk_lookup_devt);
1241
1242
/**
 * alloc_disk - allocate a gendisk without a node preference
 * @minors: value stored in the new disk's ->minors field
 *
 * Shorthand for alloc_disk_node() with a node id of -1.
 *
 * RETURNS:
 * Whatever alloc_disk_node() returns.
 */
struct gendisk *alloc_disk(int minors)
{
	struct gendisk *disk = alloc_disk_node(minors, -1);

	return disk;
}
EXPORT_SYMBOL(alloc_disk);
1247
1248
struct gendisk *alloc_disk_node(int minors, int node_id)
1249
{
1250
struct gendisk *disk;
1251
1252
disk = kmalloc_node(sizeof(struct gendisk),
1253
GFP_KERNEL | __GFP_ZERO, node_id);
1254
if (disk) {
1255
if (!init_part_stats(&disk->part0)) {
1256
kfree(disk);
1257
return NULL;
1258
}
1259
disk->node_id = node_id;
1260
if (disk_expand_part_tbl(disk, 0)) {
1261
free_part_stats(&disk->part0);
1262
kfree(disk);
1263
return NULL;
1264
}
1265
disk->part_tbl->part[0] = &disk->part0;
1266
1267
hd_ref_init(&disk->part0);
1268
1269
disk->minors = minors;
1270
rand_initialize_disk(disk);
1271
disk_to_dev(disk)->class = &block_class;
1272
disk_to_dev(disk)->type = &disk_type;
1273
device_initialize(disk_to_dev(disk));
1274
}
1275
return disk;
1276
}
1277
EXPORT_SYMBOL(alloc_disk_node);
1278
1279
struct kobject *get_disk(struct gendisk *disk)
1280
{
1281
struct module *owner;
1282
struct kobject *kobj;
1283
1284
if (!disk->fops)
1285
return NULL;
1286
owner = disk->fops->owner;
1287
if (owner && !try_module_get(owner))
1288
return NULL;
1289
kobj = kobject_get(&disk_to_dev(disk)->kobj);
1290
if (kobj == NULL) {
1291
module_put(owner);
1292
return NULL;
1293
}
1294
return kobj;
1295
1296
}
1297
1298
EXPORT_SYMBOL(get_disk);
1299
1300
void put_disk(struct gendisk *disk)
1301
{
1302
if (disk)
1303
kobject_put(&disk_to_dev(disk)->kobj);
1304
}
1305
1306
EXPORT_SYMBOL(put_disk);
1307
1308
static void set_disk_ro_uevent(struct gendisk *gd, int ro)
1309
{
1310
char event[] = "DISK_RO=1";
1311
char *envp[] = { event, NULL };
1312
1313
if (!ro)
1314
event[8] = '0';
1315
kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1316
}
1317
1318
void set_device_ro(struct block_device *bdev, int flag)
1319
{
1320
bdev->bd_part->policy = flag;
1321
}
1322
1323
EXPORT_SYMBOL(set_device_ro);
1324
1325
void set_disk_ro(struct gendisk *disk, int flag)
1326
{
1327
struct disk_part_iter piter;
1328
struct hd_struct *part;
1329
1330
if (disk->part0.policy != flag) {
1331
set_disk_ro_uevent(disk, flag);
1332
disk->part0.policy = flag;
1333
}
1334
1335
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
1336
while ((part = disk_part_iter_next(&piter)))
1337
part->policy = flag;
1338
disk_part_iter_exit(&piter);
1339
}
1340
1341
EXPORT_SYMBOL(set_disk_ro);
1342
1343
int bdev_read_only(struct block_device *bdev)
1344
{
1345
if (!bdev)
1346
return 0;
1347
return bdev->bd_part->policy;
1348
}
1349
1350
EXPORT_SYMBOL(bdev_read_only);
1351
1352
int invalidate_partition(struct gendisk *disk, int partno)
1353
{
1354
int res = 0;
1355
struct block_device *bdev = bdget_disk(disk, partno);
1356
if (bdev) {
1357
fsync_bdev(bdev);
1358
res = __invalidate_device(bdev, true);
1359
bdput(bdev);
1360
}
1361
return res;
1362
}
1363
1364
EXPORT_SYMBOL(invalidate_partition);
1365
1366
/*
1367
* Disk events - monitor disk events like media change and eject request.
1368
*/
1369
struct disk_events {
1370
struct list_head node; /* all disk_event's */
1371
struct gendisk *disk; /* the associated disk */
1372
spinlock_t lock;
1373
1374
struct mutex block_mutex; /* protects blocking */
1375
int block; /* event blocking depth */
1376
unsigned int pending; /* events already sent out */
1377
unsigned int clearing; /* events being cleared */
1378
1379
long poll_msecs; /* interval, -1 for default */
1380
struct delayed_work dwork;
1381
};
1382
1383
static const char *disk_events_strs[] = {
1384
[ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change",
1385
[ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request",
1386
};
1387
1388
static char *disk_uevents[] = {
1389
[ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1",
1390
[ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1",
1391
};
1392
1393
/* global list of all disk_events and the mutex used around list updates */
static DEFINE_MUTEX(disk_events_mutex);
static LIST_HEAD(disk_events);

/* default poll interval in msecs; 0 disables in-kernel polling by default */
static unsigned long disk_events_dfl_poll_msecs = 0;
1399
1400
static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
1401
{
1402
struct disk_events *ev = disk->ev;
1403
long intv_msecs = 0;
1404
1405
/*
1406
* If device-specific poll interval is set, always use it. If
1407
* the default is being used, poll iff there are events which
1408
* can't be monitored asynchronously.
1409
*/
1410
if (ev->poll_msecs >= 0)
1411
intv_msecs = ev->poll_msecs;
1412
else if (disk->events & ~disk->async_events)
1413
intv_msecs = disk_events_dfl_poll_msecs;
1414
1415
return msecs_to_jiffies(intv_msecs);
1416
}
1417
1418
/**
1419
* disk_block_events - block and flush disk event checking
1420
* @disk: disk to block events for
1421
*
1422
* On return from this function, it is guaranteed that event checking
1423
* isn't in progress and won't happen until unblocked by
1424
* disk_unblock_events(). Events blocking is counted and the actual
1425
* unblocking happens after the matching number of unblocks are done.
1426
*
1427
* Note that this intentionally does not block event checking from
1428
* disk_clear_events().
1429
*
1430
* CONTEXT:
1431
* Might sleep.
1432
*/
1433
void disk_block_events(struct gendisk *disk)
1434
{
1435
struct disk_events *ev = disk->ev;
1436
unsigned long flags;
1437
bool cancel;
1438
1439
if (!ev)
1440
return;
1441
1442
/*
1443
* Outer mutex ensures that the first blocker completes canceling
1444
* the event work before further blockers are allowed to finish.
1445
*/
1446
mutex_lock(&ev->block_mutex);
1447
1448
spin_lock_irqsave(&ev->lock, flags);
1449
cancel = !ev->block++;
1450
spin_unlock_irqrestore(&ev->lock, flags);
1451
1452
if (cancel)
1453
cancel_delayed_work_sync(&disk->ev->dwork);
1454
1455
mutex_unlock(&ev->block_mutex);
1456
}
1457
1458
/*
 * Drop one level of event blocking on @disk.  When the block count
 * reaches zero the event work is queued: immediately if @check_now is
 * set, otherwise after the poll interval (if that is non-zero).
 * Warns once and does nothing if the block count is not positive.
 */
static void __disk_unblock_events(struct gendisk *disk, bool check_now)
{
	struct disk_events *ev = disk->ev;
	unsigned long delay;
	unsigned long flags;

	spin_lock_irqsave(&ev->lock, flags);

	if (!WARN_ON_ONCE(ev->block <= 0) && --ev->block == 0) {
		/*
		 * Not exactly a latency critical operation, set poll
		 * timer slack to 25% and kick event check.
		 */
		delay = disk_events_poll_jiffies(disk);
		set_timer_slack(&ev->dwork.timer, delay / 4);

		if (check_now)
			queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
		else if (delay)
			queue_delayed_work(system_nrt_wq, &ev->dwork, delay);
	}

	spin_unlock_irqrestore(&ev->lock, flags);
}
1485
1486
/**
1487
* disk_unblock_events - unblock disk event checking
1488
* @disk: disk to unblock events for
1489
*
1490
* Undo disk_block_events(). When the block count reaches zero, it
1491
* starts events polling if configured.
1492
*
1493
* CONTEXT:
1494
* Don't care. Safe to call from irq context.
1495
*/
1496
void disk_unblock_events(struct gendisk *disk)
1497
{
1498
if (disk->ev)
1499
__disk_unblock_events(disk, false);
1500
}
1501
1502
/**
1503
* disk_check_events - schedule immediate event checking
1504
* @disk: disk to check events for
1505
*
1506
* Schedule immediate event checking on @disk if not blocked.
1507
*
1508
* CONTEXT:
1509
* Don't care. Safe to call from irq context.
1510
*/
1511
void disk_check_events(struct gendisk *disk)
1512
{
1513
struct disk_events *ev = disk->ev;
1514
unsigned long flags;
1515
1516
if (!ev)
1517
return;
1518
1519
spin_lock_irqsave(&ev->lock, flags);
1520
if (!ev->block) {
1521
cancel_delayed_work(&ev->dwork);
1522
queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
1523
}
1524
spin_unlock_irqrestore(&ev->lock, flags);
1525
}
1526
EXPORT_SYMBOL_GPL(disk_check_events);
1527
1528
/**
1529
* disk_clear_events - synchronously check, clear and return pending events
1530
* @disk: disk to fetch and clear events from
1531
* @mask: mask of events to be fetched and clearted
1532
*
1533
* Disk events are synchronously checked and pending events in @mask
1534
* are cleared and returned. This ignores the block count.
1535
*
1536
* CONTEXT:
1537
* Might sleep.
1538
*/
1539
unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
1540
{
1541
const struct block_device_operations *bdops = disk->fops;
1542
struct disk_events *ev = disk->ev;
1543
unsigned int pending;
1544
1545
if (!ev) {
1546
/* for drivers still using the old ->media_changed method */
1547
if ((mask & DISK_EVENT_MEDIA_CHANGE) &&
1548
bdops->media_changed && bdops->media_changed(disk))
1549
return DISK_EVENT_MEDIA_CHANGE;
1550
return 0;
1551
}
1552
1553
/* tell the workfn about the events being cleared */
1554
spin_lock_irq(&ev->lock);
1555
ev->clearing |= mask;
1556
spin_unlock_irq(&ev->lock);
1557
1558
/* uncondtionally schedule event check and wait for it to finish */
1559
disk_block_events(disk);
1560
queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
1561
flush_delayed_work(&ev->dwork);
1562
__disk_unblock_events(disk, false);
1563
1564
/* then, fetch and clear pending events */
1565
spin_lock_irq(&ev->lock);
1566
WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */
1567
pending = ev->pending & mask;
1568
ev->pending &= ~mask;
1569
spin_unlock_irq(&ev->lock);
1570
1571
return pending;
1572
}
1573
1574
static void disk_events_workfn(struct work_struct *work)
1575
{
1576
struct delayed_work *dwork = to_delayed_work(work);
1577
struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
1578
struct gendisk *disk = ev->disk;
1579
char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
1580
unsigned int clearing = ev->clearing;
1581
unsigned int events;
1582
unsigned long intv;
1583
int nr_events = 0, i;
1584
1585
/* check events */
1586
events = disk->fops->check_events(disk, clearing);
1587
1588
/* accumulate pending events and schedule next poll if necessary */
1589
spin_lock_irq(&ev->lock);
1590
1591
events &= ~ev->pending;
1592
ev->pending |= events;
1593
ev->clearing &= ~clearing;
1594
1595
intv = disk_events_poll_jiffies(disk);
1596
if (!ev->block && intv)
1597
queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
1598
1599
spin_unlock_irq(&ev->lock);
1600
1601
/*
1602
* Tell userland about new events. Only the events listed in
1603
* @disk->events are reported. Unlisted events are processed the
1604
* same internally but never get reported to userland.
1605
*/
1606
for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
1607
if (events & disk->events & (1 << i))
1608
envp[nr_events++] = disk_uevents[i];
1609
1610
if (nr_events)
1611
kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
1612
}
1613
1614
/*
1615
* A disk events enabled device has the following sysfs nodes under
1616
* its /sys/block/X/ directory.
1617
*
1618
* events : list of all supported events
1619
* events_async : list of events which can be detected w/o polling
1620
* events_poll_msecs : polling interval, 0: disable, -1: system default
1621
*/
1622
static ssize_t __disk_events_show(unsigned int events, char *buf)
1623
{
1624
const char *delim = "";
1625
ssize_t pos = 0;
1626
int i;
1627
1628
for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
1629
if (events & (1 << i)) {
1630
pos += sprintf(buf + pos, "%s%s",
1631
delim, disk_events_strs[i]);
1632
delim = " ";
1633
}
1634
if (pos)
1635
pos += sprintf(buf + pos, "\n");
1636
return pos;
1637
}
1638
1639
static ssize_t disk_events_show(struct device *dev,
1640
struct device_attribute *attr, char *buf)
1641
{
1642
struct gendisk *disk = dev_to_disk(dev);
1643
1644
return __disk_events_show(disk->events, buf);
1645
}
1646
1647
static ssize_t disk_events_async_show(struct device *dev,
1648
struct device_attribute *attr, char *buf)
1649
{
1650
struct gendisk *disk = dev_to_disk(dev);
1651
1652
return __disk_events_show(disk->async_events, buf);
1653
}
1654
1655
static ssize_t disk_events_poll_msecs_show(struct device *dev,
1656
struct device_attribute *attr,
1657
char *buf)
1658
{
1659
struct gendisk *disk = dev_to_disk(dev);
1660
1661
return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
1662
}
1663
1664
/*
 * sysfs store handler for "events_poll_msecs".
 *
 * Accepts a decimal interval in milliseconds; 0 and positive values are
 * taken as is and -1 selects the system default.  Event checking is
 * blocked while the value is updated and an immediate check is kicked
 * on unblock.
 *
 * Returns @count on success, -EINVAL for unparsable or out-of-range
 * input, -ENODEV if the disk has no event state.
 */
static ssize_t disk_events_poll_msecs_store(struct device *dev,
					    struct device_attribute *attr,
					    const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	long intv;

	/* require exactly one successful conversion */
	if (!count || sscanf(buf, "%ld", &intv) != 1)
		return -EINVAL;

	if (intv < 0 && intv != -1)
		return -EINVAL;

	/*
	 * disk_add_events() creates the sysfs files before it assigns
	 * disk->ev, so a write can arrive while disk->ev is still NULL.
	 * disk_block_events() tolerates that, but the accesses below
	 * would not.
	 */
	if (!disk->ev)
		return -ENODEV;

	disk_block_events(disk);
	disk->ev->poll_msecs = intv;
	__disk_unblock_events(disk, true);

	return count;
}
1683
1684
static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
1685
static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
1686
static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
1687
disk_events_poll_msecs_show,
1688
disk_events_poll_msecs_store);
1689
1690
static const struct attribute *disk_events_attrs[] = {
1691
&dev_attr_events.attr,
1692
&dev_attr_events_async.attr,
1693
&dev_attr_events_poll_msecs.attr,
1694
NULL,
1695
};
1696
1697
/*
1698
* The default polling interval can be specified by the kernel
1699
* parameter block.events_dfl_poll_msecs which defaults to 0
1700
* (disable). This can also be modified runtime by writing to
1701
* /sys/module/block/parameters/events_dfl_poll_msecs.
1702
*/
1703
static int disk_events_set_dfl_poll_msecs(const char *val,
1704
const struct kernel_param *kp)
1705
{
1706
struct disk_events *ev;
1707
int ret;
1708
1709
ret = param_set_ulong(val, kp);
1710
if (ret < 0)
1711
return ret;
1712
1713
mutex_lock(&disk_events_mutex);
1714
1715
list_for_each_entry(ev, &disk_events, node)
1716
disk_check_events(ev->disk);
1717
1718
mutex_unlock(&disk_events_mutex);
1719
1720
return 0;
1721
}
1722
1723
static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
1724
.set = disk_events_set_dfl_poll_msecs,
1725
.get = param_get_ulong,
1726
};
1727
1728
#undef MODULE_PARAM_PREFIX
1729
#define MODULE_PARAM_PREFIX "block."
1730
1731
module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
1732
&disk_events_dfl_poll_msecs, 0644);
1733
1734
/*
1735
* disk_{add|del|release}_events - initialize and destroy disk_events.
1736
*/
1737
static void disk_add_events(struct gendisk *disk)
1738
{
1739
struct disk_events *ev;
1740
1741
if (!disk->fops->check_events)
1742
return;
1743
1744
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
1745
if (!ev) {
1746
pr_warn("%s: failed to initialize events\n", disk->disk_name);
1747
return;
1748
}
1749
1750
if (sysfs_create_files(&disk_to_dev(disk)->kobj,
1751
disk_events_attrs) < 0) {
1752
pr_warn("%s: failed to create sysfs files for events\n",
1753
disk->disk_name);
1754
kfree(ev);
1755
return;
1756
}
1757
1758
disk->ev = ev;
1759
1760
INIT_LIST_HEAD(&ev->node);
1761
ev->disk = disk;
1762
spin_lock_init(&ev->lock);
1763
mutex_init(&ev->block_mutex);
1764
ev->block = 1;
1765
ev->poll_msecs = -1;
1766
INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
1767
1768
mutex_lock(&disk_events_mutex);
1769
list_add_tail(&ev->node, &disk_events);
1770
mutex_unlock(&disk_events_mutex);
1771
1772
/*
1773
* Block count is initialized to 1 and the following initial
1774
* unblock kicks it into action.
1775
*/
1776
__disk_unblock_events(disk, true);
1777
}
1778
1779
static void disk_del_events(struct gendisk *disk)
1780
{
1781
if (!disk->ev)
1782
return;
1783
1784
disk_block_events(disk);
1785
1786
mutex_lock(&disk_events_mutex);
1787
list_del_init(&disk->ev->node);
1788
mutex_unlock(&disk_events_mutex);
1789
1790
sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
1791
}
1792
1793
static void disk_release_events(struct gendisk *disk)
1794
{
1795
/* the block count should be 1 from disk_del_events() */
1796
WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
1797
kfree(disk->ev);
1798
}
1799
1800