/*
 *  linux/drivers/block/loop.c
 *
 *  Written by Theodore Ts'o, 3/29/93
 *
 * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
 * permitted under the GNU General Public License.
 *
 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
 *
 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
 *
 * Fixed do_loop_request() re-entrancy - [email protected] Mar 20, 1997
 *
 * Added devfs support - Richard Gooch <[email protected]> 16-Jan-1998
 *
 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
 *
 * Loadable modules and other fixes by AK, 1998
 *
 * Make real block number available to downstream transfer functions, enables
 * CBC (and relatives) mode encryption requiring unique IVs per data block.
 * Reed H. Petty, [email protected]
 *
 * Maximum number of loop devices now dynamic via max_loop module parameter.
 * Russell Kroll <[email protected]> 19990701
 *
 * Maximum number of loop devices when compiled-in now selectable by passing
 * max_loop=<1-255> to the kernel on boot.
 * Erik I. Bolsø, <[email protected]>, Oct 31, 1999
 *
 * Completely rewrite request handling to be make_request_fn style and
 * non blocking, pushing work to a helper thread. Lots of fixes from
 * Al Viro too.
 * Jens Axboe <[email protected]>, Nov 2000
 *
 * Support up to 256 loop devices
 * Heinz Mauelshagen <[email protected]>, Feb 2002
 *
 * Support for falling back on the write file operation when the address space
 * operation write_begin is not available on the backing filesystem.
 * Anton Altaparmakov, 16 Feb 2005
 *
 * Still To Fix:
 * - Advisory locking is ignored here.
 * - Should use its own CAP_* category instead of CAP_SYS_ADMIN
 *
 */

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/wait.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/init.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/loop.h>
#include <linux/compat.h>
#include <linux/suspend.h>
#include <linux/freezer.h>
#include <linux/mutex.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>		/* for invalidate_bdev() */
#include <linux/completion.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/splice.h>
#include <linux/sysfs.h>

#include <asm/uaccess.h>

static LIST_HEAD(loop_devices);
static DEFINE_MUTEX(loop_devices_mutex);

static int max_part;
static int part_shift;

/*
 * Transfer functions
 */
static int transfer_none(struct loop_device *lo, int cmd,
			 struct page *raw_page, unsigned raw_off,
			 struct page *loop_page, unsigned loop_off,
			 int size, sector_t real_block)
{
	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;

	if (cmd == READ)
		memcpy(loop_buf, raw_buf, size);
	else
		memcpy(raw_buf, loop_buf, size);

	kunmap_atomic(loop_buf, KM_USER1);
	kunmap_atomic(raw_buf, KM_USER0);
	cond_resched();
	return 0;
}

static int transfer_xor(struct loop_device *lo, int cmd,
			struct page *raw_page, unsigned raw_off,
			struct page *loop_page, unsigned loop_off,
			int size, sector_t real_block)
{
	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
	char *in, *out, *key;
	int i, keysize;

	if (cmd == READ) {
		in = raw_buf;
		out = loop_buf;
	} else {
		in = loop_buf;
		out = raw_buf;
	}

	key = lo->lo_encrypt_key;
	keysize = lo->lo_encrypt_key_size;
	for (i = 0; i < size; i++)
		*out++ = *in++ ^ key[(i & 511) % keysize];

	kunmap_atomic(loop_buf, KM_USER1);
	kunmap_atomic(raw_buf, KM_USER0);
	cond_resched();
	return 0;
}
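
/*
 * Editor's note on the XOR transfer above: the key stream restarts at
 * every 512-byte boundary ((i & 511) % keysize), so identical plaintext
 * at the same offset within a sector always produces identical output;
 * for an 8-byte key this degenerates to key[i % 8].  XOR is an
 * obfuscation aid only; real confidentiality needs a cryptographic
 * transfer module (e.g. cryptoloop) or dm-crypt.
 */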

static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
{
	if (unlikely(info->lo_encrypt_key_size <= 0))
		return -EINVAL;
	return 0;
}

static struct loop_func_table none_funcs = {
	.number = LO_CRYPT_NONE,
	.transfer = transfer_none,
};

static struct loop_func_table xor_funcs = {
	.number = LO_CRYPT_XOR,
	.transfer = transfer_xor,
	.init = xor_init
};

/* xfer_funcs[0] is special - its release function is never called */
static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
	&none_funcs,
	&xor_funcs
};
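
/*
 * Additional transfer modules fill the remaining xfer_funcs[] slots at
 * runtime through loop_register_transfer(), defined below; cryptoloop,
 * for instance, registers itself under LO_CRYPT_CRYPTOAPI.
 */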

static loff_t get_loop_size(struct loop_device *lo, struct file *file)
{
	loff_t size, offset, loopsize;

	/* Compute loopsize in bytes */
	size = i_size_read(file->f_mapping->host);
	offset = lo->lo_offset;
	loopsize = size - offset;
	if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
		loopsize = lo->lo_sizelimit;

	/*
	 * Unfortunately, if we want to do I/O on the device,
	 * the number of 512-byte sectors has to fit into a sector_t.
	 */
	return loopsize >> 9;
}

static int
figure_loop_size(struct loop_device *lo)
{
	loff_t size = get_loop_size(lo, lo->lo_backing_file);
	sector_t x = (sector_t)size;

	if (unlikely((loff_t)x != size))
		return -EFBIG;

	set_capacity(lo->lo_disk, x);
	return 0;
}
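
/*
 * Worked example: a 1 GiB backing file with lo_offset = 4096 yields
 * loopsize = 1073741824 - 4096 = 1073737728 bytes, i.e. 2097144
 * 512-byte sectors after the >> 9.  figure_loop_size() then fails with
 * -EFBIG only when that sector count no longer fits in sector_t, e.g.
 * a backing store of 2 TiB or more on a kernel with a 32-bit sector_t.
 */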

static inline int
lo_do_transfer(struct loop_device *lo, int cmd,
	       struct page *rpage, unsigned roffs,
	       struct page *lpage, unsigned loffs,
	       int size, sector_t rblock)
{
	if (unlikely(!lo->transfer))
		return 0;

	return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
}

/**
 * do_lo_send_aops - helper for writing data to a loop device
 *
 * This is the fast version for backing filesystems which implement the
 * address space operations write_begin and write_end.
 */
static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
			   loff_t pos, struct page *unused)
{
	struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
	struct address_space *mapping = file->f_mapping;
	pgoff_t index;
	unsigned offset, bv_offs;
	int len, ret;

	mutex_lock(&mapping->host->i_mutex);
	index = pos >> PAGE_CACHE_SHIFT;
	offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
	bv_offs = bvec->bv_offset;
	len = bvec->bv_len;
	while (len > 0) {
		sector_t IV;
		unsigned size, copied;
		int transfer_result;
		struct page *page;
		void *fsdata;

		IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9)) + (offset >> 9);
		size = PAGE_CACHE_SIZE - offset;
		if (size > len)
			size = len;

		ret = pagecache_write_begin(file, mapping, pos, size, 0,
					    &page, &fsdata);
		if (ret)
			goto fail;

		file_update_time(file);

		transfer_result = lo_do_transfer(lo, WRITE, page, offset,
				bvec->bv_page, bv_offs, size, IV);
		copied = size;
		if (unlikely(transfer_result))
			copied = 0;

		ret = pagecache_write_end(file, mapping, pos, size, copied,
					  page, fsdata);
		if (ret < 0 || ret != copied)
			goto fail;

		if (unlikely(transfer_result))
			goto fail;

		bv_offs += copied;
		len -= copied;
		offset = 0;
		index++;
		pos += copied;
	}
	ret = 0;
out:
	mutex_unlock(&mapping->host->i_mutex);
	return ret;
fail:
	ret = -1;
	goto out;
}

/**
 * __do_lo_send_write - helper for writing data to a loop device
 *
 * This helper just factors out common code between do_lo_send_direct_write()
 * and do_lo_send_write().
 */
static int __do_lo_send_write(struct file *file,
		u8 *buf, const int len, loff_t pos)
{
	ssize_t bw;
	mm_segment_t old_fs = get_fs();

	set_fs(get_ds());
	bw = file->f_op->write(file, buf, len, &pos);
	set_fs(old_fs);
	if (likely(bw == len))
		return 0;
	printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
			(unsigned long long)pos, len);
	if (bw >= 0)
		bw = -EIO;
	return bw;
}
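
/*
 * __do_lo_send_write() calls the backing file's ->write(), which expects
 * a user-space pointer, with a kernel buffer.  The set_fs(get_ds()) /
 * set_fs(old_fs) pair temporarily widens the address limit so the user
 * copy inside the write path accepts kernel addresses, then restores it
 * immediately afterwards.
 */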

/**
 * do_lo_send_direct_write - helper for writing data to a loop device
 *
 * This is the fast, non-transforming version for backing filesystems which
 * do not implement the address space operations write_begin and write_end.
 * It uses the write file operation which should be present on all writable
 * filesystems.
 */
static int do_lo_send_direct_write(struct loop_device *lo,
		struct bio_vec *bvec, loff_t pos, struct page *page)
{
	ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
			kmap(bvec->bv_page) + bvec->bv_offset,
			bvec->bv_len, pos);
	kunmap(bvec->bv_page);
	cond_resched();
	return bw;
}

/**
 * do_lo_send_write - helper for writing data to a loop device
 *
 * This is the slow, transforming version for filesystems which do not
 * implement the address space operations write_begin and write_end.  It
 * uses the write file operation which should be present on all writable
 * filesystems.
 *
 * Using fops->write is slower than using aops->{write_begin,write_end} in
 * the transforming case because we need to double buffer the data: the
 * transformation cannot be done in place, as we have no direct access to
 * the destination pages of the backing file.
 */
static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
		loff_t pos, struct page *page)
{
	int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
			bvec->bv_offset, bvec->bv_len, pos >> 9);
	if (likely(!ret))
		return __do_lo_send_write(lo->lo_backing_file,
				page_address(page), bvec->bv_len,
				pos);
	printk(KERN_ERR "loop: Transfer error at byte offset %llu, "
			"length %i.\n", (unsigned long long)pos, bvec->bv_len);
	if (ret > 0)
		ret = -EIO;
	return ret;
}

static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
{
	int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
			struct page *page);
	struct bio_vec *bvec;
	struct page *page = NULL;
	int i, ret = 0;

	do_lo_send = do_lo_send_aops;
	if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
		do_lo_send = do_lo_send_direct_write;
		if (lo->transfer != transfer_none) {
			page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
			if (unlikely(!page))
				goto fail;
			kmap(page);
			do_lo_send = do_lo_send_write;
		}
	}
	bio_for_each_segment(bvec, bio, i) {
		ret = do_lo_send(lo, bvec, pos, page);
		if (ret < 0)
			break;
		pos += bvec->bv_len;
	}
	if (page) {
		kunmap(page);
		__free_page(page);
	}
out:
	return ret;
fail:
	printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
	ret = -ENOMEM;
	goto out;
}

struct lo_read_data {
	struct loop_device *lo;
	struct page *page;
	unsigned offset;
	int bsize;
};

static int
lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
		struct splice_desc *sd)
{
	struct lo_read_data *p = sd->u.data;
	struct loop_device *lo = p->lo;
	struct page *page = buf->page;
	sector_t IV;
	int size;

	IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
							(buf->offset >> 9);
	size = sd->len;
	if (size > p->bsize)
		size = p->bsize;

	if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
		printk(KERN_ERR "loop: transfer error block %ld\n",
		       page->index);
		size = -EINVAL;
	}

	flush_dcache_page(p->page);

	if (size > 0)
		p->offset += size;

	return size;
}

static int
lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
{
	return __splice_from_pipe(pipe, sd, lo_splice_actor);
}

static int
do_lo_receive(struct loop_device *lo,
	      struct bio_vec *bvec, int bsize, loff_t pos)
{
	struct lo_read_data cookie;
	struct splice_desc sd;
	struct file *file;
	long retval;

	cookie.lo = lo;
	cookie.page = bvec->bv_page;
	cookie.offset = bvec->bv_offset;
	cookie.bsize = bsize;

	sd.len = 0;
	sd.total_len = bvec->bv_len;
	sd.flags = 0;
	sd.pos = pos;
	sd.u.data = &cookie;

	file = lo->lo_backing_file;
	retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);

	if (retval < 0)
		return retval;

	return 0;
}

static int
lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
{
	struct bio_vec *bvec;
	int i, ret = 0;

	bio_for_each_segment(bvec, bio, i) {
		ret = do_lo_receive(lo, bvec, bsize, pos);
		if (ret < 0)
			break;
		pos += bvec->bv_len;
	}
	return ret;
}

static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
{
	loff_t pos;
	int ret;

	pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;

	if (bio_rw(bio) == WRITE) {
		struct file *file = lo->lo_backing_file;

		if (bio->bi_rw & REQ_FLUSH) {
			ret = vfs_fsync(file, 0);
			if (unlikely(ret && ret != -EINVAL)) {
				ret = -EIO;
				goto out;
			}
		}

		ret = lo_send(lo, bio, pos);

		if ((bio->bi_rw & REQ_FUA) && !ret) {
			ret = vfs_fsync(file, 0);
			if (unlikely(ret && ret != -EINVAL))
				ret = -EIO;
		}
	} else
		ret = lo_receive(lo, bio, lo->lo_blocksize, pos);

out:
	return ret;
}
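
/*
 * Barrier semantics above: REQ_FLUSH means "everything completed so far
 * must be stable before this write", hence the vfs_fsync() up front;
 * REQ_FUA means "this write itself must be stable on completion", hence
 * the vfs_fsync() afterwards.  A backing filesystem whose fsync returns
 * -EINVAL simply cannot sync, so that case is deliberately not treated
 * as an I/O error.
 */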

/*
 * Add bio to back of pending list
 */
static void loop_add_bio(struct loop_device *lo, struct bio *bio)
{
	bio_list_add(&lo->lo_bio_list, bio);
}

/*
 * Grab first pending buffer
 */
static struct bio *loop_get_bio(struct loop_device *lo)
{
	return bio_list_pop(&lo->lo_bio_list);
}

static int loop_make_request(struct request_queue *q, struct bio *old_bio)
{
	struct loop_device *lo = q->queuedata;
	int rw = bio_rw(old_bio);

	if (rw == READA)
		rw = READ;

	BUG_ON(!lo || (rw != READ && rw != WRITE));

	spin_lock_irq(&lo->lo_lock);
	if (lo->lo_state != Lo_bound)
		goto out;
	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
		goto out;
	loop_add_bio(lo, old_bio);
	wake_up(&lo->lo_event);
	spin_unlock_irq(&lo->lo_lock);
	return 0;

out:
	spin_unlock_irq(&lo->lo_lock);
	bio_io_error(old_bio);
	return 0;
}

struct switch_request {
	struct file *file;
	struct completion wait;
};

static void do_loop_switch(struct loop_device *, struct switch_request *);

static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
{
	if (unlikely(!bio->bi_bdev)) {
		do_loop_switch(lo, bio->bi_private);
		bio_put(bio);
	} else {
		int ret = do_bio_filebacked(lo, bio);
		bio_endio(bio, ret);
	}
}

/*
 * Worker thread that handles reads/writes to file backed loop devices,
 * to avoid blocking in our make_request_fn.  It also does loop
 * decrypting on reads for block backed loop, as that is too heavy to do
 * from b_end_io context where irqs may be disabled.
 *
 * Loop explanation:  loop_clr_fd() sets lo_state to Lo_rundown before
 * calling kthread_stop().  Therefore once kthread_should_stop() is
 * true, loop_make_request() will not queue any more requests, and once
 * kthread_should_stop() is true and lo_bio_list is empty, we are done
 * with the loop.
 */
static int loop_thread(void *data)
{
	struct loop_device *lo = data;
	struct bio *bio;

	set_user_nice(current, -20);

	while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {

		wait_event_interruptible(lo->lo_event,
				!bio_list_empty(&lo->lo_bio_list) ||
				kthread_should_stop());

		if (bio_list_empty(&lo->lo_bio_list))
			continue;
		spin_lock_irq(&lo->lo_lock);
		bio = loop_get_bio(lo);
		spin_unlock_irq(&lo->lo_lock);

		BUG_ON(!bio);
		loop_handle_bio(lo, bio);
	}

	return 0;
}

/*
 * loop_switch performs the hard work of switching a backing store.
 * First it needs to flush existing IO, it does this by sending a magic
 * BIO down the pipe. The completion of this BIO does the actual switch.
 */
static int loop_switch(struct loop_device *lo, struct file *file)
{
	struct switch_request w;
	struct bio *bio = bio_alloc(GFP_KERNEL, 0);
	if (!bio)
		return -ENOMEM;
	init_completion(&w.wait);
	w.file = file;
	bio->bi_private = &w;
	bio->bi_bdev = NULL;
	loop_make_request(lo->lo_queue, bio);
	wait_for_completion(&w.wait);
	return 0;
}
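
/*
 * The "magic" bio above is a zero-segment bio whose bi_bdev is left
 * NULL; loop_handle_bio() keys on that to call do_loop_switch() instead
 * of doing file I/O.  Because the worker thread pops bios in FIFO
 * order, every bio queued before the switch request is serviced first,
 * which is what flushes the outstanding I/O.
 */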

/*
 * Helper to flush the IOs in loop, but keeping loop thread running
 */
static int loop_flush(struct loop_device *lo)
{
	/* loop not yet configured, no running thread, nothing to flush */
	if (!lo->lo_thread)
		return 0;

	return loop_switch(lo, NULL);
}

/*
 * Do the actual switch; called from the BIO completion routine
 */
static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
{
	struct file *file = p->file;
	struct file *old_file = lo->lo_backing_file;
	struct address_space *mapping;

	/* if no new file, only flush of queued bios requested */
	if (!file)
		goto out;

	mapping = file->f_mapping;
	mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
	lo->lo_backing_file = file;
	lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
		mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
	lo->old_gfp_mask = mapping_gfp_mask(mapping);
	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
out:
	complete(&p->wait);
}

/*
 * loop_change_fd switches the backing store of a loopback device to
 * a new file. This is useful for operating system installers to free up
 * the original file and in High Availability environments to switch to
 * an alternative location for the content in case of server meltdown.
 * This can only work if the loop device is used read-only, and if the
 * new backing store is the same size and type as the old backing store.
 */
static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
			  unsigned int arg)
{
	struct file *file, *old_file;
	struct inode *inode;
	int error;

	error = -ENXIO;
	if (lo->lo_state != Lo_bound)
		goto out;

	/* the loop device has to be read-only */
	error = -EINVAL;
	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
		goto out;

	error = -EBADF;
	file = fget(arg);
	if (!file)
		goto out;

	inode = file->f_mapping->host;
	old_file = lo->lo_backing_file;

	error = -EINVAL;

	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
		goto out_putf;

	/* size of the new backing store needs to be the same */
	if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
		goto out_putf;

	/* and ... switch */
	error = loop_switch(lo, file);
	if (error)
		goto out_putf;

	fput(old_file);
	if (max_part > 0)
		ioctl_by_bdev(bdev, BLKRRPART, 0);
	return 0;

out_putf:
	fput(file);
out:
	return error;
}

static inline int is_loop_device(struct file *file)
{
	struct inode *i = file->f_mapping->host;

	return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
}

/* loop sysfs attributes */

static ssize_t loop_attr_show(struct device *dev, char *page,
			      ssize_t (*callback)(struct loop_device *, char *))
{
	struct loop_device *l, *lo = NULL;

	mutex_lock(&loop_devices_mutex);
	list_for_each_entry(l, &loop_devices, lo_list)
		if (disk_to_dev(l->lo_disk) == dev) {
			lo = l;
			break;
		}
	mutex_unlock(&loop_devices_mutex);

	return lo ? callback(lo, page) : -EIO;
}

#define LOOP_ATTR_RO(_name)						\
static ssize_t loop_attr_##_name##_show(struct loop_device *, char *);	\
static ssize_t loop_attr_do_show_##_name(struct device *d,		\
				struct device_attribute *attr, char *b)	\
{									\
	return loop_attr_show(d, b, loop_attr_##_name##_show);		\
}									\
static struct device_attribute loop_attr_##_name =			\
	__ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
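
/*
 * For instance, LOOP_ATTR_RO(offset) expands to a declaration of
 * loop_attr_offset_show(), a loop_attr_do_show_offset() wrapper, and a
 * read-only device attribute loop_attr_offset, which (via the "loop"
 * attribute group below) surfaces as /sys/block/loopN/loop/offset.
 */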

static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
{
	ssize_t ret;
	char *p = NULL;

	mutex_lock(&lo->lo_ctl_mutex);
	if (lo->lo_backing_file)
		p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
	mutex_unlock(&lo->lo_ctl_mutex);

	if (IS_ERR_OR_NULL(p))
		ret = PTR_ERR(p);
	else {
		ret = strlen(p);
		memmove(buf, p, ret);
		buf[ret++] = '\n';
		buf[ret] = 0;
	}

	return ret;
}

static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
{
	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
}

static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
{
	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
}

static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
{
	int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);

	return sprintf(buf, "%s\n", autoclear ? "1" : "0");
}

LOOP_ATTR_RO(backing_file);
LOOP_ATTR_RO(offset);
LOOP_ATTR_RO(sizelimit);
LOOP_ATTR_RO(autoclear);

static struct attribute *loop_attrs[] = {
	&loop_attr_backing_file.attr,
	&loop_attr_offset.attr,
	&loop_attr_sizelimit.attr,
	&loop_attr_autoclear.attr,
	NULL,
};

static struct attribute_group loop_attribute_group = {
	.name = "loop",
	.attrs = loop_attrs,
};

static int loop_sysfs_init(struct loop_device *lo)
{
	return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
				  &loop_attribute_group);
}

static void loop_sysfs_exit(struct loop_device *lo)
{
	sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
			   &loop_attribute_group);
}

static int loop_set_fd(struct loop_device *lo, fmode_t mode,
		       struct block_device *bdev, unsigned int arg)
{
	struct file *file, *f;
	struct inode *inode;
	struct address_space *mapping;
	unsigned lo_blocksize;
	int lo_flags = 0;
	int error;
	loff_t size;

	/* This is safe, since we have a reference from open(). */
	__module_get(THIS_MODULE);

	error = -EBADF;
	file = fget(arg);
	if (!file)
		goto out;

	error = -EBUSY;
	if (lo->lo_state != Lo_unbound)
		goto out_putf;

	/* Avoid recursion */
	f = file;
	while (is_loop_device(f)) {
		struct loop_device *l;

		if (f->f_mapping->host->i_bdev == bdev)
			goto out_putf;

		l = f->f_mapping->host->i_bdev->bd_disk->private_data;
		if (l->lo_state == Lo_unbound) {
			error = -EINVAL;
			goto out_putf;
		}
		f = l->lo_backing_file;
	}

	mapping = file->f_mapping;
	inode = mapping->host;

	if (!(file->f_mode & FMODE_WRITE))
		lo_flags |= LO_FLAGS_READ_ONLY;

	error = -EINVAL;
	if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		const struct address_space_operations *aops = mapping->a_ops;

		if (aops->write_begin)
			lo_flags |= LO_FLAGS_USE_AOPS;
		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
			lo_flags |= LO_FLAGS_READ_ONLY;

		lo_blocksize = S_ISBLK(inode->i_mode) ?
			inode->i_bdev->bd_block_size : PAGE_SIZE;

		error = 0;
	} else {
		goto out_putf;
	}

	size = get_loop_size(lo, file);

	if ((loff_t)(sector_t)size != size) {
		error = -EFBIG;
		goto out_putf;
	}

	if (!(mode & FMODE_WRITE))
		lo_flags |= LO_FLAGS_READ_ONLY;

	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);

	lo->lo_blocksize = lo_blocksize;
	lo->lo_device = bdev;
	lo->lo_flags = lo_flags;
	lo->lo_backing_file = file;
	lo->transfer = transfer_none;
	lo->ioctl = NULL;
	lo->lo_sizelimit = 0;
	lo->old_gfp_mask = mapping_gfp_mask(mapping);
	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));

	bio_list_init(&lo->lo_bio_list);

	/*
	 * set queue make_request_fn, and add limits based on lower level
	 * device
	 */
	blk_queue_make_request(lo->lo_queue, loop_make_request);
	lo->lo_queue->queuedata = lo;

	if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
		blk_queue_flush(lo->lo_queue, REQ_FLUSH);

	set_capacity(lo->lo_disk, size);
	bd_set_size(bdev, size << 9);
	loop_sysfs_init(lo);
	/* let user-space know about the new size */
	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);

	set_blocksize(bdev, lo_blocksize);

	lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
						lo->lo_number);
	if (IS_ERR(lo->lo_thread)) {
		error = PTR_ERR(lo->lo_thread);
		goto out_clr;
	}
	lo->lo_state = Lo_bound;
	wake_up_process(lo->lo_thread);
	if (max_part > 0)
		ioctl_by_bdev(bdev, BLKRRPART, 0);
	return 0;

out_clr:
	loop_sysfs_exit(lo);
	lo->lo_thread = NULL;
	lo->lo_device = NULL;
	lo->lo_backing_file = NULL;
	lo->lo_flags = 0;
	set_capacity(lo->lo_disk, 0);
	invalidate_bdev(bdev);
	bd_set_size(bdev, 0);
	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
	mapping_set_gfp_mask(mapping, lo->old_gfp_mask);
	lo->lo_state = Lo_unbound;
out_putf:
	fput(file);
out:
	/* This is safe: open() is still holding a reference. */
	module_put(THIS_MODULE);
	return error;
}
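
/*
 * A minimal user-space attach/detach sequence (editor's sketch; error
 * handling omitted, device and image paths are illustrative).  This is
 * essentially what losetup(8) does:
 *
 *	int loop_fd = open("/dev/loop0", O_RDWR);
 *	int file_fd = open("disk.img", O_RDWR);
 *
 *	ioctl(loop_fd, LOOP_SET_FD, file_fd);      - enters loop_set_fd()
 *
 *	struct loop_info64 info;
 *	memset(&info, 0, sizeof(info));
 *	strncpy((char *)info.lo_file_name, "disk.img", LO_NAME_SIZE);
 *	ioctl(loop_fd, LOOP_SET_STATUS64, &info);  - enters loop_set_status64()
 *	...
 *	ioctl(loop_fd, LOOP_CLR_FD, 0);            - enters loop_clr_fd()
 */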

static int
loop_release_xfer(struct loop_device *lo)
{
	int err = 0;
	struct loop_func_table *xfer = lo->lo_encryption;

	if (xfer) {
		if (xfer->release)
			err = xfer->release(lo);
		lo->transfer = NULL;
		lo->lo_encryption = NULL;
		module_put(xfer->owner);
	}
	return err;
}

static int
loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
	       const struct loop_info64 *i)
{
	int err = 0;

	if (xfer) {
		struct module *owner = xfer->owner;

		if (!try_module_get(owner))
			return -EINVAL;
		if (xfer->init)
			err = xfer->init(lo, i);
		if (err)
			module_put(owner);
		else
			lo->lo_encryption = xfer;
	}
	return err;
}

static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
{
	struct file *filp = lo->lo_backing_file;
	gfp_t gfp = lo->old_gfp_mask;

	if (lo->lo_state != Lo_bound)
		return -ENXIO;

	if (lo->lo_refcnt > 1)	/* we needed one fd for the ioctl */
		return -EBUSY;

	if (filp == NULL)
		return -EINVAL;

	spin_lock_irq(&lo->lo_lock);
	lo->lo_state = Lo_rundown;
	spin_unlock_irq(&lo->lo_lock);

	kthread_stop(lo->lo_thread);

	lo->lo_backing_file = NULL;

	loop_release_xfer(lo);
	lo->transfer = NULL;
	lo->ioctl = NULL;
	lo->lo_device = NULL;
	lo->lo_encryption = NULL;
	lo->lo_offset = 0;
	lo->lo_sizelimit = 0;
	lo->lo_encrypt_key_size = 0;
	lo->lo_flags = 0;
	lo->lo_thread = NULL;
	memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
	memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
	memset(lo->lo_file_name, 0, LO_NAME_SIZE);
	if (bdev)
		invalidate_bdev(bdev);
	set_capacity(lo->lo_disk, 0);
	loop_sysfs_exit(lo);
	if (bdev) {
		bd_set_size(bdev, 0);
		/* let user-space know about this change */
		kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
	}
	mapping_set_gfp_mask(filp->f_mapping, gfp);
	lo->lo_state = Lo_unbound;
	/* This is safe: open() is still holding a reference. */
	module_put(THIS_MODULE);
	if (max_part > 0 && bdev)
		ioctl_by_bdev(bdev, BLKRRPART, 0);
	mutex_unlock(&lo->lo_ctl_mutex);
	/*
	 * Need not hold lo_ctl_mutex to fput backing file.
	 * Calling fput holding lo_ctl_mutex triggers a circular
	 * lock dependency possibility warning as fput can take
	 * bd_mutex which is usually taken before lo_ctl_mutex.
	 */
	fput(filp);
	return 0;
}

static int
loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
{
	int err;
	struct loop_func_table *xfer;
	uid_t uid = current_uid();

	if (lo->lo_encrypt_key_size &&
	    lo->lo_key_owner != uid &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (lo->lo_state != Lo_bound)
		return -ENXIO;
	if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
		return -EINVAL;

	err = loop_release_xfer(lo);
	if (err)
		return err;

	if (info->lo_encrypt_type) {
		unsigned int type = info->lo_encrypt_type;

		if (type >= MAX_LO_CRYPT)
			return -EINVAL;
		xfer = xfer_funcs[type];
		if (xfer == NULL)
			return -EINVAL;
	} else
		xfer = NULL;

	err = loop_init_xfer(lo, xfer, info);
	if (err)
		return err;

	if (lo->lo_offset != info->lo_offset ||
	    lo->lo_sizelimit != info->lo_sizelimit) {
		lo->lo_offset = info->lo_offset;
		lo->lo_sizelimit = info->lo_sizelimit;
		if (figure_loop_size(lo))
			return -EFBIG;
	}

	memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
	memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
	lo->lo_file_name[LO_NAME_SIZE-1] = 0;
	lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;

	if (!xfer)
		xfer = &none_funcs;
	lo->transfer = xfer->transfer;
	lo->ioctl = xfer->ioctl;

	if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
	     (info->lo_flags & LO_FLAGS_AUTOCLEAR))
		lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;

	lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
	lo->lo_init[0] = info->lo_init[0];
	lo->lo_init[1] = info->lo_init[1];
	if (info->lo_encrypt_key_size) {
		memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
		       info->lo_encrypt_key_size);
		lo->lo_key_owner = uid;
	}

	return 0;
}

static int
loop_get_status(struct loop_device *lo, struct loop_info64 *info)
{
	struct file *file = lo->lo_backing_file;
	struct kstat stat;
	int error;

	if (lo->lo_state != Lo_bound)
		return -ENXIO;
	error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat);
	if (error)
		return error;
	memset(info, 0, sizeof(*info));
	info->lo_number = lo->lo_number;
	info->lo_device = huge_encode_dev(stat.dev);
	info->lo_inode = stat.ino;
	info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
	info->lo_offset = lo->lo_offset;
	info->lo_sizelimit = lo->lo_sizelimit;
	info->lo_flags = lo->lo_flags;
	memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
	memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
	info->lo_encrypt_type =
		lo->lo_encryption ? lo->lo_encryption->number : 0;
	if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
		info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
		memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
		       lo->lo_encrypt_key_size);
	}
	return 0;
}

static void
loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
{
	memset(info64, 0, sizeof(*info64));
	info64->lo_number = info->lo_number;
	info64->lo_device = info->lo_device;
	info64->lo_inode = info->lo_inode;
	info64->lo_rdevice = info->lo_rdevice;
	info64->lo_offset = info->lo_offset;
	info64->lo_sizelimit = 0;
	info64->lo_encrypt_type = info->lo_encrypt_type;
	info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
	info64->lo_flags = info->lo_flags;
	info64->lo_init[0] = info->lo_init[0];
	info64->lo_init[1] = info->lo_init[1];
	if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE);
	else
		memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
	memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
}

static int
loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
{
	memset(info, 0, sizeof(*info));
	info->lo_number = info64->lo_number;
	info->lo_device = info64->lo_device;
	info->lo_inode = info64->lo_inode;
	info->lo_rdevice = info64->lo_rdevice;
	info->lo_offset = info64->lo_offset;
	info->lo_encrypt_type = info64->lo_encrypt_type;
	info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
	info->lo_flags = info64->lo_flags;
	info->lo_init[0] = info64->lo_init[0];
	info->lo_init[1] = info64->lo_init[1];
	if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
	else
		memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
	memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);

	/* error in case values were truncated */
	if (info->lo_device != info64->lo_device ||
	    info->lo_rdevice != info64->lo_rdevice ||
	    info->lo_inode != info64->lo_inode ||
	    info->lo_offset != info64->lo_offset)
		return -EOVERFLOW;

	return 0;
}

static int
loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg)
{
	struct loop_info info;
	struct loop_info64 info64;

	if (copy_from_user(&info, arg, sizeof (struct loop_info)))
		return -EFAULT;
	loop_info64_from_old(&info, &info64);
	return loop_set_status(lo, &info64);
}

static int
loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg)
{
	struct loop_info64 info64;

	if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
		return -EFAULT;
	return loop_set_status(lo, &info64);
}

static int
loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
	struct loop_info info;
	struct loop_info64 info64;
	int err = 0;

	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err)
		err = loop_info64_to_old(&info64, &info);
	if (!err && copy_to_user(arg, &info, sizeof(info)))
		err = -EFAULT;

	return err;
}

static int
loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
	struct loop_info64 info64;
	int err = 0;

	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err && copy_to_user(arg, &info64, sizeof(info64)))
		err = -EFAULT;

	return err;
}

static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
{
	int err;
	sector_t sec;
	loff_t sz;

	err = -ENXIO;
	if (unlikely(lo->lo_state != Lo_bound))
		goto out;
	err = figure_loop_size(lo);
	if (unlikely(err))
		goto out;
	sec = get_capacity(lo->lo_disk);
	/* the width of sector_t may be narrow for bit-shift */
	sz = sec;
	sz <<= 9;
	mutex_lock(&bdev->bd_mutex);
	bd_set_size(bdev, sz);
	/* let user-space know about the new size */
	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
	mutex_unlock(&bdev->bd_mutex);

out:
	return err;
}

static int lo_ioctl(struct block_device *bdev, fmode_t mode,
	unsigned int cmd, unsigned long arg)
{
	struct loop_device *lo = bdev->bd_disk->private_data;
	int err;

	mutex_lock_nested(&lo->lo_ctl_mutex, 1);
	switch (cmd) {
	case LOOP_SET_FD:
		err = loop_set_fd(lo, mode, bdev, arg);
		break;
	case LOOP_CHANGE_FD:
		err = loop_change_fd(lo, bdev, arg);
		break;
	case LOOP_CLR_FD:
		/* loop_clr_fd would have unlocked lo_ctl_mutex on success */
		err = loop_clr_fd(lo, bdev);
		if (!err)
			goto out_unlocked;
		break;
	case LOOP_SET_STATUS:
		err = loop_set_status_old(lo, (struct loop_info __user *) arg);
		break;
	case LOOP_GET_STATUS:
		err = loop_get_status_old(lo, (struct loop_info __user *) arg);
		break;
	case LOOP_SET_STATUS64:
		err = loop_set_status64(lo, (struct loop_info64 __user *) arg);
		break;
	case LOOP_GET_STATUS64:
		err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
		break;
	case LOOP_SET_CAPACITY:
		err = -EPERM;
		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
			err = loop_set_capacity(lo, bdev);
		break;
	default:
		err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
	}
	mutex_unlock(&lo->lo_ctl_mutex);

out_unlocked:
	return err;
}

#ifdef CONFIG_COMPAT
struct compat_loop_info {
	compat_int_t	lo_number;      /* ioctl r/o */
	compat_dev_t	lo_device;      /* ioctl r/o */
	compat_ulong_t	lo_inode;       /* ioctl r/o */
	compat_dev_t	lo_rdevice;     /* ioctl r/o */
	compat_int_t	lo_offset;
	compat_int_t	lo_encrypt_type;
	compat_int_t	lo_encrypt_key_size;    /* ioctl w/o */
	compat_int_t	lo_flags;       /* ioctl r/o */
	char		lo_name[LO_NAME_SIZE];
	unsigned char	lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
	compat_ulong_t	lo_init[2];
	char		reserved[4];
};

/*
 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
 * - noinlined to reduce stack space usage in main part of driver
 */
static noinline int
loop_info64_from_compat(const struct compat_loop_info __user *arg,
			struct loop_info64 *info64)
{
	struct compat_loop_info info;

	if (copy_from_user(&info, arg, sizeof(info)))
		return -EFAULT;

	memset(info64, 0, sizeof(*info64));
	info64->lo_number = info.lo_number;
	info64->lo_device = info.lo_device;
	info64->lo_inode = info.lo_inode;
	info64->lo_rdevice = info.lo_rdevice;
	info64->lo_offset = info.lo_offset;
	info64->lo_sizelimit = 0;
	info64->lo_encrypt_type = info.lo_encrypt_type;
	info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
	info64->lo_flags = info.lo_flags;
	info64->lo_init[0] = info.lo_init[0];
	info64->lo_init[1] = info.lo_init[1];
	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
	else
		memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
	memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
	return 0;
}

/*
 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
 * - noinlined to reduce stack space usage in main part of driver
 */
static noinline int
loop_info64_to_compat(const struct loop_info64 *info64,
		      struct compat_loop_info __user *arg)
{
	struct compat_loop_info info;

	memset(&info, 0, sizeof(info));
	info.lo_number = info64->lo_number;
	info.lo_device = info64->lo_device;
	info.lo_inode = info64->lo_inode;
	info.lo_rdevice = info64->lo_rdevice;
	info.lo_offset = info64->lo_offset;
	info.lo_encrypt_type = info64->lo_encrypt_type;
	info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
	info.lo_flags = info64->lo_flags;
	info.lo_init[0] = info64->lo_init[0];
	info.lo_init[1] = info64->lo_init[1];
	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
	else
		memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
	memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);

	/* error in case values were truncated */
	if (info.lo_device != info64->lo_device ||
	    info.lo_rdevice != info64->lo_rdevice ||
	    info.lo_inode != info64->lo_inode ||
	    info.lo_offset != info64->lo_offset ||
	    info.lo_init[0] != info64->lo_init[0] ||
	    info.lo_init[1] != info64->lo_init[1])
		return -EOVERFLOW;

	if (copy_to_user(arg, &info, sizeof(info)))
		return -EFAULT;
	return 0;
}

static int
loop_set_status_compat(struct loop_device *lo,
		       const struct compat_loop_info __user *arg)
{
	struct loop_info64 info64;
	int ret;

	ret = loop_info64_from_compat(arg, &info64);
	if (ret < 0)
		return ret;
	return loop_set_status(lo, &info64);
}

static int
loop_get_status_compat(struct loop_device *lo,
		       struct compat_loop_info __user *arg)
{
	struct loop_info64 info64;
	int err = 0;

	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err)
		err = loop_info64_to_compat(&info64, arg);
	return err;
}

static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	struct loop_device *lo = bdev->bd_disk->private_data;
	int err;

	switch(cmd) {
	case LOOP_SET_STATUS:
		mutex_lock(&lo->lo_ctl_mutex);
		err = loop_set_status_compat(
			lo, (const struct compat_loop_info __user *) arg);
		mutex_unlock(&lo->lo_ctl_mutex);
		break;
	case LOOP_GET_STATUS:
		mutex_lock(&lo->lo_ctl_mutex);
		err = loop_get_status_compat(
			lo, (struct compat_loop_info __user *) arg);
		mutex_unlock(&lo->lo_ctl_mutex);
		break;
	case LOOP_SET_CAPACITY:
	case LOOP_CLR_FD:
	case LOOP_GET_STATUS64:
	case LOOP_SET_STATUS64:
		arg = (unsigned long) compat_ptr(arg);
	case LOOP_SET_FD:
	case LOOP_CHANGE_FD:
		err = lo_ioctl(bdev, mode, cmd, arg);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
#endif

static int lo_open(struct block_device *bdev, fmode_t mode)
{
	struct loop_device *lo = bdev->bd_disk->private_data;

	mutex_lock(&lo->lo_ctl_mutex);
	lo->lo_refcnt++;
	mutex_unlock(&lo->lo_ctl_mutex);

	return 0;
}

static int lo_release(struct gendisk *disk, fmode_t mode)
{
	struct loop_device *lo = disk->private_data;
	int err;

	mutex_lock(&lo->lo_ctl_mutex);

	if (--lo->lo_refcnt)
		goto out;

	if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
		/*
		 * In autoclear mode, stop the loop thread
		 * and remove configuration after last close.
		 */
		err = loop_clr_fd(lo, NULL);
		if (!err)
			goto out_unlocked;
	} else {
		/*
		 * Otherwise keep thread (if running) and config,
		 * but flush possible ongoing bios in thread.
		 */
		loop_flush(lo);
	}

out:
	mutex_unlock(&lo->lo_ctl_mutex);
out_unlocked:
	return 0;
}

static const struct block_device_operations lo_fops = {
	.owner =	THIS_MODULE,
	.open =		lo_open,
	.release =	lo_release,
	.ioctl =	lo_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	lo_compat_ioctl,
#endif
};

/*
 * And now the modules code and kernel interface.
 */
static int max_loop;
module_param(max_loop, int, S_IRUGO);
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
module_param(max_part, int, S_IRUGO);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);

int loop_register_transfer(struct loop_func_table *funcs)
{
	unsigned int n = funcs->number;

	if (n >= MAX_LO_CRYPT || xfer_funcs[n])
		return -EINVAL;
	xfer_funcs[n] = funcs;
	return 0;
}

int loop_unregister_transfer(int number)
{
	unsigned int n = number;
	struct loop_device *lo;
	struct loop_func_table *xfer;

	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
		return -EINVAL;

	xfer_funcs[n] = NULL;

	list_for_each_entry(lo, &loop_devices, lo_list) {
		mutex_lock(&lo->lo_ctl_mutex);

		if (lo->lo_encryption == xfer)
			loop_release_xfer(lo);

		mutex_unlock(&lo->lo_ctl_mutex);
	}

	return 0;
}

EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);

static struct loop_device *loop_alloc(int i)
{
	struct loop_device *lo;
	struct gendisk *disk;

	lo = kzalloc(sizeof(*lo), GFP_KERNEL);
	if (!lo)
		goto out;

	lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
	if (!lo->lo_queue)
		goto out_free_dev;

	disk = lo->lo_disk = alloc_disk(1 << part_shift);
	if (!disk)
		goto out_free_queue;

	mutex_init(&lo->lo_ctl_mutex);
	lo->lo_number		= i;
	lo->lo_thread		= NULL;
	init_waitqueue_head(&lo->lo_event);
	spin_lock_init(&lo->lo_lock);
	disk->major		= LOOP_MAJOR;
	disk->first_minor	= i << part_shift;
	disk->fops		= &lo_fops;
	disk->private_data	= lo;
	disk->queue		= lo->lo_queue;
	sprintf(disk->disk_name, "loop%d", i);
	return lo;

out_free_queue:
	blk_cleanup_queue(lo->lo_queue);
out_free_dev:
	kfree(lo);
out:
	return NULL;
}

static void loop_free(struct loop_device *lo)
{
	blk_cleanup_queue(lo->lo_queue);
	put_disk(lo->lo_disk);
	list_del(&lo->lo_list);
	kfree(lo);
}

static struct loop_device *loop_init_one(int i)
{
	struct loop_device *lo;

	list_for_each_entry(lo, &loop_devices, lo_list) {
		if (lo->lo_number == i)
			return lo;
	}

	lo = loop_alloc(i);
	if (lo) {
		add_disk(lo->lo_disk);
		list_add_tail(&lo->lo_list, &loop_devices);
	}
	return lo;
}

static void loop_del_one(struct loop_device *lo)
{
	del_gendisk(lo->lo_disk);
	loop_free(lo);
}

static struct kobject *loop_probe(dev_t dev, int *part, void *data)
{
	struct loop_device *lo;
	struct kobject *kobj;

	mutex_lock(&loop_devices_mutex);
	lo = loop_init_one(MINOR(dev) >> part_shift);
	kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
	mutex_unlock(&loop_devices_mutex);

	*part = 0;
	return kobj;
}

static int __init loop_init(void)
{
	int i, nr;
	unsigned long range;
	struct loop_device *lo, *next;

	/*
	 * The loop module can instantiate the underlying device structure
	 * on demand, provided the corresponding device node is accessed.
	 * However, this does not work well with user space tools that do
	 * not know about this "feature".  In order not to break any
	 * existing tools, we do the following:
	 *
	 * (1) if max_loop is specified, create that many devices upfront;
	 *     this also becomes a hard limit.
	 * (2) if max_loop is not specified, create 8 loop devices on
	 *     module load; users can extend this by creating device nodes
	 *     themselves, and the kernel will automatically instantiate
	 *     the actual device on demand.
	 */

	part_shift = 0;
	if (max_part > 0) {
		part_shift = fls(max_part);

		/*
		 * Adjust max_part according to part_shift as it is exported
		 * to user space, so that users can compute the correct minor
		 * number when they want to create more devices.
		 *
		 * Note that -1 is required because partition 0 is reserved
		 * for the whole disk.
		 */
		max_part = (1UL << part_shift) - 1;
	}
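
	/*
	 * Example: loading with max_part=8 gives part_shift = fls(8) = 4,
	 * so max_part is rounded to 15 and each device owns 16 minors;
	 * loop1 starts at minor 16 with loop1p1..loop1p15 on minors 17-31
	 * (the first minor of each range being the whole disk).
	 */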

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	if (max_loop > 1UL << (MINORBITS - part_shift))
		return -EINVAL;

	if (max_loop) {
		nr = max_loop;
		range = max_loop << part_shift;
	} else {
		nr = 8;
		range = 1UL << MINORBITS;
	}

	if (register_blkdev(LOOP_MAJOR, "loop"))
		return -EIO;

	for (i = 0; i < nr; i++) {
		lo = loop_alloc(i);
		if (!lo)
			goto Enomem;
		list_add_tail(&lo->lo_list, &loop_devices);
	}

	/* point of no return */

	list_for_each_entry(lo, &loop_devices, lo_list)
		add_disk(lo->lo_disk);

	blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
				  THIS_MODULE, loop_probe, NULL, NULL);

	printk(KERN_INFO "loop: module loaded\n");
	return 0;

Enomem:
	printk(KERN_INFO "loop: out of memory\n");

	list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
		loop_free(lo);

	unregister_blkdev(LOOP_MAJOR, "loop");
	return -ENOMEM;
}

static void __exit loop_exit(void)
{
	unsigned long range;
	struct loop_device *lo, *next;

	range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;

	list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
		loop_del_one(lo);

	blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
	unregister_blkdev(LOOP_MAJOR, "loop");
}

module_init(loop_init);
module_exit(loop_exit);

#ifndef MODULE
static int __init max_loop_setup(char *str)
{
	max_loop = simple_strtol(str, NULL, 0);
	return 1;
}

__setup("max_loop=", max_loop_setup);
#endif