/*
 *  linux/drivers/block/loop.c
 *
 *  Written by Theodore Ts'o, 3/29/93
 *
 * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
 * permitted under the GNU General Public License.
 *
 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
 *
 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
 *
 * Fixed do_loop_request() re-entrancy - [email protected] Mar 20, 1997
 *
 * Added devfs support - Richard Gooch <[email protected]> 16-Jan-1998
 *
 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
 *
 * Loadable modules and other fixes by AK, 1998
 *
 * Make real block number available to downstream transfer functions, enables
 * CBC (and relatives) mode encryption requiring unique IVs per data block.
 * Reed H. Petty, [email protected]
 *
 * Maximum number of loop devices now dynamic via max_loop module parameter.
 * Russell Kroll <[email protected]> 19990701
 *
 * Maximum number of loop devices when compiled-in now selectable by passing
 * max_loop=<1-255> to the kernel on boot.
 * Erik I. Bolsø, <[email protected]>, Oct 31, 1999
 *
 * Completely rewrite request handling to be make_request_fn style and
 * non blocking, pushing work to a helper thread. Lots of fixes from
 * Al Viro too.
 * Jens Axboe <[email protected]>, Nov 2000
 *
 * Support up to 256 loop devices
 * Heinz Mauelshagen <[email protected]>, Feb 2002
 *
 * Support for falling back on the write file operation when the address space
 * operation write_begin is not available on the backing filesystem.
 * Anton Altaparmakov, 16 Feb 2005
 *
 * Still To Fix:
 * - Advisory locking is ignored here.
 * - Should use its own CAP_* category instead of CAP_SYS_ADMIN
 *
 */

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/wait.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/init.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/loop.h>
#include <linux/compat.h>
#include <linux/suspend.h>
#include <linux/freezer.h>
#include <linux/mutex.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>		/* for invalidate_bdev() */
#include <linux/completion.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/splice.h>
#include <linux/sysfs.h>

#include <asm/uaccess.h>

static LIST_HEAD(loop_devices);
static DEFINE_MUTEX(loop_devices_mutex);

static int max_part;
static int part_shift;

/*
 * Transfer functions
 */
static int transfer_none(struct loop_device *lo, int cmd,
			 struct page *raw_page, unsigned raw_off,
			 struct page *loop_page, unsigned loop_off,
			 int size, sector_t real_block)
{
	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;

	if (cmd == READ)
		memcpy(loop_buf, raw_buf, size);
	else
		memcpy(raw_buf, loop_buf, size);

	kunmap_atomic(loop_buf, KM_USER1);
	kunmap_atomic(raw_buf, KM_USER0);
	cond_resched();
	return 0;
}

static int transfer_xor(struct loop_device *lo, int cmd,
			struct page *raw_page, unsigned raw_off,
			struct page *loop_page, unsigned loop_off,
			int size, sector_t real_block)
{
	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
	char *in, *out, *key;
	int i, keysize;

	if (cmd == READ) {
		in = raw_buf;
		out = loop_buf;
	} else {
		in = loop_buf;
		out = raw_buf;
	}

	key = lo->lo_encrypt_key;
	keysize = lo->lo_encrypt_key_size;
	for (i = 0; i < size; i++)
		*out++ = *in++ ^ key[(i & 511) % keysize];

	kunmap_atomic(loop_buf, KM_USER1);
	kunmap_atomic(raw_buf, KM_USER0);
	cond_resched();
	return 0;
}
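
/*
 * Editor's note on the XOR transfer above: the key stream restarts at
 * every 512-byte boundary ((i & 511) % keysize), so identical plaintext
 * at the same offset within a sector always produces identical output;
 * for an 8-byte key this degenerates to key[i % 8].  XOR is an
 * obfuscation aid only; real confidentiality needs a cryptographic
 * transfer module (e.g. cryptoloop) or dm-crypt.
 */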

static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
{
	if (unlikely(info->lo_encrypt_key_size <= 0))
		return -EINVAL;
	return 0;
}

static struct loop_func_table none_funcs = {
	.number = LO_CRYPT_NONE,
	.transfer = transfer_none,
};

static struct loop_func_table xor_funcs = {
	.number = LO_CRYPT_XOR,
	.transfer = transfer_xor,
	.init = xor_init
};

/* xfer_funcs[0] is special - its release function is never called */
static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
	&none_funcs,
	&xor_funcs
};
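
/*
 * Additional transfer modules fill the remaining xfer_funcs[] slots at
 * runtime through loop_register_transfer(), defined below; cryptoloop,
 * for instance, registers itself under LO_CRYPT_CRYPTOAPI.
 */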

static loff_t get_loop_size(struct loop_device *lo, struct file *file)
{
	loff_t size, offset, loopsize;

	/* Compute loopsize in bytes */
	size = i_size_read(file->f_mapping->host);
	offset = lo->lo_offset;
	loopsize = size - offset;
	if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
		loopsize = lo->lo_sizelimit;

	/*
	 * Unfortunately, if we want to do I/O on the device,
	 * the number of 512-byte sectors has to fit into a sector_t.
	 */
	return loopsize >> 9;
}

static int
figure_loop_size(struct loop_device *lo)
{
	loff_t size = get_loop_size(lo, lo->lo_backing_file);
	sector_t x = (sector_t)size;

	if (unlikely((loff_t)x != size))
		return -EFBIG;

	set_capacity(lo->lo_disk, x);
	return 0;
}
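
/*
 * Worked example: a 1 GiB backing file with lo_offset = 4096 yields
 * loopsize = 1073741824 - 4096 = 1073737728 bytes, i.e. 2097144
 * 512-byte sectors after the >> 9.  figure_loop_size() then fails with
 * -EFBIG only when that sector count no longer fits in sector_t, e.g.
 * a backing store of 2 TiB or more on a kernel with a 32-bit sector_t.
 */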

static inline int
lo_do_transfer(struct loop_device *lo, int cmd,
	       struct page *rpage, unsigned roffs,
	       struct page *lpage, unsigned loffs,
	       int size, sector_t rblock)
{
	if (unlikely(!lo->transfer))
		return 0;

	return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
}

/**
 * do_lo_send_aops - helper for writing data to a loop device
 *
 * This is the fast version for backing filesystems which implement the
 * address space operations write_begin and write_end.
 */
static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
			   loff_t pos, struct page *unused)
{
	struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
	struct address_space *mapping = file->f_mapping;
	pgoff_t index;
	unsigned offset, bv_offs;
	int len, ret;

	mutex_lock(&mapping->host->i_mutex);
	index = pos >> PAGE_CACHE_SHIFT;
	offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
	bv_offs = bvec->bv_offset;
	len = bvec->bv_len;
	while (len > 0) {
		sector_t IV;
		unsigned size, copied;
		int transfer_result;
		struct page *page;
		void *fsdata;

		IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9)) + (offset >> 9);
		size = PAGE_CACHE_SIZE - offset;
		if (size > len)
			size = len;

		ret = pagecache_write_begin(file, mapping, pos, size, 0,
					    &page, &fsdata);
		if (ret)
			goto fail;

		file_update_time(file);

		transfer_result = lo_do_transfer(lo, WRITE, page, offset,
				bvec->bv_page, bv_offs, size, IV);
		copied = size;
		if (unlikely(transfer_result))
			copied = 0;

		ret = pagecache_write_end(file, mapping, pos, size, copied,
					  page, fsdata);
		if (ret < 0 || ret != copied)
			goto fail;

		if (unlikely(transfer_result))
			goto fail;

		bv_offs += copied;
		len -= copied;
		offset = 0;
		index++;
		pos += copied;
	}
	ret = 0;
out:
	mutex_unlock(&mapping->host->i_mutex);
	return ret;
fail:
	ret = -1;
	goto out;
}

/**
 * __do_lo_send_write - helper for writing data to a loop device
 *
 * This helper just factors out common code between do_lo_send_direct_write()
 * and do_lo_send_write().
 */
static int __do_lo_send_write(struct file *file,
		u8 *buf, const int len, loff_t pos)
{
	ssize_t bw;
	mm_segment_t old_fs = get_fs();

	set_fs(get_ds());
	bw = file->f_op->write(file, buf, len, &pos);
	set_fs(old_fs);
	if (likely(bw == len))
		return 0;
	printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
			(unsigned long long)pos, len);
	if (bw >= 0)
		bw = -EIO;
	return bw;
}
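
/*
 * __do_lo_send_write() calls the backing file's ->write(), which expects
 * a user-space pointer, with a kernel buffer.  The set_fs(get_ds()) /
 * set_fs(old_fs) pair temporarily widens the address limit so the user
 * copy inside the write path accepts kernel addresses, then restores it
 * immediately afterwards.
 */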

/**
 * do_lo_send_direct_write - helper for writing data to a loop device
 *
 * This is the fast, non-transforming version for backing filesystems which
 * do not implement the address space operations write_begin and write_end.
 * It uses the write file operation which should be present on all writable
 * filesystems.
 */
static int do_lo_send_direct_write(struct loop_device *lo,
		struct bio_vec *bvec, loff_t pos, struct page *page)
{
	ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
			kmap(bvec->bv_page) + bvec->bv_offset,
			bvec->bv_len, pos);
	kunmap(bvec->bv_page);
	cond_resched();
	return bw;
}

/**
 * do_lo_send_write - helper for writing data to a loop device
 *
 * This is the slow, transforming version for filesystems which do not
 * implement the address space operations write_begin and write_end.  It
 * uses the write file operation which should be present on all writable
 * filesystems.
 *
 * Using fops->write is slower than using aops->{write_begin,write_end} in
 * the transforming case because we need to double buffer the data: the
 * transformation cannot be done in place, as we have no direct access to
 * the destination pages of the backing file.
 */
static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
		loff_t pos, struct page *page)
{
	int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
			bvec->bv_offset, bvec->bv_len, pos >> 9);
	if (likely(!ret))
		return __do_lo_send_write(lo->lo_backing_file,
				page_address(page), bvec->bv_len,
				pos);
	printk(KERN_ERR "loop: Transfer error at byte offset %llu, "
			"length %i.\n", (unsigned long long)pos, bvec->bv_len);
	if (ret > 0)
		ret = -EIO;
	return ret;
}

static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
{
	int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
			struct page *page);
	struct bio_vec *bvec;
	struct page *page = NULL;
	int i, ret = 0;

	do_lo_send = do_lo_send_aops;
	if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
		do_lo_send = do_lo_send_direct_write;
		if (lo->transfer != transfer_none) {
			page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
			if (unlikely(!page))
				goto fail;
			kmap(page);
			do_lo_send = do_lo_send_write;
		}
	}
	bio_for_each_segment(bvec, bio, i) {
		ret = do_lo_send(lo, bvec, pos, page);
		if (ret < 0)
			break;
		pos += bvec->bv_len;
	}
	if (page) {
		kunmap(page);
		__free_page(page);
	}
out:
	return ret;
fail:
	printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
	ret = -ENOMEM;
	goto out;
}

struct lo_read_data {
	struct loop_device *lo;
	struct page *page;
	unsigned offset;
	int bsize;
};

static int
lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
		struct splice_desc *sd)
{
	struct lo_read_data *p = sd->u.data;
	struct loop_device *lo = p->lo;
	struct page *page = buf->page;
	sector_t IV;
	int size;

	IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
							(buf->offset >> 9);
	size = sd->len;
	if (size > p->bsize)
		size = p->bsize;

	if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
		printk(KERN_ERR "loop: transfer error block %ld\n",
		       page->index);
		size = -EINVAL;
	}

	flush_dcache_page(p->page);

	if (size > 0)
		p->offset += size;

	return size;
}

static int
lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
{
	return __splice_from_pipe(pipe, sd, lo_splice_actor);
}

static int
do_lo_receive(struct loop_device *lo,
	      struct bio_vec *bvec, int bsize, loff_t pos)
{
	struct lo_read_data cookie;
	struct splice_desc sd;
	struct file *file;
	long retval;

	cookie.lo = lo;
	cookie.page = bvec->bv_page;
	cookie.offset = bvec->bv_offset;
	cookie.bsize = bsize;

	sd.len = 0;
	sd.total_len = bvec->bv_len;
	sd.flags = 0;
	sd.pos = pos;
	sd.u.data = &cookie;

	file = lo->lo_backing_file;
	retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);

	if (retval < 0)
		return retval;

	return 0;
}

static int
lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
{
	struct bio_vec *bvec;
	int i, ret = 0;

	bio_for_each_segment(bvec, bio, i) {
		ret = do_lo_receive(lo, bvec, bsize, pos);
		if (ret < 0)
			break;
		pos += bvec->bv_len;
	}
	return ret;
}

static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
{
	loff_t pos;
	int ret;

	pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;

	if (bio_rw(bio) == WRITE) {
		struct file *file = lo->lo_backing_file;

		if (bio->bi_rw & REQ_FLUSH) {
			ret = vfs_fsync(file, 0);
			if (unlikely(ret && ret != -EINVAL)) {
				ret = -EIO;
				goto out;
			}
		}

		ret = lo_send(lo, bio, pos);

		if ((bio->bi_rw & REQ_FUA) && !ret) {
			ret = vfs_fsync(file, 0);
			if (unlikely(ret && ret != -EINVAL))
				ret = -EIO;
		}
	} else
		ret = lo_receive(lo, bio, lo->lo_blocksize, pos);

out:
	return ret;
}
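
/*
 * Barrier semantics above: REQ_FLUSH means "everything completed so far
 * must be stable before this write", hence the vfs_fsync() up front;
 * REQ_FUA means "this write itself must be stable on completion", hence
 * the vfs_fsync() afterwards.  A backing filesystem whose fsync returns
 * -EINVAL simply cannot sync, so that case is deliberately not treated
 * as an I/O error.
 */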

/*
 * Add bio to back of pending list
 */
static void loop_add_bio(struct loop_device *lo, struct bio *bio)
{
	bio_list_add(&lo->lo_bio_list, bio);
}

/*
 * Grab first pending buffer
 */
static struct bio *loop_get_bio(struct loop_device *lo)
{
	return bio_list_pop(&lo->lo_bio_list);
}

static int loop_make_request(struct request_queue *q, struct bio *old_bio)
{
	struct loop_device *lo = q->queuedata;
	int rw = bio_rw(old_bio);

	if (rw == READA)
		rw = READ;

	BUG_ON(!lo || (rw != READ && rw != WRITE));

	spin_lock_irq(&lo->lo_lock);
	if (lo->lo_state != Lo_bound)
		goto out;
	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
		goto out;
	loop_add_bio(lo, old_bio);
	wake_up(&lo->lo_event);
	spin_unlock_irq(&lo->lo_lock);
	return 0;

out:
	spin_unlock_irq(&lo->lo_lock);
	bio_io_error(old_bio);
	return 0;
}

struct switch_request {
	struct file *file;
	struct completion wait;
};

static void do_loop_switch(struct loop_device *, struct switch_request *);

static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
{
	if (unlikely(!bio->bi_bdev)) {
		do_loop_switch(lo, bio->bi_private);
		bio_put(bio);
	} else {
		int ret = do_bio_filebacked(lo, bio);
		bio_endio(bio, ret);
	}
}

/*
 * Worker thread that handles reads/writes to file backed loop devices,
 * to avoid blocking in our make_request_fn.  It also does loop
 * decrypting on reads for block backed loop, as that is too heavy to do
 * from b_end_io context where irqs may be disabled.
 *
 * Loop explanation:  loop_clr_fd() sets lo_state to Lo_rundown before
 * calling kthread_stop().  Therefore once kthread_should_stop() is
 * true, loop_make_request() will not queue any more requests, and once
 * kthread_should_stop() is true and lo_bio_list is empty, we are done
 * with the loop.
 */
static int loop_thread(void *data)
{
	struct loop_device *lo = data;
	struct bio *bio;

	set_user_nice(current, -20);

	while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {

		wait_event_interruptible(lo->lo_event,
				!bio_list_empty(&lo->lo_bio_list) ||
				kthread_should_stop());

		if (bio_list_empty(&lo->lo_bio_list))
			continue;
		spin_lock_irq(&lo->lo_lock);
		bio = loop_get_bio(lo);
		spin_unlock_irq(&lo->lo_lock);

		BUG_ON(!bio);
		loop_handle_bio(lo, bio);
	}

	return 0;
}

/*
 * loop_switch performs the hard work of switching a backing store.
 * First it needs to flush existing IO, it does this by sending a magic
 * BIO down the pipe. The completion of this BIO does the actual switch.
 */
static int loop_switch(struct loop_device *lo, struct file *file)
{
	struct switch_request w;
	struct bio *bio = bio_alloc(GFP_KERNEL, 0);
	if (!bio)
		return -ENOMEM;
	init_completion(&w.wait);
	w.file = file;
	bio->bi_private = &w;
	bio->bi_bdev = NULL;
	loop_make_request(lo->lo_queue, bio);
	wait_for_completion(&w.wait);
	return 0;
}
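
/*
 * The "magic" bio above is a zero-segment bio whose bi_bdev is left
 * NULL; loop_handle_bio() keys on that to call do_loop_switch() instead
 * of doing file I/O.  Because the worker thread pops bios in FIFO
 * order, every bio queued before the switch request is serviced first,
 * which is what flushes the outstanding I/O.
 */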

/*
 * Helper to flush the IOs in loop, but keeping loop thread running
 */
static int loop_flush(struct loop_device *lo)
{
	/* loop not yet configured, no running thread, nothing to flush */
	if (!lo->lo_thread)
		return 0;

	return loop_switch(lo, NULL);
}

/*
 * Do the actual switch; called from the BIO completion routine
 */
static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
{
	struct file *file = p->file;
	struct file *old_file = lo->lo_backing_file;
	struct address_space *mapping;

	/* if no new file, only flush of queued bios requested */
	if (!file)
		goto out;

	mapping = file->f_mapping;
	mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
	lo->lo_backing_file = file;
	lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
		mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
	lo->old_gfp_mask = mapping_gfp_mask(mapping);
	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
out:
	complete(&p->wait);
}

/*
 * loop_change_fd switches the backing store of a loopback device to
 * a new file. This is useful for operating system installers to free up
 * the original file and in High Availability environments to switch to
 * an alternative location for the content in case of server meltdown.
 * This can only work if the loop device is used read-only, and if the
 * new backing store is the same size and type as the old backing store.
 */
static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
			  unsigned int arg)
{
	struct file *file, *old_file;
	struct inode *inode;
	int error;

	error = -ENXIO;
	if (lo->lo_state != Lo_bound)
		goto out;

	/* the loop device has to be read-only */
	error = -EINVAL;
	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
		goto out;

	error = -EBADF;
	file = fget(arg);
	if (!file)
		goto out;

	inode = file->f_mapping->host;
	old_file = lo->lo_backing_file;

	error = -EINVAL;

	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
		goto out_putf;

	/* size of the new backing store needs to be the same */
	if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
		goto out_putf;

	/* and ... switch */
	error = loop_switch(lo, file);
	if (error)
		goto out_putf;

	fput(old_file);
	if (max_part > 0)
		ioctl_by_bdev(bdev, BLKRRPART, 0);
	return 0;

out_putf:
	fput(file);
out:
	return error;
}

static inline int is_loop_device(struct file *file)
{
	struct inode *i = file->f_mapping->host;

	return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
}

/* loop sysfs attributes */

static ssize_t loop_attr_show(struct device *dev, char *page,
			      ssize_t (*callback)(struct loop_device *, char *))
{
	struct loop_device *l, *lo = NULL;

	mutex_lock(&loop_devices_mutex);
	list_for_each_entry(l, &loop_devices, lo_list)
		if (disk_to_dev(l->lo_disk) == dev) {
			lo = l;
			break;
		}
	mutex_unlock(&loop_devices_mutex);

	return lo ? callback(lo, page) : -EIO;
}

#define LOOP_ATTR_RO(_name)						\
static ssize_t loop_attr_##_name##_show(struct loop_device *, char *);	\
static ssize_t loop_attr_do_show_##_name(struct device *d,		\
				struct device_attribute *attr, char *b)	\
{									\
	return loop_attr_show(d, b, loop_attr_##_name##_show);		\
}									\
static struct device_attribute loop_attr_##_name =			\
	__ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
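
/*
 * For instance, LOOP_ATTR_RO(offset) expands to a declaration of
 * loop_attr_offset_show(), a loop_attr_do_show_offset() wrapper, and a
 * read-only device attribute loop_attr_offset, which (via the "loop"
 * attribute group below) surfaces as /sys/block/loopN/loop/offset.
 */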

static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
{
	ssize_t ret;
	char *p = NULL;

	mutex_lock(&lo->lo_ctl_mutex);
	if (lo->lo_backing_file)
		p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
	mutex_unlock(&lo->lo_ctl_mutex);

	if (IS_ERR_OR_NULL(p))
		ret = PTR_ERR(p);
	else {
		ret = strlen(p);
		memmove(buf, p, ret);
		buf[ret++] = '\n';
		buf[ret] = 0;
	}

	return ret;
}

static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
{
	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
}

static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
{
	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
}

static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
{
	int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);

	return sprintf(buf, "%s\n", autoclear ? "1" : "0");
}

LOOP_ATTR_RO(backing_file);
LOOP_ATTR_RO(offset);
LOOP_ATTR_RO(sizelimit);
LOOP_ATTR_RO(autoclear);

static struct attribute *loop_attrs[] = {
	&loop_attr_backing_file.attr,
	&loop_attr_offset.attr,
	&loop_attr_sizelimit.attr,
	&loop_attr_autoclear.attr,
	NULL,
};

static struct attribute_group loop_attribute_group = {
	.name = "loop",
	.attrs = loop_attrs,
};

static int loop_sysfs_init(struct loop_device *lo)
{
	return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
				  &loop_attribute_group);
}

static void loop_sysfs_exit(struct loop_device *lo)
{
	sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
			   &loop_attribute_group);
}

static int loop_set_fd(struct loop_device *lo, fmode_t mode,
		       struct block_device *bdev, unsigned int arg)
{
	struct file *file, *f;
	struct inode *inode;
	struct address_space *mapping;
	unsigned lo_blocksize;
	int lo_flags = 0;
	int error;
	loff_t size;

	/* This is safe, since we have a reference from open(). */
	__module_get(THIS_MODULE);

	error = -EBADF;
	file = fget(arg);
	if (!file)
		goto out;

	error = -EBUSY;
	if (lo->lo_state != Lo_unbound)
		goto out_putf;

	/* Avoid recursion */
	f = file;
	while (is_loop_device(f)) {
		struct loop_device *l;

		if (f->f_mapping->host->i_bdev == bdev)
			goto out_putf;

		l = f->f_mapping->host->i_bdev->bd_disk->private_data;
		if (l->lo_state == Lo_unbound) {
			error = -EINVAL;
			goto out_putf;
		}
		f = l->lo_backing_file;
	}

	mapping = file->f_mapping;
	inode = mapping->host;

	if (!(file->f_mode & FMODE_WRITE))
		lo_flags |= LO_FLAGS_READ_ONLY;

	error = -EINVAL;
	if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		const struct address_space_operations *aops = mapping->a_ops;

		if (aops->write_begin)
			lo_flags |= LO_FLAGS_USE_AOPS;
		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
			lo_flags |= LO_FLAGS_READ_ONLY;

		lo_blocksize = S_ISBLK(inode->i_mode) ?
			inode->i_bdev->bd_block_size : PAGE_SIZE;

		error = 0;
	} else {
		goto out_putf;
	}

	size = get_loop_size(lo, file);

	if ((loff_t)(sector_t)size != size) {
		error = -EFBIG;
		goto out_putf;
	}

	if (!(mode & FMODE_WRITE))
		lo_flags |= LO_FLAGS_READ_ONLY;

	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);

	lo->lo_blocksize = lo_blocksize;
	lo->lo_device = bdev;
	lo->lo_flags = lo_flags;
	lo->lo_backing_file = file;
	lo->transfer = transfer_none;
	lo->ioctl = NULL;
	lo->lo_sizelimit = 0;
	lo->old_gfp_mask = mapping_gfp_mask(mapping);
	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));

	bio_list_init(&lo->lo_bio_list);

	/*
	 * set queue make_request_fn, and add limits based on lower level
	 * device
	 */
	blk_queue_make_request(lo->lo_queue, loop_make_request);
	lo->lo_queue->queuedata = lo;

	if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
		blk_queue_flush(lo->lo_queue, REQ_FLUSH);

	set_capacity(lo->lo_disk, size);
	bd_set_size(bdev, size << 9);
	loop_sysfs_init(lo);
	/* let user-space know about the new size */
	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);

	set_blocksize(bdev, lo_blocksize);

	lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
						lo->lo_number);
	if (IS_ERR(lo->lo_thread)) {
		error = PTR_ERR(lo->lo_thread);
		goto out_clr;
	}
	lo->lo_state = Lo_bound;
	wake_up_process(lo->lo_thread);
	if (max_part > 0)
		ioctl_by_bdev(bdev, BLKRRPART, 0);
	return 0;

out_clr:
	loop_sysfs_exit(lo);
	lo->lo_thread = NULL;
	lo->lo_device = NULL;
	lo->lo_backing_file = NULL;
	lo->lo_flags = 0;
	set_capacity(lo->lo_disk, 0);
	invalidate_bdev(bdev);
	bd_set_size(bdev, 0);
	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
	mapping_set_gfp_mask(mapping, lo->old_gfp_mask);
	lo->lo_state = Lo_unbound;
out_putf:
	fput(file);
out:
	/* This is safe: open() is still holding a reference. */
	module_put(THIS_MODULE);
	return error;
}
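
/*
 * A minimal user-space attach/detach sequence (editor's sketch; error
 * handling omitted, device and image paths are illustrative).  This is
 * essentially what losetup(8) does:
 *
 *	int loop_fd = open("/dev/loop0", O_RDWR);
 *	int file_fd = open("disk.img", O_RDWR);
 *
 *	ioctl(loop_fd, LOOP_SET_FD, file_fd);      - enters loop_set_fd()
 *
 *	struct loop_info64 info;
 *	memset(&info, 0, sizeof(info));
 *	strncpy((char *)info.lo_file_name, "disk.img", LO_NAME_SIZE);
 *	ioctl(loop_fd, LOOP_SET_STATUS64, &info);  - enters loop_set_status64()
 *	...
 *	ioctl(loop_fd, LOOP_CLR_FD, 0);            - enters loop_clr_fd()
 */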

static int
loop_release_xfer(struct loop_device *lo)
{
	int err = 0;
	struct loop_func_table *xfer = lo->lo_encryption;

	if (xfer) {
		if (xfer->release)
			err = xfer->release(lo);
		lo->transfer = NULL;
		lo->lo_encryption = NULL;
		module_put(xfer->owner);
	}
	return err;
}

static int
loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
	       const struct loop_info64 *i)
{
	int err = 0;

	if (xfer) {
		struct module *owner = xfer->owner;

		if (!try_module_get(owner))
			return -EINVAL;
		if (xfer->init)
			err = xfer->init(lo, i);
		if (err)
			module_put(owner);
		else
			lo->lo_encryption = xfer;
	}
	return err;
}

static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
{
	struct file *filp = lo->lo_backing_file;
	gfp_t gfp = lo->old_gfp_mask;

	if (lo->lo_state != Lo_bound)
		return -ENXIO;

	if (lo->lo_refcnt > 1)	/* we needed one fd for the ioctl */
		return -EBUSY;

	if (filp == NULL)
		return -EINVAL;

	spin_lock_irq(&lo->lo_lock);
	lo->lo_state = Lo_rundown;
	spin_unlock_irq(&lo->lo_lock);

	kthread_stop(lo->lo_thread);

	lo->lo_backing_file = NULL;

	loop_release_xfer(lo);
	lo->transfer = NULL;
	lo->ioctl = NULL;
	lo->lo_device = NULL;
	lo->lo_encryption = NULL;
	lo->lo_offset = 0;
	lo->lo_sizelimit = 0;
	lo->lo_encrypt_key_size = 0;
	lo->lo_flags = 0;
	lo->lo_thread = NULL;
	memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
	memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
	memset(lo->lo_file_name, 0, LO_NAME_SIZE);
	if (bdev)
		invalidate_bdev(bdev);
	set_capacity(lo->lo_disk, 0);
	loop_sysfs_exit(lo);
	if (bdev) {
		bd_set_size(bdev, 0);
		/* let user-space know about this change */
		kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
	}
	mapping_set_gfp_mask(filp->f_mapping, gfp);
	lo->lo_state = Lo_unbound;
	/* This is safe: open() is still holding a reference. */
	module_put(THIS_MODULE);
	if (max_part > 0 && bdev)
		ioctl_by_bdev(bdev, BLKRRPART, 0);
	mutex_unlock(&lo->lo_ctl_mutex);
	/*
	 * Need not hold lo_ctl_mutex to fput backing file.
	 * Calling fput holding lo_ctl_mutex triggers a circular
	 * lock dependency possibility warning as fput can take
	 * bd_mutex which is usually taken before lo_ctl_mutex.
	 */
	fput(filp);
	return 0;
}

static int
loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
{
	int err;
	struct loop_func_table *xfer;
	uid_t uid = current_uid();

	if (lo->lo_encrypt_key_size &&
	    lo->lo_key_owner != uid &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (lo->lo_state != Lo_bound)
		return -ENXIO;
	if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
		return -EINVAL;

	err = loop_release_xfer(lo);
	if (err)
		return err;

	if (info->lo_encrypt_type) {
		unsigned int type = info->lo_encrypt_type;

		if (type >= MAX_LO_CRYPT)
			return -EINVAL;
		xfer = xfer_funcs[type];
		if (xfer == NULL)
			return -EINVAL;
	} else
		xfer = NULL;

	err = loop_init_xfer(lo, xfer, info);
	if (err)
		return err;

	if (lo->lo_offset != info->lo_offset ||
	    lo->lo_sizelimit != info->lo_sizelimit) {
		lo->lo_offset = info->lo_offset;
		lo->lo_sizelimit = info->lo_sizelimit;
		if (figure_loop_size(lo))
			return -EFBIG;
	}

	memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
	memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
	lo->lo_file_name[LO_NAME_SIZE-1] = 0;
	lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;

	if (!xfer)
		xfer = &none_funcs;
	lo->transfer = xfer->transfer;
	lo->ioctl = xfer->ioctl;

	if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
	     (info->lo_flags & LO_FLAGS_AUTOCLEAR))
		lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;

	lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
	lo->lo_init[0] = info->lo_init[0];
	lo->lo_init[1] = info->lo_init[1];
	if (info->lo_encrypt_key_size) {
		memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
		       info->lo_encrypt_key_size);
		lo->lo_key_owner = uid;
	}

	return 0;
}

static int
loop_get_status(struct loop_device *lo, struct loop_info64 *info)
{
	struct file *file = lo->lo_backing_file;
	struct kstat stat;
	int error;

	if (lo->lo_state != Lo_bound)
		return -ENXIO;
	error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat);
	if (error)
		return error;
	memset(info, 0, sizeof(*info));
	info->lo_number = lo->lo_number;
	info->lo_device = huge_encode_dev(stat.dev);
	info->lo_inode = stat.ino;
	info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
	info->lo_offset = lo->lo_offset;
	info->lo_sizelimit = lo->lo_sizelimit;
	info->lo_flags = lo->lo_flags;
	memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
	memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
	info->lo_encrypt_type =
		lo->lo_encryption ? lo->lo_encryption->number : 0;
	if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
		info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
		memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
		       lo->lo_encrypt_key_size);
	}
	return 0;
}

static void
loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
{
	memset(info64, 0, sizeof(*info64));
	info64->lo_number = info->lo_number;
	info64->lo_device = info->lo_device;
	info64->lo_inode = info->lo_inode;
	info64->lo_rdevice = info->lo_rdevice;
	info64->lo_offset = info->lo_offset;
	info64->lo_sizelimit = 0;
	info64->lo_encrypt_type = info->lo_encrypt_type;
	info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
	info64->lo_flags = info->lo_flags;
	info64->lo_init[0] = info->lo_init[0];
	info64->lo_init[1] = info->lo_init[1];
	if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE);
	else
		memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
	memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
}

static int
loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
{
	memset(info, 0, sizeof(*info));
	info->lo_number = info64->lo_number;
	info->lo_device = info64->lo_device;
	info->lo_inode = info64->lo_inode;
	info->lo_rdevice = info64->lo_rdevice;
	info->lo_offset = info64->lo_offset;
	info->lo_encrypt_type = info64->lo_encrypt_type;
	info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
	info->lo_flags = info64->lo_flags;
	info->lo_init[0] = info64->lo_init[0];
	info->lo_init[1] = info64->lo_init[1];
	if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
	else
		memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
	memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);

	/* error in case values were truncated */
	if (info->lo_device != info64->lo_device ||
	    info->lo_rdevice != info64->lo_rdevice ||
	    info->lo_inode != info64->lo_inode ||
	    info->lo_offset != info64->lo_offset)
		return -EOVERFLOW;

	return 0;
}

static int
loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg)
{
	struct loop_info info;
	struct loop_info64 info64;

	if (copy_from_user(&info, arg, sizeof (struct loop_info)))
		return -EFAULT;
	loop_info64_from_old(&info, &info64);
	return loop_set_status(lo, &info64);
}

static int
loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg)
{
	struct loop_info64 info64;

	if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
		return -EFAULT;
	return loop_set_status(lo, &info64);
}

static int
loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
	struct loop_info info;
	struct loop_info64 info64;
	int err = 0;

	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err)
		err = loop_info64_to_old(&info64, &info);
	if (!err && copy_to_user(arg, &info, sizeof(info)))
		err = -EFAULT;

	return err;
}

static int
loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
	struct loop_info64 info64;
	int err = 0;

	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err && copy_to_user(arg, &info64, sizeof(info64)))
		err = -EFAULT;

	return err;
}

static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
{
	int err;
	sector_t sec;
	loff_t sz;

	err = -ENXIO;
	if (unlikely(lo->lo_state != Lo_bound))
		goto out;
	err = figure_loop_size(lo);
	if (unlikely(err))
		goto out;
	sec = get_capacity(lo->lo_disk);
	/* the width of sector_t may be narrow for bit-shift */
	sz = sec;
	sz <<= 9;
	mutex_lock(&bdev->bd_mutex);
	bd_set_size(bdev, sz);
	/* let user-space know about the new size */
	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
	mutex_unlock(&bdev->bd_mutex);

out:
	return err;
}

static int lo_ioctl(struct block_device *bdev, fmode_t mode,
	unsigned int cmd, unsigned long arg)
{
	struct loop_device *lo = bdev->bd_disk->private_data;
	int err;

	mutex_lock_nested(&lo->lo_ctl_mutex, 1);
	switch (cmd) {
	case LOOP_SET_FD:
		err = loop_set_fd(lo, mode, bdev, arg);
		break;
	case LOOP_CHANGE_FD:
		err = loop_change_fd(lo, bdev, arg);
		break;
	case LOOP_CLR_FD:
		/* loop_clr_fd would have unlocked lo_ctl_mutex on success */
		err = loop_clr_fd(lo, bdev);
		if (!err)
			goto out_unlocked;
		break;
	case LOOP_SET_STATUS:
		err = loop_set_status_old(lo, (struct loop_info __user *) arg);
		break;
	case LOOP_GET_STATUS:
		err = loop_get_status_old(lo, (struct loop_info __user *) arg);
		break;
	case LOOP_SET_STATUS64:
		err = loop_set_status64(lo, (struct loop_info64 __user *) arg);
		break;
	case LOOP_GET_STATUS64:
		err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
		break;
	case LOOP_SET_CAPACITY:
		err = -EPERM;
		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
			err = loop_set_capacity(lo, bdev);
		break;
	default:
		err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
	}
	mutex_unlock(&lo->lo_ctl_mutex);

out_unlocked:
	return err;
}

#ifdef CONFIG_COMPAT
struct compat_loop_info {
	compat_int_t	lo_number;      /* ioctl r/o */
	compat_dev_t	lo_device;      /* ioctl r/o */
	compat_ulong_t	lo_inode;       /* ioctl r/o */
	compat_dev_t	lo_rdevice;     /* ioctl r/o */
	compat_int_t	lo_offset;
	compat_int_t	lo_encrypt_type;
	compat_int_t	lo_encrypt_key_size;    /* ioctl w/o */
	compat_int_t	lo_flags;       /* ioctl r/o */
	char		lo_name[LO_NAME_SIZE];
	unsigned char	lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
	compat_ulong_t	lo_init[2];
	char		reserved[4];
};

/*
 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
 * - noinlined to reduce stack space usage in main part of driver
 */
static noinline int
loop_info64_from_compat(const struct compat_loop_info __user *arg,
			struct loop_info64 *info64)
{
	struct compat_loop_info info;

	if (copy_from_user(&info, arg, sizeof(info)))
		return -EFAULT;

	memset(info64, 0, sizeof(*info64));
	info64->lo_number = info.lo_number;
	info64->lo_device = info.lo_device;
	info64->lo_inode = info.lo_inode;
	info64->lo_rdevice = info.lo_rdevice;
	info64->lo_offset = info.lo_offset;
	info64->lo_sizelimit = 0;
	info64->lo_encrypt_type = info.lo_encrypt_type;
	info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
	info64->lo_flags = info.lo_flags;
	info64->lo_init[0] = info.lo_init[0];
	info64->lo_init[1] = info.lo_init[1];
	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
	else
		memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
	memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
	return 0;
}

/*
 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
 * - noinlined to reduce stack space usage in main part of driver
 */
static noinline int
loop_info64_to_compat(const struct loop_info64 *info64,
		      struct compat_loop_info __user *arg)
{
	struct compat_loop_info info;

	memset(&info, 0, sizeof(info));
	info.lo_number = info64->lo_number;
	info.lo_device = info64->lo_device;
	info.lo_inode = info64->lo_inode;
	info.lo_rdevice = info64->lo_rdevice;
	info.lo_offset = info64->lo_offset;
	info.lo_encrypt_type = info64->lo_encrypt_type;
	info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
	info.lo_flags = info64->lo_flags;
	info.lo_init[0] = info64->lo_init[0];
	info.lo_init[1] = info64->lo_init[1];
	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
		memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
	else
		memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
	memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);

	/* error in case values were truncated */
	if (info.lo_device != info64->lo_device ||
	    info.lo_rdevice != info64->lo_rdevice ||
	    info.lo_inode != info64->lo_inode ||
	    info.lo_offset != info64->lo_offset ||
	    info.lo_init[0] != info64->lo_init[0] ||
	    info.lo_init[1] != info64->lo_init[1])
		return -EOVERFLOW;

	if (copy_to_user(arg, &info, sizeof(info)))
		return -EFAULT;
	return 0;
}

static int
loop_set_status_compat(struct loop_device *lo,
		       const struct compat_loop_info __user *arg)
{
	struct loop_info64 info64;
	int ret;

	ret = loop_info64_from_compat(arg, &info64);
	if (ret < 0)
		return ret;
	return loop_set_status(lo, &info64);
}

static int
loop_get_status_compat(struct loop_device *lo,
		       struct compat_loop_info __user *arg)
{
	struct loop_info64 info64;
	int err = 0;

	if (!arg)
		err = -EINVAL;
	if (!err)
		err = loop_get_status(lo, &info64);
	if (!err)
		err = loop_info64_to_compat(&info64, arg);
	return err;
}

static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	struct loop_device *lo = bdev->bd_disk->private_data;
	int err;

	switch(cmd) {
	case LOOP_SET_STATUS:
		mutex_lock(&lo->lo_ctl_mutex);
		err = loop_set_status_compat(
			lo, (const struct compat_loop_info __user *) arg);
		mutex_unlock(&lo->lo_ctl_mutex);
		break;
	case LOOP_GET_STATUS:
		mutex_lock(&lo->lo_ctl_mutex);
		err = loop_get_status_compat(
			lo, (struct compat_loop_info __user *) arg);
		mutex_unlock(&lo->lo_ctl_mutex);
		break;
	case LOOP_SET_CAPACITY:
	case LOOP_CLR_FD:
	case LOOP_GET_STATUS64:
	case LOOP_SET_STATUS64:
		arg = (unsigned long) compat_ptr(arg);
	case LOOP_SET_FD:
	case LOOP_CHANGE_FD:
		err = lo_ioctl(bdev, mode, cmd, arg);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
#endif

static int lo_open(struct block_device *bdev, fmode_t mode)
{
	struct loop_device *lo = bdev->bd_disk->private_data;

	mutex_lock(&lo->lo_ctl_mutex);
	lo->lo_refcnt++;
	mutex_unlock(&lo->lo_ctl_mutex);

	return 0;
}

static int lo_release(struct gendisk *disk, fmode_t mode)
{
	struct loop_device *lo = disk->private_data;
	int err;

	mutex_lock(&lo->lo_ctl_mutex);

	if (--lo->lo_refcnt)
		goto out;

	if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
		/*
		 * In autoclear mode, stop the loop thread
		 * and remove configuration after last close.
		 */
		err = loop_clr_fd(lo, NULL);
		if (!err)
			goto out_unlocked;
	} else {
		/*
		 * Otherwise keep thread (if running) and config,
		 * but flush possible ongoing bios in thread.
		 */
		loop_flush(lo);
	}

out:
	mutex_unlock(&lo->lo_ctl_mutex);
out_unlocked:
	return 0;
}

static const struct block_device_operations lo_fops = {
	.owner =	THIS_MODULE,
	.open =		lo_open,
	.release =	lo_release,
	.ioctl =	lo_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	lo_compat_ioctl,
#endif
};

/*
 * And now the modules code and kernel interface.
 */
static int max_loop;
module_param(max_loop, int, S_IRUGO);
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
module_param(max_part, int, S_IRUGO);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);

int loop_register_transfer(struct loop_func_table *funcs)
{
	unsigned int n = funcs->number;

	if (n >= MAX_LO_CRYPT || xfer_funcs[n])
		return -EINVAL;
	xfer_funcs[n] = funcs;
	return 0;
}

int loop_unregister_transfer(int number)
{
	unsigned int n = number;
	struct loop_device *lo;
	struct loop_func_table *xfer;

	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
		return -EINVAL;

	xfer_funcs[n] = NULL;

	list_for_each_entry(lo, &loop_devices, lo_list) {
		mutex_lock(&lo->lo_ctl_mutex);

		if (lo->lo_encryption == xfer)
			loop_release_xfer(lo);

		mutex_unlock(&lo->lo_ctl_mutex);
	}

	return 0;
}

EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);

static struct loop_device *loop_alloc(int i)
{
	struct loop_device *lo;
	struct gendisk *disk;

	lo = kzalloc(sizeof(*lo), GFP_KERNEL);
	if (!lo)
		goto out;

	lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
	if (!lo->lo_queue)
		goto out_free_dev;

	disk = lo->lo_disk = alloc_disk(1 << part_shift);
	if (!disk)
		goto out_free_queue;

	mutex_init(&lo->lo_ctl_mutex);
	lo->lo_number		= i;
	lo->lo_thread		= NULL;
	init_waitqueue_head(&lo->lo_event);
	spin_lock_init(&lo->lo_lock);
	disk->major		= LOOP_MAJOR;
	disk->first_minor	= i << part_shift;
	disk->fops		= &lo_fops;
	disk->private_data	= lo;
	disk->queue		= lo->lo_queue;
	sprintf(disk->disk_name, "loop%d", i);
	return lo;

out_free_queue:
	blk_cleanup_queue(lo->lo_queue);
out_free_dev:
	kfree(lo);
out:
	return NULL;
}

static void loop_free(struct loop_device *lo)
{
	blk_cleanup_queue(lo->lo_queue);
	put_disk(lo->lo_disk);
	list_del(&lo->lo_list);
	kfree(lo);
}

static struct loop_device *loop_init_one(int i)
{
	struct loop_device *lo;

	list_for_each_entry(lo, &loop_devices, lo_list) {
		if (lo->lo_number == i)
			return lo;
	}

	lo = loop_alloc(i);
	if (lo) {
		add_disk(lo->lo_disk);
		list_add_tail(&lo->lo_list, &loop_devices);
	}
	return lo;
}

static void loop_del_one(struct loop_device *lo)
{
	del_gendisk(lo->lo_disk);
	loop_free(lo);
}

static struct kobject *loop_probe(dev_t dev, int *part, void *data)
{
	struct loop_device *lo;
	struct kobject *kobj;

	mutex_lock(&loop_devices_mutex);
	lo = loop_init_one(MINOR(dev) >> part_shift);
	kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
	mutex_unlock(&loop_devices_mutex);

	*part = 0;
	return kobj;
}

static int __init loop_init(void)
{
	int i, nr;
	unsigned long range;
	struct loop_device *lo, *next;

	/*
	 * The loop module can instantiate the underlying device structure
	 * on demand, provided the corresponding device node is accessed.
	 * However, this does not work well with user space tools that do
	 * not know about this "feature".  In order not to break any
	 * existing tools, we do the following:
	 *
	 * (1) if max_loop is specified, create that many devices upfront;
	 *     this also becomes a hard limit.
	 * (2) if max_loop is not specified, create 8 loop devices on
	 *     module load; users can extend this by creating device nodes
	 *     themselves, and the kernel will automatically instantiate
	 *     the actual device on demand.
	 */

	part_shift = 0;
	if (max_part > 0) {
		part_shift = fls(max_part);

		/*
		 * Adjust max_part according to part_shift as it is exported
		 * to user space, so that users can compute the correct minor
		 * number when they want to create more devices.
		 *
		 * Note that -1 is required because partition 0 is reserved
		 * for the whole disk.
		 */
		max_part = (1UL << part_shift) - 1;
	}
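
	/*
	 * Example: loading with max_part=8 gives part_shift = fls(8) = 4,
	 * so max_part is rounded to 15 and each device owns 16 minors;
	 * loop1 starts at minor 16 with loop1p1..loop1p15 on minors 17-31
	 * (the first minor of each range being the whole disk).
	 */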

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	if (max_loop > 1UL << (MINORBITS - part_shift))
		return -EINVAL;

	if (max_loop) {
		nr = max_loop;
		range = max_loop << part_shift;
	} else {
		nr = 8;
		range = 1UL << MINORBITS;
	}

	if (register_blkdev(LOOP_MAJOR, "loop"))
		return -EIO;

	for (i = 0; i < nr; i++) {
		lo = loop_alloc(i);
		if (!lo)
			goto Enomem;
		list_add_tail(&lo->lo_list, &loop_devices);
	}

	/* point of no return */

	list_for_each_entry(lo, &loop_devices, lo_list)
		add_disk(lo->lo_disk);

	blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
				  THIS_MODULE, loop_probe, NULL, NULL);

	printk(KERN_INFO "loop: module loaded\n");
	return 0;

Enomem:
	printk(KERN_INFO "loop: out of memory\n");

	list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
		loop_free(lo);

	unregister_blkdev(LOOP_MAJOR, "loop");
	return -ENOMEM;
}

static void __exit loop_exit(void)
{
	unsigned long range;
	struct loop_device *lo, *next;

	range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;

	list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
		loop_del_one(lo);

	blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
	unregister_blkdev(LOOP_MAJOR, "loop");
}

module_init(loop_init);
module_exit(loop_exit);

#ifndef MODULE
static int __init max_loop_setup(char *str)
{
	max_loop = simple_strtol(str, NULL, 0);
	return 1;
}

__setup("max_loop=", max_loop_setup);
#endif