CoCalc -- compression.c

GitHub Repository: awilliam/linux-vfio
Path: blob/master/fs/btrfs/compression.c
¹⁷³³⁰ views
1
/*
2
 * Copyright (C) 2008 Oracle.  All rights reserved.
3
 *
4
 * This program is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU General Public
6
 * License v2 as published by the Free Software Foundation.
7
 *
8
 * This program is distributed in the hope that it will be useful,
9
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
 * General Public License for more details.
12
 *
13
 * You should have received a copy of the GNU General Public
14
 * License along with this program; if not, write to the
15
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16
 * Boston, MA 02111-1307, USA.
17
 */
18

19
#include <linux/kernel.h>
20
#include <linux/bio.h>
21
#include <linux/buffer_head.h>
22
#include <linux/file.h>
23
#include <linux/fs.h>
24
#include <linux/pagemap.h>
25
#include <linux/highmem.h>
26
#include <linux/time.h>
27
#include <linux/init.h>
28
#include <linux/string.h>
29
#include <linux/backing-dev.h>
30
#include <linux/mpage.h>
31
#include <linux/swap.h>
32
#include <linux/writeback.h>
33
#include <linux/bit_spinlock.h>
34
#include <linux/slab.h>
35
#include "compat.h"
36
#include "ctree.h"
37
#include "disk-io.h"
38
#include "transaction.h"
39
#include "btrfs_inode.h"
40
#include "volumes.h"
41
#include "ordered-data.h"
42
#include "compression.h"
43
#include "extent_io.h"
44
#include "extent_map.h"
45

46
struct compressed_bio {
47
	/* number of bios pending for this compressed extent */
48
	atomic_t pending_bios;
49

50
	/* the pages with the compressed data on them */
51
	struct page **compressed_pages;
52

53
	/* inode that owns this data */
54
	struct inode *inode;
55

56
	/* starting offset in the inode for our pages */
57
	u64 start;
58

59
	/* number of bytes in the inode we're working on */
60
	unsigned long len;
61

62
	/* number of bytes on disk */
63
	unsigned long compressed_len;
64

65
	/* the compression algorithm for this bio */
66
	int compress_type;
67

68
	/* number of compressed pages in the array */
69
	unsigned long nr_pages;
70

71
	/* IO errors */
72
	int errors;
73
	int mirror_num;
74

75
	/* for reads, this is the bio we are copying the data into */
76
	struct bio *orig_bio;
77

78
	/*
79
	 * the start of a variable length array of checksums only
80
	 * used by reads
81
	 */
82
	u32 sums;
83
};
84

85
static inline int compressed_bio_size(struct btrfs_root *root,
86
				      unsigned long disk_size)
87
{
88
	u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
89
	return sizeof(struct compressed_bio) +
90
		((disk_size + root->sectorsize - 1) / root->sectorsize) *
91
		csum_size;
92
}
93

94
static struct bio *compressed_bio_alloc(struct block_device *bdev,
95
					u64 first_byte, gfp_t gfp_flags)
96
{
97
	int nr_vecs;
98

99
	nr_vecs = bio_get_nr_vecs(bdev);
100
	return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
101
}
102

103
static int check_compressed_csum(struct inode *inode,
104
				 struct compressed_bio *cb,
105
				 u64 disk_start)
106
{
107
	int ret;
108
	struct btrfs_root *root = BTRFS_I(inode)->root;
109
	struct page *page;
110
	unsigned long i;
111
	char *kaddr;
112
	u32 csum;
113
	u32 *cb_sum = &cb->sums;
114

115
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
116
		return 0;
117

118
	for (i = 0; i < cb->nr_pages; i++) {
119
		page = cb->compressed_pages[i];
120
		csum = ~(u32)0;
121

122
		kaddr = kmap_atomic(page, KM_USER0);
123
		csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
124
		btrfs_csum_final(csum, (char *)&csum);
125
		kunmap_atomic(kaddr, KM_USER0);
126

127
		if (csum != *cb_sum) {
128
			printk(KERN_INFO "btrfs csum failed ino %llu "
129
			       "extent %llu csum %u "
130
			       "wanted %u mirror %d\n",
131
			       (unsigned long long)btrfs_ino(inode),
132
			       (unsigned long long)disk_start,
133
			       csum, *cb_sum, cb->mirror_num);
134
			ret = -EIO;
135
			goto fail;
136
		}
137
		cb_sum++;
138

139
	}
140
	ret = 0;
141
fail:
142
	return ret;
143
}
144

145
/* when we finish reading compressed pages from the disk, we
146
 * decompress them and then run the bio end_io routines on the
147
 * decompressed pages (in the inode address space).
148
 *
149
 * This allows the checksumming and other IO error handling routines
150
 * to work normally
151
 *
152
 * The compressed pages are freed here, and it must be run
153
 * in process context
154
 */
155
static void end_compressed_bio_read(struct bio *bio, int err)
156
{
157
	struct compressed_bio *cb = bio->bi_private;
158
	struct inode *inode;
159
	struct page *page;
160
	unsigned long index;
161
	int ret;
162

163
	if (err)
164
		cb->errors = 1;
165

166
	/* if there are more bios still pending for this compressed
167
	 * extent, just exit
168
	 */
169
	if (!atomic_dec_and_test(&cb->pending_bios))
170
		goto out;
171

172
	inode = cb->inode;
173
	ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
174
	if (ret)
175
		goto csum_failed;
176

177
	/* ok, we're the last bio for this extent, lets start
178
	 * the decompression.
179
	 */
180
	ret = btrfs_decompress_biovec(cb->compress_type,
181
				      cb->compressed_pages,
182
				      cb->start,
183
				      cb->orig_bio->bi_io_vec,
184
				      cb->orig_bio->bi_vcnt,
185
				      cb->compressed_len);
186
csum_failed:
187
	if (ret)
188
		cb->errors = 1;
189

190
	/* release the compressed pages */
191
	index = 0;
192
	for (index = 0; index < cb->nr_pages; index++) {
193
		page = cb->compressed_pages[index];
194
		page->mapping = NULL;
195
		page_cache_release(page);
196
	}
197

198
	/* do io completion on the original bio */
199
	if (cb->errors) {
200
		bio_io_error(cb->orig_bio);
201
	} else {
202
		int bio_index = 0;
203
		struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
204

205
		/*
206
		 * we have verified the checksum already, set page
207
		 * checked so the end_io handlers know about it
208
		 */
209
		while (bio_index < cb->orig_bio->bi_vcnt) {
210
			SetPageChecked(bvec->bv_page);
211
			bvec++;
212
			bio_index++;
213
		}
214
		bio_endio(cb->orig_bio, 0);
215
	}
216

217
	/* finally free the cb struct */
218
	kfree(cb->compressed_pages);
219
	kfree(cb);
220
out:
221
	bio_put(bio);
222
}
223

224
/*
225
 * Clear the writeback bits on all of the file
226
 * pages for a compressed write
227
 */
228
static noinline int end_compressed_writeback(struct inode *inode, u64 start,
229
					     unsigned long ram_size)
230
{
231
	unsigned long index = start >> PAGE_CACHE_SHIFT;
232
	unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
233
	struct page *pages[16];
234
	unsigned long nr_pages = end_index - index + 1;
235
	int i;
236
	int ret;
237

238
	while (nr_pages > 0) {
239
		ret = find_get_pages_contig(inode->i_mapping, index,
240
				     min_t(unsigned long,
241
				     nr_pages, ARRAY_SIZE(pages)), pages);
242
		if (ret == 0) {
243
			nr_pages -= 1;
244
			index += 1;
245
			continue;
246
		}
247
		for (i = 0; i < ret; i++) {
248
			end_page_writeback(pages[i]);
249
			page_cache_release(pages[i]);
250
		}
251
		nr_pages -= ret;
252
		index += ret;
253
	}
254
	/* the inode may be gone now */
255
	return 0;
256
}
257

258
/*
259
 * do the cleanup once all the compressed pages hit the disk.
260
 * This will clear writeback on the file pages and free the compressed
261
 * pages.
262
 *
263
 * This also calls the writeback end hooks for the file pages so that
264
 * metadata and checksums can be updated in the file.
265
 */
266
static void end_compressed_bio_write(struct bio *bio, int err)
267
{
268
	struct extent_io_tree *tree;
269
	struct compressed_bio *cb = bio->bi_private;
270
	struct inode *inode;
271
	struct page *page;
272
	unsigned long index;
273

274
	if (err)
275
		cb->errors = 1;
276

277
	/* if there are more bios still pending for this compressed
278
	 * extent, just exit
279
	 */
280
	if (!atomic_dec_and_test(&cb->pending_bios))
281
		goto out;
282

283
	/* ok, we're the last bio for this extent, step one is to
284
	 * call back into the FS and do all the end_io operations
285
	 */
286
	inode = cb->inode;
287
	tree = &BTRFS_I(inode)->io_tree;
288
	cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
289
	tree->ops->writepage_end_io_hook(cb->compressed_pages[0],
290
					 cb->start,
291
					 cb->start + cb->len - 1,
292
					 NULL, 1);
293
	cb->compressed_pages[0]->mapping = NULL;
294

295
	end_compressed_writeback(inode, cb->start, cb->len);
296
	/* note, our inode could be gone now */
297

298
	/*
299
	 * release the compressed pages, these came from alloc_page and
300
	 * are not attached to the inode at all
301
	 */
302
	index = 0;
303
	for (index = 0; index < cb->nr_pages; index++) {
304
		page = cb->compressed_pages[index];
305
		page->mapping = NULL;
306
		page_cache_release(page);
307
	}
308

309
	/* finally free the cb struct */
310
	kfree(cb->compressed_pages);
311
	kfree(cb);
312
out:
313
	bio_put(bio);
314
}
315

316
/*
317
 * worker function to build and submit bios for previously compressed pages.
318
 * The corresponding pages in the inode should be marked for writeback
319
 * and the compressed pages should have a reference on them for dropping
320
 * when the IO is complete.
321
 *
322
 * This also checksums the file bytes and gets things ready for
323
 * the end io hooks.
324
 */
325
int btrfs_submit_compressed_write(struct inode *inode, u64 start,
326
				 unsigned long len, u64 disk_start,
327
				 unsigned long compressed_len,
328
				 struct page **compressed_pages,
329
				 unsigned long nr_pages)
330
{
331
	struct bio *bio = NULL;
332
	struct btrfs_root *root = BTRFS_I(inode)->root;
333
	struct compressed_bio *cb;
334
	unsigned long bytes_left;
335
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
336
	int pg_index = 0;
337
	struct page *page;
338
	u64 first_byte = disk_start;
339
	struct block_device *bdev;
340
	int ret;
341

342
	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
343
	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
344
	if (!cb)
345
		return -ENOMEM;
346
	atomic_set(&cb->pending_bios, 0);
347
	cb->errors = 0;
348
	cb->inode = inode;
349
	cb->start = start;
350
	cb->len = len;
351
	cb->mirror_num = 0;
352
	cb->compressed_pages = compressed_pages;
353
	cb->compressed_len = compressed_len;
354
	cb->orig_bio = NULL;
355
	cb->nr_pages = nr_pages;
356

357
	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
358

359
	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
360
	if(!bio) {
361
		kfree(cb);
362
		return -ENOMEM;
363
	}
364
	bio->bi_private = cb;
365
	bio->bi_end_io = end_compressed_bio_write;
366
	atomic_inc(&cb->pending_bios);
367

368
	/* create and submit bios for the compressed pages */
369
	bytes_left = compressed_len;
370
	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
371
		page = compressed_pages[pg_index];
372
		page->mapping = inode->i_mapping;
373
		if (bio->bi_size)
374
			ret = io_tree->ops->merge_bio_hook(page, 0,
375
							   PAGE_CACHE_SIZE,
376
							   bio, 0);
377
		else
378
			ret = 0;
379

380
		page->mapping = NULL;
381
		if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) <
382
		    PAGE_CACHE_SIZE) {
383
			bio_get(bio);
384

385
			/*
386
			 * inc the count before we submit the bio so
387
			 * we know the end IO handler won't happen before
388
			 * we inc the count.  Otherwise, the cb might get
389
			 * freed before we're done setting it up
390
			 */
391
			atomic_inc(&cb->pending_bios);
392
			ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
393
			BUG_ON(ret);
394

395
			ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
396
			BUG_ON(ret);
397

398
			ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
399
			BUG_ON(ret);
400

401
			bio_put(bio);
402

403
			bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
404
			bio->bi_private = cb;
405
			bio->bi_end_io = end_compressed_bio_write;
406
			bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
407
		}
408
		if (bytes_left < PAGE_CACHE_SIZE) {
409
			printk("bytes left %lu compress len %lu nr %lu\n",
410
			       bytes_left, cb->compressed_len, cb->nr_pages);
411
		}
412
		bytes_left -= PAGE_CACHE_SIZE;
413
		first_byte += PAGE_CACHE_SIZE;
414
		cond_resched();
415
	}
416
	bio_get(bio);
417

418
	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
419
	BUG_ON(ret);
420

421
	ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
422
	BUG_ON(ret);
423

424
	ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
425
	BUG_ON(ret);
426

427
	bio_put(bio);
428
	return 0;
429
}
430

431
static noinline int add_ra_bio_pages(struct inode *inode,
432
				     u64 compressed_end,
433
				     struct compressed_bio *cb)
434
{
435
	unsigned long end_index;
436
	unsigned long pg_index;
437
	u64 last_offset;
438
	u64 isize = i_size_read(inode);
439
	int ret;
440
	struct page *page;
441
	unsigned long nr_pages = 0;
442
	struct extent_map *em;
443
	struct address_space *mapping = inode->i_mapping;
444
	struct extent_map_tree *em_tree;
445
	struct extent_io_tree *tree;
446
	u64 end;
447
	int misses = 0;
448

449
	page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page;
450
	last_offset = (page_offset(page) + PAGE_CACHE_SIZE);
451
	em_tree = &BTRFS_I(inode)->extent_tree;
452
	tree = &BTRFS_I(inode)->io_tree;
453

454
	if (isize == 0)
455
		return 0;
456

457
	end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
458

459
	while (last_offset < compressed_end) {
460
		pg_index = last_offset >> PAGE_CACHE_SHIFT;
461

462
		if (pg_index > end_index)
463
			break;
464

465
		rcu_read_lock();
466
		page = radix_tree_lookup(&mapping->page_tree, pg_index);
467
		rcu_read_unlock();
468
		if (page) {
469
			misses++;
470
			if (misses > 4)
471
				break;
472
			goto next;
473
		}
474

475
		page = __page_cache_alloc(mapping_gfp_mask(mapping) &
476
								~__GFP_FS);
477
		if (!page)
478
			break;
479

480
		if (add_to_page_cache_lru(page, mapping, pg_index,
481
								GFP_NOFS)) {
482
			page_cache_release(page);
483
			goto next;
484
		}
485

486
		end = last_offset + PAGE_CACHE_SIZE - 1;
487
		/*
488
		 * at this point, we have a locked page in the page cache
489
		 * for these bytes in the file.  But, we have to make
490
		 * sure they map to this compressed extent on disk.
491
		 */
492
		set_page_extent_mapped(page);
493
		lock_extent(tree, last_offset, end, GFP_NOFS);
494
		read_lock(&em_tree->lock);
495
		em = lookup_extent_mapping(em_tree, last_offset,
496
					   PAGE_CACHE_SIZE);
497
		read_unlock(&em_tree->lock);
498

499
		if (!em || last_offset < em->start ||
500
		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
501
		    (em->block_start >> 9) != cb->orig_bio->bi_sector) {
502
			free_extent_map(em);
503
			unlock_extent(tree, last_offset, end, GFP_NOFS);
504
			unlock_page(page);
505
			page_cache_release(page);
506
			break;
507
		}
508
		free_extent_map(em);
509

510
		if (page->index == end_index) {
511
			char *userpage;
512
			size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1);
513

514
			if (zero_offset) {
515
				int zeros;
516
				zeros = PAGE_CACHE_SIZE - zero_offset;
517
				userpage = kmap_atomic(page, KM_USER0);
518
				memset(userpage + zero_offset, 0, zeros);
519
				flush_dcache_page(page);
520
				kunmap_atomic(userpage, KM_USER0);
521
			}
522
		}
523

524
		ret = bio_add_page(cb->orig_bio, page,
525
				   PAGE_CACHE_SIZE, 0);
526

527
		if (ret == PAGE_CACHE_SIZE) {
528
			nr_pages++;
529
			page_cache_release(page);
530
		} else {
531
			unlock_extent(tree, last_offset, end, GFP_NOFS);
532
			unlock_page(page);
533
			page_cache_release(page);
534
			break;
535
		}
536
next:
537
		last_offset += PAGE_CACHE_SIZE;
538
	}
539
	return 0;
540
}
541

542
/*
543
 * for a compressed read, the bio we get passed has all the inode pages
544
 * in it.  We don't actually do IO on those pages but allocate new ones
545
 * to hold the compressed pages on disk.
546
 *
547
 * bio->bi_sector points to the compressed extent on disk
548
 * bio->bi_io_vec points to all of the inode pages
549
 * bio->bi_vcnt is a count of pages
550
 *
551
 * After the compressed pages are read, we copy the bytes into the
552
 * bio we were passed and then call the bio end_io calls
553
 */
554
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
555
				 int mirror_num, unsigned long bio_flags)
556
{
557
	struct extent_io_tree *tree;
558
	struct extent_map_tree *em_tree;
559
	struct compressed_bio *cb;
560
	struct btrfs_root *root = BTRFS_I(inode)->root;
561
	unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
562
	unsigned long compressed_len;
563
	unsigned long nr_pages;
564
	unsigned long pg_index;
565
	struct page *page;
566
	struct block_device *bdev;
567
	struct bio *comp_bio;
568
	u64 cur_disk_byte = (u64)bio->bi_sector << 9;
569
	u64 em_len;
570
	u64 em_start;
571
	struct extent_map *em;
572
	int ret = -ENOMEM;
573
	u32 *sums;
574

575
	tree = &BTRFS_I(inode)->io_tree;
576
	em_tree = &BTRFS_I(inode)->extent_tree;
577

578
	/* we need the actual starting offset of this extent in the file */
579
	read_lock(&em_tree->lock);
580
	em = lookup_extent_mapping(em_tree,
581
				   page_offset(bio->bi_io_vec->bv_page),
582
				   PAGE_CACHE_SIZE);
583
	read_unlock(&em_tree->lock);
584

585
	compressed_len = em->block_len;
586
	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
587
	if (!cb)
588
		goto out;
589

590
	atomic_set(&cb->pending_bios, 0);
591
	cb->errors = 0;
592
	cb->inode = inode;
593
	cb->mirror_num = mirror_num;
594
	sums = &cb->sums;
595

596
	cb->start = em->orig_start;
597
	em_len = em->len;
598
	em_start = em->start;
599

600
	free_extent_map(em);
601
	em = NULL;
602

603
	cb->len = uncompressed_len;
604
	cb->compressed_len = compressed_len;
605
	cb->compress_type = extent_compress_type(bio_flags);
606
	cb->orig_bio = bio;
607

608
	nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
609
				 PAGE_CACHE_SIZE;
610
	cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
611
				       GFP_NOFS);
612
	if (!cb->compressed_pages)
613
		goto fail1;
614

615
	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
616

617
	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
618
		cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
619
							      __GFP_HIGHMEM);
620
		if (!cb->compressed_pages[pg_index])
621
			goto fail2;
622
	}
623
	cb->nr_pages = nr_pages;
624

625
	add_ra_bio_pages(inode, em_start + em_len, cb);
626

627
	/* include any pages we added in add_ra-bio_pages */
628
	uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
629
	cb->len = uncompressed_len;
630

631
	comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
632
	if (!comp_bio)
633
		goto fail2;
634
	comp_bio->bi_private = cb;
635
	comp_bio->bi_end_io = end_compressed_bio_read;
636
	atomic_inc(&cb->pending_bios);
637

638
	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
639
		page = cb->compressed_pages[pg_index];
640
		page->mapping = inode->i_mapping;
641
		page->index = em_start >> PAGE_CACHE_SHIFT;
642

643
		if (comp_bio->bi_size)
644
			ret = tree->ops->merge_bio_hook(page, 0,
645
							PAGE_CACHE_SIZE,
646
							comp_bio, 0);
647
		else
648
			ret = 0;
649

650
		page->mapping = NULL;
651
		if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) <
652
		    PAGE_CACHE_SIZE) {
653
			bio_get(comp_bio);
654

655
			ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
656
			BUG_ON(ret);
657

658
			/*
659
			 * inc the count before we submit the bio so
660
			 * we know the end IO handler won't happen before
661
			 * we inc the count.  Otherwise, the cb might get
662
			 * freed before we're done setting it up
663
			 */
664
			atomic_inc(&cb->pending_bios);
665

666
			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
667
				ret = btrfs_lookup_bio_sums(root, inode,
668
							comp_bio, sums);
669
				BUG_ON(ret);
670
			}
671
			sums += (comp_bio->bi_size + root->sectorsize - 1) /
672
				root->sectorsize;
673

674
			ret = btrfs_map_bio(root, READ, comp_bio,
675
					    mirror_num, 0);
676
			BUG_ON(ret);
677

678
			bio_put(comp_bio);
679

680
			comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
681
							GFP_NOFS);
682
			comp_bio->bi_private = cb;
683
			comp_bio->bi_end_io = end_compressed_bio_read;
684

685
			bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0);
686
		}
687
		cur_disk_byte += PAGE_CACHE_SIZE;
688
	}
689
	bio_get(comp_bio);
690

691
	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
692
	BUG_ON(ret);
693

694
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
695
		ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
696
		BUG_ON(ret);
697
	}
698

699
	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
700
	BUG_ON(ret);
701

702
	bio_put(comp_bio);
703
	return 0;
704

705
fail2:
706
	for (pg_index = 0; pg_index < nr_pages; pg_index++)
707
		free_page((unsigned long)cb->compressed_pages[pg_index]);
708

709
	kfree(cb->compressed_pages);
710
fail1:
711
	kfree(cb);
712
out:
713
	free_extent_map(em);
714
	return ret;
715
}
716

717
static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
718
static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
719
static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
720
static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
721
static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
722

723
struct btrfs_compress_op *btrfs_compress_op[] = {
724
	&btrfs_zlib_compress,
725
	&btrfs_lzo_compress,
726
};
727

728
/*
 * Set up the workspace bookkeeping for every compression type: an empty
 * idle list, its lock, a zero allocation count and the wait queue.
 * Always returns 0.
 */
int __init btrfs_init_compress(void)
{
	int slot;

	for (slot = 0; slot < BTRFS_COMPRESS_TYPES; slot++) {
		spin_lock_init(&comp_workspace_lock[slot]);
		init_waitqueue_head(&comp_workspace_wait[slot]);
		INIT_LIST_HEAD(&comp_idle_workspace[slot]);
		atomic_set(&comp_alloc_workspace[slot], 0);
	}

	return 0;
}
740

741
/*
742
 * this finds an available workspace or allocates a new one
743
 * ERR_PTR is returned if things go bad.
744
 */
745
static struct list_head *find_workspace(int type)
746
{
747
	struct list_head *workspace;
748
	int cpus = num_online_cpus();
749
	int idx = type - 1;
750

751
	struct list_head *idle_workspace	= &comp_idle_workspace[idx];
752
	spinlock_t *workspace_lock		= &comp_workspace_lock[idx];
753
	atomic_t *alloc_workspace		= &comp_alloc_workspace[idx];
754
	wait_queue_head_t *workspace_wait	= &comp_workspace_wait[idx];
755
	int *num_workspace			= &comp_num_workspace[idx];
756
again:
757
	spin_lock(workspace_lock);
758
	if (!list_empty(idle_workspace)) {
759
		workspace = idle_workspace->next;
760
		list_del(workspace);
761
		(*num_workspace)--;
762
		spin_unlock(workspace_lock);
763
		return workspace;
764

765
	}
766
	if (atomic_read(alloc_workspace) > cpus) {
767
		DEFINE_WAIT(wait);
768

769
		spin_unlock(workspace_lock);
770
		prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
771
		if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
772
			schedule();
773
		finish_wait(workspace_wait, &wait);
774
		goto again;
775
	}
776
	atomic_inc(alloc_workspace);
777
	spin_unlock(workspace_lock);
778

779
	workspace = btrfs_compress_op[idx]->alloc_workspace();
780
	if (IS_ERR(workspace)) {
781
		atomic_dec(alloc_workspace);
782
		wake_up(workspace_wait);
783
	}
784
	return workspace;
785
}
786

787
/*
788
 * put a workspace struct back on the list or free it if we have enough
789
 * idle ones sitting around
790
 */
791
static void free_workspace(int type, struct list_head *workspace)
792
{
793
	int idx = type - 1;
794
	struct list_head *idle_workspace	= &comp_idle_workspace[idx];
795
	spinlock_t *workspace_lock		= &comp_workspace_lock[idx];
796
	atomic_t *alloc_workspace		= &comp_alloc_workspace[idx];
797
	wait_queue_head_t *workspace_wait	= &comp_workspace_wait[idx];
798
	int *num_workspace			= &comp_num_workspace[idx];
799

800
	spin_lock(workspace_lock);
801
	if (*num_workspace < num_online_cpus()) {
802
		list_add_tail(workspace, idle_workspace);
803
		(*num_workspace)++;
804
		spin_unlock(workspace_lock);
805
		goto wake;
806
	}
807
	spin_unlock(workspace_lock);
808

809
	btrfs_compress_op[idx]->free_workspace(workspace);
810
	atomic_dec(alloc_workspace);
811
wake:
812
	if (waitqueue_active(workspace_wait))
813
		wake_up(workspace_wait);
814
}
815

816
/*
817
 * cleanup function for module exit
818
 */
819
static void free_workspaces(void)
820
{
821
	struct list_head *workspace;
822
	int i;
823

824
	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
825
		while (!list_empty(&comp_idle_workspace[i])) {
826
			workspace = comp_idle_workspace[i].next;
827
			list_del(workspace);
828
			btrfs_compress_op[i]->free_workspace(workspace);
829
			atomic_dec(&comp_alloc_workspace[i]);
830
		}
831
	}
832
}
833

834
/*
835
 * given an address space and start/len, compress the bytes.
836
 *
837
 * pages are allocated to hold the compressed result and stored
838
 * in 'pages'
839
 *
840
 * out_pages is used to return the number of pages allocated.  There
841
 * may be pages allocated even if we return an error
842
 *
843
 * total_in is used to return the number of bytes actually read.  It
844
 * may be smaller then len if we had to exit early because we
845
 * ran out of room in the pages array or because we cross the
846
 * max_out threshold.
847
 *
848
 * total_out is used to return the total number of compressed bytes
849
 *
850
 * max_out tells us the max number of bytes that we're allowed to
851
 * stuff into pages
852
 */
853
int btrfs_compress_pages(int type, struct address_space *mapping,
854
			 u64 start, unsigned long len,
855
			 struct page **pages,
856
			 unsigned long nr_dest_pages,
857
			 unsigned long *out_pages,
858
			 unsigned long *total_in,
859
			 unsigned long *total_out,
860
			 unsigned long max_out)
861
{
862
	struct list_head *workspace;
863
	int ret;
864

865
	workspace = find_workspace(type);
866
	if (IS_ERR(workspace))
867
		return -1;
868

869
	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
870
						      start, len, pages,
871
						      nr_dest_pages, out_pages,
872
						      total_in, total_out,
873
						      max_out);
874
	free_workspace(type, workspace);
875
	return ret;
876
}
877

878
/*
879
 * pages_in is an array of pages with compressed data.
880
 *
881
 * disk_start is the starting logical offset of this array in the file
882
 *
883
 * bvec is a bio_vec of pages from the file that we want to decompress into
884
 *
885
 * vcnt is the count of pages in the biovec
886
 *
887
 * srclen is the number of bytes in pages_in
888
 *
889
 * The basic idea is that we have a bio that was created by readpages.
890
 * The pages in the bio are for the uncompressed data, and they may not
891
 * be contiguous.  They all correspond to the range of bytes covered by
892
 * the compressed extent.
893
 */
894
int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
895
			    struct bio_vec *bvec, int vcnt, size_t srclen)
896
{
897
	struct list_head *workspace;
898
	int ret;
899

900
	workspace = find_workspace(type);
901
	if (IS_ERR(workspace))
902
		return -ENOMEM;
903

904
	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
905
							 disk_start,
906
							 bvec, vcnt, srclen);
907
	free_workspace(type, workspace);
908
	return ret;
909
}
910

911
/*
912
 * a less complex decompression routine.  Our compressed data fits in a
913
 * single page, and we want to read a single page out of it.
914
 * start_byte tells us the offset into the compressed data we're interested in
915
 */
916
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
917
		     unsigned long start_byte, size_t srclen, size_t destlen)
918
{
919
	struct list_head *workspace;
920
	int ret;
921

922
	workspace = find_workspace(type);
923
	if (IS_ERR(workspace))
924
		return -ENOMEM;
925

926
	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
927
						  dest_page, start_byte,
928
						  srclen, destlen);
929

930
	free_workspace(type, workspace);
931
	return ret;
932
}
933

934
/* Tear down the compression code by freeing all idle workspaces. */
void btrfs_exit_compress(void)
{
	free_workspaces();
}
938

939
/*
940
 * Copy uncompressed data from working buffer to pages.
941
 *
942
 * buf_start is the byte offset we're of the start of our workspace buffer.
943
 *
944
 * total_out is the last byte of the buffer
945
 */
946
int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
947
			      unsigned long total_out, u64 disk_start,
948
			      struct bio_vec *bvec, int vcnt,
949
			      unsigned long *pg_index,
950
			      unsigned long *pg_offset)
951
{
952
	unsigned long buf_offset;
953
	unsigned long current_buf_start;
954
	unsigned long start_byte;
955
	unsigned long working_bytes = total_out - buf_start;
956
	unsigned long bytes;
957
	char *kaddr;
958
	struct page *page_out = bvec[*pg_index].bv_page;
959

960
	/*
961
	 * start byte is the first byte of the page we're currently
962
	 * copying into relative to the start of the compressed data.
963
	 */
964
	start_byte = page_offset(page_out) - disk_start;
965

966
	/* we haven't yet hit data corresponding to this page */
967
	if (total_out <= start_byte)
968
		return 1;
969

970
	/*
971
	 * the start of the data we care about is offset into
972
	 * the middle of our working buffer
973
	 */
974
	if (total_out > start_byte && buf_start < start_byte) {
975
		buf_offset = start_byte - buf_start;
976
		working_bytes -= buf_offset;
977
	} else {
978
		buf_offset = 0;
979
	}
980
	current_buf_start = buf_start;
981

982
	/* copy bytes from the working buffer into the pages */
983
	while (working_bytes > 0) {
984
		bytes = min(PAGE_CACHE_SIZE - *pg_offset,
985
			    PAGE_CACHE_SIZE - buf_offset);
986
		bytes = min(bytes, working_bytes);
987
		kaddr = kmap_atomic(page_out, KM_USER0);
988
		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
989
		kunmap_atomic(kaddr, KM_USER0);
990
		flush_dcache_page(page_out);
991

992
		*pg_offset += bytes;
993
		buf_offset += bytes;
994
		working_bytes -= bytes;
995
		current_buf_start += bytes;
996

997
		/* check if we need to pick another page */
998
		if (*pg_offset == PAGE_CACHE_SIZE) {
999
			(*pg_index)++;
1000
			if (*pg_index >= vcnt)
1001
				return 0;
1002

1003
			page_out = bvec[*pg_index].bv_page;
1004
			*pg_offset = 0;
1005
			start_byte = page_offset(page_out) - disk_start;
1006

1007
			/*
1008
			 * make sure our new page is covered by this
1009
			 * working buffer
1010
			 */
1011
			if (total_out <= start_byte)
1012
				return 1;
1013

1014
			/*
1015
			 * the next page in the biovec might not be adjacent
1016
			 * to the last page, but it might still be found
1017
			 * inside this working buffer. bump our offset pointer
1018
			 */
1019
			if (total_out > start_byte &&
1020
			    current_buf_start < start_byte) {
1021
				buf_offset = start_byte - buf_start;
1022
				working_bytes = total_out - start_byte;
1023
				current_buf_start = buf_start + buf_offset;
1024
			}
1025
		}
1026
	}
1027

1028
	return 1;
1029
}
1030

1031
Product

Resources

Company