GitHub Repository: torvalds/linux
Path: blob/master/drivers/block/zram/zram_drv.c
1
/*
2
* Compressed RAM block device
3
*
4
* Copyright (C) 2008, 2009, 2010 Nitin Gupta
5
* 2012, 2013 Minchan Kim
6
*
7
* This code is released using a dual license strategy: BSD/GPL
8
* You can choose the licence that better fits your requirements.
9
*
10
* Released under the terms of 3-clause BSD License
11
* Released under the terms of GNU General Public License Version 2.0
12
*
13
*/
14
15
#define KMSG_COMPONENT "zram"
16
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18
#include <linux/module.h>
19
#include <linux/kernel.h>
20
#include <linux/bio.h>
21
#include <linux/bitops.h>
22
#include <linux/blkdev.h>
23
#include <linux/buffer_head.h>
24
#include <linux/device.h>
25
#include <linux/highmem.h>
26
#include <linux/slab.h>
27
#include <linux/backing-dev.h>
28
#include <linux/string.h>
29
#include <linux/vmalloc.h>
30
#include <linux/err.h>
31
#include <linux/idr.h>
32
#include <linux/sysfs.h>
33
#include <linux/debugfs.h>
34
#include <linux/cpuhotplug.h>
35
#include <linux/part_stat.h>
36
#include <linux/kernel_read_file.h>
37
38
#include "zram_drv.h"
39
40
static DEFINE_IDR(zram_index_idr);
41
/* idr index must be protected */
42
static DEFINE_MUTEX(zram_index_mutex);
43
44
static int zram_major;
45
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
46
47
#define ZRAM_MAX_ALGO_NAME_SZ 128
48
49
/* Module params (documentation at end) */
50
static unsigned int num_devices = 1;
51
/*
52
* Pages that compress to sizes equal to or greater than this are stored
53
* uncompressed in memory.
54
*/
55
static size_t huge_class_size;
56
57
static const struct block_device_operations zram_devops;
58
59
static void zram_free_page(struct zram *zram, size_t index);
60
static int zram_read_from_zspool(struct zram *zram, struct page *page,
61
u32 index);
62
63
#define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map)
64
65
static void zram_slot_lock_init(struct zram *zram, u32 index)
66
{
67
static struct lock_class_key __key;
68
69
lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock",
70
&__key, 0);
71
}
72
73
/*
74
* entry locking rules:
75
*
76
* 1) Lock is exclusive
77
*
78
* 2) lock() function can sleep waiting for the lock
79
*
80
* 3) Lock owner can sleep
81
*
82
* 4) Use TRY lock variant when in atomic context
83
* - must check return value and handle locking failures
84
*/
85
static __must_check bool zram_slot_trylock(struct zram *zram, u32 index)
86
{
87
unsigned long *lock = &zram->table[index].flags;
88
89
if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) {
90
mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_);
91
lock_acquired(slot_dep_map(zram, index), _RET_IP_);
92
return true;
93
}
94
95
return false;
96
}
97
98
static void zram_slot_lock(struct zram *zram, u32 index)
99
{
100
unsigned long *lock = &zram->table[index].flags;
101
102
mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_);
103
wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE);
104
lock_acquired(slot_dep_map(zram, index), _RET_IP_);
105
}
106
107
static void zram_slot_unlock(struct zram *zram, u32 index)
108
{
109
unsigned long *lock = &zram->table[index].flags;
110
111
mutex_release(slot_dep_map(zram, index), _RET_IP_);
112
clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock);
113
}
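
/*
 * Illustrative usage sketch of the slot lock above (no new API is
 * introduced): the lock is a single ZRAM_ENTRY_LOCK bit inside
 * table[index].flags, annotated for lockdep through slot_dep_map().
 * A typical caller follows this pattern:
 *
 *	zram_slot_lock(zram, index);
 *	if (zram_allocated(zram, index))
 *		... inspect or modify table[index] ...
 *	zram_slot_unlock(zram, index);
 *
 * In atomic context callers must use zram_slot_trylock() instead and
 * handle a failed trylock.
 */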
114
115
static inline bool init_done(struct zram *zram)
116
{
117
return zram->disksize;
118
}
119
120
static inline struct zram *dev_to_zram(struct device *dev)
121
{
122
return (struct zram *)dev_to_disk(dev)->private_data;
123
}
124
125
static unsigned long zram_get_handle(struct zram *zram, u32 index)
126
{
127
return zram->table[index].handle;
128
}
129
130
static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
131
{
132
zram->table[index].handle = handle;
133
}
134
135
static bool zram_test_flag(struct zram *zram, u32 index,
136
enum zram_pageflags flag)
137
{
138
return zram->table[index].flags & BIT(flag);
139
}
140
141
static void zram_set_flag(struct zram *zram, u32 index,
142
enum zram_pageflags flag)
143
{
144
zram->table[index].flags |= BIT(flag);
145
}
146
147
static void zram_clear_flag(struct zram *zram, u32 index,
148
enum zram_pageflags flag)
149
{
150
zram->table[index].flags &= ~BIT(flag);
151
}
152
153
static size_t zram_get_obj_size(struct zram *zram, u32 index)
154
{
155
return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
156
}
157
158
static void zram_set_obj_size(struct zram *zram,
159
u32 index, size_t size)
160
{
161
unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
162
163
zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
164
}
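
/*
 * Layout sketch of table[index].flags, as implied by the helpers above
 * (the exact bit positions live in zram_drv.h): the low ZRAM_FLAG_SHIFT
 * bits hold the compressed object size (zram_get_obj_size()), the bits
 * above that hold individual zram_pageflags bits (ZRAM_SAME, ZRAM_WB, ...),
 * and the ZRAM_COMP_PRIORITY_MASK bits at ZRAM_COMP_PRIORITY_BIT1 (see
 * zram_set_priority() below) hold the compression priority.
 */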
165
166
static inline bool zram_allocated(struct zram *zram, u32 index)
167
{
168
return zram_get_obj_size(zram, index) ||
169
zram_test_flag(zram, index, ZRAM_SAME) ||
170
zram_test_flag(zram, index, ZRAM_WB);
171
}
172
173
static inline void update_used_max(struct zram *zram, const unsigned long pages)
174
{
175
unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);
176
177
do {
178
if (cur_max >= pages)
179
return;
180
} while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
181
&cur_max, pages));
182
}
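
/*
 * update_used_max() is a lock-free "racy maximum" update: a failed
 * atomic_long_try_cmpxchg() refreshes cur_max with the observed value, and
 * the loop stops once the stored maximum is already >= pages.  For example,
 * two writers racing with pages=10 and pages=12 always leave
 * max_used_pages at 12, regardless of ordering.
 */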
183
184
static bool zram_can_store_page(struct zram *zram)
185
{
186
unsigned long alloced_pages;
187
188
alloced_pages = zs_get_total_pages(zram->mem_pool);
189
update_used_max(zram, alloced_pages);
190
191
return !zram->limit_pages || alloced_pages <= zram->limit_pages;
192
}
193
194
#if PAGE_SIZE != 4096
195
static inline bool is_partial_io(struct bio_vec *bvec)
196
{
197
return bvec->bv_len != PAGE_SIZE;
198
}
199
#define ZRAM_PARTIAL_IO 1
200
#else
201
static inline bool is_partial_io(struct bio_vec *bvec)
202
{
203
return false;
204
}
205
#endif
206
207
static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
208
{
209
prio &= ZRAM_COMP_PRIORITY_MASK;
210
/*
211
* Clear the previous priority value first, in case we are recompressing
212
* a page that has already been recompressed
213
*/
214
zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
215
ZRAM_COMP_PRIORITY_BIT1);
216
zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
217
}
218
219
static inline u32 zram_get_priority(struct zram *zram, u32 index)
220
{
221
u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;
222
223
return prio & ZRAM_COMP_PRIORITY_MASK;
224
}
225
226
static void zram_accessed(struct zram *zram, u32 index)
227
{
228
zram_clear_flag(zram, index, ZRAM_IDLE);
229
zram_clear_flag(zram, index, ZRAM_PP_SLOT);
230
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
231
zram->table[index].ac_time = ktime_get_boottime();
232
#endif
233
}
234
235
#if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
236
struct zram_pp_slot {
237
unsigned long index;
238
struct list_head entry;
239
};
240
241
/*
242
* A post-processing bucket is, essentially, a size class; this defines
243
* the range (in bytes) of pp-slot sizes in a particular bucket.
244
*/
245
#define PP_BUCKET_SIZE_RANGE 64
246
#define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)
247
248
struct zram_pp_ctl {
249
struct list_head pp_buckets[NUM_PP_BUCKETS];
250
};
251
252
static struct zram_pp_ctl *init_pp_ctl(void)
253
{
254
struct zram_pp_ctl *ctl;
255
u32 idx;
256
257
ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
258
if (!ctl)
259
return NULL;
260
261
for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
262
INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
263
return ctl;
264
}
265
266
static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
267
{
268
list_del_init(&pps->entry);
269
270
zram_slot_lock(zram, pps->index);
271
zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
272
zram_slot_unlock(zram, pps->index);
273
274
kfree(pps);
275
}
276
277
static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
278
{
279
u32 idx;
280
281
if (!ctl)
282
return;
283
284
for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
285
while (!list_empty(&ctl->pp_buckets[idx])) {
286
struct zram_pp_slot *pps;
287
288
pps = list_first_entry(&ctl->pp_buckets[idx],
289
struct zram_pp_slot,
290
entry);
291
release_pp_slot(zram, pps);
292
}
293
}
294
295
kfree(ctl);
296
}
297
298
static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
299
u32 index)
300
{
301
struct zram_pp_slot *pps;
302
u32 bid;
303
304
pps = kmalloc(sizeof(*pps), GFP_NOIO | __GFP_NOWARN);
305
if (!pps)
306
return false;
307
308
INIT_LIST_HEAD(&pps->entry);
309
pps->index = index;
310
311
bid = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
312
list_add(&pps->entry, &ctl->pp_buckets[bid]);
313
314
zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
315
return true;
316
}
317
318
static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
319
{
320
struct zram_pp_slot *pps = NULL;
321
s32 idx = NUM_PP_BUCKETS - 1;
322
323
/* The higher the bucket id, the more optimal slot post-processing is */
324
while (idx >= 0) {
325
pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
326
struct zram_pp_slot,
327
entry);
328
if (pps)
329
break;
330
331
idx--;
332
}
333
return pps;
334
}
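
/*
 * Bucket selection example (with the PP_BUCKET_SIZE_RANGE of 64 bytes
 * defined above): a 300-byte slot lands in bucket 300 / 64 = 4, while a
 * PAGE_SIZE (huge) slot lands in the last bucket.  Since select_pp_slot()
 * walks buckets from the highest id down, the largest (least compressible)
 * slots are post-processed first.
 */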
335
#endif
336
337
static inline void zram_fill_page(void *ptr, unsigned long len,
338
unsigned long value)
339
{
340
WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
341
memset_l(ptr, value, len / sizeof(unsigned long));
342
}
343
344
static bool page_same_filled(void *ptr, unsigned long *element)
345
{
346
unsigned long *page;
347
unsigned long val;
348
unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
349
350
page = (unsigned long *)ptr;
351
val = page[0];
352
353
if (val != page[last_pos])
354
return false;
355
356
for (pos = 1; pos < last_pos; pos++) {
357
if (val != page[pos])
358
return false;
359
}
360
361
*element = val;
362
363
return true;
364
}
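
/*
 * Same-filled detection sketch: the page is treated as an array of
 * unsigned longs; the first word is compared against the last and then
 * against every word in between.  A zero page, for instance, yields
 * *element == 0 and is later stored with just the ZRAM_SAME flag and the
 * fill value in the handle field, with no zsmalloc allocation at all.
 */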
365
366
static ssize_t initstate_show(struct device *dev,
367
struct device_attribute *attr, char *buf)
368
{
369
u32 val;
370
struct zram *zram = dev_to_zram(dev);
371
372
down_read(&zram->init_lock);
373
val = init_done(zram);
374
up_read(&zram->init_lock);
375
376
return sysfs_emit(buf, "%u\n", val);
377
}
378
379
static ssize_t disksize_show(struct device *dev,
380
struct device_attribute *attr, char *buf)
381
{
382
struct zram *zram = dev_to_zram(dev);
383
384
return sysfs_emit(buf, "%llu\n", zram->disksize);
385
}
386
387
static ssize_t mem_limit_store(struct device *dev,
388
struct device_attribute *attr, const char *buf, size_t len)
389
{
390
u64 limit;
391
char *tmp;
392
struct zram *zram = dev_to_zram(dev);
393
394
limit = memparse(buf, &tmp);
395
if (buf == tmp) /* no chars parsed, invalid input */
396
return -EINVAL;
397
398
down_write(&zram->init_lock);
399
zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
400
up_write(&zram->init_lock);
401
402
return len;
403
}
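
/*
 * Usage sketch (paths assume the standard zram sysfs layout): the limit is
 * parsed with memparse(), so size suffixes work, e.g.
 *
 *	echo 1G > /sys/block/zram0/mem_limit
 *
 * and writing 0 removes the limit, because a zero limit_pages disables the
 * check in zram_can_store_page().
 */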
404
405
static ssize_t mem_used_max_store(struct device *dev,
406
struct device_attribute *attr, const char *buf, size_t len)
407
{
408
int err;
409
unsigned long val;
410
struct zram *zram = dev_to_zram(dev);
411
412
err = kstrtoul(buf, 10, &val);
413
if (err || val != 0)
414
return -EINVAL;
415
416
down_read(&zram->init_lock);
417
if (init_done(zram)) {
418
atomic_long_set(&zram->stats.max_used_pages,
419
zs_get_total_pages(zram->mem_pool));
420
}
421
up_read(&zram->init_lock);
422
423
return len;
424
}
425
426
/*
427
* Mark all pages which are older than or equal to cutoff as IDLE.
428
* Callers should hold the zram init lock in read mode
429
*/
430
static void mark_idle(struct zram *zram, ktime_t cutoff)
431
{
432
int is_idle = 1;
433
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
434
int index;
435
436
for (index = 0; index < nr_pages; index++) {
437
/*
438
* Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
439
* post-processing (recompress, writeback) happens to the
440
* ZRAM_SAME slot.
441
*
442
* And ZRAM_WB slots simply cannot be ZRAM_IDLE.
443
*/
444
zram_slot_lock(zram, index);
445
if (!zram_allocated(zram, index) ||
446
zram_test_flag(zram, index, ZRAM_WB) ||
447
zram_test_flag(zram, index, ZRAM_SAME)) {
448
zram_slot_unlock(zram, index);
449
continue;
450
}
451
452
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
453
is_idle = !cutoff ||
454
ktime_after(cutoff, zram->table[index].ac_time);
455
#endif
456
if (is_idle)
457
zram_set_flag(zram, index, ZRAM_IDLE);
458
else
459
zram_clear_flag(zram, index, ZRAM_IDLE);
460
zram_slot_unlock(zram, index);
461
}
462
}
463
464
static ssize_t idle_store(struct device *dev,
465
struct device_attribute *attr, const char *buf, size_t len)
466
{
467
struct zram *zram = dev_to_zram(dev);
468
ktime_t cutoff_time = 0;
469
ssize_t rv = -EINVAL;
470
471
if (!sysfs_streq(buf, "all")) {
472
/*
473
* If it did not parse as 'all' try to treat it as an integer
474
* when we have memory tracking enabled.
475
*/
476
u64 age_sec;
477
478
if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && !kstrtoull(buf, 0, &age_sec))
479
cutoff_time = ktime_sub(ktime_get_boottime(),
480
ns_to_ktime(age_sec * NSEC_PER_SEC));
481
else
482
goto out;
483
}
484
485
down_read(&zram->init_lock);
486
if (!init_done(zram))
487
goto out_unlock;
488
489
/*
490
* A cutoff_time of 0 marks everything as idle; this is the
491
* "all" behavior.
492
*/
493
mark_idle(zram, cutoff_time);
494
rv = len;
495
496
out_unlock:
497
up_read(&zram->init_lock);
498
out:
499
return rv;
500
}
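
/*
 * Usage sketch (assuming the standard sysfs path):
 *
 *	echo all > /sys/block/zram0/idle
 *
 * marks every eligible slot idle, while, with CONFIG_ZRAM_TRACK_ENTRY_ACTIME
 * enabled, e.g.
 *
 *	echo 3600 > /sys/block/zram0/idle
 *
 * marks only slots that have not been accessed for the last 3600 seconds.
 */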
501
502
#ifdef CONFIG_ZRAM_WRITEBACK
503
#define INVALID_BDEV_BLOCK (~0UL)
504
505
struct zram_wb_ctl {
506
/* idle list is accessed only by the writeback task, no concurrency */
507
struct list_head idle_reqs;
508
/* done list is accessed concurrently, protected by done_lock */
509
struct list_head done_reqs;
510
wait_queue_head_t done_wait;
511
spinlock_t done_lock;
512
atomic_t num_inflight;
513
};
514
515
struct zram_wb_req {
516
unsigned long blk_idx;
517
struct page *page;
518
struct zram_pp_slot *pps;
519
struct bio_vec bio_vec;
520
struct bio bio;
521
522
struct list_head entry;
523
};
524
525
static ssize_t writeback_limit_enable_store(struct device *dev,
526
struct device_attribute *attr,
527
const char *buf, size_t len)
528
{
529
struct zram *zram = dev_to_zram(dev);
530
u64 val;
531
ssize_t ret = -EINVAL;
532
533
if (kstrtoull(buf, 10, &val))
534
return ret;
535
536
down_write(&zram->init_lock);
537
zram->wb_limit_enable = val;
538
up_write(&zram->init_lock);
539
ret = len;
540
541
return ret;
542
}
543
544
static ssize_t writeback_limit_enable_show(struct device *dev,
545
struct device_attribute *attr,
546
char *buf)
547
{
548
bool val;
549
struct zram *zram = dev_to_zram(dev);
550
551
down_read(&zram->init_lock);
552
val = zram->wb_limit_enable;
553
up_read(&zram->init_lock);
554
555
return sysfs_emit(buf, "%d\n", val);
556
}
557
558
static ssize_t writeback_limit_store(struct device *dev,
559
struct device_attribute *attr,
560
const char *buf, size_t len)
561
{
562
struct zram *zram = dev_to_zram(dev);
563
u64 val;
564
ssize_t ret = -EINVAL;
565
566
if (kstrtoull(buf, 10, &val))
567
return ret;
568
569
/*
570
* When the page size is greater than 4KB, if bd_wb_limit is set to
571
* a value that is not page-size aligned, it will cause value
572
* wrapping. For example, when the page size is set to 16KB and
573
* bd_wb_limit is set to 3, a single write-back operation will
574
* cause bd_wb_limit to become -1, and since bd_wb_limit is an
575
* unsigned number it actually wraps around to a huge value.
576
*/
577
val = rounddown(val, PAGE_SIZE / 4096);
578
579
down_write(&zram->init_lock);
580
zram->bd_wb_limit = val;
581
up_write(&zram->init_lock);
582
ret = len;
583
584
return ret;
585
}
586
587
static ssize_t writeback_limit_show(struct device *dev,
588
struct device_attribute *attr, char *buf)
589
{
590
u64 val;
591
struct zram *zram = dev_to_zram(dev);
592
593
down_read(&zram->init_lock);
594
val = zram->bd_wb_limit;
595
up_read(&zram->init_lock);
596
597
return sysfs_emit(buf, "%llu\n", val);
598
}
599
600
static ssize_t writeback_batch_size_store(struct device *dev,
601
struct device_attribute *attr,
602
const char *buf, size_t len)
603
{
604
struct zram *zram = dev_to_zram(dev);
605
u32 val;
606
607
if (kstrtouint(buf, 10, &val))
608
return -EINVAL;
609
610
if (!val)
611
return -EINVAL;
612
613
down_write(&zram->init_lock);
614
zram->wb_batch_size = val;
615
up_write(&zram->init_lock);
616
617
return len;
618
}
619
620
static ssize_t writeback_batch_size_show(struct device *dev,
621
struct device_attribute *attr,
622
char *buf)
623
{
624
u32 val;
625
struct zram *zram = dev_to_zram(dev);
626
627
down_read(&zram->init_lock);
628
val = zram->wb_batch_size;
629
up_read(&zram->init_lock);
630
631
return sysfs_emit(buf, "%u\n", val);
632
}
633
634
static void reset_bdev(struct zram *zram)
635
{
636
if (!zram->backing_dev)
637
return;
638
639
/* hope filp_close flushes all of the IO */
640
filp_close(zram->backing_dev, NULL);
641
zram->backing_dev = NULL;
642
zram->bdev = NULL;
643
zram->disk->fops = &zram_devops;
644
kvfree(zram->bitmap);
645
zram->bitmap = NULL;
646
}
647
648
static ssize_t backing_dev_show(struct device *dev,
649
struct device_attribute *attr, char *buf)
650
{
651
struct file *file;
652
struct zram *zram = dev_to_zram(dev);
653
char *p;
654
ssize_t ret;
655
656
down_read(&zram->init_lock);
657
file = zram->backing_dev;
658
if (!file) {
659
memcpy(buf, "none\n", 5);
660
up_read(&zram->init_lock);
661
return 5;
662
}
663
664
p = file_path(file, buf, PAGE_SIZE - 1);
665
if (IS_ERR(p)) {
666
ret = PTR_ERR(p);
667
goto out;
668
}
669
670
ret = strlen(p);
671
memmove(buf, p, ret);
672
buf[ret++] = '\n';
673
out:
674
up_read(&zram->init_lock);
675
return ret;
676
}
677
678
static ssize_t backing_dev_store(struct device *dev,
679
struct device_attribute *attr, const char *buf, size_t len)
680
{
681
char *file_name;
682
size_t sz;
683
struct file *backing_dev = NULL;
684
struct inode *inode;
685
unsigned int bitmap_sz;
686
unsigned long nr_pages, *bitmap = NULL;
687
int err;
688
struct zram *zram = dev_to_zram(dev);
689
690
file_name = kmalloc(PATH_MAX, GFP_KERNEL);
691
if (!file_name)
692
return -ENOMEM;
693
694
down_write(&zram->init_lock);
695
if (init_done(zram)) {
696
pr_info("Can't setup backing device for initialized device\n");
697
err = -EBUSY;
698
goto out;
699
}
700
701
strscpy(file_name, buf, PATH_MAX);
702
/* ignore trailing newline */
703
sz = strlen(file_name);
704
if (sz > 0 && file_name[sz - 1] == '\n')
705
file_name[sz - 1] = 0x00;
706
707
backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0);
708
if (IS_ERR(backing_dev)) {
709
err = PTR_ERR(backing_dev);
710
backing_dev = NULL;
711
goto out;
712
}
713
714
inode = backing_dev->f_mapping->host;
715
716
/* Only block devices are supported at the moment */
717
if (!S_ISBLK(inode->i_mode)) {
718
err = -ENOTBLK;
719
goto out;
720
}
721
722
nr_pages = i_size_read(inode) >> PAGE_SHIFT;
723
/* Refuse to use a zero-sized device (also prevents self reference) */
724
if (!nr_pages) {
725
err = -EINVAL;
726
goto out;
727
}
728
729
bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
730
bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
731
if (!bitmap) {
732
err = -ENOMEM;
733
goto out;
734
}
735
736
reset_bdev(zram);
737
738
zram->bdev = I_BDEV(inode);
739
zram->backing_dev = backing_dev;
740
zram->bitmap = bitmap;
741
zram->nr_pages = nr_pages;
742
up_write(&zram->init_lock);
743
744
pr_info("setup backing device %s\n", file_name);
745
kfree(file_name);
746
747
return len;
748
out:
749
kvfree(bitmap);
750
751
if (backing_dev)
752
filp_close(backing_dev, NULL);
753
754
up_write(&zram->init_lock);
755
756
kfree(file_name);
757
758
return err;
759
}
760
761
static unsigned long zram_reserve_bdev_block(struct zram *zram)
762
{
763
unsigned long blk_idx;
764
765
blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0);
766
if (blk_idx == zram->nr_pages)
767
return INVALID_BDEV_BLOCK;
768
769
set_bit(blk_idx, zram->bitmap);
770
atomic64_inc(&zram->stats.bd_count);
771
return blk_idx;
772
}
773
774
static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
775
{
776
int was_set;
777
778
was_set = test_and_clear_bit(blk_idx, zram->bitmap);
779
WARN_ON_ONCE(!was_set);
780
atomic64_dec(&zram->stats.bd_count);
781
}
782
783
static void read_from_bdev_async(struct zram *zram, struct page *page,
784
unsigned long entry, struct bio *parent)
785
{
786
struct bio *bio;
787
788
bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
789
bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
790
__bio_add_page(bio, page, PAGE_SIZE, 0);
791
bio_chain(bio, parent);
792
submit_bio(bio);
793
}
794
795
static void release_wb_req(struct zram_wb_req *req)
796
{
797
__free_page(req->page);
798
kfree(req);
799
}
800
801
static void release_wb_ctl(struct zram_wb_ctl *wb_ctl)
802
{
803
if (!wb_ctl)
804
return;
805
806
/* We should never have inflight requests at this point */
807
WARN_ON(atomic_read(&wb_ctl->num_inflight));
808
WARN_ON(!list_empty(&wb_ctl->done_reqs));
809
810
while (!list_empty(&wb_ctl->idle_reqs)) {
811
struct zram_wb_req *req;
812
813
req = list_first_entry(&wb_ctl->idle_reqs,
814
struct zram_wb_req, entry);
815
list_del(&req->entry);
816
release_wb_req(req);
817
}
818
819
kfree(wb_ctl);
820
}
821
822
static struct zram_wb_ctl *init_wb_ctl(struct zram *zram)
823
{
824
struct zram_wb_ctl *wb_ctl;
825
int i;
826
827
wb_ctl = kmalloc(sizeof(*wb_ctl), GFP_KERNEL);
828
if (!wb_ctl)
829
return NULL;
830
831
INIT_LIST_HEAD(&wb_ctl->idle_reqs);
832
INIT_LIST_HEAD(&wb_ctl->done_reqs);
833
atomic_set(&wb_ctl->num_inflight, 0);
834
init_waitqueue_head(&wb_ctl->done_wait);
835
spin_lock_init(&wb_ctl->done_lock);
836
837
for (i = 0; i < zram->wb_batch_size; i++) {
838
struct zram_wb_req *req;
839
840
/*
841
* This is a fatal condition only if we couldn't allocate
842
* any requests at all. Otherwise we just work with the
843
* requests that we have successfully allocated, so that
844
* writeback can still proceed, even if there is only one
845
* request on the idle list.
846
*/
847
req = kzalloc(sizeof(*req), GFP_KERNEL | __GFP_NOWARN);
848
if (!req)
849
break;
850
851
req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN);
852
if (!req->page) {
853
kfree(req);
854
break;
855
}
856
857
list_add(&req->entry, &wb_ctl->idle_reqs);
858
}
859
860
/* We couldn't allocate any requests, so writeback is not possible */
861
if (list_empty(&wb_ctl->idle_reqs))
862
goto release_wb_ctl;
863
864
return wb_ctl;
865
866
release_wb_ctl:
867
release_wb_ctl(wb_ctl);
868
return NULL;
869
}
870
871
static void zram_account_writeback_rollback(struct zram *zram)
872
{
873
lockdep_assert_held_read(&zram->init_lock);
874
875
if (zram->wb_limit_enable)
876
zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12);
877
}
878
879
static void zram_account_writeback_submit(struct zram *zram)
880
{
881
lockdep_assert_held_read(&zram->init_lock);
882
883
if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
884
zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
885
}
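
/*
 * Accounting sketch: bd_wb_limit is kept in 4K units, so each written-back
 * page costs 1UL << (PAGE_SHIFT - 12) units: one unit on a 4K-page system,
 * four units when PAGE_SIZE is 16K.  A failed request gets the same amount
 * added back by zram_account_writeback_rollback().
 */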
886
887
static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req)
888
{
889
u32 index = req->pps->index;
890
int err;
891
892
err = blk_status_to_errno(req->bio.bi_status);
893
if (err) {
894
/*
895
* Failed wb requests should not be accounted in wb_limit
896
* (if enabled).
897
*/
898
zram_account_writeback_rollback(zram);
899
zram_release_bdev_block(zram, req->blk_idx);
900
return err;
901
}
902
903
atomic64_inc(&zram->stats.bd_writes);
904
zram_slot_lock(zram, index);
905
/*
906
* We release the slot lock during writeback, so the slot can change under us:
907
* slot_free(), or slot_free() followed by zram_write_page(). In both cases the
908
* slot loses its ZRAM_PP_SLOT flag. No concurrent post-processing can
909
* set ZRAM_PP_SLOT on such slots until the current post-processing
910
* finishes.
911
*/
912
if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) {
913
zram_release_bdev_block(zram, req->blk_idx);
914
goto out;
915
}
916
917
zram_free_page(zram, index);
918
zram_set_flag(zram, index, ZRAM_WB);
919
zram_set_handle(zram, index, req->blk_idx);
920
atomic64_inc(&zram->stats.pages_stored);
921
922
out:
923
zram_slot_unlock(zram, index);
924
return 0;
925
}
926
927
static void zram_writeback_endio(struct bio *bio)
928
{
929
struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio);
930
struct zram_wb_ctl *wb_ctl = bio->bi_private;
931
unsigned long flags;
932
933
spin_lock_irqsave(&wb_ctl->done_lock, flags);
934
list_add(&req->entry, &wb_ctl->done_reqs);
935
spin_unlock_irqrestore(&wb_ctl->done_lock, flags);
936
937
wake_up(&wb_ctl->done_wait);
938
}
939
940
static void zram_submit_wb_request(struct zram *zram,
941
struct zram_wb_ctl *wb_ctl,
942
struct zram_wb_req *req)
943
{
944
/*
945
* wb_limit (if enabled) should be adjusted before submission,
946
* so that we don't over-submit.
947
*/
948
zram_account_writeback_submit(zram);
949
atomic_inc(&wb_ctl->num_inflight);
950
req->bio.bi_private = wb_ctl;
951
submit_bio(&req->bio);
952
}
953
954
static int zram_complete_done_reqs(struct zram *zram,
955
struct zram_wb_ctl *wb_ctl)
956
{
957
struct zram_wb_req *req;
958
unsigned long flags;
959
int ret = 0, err;
960
961
while (atomic_read(&wb_ctl->num_inflight) > 0) {
962
spin_lock_irqsave(&wb_ctl->done_lock, flags);
963
req = list_first_entry_or_null(&wb_ctl->done_reqs,
964
struct zram_wb_req, entry);
965
if (req)
966
list_del(&req->entry);
967
spin_unlock_irqrestore(&wb_ctl->done_lock, flags);
968
969
/* ->num_inflight > 0 doesn't mean any requests have completed yet */
970
if (!req)
971
break;
972
973
err = zram_writeback_complete(zram, req);
974
if (err)
975
ret = err;
976
977
atomic_dec(&wb_ctl->num_inflight);
978
release_pp_slot(zram, req->pps);
979
req->pps = NULL;
980
981
list_add(&req->entry, &wb_ctl->idle_reqs);
982
}
983
984
return ret;
985
}
986
987
static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl)
988
{
989
struct zram_wb_req *req;
990
991
req = list_first_entry_or_null(&wb_ctl->idle_reqs,
992
struct zram_wb_req, entry);
993
if (req)
994
list_del(&req->entry);
995
return req;
996
}
997
998
static int zram_writeback_slots(struct zram *zram,
999
struct zram_pp_ctl *ctl,
1000
struct zram_wb_ctl *wb_ctl)
1001
{
1002
unsigned long blk_idx = INVALID_BDEV_BLOCK;
1003
struct zram_wb_req *req = NULL;
1004
struct zram_pp_slot *pps;
1005
int ret = 0, err = 0;
1006
u32 index = 0;
1007
1008
while ((pps = select_pp_slot(ctl))) {
1009
if (zram->wb_limit_enable && !zram->bd_wb_limit) {
1010
ret = -EIO;
1011
break;
1012
}
1013
1014
while (!req) {
1015
req = zram_select_idle_req(wb_ctl);
1016
if (req)
1017
break;
1018
1019
wait_event(wb_ctl->done_wait,
1020
!list_empty(&wb_ctl->done_reqs));
1021
1022
err = zram_complete_done_reqs(zram, wb_ctl);
1023
/*
1024
* BIO errors are not fatal; we continue and simply
1025
* attempt to write back the remaining objects (pages).
1026
* At the same time we need to signal user-space that
1027
* some writes (at least one, but also could be all of
1028
* them) were not successful and we do so by returning
1029
* the most recent BIO error.
1030
*/
1031
if (err)
1032
ret = err;
1033
}
1034
1035
if (blk_idx == INVALID_BDEV_BLOCK) {
1036
blk_idx = zram_reserve_bdev_block(zram);
1037
if (blk_idx == INVALID_BDEV_BLOCK) {
1038
ret = -ENOSPC;
1039
break;
1040
}
1041
}
1042
1043
index = pps->index;
1044
zram_slot_lock(zram, index);
1045
/*
1046
* scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so
1047
* slots can change in the meantime. If slots are accessed or
1048
* freed they lose ZRAM_PP_SLOT flag and hence we don't
1049
* post-process them.
1050
*/
1051
if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
1052
goto next;
1053
if (zram_read_from_zspool(zram, req->page, index))
1054
goto next;
1055
zram_slot_unlock(zram, index);
1056
1057
/*
1058
* From now on pp-slot is owned by the req, remove it from
1059
* its pp bucket.
1060
*/
1061
list_del_init(&pps->entry);
1062
1063
req->blk_idx = blk_idx;
1064
req->pps = pps;
1065
bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE);
1066
req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9);
1067
req->bio.bi_end_io = zram_writeback_endio;
1068
__bio_add_page(&req->bio, req->page, PAGE_SIZE, 0);
1069
1070
zram_submit_wb_request(zram, wb_ctl, req);
1071
blk_idx = INVALID_BDEV_BLOCK;
1072
req = NULL;
1073
cond_resched();
1074
continue;
1075
1076
next:
1077
zram_slot_unlock(zram, index);
1078
release_pp_slot(zram, pps);
1079
}
1080
1081
/*
1082
* We selected an idle req but never submitted it, due to an error or the
1083
* wb limit.
1084
*/
1085
if (req)
1086
release_wb_req(req);
1087
1088
while (atomic_read(&wb_ctl->num_inflight) > 0) {
1089
wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs));
1090
err = zram_complete_done_reqs(zram, wb_ctl);
1091
if (err)
1092
ret = err;
1093
}
1094
1095
return ret;
1096
}
1097
1098
#define PAGE_WRITEBACK 0
1099
#define HUGE_WRITEBACK (1 << 0)
1100
#define IDLE_WRITEBACK (1 << 1)
1101
#define INCOMPRESSIBLE_WRITEBACK (1 << 2)
1102
1103
static int parse_page_index(char *val, unsigned long nr_pages,
1104
unsigned long *lo, unsigned long *hi)
1105
{
1106
int ret;
1107
1108
ret = kstrtoul(val, 10, lo);
1109
if (ret)
1110
return ret;
1111
if (*lo >= nr_pages)
1112
return -ERANGE;
1113
*hi = *lo + 1;
1114
return 0;
1115
}
1116
1117
static int parse_page_indexes(char *val, unsigned long nr_pages,
1118
unsigned long *lo, unsigned long *hi)
1119
{
1120
char *delim;
1121
int ret;
1122
1123
delim = strchr(val, '-');
1124
if (!delim)
1125
return -EINVAL;
1126
1127
*delim = 0x00;
1128
ret = kstrtoul(val, 10, lo);
1129
if (ret)
1130
return ret;
1131
if (*lo >= nr_pages)
1132
return -ERANGE;
1133
1134
ret = kstrtoul(delim + 1, 10, hi);
1135
if (ret)
1136
return ret;
1137
if (*hi >= nr_pages || *lo > *hi)
1138
return -ERANGE;
1139
*hi += 1;
1140
return 0;
1141
}
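
/*
 * Parsing example: "page_indexes=10-20" yields lo = 10 and hi = 21, i.e.
 * the user-visible range is inclusive and is converted here to the
 * half-open [lo, hi) form that scan_slots_for_writeback() iterates over.
 */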
1142
1143
static int parse_mode(char *val, u32 *mode)
1144
{
1145
*mode = 0;
1146
1147
if (!strcmp(val, "idle"))
1148
*mode = IDLE_WRITEBACK;
1149
if (!strcmp(val, "huge"))
1150
*mode = HUGE_WRITEBACK;
1151
if (!strcmp(val, "huge_idle"))
1152
*mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
1153
if (!strcmp(val, "incompressible"))
1154
*mode = INCOMPRESSIBLE_WRITEBACK;
1155
1156
if (*mode == 0)
1157
return -EINVAL;
1158
return 0;
1159
}
1160
1161
static int scan_slots_for_writeback(struct zram *zram, u32 mode,
1162
unsigned long lo, unsigned long hi,
1163
struct zram_pp_ctl *ctl)
1164
{
1165
u32 index = lo;
1166
1167
while (index < hi) {
1168
bool ok = true;
1169
1170
zram_slot_lock(zram, index);
1171
if (!zram_allocated(zram, index))
1172
goto next;
1173
1174
if (zram_test_flag(zram, index, ZRAM_WB) ||
1175
zram_test_flag(zram, index, ZRAM_SAME))
1176
goto next;
1177
1178
if (mode & IDLE_WRITEBACK &&
1179
!zram_test_flag(zram, index, ZRAM_IDLE))
1180
goto next;
1181
if (mode & HUGE_WRITEBACK &&
1182
!zram_test_flag(zram, index, ZRAM_HUGE))
1183
goto next;
1184
if (mode & INCOMPRESSIBLE_WRITEBACK &&
1185
!zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
1186
goto next;
1187
1188
ok = place_pp_slot(zram, ctl, index);
1189
next:
1190
zram_slot_unlock(zram, index);
1191
if (!ok)
1192
break;
1193
index++;
1194
}
1195
1196
return 0;
1197
}
1198
1199
static ssize_t writeback_store(struct device *dev,
1200
struct device_attribute *attr,
1201
const char *buf, size_t len)
1202
{
1203
struct zram *zram = dev_to_zram(dev);
1204
u64 nr_pages = zram->disksize >> PAGE_SHIFT;
1205
unsigned long lo = 0, hi = nr_pages;
1206
struct zram_pp_ctl *pp_ctl = NULL;
1207
struct zram_wb_ctl *wb_ctl = NULL;
1208
char *args, *param, *val;
1209
ssize_t ret = len;
1210
int err, mode = 0;
1211
1212
down_read(&zram->init_lock);
1213
if (!init_done(zram)) {
1214
up_read(&zram->init_lock);
1215
return -EINVAL;
1216
}
1217
1218
/* Do not permit concurrent post-processing actions. */
1219
if (atomic_xchg(&zram->pp_in_progress, 1)) {
1220
up_read(&zram->init_lock);
1221
return -EAGAIN;
1222
}
1223
1224
if (!zram->backing_dev) {
1225
ret = -ENODEV;
1226
goto release_init_lock;
1227
}
1228
1229
pp_ctl = init_pp_ctl();
1230
if (!pp_ctl) {
1231
ret = -ENOMEM;
1232
goto release_init_lock;
1233
}
1234
1235
wb_ctl = init_wb_ctl(zram);
1236
if (!wb_ctl) {
1237
ret = -ENOMEM;
1238
goto release_init_lock;
1239
}
1240
1241
args = skip_spaces(buf);
1242
while (*args) {
1243
args = next_arg(args, &param, &val);
1244
1245
/*
1246
* Workaround to support the old writeback interface.
1247
*
1248
* The old writeback interface has a minor inconsistency and
1249
* requires key=value only for page_index parameter, while the
1250
* writeback mode is a valueless parameter.
1251
*
1252
* This is not the case anymore and now all parameters are
1253
* required to have values, however, we need to support the
1254
* legacy writeback interface format so we check if we can
1255
* recognize a valueless parameter as the (legacy) writeback
1256
* mode.
1257
*/
1258
if (!val || !*val) {
1259
err = parse_mode(param, &mode);
1260
if (err) {
1261
ret = err;
1262
goto release_init_lock;
1263
}
1264
1265
scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1266
break;
1267
}
1268
1269
if (!strcmp(param, "type")) {
1270
err = parse_mode(val, &mode);
1271
if (err) {
1272
ret = err;
1273
goto release_init_lock;
1274
}
1275
1276
scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1277
break;
1278
}
1279
1280
if (!strcmp(param, "page_index")) {
1281
err = parse_page_index(val, nr_pages, &lo, &hi);
1282
if (err) {
1283
ret = err;
1284
goto release_init_lock;
1285
}
1286
1287
scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1288
continue;
1289
}
1290
1291
if (!strcmp(param, "page_indexes")) {
1292
err = parse_page_indexes(val, nr_pages, &lo, &hi);
1293
if (err) {
1294
ret = err;
1295
goto release_init_lock;
1296
}
1297
1298
scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl);
1299
continue;
1300
}
1301
}
1302
1303
err = zram_writeback_slots(zram, pp_ctl, wb_ctl);
1304
if (err)
1305
ret = err;
1306
1307
release_init_lock:
1308
release_pp_ctl(zram, pp_ctl);
1309
release_wb_ctl(wb_ctl);
1310
atomic_set(&zram->pp_in_progress, 0);
1311
up_read(&zram->init_lock);
1312
1313
return ret;
1314
}
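
/*
 * Usage sketches accepted by the parser above (the device path is
 * illustrative):
 *
 *	echo idle > /sys/block/zram0/writeback
 *	echo type=huge_idle > /sys/block/zram0/writeback
 *	echo page_index=1251 > /sys/block/zram0/writeback
 *	echo page_indexes=1-20 page_indexes=25-30 > /sys/block/zram0/writeback
 *
 * The first form is the legacy valueless-mode interface; the others use the
 * key=value format, and multiple page_index/page_indexes parameters can be
 * combined in a single write.
 */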
1315
1316
struct zram_work {
1317
struct work_struct work;
1318
struct zram *zram;
1319
unsigned long entry;
1320
struct page *page;
1321
int error;
1322
};
1323
1324
static void zram_sync_read(struct work_struct *work)
1325
{
1326
struct zram_work *zw = container_of(work, struct zram_work, work);
1327
struct bio_vec bv;
1328
struct bio bio;
1329
1330
bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
1331
bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
1332
__bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
1333
zw->error = submit_bio_wait(&bio);
1334
}
1335
1336
/*
1337
* The block layer wants one ->submit_bio to be active at a time, so if we use
1338
* chained IO with the parent IO in the same context, it's a deadlock. To avoid
1339
* that, use a worker thread context.
1340
*/
1341
static int read_from_bdev_sync(struct zram *zram, struct page *page,
1342
unsigned long entry)
1343
{
1344
struct zram_work work;
1345
1346
work.page = page;
1347
work.zram = zram;
1348
work.entry = entry;
1349
1350
INIT_WORK_ONSTACK(&work.work, zram_sync_read);
1351
queue_work(system_dfl_wq, &work.work);
1352
flush_work(&work.work);
1353
destroy_work_on_stack(&work.work);
1354
1355
return work.error;
1356
}
1357
1358
static int read_from_bdev(struct zram *zram, struct page *page,
1359
unsigned long entry, struct bio *parent)
1360
{
1361
atomic64_inc(&zram->stats.bd_reads);
1362
if (!parent) {
1363
if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
1364
return -EIO;
1365
return read_from_bdev_sync(zram, page, entry);
1366
}
1367
read_from_bdev_async(zram, page, entry, parent);
1368
return 0;
1369
}
1370
#else
1371
static inline void reset_bdev(struct zram *zram) {};
1372
static int read_from_bdev(struct zram *zram, struct page *page,
1373
unsigned long entry, struct bio *parent)
1374
{
1375
return -EIO;
1376
}
1377
1378
static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx)
1379
{
1380
}
1381
#endif
1382
1383
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
1384
1385
static struct dentry *zram_debugfs_root;
1386
1387
static void zram_debugfs_create(void)
1388
{
1389
zram_debugfs_root = debugfs_create_dir("zram", NULL);
1390
}
1391
1392
static void zram_debugfs_destroy(void)
1393
{
1394
debugfs_remove_recursive(zram_debugfs_root);
1395
}
1396
1397
static ssize_t read_block_state(struct file *file, char __user *buf,
1398
size_t count, loff_t *ppos)
1399
{
1400
char *kbuf;
1401
ssize_t index, written = 0;
1402
struct zram *zram = file->private_data;
1403
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
1404
struct timespec64 ts;
1405
1406
kbuf = kvmalloc(count, GFP_KERNEL);
1407
if (!kbuf)
1408
return -ENOMEM;
1409
1410
down_read(&zram->init_lock);
1411
if (!init_done(zram)) {
1412
up_read(&zram->init_lock);
1413
kvfree(kbuf);
1414
return -EINVAL;
1415
}
1416
1417
for (index = *ppos; index < nr_pages; index++) {
1418
int copied;
1419
1420
zram_slot_lock(zram, index);
1421
if (!zram_allocated(zram, index))
1422
goto next;
1423
1424
ts = ktime_to_timespec64(zram->table[index].ac_time);
1425
copied = snprintf(kbuf + written, count,
1426
"%12zd %12lld.%06lu %c%c%c%c%c%c\n",
1427
index, (s64)ts.tv_sec,
1428
ts.tv_nsec / NSEC_PER_USEC,
1429
zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
1430
zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
1431
zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
1432
zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
1433
zram_get_priority(zram, index) ? 'r' : '.',
1434
zram_test_flag(zram, index,
1435
ZRAM_INCOMPRESSIBLE) ? 'n' : '.');
1436
1437
if (count <= copied) {
1438
zram_slot_unlock(zram, index);
1439
break;
1440
}
1441
written += copied;
1442
count -= copied;
1443
next:
1444
zram_slot_unlock(zram, index);
1445
*ppos += 1;
1446
}
1447
1448
up_read(&zram->init_lock);
1449
if (copy_to_user(buf, kbuf, written))
1450
written = -EFAULT;
1451
kvfree(kbuf);
1452
1453
return written;
1454
}
1455
1456
static const struct file_operations proc_zram_block_state_op = {
1457
.open = simple_open,
1458
.read = read_block_state,
1459
.llseek = default_llseek,
1460
};
1461
1462
static void zram_debugfs_register(struct zram *zram)
1463
{
1464
if (!zram_debugfs_root)
1465
return;
1466
1467
zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
1468
zram_debugfs_root);
1469
debugfs_create_file("block_state", 0400, zram->debugfs_dir,
1470
zram, &proc_zram_block_state_op);
1471
}
1472
1473
static void zram_debugfs_unregister(struct zram *zram)
1474
{
1475
debugfs_remove_recursive(zram->debugfs_dir);
1476
}
1477
#else
1478
static void zram_debugfs_create(void) {};
1479
static void zram_debugfs_destroy(void) {};
1480
static void zram_debugfs_register(struct zram *zram) {};
1481
static void zram_debugfs_unregister(struct zram *zram) {};
1482
#endif
1483
1484
static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
1485
{
1486
/* Do not free statically defined compression algorithms */
1487
if (zram->comp_algs[prio] != default_compressor)
1488
kfree(zram->comp_algs[prio]);
1489
1490
zram->comp_algs[prio] = alg;
1491
}
1492
1493
static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
1494
{
1495
char *compressor;
1496
size_t sz;
1497
1498
sz = strlen(buf);
1499
if (sz >= ZRAM_MAX_ALGO_NAME_SZ)
1500
return -E2BIG;
1501
1502
compressor = kstrdup(buf, GFP_KERNEL);
1503
if (!compressor)
1504
return -ENOMEM;
1505
1506
/* ignore trailing newline */
1507
if (sz > 0 && compressor[sz - 1] == '\n')
1508
compressor[sz - 1] = 0x00;
1509
1510
if (!zcomp_available_algorithm(compressor)) {
1511
kfree(compressor);
1512
return -EINVAL;
1513
}
1514
1515
down_write(&zram->init_lock);
1516
if (init_done(zram)) {
1517
up_write(&zram->init_lock);
1518
kfree(compressor);
1519
pr_info("Can't change algorithm for initialized device\n");
1520
return -EBUSY;
1521
}
1522
1523
comp_algorithm_set(zram, prio, compressor);
1524
up_write(&zram->init_lock);
1525
return 0;
1526
}
1527
1528
static void comp_params_reset(struct zram *zram, u32 prio)
1529
{
1530
struct zcomp_params *params = &zram->params[prio];
1531
1532
vfree(params->dict);
1533
params->level = ZCOMP_PARAM_NOT_SET;
1534
params->deflate.winbits = ZCOMP_PARAM_NOT_SET;
1535
params->dict_sz = 0;
1536
params->dict = NULL;
1537
}
1538
1539
static int comp_params_store(struct zram *zram, u32 prio, s32 level,
1540
const char *dict_path,
1541
struct deflate_params *deflate_params)
1542
{
1543
ssize_t sz = 0;
1544
1545
comp_params_reset(zram, prio);
1546
1547
if (dict_path) {
1548
sz = kernel_read_file_from_path(dict_path, 0,
1549
&zram->params[prio].dict,
1550
INT_MAX,
1551
NULL,
1552
READING_POLICY);
1553
if (sz < 0)
1554
return -EINVAL;
1555
}
1556
1557
zram->params[prio].dict_sz = sz;
1558
zram->params[prio].level = level;
1559
zram->params[prio].deflate.winbits = deflate_params->winbits;
1560
return 0;
1561
}
1562
1563
static ssize_t algorithm_params_store(struct device *dev,
1564
struct device_attribute *attr,
1565
const char *buf,
1566
size_t len)
1567
{
1568
s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET;
1569
char *args, *param, *val, *algo = NULL, *dict_path = NULL;
1570
struct deflate_params deflate_params;
1571
struct zram *zram = dev_to_zram(dev);
1572
int ret;
1573
1574
deflate_params.winbits = ZCOMP_PARAM_NOT_SET;
1575
1576
args = skip_spaces(buf);
1577
while (*args) {
1578
args = next_arg(args, &param, &val);
1579
1580
if (!val || !*val)
1581
return -EINVAL;
1582
1583
if (!strcmp(param, "priority")) {
1584
ret = kstrtoint(val, 10, &prio);
1585
if (ret)
1586
return ret;
1587
continue;
1588
}
1589
1590
if (!strcmp(param, "level")) {
1591
ret = kstrtoint(val, 10, &level);
1592
if (ret)
1593
return ret;
1594
continue;
1595
}
1596
1597
if (!strcmp(param, "algo")) {
1598
algo = val;
1599
continue;
1600
}
1601
1602
if (!strcmp(param, "dict")) {
1603
dict_path = val;
1604
continue;
1605
}
1606
1607
if (!strcmp(param, "deflate.winbits")) {
1608
ret = kstrtoint(val, 10, &deflate_params.winbits);
1609
if (ret)
1610
return ret;
1611
continue;
1612
}
1613
}
1614
1615
/* Lookup priority by algorithm name */
1616
if (algo) {
1617
s32 p;
1618
1619
prio = -EINVAL;
1620
for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) {
1621
if (!zram->comp_algs[p])
1622
continue;
1623
1624
if (!strcmp(zram->comp_algs[p], algo)) {
1625
prio = p;
1626
break;
1627
}
1628
}
1629
}
1630
1631
if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS)
1632
return -EINVAL;
1633
1634
ret = comp_params_store(zram, prio, level, dict_path, &deflate_params);
1635
return ret ? ret : len;
1636
}
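
/*
 * Usage sketch (values and paths are illustrative): compression parameters
 * for a given priority or algorithm can be tuned with, e.g.
 *
 *	echo "priority=0 level=9" > /sys/block/zram0/algorithm_params
 *	echo "algo=zstd level=3 dict=/path/to/dictionary" > /sys/block/zram0/algorithm_params
 *
 * "priority" addresses a compressor slot in zram->params[] directly, while
 * "algo" looks up the priority of an already-configured algorithm by name;
 * unspecified level/winbits values stay at ZCOMP_PARAM_NOT_SET.
 */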
1637
1638
static ssize_t comp_algorithm_show(struct device *dev,
1639
struct device_attribute *attr,
1640
char *buf)
1641
{
1642
struct zram *zram = dev_to_zram(dev);
1643
ssize_t sz;
1644
1645
down_read(&zram->init_lock);
1646
sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0);
1647
up_read(&zram->init_lock);
1648
return sz;
1649
}
1650
1651
static ssize_t comp_algorithm_store(struct device *dev,
1652
struct device_attribute *attr,
1653
const char *buf,
1654
size_t len)
1655
{
1656
struct zram *zram = dev_to_zram(dev);
1657
int ret;
1658
1659
ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
1660
return ret ? ret : len;
1661
}
1662
1663
#ifdef CONFIG_ZRAM_MULTI_COMP
1664
static ssize_t recomp_algorithm_show(struct device *dev,
1665
struct device_attribute *attr,
1666
char *buf)
1667
{
1668
struct zram *zram = dev_to_zram(dev);
1669
ssize_t sz = 0;
1670
u32 prio;
1671
1672
down_read(&zram->init_lock);
1673
for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
1674
if (!zram->comp_algs[prio])
1675
continue;
1676
1677
sz += sysfs_emit_at(buf, sz, "#%d: ", prio);
1678
sz += zcomp_available_show(zram->comp_algs[prio], buf, sz);
1679
}
1680
up_read(&zram->init_lock);
1681
return sz;
1682
}
1683
1684
static ssize_t recomp_algorithm_store(struct device *dev,
1685
struct device_attribute *attr,
1686
const char *buf,
1687
size_t len)
1688
{
1689
struct zram *zram = dev_to_zram(dev);
1690
int prio = ZRAM_SECONDARY_COMP;
1691
char *args, *param, *val;
1692
char *alg = NULL;
1693
int ret;
1694
1695
args = skip_spaces(buf);
1696
while (*args) {
1697
args = next_arg(args, &param, &val);
1698
1699
if (!val || !*val)
1700
return -EINVAL;
1701
1702
if (!strcmp(param, "algo")) {
1703
alg = val;
1704
continue;
1705
}
1706
1707
if (!strcmp(param, "priority")) {
1708
ret = kstrtoint(val, 10, &prio);
1709
if (ret)
1710
return ret;
1711
continue;
1712
}
1713
}
1714
1715
if (!alg)
1716
return -EINVAL;
1717
1718
if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
1719
return -EINVAL;
1720
1721
ret = __comp_algorithm_store(zram, prio, alg);
1722
return ret ? ret : len;
1723
}
1724
#endif
1725
1726
static ssize_t compact_store(struct device *dev,
1727
struct device_attribute *attr, const char *buf, size_t len)
1728
{
1729
struct zram *zram = dev_to_zram(dev);
1730
1731
down_read(&zram->init_lock);
1732
if (!init_done(zram)) {
1733
up_read(&zram->init_lock);
1734
return -EINVAL;
1735
}
1736
1737
zs_compact(zram->mem_pool);
1738
up_read(&zram->init_lock);
1739
1740
return len;
1741
}
1742
1743
static ssize_t io_stat_show(struct device *dev,
1744
struct device_attribute *attr, char *buf)
1745
{
1746
struct zram *zram = dev_to_zram(dev);
1747
ssize_t ret;
1748
1749
down_read(&zram->init_lock);
1750
ret = sysfs_emit(buf,
1751
"%8llu %8llu 0 %8llu\n",
1752
(u64)atomic64_read(&zram->stats.failed_reads),
1753
(u64)atomic64_read(&zram->stats.failed_writes),
1754
(u64)atomic64_read(&zram->stats.notify_free));
1755
up_read(&zram->init_lock);
1756
1757
return ret;
1758
}
1759
1760
static ssize_t mm_stat_show(struct device *dev,
1761
struct device_attribute *attr, char *buf)
1762
{
1763
struct zram *zram = dev_to_zram(dev);
1764
struct zs_pool_stats pool_stats;
1765
u64 orig_size, mem_used = 0;
1766
long max_used;
1767
ssize_t ret;
1768
1769
memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1770
1771
down_read(&zram->init_lock);
1772
if (init_done(zram)) {
1773
mem_used = zs_get_total_pages(zram->mem_pool);
1774
zs_pool_stats(zram->mem_pool, &pool_stats);
1775
}
1776
1777
orig_size = atomic64_read(&zram->stats.pages_stored);
1778
max_used = atomic_long_read(&zram->stats.max_used_pages);
1779
1780
ret = sysfs_emit(buf,
1781
"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
1782
orig_size << PAGE_SHIFT,
1783
(u64)atomic64_read(&zram->stats.compr_data_size),
1784
mem_used << PAGE_SHIFT,
1785
zram->limit_pages << PAGE_SHIFT,
1786
max_used << PAGE_SHIFT,
1787
(u64)atomic64_read(&zram->stats.same_pages),
1788
atomic_long_read(&pool_stats.pages_compacted),
1789
(u64)atomic64_read(&zram->stats.huge_pages),
1790
(u64)atomic64_read(&zram->stats.huge_pages_since));
1791
up_read(&zram->init_lock);
1792
1793
return ret;
1794
}
1795
1796
#ifdef CONFIG_ZRAM_WRITEBACK
1797
#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1798
static ssize_t bd_stat_show(struct device *dev,
1799
struct device_attribute *attr, char *buf)
1800
{
1801
struct zram *zram = dev_to_zram(dev);
1802
ssize_t ret;
1803
1804
down_read(&zram->init_lock);
1805
ret = sysfs_emit(buf,
1806
"%8llu %8llu %8llu\n",
1807
FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1808
FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1809
FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1810
up_read(&zram->init_lock);
1811
1812
return ret;
1813
}
1814
#endif
1815
1816
static ssize_t debug_stat_show(struct device *dev,
1817
struct device_attribute *attr, char *buf)
1818
{
1819
int version = 1;
1820
struct zram *zram = dev_to_zram(dev);
1821
ssize_t ret;
1822
1823
down_read(&zram->init_lock);
1824
ret = sysfs_emit(buf,
1825
"version: %d\n0 %8llu\n",
1826
version,
1827
(u64)atomic64_read(&zram->stats.miss_free));
1828
up_read(&zram->init_lock);
1829
1830
return ret;
1831
}
1832
1833
static DEVICE_ATTR_RO(io_stat);
1834
static DEVICE_ATTR_RO(mm_stat);
1835
#ifdef CONFIG_ZRAM_WRITEBACK
1836
static DEVICE_ATTR_RO(bd_stat);
1837
#endif
1838
static DEVICE_ATTR_RO(debug_stat);
1839
1840
static void zram_meta_free(struct zram *zram, u64 disksize)
1841
{
1842
size_t num_pages = disksize >> PAGE_SHIFT;
1843
size_t index;
1844
1845
if (!zram->table)
1846
return;
1847
1848
/* Free all pages that are still in this zram device */
1849
for (index = 0; index < num_pages; index++)
1850
zram_free_page(zram, index);
1851
1852
zs_destroy_pool(zram->mem_pool);
1853
vfree(zram->table);
1854
zram->table = NULL;
1855
}
1856
1857
static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1858
{
1859
size_t num_pages, index;
1860
1861
num_pages = disksize >> PAGE_SHIFT;
1862
zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1863
if (!zram->table)
1864
return false;
1865
1866
zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1867
if (!zram->mem_pool) {
1868
vfree(zram->table);
1869
zram->table = NULL;
1870
return false;
1871
}
1872
1873
if (!huge_class_size)
1874
huge_class_size = zs_huge_class_size(zram->mem_pool);
1875
1876
for (index = 0; index < num_pages; index++)
1877
zram_slot_lock_init(zram, index);
1878
1879
return true;
1880
}
1881
1882
static void zram_free_page(struct zram *zram, size_t index)
1883
{
1884
unsigned long handle;
1885
1886
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
1887
zram->table[index].ac_time = 0;
1888
#endif
1889
1890
zram_clear_flag(zram, index, ZRAM_IDLE);
1891
zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
1892
zram_clear_flag(zram, index, ZRAM_PP_SLOT);
1893
zram_set_priority(zram, index, 0);
1894
1895
if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1896
zram_clear_flag(zram, index, ZRAM_HUGE);
1897
atomic64_dec(&zram->stats.huge_pages);
1898
}
1899
1900
if (zram_test_flag(zram, index, ZRAM_WB)) {
1901
zram_clear_flag(zram, index, ZRAM_WB);
1902
zram_release_bdev_block(zram, zram_get_handle(zram, index));
1903
goto out;
1904
}
1905
1906
/*
1907
* No memory is allocated for same element filled pages.
1908
* Simply clear the same-page flag.
1909
*/
1910
if (zram_test_flag(zram, index, ZRAM_SAME)) {
1911
zram_clear_flag(zram, index, ZRAM_SAME);
1912
atomic64_dec(&zram->stats.same_pages);
1913
goto out;
1914
}
1915
1916
handle = zram_get_handle(zram, index);
1917
if (!handle)
1918
return;
1919
1920
zs_free(zram->mem_pool, handle);
1921
1922
atomic64_sub(zram_get_obj_size(zram, index),
1923
&zram->stats.compr_data_size);
1924
out:
1925
atomic64_dec(&zram->stats.pages_stored);
1926
zram_set_handle(zram, index, 0);
1927
zram_set_obj_size(zram, index, 0);
1928
}
1929
1930
static int read_same_filled_page(struct zram *zram, struct page *page,
1931
u32 index)
1932
{
1933
void *mem;
1934
1935
mem = kmap_local_page(page);
1936
zram_fill_page(mem, PAGE_SIZE, zram_get_handle(zram, index));
1937
kunmap_local(mem);
1938
return 0;
1939
}
1940
1941
static int read_incompressible_page(struct zram *zram, struct page *page,
1942
u32 index)
1943
{
1944
unsigned long handle;
1945
void *src, *dst;
1946
1947
handle = zram_get_handle(zram, index);
1948
src = zs_obj_read_begin(zram->mem_pool, handle, NULL);
1949
dst = kmap_local_page(page);
1950
copy_page(dst, src);
1951
kunmap_local(dst);
1952
zs_obj_read_end(zram->mem_pool, handle, src);
1953
1954
return 0;
1955
}
1956
1957
static int read_compressed_page(struct zram *zram, struct page *page, u32 index)
1958
{
1959
struct zcomp_strm *zstrm;
1960
unsigned long handle;
1961
unsigned int size;
1962
void *src, *dst;
1963
int ret, prio;
1964
1965
handle = zram_get_handle(zram, index);
1966
size = zram_get_obj_size(zram, index);
1967
prio = zram_get_priority(zram, index);
1968
1969
zstrm = zcomp_stream_get(zram->comps[prio]);
1970
src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy);
1971
dst = kmap_local_page(page);
1972
ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst);
1973
kunmap_local(dst);
1974
zs_obj_read_end(zram->mem_pool, handle, src);
1975
zcomp_stream_put(zstrm);
1976
1977
return ret;
1978
}
1979
1980
/*
1981
* Reads (decompresses if needed) a page from zspool (zsmalloc).
1982
* Corresponding ZRAM slot should be locked.
1983
*/
1984
static int zram_read_from_zspool(struct zram *zram, struct page *page,
1985
u32 index)
1986
{
1987
if (zram_test_flag(zram, index, ZRAM_SAME) ||
1988
!zram_get_handle(zram, index))
1989
return read_same_filled_page(zram, page, index);
1990
1991
if (!zram_test_flag(zram, index, ZRAM_HUGE))
1992
return read_compressed_page(zram, page, index);
1993
else
1994
return read_incompressible_page(zram, page, index);
1995
}
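
/*
 * Dispatch summary for the read path above: slots with ZRAM_SAME (or no
 * handle at all) are reconstructed by refilling the page with the stored
 * pattern; ZRAM_HUGE slots were stored uncompressed and are simply copied;
 * everything else is decompressed with the compressor recorded in the
 * slot's priority bits.
 */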
1996
1997
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
1998
struct bio *parent)
1999
{
2000
int ret;
2001
2002
zram_slot_lock(zram, index);
2003
if (!zram_test_flag(zram, index, ZRAM_WB)) {
2004
/* The slot should be locked throughout the function call */
2005
ret = zram_read_from_zspool(zram, page, index);
2006
zram_slot_unlock(zram, index);
2007
} else {
2008
unsigned long blk_idx = zram_get_handle(zram, index);
2009
2010
/*
2011
* The slot should be unlocked before reading from the backing
2012
* device.
2013
*/
2014
zram_slot_unlock(zram, index);
2015
ret = read_from_bdev(zram, page, blk_idx, parent);
2016
}
2017
2018
/* Should NEVER happen. Return bio error if it does. */
2019
if (WARN_ON(ret < 0))
2020
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
2021
2022
return ret;
2023
}
2024
2025
/*
2026
* Use a temporary buffer to decompress the page, as the decompressor
2027
* always expects a full page for the output.
2028
*/
2029
static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
2030
u32 index, int offset)
2031
{
2032
struct page *page = alloc_page(GFP_NOIO);
2033
int ret;
2034
2035
if (!page)
2036
return -ENOMEM;
2037
ret = zram_read_page(zram, page, index, NULL);
2038
if (likely(!ret))
2039
memcpy_to_bvec(bvec, page_address(page) + offset);
2040
__free_page(page);
2041
return ret;
2042
}
2043
2044
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
2045
u32 index, int offset, struct bio *bio)
2046
{
2047
if (is_partial_io(bvec))
2048
return zram_bvec_read_partial(zram, bvec, index, offset);
2049
return zram_read_page(zram, bvec->bv_page, index, bio);
2050
}
2051
2052
static int write_same_filled_page(struct zram *zram, unsigned long fill,
2053
u32 index)
2054
{
2055
zram_slot_lock(zram, index);
2056
zram_free_page(zram, index);
2057
zram_set_flag(zram, index, ZRAM_SAME);
2058
zram_set_handle(zram, index, fill);
2059
zram_slot_unlock(zram, index);
2060
2061
atomic64_inc(&zram->stats.same_pages);
2062
atomic64_inc(&zram->stats.pages_stored);
2063
2064
return 0;
2065
}
2066
2067
static int write_incompressible_page(struct zram *zram, struct page *page,
2068
u32 index)
2069
{
2070
unsigned long handle;
2071
void *src;
2072
2073
/*
2074
* This function is called from preemptible context so we don't need
2075
* to do optimistic and fallback to pessimistic handle allocation,
2076
* like we do for compressible pages.
2077
*/
2078
handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
2079
GFP_NOIO | __GFP_NOWARN |
2080
__GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
2081
if (IS_ERR_VALUE(handle))
2082
return PTR_ERR((void *)handle);
2083
2084
if (!zram_can_store_page(zram)) {
2085
zs_free(zram->mem_pool, handle);
2086
return -ENOMEM;
2087
}
2088
2089
src = kmap_local_page(page);
2090
zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
2091
kunmap_local(src);
2092
2093
zram_slot_lock(zram, index);
2094
zram_free_page(zram, index);
2095
zram_set_flag(zram, index, ZRAM_HUGE);
2096
zram_set_handle(zram, index, handle);
2097
zram_set_obj_size(zram, index, PAGE_SIZE);
2098
zram_slot_unlock(zram, index);
2099
2100
atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size);
2101
atomic64_inc(&zram->stats.huge_pages);
2102
atomic64_inc(&zram->stats.huge_pages_since);
2103
atomic64_inc(&zram->stats.pages_stored);
2104
2105
return 0;
2106
}
2107
2108
static int zram_write_page(struct zram *zram, struct page *page, u32 index)
2109
{
2110
int ret = 0;
2111
unsigned long handle;
2112
unsigned int comp_len;
2113
void *mem;
2114
struct zcomp_strm *zstrm;
2115
unsigned long element;
2116
bool same_filled;
2117
2118
mem = kmap_local_page(page);
2119
same_filled = page_same_filled(mem, &element);
2120
kunmap_local(mem);
2121
if (same_filled)
2122
return write_same_filled_page(zram, element, index);
2123
2124
zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
2125
mem = kmap_local_page(page);
2126
ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm,
2127
mem, &comp_len);
2128
kunmap_local(mem);
2129
2130
if (unlikely(ret)) {
2131
zcomp_stream_put(zstrm);
2132
pr_err("Compression failed! err=%d\n", ret);
2133
return ret;
2134
}
2135
2136
if (comp_len >= huge_class_size) {
2137
zcomp_stream_put(zstrm);
2138
return write_incompressible_page(zram, page, index);
2139
}
2140
2141
handle = zs_malloc(zram->mem_pool, comp_len,
2142
GFP_NOIO | __GFP_NOWARN |
2143
__GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
2144
if (IS_ERR_VALUE(handle)) {
2145
zcomp_stream_put(zstrm);
2146
return PTR_ERR((void *)handle);
2147
}
2148
2149
if (!zram_can_store_page(zram)) {
2150
zcomp_stream_put(zstrm);
2151
zs_free(zram->mem_pool, handle);
2152
return -ENOMEM;
2153
}
2154
2155
zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
2156
zcomp_stream_put(zstrm);
2157
2158
zram_slot_lock(zram, index);
2159
zram_free_page(zram, index);
2160
zram_set_handle(zram, index, handle);
2161
zram_set_obj_size(zram, index, comp_len);
2162
zram_slot_unlock(zram, index);
2163
2164
/* Update stats */
2165
atomic64_inc(&zram->stats.pages_stored);
2166
atomic64_add(comp_len, &zram->stats.compr_data_size);
2167
2168
return ret;
2169
}
2170
2171
/*
2172
* This is a partial IO. Read the full page before writing the changes.
2173
*/
2174
static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
2175
u32 index, int offset, struct bio *bio)
2176
{
2177
struct page *page = alloc_page(GFP_NOIO);
2178
int ret;
2179
2180
if (!page)
2181
return -ENOMEM;
2182
2183
ret = zram_read_page(zram, page, index, bio);
2184
if (!ret) {
2185
memcpy_from_bvec(page_address(page) + offset, bvec);
2186
ret = zram_write_page(zram, page, index);
2187
}
2188
__free_page(page);
2189
return ret;
2190
}
2191
2192
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
2193
u32 index, int offset, struct bio *bio)
2194
{
2195
if (is_partial_io(bvec))
2196
return zram_bvec_write_partial(zram, bvec, index, offset, bio);
2197
return zram_write_page(zram, bvec->bv_page, index);
2198
}
2199
2200
#ifdef CONFIG_ZRAM_MULTI_COMP
2201
#define RECOMPRESS_IDLE (1 << 0)
2202
#define RECOMPRESS_HUGE (1 << 1)
2203
2204
static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max,
2205
struct zram_pp_ctl *ctl)
2206
{
2207
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
2208
unsigned long index;
2209
2210
for (index = 0; index < nr_pages; index++) {
2211
bool ok = true;
2212
2213
zram_slot_lock(zram, index);
2214
if (!zram_allocated(zram, index))
2215
goto next;
2216
2217
if (mode & RECOMPRESS_IDLE &&
2218
!zram_test_flag(zram, index, ZRAM_IDLE))
2219
goto next;
2220
2221
if (mode & RECOMPRESS_HUGE &&
2222
!zram_test_flag(zram, index, ZRAM_HUGE))
2223
goto next;
2224
2225
if (zram_test_flag(zram, index, ZRAM_WB) ||
2226
zram_test_flag(zram, index, ZRAM_SAME) ||
2227
zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
2228
goto next;
2229
2230
/* Already compressed with same or higher priority */
2231
if (zram_get_priority(zram, index) + 1 >= prio_max)
2232
goto next;
2233
2234
ok = place_pp_slot(zram, ctl, index);
2235
next:
2236
zram_slot_unlock(zram, index);
2237
if (!ok)
2238
break;
2239
}
2240
2241
return 0;
2242
}
2243
2244
/*
2245
* This function will decompress (unless it's ZRAM_HUGE) the page and then
2246
* attempt to compress it using the compression algorithm at the provided
2247
* priority (which is potentially more effective).
2248
*
2249
* The corresponding ZRAM slot must be locked by the caller.
2250
*/
2251
static int recompress_slot(struct zram *zram, u32 index, struct page *page,
2252
u64 *num_recomp_pages, u32 threshold, u32 prio,
2253
u32 prio_max)
2254
{
2255
struct zcomp_strm *zstrm = NULL;
2256
unsigned long handle_old;
2257
unsigned long handle_new;
2258
unsigned int comp_len_old;
2259
unsigned int comp_len_new;
2260
unsigned int class_index_old;
2261
unsigned int class_index_new;
2262
void *src;
2263
int ret = 0;
2264
2265
handle_old = zram_get_handle(zram, index);
2266
if (!handle_old)
2267
return -EINVAL;
2268
2269
comp_len_old = zram_get_obj_size(zram, index);
2270
/*
2271
* Do not recompress objects that are already "small enough".
2272
*/
2273
if (comp_len_old < threshold)
2274
return 0;
2275
2276
ret = zram_read_from_zspool(zram, page, index);
2277
if (ret)
2278
return ret;
2279
2280
/*
2281
* We touched this entry so mark it as non-IDLE. This makes sure that
2282
* we don't preserve IDLE flag and don't incorrectly pick this entry
2283
* for different post-processing type (e.g. writeback).
2284
*/
2285
zram_clear_flag(zram, index, ZRAM_IDLE);
2286
2287
class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
2288
2289
prio = max(prio, zram_get_priority(zram, index) + 1);
2290
/*
2291
* The recompression slot scan should not select slots that are
2292
* already compressed with a higher priority algorithm, but
2293
* just in case
2294
*/
2295
if (prio >= prio_max)
2296
return 0;
2297
2298
/*
2299
* Iterate the secondary comp algorithms list (in order of priority)
2300
* and try to recompress the page.
2301
*/
2302
for (; prio < prio_max; prio++) {
2303
if (!zram->comps[prio])
2304
continue;
2305
2306
zstrm = zcomp_stream_get(zram->comps[prio]);
2307
src = kmap_local_page(page);
2308
ret = zcomp_compress(zram->comps[prio], zstrm,
2309
src, &comp_len_new);
2310
kunmap_local(src);
2311
2312
if (ret) {
2313
zcomp_stream_put(zstrm);
2314
zstrm = NULL;
2315
break;
2316
}
2317
2318
class_index_new = zs_lookup_class_index(zram->mem_pool,
2319
comp_len_new);
2320
2321
/* Continue until we make progress */
2322
if (class_index_new >= class_index_old ||
2323
(threshold && comp_len_new >= threshold)) {
2324
zcomp_stream_put(zstrm);
2325
zstrm = NULL;
2326
continue;
2327
}
2328
2329
/* Recompression was successful so break out */
2330
break;
2331
}
2332
2333
/*
2334
* Decrement the limit (if set) on pages we can recompress, even
2335
* when current recompression was unsuccessful or did not compress
2336
* the page below the threshold, because we still spent resources
2337
* on it.
2338
*/
2339
if (*num_recomp_pages)
2340
*num_recomp_pages -= 1;
2341
2342
/* Compression error */
2343
if (ret)
2344
return ret;
2345
2346
if (!zstrm) {
2347
/*
2348
* Secondary algorithms failed to re-compress the page
2349
* in a way that would save memory.
2350
*
2351
* Mark the object incompressible if the max-priority
2352
* algorithm couldn't re-compress it.
2353
*/
2354
if (prio < zram->num_active_comps)
2355
return 0;
2356
zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
2357
return 0;
2358
}
2359
2360
/*
2361
* We are holding a per-CPU stream mutex and the entry lock, so we'd better
2362
* avoid direct reclaim. Allocation error is not fatal since
2363
* we still have the old object in the mem_pool.
2364
*
2365
* XXX: technically, the node we really want here is the node that holds
2366
* the original compressed data. But that would require us to modify
2367
* zsmalloc API to return this information. For now, we will make do with
2368
* the node of the page allocated for recompression.
2369
*/
2370
handle_new = zs_malloc(zram->mem_pool, comp_len_new,
2371
GFP_NOIO | __GFP_NOWARN |
2372
__GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page));
2373
if (IS_ERR_VALUE(handle_new)) {
2374
zcomp_stream_put(zstrm);
2375
return PTR_ERR((void *)handle_new);
2376
}
2377
2378
zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
2379
zcomp_stream_put(zstrm);
2380
2381
zram_free_page(zram, index);
2382
zram_set_handle(zram, index, handle_new);
2383
zram_set_obj_size(zram, index, comp_len_new);
2384
zram_set_priority(zram, index, prio);
2385
2386
atomic64_add(comp_len_new, &zram->stats.compr_data_size);
2387
atomic64_inc(&zram->stats.pages_stored);
2388
2389
return 0;
2390
}
2391
2392
static ssize_t recompress_store(struct device *dev,
2393
struct device_attribute *attr,
2394
const char *buf, size_t len)
2395
{
2396
struct zram *zram = dev_to_zram(dev);
2397
char *args, *param, *val, *algo = NULL;
2398
u64 num_recomp_pages = ULLONG_MAX;
2399
struct zram_pp_ctl *ctl = NULL;
2400
struct zram_pp_slot *pps;
2401
u32 mode = 0, threshold = 0;
2402
u32 prio, prio_max;
2403
struct page *page = NULL;
2404
ssize_t ret;
2405
2406
prio = ZRAM_SECONDARY_COMP;
2407
prio_max = zram->num_active_comps;
2408
2409
args = skip_spaces(buf);
2410
while (*args) {
2411
args = next_arg(args, &param, &val);
2412
2413
if (!val || !*val)
2414
return -EINVAL;
2415
2416
if (!strcmp(param, "type")) {
2417
if (!strcmp(val, "idle"))
2418
mode = RECOMPRESS_IDLE;
2419
if (!strcmp(val, "huge"))
2420
mode = RECOMPRESS_HUGE;
2421
if (!strcmp(val, "huge_idle"))
2422
mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
2423
continue;
2424
}
2425
2426
if (!strcmp(param, "max_pages")) {
2427
/*
2428
* Limit the number of entries (pages) we attempt to
2429
* recompress.
2430
*/
2431
ret = kstrtoull(val, 10, &num_recomp_pages);
2432
if (ret)
2433
return ret;
2434
continue;
2435
}
2436
2437
if (!strcmp(param, "threshold")) {
2438
/*
2439
* We will re-compress only objects equal to or
2440
* greater in size than the given watermark.
2441
*/
2442
ret = kstrtouint(val, 10, &threshold);
2443
if (ret)
2444
return ret;
2445
continue;
2446
}
2447
2448
if (!strcmp(param, "algo")) {
2449
algo = val;
2450
continue;
2451
}
2452
2453
if (!strcmp(param, "priority")) {
2454
ret = kstrtouint(val, 10, &prio);
2455
if (ret)
2456
return ret;
2457
2458
if (prio == ZRAM_PRIMARY_COMP)
2459
prio = ZRAM_SECONDARY_COMP;
2460
2461
prio_max = prio + 1;
2462
continue;
2463
}
2464
}
2465
2466
if (threshold >= huge_class_size)
2467
return -EINVAL;
2468
2469
down_read(&zram->init_lock);
2470
if (!init_done(zram)) {
2471
ret = -EINVAL;
2472
goto release_init_lock;
2473
}
2474
2475
/* Do not permit concurrent post-processing actions. */
2476
if (atomic_xchg(&zram->pp_in_progress, 1)) {
2477
up_read(&zram->init_lock);
2478
return -EAGAIN;
2479
}
2480
2481
if (algo) {
2482
bool found = false;
2483
2484
for (; prio < ZRAM_MAX_COMPS; prio++) {
2485
if (!zram->comp_algs[prio])
2486
continue;
2487
2488
if (!strcmp(zram->comp_algs[prio], algo)) {
2489
prio_max = prio + 1;
2490
found = true;
2491
break;
2492
}
2493
}
2494
2495
if (!found) {
2496
ret = -EINVAL;
2497
goto release_init_lock;
2498
}
2499
}
2500
2501
prio_max = min(prio_max, (u32)zram->num_active_comps);
2502
if (prio >= prio_max) {
2503
ret = -EINVAL;
2504
goto release_init_lock;
2505
}
2506
2507
page = alloc_page(GFP_KERNEL);
2508
if (!page) {
2509
ret = -ENOMEM;
2510
goto release_init_lock;
2511
}
2512
2513
ctl = init_pp_ctl();
2514
if (!ctl) {
2515
ret = -ENOMEM;
2516
goto release_init_lock;
2517
}
2518
2519
scan_slots_for_recompress(zram, mode, prio_max, ctl);
2520
2521
ret = len;
2522
while ((pps = select_pp_slot(ctl))) {
2523
int err = 0;
2524
2525
if (!num_recomp_pages)
2526
break;
2527
2528
zram_slot_lock(zram, pps->index);
2529
if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
2530
goto next;
2531
2532
err = recompress_slot(zram, pps->index, page,
2533
&num_recomp_pages, threshold,
2534
prio, prio_max);
2535
next:
2536
zram_slot_unlock(zram, pps->index);
2537
release_pp_slot(zram, pps);
2538
2539
if (err) {
2540
ret = err;
2541
break;
2542
}
2543
2544
cond_resched();
2545
}
2546
2547
release_init_lock:
2548
if (page)
2549
__free_page(page);
2550
release_pp_ctl(zram, ctl);
2551
atomic_set(&zram->pp_in_progress, 0);
2552
up_read(&zram->init_lock);
2553
return ret;
2554
}
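/*
* Example (illustrative only; the device name and values are not
* prescriptive): assuming zstd has been registered as a secondary
* algorithm via recomp_algorithm, a recompression pass over huge and
* idle slots could be requested from userspace with:
*
*   echo "type=huge_idle threshold=3000 max_pages=1024 algo=zstd" > \
*           /sys/block/zram0/recompress
*
* "threshold" must stay below huge_class_size and "max_pages" caps how
* many slots are attempted.
*/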
2555
#endif
2556
2557
static void zram_bio_discard(struct zram *zram, struct bio *bio)
2558
{
2559
size_t n = bio->bi_iter.bi_size;
2560
u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2561
u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2562
SECTOR_SHIFT;
2563
2564
/*
2565
* zram manages data in physical block size units. Because logical block
2566
* size isn't identical to the physical block size on some architectures, we
2567
* could get a discard request pointing to a specific offset within a
2568
* certain physical block. Although we can handle this request by
2569
* reading that physical block and decompressing and partially zeroing
2570
* and re-compressing and then re-storing it, this isn't reasonable
2571
* because our intent with a discard request is to save memory. So
2572
* skipping this logical block is appropriate here.
2573
*/
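/*
* For example (illustrative, assuming PAGE_SIZE == 4096 and 512-byte
* sectors): a discard starting at sector 1 gives offset == 512, so the
* first 3584 bytes only cover the tail of a page and are skipped; the
* loop below then frees whole pages for as long as at least PAGE_SIZE
* bytes of the request remain.
*/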
2574
if (offset) {
2575
if (n <= (PAGE_SIZE - offset))
2576
return;
2577
2578
n -= (PAGE_SIZE - offset);
2579
index++;
2580
}
2581
2582
while (n >= PAGE_SIZE) {
2583
zram_slot_lock(zram, index);
2584
zram_free_page(zram, index);
2585
zram_slot_unlock(zram, index);
2586
atomic64_inc(&zram->stats.notify_free);
2587
index++;
2588
n -= PAGE_SIZE;
2589
}
2590
2591
bio_endio(bio);
2592
}
2593
2594
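/*
* Read path: walk the bio one PAGE_SIZE unit at a time. Each bio_vec is
* clamped so it never crosses a page boundary of the zram device, and
* the slot is marked accessed after a successful read.
*/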
static void zram_bio_read(struct zram *zram, struct bio *bio)
2595
{
2596
unsigned long start_time = bio_start_io_acct(bio);
2597
struct bvec_iter iter = bio->bi_iter;
2598
2599
do {
2600
u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2601
u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2602
SECTOR_SHIFT;
2603
struct bio_vec bv = bio_iter_iovec(bio, iter);
2604
2605
bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2606
2607
if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
2608
atomic64_inc(&zram->stats.failed_reads);
2609
bio->bi_status = BLK_STS_IOERR;
2610
break;
2611
}
2612
flush_dcache_page(bv.bv_page);
2613
2614
zram_slot_lock(zram, index);
2615
zram_accessed(zram, index);
2616
zram_slot_unlock(zram, index);
2617
2618
bio_advance_iter_single(bio, &iter, bv.bv_len);
2619
} while (iter.bi_size);
2620
2621
bio_end_io_acct(bio, start_time);
2622
bio_endio(bio);
2623
}
2624
2625
static void zram_bio_write(struct zram *zram, struct bio *bio)
2626
{
2627
unsigned long start_time = bio_start_io_acct(bio);
2628
struct bvec_iter iter = bio->bi_iter;
2629
2630
do {
2631
u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
2632
u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
2633
SECTOR_SHIFT;
2634
struct bio_vec bv = bio_iter_iovec(bio, iter);
2635
2636
bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
2637
2638
if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
2639
atomic64_inc(&zram->stats.failed_writes);
2640
bio->bi_status = BLK_STS_IOERR;
2641
break;
2642
}
2643
2644
zram_slot_lock(zram, index);
2645
zram_accessed(zram, index);
2646
zram_slot_unlock(zram, index);
2647
2648
bio_advance_iter_single(bio, &iter, bv.bv_len);
2649
} while (iter.bi_size);
2650
2651
bio_end_io_acct(bio, start_time);
2652
bio_endio(bio);
2653
}
2654
2655
/*
2656
* Handler function for all zram I/O requests.
2657
*/
2658
static void zram_submit_bio(struct bio *bio)
2659
{
2660
struct zram *zram = bio->bi_bdev->bd_disk->private_data;
2661
2662
switch (bio_op(bio)) {
2663
case REQ_OP_READ:
2664
zram_bio_read(zram, bio);
2665
break;
2666
case REQ_OP_WRITE:
2667
zram_bio_write(zram, bio);
2668
break;
2669
case REQ_OP_DISCARD:
2670
case REQ_OP_WRITE_ZEROES:
2671
zram_bio_discard(zram, bio);
2672
break;
2673
default:
2674
WARN_ON_ONCE(1);
2675
bio_endio(bio);
2676
}
2677
}
2678
2679
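/*
* The swap code invokes this notifier from contexts that cannot sleep
* waiting for the slot lock, so only the trylock variant is used here;
* if the slot is contended the free is skipped and accounted as
* miss_free.
*/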
static void zram_slot_free_notify(struct block_device *bdev,
2680
unsigned long index)
2681
{
2682
struct zram *zram;
2683
2684
zram = bdev->bd_disk->private_data;
2685
2686
atomic64_inc(&zram->stats.notify_free);
2687
if (!zram_slot_trylock(zram, index)) {
2688
atomic64_inc(&zram->stats.miss_free);
2689
return;
2690
}
2691
2692
zram_free_page(zram, index);
2693
zram_slot_unlock(zram, index);
2694
}
2695
2696
static void zram_comp_params_reset(struct zram *zram)
2697
{
2698
u32 prio;
2699
2700
for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2701
comp_params_reset(zram, prio);
2702
}
2703
}
2704
2705
static void zram_destroy_comps(struct zram *zram)
2706
{
2707
u32 prio;
2708
2709
for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2710
struct zcomp *comp = zram->comps[prio];
2711
2712
zram->comps[prio] = NULL;
2713
if (!comp)
2714
continue;
2715
zcomp_destroy(comp);
2716
zram->num_active_comps--;
2717
}
2718
2719
for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2720
/* Do not free statically defined compression algorithms */
2721
if (zram->comp_algs[prio] != default_compressor)
2722
kfree(zram->comp_algs[prio]);
2723
zram->comp_algs[prio] = NULL;
2724
}
2725
2726
zram_comp_params_reset(zram);
2727
}
2728
2729
static void zram_reset_device(struct zram *zram)
2730
{
2731
down_write(&zram->init_lock);
2732
2733
zram->limit_pages = 0;
2734
2735
set_capacity_and_notify(zram->disk, 0);
2736
part_stat_set_all(zram->disk->part0, 0);
2737
2738
/* All I/O operations on all CPUs are done, so it is safe to free */
2739
zram_meta_free(zram, zram->disksize);
2740
zram->disksize = 0;
2741
zram_destroy_comps(zram);
2742
memset(&zram->stats, 0, sizeof(zram->stats));
2743
atomic_set(&zram->pp_in_progress, 0);
2744
reset_bdev(zram);
2745
2746
comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
2747
up_write(&zram->init_lock);
2748
}
2749
2750
static ssize_t disksize_store(struct device *dev,
2751
struct device_attribute *attr, const char *buf, size_t len)
2752
{
2753
u64 disksize;
2754
struct zcomp *comp;
2755
struct zram *zram = dev_to_zram(dev);
2756
int err;
2757
u32 prio;
2758
2759
disksize = memparse(buf, NULL);
2760
if (!disksize)
2761
return -EINVAL;
2762
2763
down_write(&zram->init_lock);
2764
if (init_done(zram)) {
2765
pr_info("Cannot change disksize for initialized device\n");
2766
err = -EBUSY;
2767
goto out_unlock;
2768
}
2769
2770
disksize = PAGE_ALIGN(disksize);
2771
if (!zram_meta_alloc(zram, disksize)) {
2772
err = -ENOMEM;
2773
goto out_unlock;
2774
}
2775
2776
for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
2777
if (!zram->comp_algs[prio])
2778
continue;
2779
2780
comp = zcomp_create(zram->comp_algs[prio],
2781
&zram->params[prio]);
2782
if (IS_ERR(comp)) {
2783
pr_err("Cannot initialise %s compressing backend\n",
2784
zram->comp_algs[prio]);
2785
err = PTR_ERR(comp);
2786
goto out_free_comps;
2787
}
2788
2789
zram->comps[prio] = comp;
2790
zram->num_active_comps++;
2791
}
2792
zram->disksize = disksize;
2793
set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
2794
up_write(&zram->init_lock);
2795
2796
return len;
2797
2798
out_free_comps:
2799
zram_destroy_comps(zram);
2800
zram_meta_free(zram, disksize);
2801
out_unlock:
2802
up_write(&zram->init_lock);
2803
return err;
2804
}
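/*
* Example (illustrative): the device is sized once, before first use, e.g.
*
*   echo 1G > /sys/block/zram0/disksize
*
* memparse() accepts the usual K/M/G suffixes; the value is then
* page-aligned and the configured compression backends are created.
*/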
2805
2806
static ssize_t reset_store(struct device *dev,
2807
struct device_attribute *attr, const char *buf, size_t len)
2808
{
2809
int ret;
2810
unsigned short do_reset;
2811
struct zram *zram;
2812
struct gendisk *disk;
2813
2814
ret = kstrtou16(buf, 10, &do_reset);
2815
if (ret)
2816
return ret;
2817
2818
if (!do_reset)
2819
return -EINVAL;
2820
2821
zram = dev_to_zram(dev);
2822
disk = zram->disk;
2823
2824
mutex_lock(&disk->open_mutex);
2825
/* Do not reset an active or claimed device */
2826
if (disk_openers(disk) || zram->claim) {
2827
mutex_unlock(&disk->open_mutex);
2828
return -EBUSY;
2829
}
2830
2831
/* From now on, no one can open /dev/zram[0-9] */
2832
zram->claim = true;
2833
mutex_unlock(&disk->open_mutex);
2834
2835
/* Make sure all pending I/O is finished */
2836
sync_blockdev(disk->part0);
2837
zram_reset_device(zram);
2838
2839
mutex_lock(&disk->open_mutex);
2840
zram->claim = false;
2841
mutex_unlock(&disk->open_mutex);
2842
2843
return len;
2844
}
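/*
* Example (illustrative): writing any non-zero value resets the device
* once it has no openers, e.g.
*
*   echo 1 > /sys/block/zram0/reset
*/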
2845
2846
static int zram_open(struct gendisk *disk, blk_mode_t mode)
2847
{
2848
struct zram *zram = disk->private_data;
2849
2850
WARN_ON(!mutex_is_locked(&disk->open_mutex));
2851
2852
/* zram was claimed for reset, so fail the open request */
2853
if (zram->claim)
2854
return -EBUSY;
2855
return 0;
2856
}
2857
2858
static const struct block_device_operations zram_devops = {
2859
.open = zram_open,
2860
.submit_bio = zram_submit_bio,
2861
.swap_slot_free_notify = zram_slot_free_notify,
2862
.owner = THIS_MODULE
2863
};
2864
2865
static DEVICE_ATTR_WO(compact);
2866
static DEVICE_ATTR_RW(disksize);
2867
static DEVICE_ATTR_RO(initstate);
2868
static DEVICE_ATTR_WO(reset);
2869
static DEVICE_ATTR_WO(mem_limit);
2870
static DEVICE_ATTR_WO(mem_used_max);
2871
static DEVICE_ATTR_WO(idle);
2872
static DEVICE_ATTR_RW(comp_algorithm);
2873
#ifdef CONFIG_ZRAM_WRITEBACK
2874
static DEVICE_ATTR_RW(backing_dev);
2875
static DEVICE_ATTR_WO(writeback);
2876
static DEVICE_ATTR_RW(writeback_limit);
2877
static DEVICE_ATTR_RW(writeback_limit_enable);
2878
static DEVICE_ATTR_RW(writeback_batch_size);
2879
#endif
2880
#ifdef CONFIG_ZRAM_MULTI_COMP
2881
static DEVICE_ATTR_RW(recomp_algorithm);
2882
static DEVICE_ATTR_WO(recompress);
2883
#endif
2884
static DEVICE_ATTR_WO(algorithm_params);
2885
2886
static struct attribute *zram_disk_attrs[] = {
2887
&dev_attr_disksize.attr,
2888
&dev_attr_initstate.attr,
2889
&dev_attr_reset.attr,
2890
&dev_attr_compact.attr,
2891
&dev_attr_mem_limit.attr,
2892
&dev_attr_mem_used_max.attr,
2893
&dev_attr_idle.attr,
2894
&dev_attr_comp_algorithm.attr,
2895
#ifdef CONFIG_ZRAM_WRITEBACK
2896
&dev_attr_backing_dev.attr,
2897
&dev_attr_writeback.attr,
2898
&dev_attr_writeback_limit.attr,
2899
&dev_attr_writeback_limit_enable.attr,
2900
&dev_attr_writeback_batch_size.attr,
2901
#endif
2902
&dev_attr_io_stat.attr,
2903
&dev_attr_mm_stat.attr,
2904
#ifdef CONFIG_ZRAM_WRITEBACK
2905
&dev_attr_bd_stat.attr,
2906
#endif
2907
&dev_attr_debug_stat.attr,
2908
#ifdef CONFIG_ZRAM_MULTI_COMP
2909
&dev_attr_recomp_algorithm.attr,
2910
&dev_attr_recompress.attr,
2911
#endif
2912
&dev_attr_algorithm_params.attr,
2913
NULL,
2914
};
2915
2916
ATTRIBUTE_GROUPS(zram_disk);
2917
2918
/*
2919
* Allocate and initialize a new zram device. The function returns
2921
* a device_id >= 0 upon success, and a negative value otherwise.
2921
*/
2922
static int zram_add(void)
2923
{
2924
struct queue_limits lim = {
2925
.logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE,
2926
/*
2927
* To ensure that we always get PAGE_SIZE-aligned and
2929
* n*PAGE_SIZE-sized I/O requests.
2929
*/
2930
.physical_block_size = PAGE_SIZE,
2931
.io_min = PAGE_SIZE,
2932
.io_opt = PAGE_SIZE,
2933
.max_hw_discard_sectors = UINT_MAX,
2934
/*
2935
* zram_bio_discard() will clear all logical blocks if logical
2936
* block size is identical to the physical block size (PAGE_SIZE).
2937
* But if it is different, we will skip discarding some parts of
2938
* logical blocks in the part of the request range which isn't
2939
* aligned to physical block size. So we can't ensure that all
2940
* discarded logical blocks are zeroed.
2941
*/
2942
#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
2943
.max_write_zeroes_sectors = UINT_MAX,
2944
#endif
2945
.features = BLK_FEAT_STABLE_WRITES |
2946
BLK_FEAT_SYNCHRONOUS,
2947
};
2948
struct zram *zram;
2949
int ret, device_id;
2950
2951
zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
2952
if (!zram)
2953
return -ENOMEM;
2954
2955
ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
2956
if (ret < 0)
2957
goto out_free_dev;
2958
device_id = ret;
2959
2960
init_rwsem(&zram->init_lock);
2961
#ifdef CONFIG_ZRAM_WRITEBACK
2962
zram->wb_batch_size = 32;
2963
#endif
2964
2965
/* gendisk structure */
2966
zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
2967
if (IS_ERR(zram->disk)) {
2968
pr_err("Error allocating disk structure for device %d\n",
2969
device_id);
2970
ret = PTR_ERR(zram->disk);
2971
goto out_free_idr;
2972
}
2973
2974
zram->disk->major = zram_major;
2975
zram->disk->first_minor = device_id;
2976
zram->disk->minors = 1;
2977
zram->disk->flags |= GENHD_FL_NO_PART;
2978
zram->disk->fops = &zram_devops;
2979
zram->disk->private_data = zram;
2980
snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
2981
atomic_set(&zram->pp_in_progress, 0);
2982
zram_comp_params_reset(zram);
2983
comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
2984
2985
/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
2986
set_capacity(zram->disk, 0);
2987
ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
2988
if (ret)
2989
goto out_cleanup_disk;
2990
2991
zram_debugfs_register(zram);
2992
pr_info("Added device: %s\n", zram->disk->disk_name);
2993
return device_id;
2994
2995
out_cleanup_disk:
2996
put_disk(zram->disk);
2997
out_free_idr:
2998
idr_remove(&zram_index_idr, device_id);
2999
out_free_dev:
3000
kfree(zram);
3001
return ret;
3002
}
3003
3004
static int zram_remove(struct zram *zram)
3005
{
3006
bool claimed;
3007
3008
mutex_lock(&zram->disk->open_mutex);
3009
if (disk_openers(zram->disk)) {
3010
mutex_unlock(&zram->disk->open_mutex);
3011
return -EBUSY;
3012
}
3013
3014
claimed = zram->claim;
3015
if (!claimed)
3016
zram->claim = true;
3017
mutex_unlock(&zram->disk->open_mutex);
3018
3019
zram_debugfs_unregister(zram);
3020
3021
if (claimed) {
3022
/*
3023
* If we were claimed by reset_store(), del_gendisk() will
3024
* wait until reset_store() is done, so there is nothing to do.
3025
*/
3026
;
3027
} else {
3028
/* Make sure all pending I/O is finished */
3029
sync_blockdev(zram->disk->part0);
3030
zram_reset_device(zram);
3031
}
3032
3033
pr_info("Removed device: %s\n", zram->disk->disk_name);
3034
3035
del_gendisk(zram->disk);
3036
3037
/* del_gendisk drains pending reset_store */
3038
WARN_ON_ONCE(claimed && zram->claim);
3039
3040
/*
3041
* disksize_store() may be called in between zram_reset_device()
3042
* and del_gendisk(), so run the last reset to avoid leaking
3043
* anything allocated with disksize_store()
3044
*/
3045
zram_reset_device(zram);
3046
3047
put_disk(zram->disk);
3048
kfree(zram);
3049
return 0;
3050
}
3051
3052
/* zram-control sysfs attributes */
3053
3054
/*
3055
* NOTE: hot_add attribute is not the usual read-only sysfs attribute, in the
3056
* sense that reading from this file does alter the state of your system -- it
3057
* creates a new un-initialized zram device and returns this device's
3058
* device_id (or an error code if it fails to create a new device).
3059
*/
3060
static ssize_t hot_add_show(const struct class *class,
3061
const struct class_attribute *attr,
3062
char *buf)
3063
{
3064
int ret;
3065
3066
mutex_lock(&zram_index_mutex);
3067
ret = zram_add();
3068
mutex_unlock(&zram_index_mutex);
3069
3070
if (ret < 0)
3071
return ret;
3072
return sysfs_emit(buf, "%d\n", ret);
3073
}
3074
/* This attribute must be set to 0400, so CLASS_ATTR_RO() cannot be used */
3075
static struct class_attribute class_attr_hot_add =
3076
__ATTR(hot_add, 0400, hot_add_show, NULL);
3077
3078
static ssize_t hot_remove_store(const struct class *class,
3079
const struct class_attribute *attr,
3080
const char *buf,
3081
size_t count)
3082
{
3083
struct zram *zram;
3084
int ret, dev_id;
3085
3086
/* dev_id is gendisk->first_minor, which is `int' */
3087
ret = kstrtoint(buf, 10, &dev_id);
3088
if (ret)
3089
return ret;
3090
if (dev_id < 0)
3091
return -EINVAL;
3092
3093
mutex_lock(&zram_index_mutex);
3094
3095
zram = idr_find(&zram_index_idr, dev_id);
3096
if (zram) {
3097
ret = zram_remove(zram);
3098
if (!ret)
3099
idr_remove(&zram_index_idr, dev_id);
3100
} else {
3101
ret = -ENODEV;
3102
}
3103
3104
mutex_unlock(&zram_index_mutex);
3105
return ret ? ret : count;
3106
}
3107
static CLASS_ATTR_WO(hot_remove);
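/*
* Example (illustrative): reading hot_add allocates a new device and
* prints its id, and writing that id to hot_remove deletes it again,
* provided the device is not in use:
*
*   cat /sys/class/zram-control/hot_add
*   echo 4 > /sys/class/zram-control/hot_remove
*/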
3108
3109
static struct attribute *zram_control_class_attrs[] = {
3110
&class_attr_hot_add.attr,
3111
&class_attr_hot_remove.attr,
3112
NULL,
3113
};
3114
ATTRIBUTE_GROUPS(zram_control_class);
3115
3116
static struct class zram_control_class = {
3117
.name = "zram-control",
3118
.class_groups = zram_control_class_groups,
3119
};
3120
3121
static int zram_remove_cb(int id, void *ptr, void *data)
3122
{
3123
WARN_ON_ONCE(zram_remove(ptr));
3124
return 0;
3125
}
3126
3127
static void destroy_devices(void)
3128
{
3129
class_unregister(&zram_control_class);
3130
idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
3131
zram_debugfs_destroy();
3132
idr_destroy(&zram_index_idr);
3133
unregister_blkdev(zram_major, "zram");
3134
cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3135
}
3136
3137
static int __init zram_init(void)
3138
{
3139
struct zram_table_entry zram_te;
3140
int ret;
3141
3142
BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8);
3143
3144
ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
3145
zcomp_cpu_up_prepare, zcomp_cpu_dead);
3146
if (ret < 0)
3147
return ret;
3148
3149
ret = class_register(&zram_control_class);
3150
if (ret) {
3151
pr_err("Unable to register zram-control class\n");
3152
cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3153
return ret;
3154
}
3155
3156
zram_debugfs_create();
3157
zram_major = register_blkdev(0, "zram");
3158
if (zram_major <= 0) {
3159
pr_err("Unable to get major number\n");
3160
class_unregister(&zram_control_class);
3161
cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
3162
return -EBUSY;
3163
}
3164
3165
while (num_devices != 0) {
3166
mutex_lock(&zram_index_mutex);
3167
ret = zram_add();
3168
mutex_unlock(&zram_index_mutex);
3169
if (ret < 0)
3170
goto out_error;
3171
num_devices--;
3172
}
3173
3174
return 0;
3175
3176
out_error:
3177
destroy_devices();
3178
return ret;
3179
}
3180
3181
static void __exit zram_exit(void)
3182
{
3183
destroy_devices();
3184
}
3185
3186
module_init(zram_init);
3187
module_exit(zram_exit);
3188
3189
module_param(num_devices, uint, 0);
3190
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
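/*
* Example (illustrative): pre-create four devices at module load time with
*
*   modprobe zram num_devices=4
*
* Additional devices can still be added later via hot_add.
*/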
3191
3192
MODULE_LICENSE("Dual BSD/GPL");
3193
MODULE_AUTHOR("Nitin Gupta <[email protected]>");
3194
MODULE_DESCRIPTION("Compressed RAM Block Device");
3195
3196