Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/testing/selftests/cgroup/test_zswap.c
26285 views
1
// SPDX-License-Identifier: GPL-2.0
2
#define _GNU_SOURCE
3
4
#include <linux/limits.h>
5
#include <unistd.h>
6
#include <stdio.h>
7
#include <signal.h>
8
#include <sys/sysinfo.h>
9
#include <string.h>
10
#include <sys/wait.h>
11
#include <sys/mman.h>
12
13
#include "../kselftest.h"
14
#include "cgroup_util.h"
15
16
/*
 * Read one unsigned decimal number from the file at @path into @value.
 *
 * Returns 0 on success, or -1 when the file cannot be opened or does not
 * begin with a parsable number (in which case @value is left untouched by
 * this function).
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* @value is a size_t *, so the matching conversion is %zu, not %ld. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29
30
/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the (positive) number of characters formatted on success and a
 * negative value when the file cannot be opened, formatting fails, or the
 * buffered data cannot be flushed on close.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* %zu matches the size_t argument; %ld would expect a long. */
	ret = fprintf(file, "%zu\n", value);
	/* stdio buffers the write, so a rejected value only shows up here. */
	if (fclose(file) != 0)
		ret = -1;
	return ret;
}
42
43
/*
 * Fetch the current /proc/sys/vm/min_free_kbytes setting into @value.
 * Returns whatever read_int() returns: 0 on success, -1 on failure.
 */
static int read_min_free_kb(size_t *value)
{
	static const char sysctl_path[] = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
47
48
/*
 * Read the zswap debugfs "stored_pages" counter into @value.
 * Returns whatever read_int() returns: 0 on success, -1 on failure.
 */
static int get_zswap_stored_pages(size_t *value)
{
	static const char debugfs_path[] = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(debugfs_path, value);
}
52
53
/*
 * Look up the "zswpwb" entry of @cg's memory.stat file.
 * The result of cg_read_key_long() is passed through unchanged; callers in
 * this file treat a negative value as a read failure.
 */
static long get_cg_wb_count(const char *cg)
{
	long written_back = cg_read_key_long(cg, "memory.stat", "zswpwb");

	return written_back;
}
57
58
/*
 * Look up the "zswpout " entry of @cgroup's memory.stat file.
 * The result of cg_read_key_long() is passed through unchanged; callers in
 * this file treat a negative value as a read failure.
 */
static long get_zswpout(const char *cgroup)
{
	long pages_out = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

	return pages_out;
}
62
63
/*
 * cg_run() callback: allocate a buffer, write to it at a fixed stride, then
 * read every written byte back.
 *
 * @cgroup: unused.
 * @arg:    the allocation size in bytes, passed as an integer cast to void *.
 *
 * Returns 0 when every byte reads back as written, -1 on allocation failure
 * or on a mismatch.
 */
static int allocate_and_read_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);
	int ret = 0;

	if (!mem)
		return -1;
	/* size_t index: an int would overflow for sizes beyond INT_MAX. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';

	/* Go through the allocated memory to (z)swap in and out pages */
	for (size_t i = 0; i < size; i += 4095) {
		if (mem[i] != 'a')
			ret = -1;
	}

	free(mem);
	return ret;
}
83
84
/*
 * cg_run() callback: allocate a buffer and write to it at a fixed stride.
 *
 * @cgroup: unused.
 * @arg:    the allocation size in bytes, passed as an integer cast to void *.
 *
 * Returns 0 on success, -1 when the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/* size_t index: an int would overflow for sizes beyond INT_MAX. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
96
97
/*
 * Create the cgroup @name under @root and set its memory.max to "1M".
 *
 * Returns the heap-allocated cgroup path (the caller destroys the cgroup and
 * frees the string), or NULL if any step fails. On failure nothing is left
 * behind: a cgroup that was created but could not be configured is destroyed.
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
	char *path = cg_name(root, name);

	if (!path)
		return NULL;

	if (cg_create(path) == 0) {
		if (cg_write(path, "memory.max", "1M") == 0)
			return path;
		cg_destroy(path);
	}

	free(path);
	return NULL;
}
114
115
/*
116
* Sanity test to check that pages are written into zswap.
117
*/
118
static int test_zswap_usage(const char *root)
119
{
120
long zswpout_before, zswpout_after;
121
int ret = KSFT_FAIL;
122
char *test_group;
123
124
test_group = cg_name(root, "no_shrink_test");
125
if (!test_group)
126
goto out;
127
if (cg_create(test_group))
128
goto out;
129
if (cg_write(test_group, "memory.max", "1M"))
130
goto out;
131
132
zswpout_before = get_zswpout(test_group);
133
if (zswpout_before < 0) {
134
ksft_print_msg("Failed to get zswpout\n");
135
goto out;
136
}
137
138
/* Allocate more than memory.max to push memory into zswap */
139
if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
140
goto out;
141
142
/* Verify that pages come into zswap */
143
zswpout_after = get_zswpout(test_group);
144
if (zswpout_after <= zswpout_before) {
145
ksft_print_msg("zswpout does not increase after test program\n");
146
goto out;
147
}
148
ret = KSFT_PASS;
149
150
out:
151
cg_destroy(test_group);
152
free(test_group);
153
return ret;
154
}
155
156
/*
157
* Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
158
* the cgroup.
159
*/
160
static int test_swapin_nozswap(const char *root)
161
{
162
int ret = KSFT_FAIL;
163
char *test_group;
164
long swap_peak, zswpout;
165
166
test_group = cg_name(root, "no_zswap_test");
167
if (!test_group)
168
goto out;
169
if (cg_create(test_group))
170
goto out;
171
if (cg_write(test_group, "memory.max", "8M"))
172
goto out;
173
if (cg_write(test_group, "memory.zswap.max", "0"))
174
goto out;
175
176
/* Allocate and read more than memory.max to trigger swapin */
177
if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
178
goto out;
179
180
/* Verify that pages are swapped out, but no zswap happened */
181
swap_peak = cg_read_long(test_group, "memory.swap.peak");
182
if (swap_peak < 0) {
183
ksft_print_msg("failed to get cgroup's swap_peak\n");
184
goto out;
185
}
186
187
if (swap_peak < MB(24)) {
188
ksft_print_msg("at least 24MB of memory should be swapped out\n");
189
goto out;
190
}
191
192
zswpout = get_zswpout(test_group);
193
if (zswpout < 0) {
194
ksft_print_msg("failed to get zswpout\n");
195
goto out;
196
}
197
198
if (zswpout > 0) {
199
ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
200
goto out;
201
}
202
203
ret = KSFT_PASS;
204
205
out:
206
cg_destroy(test_group);
207
free(test_group);
208
return ret;
209
}
210
211
/* Simple test to verify the (z)swapin code paths */
212
static int test_zswapin(const char *root)
213
{
214
int ret = KSFT_FAIL;
215
char *test_group;
216
long zswpin;
217
218
test_group = cg_name(root, "zswapin_test");
219
if (!test_group)
220
goto out;
221
if (cg_create(test_group))
222
goto out;
223
if (cg_write(test_group, "memory.max", "8M"))
224
goto out;
225
if (cg_write(test_group, "memory.zswap.max", "max"))
226
goto out;
227
228
/* Allocate and read more than memory.max to trigger (z)swap in */
229
if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
230
goto out;
231
232
zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin ");
233
if (zswpin < 0) {
234
ksft_print_msg("failed to get zswpin\n");
235
goto out;
236
}
237
238
if (zswpin < MB(24) / PAGE_SIZE) {
239
ksft_print_msg("at least 24MB should be brought back from zswap\n");
240
goto out;
241
}
242
243
ret = KSFT_PASS;
244
245
out:
246
cg_destroy(test_group);
247
free(test_group);
248
return ret;
249
}
250
251
/*
 * Attempt writeback with the following steps:
 * 1. Allocate memory.
 * 2. Reclaim memory equal to the amount that was allocated in step 1.
 *    This will move it into zswap.
 * 3. Save current zswap usage.
 * 4. Move the memory allocated in step 1 back in from zswap.
 * 5. Set zswap.max to half the amount that was recorded in step 3.
 * 6. Attempt to reclaim memory equal to the amount that was allocated,
 *    this will either trigger writeback if it's enabled, or reclamation
 *    will fail if writeback is disabled as there isn't enough zswap space.
 *
 * cg_run() callback. @arg points to a bool telling whether writeback is
 * enabled for @cgroup.
 *
 * Returns 0 on the expected outcome. With writeback enabled the result of
 * the final memory.reclaim write is returned as is; with writeback disabled
 * only -EAGAIN from that write counts as success. Any earlier failure
 * yields -1.
 */
static int attempt_writeback(const char *cgroup, void *arg)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	size_t memsize = MB(4);
	char buf[pagesize];
	long zswap_usage;
	bool wb_enabled = *(bool *) arg;
	int ret = -1;
	char *mem;

	mem = malloc(memsize);
	if (!mem)
		return ret;

	/*
	 * Fill half of each page with increasing data, and keep other
	 * half empty, this will result in data that is still compressible
	 * and ends up in zswap, with material zswap usage.
	 */
	for (long i = 0; i < pagesize; i++)
		buf[i] = i < pagesize/2 ? (char) i : 0;

	for (size_t off = 0; off < memsize; off += pagesize)
		memcpy(&mem[off], buf, pagesize);

	/* Try and reclaim allocated memory */
	if (cg_write_numeric(cgroup, "memory.reclaim", memsize)) {
		ksft_print_msg("Failed to reclaim all of the requested memory\n");
		goto out;
	}

	zswap_usage = cg_read_long(cgroup, "memory.zswap.current");
	/* A failed read must not be halved and written back as a limit. */
	if (zswap_usage < 0) {
		ksft_print_msg("Failed to read memory.zswap.current\n");
		goto out;
	}

	/* zswpin */
	for (size_t off = 0; off < memsize; off += pagesize) {
		if (memcmp(&mem[off], buf, pagesize)) {
			ksft_print_msg("invalid memory\n");
			goto out;
		}
	}

	if (cg_write_numeric(cgroup, "memory.zswap.max", zswap_usage/2))
		goto out;

	/*
	 * If writeback is enabled, trying to reclaim memory now will trigger a
	 * writeback as zswap.max is half of what was needed when reclaim ran the first time.
	 * If writeback is disabled, memory reclaim will fail as zswap is limited and
	 * it can't writeback to swap.
	 */
	ret = cg_write_numeric(cgroup, "memory.reclaim", memsize);
	if (!wb_enabled)
		ret = (ret == -EAGAIN) ? 0 : -1;

out:
	free(mem);
	return ret;
}
321
322
static int test_zswap_writeback_one(const char *cgroup, bool wb)
323
{
324
long zswpwb_before, zswpwb_after;
325
326
zswpwb_before = get_cg_wb_count(cgroup);
327
if (zswpwb_before != 0) {
328
ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before);
329
return -1;
330
}
331
332
if (cg_run(cgroup, attempt_writeback, (void *) &wb))
333
return -1;
334
335
/* Verify that zswap writeback occurred only if writeback was enabled */
336
zswpwb_after = get_cg_wb_count(cgroup);
337
if (zswpwb_after < 0)
338
return -1;
339
340
if (wb != !!zswpwb_after) {
341
ksft_print_msg("zswpwb_after is %ld while wb is %s\n",
342
zswpwb_after, wb ? "enabled" : "disabled");
343
return -1;
344
}
345
346
return 0;
347
}
348
349
/* Test to verify the zswap writeback path */
350
static int test_zswap_writeback(const char *root, bool wb)
351
{
352
int ret = KSFT_FAIL;
353
char *test_group, *test_group_child = NULL;
354
355
if (cg_read_strcmp(root, "memory.zswap.writeback", "1"))
356
return KSFT_SKIP;
357
358
test_group = cg_name(root, "zswap_writeback_test");
359
if (!test_group)
360
goto out;
361
if (cg_create(test_group))
362
goto out;
363
if (cg_write(test_group, "memory.zswap.writeback", wb ? "1" : "0"))
364
goto out;
365
366
if (test_zswap_writeback_one(test_group, wb))
367
goto out;
368
369
/* Reset memory.zswap.max to max (modified by attempt_writeback), and
370
* set up child cgroup, whose memory.zswap.writeback is hardcoded to 1.
371
* Thus, the parent's setting shall be what's in effect. */
372
if (cg_write(test_group, "memory.zswap.max", "max"))
373
goto out;
374
if (cg_write(test_group, "cgroup.subtree_control", "+memory"))
375
goto out;
376
377
test_group_child = cg_name(test_group, "zswap_writeback_test_child");
378
if (!test_group_child)
379
goto out;
380
if (cg_create(test_group_child))
381
goto out;
382
if (cg_write(test_group_child, "memory.zswap.writeback", "1"))
383
goto out;
384
385
if (test_zswap_writeback_one(test_group_child, wb))
386
goto out;
387
388
ret = KSFT_PASS;
389
390
out:
391
if (test_group_child) {
392
cg_destroy(test_group_child);
393
free(test_group_child);
394
}
395
cg_destroy(test_group);
396
free(test_group);
397
return ret;
398
}
399
400
/* Table entry point: run test_zswap_writeback() with writeback turned on. */
static int test_zswap_writeback_enabled(const char *root)
{
	const bool wb = true;

	return test_zswap_writeback(root, wb);
}
404
405
/* Table entry point: run test_zswap_writeback() with writeback turned off. */
static int test_zswap_writeback_disabled(const char *root)
{
	const bool wb = false;

	return test_zswap_writeback(root, wb);
}
409
410
/*
411
* When trying to store a memcg page in zswap, if the memcg hits its memory
412
* limit in zswap, writeback should affect only the zswapped pages of that
413
* memcg.
414
*/
415
static int test_no_invasive_cgroup_shrink(const char *root)
416
{
417
int ret = KSFT_FAIL;
418
size_t control_allocation_size = MB(10);
419
char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL;
420
421
wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
422
if (!wb_group)
423
return KSFT_FAIL;
424
if (cg_write(wb_group, "memory.zswap.max", "10K"))
425
goto out;
426
control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
427
if (!control_group)
428
goto out;
429
430
/* Push some test_group2 memory into zswap */
431
if (cg_enter_current(control_group))
432
goto out;
433
control_allocation = malloc(control_allocation_size);
434
for (int i = 0; i < control_allocation_size; i += 4095)
435
control_allocation[i] = 'a';
436
if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
437
goto out;
438
439
/* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
440
if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
441
goto out;
442
443
/* Verify that only zswapped memory from gwb_group has been written back */
444
if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
445
ret = KSFT_PASS;
446
out:
447
cg_enter_current(root);
448
if (control_group) {
449
cg_destroy(control_group);
450
free(control_group);
451
}
452
cg_destroy(wb_group);
453
free(wb_group);
454
if (control_allocation)
455
free(control_allocation);
456
return ret;
457
}
458
459
/*
 * Data exchanged between test_no_kmem_bypass() and the child it spawns
 * through no_kmem_bypass_child().
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child allocates and writes to. */
	size_t target_alloc_bytes;
	/* Set to true by the child once its allocation attempt is over. */
	size_t child_allocated;
};
463
464
static int no_kmem_bypass_child(const char *cgroup, void *arg)
465
{
466
struct no_kmem_bypass_child_args *values = arg;
467
void *allocation;
468
469
allocation = malloc(values->target_alloc_bytes);
470
if (!allocation) {
471
values->child_allocated = true;
472
return -1;
473
}
474
for (long i = 0; i < values->target_alloc_bytes; i += 4095)
475
((char *)allocation)[i] = 'a';
476
values->child_allocated = true;
477
pause();
478
free(allocation);
479
return 0;
480
}
481
482
/*
483
* When pages owned by a memcg are pushed to zswap by kswapd, they should be
484
* charged to that cgroup. This wasn't the case before commit
485
* cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
486
*
487
* The test first allocates memory in a memcg, then raises min_free_kbytes to
488
* a very high value so that the allocation falls below low wm, then makes
489
* another allocation to trigger kswapd that should push the memcg-owned pages
490
* to zswap and verifies that the zswap pages are correctly charged.
491
*
492
* To be run on a VM with at most 4G of memory.
493
*/
494
static int test_no_kmem_bypass(const char *root)
495
{
496
size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
497
struct no_kmem_bypass_child_args *values;
498
size_t trigger_allocation_size;
499
int wait_child_iteration = 0;
500
long stored_pages_threshold;
501
struct sysinfo sys_info;
502
int ret = KSFT_FAIL;
503
int child_status;
504
char *test_group = NULL;
505
pid_t child_pid;
506
507
/* Read sys info and compute test values accordingly */
508
if (sysinfo(&sys_info) != 0)
509
return KSFT_FAIL;
510
if (sys_info.totalram > 5000000000)
511
return KSFT_SKIP;
512
values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
513
PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
514
if (values == MAP_FAILED)
515
return KSFT_FAIL;
516
if (read_min_free_kb(&min_free_kb_original))
517
return KSFT_FAIL;
518
min_free_kb_high = sys_info.totalram / 2000;
519
min_free_kb_low = sys_info.totalram / 500000;
520
values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
521
sys_info.totalram * 5 / 100;
522
stored_pages_threshold = sys_info.totalram / 5 / 4096;
523
trigger_allocation_size = sys_info.totalram / 20;
524
525
/* Set up test memcg */
526
test_group = cg_name(root, "kmem_bypass_test");
527
if (!test_group)
528
goto out;
529
530
/* Spawn memcg child and wait for it to allocate */
531
set_min_free_kb(min_free_kb_low);
532
if (cg_create(test_group))
533
goto out;
534
values->child_allocated = false;
535
child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
536
if (child_pid < 0)
537
goto out;
538
while (!values->child_allocated && wait_child_iteration++ < 10000)
539
usleep(1000);
540
541
/* Try to wakeup kswapd and let it push child memory to zswap */
542
set_min_free_kb(min_free_kb_high);
543
for (int i = 0; i < 20; i++) {
544
size_t stored_pages;
545
char *trigger_allocation = malloc(trigger_allocation_size);
546
547
if (!trigger_allocation)
548
break;
549
for (int i = 0; i < trigger_allocation_size; i += 4095)
550
trigger_allocation[i] = 'b';
551
usleep(100000);
552
free(trigger_allocation);
553
if (get_zswap_stored_pages(&stored_pages))
554
break;
555
if (stored_pages < 0)
556
break;
557
/* If memory was pushed to zswap, verify it belongs to memcg */
558
if (stored_pages > stored_pages_threshold) {
559
int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
560
int delta = stored_pages * 4096 - zswapped;
561
int result_ok = delta < stored_pages * 4096 / 4;
562
563
ret = result_ok ? KSFT_PASS : KSFT_FAIL;
564
break;
565
}
566
}
567
568
kill(child_pid, SIGTERM);
569
waitpid(child_pid, &child_status, 0);
570
out:
571
set_min_free_kb(min_free_kb_original);
572
cg_destroy(test_group);
573
free(test_group);
574
return ret;
575
}
576
577
#define T(x) { x, #x }
578
struct zswap_test {
579
int (*fn)(const char *root);
580
const char *name;
581
} tests[] = {
582
T(test_zswap_usage),
583
T(test_swapin_nozswap),
584
T(test_zswapin),
585
T(test_zswap_writeback_enabled),
586
T(test_zswap_writeback_disabled),
587
T(test_no_kmem_bypass),
588
T(test_no_invasive_cgroup_shrink),
589
};
590
#undef T
591
592
/* Report whether /sys/module/zswap exists according to access(F_OK). */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;
	return true;
}
596
597
int main(int argc, char **argv)
598
{
599
char root[PATH_MAX];
600
int i, ret = EXIT_SUCCESS;
601
602
if (cg_find_unified_root(root, sizeof(root), NULL))
603
ksft_exit_skip("cgroup v2 isn't mounted\n");
604
605
if (!zswap_configured())
606
ksft_exit_skip("zswap isn't configured\n");
607
608
/*
609
* Check that memory controller is available:
610
* memory is listed in cgroup.controllers
611
*/
612
if (cg_read_strstr(root, "cgroup.controllers", "memory"))
613
ksft_exit_skip("memory controller isn't available\n");
614
615
if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
616
if (cg_write(root, "cgroup.subtree_control", "+memory"))
617
ksft_exit_skip("Failed to set memory controller\n");
618
619
for (i = 0; i < ARRAY_SIZE(tests); i++) {
620
switch (tests[i].fn(root)) {
621
case KSFT_PASS:
622
ksft_test_result_pass("%s\n", tests[i].name);
623
break;
624
case KSFT_SKIP:
625
ksft_test_result_skip("%s\n", tests[i].name);
626
break;
627
default:
628
ret = EXIT_FAILURE;
629
ksft_test_result_fail("%s\n", tests[i].name);
630
break;
631
}
632
}
633
634
return ret;
635
}
636
637