// SPDX-License-Identifier: GPL-2.0
/*
 * GitHub Repository: torvalds/linux
 * Path: blob/master/tools/testing/selftests/cgroup/test_kmem.c
 */
#define _GNU_SOURCE

#include <linux/limits.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/wait.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <pthread.h>

#include "../kselftest.h"
#include "cgroup_util.h"

/*
 * Memory cgroup charging is performed using percpu batches 64 pages
 * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So
 * the maximum discrepancy between charge and vmstat entries is number
 * of cpus multiplied by 64 pages.
 *
 * The page size is queried at run time rather than assumed to be 4096
 * bytes, so the tolerance still equals "64 pages per CPU" on systems
 * configured with larger pages.
 */
#define MAX_VMSTAT_ERROR (sysconf(_SC_PAGESIZE) * 64 * get_nprocs())

/*
 * Look up a series of non-existent paths with stat().
 *
 * @cgroup: unused; the parameter is present so the function can be
 *          handed to cg_run() as a callback.
 * @arg:    number of lookups to perform, passed as an integer value
 *          cast to a pointer.
 *
 * Every path embeds the loop index (padded to 64 columns, which is what
 * makes the names long) and the caller's pid. The result of stat() is
 * deliberately ignored: the lookups are expected to fail.
 *
 * Always returns 0.
 */
static int alloc_dcache(const char *cgroup, void *arg)
{
	unsigned long i;
	struct stat st;
	char buf[128];

	for (i = 0; i < (unsigned long)arg; i++) {
		snprintf(buf, sizeof(buf),
			 "/something-non-existent-with-a-long-name-%64lu-%d",
			 i, getpid());
		stat(buf, &st);
	}

	return 0;
}

/*
47
* This test allocates 100000 of negative dentries with long names.
48
* Then it checks that "slab" in memory.stat is larger than 1M.
49
* Then it sets memory.high to 1M and checks that at least 1/2
50
* of slab memory has been reclaimed.
51
*/
52
static int test_kmem_basic(const char *root)
53
{
54
int ret = KSFT_FAIL;
55
char *cg = NULL;
56
long slab0, slab1, current;
57
58
cg = cg_name(root, "kmem_basic_test");
59
if (!cg)
60
goto cleanup;
61
62
if (cg_create(cg))
63
goto cleanup;
64
65
if (cg_run(cg, alloc_dcache, (void *)100000))
66
goto cleanup;
67
68
slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
69
if (slab0 < (1 << 20))
70
goto cleanup;
71
72
cg_write(cg, "memory.high", "1M");
73
74
/* wait for RCU freeing */
75
sleep(1);
76
77
slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
78
if (slab1 < 0)
79
goto cleanup;
80
81
current = cg_read_long(cg, "memory.current");
82
if (current < 0)
83
goto cleanup;
84
85
if (slab1 < slab0 / 2 && current < slab0 / 2)
86
ret = KSFT_PASS;
87
cleanup:
88
cg_destroy(cg);
89
free(cg);
90
91
return ret;
92
}
93
94
/*
 * Thread entry point: performs 100 alloc_dcache() lookups.
 *
 * @arg: unused.
 *
 * Always returns NULL.
 */
static void *alloc_kmem_fn(void *arg)
{
	alloc_dcache(NULL, (void *)100);
	return NULL;
}

static int alloc_kmem_smp(const char *cgroup, void *arg)
101
{
102
int nr_threads = 2 * get_nprocs();
103
pthread_t *tinfo;
104
unsigned long i;
105
int ret = -1;
106
107
tinfo = calloc(nr_threads, sizeof(pthread_t));
108
if (tinfo == NULL)
109
return -1;
110
111
for (i = 0; i < nr_threads; i++) {
112
if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
113
(void *)i)) {
114
free(tinfo);
115
return -1;
116
}
117
}
118
119
for (i = 0; i < nr_threads; i++) {
120
ret = pthread_join(tinfo[i], NULL);
121
if (ret)
122
break;
123
}
124
125
free(tinfo);
126
return ret;
127
}
128
129
/*
 * Sequentially create @times child cgroups named "child" + index under
 * @parent, run @fn in each of them via cg_run(), and destroy each child
 * before moving on to the next one.
 *
 * @parent: path of the parent cgroup.
 * @fn:     callback executed through cg_run() for every child.
 * @arg:    opaque argument forwarded to @fn.
 * @times:  number of child cgroups to go through.
 *
 * Returns 0 on success, -1 as soon as naming, creating or running in a
 * child fails. The child being processed is destroyed and its name
 * freed in every case.
 */
static int cg_run_in_subcgroups(const char *parent,
				int (*fn)(const char *cgroup, void *arg),
				void *arg, int times)
{
	char *child;
	int ret;
	int i;

	for (i = 0; i < times; i++) {
		child = cg_name_indexed(parent, "child", i);
		if (!child)
			return -1;

		ret = cg_create(child);
		if (!ret) {
			/* Forward the caller's argument to the callback. */
			ret = cg_run(child, fn, arg);
		}

		cg_destroy(child);
		free(child);

		if (ret)
			return -1;
	}

	return 0;
}

/*
161
* The test creates and destroys a large number of cgroups. In each cgroup it
162
* allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
163
* threads. Then it checks the sanity of numbers on the parent level:
164
* the total size of the cgroups should be roughly equal to
165
* anon + file + kernel + sock.
166
*/
167
static int test_kmem_memcg_deletion(const char *root)
168
{
169
long current, anon, file, kernel, sock, sum;
170
int ret = KSFT_FAIL;
171
char *parent;
172
173
parent = cg_name(root, "kmem_memcg_deletion_test");
174
if (!parent)
175
goto cleanup;
176
177
if (cg_create(parent))
178
goto cleanup;
179
180
if (cg_write(parent, "cgroup.subtree_control", "+memory"))
181
goto cleanup;
182
183
if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
184
goto cleanup;
185
186
current = cg_read_long(parent, "memory.current");
187
anon = cg_read_key_long(parent, "memory.stat", "anon ");
188
file = cg_read_key_long(parent, "memory.stat", "file ");
189
kernel = cg_read_key_long(parent, "memory.stat", "kernel ");
190
sock = cg_read_key_long(parent, "memory.stat", "sock ");
191
if (current < 0 || anon < 0 || file < 0 || kernel < 0 || sock < 0)
192
goto cleanup;
193
194
sum = anon + file + kernel + sock;
195
if (labs(sum - current) < MAX_VMSTAT_ERROR) {
196
ret = KSFT_PASS;
197
} else {
198
printf("memory.current = %ld\n", current);
199
printf("anon + file + kernel + sock = %ld\n", sum);
200
printf("anon = %ld\n", anon);
201
printf("file = %ld\n", file);
202
printf("kernel = %ld\n", kernel);
203
printf("sock = %ld\n", sock);
204
}
205
206
cleanup:
207
cg_destroy(parent);
208
free(parent);
209
210
return ret;
211
}
212
213
/*
214
* The test reads the entire /proc/kpagecgroup. If the operation went
215
* successfully (and the kernel didn't panic), the test is treated as passed.
216
*/
217
static int test_kmem_proc_kpagecgroup(const char *root)
218
{
219
unsigned long buf[128];
220
int ret = KSFT_FAIL;
221
ssize_t len;
222
int fd;
223
224
fd = open("/proc/kpagecgroup", O_RDONLY);
225
if (fd < 0)
226
return ret;
227
228
do {
229
len = read(fd, buf, sizeof(buf));
230
} while (len > 0);
231
232
if (len == 0)
233
ret = KSFT_PASS;
234
235
close(fd);
236
return ret;
237
}
238
239
/*
 * Thread entry point that simply sleeps for 100 seconds.
 *
 * @arg: unused.
 *
 * Always returns NULL.
 */
static void *pthread_wait_fn(void *arg)
{
	sleep(100);
	return NULL;
}

static int spawn_1000_threads(const char *cgroup, void *arg)
246
{
247
int nr_threads = 1000;
248
pthread_t *tinfo;
249
unsigned long i;
250
long stack;
251
int ret = -1;
252
253
tinfo = calloc(nr_threads, sizeof(pthread_t));
254
if (tinfo == NULL)
255
return -1;
256
257
for (i = 0; i < nr_threads; i++) {
258
if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
259
(void *)i)) {
260
free(tinfo);
261
return(-1);
262
}
263
}
264
265
stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
266
if (stack >= 4096 * 1000)
267
ret = 0;
268
269
free(tinfo);
270
return ret;
271
}
272
273
/*
274
* The test spawns a process, which spawns 1000 threads. Then it checks
275
* that memory.stat's kernel_stack is at least 1000 pages large.
276
*/
277
static int test_kmem_kernel_stacks(const char *root)
278
{
279
int ret = KSFT_FAIL;
280
char *cg = NULL;
281
282
cg = cg_name(root, "kmem_kernel_stacks_test");
283
if (!cg)
284
goto cleanup;
285
286
if (cg_create(cg))
287
goto cleanup;
288
289
if (cg_run(cg, spawn_1000_threads, NULL))
290
goto cleanup;
291
292
ret = KSFT_PASS;
293
cleanup:
294
cg_destroy(cg);
295
free(cg);
296
297
return ret;
298
}
299
300
/*
301
* This test sequentionally creates 30 child cgroups, allocates some
302
* kernel memory in each of them, and deletes them. Then it checks
303
* that the number of dying cgroups on the parent level is 0.
304
*/
305
static int test_kmem_dead_cgroups(const char *root)
306
{
307
int ret = KSFT_FAIL;
308
char *parent;
309
long dead;
310
int i;
311
int max_time = 20;
312
313
parent = cg_name(root, "kmem_dead_cgroups_test");
314
if (!parent)
315
goto cleanup;
316
317
if (cg_create(parent))
318
goto cleanup;
319
320
if (cg_write(parent, "cgroup.subtree_control", "+memory"))
321
goto cleanup;
322
323
if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
324
goto cleanup;
325
326
for (i = 0; i < max_time; i++) {
327
dead = cg_read_key_long(parent, "cgroup.stat",
328
"nr_dying_descendants ");
329
if (dead == 0) {
330
ret = KSFT_PASS;
331
break;
332
}
333
/*
334
* Reclaiming cgroups might take some time,
335
* let's wait a bit and repeat.
336
*/
337
sleep(1);
338
if (i > 5)
339
printf("Waiting time longer than 5s; wait: %ds (dead: %ld)\n", i, dead);
340
}
341
342
cleanup:
343
cg_destroy(parent);
344
free(parent);
345
346
return ret;
347
}
348
349
/*
350
* This test creates a sub-tree with 1000 memory cgroups.
351
* Then it checks that the memory.current on the parent level
352
* is greater than 0 and approximates matches the percpu value
353
* from memory.stat.
354
*/
355
static int test_percpu_basic(const char *root)
356
{
357
int ret = KSFT_FAIL;
358
char *parent, *child;
359
long current, percpu;
360
int i;
361
362
parent = cg_name(root, "percpu_basic_test");
363
if (!parent)
364
goto cleanup;
365
366
if (cg_create(parent))
367
goto cleanup;
368
369
if (cg_write(parent, "cgroup.subtree_control", "+memory"))
370
goto cleanup;
371
372
for (i = 0; i < 1000; i++) {
373
child = cg_name_indexed(parent, "child", i);
374
if (!child)
375
return -1;
376
377
if (cg_create(child))
378
goto cleanup_children;
379
380
free(child);
381
}
382
383
current = cg_read_long(parent, "memory.current");
384
percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
385
386
if (current > 0 && percpu > 0 && labs(current - percpu) <
387
MAX_VMSTAT_ERROR)
388
ret = KSFT_PASS;
389
else
390
printf("memory.current %ld\npercpu %ld\n",
391
current, percpu);
392
393
cleanup_children:
394
for (i = 0; i < 1000; i++) {
395
child = cg_name_indexed(parent, "child", i);
396
cg_destroy(child);
397
free(child);
398
}
399
400
cleanup:
401
cg_destroy(parent);
402
free(parent);
403
404
return ret;
405
}
406
407
#define T(x) { x, #x }
408
struct kmem_test {
409
int (*fn)(const char *root);
410
const char *name;
411
} tests[] = {
412
T(test_kmem_basic),
413
T(test_kmem_memcg_deletion),
414
T(test_kmem_proc_kpagecgroup),
415
T(test_kmem_kernel_stacks),
416
T(test_kmem_dead_cgroups),
417
T(test_percpu_basic),
418
};
419
#undef T
420
421
int main(int argc, char **argv)
422
{
423
char root[PATH_MAX];
424
int i, ret = EXIT_SUCCESS;
425
426
if (cg_find_unified_root(root, sizeof(root), NULL))
427
ksft_exit_skip("cgroup v2 isn't mounted\n");
428
429
/*
430
* Check that memory controller is available:
431
* memory is listed in cgroup.controllers
432
*/
433
if (cg_read_strstr(root, "cgroup.controllers", "memory"))
434
ksft_exit_skip("memory controller isn't available\n");
435
436
if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
437
if (cg_write(root, "cgroup.subtree_control", "+memory"))
438
ksft_exit_skip("Failed to set memory controller\n");
439
440
for (i = 0; i < ARRAY_SIZE(tests); i++) {
441
switch (tests[i].fn(root)) {
442
case KSFT_PASS:
443
ksft_test_result_pass("%s\n", tests[i].name);
444
break;
445
case KSFT_SKIP:
446
ksft_test_result_skip("%s\n", tests[i].name);
447
break;
448
default:
449
ret = EXIT_FAILURE;
450
ksft_test_result_fail("%s\n", tests[i].name);
451
break;
452
}
453
}
454
455
return ret;
456
}
457
458