Path: blob/master/tools/testing/selftests/cgroup/test_kmem.c
26285 views
// SPDX-License-Identifier: GPL-2.01#define _GNU_SOURCE23#include <linux/limits.h>4#include <fcntl.h>5#include <stdio.h>6#include <stdlib.h>7#include <string.h>8#include <sys/stat.h>9#include <sys/types.h>10#include <unistd.h>11#include <sys/wait.h>12#include <errno.h>13#include <sys/sysinfo.h>14#include <pthread.h>1516#include "../kselftest.h"17#include "cgroup_util.h"181920/*21* Memory cgroup charging is performed using percpu batches 64 pages22* big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So23* the maximum discrepancy between charge and vmstat entries is number24* of cpus multiplied by 64 pages.25*/26#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs())272829static int alloc_dcache(const char *cgroup, void *arg)30{31unsigned long i;32struct stat st;33char buf[128];3435for (i = 0; i < (unsigned long)arg; i++) {36snprintf(buf, sizeof(buf),37"/something-non-existent-with-a-long-name-%64lu-%d",38i, getpid());39stat(buf, &st);40}4142return 0;43}4445/*46* This test allocates 100000 of negative dentries with long names.47* Then it checks that "slab" in memory.stat is larger than 1M.48* Then it sets memory.high to 1M and checks that at least 1/249* of slab memory has been reclaimed.50*/51static int test_kmem_basic(const char *root)52{53int ret = KSFT_FAIL;54char *cg = NULL;55long slab0, slab1, current;5657cg = cg_name(root, "kmem_basic_test");58if (!cg)59goto cleanup;6061if (cg_create(cg))62goto cleanup;6364if (cg_run(cg, alloc_dcache, (void *)100000))65goto cleanup;6667slab0 = cg_read_key_long(cg, "memory.stat", "slab ");68if (slab0 < (1 << 20))69goto cleanup;7071cg_write(cg, "memory.high", "1M");7273/* wait for RCU freeing */74sleep(1);7576slab1 = cg_read_key_long(cg, "memory.stat", "slab ");77if (slab1 < 0)78goto cleanup;7980current = cg_read_long(cg, "memory.current");81if (current < 0)82goto cleanup;8384if (slab1 < slab0 / 2 && current < slab0 / 2)85ret = KSFT_PASS;86cleanup:87cg_destroy(cg);88free(cg);8990return ret;91}9293static void *alloc_kmem_fn(void *arg)94{95alloc_dcache(NULL, (void *)100);96return NULL;97}9899static int alloc_kmem_smp(const char *cgroup, void *arg)100{101int nr_threads = 2 * get_nprocs();102pthread_t *tinfo;103unsigned long i;104int ret = -1;105106tinfo = calloc(nr_threads, sizeof(pthread_t));107if (tinfo == NULL)108return -1;109110for (i = 0; i < nr_threads; i++) {111if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,112(void *)i)) {113free(tinfo);114return -1;115}116}117118for (i = 0; i < nr_threads; i++) {119ret = pthread_join(tinfo[i], NULL);120if (ret)121break;122}123124free(tinfo);125return ret;126}127128static int cg_run_in_subcgroups(const char *parent,129int (*fn)(const char *cgroup, void *arg),130void *arg, int times)131{132char *child;133int i;134135for (i = 0; i < times; i++) {136child = cg_name_indexed(parent, "child", i);137if (!child)138return -1;139140if (cg_create(child)) {141cg_destroy(child);142free(child);143return -1;144}145146if (cg_run(child, fn, NULL)) {147cg_destroy(child);148free(child);149return -1;150}151152cg_destroy(child);153free(child);154}155156return 0;157}158159/*160* The test creates and destroys a large number of cgroups. In each cgroup it161* allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS162* threads. Then it checks the sanity of numbers on the parent level:163* the total size of the cgroups should be roughly equal to164* anon + file + kernel + sock.165*/166static int test_kmem_memcg_deletion(const char *root)167{168long current, anon, file, kernel, sock, sum;169int ret = KSFT_FAIL;170char *parent;171172parent = cg_name(root, "kmem_memcg_deletion_test");173if (!parent)174goto cleanup;175176if (cg_create(parent))177goto cleanup;178179if (cg_write(parent, "cgroup.subtree_control", "+memory"))180goto cleanup;181182if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))183goto cleanup;184185current = cg_read_long(parent, "memory.current");186anon = cg_read_key_long(parent, "memory.stat", "anon ");187file = cg_read_key_long(parent, "memory.stat", "file ");188kernel = cg_read_key_long(parent, "memory.stat", "kernel ");189sock = cg_read_key_long(parent, "memory.stat", "sock ");190if (current < 0 || anon < 0 || file < 0 || kernel < 0 || sock < 0)191goto cleanup;192193sum = anon + file + kernel + sock;194if (labs(sum - current) < MAX_VMSTAT_ERROR) {195ret = KSFT_PASS;196} else {197printf("memory.current = %ld\n", current);198printf("anon + file + kernel + sock = %ld\n", sum);199printf("anon = %ld\n", anon);200printf("file = %ld\n", file);201printf("kernel = %ld\n", kernel);202printf("sock = %ld\n", sock);203}204205cleanup:206cg_destroy(parent);207free(parent);208209return ret;210}211212/*213* The test reads the entire /proc/kpagecgroup. If the operation went214* successfully (and the kernel didn't panic), the test is treated as passed.215*/216static int test_kmem_proc_kpagecgroup(const char *root)217{218unsigned long buf[128];219int ret = KSFT_FAIL;220ssize_t len;221int fd;222223fd = open("/proc/kpagecgroup", O_RDONLY);224if (fd < 0)225return ret;226227do {228len = read(fd, buf, sizeof(buf));229} while (len > 0);230231if (len == 0)232ret = KSFT_PASS;233234close(fd);235return ret;236}237238static void *pthread_wait_fn(void *arg)239{240sleep(100);241return NULL;242}243244static int spawn_1000_threads(const char *cgroup, void *arg)245{246int nr_threads = 1000;247pthread_t *tinfo;248unsigned long i;249long stack;250int ret = -1;251252tinfo = calloc(nr_threads, sizeof(pthread_t));253if (tinfo == NULL)254return -1;255256for (i = 0; i < nr_threads; i++) {257if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,258(void *)i)) {259free(tinfo);260return(-1);261}262}263264stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");265if (stack >= 4096 * 1000)266ret = 0;267268free(tinfo);269return ret;270}271272/*273* The test spawns a process, which spawns 1000 threads. Then it checks274* that memory.stat's kernel_stack is at least 1000 pages large.275*/276static int test_kmem_kernel_stacks(const char *root)277{278int ret = KSFT_FAIL;279char *cg = NULL;280281cg = cg_name(root, "kmem_kernel_stacks_test");282if (!cg)283goto cleanup;284285if (cg_create(cg))286goto cleanup;287288if (cg_run(cg, spawn_1000_threads, NULL))289goto cleanup;290291ret = KSFT_PASS;292cleanup:293cg_destroy(cg);294free(cg);295296return ret;297}298299/*300* This test sequentionally creates 30 child cgroups, allocates some301* kernel memory in each of them, and deletes them. Then it checks302* that the number of dying cgroups on the parent level is 0.303*/304static int test_kmem_dead_cgroups(const char *root)305{306int ret = KSFT_FAIL;307char *parent;308long dead;309int i;310int max_time = 20;311312parent = cg_name(root, "kmem_dead_cgroups_test");313if (!parent)314goto cleanup;315316if (cg_create(parent))317goto cleanup;318319if (cg_write(parent, "cgroup.subtree_control", "+memory"))320goto cleanup;321322if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))323goto cleanup;324325for (i = 0; i < max_time; i++) {326dead = cg_read_key_long(parent, "cgroup.stat",327"nr_dying_descendants ");328if (dead == 0) {329ret = KSFT_PASS;330break;331}332/*333* Reclaiming cgroups might take some time,334* let's wait a bit and repeat.335*/336sleep(1);337if (i > 5)338printf("Waiting time longer than 5s; wait: %ds (dead: %ld)\n", i, dead);339}340341cleanup:342cg_destroy(parent);343free(parent);344345return ret;346}347348/*349* This test creates a sub-tree with 1000 memory cgroups.350* Then it checks that the memory.current on the parent level351* is greater than 0 and approximates matches the percpu value352* from memory.stat.353*/354static int test_percpu_basic(const char *root)355{356int ret = KSFT_FAIL;357char *parent, *child;358long current, percpu;359int i;360361parent = cg_name(root, "percpu_basic_test");362if (!parent)363goto cleanup;364365if (cg_create(parent))366goto cleanup;367368if (cg_write(parent, "cgroup.subtree_control", "+memory"))369goto cleanup;370371for (i = 0; i < 1000; i++) {372child = cg_name_indexed(parent, "child", i);373if (!child)374return -1;375376if (cg_create(child))377goto cleanup_children;378379free(child);380}381382current = cg_read_long(parent, "memory.current");383percpu = cg_read_key_long(parent, "memory.stat", "percpu ");384385if (current > 0 && percpu > 0 && labs(current - percpu) <386MAX_VMSTAT_ERROR)387ret = KSFT_PASS;388else389printf("memory.current %ld\npercpu %ld\n",390current, percpu);391392cleanup_children:393for (i = 0; i < 1000; i++) {394child = cg_name_indexed(parent, "child", i);395cg_destroy(child);396free(child);397}398399cleanup:400cg_destroy(parent);401free(parent);402403return ret;404}405406#define T(x) { x, #x }407struct kmem_test {408int (*fn)(const char *root);409const char *name;410} tests[] = {411T(test_kmem_basic),412T(test_kmem_memcg_deletion),413T(test_kmem_proc_kpagecgroup),414T(test_kmem_kernel_stacks),415T(test_kmem_dead_cgroups),416T(test_percpu_basic),417};418#undef T419420int main(int argc, char **argv)421{422char root[PATH_MAX];423int i, ret = EXIT_SUCCESS;424425if (cg_find_unified_root(root, sizeof(root), NULL))426ksft_exit_skip("cgroup v2 isn't mounted\n");427428/*429* Check that memory controller is available:430* memory is listed in cgroup.controllers431*/432if (cg_read_strstr(root, "cgroup.controllers", "memory"))433ksft_exit_skip("memory controller isn't available\n");434435if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))436if (cg_write(root, "cgroup.subtree_control", "+memory"))437ksft_exit_skip("Failed to set memory controller\n");438439for (i = 0; i < ARRAY_SIZE(tests); i++) {440switch (tests[i].fn(root)) {441case KSFT_PASS:442ksft_test_result_pass("%s\n", tests[i].name);443break;444case KSFT_SKIP:445ksft_test_result_skip("%s\n", tests[i].name);446break;447default:448ret = EXIT_FAILURE;449ksft_test_result_fail("%s\n", tests[i].name);450break;451}452}453454return ret;455}456457458