Path: blob/master/tools/testing/selftests/bpf/cgroup_helpers.c
26285 views
// SPDX-License-Identifier: GPL-2.01#define _GNU_SOURCE2#include <sched.h>3#include <sys/mount.h>4#include <sys/stat.h>5#include <sys/types.h>6#include <sys/xattr.h>7#include <linux/limits.h>8#include <stdio.h>9#include <stdlib.h>10#include <linux/sched.h>11#include <fcntl.h>12#include <unistd.h>13#include <ftw.h>1415#include "cgroup_helpers.h"16#include "bpf_util.h"1718/*19* To avoid relying on the system setup, when setup_cgroup_env is called20* we create a new mount namespace, and cgroup namespace. The cgroupv221* root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't22* have cgroupv2 enabled at this point in time. It's easier to create our23* own mount namespace and manage it ourselves. We assume /mnt exists.24*25* Related cgroupv1 helpers are named *classid*(), since we only use the26* net_cls controller for tagging net_cls.classid. We assume the default27* mount under /sys/fs/cgroup/net_cls, which should be the case for the28* vast majority of users.29*/3031#define WALK_FD_LIMIT 163233#define CGROUP_MOUNT_PATH "/mnt"34#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup"35#define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls"36#define CGROUP_WORK_DIR "/cgroup-test-work-dir"3738#define format_cgroup_path_pid(buf, path, pid) \39snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \40CGROUP_WORK_DIR, pid, path)4142#define format_cgroup_path(buf, path) \43format_cgroup_path_pid(buf, path, getpid())4445#define format_parent_cgroup_path(buf, path) \46format_cgroup_path_pid(buf, path, getppid())4748#define format_classid_path_pid(buf, pid) \49snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \50CGROUP_WORK_DIR, pid)5152#define format_classid_path(buf) \53format_classid_path_pid(buf, getpid())5455static __thread bool cgroup_workdir_mounted;5657static void __cleanup_cgroup_environment(void);5859static int __enable_controllers(const char *cgroup_path, const char *controllers)60{61char path[PATH_MAX + 1];62char enable[PATH_MAX + 1];63char *c, *c2;64int fd, cfd;65ssize_t len;6667/* If not controllers are passed, enable all available controllers */68if (!controllers) {69snprintf(path, sizeof(path), "%s/cgroup.controllers",70cgroup_path);71fd = open(path, O_RDONLY);72if (fd < 0) {73log_err("Opening cgroup.controllers: %s", path);74return 1;75}76len = read(fd, enable, sizeof(enable) - 1);77if (len < 0) {78close(fd);79log_err("Reading cgroup.controllers: %s", path);80return 1;81} else if (len == 0) { /* No controllers to enable */82close(fd);83return 0;84}85enable[len] = 0;86close(fd);87} else {88bpf_strlcpy(enable, controllers, sizeof(enable));89}9091snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);92cfd = open(path, O_RDWR);93if (cfd < 0) {94log_err("Opening cgroup.subtree_control: %s", path);95return 1;96}9798for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {99if (dprintf(cfd, "+%s\n", c) <= 0) {100log_err("Enabling controller %s: %s", c, path);101close(cfd);102return 1;103}104}105close(cfd);106return 0;107}108109/**110* enable_controllers() - Enable cgroup v2 controllers111* @relative_path: The cgroup path, relative to the workdir112* @controllers: List of controllers to enable in cgroup.controllers format113*114*115* Enable given cgroup v2 controllers, if @controllers is NULL, enable all116* available controllers.117*118* If successful, 0 is returned.119*/120int enable_controllers(const char *relative_path, const char *controllers)121{122char cgroup_path[PATH_MAX + 1];123124format_cgroup_path(cgroup_path, relative_path);125return __enable_controllers(cgroup_path, controllers);126}127128static int __write_cgroup_file(const char *cgroup_path, const char *file,129const char *buf)130{131char file_path[PATH_MAX + 1];132int fd;133134snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file);135fd = open(file_path, O_RDWR);136if (fd < 0) {137log_err("Opening %s", file_path);138return 1;139}140141if (dprintf(fd, "%s", buf) <= 0) {142log_err("Writing to %s", file_path);143close(fd);144return 1;145}146close(fd);147return 0;148}149150/**151* write_cgroup_file() - Write to a cgroup file152* @relative_path: The cgroup path, relative to the workdir153* @file: The name of the file in cgroupfs to write to154* @buf: Buffer to write to the file155*156* Write to a file in the given cgroup's directory.157*158* If successful, 0 is returned.159*/160int write_cgroup_file(const char *relative_path, const char *file,161const char *buf)162{163char cgroup_path[PATH_MAX - 24];164165format_cgroup_path(cgroup_path, relative_path);166return __write_cgroup_file(cgroup_path, file, buf);167}168169/**170* write_cgroup_file_parent() - Write to a cgroup file in the parent process171* workdir172* @relative_path: The cgroup path, relative to the parent process workdir173* @file: The name of the file in cgroupfs to write to174* @buf: Buffer to write to the file175*176* Write to a file in the given cgroup's directory under the parent process177* workdir.178*179* If successful, 0 is returned.180*/181int write_cgroup_file_parent(const char *relative_path, const char *file,182const char *buf)183{184char cgroup_path[PATH_MAX - 24];185186format_parent_cgroup_path(cgroup_path, relative_path);187return __write_cgroup_file(cgroup_path, file, buf);188}189190/**191* setup_cgroup_environment() - Setup the cgroup environment192*193* After calling this function, cleanup_cgroup_environment should be called194* once testing is complete.195*196* This function will print an error to stderr and return 1 if it is unable197* to setup the cgroup environment. If setup is successful, 0 is returned.198*/199int setup_cgroup_environment(void)200{201char cgroup_workdir[PATH_MAX - 24];202203format_cgroup_path(cgroup_workdir, "");204205if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) {206log_err("mkdir mount");207return 1;208}209210if (unshare(CLONE_NEWNS)) {211log_err("unshare");212return 1;213}214215if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {216log_err("mount fakeroot");217return 1;218}219220if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {221log_err("mount cgroup2");222return 1;223}224cgroup_workdir_mounted = true;225226/* Cleanup existing failed runs, now that the environment is setup */227__cleanup_cgroup_environment();228229if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {230log_err("mkdir cgroup work dir");231return 1;232}233234/* Enable all available controllers to increase test coverage */235if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||236__enable_controllers(cgroup_workdir, NULL))237return 1;238239return 0;240}241242static int nftwfunc(const char *filename, const struct stat *statptr,243int fileflags, struct FTW *pfwt)244{245if ((fileflags & FTW_D) && rmdir(filename))246log_err("Removing cgroup: %s", filename);247return 0;248}249250static int join_cgroup_from_top(const char *cgroup_path)251{252char cgroup_procs_path[PATH_MAX + 1];253pid_t pid = getpid();254int fd, rc = 0;255256snprintf(cgroup_procs_path, sizeof(cgroup_procs_path),257"%s/cgroup.procs", cgroup_path);258259fd = open(cgroup_procs_path, O_WRONLY);260if (fd < 0) {261log_err("Opening Cgroup Procs: %s", cgroup_procs_path);262return 1;263}264265if (dprintf(fd, "%d\n", pid) < 0) {266log_err("Joining Cgroup");267rc = 1;268}269270close(fd);271return rc;272}273274/**275* join_cgroup() - Join a cgroup276* @relative_path: The cgroup path, relative to the workdir, to join277*278* This function expects a cgroup to already be created, relative to the cgroup279* work dir, and it joins it. For example, passing "/my-cgroup" as the path280* would actually put the calling process into the cgroup281* "/cgroup-test-work-dir/my-cgroup"282*283* On success, it returns 0, otherwise on failure it returns 1.284*/285int join_cgroup(const char *relative_path)286{287char cgroup_path[PATH_MAX + 1];288289format_cgroup_path(cgroup_path, relative_path);290return join_cgroup_from_top(cgroup_path);291}292293/**294* join_root_cgroup() - Join the root cgroup295*296* This function joins the root cgroup.297*298* On success, it returns 0, otherwise on failure it returns 1.299*/300int join_root_cgroup(void)301{302return join_cgroup_from_top(CGROUP_MOUNT_PATH);303}304305/**306* join_parent_cgroup() - Join a cgroup in the parent process workdir307* @relative_path: The cgroup path, relative to parent process workdir, to join308*309* See join_cgroup().310*311* On success, it returns 0, otherwise on failure it returns 1.312*/313int join_parent_cgroup(const char *relative_path)314{315char cgroup_path[PATH_MAX + 1];316317format_parent_cgroup_path(cgroup_path, relative_path);318return join_cgroup_from_top(cgroup_path);319}320321/**322* set_cgroup_xattr() - Set xattr on a cgroup dir323* @relative_path: The cgroup path, relative to the workdir, to set xattr324* @name: xattr name325* @value: xattr value326*327* This function set xattr on cgroup dir.328*329* On success, it returns 0, otherwise on failure it returns -1.330*/331int set_cgroup_xattr(const char *relative_path,332const char *name,333const char *value)334{335char cgroup_path[PATH_MAX + 1];336337format_cgroup_path(cgroup_path, relative_path);338return setxattr(cgroup_path, name, value, strlen(value) + 1, 0);339}340341/**342* __cleanup_cgroup_environment() - Delete temporary cgroups343*344* This is a helper for cleanup_cgroup_environment() that is responsible for345* deletion of all temporary cgroups that have been created during the test.346*/347static void __cleanup_cgroup_environment(void)348{349char cgroup_workdir[PATH_MAX + 1];350351format_cgroup_path(cgroup_workdir, "");352join_cgroup_from_top(CGROUP_MOUNT_PATH);353nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);354}355356/**357* cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment358*359* This is an idempotent function to delete all temporary cgroups that360* have been created during the test and unmount the cgroup testing work361* directory.362*363* At call time, it moves the calling process to the root cgroup, and then364* runs the deletion process. It is idempotent, and should not fail, unless365* a process is lingering.366*367* On failure, it will print an error to stderr, and try to continue.368*/369void cleanup_cgroup_environment(void)370{371__cleanup_cgroup_environment();372if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH))373log_err("umount cgroup2");374cgroup_workdir_mounted = false;375}376377/**378* get_root_cgroup() - Get the FD of the root cgroup379*380* On success, it returns the file descriptor. On failure, it returns -1.381* If there is a failure, it prints the error to stderr.382*/383int get_root_cgroup(void)384{385int fd;386387fd = open(CGROUP_MOUNT_PATH, O_RDONLY);388if (fd < 0) {389log_err("Opening root cgroup");390return -1;391}392return fd;393}394395/*396* remove_cgroup() - Remove a cgroup397* @relative_path: The cgroup path, relative to the workdir, to remove398*399* This function expects a cgroup to already be created, relative to the cgroup400* work dir. It also expects the cgroup doesn't have any children or live401* processes and it removes the cgroup.402*403* On failure, it will print an error to stderr.404*/405void remove_cgroup(const char *relative_path)406{407char cgroup_path[PATH_MAX + 1];408409format_cgroup_path(cgroup_path, relative_path);410if (rmdir(cgroup_path))411log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);412}413414/**415* create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD416* @relative_path: The cgroup path, relative to the workdir, to join417*418* This function creates a cgroup under the top level workdir and returns the419* file descriptor. It is idempotent.420*421* On success, it returns the file descriptor. On failure it returns -1.422* If there is a failure, it prints the error to stderr.423*/424int create_and_get_cgroup(const char *relative_path)425{426char cgroup_path[PATH_MAX + 1];427int fd;428429format_cgroup_path(cgroup_path, relative_path);430if (mkdir(cgroup_path, 0777) && errno != EEXIST) {431log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path);432return -1;433}434435fd = open(cgroup_path, O_RDONLY);436if (fd < 0) {437log_err("Opening Cgroup");438return -1;439}440441return fd;442}443444/**445* get_cgroup_id_from_path - Get cgroup id for a particular cgroup path446* @cgroup_workdir: The absolute cgroup path447*448* On success, it returns the cgroup id. On failure it returns 0,449* which is an invalid cgroup id.450* If there is a failure, it prints the error to stderr.451*/452static unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)453{454int dirfd, err, flags, mount_id, fhsize;455union {456unsigned long long cgid;457unsigned char raw_bytes[8];458} id;459struct file_handle *fhp, *fhp2;460unsigned long long ret = 0;461462dirfd = AT_FDCWD;463flags = 0;464fhsize = sizeof(*fhp);465fhp = calloc(1, fhsize);466if (!fhp) {467log_err("calloc");468return 0;469}470err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags);471if (err >= 0 || fhp->handle_bytes != 8) {472log_err("name_to_handle_at");473goto free_mem;474}475476fhsize = sizeof(struct file_handle) + fhp->handle_bytes;477fhp2 = realloc(fhp, fhsize);478if (!fhp2) {479log_err("realloc");480goto free_mem;481}482err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags);483fhp = fhp2;484if (err < 0) {485log_err("name_to_handle_at");486goto free_mem;487}488489memcpy(id.raw_bytes, fhp->f_handle, 8);490ret = id.cgid;491492free_mem:493free(fhp);494return ret;495}496497unsigned long long get_cgroup_id(const char *relative_path)498{499char cgroup_workdir[PATH_MAX + 1];500501format_cgroup_path(cgroup_workdir, relative_path);502return get_cgroup_id_from_path(cgroup_workdir);503}504505int cgroup_setup_and_join(const char *path) {506int cg_fd;507508if (setup_cgroup_environment()) {509fprintf(stderr, "Failed to setup cgroup environment\n");510return -EINVAL;511}512513cg_fd = create_and_get_cgroup(path);514if (cg_fd < 0) {515fprintf(stderr, "Failed to create test cgroup\n");516cleanup_cgroup_environment();517return cg_fd;518}519520if (join_cgroup(path)) {521fprintf(stderr, "Failed to join cgroup\n");522cleanup_cgroup_environment();523return -EINVAL;524}525return cg_fd;526}527528/**529* setup_classid_environment() - Setup the cgroupv1 net_cls environment530*531* This function should only be called in a custom mount namespace, e.g.532* created by running setup_cgroup_environment.533*534* After calling this function, cleanup_classid_environment should be called535* once testing is complete.536*537* This function will print an error to stderr and return 1 if it is unable538* to setup the cgroup environment. If setup is successful, 0 is returned.539*/540int setup_classid_environment(void)541{542char cgroup_workdir[PATH_MAX + 1];543544format_classid_path(cgroup_workdir);545546if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&547errno != EBUSY) {548log_err("mount cgroup base");549return 1;550}551552if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {553log_err("mkdir cgroup net_cls");554return 1;555}556557if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) {558if (errno != EBUSY) {559log_err("mount cgroup net_cls");560return 1;561}562563if (rmdir(NETCLS_MOUNT_PATH)) {564log_err("rmdir cgroup net_cls");565return 1;566}567if (umount(CGROUP_MOUNT_DFLT)) {568log_err("umount cgroup base");569return 1;570}571}572573cleanup_classid_environment();574575if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {576log_err("mkdir cgroup work dir");577return 1;578}579580return 0;581}582583/**584* set_classid() - Set a cgroupv1 net_cls classid585*586* Writes the classid into the cgroup work dir's net_cls.classid587* file in order to later on trigger socket tagging.588*589* We leverage the current pid as the classid, ensuring unique identification.590*591* On success, it returns 0, otherwise on failure it returns 1. If there592* is a failure, it prints the error to stderr.593*/594int set_classid(void)595{596char cgroup_workdir[PATH_MAX - 42];597char cgroup_classid_path[PATH_MAX + 1];598int fd, rc = 0;599600format_classid_path(cgroup_workdir);601snprintf(cgroup_classid_path, sizeof(cgroup_classid_path),602"%s/net_cls.classid", cgroup_workdir);603604fd = open(cgroup_classid_path, O_WRONLY);605if (fd < 0) {606log_err("Opening cgroup classid: %s", cgroup_classid_path);607return 1;608}609610if (dprintf(fd, "%u\n", getpid()) < 0) {611log_err("Setting cgroup classid");612rc = 1;613}614615close(fd);616return rc;617}618619/**620* join_classid() - Join a cgroupv1 net_cls classid621*622* This function expects the cgroup work dir to be already created, as we623* join it here. This causes the process sockets to be tagged with the given624* net_cls classid.625*626* On success, it returns 0, otherwise on failure it returns 1.627*/628int join_classid(void)629{630char cgroup_workdir[PATH_MAX + 1];631632format_classid_path(cgroup_workdir);633return join_cgroup_from_top(cgroup_workdir);634}635636/**637* cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment638*639* At call time, it moves the calling process to the root cgroup, and then640* runs the deletion process.641*642* On failure, it will print an error to stderr, and try to continue.643*/644void cleanup_classid_environment(void)645{646char cgroup_workdir[PATH_MAX + 1];647648format_classid_path(cgroup_workdir);649join_cgroup_from_top(NETCLS_MOUNT_PATH);650nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);651}652653/**654* get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup655*/656unsigned long long get_classid_cgroup_id(void)657{658char cgroup_workdir[PATH_MAX + 1];659660format_classid_path(cgroup_workdir);661return get_cgroup_id_from_path(cgroup_workdir);662}663664/**665* get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name.666* @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be667* a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like668* "net_cls,net_prio".669*/670int get_cgroup1_hierarchy_id(const char *subsys_name)671{672char *c, *c2, *c3, *c4;673bool found = false;674char line[1024];675FILE *file;676int i, id;677678if (!subsys_name)679return -1;680681file = fopen("/proc/self/cgroup", "r");682if (!file) {683log_err("fopen /proc/self/cgroup");684return -1;685}686687while (fgets(line, 1024, file)) {688i = 0;689for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) {690if (i == 0) {691id = strtol(c, NULL, 10);692} else if (i == 1) {693if (!strcmp(c, subsys_name)) {694found = true;695break;696}697698/* Multiple subsystems may share one single mount point */699for (c3 = strtok_r(c, ",", &c4); c3;700c3 = strtok_r(NULL, ",", &c4)) {701if (!strcmp(c, subsys_name)) {702found = true;703break;704}705}706}707i++;708}709if (found)710break;711}712fclose(file);713return found ? id : -1;714}715716/**717* open_classid() - Open a cgroupv1 net_cls classid718*719* This function expects the cgroup work dir to be already created, as we720* open it here.721*722* On success, it returns the file descriptor. On failure it returns -1.723*/724int open_classid(void)725{726char cgroup_workdir[PATH_MAX + 1];727728format_classid_path(cgroup_workdir);729return open(cgroup_workdir, O_RDONLY);730}731732733