Path: blob/main/sys/contrib/openzfs/tests/zfs-tests/cmd/idmap_util.c
48529 views
// SPDX-License-Identifier: CDDL-1.01/*2* CDDL HEADER START3*4* The contents of this file are subject to the terms of the5* Common Development and Distribution License (the "License").6* You may not use this file except in compliance with the License.7*8* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE9* or https://opensource.org/licenses/CDDL-1.0.10* See the License for the specific language governing permissions11* and limitations under the License.12*13* When distributing Covered Code, include this CDDL HEADER in each14* file and include the License file at usr/src/OPENSOLARIS.LICENSE.15* If applicable, add the following below this CDDL HEADER, with the16* fields enclosed by brackets "[]" replaced with your own identifying17* information: Portions Copyright [yyyy] [name of copyright owner]18*19* CDDL HEADER END20*/2122#ifndef _GNU_SOURCE23#define _GNU_SOURCE24#endif2526#include <stdio.h>27#include <stdlib.h>28#include <stdbool.h>29#include <stddef.h>30#include <string.h>31#include <linux/types.h>32#include <sys/wait.h>33#include <sys/stat.h>34#include <sys/mount.h>35#include <fcntl.h>36#include <errno.h>37#include <sched.h>38#include <syscall.h>39#include <sys/socket.h>4041#include <sys/list.h>4243#ifndef UINT_MAX44#define UINT_MAX 4294967295U45#endif4647#ifndef __NR_Linux48#if defined __alpha__49#define __NR_Linux 11050#elif defined _MIPS_SIM51#if _MIPS_SIM == _MIPS_SIM_ABI3252#define __NR_Linux 400053#endif54#if _MIPS_SIM == _MIPS_SIM_NABI3255#define __NR_Linux 600056#endif57#if _MIPS_SIM == _MIPS_SIM_ABI6458#define __NR_Linux 500059#endif60#elif defined __ia64__61#define __NR_Linux 102462#else63#define __NR_Linux 064#endif65#endif6667#ifndef __NR_mount_setattr68#define __NR_mount_setattr (442 + __NR_Linux)69#endif7071#ifndef __NR_open_tree72#define __NR_open_tree (428 + __NR_Linux)73#endif7475#ifndef __NR_move_mount76#define __NR_move_mount (429 + __NR_Linux)77#endif7879#ifndef MNT_DETACH80#define MNT_DETACH 281#endif8283#ifndef MOVE_MOUNT_F_EMPTY_PATH84#define MOVE_MOUNT_F_EMPTY_PATH 0x0000000485#endif8687#ifndef MOUNT_ATTR_IDMAP88#define MOUNT_ATTR_IDMAP 0x0010000089#endif9091#ifndef OPEN_TREE_CLONE92#define OPEN_TREE_CLONE 193#endif9495#ifndef OPEN_TREE_CLOEXEC96#define OPEN_TREE_CLOEXEC O_CLOEXEC97#endif9899#ifndef AT_RECURSIVE100#define AT_RECURSIVE 0x8000101#endif102103typedef struct {104__u64 attr_set;105__u64 attr_clr;106__u64 propagation;107__u64 userns_fd;108} mount_attr_t;109110static inline int111sys_mount_setattr(int dfd, const char *path, unsigned int flags,112mount_attr_t *attr, size_t size)113{114return (syscall(__NR_mount_setattr, dfd, path, flags, attr, size));115}116117static inline int118sys_open_tree(int dfd, const char *filename, unsigned int flags)119{120return (syscall(__NR_open_tree, dfd, filename, flags));121}122123static inline int sys_move_mount(int from_dfd, const char *from_pathname,124int to_dfd, const char *to_pathname, unsigned int flags)125{126return (syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,127to_pathname, flags));128}129130typedef enum idmap_type_t {131TYPE_UID,132TYPE_GID,133TYPE_BOTH134} idmap_type_t;135136struct idmap_entry {137__u32 first;138__u32 lower_first;139__u32 count;140idmap_type_t type;141list_node_t node;142};143144static void145log_msg(const char *msg, ...)146{147va_list ap;148149va_start(ap, msg);150vfprintf(stderr, msg, ap);151fputc('\n', stderr);152va_end(ap);153}154155#define log_errno(msg, args...) \156do { \157log_msg("%s:%d:%s: [%m] " msg, __FILE__, __LINE__,\158__FUNCTION__, ##args); \159} while (0)160161/*162* Parse the idmapping in the following format163* and add to the list:164*165* u:nsid_first:hostid_first:count166* g:nsid_first:hostid_first:count167* b:nsid_first:hostid_first:count168*169* The delimiter can be : or space character.170*171* Return:172* 0 if success173* ENOMEM if out of memory174* EINVAL if wrong arg or input175*/176static int177parse_idmap_entry(list_t *head, char *input)178{179char *token, *savedptr = NULL;180struct idmap_entry *entry;181unsigned long ul;182char *delimiter = (char *)": ";183char c;184185if (!input || !head)186return (EINVAL);187entry = malloc(sizeof (*entry));188if (!entry)189return (ENOMEM);190191token = strtok_r(input, delimiter, &savedptr);192if (token)193c = token[0];194if (!token || (c != 'b' && c != 'u' && c != 'g'))195goto errout;196entry->type = (c == 'b') ? TYPE_BOTH :197((c == 'u') ? TYPE_UID : TYPE_GID);198199token = strtok_r(NULL, delimiter, &savedptr);200if (!token)201goto errout;202ul = strtoul(token, NULL, 10);203if (ul > UINT_MAX || errno != 0)204goto errout;205entry->first = (__u32)ul;206207token = strtok_r(NULL, delimiter, &savedptr);208if (!token)209goto errout;210ul = strtoul(token, NULL, 10);211if (ul > UINT_MAX || errno != 0)212goto errout;213entry->lower_first = (__u32)ul;214215token = strtok_r(NULL, delimiter, &savedptr);216if (!token)217goto errout;218ul = strtoul(token, NULL, 10);219if (ul > UINT_MAX || errno != 0)220goto errout;221entry->count = (__u32)ul;222223list_insert_tail(head, entry);224225return (0);226227errout:228free(entry);229return (EINVAL);230}231232/*233* Release all the entries in the list234*/235static void236free_idmap(list_t *head)237{238struct idmap_entry *entry;239240while ((entry = list_remove_head(head)) != NULL)241free(entry);242/* list_destroy() to be done by the caller */243}244245/*246* Write all bytes in the buffer to fd247*/248static ssize_t249write_buf(int fd, const char *buf, size_t buf_size)250{251ssize_t written, total_written = 0;252size_t remaining = buf_size;253char *position = (char *)buf;254255for (;;) {256written = write(fd, position, remaining);257if (written < 0 && errno == EINTR)258continue;259if (written < 0) {260log_errno("write");261return (written);262}263total_written += written;264if (total_written == buf_size)265break;266remaining -= written;267position += written;268}269270return (total_written);271}272273/*274* Read data from file into buffer275*/276static ssize_t277read_buf(int fd, char *buf, size_t buf_size)278{279int ret;280for (;;) {281ret = read(fd, buf, buf_size);282if (ret < 0 && errno == EINTR)283continue;284break;285}286if (ret < 0)287log_errno("read");288return (ret);289}290291/*292* Write idmap of the given type in the buffer to the293* process' uid_map or gid_map proc file.294*295* Return:296* 0 if success297* errno if there's any error298*/299static int300write_idmap(pid_t pid, char *buf, size_t buf_size, idmap_type_t type)301{302char path[PATH_MAX];303int fd;304int ret;305306(void) snprintf(path, sizeof (path), "/proc/%d/%cid_map",307pid, type == TYPE_UID ? 'u' : 'g');308fd = open(path, O_WRONLY | O_CLOEXEC);309if (fd < 0) {310ret = errno;311log_errno("open(%s)", path);312goto out;313}314ret = write_buf(fd, buf, buf_size);315if (ret < 0)316ret = errno;317else318ret = 0;319out:320if (fd >= 0)321close(fd);322return (ret);323}324325/*326* Write idmap info in the list to the process327* user namespace, i.e. its /proc/<pid>/uid_map328* and /proc/<pid>/gid_map file.329*330* Return:331* 0 if success332* errno if it fails333*/334static int335write_pid_idmaps(pid_t pid, list_t *head)336{337char *buf_uids, *buf_gids;338char *curr_bufu, *curr_bufg;339/* max 4k to be allowed for each map */340int size_buf_uids = 4096, size_buf_gids = 4096;341struct idmap_entry *entry;342int uid_filled, gid_filled;343int ret = 0;344int has_uids = 0, has_gids = 0;345size_t buf_size;346347buf_uids = malloc(size_buf_uids);348if (!buf_uids)349return (ENOMEM);350buf_gids = malloc(size_buf_gids);351if (!buf_gids) {352free(buf_uids);353return (ENOMEM);354}355curr_bufu = buf_uids;356curr_bufg = buf_gids;357358for (entry = list_head(head); entry; entry = list_next(head, entry)) {359if (entry->type == TYPE_UID || entry->type == TYPE_BOTH) {360uid_filled = snprintf(curr_bufu, size_buf_uids,361"%u %u %u\n", entry->first, entry->lower_first,362entry->count);363if (uid_filled <= 0 || uid_filled >= size_buf_uids) {364ret = E2BIG;365goto out;366}367curr_bufu += uid_filled;368size_buf_uids -= uid_filled;369has_uids = 1;370}371if (entry->type == TYPE_GID || entry->type == TYPE_BOTH) {372gid_filled = snprintf(curr_bufg, size_buf_gids,373"%u %u %u\n", entry->first, entry->lower_first,374entry->count);375if (gid_filled <= 0 || gid_filled >= size_buf_gids) {376ret = E2BIG;377goto out;378}379curr_bufg += gid_filled;380size_buf_gids -= gid_filled;381has_gids = 1;382}383}384if (has_uids) {385buf_size = curr_bufu - buf_uids;386ret = write_idmap(pid, buf_uids, buf_size, TYPE_UID);387if (ret)388goto out;389}390if (has_gids) {391buf_size = curr_bufg - buf_gids;392ret = write_idmap(pid, buf_gids, buf_size, TYPE_GID);393}394395out:396free(buf_uids);397free(buf_gids);398return (ret);399}400401/*402* Wait for the child process to exit403* and reap it.404*405* Return:406* process exit code if available407*/408static int409wait_for_pid(pid_t pid)410{411int status;412int ret;413414for (;;) {415ret = waitpid(pid, &status, 0);416if (ret < 0) {417if (errno == EINTR)418continue;419return (EXIT_FAILURE);420}421break;422}423if (!WIFEXITED(status))424return (EXIT_FAILURE);425return (WEXITSTATUS(status));426}427428/*429* Get the file descriptor of the process user namespace430* given its pid.431*432* Return:433* fd if success434* -1 if it fails435*/436static int437userns_fd_from_pid(pid_t pid)438{439int fd;440char path[PATH_MAX];441442(void) snprintf(path, sizeof (path), "/proc/%d/ns/user", pid);443fd = open(path, O_RDONLY | O_CLOEXEC);444if (fd < 0)445log_errno("open(%s)", path);446return (fd);447}448449/*450* Get the user namespace file descriptor given a list451* of idmap info.452*453* Return:454* fd if success455* -errno if it fails456*/457static int458userns_fd_from_idmap(list_t *head)459{460pid_t pid;461int ret, fd;462int fds[2];463char c;464int saved_errno = 0;465466/* socketpair for bidirectional communication */467ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fds);468if (ret) {469log_errno("socketpair");470return (-errno);471}472473pid = fork();474if (pid < 0) {475log_errno("fork");476fd = -errno;477goto out;478}479480if (pid == 0) {481/* child process */482ret = unshare(CLONE_NEWUSER);483if (ret == 0) {484/* notify the parent of success */485ret = write_buf(fds[1], "1", 1);486if (ret < 0)487saved_errno = errno;488else {489/*490* Until the parent has written to idmap,491* we cannot exit, otherwise the defunct492* process is owned by the real root, writing493* to its idmap ends up with EPERM in the494* context of a user ns495*/496ret = read_buf(fds[1], &c, 1);497if (ret < 0)498saved_errno = errno;499}500} else {501saved_errno = errno;502log_errno("unshare");503ret = write_buf(fds[1], "0", 1);504if (ret < 0)505saved_errno = errno;506}507exit(saved_errno);508}509510/* parent process */511ret = read_buf(fds[0], &c, 1);512if (ret == 1 && c == '1') {513ret = write_pid_idmaps(pid, head);514if (!ret) {515fd = userns_fd_from_pid(pid);516if (fd < 0)517fd = -errno;518} else {519fd = -ret;520}521/* Let child know it can exit */522(void) write_buf(fds[0], "1", 1);523} else {524fd = -EBADF;525}526(void) wait_for_pid(pid);527out:528close(fds[0]);529close(fds[1]);530return (fd);531}532533/*534* Check if the operating system supports idmapped mount on the535* given path or not.536*537* Return:538* true if supported539* false if not supported540*/541static bool542is_idmap_supported(char *path)543{544list_t head;545int ret;546int tree_fd = -EBADF, path_fd = -EBADF;547mount_attr_t attr = {548.attr_set = MOUNT_ATTR_IDMAP,549.userns_fd = -EBADF,550};551552/* strtok_r() won't be happy with a const string */553/* To check if idmapped mount can be done in a user ns, map 0 to 0 */554char *input = strdup("b:0:0:1");555556if (!input) {557errno = ENOMEM;558log_errno("strdup");559return (false);560}561562list_create(&head, sizeof (struct idmap_entry),563offsetof(struct idmap_entry, node));564ret = parse_idmap_entry(&head, input);565if (ret) {566errno = ret;567log_errno("parse_idmap_entry(%s)", input);568goto out1;569}570ret = userns_fd_from_idmap(&head);571if (ret < 0)572goto out1;573attr.userns_fd = ret;574ret = openat(-EBADF, path, O_DIRECTORY | O_CLOEXEC);575if (ret < 0) {576log_errno("openat(%s)", path);577goto out;578}579path_fd = ret;580ret = sys_open_tree(path_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT |581AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);582if (ret < 0) {583log_errno("sys_open_tree");584goto out;585}586tree_fd = ret;587ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH, &attr,588sizeof (attr));589if (ret < 0) {590log_errno("sys_mount_setattr");591}592out:593close(attr.userns_fd);594out1:595free_idmap(&head);596list_destroy(&head);597if (tree_fd >= 0)598close(tree_fd);599if (path_fd >= 0)600close(path_fd);601free(input);602return (ret == 0);603}604605/*606* Check if the given path is a mount point or not.607*608* Return:609* true if it is610* false otherwise611*/612static bool613is_mountpoint(char *path)614{615char *parent;616struct stat st_me, st_parent;617bool ret;618619parent = malloc(strlen(path)+4);620if (!parent) {621errno = ENOMEM;622log_errno("malloc");623return (false);624}625strcat(strcpy(parent, path), "/..");626if (lstat(path, &st_me) != 0 ||627lstat(parent, &st_parent) != 0)628ret = false;629else630if (st_me.st_dev != st_parent.st_dev ||631st_me.st_ino == st_parent.st_ino)632ret = true;633else634ret = false;635free(parent);636return (ret);637}638639/*640* Remount the source on the new target folder with the given641* list of idmap info. If target is NULL, the source will be642* unmounted and then remounted if it is a mountpoint, otherwise643* no unmount is done, the source is simply idmap remounted.644*645* Return:646* 0 if success647* -errno otherwise648*/649static int650do_idmap_mount(list_t *idmap, char *source, char *target, int flags)651{652int ret;653int tree_fd = -EBADF, source_fd = -EBADF;654mount_attr_t attr = {655.attr_set = MOUNT_ATTR_IDMAP,656.userns_fd = -EBADF,657};658659ret = userns_fd_from_idmap(idmap);660if (ret < 0)661goto out1;662attr.userns_fd = ret;663ret = openat(-EBADF, source, O_DIRECTORY | O_CLOEXEC);664if (ret < 0) {665ret = -errno;666log_errno("openat(%s)", source);667goto out;668}669source_fd = ret;670ret = sys_open_tree(source_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT |671AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE | flags);672if (ret < 0) {673ret = -errno;674log_errno("sys_open_tree");675goto out;676}677tree_fd = ret;678ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH | flags, &attr,679sizeof (attr));680if (ret < 0) {681ret = -errno;682log_errno("sys_mount_setattr");683goto out;684}685if (target == NULL && is_mountpoint(source)) {686ret = umount2(source, MNT_DETACH);687if (ret < 0) {688ret = -errno;689log_errno("umount2(%s)", source);690goto out;691}692}693ret = sys_move_mount(tree_fd, "", -EBADF, target == NULL ?694source : target, MOVE_MOUNT_F_EMPTY_PATH);695if (ret < 0) {696ret = -errno;697log_errno("sys_move_mount(%s)", target == NULL ?698source : target);699}700out:701close(attr.userns_fd);702out1:703if (tree_fd >= 0)704close(tree_fd);705if (source_fd >= 0)706close(source_fd);707return (ret);708}709710static void711print_usage(char *argv[])712{713fprintf(stderr, "Usage: %s [-r] [-c] [-m <idmap1>] [-m <idmap2>]" \714" ... [<source>] [<target>]\n", argv[0]);715fprintf(stderr, "\n");716fprintf(stderr, " -r Recursively do idmapped mount.\n");717fprintf(stderr, "\n");718fprintf(stderr, " -c Checks if idmapped mount is supported " \719"on the <source> by the operating system or not.\n");720fprintf(stderr, "\n");721fprintf(stderr, " -m <idmap> to specify the idmap info, " \722"in the following format:\n");723fprintf(stderr, " <id_type>:<nsid_first>:<hostid_first>:<count>\n");724fprintf(stderr, "\n");725fprintf(stderr, " <id_type> can be either of 'b', 'u', and 'g'.\n");726fprintf(stderr, "\n");727fprintf(stderr, "The <source> folder will be mounted at <target> " \728"with the provided idmap information.\nIf no <target> is " \729"specified, and <source> is a mount point, " \730"then <source> will be unmounted and then remounted.\n");731}732733int734main(int argc, char *argv[])735{736int opt;737list_t idmap_head;738int check_supported = 0;739int ret = EXIT_SUCCESS;740char *source = NULL, *target = NULL;741int flags = 0;742743list_create(&idmap_head, sizeof (struct idmap_entry),744offsetof(struct idmap_entry, node));745746while ((opt = getopt(argc, argv, "rcm:")) != -1) {747switch (opt) {748case 'r':749flags |= AT_RECURSIVE;750break;751case 'c':752check_supported = 1;753break;754case 'm':755ret = parse_idmap_entry(&idmap_head, optarg);756if (ret) {757errno = ret;758log_errno("parse_idmap_entry(%s)", optarg);759ret = EXIT_FAILURE;760goto out;761}762break;763default:764print_usage(argv);765exit(EXIT_FAILURE);766}767}768769if (check_supported == 0 && list_is_empty(&idmap_head)) {770print_usage(argv);771ret = EXIT_FAILURE;772goto out;773}774775if (optind >= argc) {776fprintf(stderr, "Expected to have <source>, <target>.\n");777print_usage(argv);778ret = EXIT_FAILURE;779goto out;780}781782source = argv[optind];783if (optind < (argc - 1)) {784target = argv[optind + 1];785}786787if (check_supported) {788free_idmap(&idmap_head);789list_destroy(&idmap_head);790if (is_idmap_supported(source)) {791printf("idmapped mount is supported on [%s].\n",792source);793return (EXIT_SUCCESS);794} else {795printf("idmapped mount is NOT supported.\n");796return (EXIT_FAILURE);797}798}799800ret = do_idmap_mount(&idmap_head, source, target, flags);801if (ret)802ret = EXIT_FAILURE;803out:804free_idmap(&idmap_head);805list_destroy(&idmap_head);806807exit(ret);808}809810811