Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/openzfs/tests/zfs-tests/cmd/idmap_util.c
48529 views
1
// SPDX-License-Identifier: CDDL-1.0
2
/*
3
* CDDL HEADER START
4
*
5
* The contents of this file are subject to the terms of the
6
* Common Development and Distribution License (the "License").
7
* You may not use this file except in compliance with the License.
8
*
9
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10
* or https://opensource.org/licenses/CDDL-1.0.
11
* See the License for the specific language governing permissions
12
* and limitations under the License.
13
*
14
* When distributing Covered Code, include this CDDL HEADER in each
15
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16
* If applicable, add the following below this CDDL HEADER, with the
17
* fields enclosed by brackets "[]" replaced with your own identifying
18
* information: Portions Copyright [yyyy] [name of copyright owner]
19
*
20
* CDDL HEADER END
21
*/
22
23
#ifndef _GNU_SOURCE
24
#define _GNU_SOURCE
25
#endif
26
27
#include <stdio.h>
28
#include <stdlib.h>
29
#include <stdbool.h>
30
#include <stddef.h>
31
#include <string.h>
32
#include <linux/types.h>
33
#include <sys/wait.h>
34
#include <sys/stat.h>
35
#include <sys/mount.h>
36
#include <fcntl.h>
37
#include <errno.h>
38
#include <sched.h>
39
#include <syscall.h>
40
#include <sys/socket.h>
41
42
#include <sys/list.h>
43
44
/* Fallback when no included header has provided UINT_MAX. */
#if !defined(UINT_MAX)
#define	UINT_MAX 4294967295U
#endif

/*
 * Base added to the syscall numbers below. Alpha, MIPS (per ABI) and
 * ia64 get a non-zero base here; everything else uses 0.
 */
#if !defined(__NR_Linux)
#if defined(__alpha__)
#define	__NR_Linux 110
#elif defined(_MIPS_SIM)
#if _MIPS_SIM == _MIPS_SIM_ABI32
#define	__NR_Linux 4000
#endif
#if _MIPS_SIM == _MIPS_SIM_NABI32
#define	__NR_Linux 6000
#endif
#if _MIPS_SIM == _MIPS_SIM_ABI64
#define	__NR_Linux 5000
#endif
#elif defined(__ia64__)
#define	__NR_Linux 1024
#else
#define	__NR_Linux 0
#endif
#endif

/*
 * Syscall numbers for the mount API calls wrapped further down, used
 * only when the system headers do not already define them.
 */
#if !defined(__NR_mount_setattr)
#define	__NR_mount_setattr (442 + __NR_Linux)
#endif

#if !defined(__NR_open_tree)
#define	__NR_open_tree (428 + __NR_Linux)
#endif

#if !defined(__NR_move_mount)
#define	__NR_move_mount (429 + __NR_Linux)
#endif

/*
 * Flag values for umount2(), move_mount(), mount_setattr() and
 * open_tree(), again only defined when the headers lack them.
 */
#if !defined(MNT_DETACH)
#define	MNT_DETACH 2
#endif

#if !defined(MOVE_MOUNT_F_EMPTY_PATH)
#define	MOVE_MOUNT_F_EMPTY_PATH 0x00000004
#endif

#if !defined(MOUNT_ATTR_IDMAP)
#define	MOUNT_ATTR_IDMAP 0x00100000
#endif

#if !defined(OPEN_TREE_CLONE)
#define	OPEN_TREE_CLONE 1
#endif

#if !defined(OPEN_TREE_CLOEXEC)
#define	OPEN_TREE_CLOEXEC O_CLOEXEC
#endif

#if !defined(AT_RECURSIVE)
#define	AT_RECURSIVE 0x8000
#endif
103
104
/*
 * Argument block passed to sys_mount_setattr(). This tool only fills in
 * attr_set and userns_fd; the other two fields stay zero.
 */
typedef struct {
	__u64 attr_set;		/* attributes to set (MOUNT_ATTR_IDMAP) */
	__u64 attr_clr;		/* left zero by this tool */
	__u64 propagation;	/* left zero by this tool */
	__u64 userns_fd;	/* fd of the user namespace to map with */
} mount_attr_t;
110
111
static inline int
112
sys_mount_setattr(int dfd, const char *path, unsigned int flags,
113
mount_attr_t *attr, size_t size)
114
{
115
return (syscall(__NR_mount_setattr, dfd, path, flags, attr, size));
116
}
117
118
/*
 * Thin wrapper that issues the open_tree system call through syscall()
 * and narrows its result to int.
 */
static inline int
sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_open_tree, dfd, filename, flags);
	return ((int)rc);
}
123
124
/*
 * Thin wrapper that issues the move_mount system call through syscall()
 * and narrows its result to int.
 */
static inline int
sys_move_mount(int from_dfd, const char *from_pathname,
    int to_dfd, const char *to_pathname, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
	    to_pathname, flags);
	return ((int)rc);
}
130
131
/* Which id space(s) an idmap entry applies to. */
typedef enum idmap_type_t {
	TYPE_UID = 0,	/* 'u': user ids only */
	TYPE_GID = 1,	/* 'g': group ids only */
	TYPE_BOTH = 2	/* 'b': both user and group ids */
} idmap_type_t;
136
137
struct idmap_entry {
138
__u32 first;
139
__u32 lower_first;
140
__u32 count;
141
idmap_type_t type;
142
list_node_t node;
143
};
144
145
/*
 * Print a printf-style message followed by a newline to stderr.
 *
 * errno is left exactly as it was on entry: the log_errno() callers in
 * this file read errno after logging (e.g. "return (-errno)"), and the
 * stdio calls below are allowed to modify it. The saved value is only
 * restored on the way out, so a "%m" in the format still expands to the
 * caller's error.
 */
static void
log_msg(const char *msg, ...)
{
	int saved_errno = errno;
	va_list ap;

	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	fputc('\n', stderr);
	va_end(ap);

	errno = saved_errno;
}
155
156
/*
 * Log a message prefixed with file, line, function and the text of the
 * current errno (via the "%m" conversion). Extra arguments are matched
 * against conversions in msg.
 */
#define	log_errno(msg, ...) \
	do { \
		log_msg("%s:%d:%s: [%m] " msg, __FILE__, __LINE__, \
		    __func__, ##__VA_ARGS__); \
	} while (0)
161
162
/*
163
* Parse the idmapping in the following format
164
* and add to the list:
165
*
166
* u:nsid_first:hostid_first:count
167
* g:nsid_first:hostid_first:count
168
* b:nsid_first:hostid_first:count
169
*
170
* The delimiter can be : or space character.
171
*
172
* Return:
173
* 0 if success
174
* ENOMEM if out of memory
175
* EINVAL if wrong arg or input
176
*/
177
static int
178
parse_idmap_entry(list_t *head, char *input)
179
{
180
char *token, *savedptr = NULL;
181
struct idmap_entry *entry;
182
unsigned long ul;
183
char *delimiter = (char *)": ";
184
char c;
185
186
if (!input || !head)
187
return (EINVAL);
188
entry = malloc(sizeof (*entry));
189
if (!entry)
190
return (ENOMEM);
191
192
token = strtok_r(input, delimiter, &savedptr);
193
if (token)
194
c = token[0];
195
if (!token || (c != 'b' && c != 'u' && c != 'g'))
196
goto errout;
197
entry->type = (c == 'b') ? TYPE_BOTH :
198
((c == 'u') ? TYPE_UID : TYPE_GID);
199
200
token = strtok_r(NULL, delimiter, &savedptr);
201
if (!token)
202
goto errout;
203
ul = strtoul(token, NULL, 10);
204
if (ul > UINT_MAX || errno != 0)
205
goto errout;
206
entry->first = (__u32)ul;
207
208
token = strtok_r(NULL, delimiter, &savedptr);
209
if (!token)
210
goto errout;
211
ul = strtoul(token, NULL, 10);
212
if (ul > UINT_MAX || errno != 0)
213
goto errout;
214
entry->lower_first = (__u32)ul;
215
216
token = strtok_r(NULL, delimiter, &savedptr);
217
if (!token)
218
goto errout;
219
ul = strtoul(token, NULL, 10);
220
if (ul > UINT_MAX || errno != 0)
221
goto errout;
222
entry->count = (__u32)ul;
223
224
list_insert_tail(head, entry);
225
226
return (0);
227
228
errout:
229
free(entry);
230
return (EINVAL);
231
}
232
233
/*
234
* Release all the entries in the list
235
*/
236
static void
237
free_idmap(list_t *head)
238
{
239
struct idmap_entry *entry;
240
241
while ((entry = list_remove_head(head)) != NULL)
242
free(entry);
243
/* list_destroy() to be done by the caller */
244
}
245
246
/*
 * Write all bytes in the buffer to fd, retrying on EINTR and after
 * short writes.
 *
 * Return:
 * buf_size if everything was written
 * the negative result of write() otherwise; errno still holds the
 * write() error, because it is restored after the failure is logged
 */
static ssize_t
write_buf(int fd, const char *buf, size_t buf_size)
{
	const char *position = buf;
	size_t remaining = buf_size;
	ssize_t written;

	for (;;) {
		written = write(fd, position, remaining);
		if (written < 0) {
			int saved_errno = errno;

			if (saved_errno == EINTR)
				continue;
			log_errno("write");
			/* Callers turn errno into their return code. */
			errno = saved_errno;
			return (written);
		}
		position += written;
		remaining -= (size_t)written;
		if (remaining == 0)
			break;
	}

	return ((ssize_t)buf_size);
}
273
274
/*
 * Read up to buf_size bytes from fd into buf with a single read(),
 * retrying only when the call is interrupted (EINTR).
 *
 * Return:
 * the number of bytes read (0 at end of file)
 * a negative value on failure; errno still holds the read() error,
 * because it is restored after the failure is logged
 */
static ssize_t
read_buf(int fd, char *buf, size_t buf_size)
{
	/* ssize_t, not int: keep the full range read() can return. */
	ssize_t ret;

	do {
		ret = read(fd, buf, buf_size);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0) {
		int saved_errno = errno;

		log_errno("read");
		errno = saved_errno;
	}
	return (ret);
}
291
292
/*
293
* Write idmap of the given type in the buffer to the
294
* process' uid_map or gid_map proc file.
295
*
296
* Return:
297
* 0 if success
298
* errno if there's any error
299
*/
300
static int
301
write_idmap(pid_t pid, char *buf, size_t buf_size, idmap_type_t type)
302
{
303
char path[PATH_MAX];
304
int fd;
305
int ret;
306
307
(void) snprintf(path, sizeof (path), "/proc/%d/%cid_map",
308
pid, type == TYPE_UID ? 'u' : 'g');
309
fd = open(path, O_WRONLY | O_CLOEXEC);
310
if (fd < 0) {
311
ret = errno;
312
log_errno("open(%s)", path);
313
goto out;
314
}
315
ret = write_buf(fd, buf, buf_size);
316
if (ret < 0)
317
ret = errno;
318
else
319
ret = 0;
320
out:
321
if (fd >= 0)
322
close(fd);
323
return (ret);
324
}
325
326
/*
327
* Write idmap info in the list to the process
328
* user namespace, i.e. its /proc/<pid>/uid_map
329
* and /proc/<pid>/gid_map file.
330
*
331
* Return:
332
* 0 if success
333
* errno if it fails
334
*/
335
static int
336
write_pid_idmaps(pid_t pid, list_t *head)
337
{
338
char *buf_uids, *buf_gids;
339
char *curr_bufu, *curr_bufg;
340
/* max 4k to be allowed for each map */
341
int size_buf_uids = 4096, size_buf_gids = 4096;
342
struct idmap_entry *entry;
343
int uid_filled, gid_filled;
344
int ret = 0;
345
int has_uids = 0, has_gids = 0;
346
size_t buf_size;
347
348
buf_uids = malloc(size_buf_uids);
349
if (!buf_uids)
350
return (ENOMEM);
351
buf_gids = malloc(size_buf_gids);
352
if (!buf_gids) {
353
free(buf_uids);
354
return (ENOMEM);
355
}
356
curr_bufu = buf_uids;
357
curr_bufg = buf_gids;
358
359
for (entry = list_head(head); entry; entry = list_next(head, entry)) {
360
if (entry->type == TYPE_UID || entry->type == TYPE_BOTH) {
361
uid_filled = snprintf(curr_bufu, size_buf_uids,
362
"%u %u %u\n", entry->first, entry->lower_first,
363
entry->count);
364
if (uid_filled <= 0 || uid_filled >= size_buf_uids) {
365
ret = E2BIG;
366
goto out;
367
}
368
curr_bufu += uid_filled;
369
size_buf_uids -= uid_filled;
370
has_uids = 1;
371
}
372
if (entry->type == TYPE_GID || entry->type == TYPE_BOTH) {
373
gid_filled = snprintf(curr_bufg, size_buf_gids,
374
"%u %u %u\n", entry->first, entry->lower_first,
375
entry->count);
376
if (gid_filled <= 0 || gid_filled >= size_buf_gids) {
377
ret = E2BIG;
378
goto out;
379
}
380
curr_bufg += gid_filled;
381
size_buf_gids -= gid_filled;
382
has_gids = 1;
383
}
384
}
385
if (has_uids) {
386
buf_size = curr_bufu - buf_uids;
387
ret = write_idmap(pid, buf_uids, buf_size, TYPE_UID);
388
if (ret)
389
goto out;
390
}
391
if (has_gids) {
392
buf_size = curr_bufg - buf_gids;
393
ret = write_idmap(pid, buf_gids, buf_size, TYPE_GID);
394
}
395
396
out:
397
free(buf_uids);
398
free(buf_gids);
399
return (ret);
400
}
401
402
/*
 * Wait for the child process to exit and reap it, retrying when
 * waitpid() is interrupted.
 *
 * Return:
 * the child's exit code if it exited normally
 * EXIT_FAILURE if waitpid() fails or the child did not exit normally
 */
static int
wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) < 0) {
		if (errno != EINTR)
			return (EXIT_FAILURE);
	}

	return (WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : EXIT_FAILURE);
}
428
429
/*
430
* Get the file descriptor of the process user namespace
431
* given its pid.
432
*
433
* Return:
434
* fd if success
435
* -1 if it fails
436
*/
437
static int
438
userns_fd_from_pid(pid_t pid)
439
{
440
int fd;
441
char path[PATH_MAX];
442
443
(void) snprintf(path, sizeof (path), "/proc/%d/ns/user", pid);
444
fd = open(path, O_RDONLY | O_CLOEXEC);
445
if (fd < 0)
446
log_errno("open(%s)", path);
447
return (fd);
448
}
449
450
/*
451
* Get the user namespace file descriptor given a list
452
* of idmap info.
453
*
454
* Return:
455
* fd if success
456
* -errno if it fails
457
*/
458
static int
459
userns_fd_from_idmap(list_t *head)
460
{
461
pid_t pid;
462
int ret, fd;
463
int fds[2];
464
char c;
465
int saved_errno = 0;
466
467
/* socketpair for bidirectional communication */
468
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fds);
469
if (ret) {
470
log_errno("socketpair");
471
return (-errno);
472
}
473
474
pid = fork();
475
if (pid < 0) {
476
log_errno("fork");
477
fd = -errno;
478
goto out;
479
}
480
481
if (pid == 0) {
482
/* child process */
483
ret = unshare(CLONE_NEWUSER);
484
if (ret == 0) {
485
/* notify the parent of success */
486
ret = write_buf(fds[1], "1", 1);
487
if (ret < 0)
488
saved_errno = errno;
489
else {
490
/*
491
* Until the parent has written to idmap,
492
* we cannot exit, otherwise the defunct
493
* process is owned by the real root, writing
494
* to its idmap ends up with EPERM in the
495
* context of a user ns
496
*/
497
ret = read_buf(fds[1], &c, 1);
498
if (ret < 0)
499
saved_errno = errno;
500
}
501
} else {
502
saved_errno = errno;
503
log_errno("unshare");
504
ret = write_buf(fds[1], "0", 1);
505
if (ret < 0)
506
saved_errno = errno;
507
}
508
exit(saved_errno);
509
}
510
511
/* parent process */
512
ret = read_buf(fds[0], &c, 1);
513
if (ret == 1 && c == '1') {
514
ret = write_pid_idmaps(pid, head);
515
if (!ret) {
516
fd = userns_fd_from_pid(pid);
517
if (fd < 0)
518
fd = -errno;
519
} else {
520
fd = -ret;
521
}
522
/* Let child know it can exit */
523
(void) write_buf(fds[0], "1", 1);
524
} else {
525
fd = -EBADF;
526
}
527
(void) wait_for_pid(pid);
528
out:
529
close(fds[0]);
530
close(fds[1]);
531
return (fd);
532
}
533
534
/*
535
* Check if the operating system supports idmapped mount on the
536
* given path or not.
537
*
538
* Return:
539
* true if supported
540
* false if not supported
541
*/
542
static bool
543
is_idmap_supported(char *path)
544
{
545
list_t head;
546
int ret;
547
int tree_fd = -EBADF, path_fd = -EBADF;
548
mount_attr_t attr = {
549
.attr_set = MOUNT_ATTR_IDMAP,
550
.userns_fd = -EBADF,
551
};
552
553
/* strtok_r() won't be happy with a const string */
554
/* To check if idmapped mount can be done in a user ns, map 0 to 0 */
555
char *input = strdup("b:0:0:1");
556
557
if (!input) {
558
errno = ENOMEM;
559
log_errno("strdup");
560
return (false);
561
}
562
563
list_create(&head, sizeof (struct idmap_entry),
564
offsetof(struct idmap_entry, node));
565
ret = parse_idmap_entry(&head, input);
566
if (ret) {
567
errno = ret;
568
log_errno("parse_idmap_entry(%s)", input);
569
goto out1;
570
}
571
ret = userns_fd_from_idmap(&head);
572
if (ret < 0)
573
goto out1;
574
attr.userns_fd = ret;
575
ret = openat(-EBADF, path, O_DIRECTORY | O_CLOEXEC);
576
if (ret < 0) {
577
log_errno("openat(%s)", path);
578
goto out;
579
}
580
path_fd = ret;
581
ret = sys_open_tree(path_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT |
582
AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
583
if (ret < 0) {
584
log_errno("sys_open_tree");
585
goto out;
586
}
587
tree_fd = ret;
588
ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH, &attr,
589
sizeof (attr));
590
if (ret < 0) {
591
log_errno("sys_mount_setattr");
592
}
593
out:
594
close(attr.userns_fd);
595
out1:
596
free_idmap(&head);
597
list_destroy(&head);
598
if (tree_fd >= 0)
599
close(tree_fd);
600
if (path_fd >= 0)
601
close(path_fd);
602
free(input);
603
return (ret == 0);
604
}
605
606
/*
 * Check if the given path is a mount point or not, by comparing
 * lstat() of the path with lstat() of "<path>/..". A different device
 * id, or the same inode number, counts as a mount point.
 *
 * Return:
 * true if it is
 * false otherwise (including when allocation or either lstat() fails)
 */
static bool
is_mountpoint(char *path)
{
	static const char suffix[] = "/..";
	size_t path_len = strlen(path);
	struct stat self, up;
	bool result = false;
	char *parent;

	/* sizeof (suffix) counts the terminating NUL as well. */
	parent = malloc(path_len + sizeof (suffix));
	if (parent == NULL) {
		errno = ENOMEM;
		log_errno("malloc");
		return (false);
	}
	memcpy(parent, path, path_len);
	memcpy(parent + path_len, suffix, sizeof (suffix));

	if (lstat(path, &self) == 0 && lstat(parent, &up) == 0) {
		result = (self.st_dev != up.st_dev ||
		    self.st_ino == up.st_ino);
	}

	free(parent);
	return (result);
}
639
640
/*
641
* Remount the source on the new target folder with the given
642
* list of idmap info. If target is NULL, the source will be
643
* unmounted and then remounted if it is a mountpoint, otherwise
644
* no unmount is done, the source is simply idmap remounted.
645
*
646
* Return:
647
* 0 if success
648
* -errno otherwise
649
*/
650
static int
651
do_idmap_mount(list_t *idmap, char *source, char *target, int flags)
652
{
653
int ret;
654
int tree_fd = -EBADF, source_fd = -EBADF;
655
mount_attr_t attr = {
656
.attr_set = MOUNT_ATTR_IDMAP,
657
.userns_fd = -EBADF,
658
};
659
660
ret = userns_fd_from_idmap(idmap);
661
if (ret < 0)
662
goto out1;
663
attr.userns_fd = ret;
664
ret = openat(-EBADF, source, O_DIRECTORY | O_CLOEXEC);
665
if (ret < 0) {
666
ret = -errno;
667
log_errno("openat(%s)", source);
668
goto out;
669
}
670
source_fd = ret;
671
ret = sys_open_tree(source_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT |
672
AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE | flags);
673
if (ret < 0) {
674
ret = -errno;
675
log_errno("sys_open_tree");
676
goto out;
677
}
678
tree_fd = ret;
679
ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH | flags, &attr,
680
sizeof (attr));
681
if (ret < 0) {
682
ret = -errno;
683
log_errno("sys_mount_setattr");
684
goto out;
685
}
686
if (target == NULL && is_mountpoint(source)) {
687
ret = umount2(source, MNT_DETACH);
688
if (ret < 0) {
689
ret = -errno;
690
log_errno("umount2(%s)", source);
691
goto out;
692
}
693
}
694
ret = sys_move_mount(tree_fd, "", -EBADF, target == NULL ?
695
source : target, MOVE_MOUNT_F_EMPTY_PATH);
696
if (ret < 0) {
697
ret = -errno;
698
log_errno("sys_move_mount(%s)", target == NULL ?
699
source : target);
700
}
701
out:
702
close(attr.userns_fd);
703
out1:
704
if (tree_fd >= 0)
705
close(tree_fd);
706
if (source_fd >= 0)
707
close(source_fd);
708
return (ret);
709
}
710
711
/*
 * Print the command line help to stderr, using argv[0] as the
 * program name.
 */
static void
print_usage(char *argv[])
{
	/* Everything after the first line is fixed text without '%'. */
	static const char *const body[] = {
		"\n",
		" -r Recursively do idmapped mount.\n",
		"\n",
		" -c Checks if idmapped mount is supported "
		"on the <source> by the operating system or not.\n",
		"\n",
		" -m <idmap> to specify the idmap info, "
		"in the following format:\n",
		" <id_type>:<nsid_first>:<hostid_first>:<count>\n",
		"\n",
		" <id_type> can be either of 'b', 'u', and 'g'.\n",
		"\n",
		"The <source> folder will be mounted at <target> "
		"with the provided idmap information.\nIf no <target> is "
		"specified, and <source> is a mount point, "
		"then <source> will be unmounted and then remounted.\n",
	};
	size_t i;

	fprintf(stderr, "Usage: %s [-r] [-c] [-m <idmap1>] [-m <idmap2>]"
	    " ... [<source>] [<target>]\n", argv[0]);
	for (i = 0; i < sizeof (body) / sizeof (body[0]); i++)
		(void) fputs(body[i], stderr);
}
733
734
int
735
main(int argc, char *argv[])
736
{
737
int opt;
738
list_t idmap_head;
739
int check_supported = 0;
740
int ret = EXIT_SUCCESS;
741
char *source = NULL, *target = NULL;
742
int flags = 0;
743
744
list_create(&idmap_head, sizeof (struct idmap_entry),
745
offsetof(struct idmap_entry, node));
746
747
while ((opt = getopt(argc, argv, "rcm:")) != -1) {
748
switch (opt) {
749
case 'r':
750
flags |= AT_RECURSIVE;
751
break;
752
case 'c':
753
check_supported = 1;
754
break;
755
case 'm':
756
ret = parse_idmap_entry(&idmap_head, optarg);
757
if (ret) {
758
errno = ret;
759
log_errno("parse_idmap_entry(%s)", optarg);
760
ret = EXIT_FAILURE;
761
goto out;
762
}
763
break;
764
default:
765
print_usage(argv);
766
exit(EXIT_FAILURE);
767
}
768
}
769
770
if (check_supported == 0 && list_is_empty(&idmap_head)) {
771
print_usage(argv);
772
ret = EXIT_FAILURE;
773
goto out;
774
}
775
776
if (optind >= argc) {
777
fprintf(stderr, "Expected to have <source>, <target>.\n");
778
print_usage(argv);
779
ret = EXIT_FAILURE;
780
goto out;
781
}
782
783
source = argv[optind];
784
if (optind < (argc - 1)) {
785
target = argv[optind + 1];
786
}
787
788
if (check_supported) {
789
free_idmap(&idmap_head);
790
list_destroy(&idmap_head);
791
if (is_idmap_supported(source)) {
792
printf("idmapped mount is supported on [%s].\n",
793
source);
794
return (EXIT_SUCCESS);
795
} else {
796
printf("idmapped mount is NOT supported.\n");
797
return (EXIT_FAILURE);
798
}
799
}
800
801
ret = do_idmap_mount(&idmap_head, source, target, flags);
802
if (ret)
803
ret = EXIT_FAILURE;
804
out:
805
free_idmap(&idmap_head);
806
list_destroy(&idmap_head);
807
808
exit(ret);
809
}
810
811