/* Authors: Gregory P. Smith & Jeffrey Yasskin */1#ifndef Py_BUILD_CORE_BUILTIN2# define Py_BUILD_CORE_MODULE 13#endif45#include "Python.h"6#include "pycore_fileutils.h"7#include "pycore_pystate.h"8#if defined(HAVE_PIPE2) && !defined(_GNU_SOURCE)9# define _GNU_SOURCE10#endif11#include <unistd.h>12#include <fcntl.h>13#ifdef HAVE_SYS_TYPES_H14#include <sys/types.h>15#endif16#if defined(HAVE_SYS_STAT_H)17#include <sys/stat.h>18#endif19#ifdef HAVE_SYS_SYSCALL_H20#include <sys/syscall.h>21#endif22#if defined(HAVE_SYS_RESOURCE_H)23#include <sys/resource.h>24#endif25#ifdef HAVE_DIRENT_H26#include <dirent.h>27#endif28#ifdef HAVE_GRP_H29#include <grp.h>30#endif /* HAVE_GRP_H */3132#include "posixmodule.h"3334#ifdef _Py_MEMORY_SANITIZER35# include <sanitizer/msan_interface.h>36#endif3738#if defined(__ANDROID__) && __ANDROID_API__ < 21 && !defined(SYS_getdents64)39# include <sys/linux-syscalls.h>40# define SYS_getdents64 __NR_getdents6441#endif4243#if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \44defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK)45/* If this is ever expanded to non-Linux platforms, verify what calls are46* allowed after vfork(). Ex: setsid() may be disallowed on macOS? */47# include <signal.h>48# define VFORK_USABLE 149#endif5051#if defined(__sun) && defined(__SVR4)52/* readdir64 is used to work around Solaris 9 bug 6395699. */53# define readdir readdir6454# define dirent dirent6455# if !defined(HAVE_DIRFD)56/* Some versions of Solaris lack dirfd(). */57# define dirfd(dirp) ((dirp)->dd_fd)58# define HAVE_DIRFD59# endif60#endif6162#if defined(__FreeBSD__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__DragonFly__)63# define FD_DIR "/dev/fd"64#else65# define FD_DIR "/proc/self/fd"66#endif6768#ifdef NGROUPS_MAX69#define MAX_GROUPS NGROUPS_MAX70#else71#define MAX_GROUPS 6472#endif7374#define POSIX_CALL(call) do { if ((call) == -1) goto error; } while (0)7576static struct PyModuleDef _posixsubprocessmodule;7778/*[clinic input]79module _posixsubprocess80[clinic start generated code]*/81/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c62211df27cf7334]*/8283/*[python input]84class pid_t_converter(CConverter):85type = 'pid_t'86format_unit = '" _Py_PARSE_PID "'8788def parse_arg(self, argname, displayname):89return """90{paramname} = PyLong_AsPid({argname});91if ({paramname} == -1 && PyErr_Occurred()) {{{{92goto exit;93}}}}94""".format(argname=argname, paramname=self.parser_name)95[python start generated code]*/96/*[python end generated code: output=da39a3ee5e6b4b0d input=5af1c116d56cbb5a]*/9798#include "clinic/_posixsubprocess.c.h"99100/* Convert ASCII to a positive int, no libc call. no overflow. -1 on error. */101static int102_pos_int_from_ascii(const char *name)103{104int num = 0;105while (*name >= '0' && *name <= '9') {106num = num * 10 + (*name - '0');107++name;108}109if (*name)110return -1; /* Non digit found, not a number. */111return num;112}113114115#if defined(__FreeBSD__) || defined(__DragonFly__)116/* When /dev/fd isn't mounted it is often a static directory populated117* with 0 1 2 or entries for 0 .. 63 on FreeBSD, NetBSD, OpenBSD and DragonFlyBSD.118* NetBSD and OpenBSD have a /proc fs available (though not necessarily119* mounted) and do not have fdescfs for /dev/fd. MacOS X has a devfs120* that properly supports /dev/fd.121*/122static int123_is_fdescfs_mounted_on_dev_fd(void)124{125struct stat dev_stat;126struct stat dev_fd_stat;127if (stat("/dev", &dev_stat) != 0)128return 0;129if (stat(FD_DIR, &dev_fd_stat) != 0)130return 0;131if (dev_stat.st_dev == dev_fd_stat.st_dev)132return 0; /* / == /dev == /dev/fd means it is static. #fail */133return 1;134}135#endif136137138/* Returns 1 if there is a problem with fd_sequence, 0 otherwise. */139static int140_sanity_check_python_fd_sequence(PyObject *fd_sequence)141{142Py_ssize_t seq_idx;143long prev_fd = -1;144for (seq_idx = 0; seq_idx < PyTuple_GET_SIZE(fd_sequence); ++seq_idx) {145PyObject* py_fd = PyTuple_GET_ITEM(fd_sequence, seq_idx);146long iter_fd;147if (!PyLong_Check(py_fd)) {148return 1;149}150iter_fd = PyLong_AsLong(py_fd);151if (iter_fd < 0 || iter_fd <= prev_fd || iter_fd > INT_MAX) {152/* Negative, overflow, unsorted, too big for a fd. */153return 1;154}155prev_fd = iter_fd;156}157return 0;158}159160161/* Is fd found in the sorted Python Sequence? */162static int163_is_fd_in_sorted_fd_sequence(int fd, int *fd_sequence,164Py_ssize_t fd_sequence_len)165{166/* Binary search. */167Py_ssize_t search_min = 0;168Py_ssize_t search_max = fd_sequence_len - 1;169if (search_max < 0)170return 0;171do {172long middle = (search_min + search_max) / 2;173long middle_fd = fd_sequence[middle];174if (fd == middle_fd)175return 1;176if (fd > middle_fd)177search_min = middle + 1;178else179search_max = middle - 1;180} while (search_min <= search_max);181return 0;182}183184185// Forward declaration186static void _Py_FreeCharPArray(char *const array[]);187188/*189* Flatten a sequence of bytes() objects into a C array of190* NULL terminated string pointers with a NULL char* terminating the array.191* (ie: an argv or env list)192*193* Memory allocated for the returned list is allocated using PyMem_Malloc()194* and MUST be freed by _Py_FreeCharPArray().195*/196static char *const *197_PySequence_BytesToCharpArray(PyObject* self)198{199char **array;200Py_ssize_t i, argc;201PyObject *item = NULL;202Py_ssize_t size;203204argc = PySequence_Size(self);205if (argc == -1)206return NULL;207208assert(argc >= 0);209210if ((size_t)argc > (PY_SSIZE_T_MAX-sizeof(char *)) / sizeof(char *)) {211PyErr_NoMemory();212return NULL;213}214215array = PyMem_Malloc((argc + 1) * sizeof(char *));216if (array == NULL) {217PyErr_NoMemory();218return NULL;219}220for (i = 0; i < argc; ++i) {221char *data;222item = PySequence_GetItem(self, i);223if (item == NULL) {224/* NULL terminate before freeing. */225array[i] = NULL;226goto fail;227}228/* check for embedded null bytes */229if (PyBytes_AsStringAndSize(item, &data, NULL) < 0) {230/* NULL terminate before freeing. */231array[i] = NULL;232goto fail;233}234size = PyBytes_GET_SIZE(item) + 1;235array[i] = PyMem_Malloc(size);236if (!array[i]) {237PyErr_NoMemory();238goto fail;239}240memcpy(array[i], data, size);241Py_DECREF(item);242}243array[argc] = NULL;244245return array;246247fail:248Py_XDECREF(item);249_Py_FreeCharPArray(array);250return NULL;251}252253254/* Free's a NULL terminated char** array of C strings. */255static void256_Py_FreeCharPArray(char *const array[])257{258Py_ssize_t i;259for (i = 0; array[i] != NULL; ++i) {260PyMem_Free(array[i]);261}262PyMem_Free((void*)array);263}264265266/*267* Do all the Python C API calls in the parent process to turn the pass_fds268* "py_fds_to_keep" tuple into a C array. The caller owns allocation and269* freeing of the array.270*271* On error an unknown number of array elements may have been filled in.272* A Python exception has been set when an error is returned.273*274* Returns: -1 on error, 0 on success.275*/276static int277convert_fds_to_keep_to_c(PyObject *py_fds_to_keep, int *c_fds_to_keep)278{279Py_ssize_t i, len;280281len = PyTuple_GET_SIZE(py_fds_to_keep);282for (i = 0; i < len; ++i) {283PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i);284long fd = PyLong_AsLong(fdobj);285if (fd == -1 && PyErr_Occurred()) {286return -1;287}288if (fd < 0 || fd > INT_MAX) {289PyErr_SetString(PyExc_ValueError,290"fd out of range in fds_to_keep.");291return -1;292}293c_fds_to_keep[i] = (int)fd;294}295return 0;296}297298299/* This function must be async-signal-safe as it is called from child_exec()300* after fork() or vfork().301*/302static int303make_inheritable(int *c_fds_to_keep, Py_ssize_t len, int errpipe_write)304{305Py_ssize_t i;306307for (i = 0; i < len; ++i) {308int fd = c_fds_to_keep[i];309if (fd == errpipe_write) {310/* errpipe_write is part of fds_to_keep. It must be closed at311exec(), but kept open in the child process until exec() is312called. */313continue;314}315if (_Py_set_inheritable_async_safe(fd, 1, NULL) < 0)316return -1;317}318return 0;319}320321322/* Get the maximum file descriptor that could be opened by this process.323* This function is async signal safe for use between fork() and exec().324*/325static long326safe_get_max_fd(void)327{328long local_max_fd;329#if defined(__NetBSD__)330local_max_fd = fcntl(0, F_MAXFD);331if (local_max_fd >= 0)332return local_max_fd;333#endif334#if defined(HAVE_SYS_RESOURCE_H) && defined(__OpenBSD__)335struct rlimit rl;336/* Not on the POSIX async signal safe functions list but likely337* safe. TODO - Someone should audit OpenBSD to make sure. */338if (getrlimit(RLIMIT_NOFILE, &rl) >= 0)339return (long) rl.rlim_max;340#endif341#ifdef _SC_OPEN_MAX342local_max_fd = sysconf(_SC_OPEN_MAX);343if (local_max_fd == -1)344#endif345local_max_fd = 256; /* Matches legacy Lib/subprocess.py behavior. */346return local_max_fd;347}348349350/* Close all file descriptors in the given range except for those in351* fds_to_keep by invoking closer on each subrange.352*353* If end_fd == -1, it's guessed via safe_get_max_fd(), but it isn't354* possible to know for sure what the max fd to go up to is for355* processes with the capability of raising their maximum, or in case356* a process opened a high fd and then lowered its maximum.357*/358static int359_close_range_except(int start_fd,360int end_fd,361int *fds_to_keep,362Py_ssize_t fds_to_keep_len,363int (*closer)(int, int))364{365if (end_fd == -1) {366end_fd = Py_MIN(safe_get_max_fd(), INT_MAX);367}368Py_ssize_t keep_seq_idx;369/* As fds_to_keep is sorted we can loop through the list closing370* fds in between any in the keep list falling within our range. */371for (keep_seq_idx = 0; keep_seq_idx < fds_to_keep_len; ++keep_seq_idx) {372int keep_fd = fds_to_keep[keep_seq_idx];373if (keep_fd < start_fd)374continue;375if (closer(start_fd, keep_fd - 1) != 0)376return -1;377start_fd = keep_fd + 1;378}379if (start_fd <= end_fd) {380if (closer(start_fd, end_fd) != 0)381return -1;382}383return 0;384}385386#if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)387/* It doesn't matter if d_name has room for NAME_MAX chars; we're using this388* only to read a directory of short file descriptor number names. The kernel389* will return an error if we didn't give it enough space. Highly Unlikely.390* This structure is very old and stable: It will not change unless the kernel391* chooses to break compatibility with all existing binaries. Highly Unlikely.392*/393struct linux_dirent64 {394unsigned long long d_ino;395long long d_off;396unsigned short d_reclen; /* Length of this linux_dirent */397unsigned char d_type;398char d_name[256]; /* Filename (null-terminated) */399};400401static int402_brute_force_closer(int first, int last)403{404for (int i = first; i <= last; i++) {405/* Ignore errors */406(void)close(i);407}408return 0;409}410411/* Close all open file descriptors in the range from start_fd and higher412* Do not close any in the sorted fds_to_keep list.413*414* This version is async signal safe as it does not make any unsafe C library415* calls, malloc calls or handle any locks. It is _unfortunate_ to be forced416* to resort to making a kernel system call directly but this is the ONLY api417* available that does no harm. opendir/readdir/closedir perform memory418* allocation and locking so while they usually work they are not guaranteed419* to (especially if you have replaced your malloc implementation). A version420* of this function that uses those can be found in the _maybe_unsafe variant.421*422* This is Linux specific because that is all I am ready to test it on. It423* should be easy to add OS specific dirent or dirent64 structures and modify424* it with some cpp #define magic to work on other OSes as well if you want.425*/426static void427_close_open_fds_safe(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)428{429int fd_dir_fd;430431fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);432if (fd_dir_fd == -1) {433/* No way to get a list of open fds. */434_close_range_except(start_fd, -1,435fds_to_keep, fds_to_keep_len,436_brute_force_closer);437return;438} else {439char buffer[sizeof(struct linux_dirent64)];440int bytes;441while ((bytes = syscall(SYS_getdents64, fd_dir_fd,442(struct linux_dirent64 *)buffer,443sizeof(buffer))) > 0) {444struct linux_dirent64 *entry;445int offset;446#ifdef _Py_MEMORY_SANITIZER447__msan_unpoison(buffer, bytes);448#endif449for (offset = 0; offset < bytes; offset += entry->d_reclen) {450int fd;451entry = (struct linux_dirent64 *)(buffer + offset);452if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)453continue; /* Not a number. */454if (fd != fd_dir_fd && fd >= start_fd &&455!_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,456fds_to_keep_len)) {457close(fd);458}459}460}461close(fd_dir_fd);462}463}464465#define _close_open_fds_fallback _close_open_fds_safe466467#else /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */468469static int470_unsafe_closer(int first, int last)471{472_Py_closerange(first, last);473return 0;474}475476/* Close all open file descriptors from start_fd and higher.477* Do not close any in the sorted fds_to_keep tuple.478*479* This function violates the strict use of async signal safe functions. :(480* It calls opendir(), readdir() and closedir(). Of these, the one most481* likely to ever cause a problem is opendir() as it performs an internal482* malloc(). Practically this should not be a problem. The Java VM makes the483* same calls between fork and exec in its own UNIXProcess_md.c implementation.484*485* readdir_r() is not used because it provides no benefit. It is typically486* implemented as readdir() followed by memcpy(). See also:487* http://womble.decadent.org.uk/readdir_r-advisory.html488*/489static void490_close_open_fds_maybe_unsafe(int start_fd, int *fds_to_keep,491Py_ssize_t fds_to_keep_len)492{493DIR *proc_fd_dir;494#ifndef HAVE_DIRFD495while (_is_fd_in_sorted_fd_sequence(start_fd, fds_to_keep,496fds_to_keep_len)) {497++start_fd;498}499/* Close our lowest fd before we call opendir so that it is likely to500* reuse that fd otherwise we might close opendir's file descriptor in501* our loop. This trick assumes that fd's are allocated on a lowest502* available basis. */503close(start_fd);504++start_fd;505#endif506507#if defined(__FreeBSD__) || defined(__DragonFly__)508if (!_is_fdescfs_mounted_on_dev_fd())509proc_fd_dir = NULL;510else511#endif512proc_fd_dir = opendir(FD_DIR);513if (!proc_fd_dir) {514/* No way to get a list of open fds. */515_close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,516_unsafe_closer);517} else {518struct dirent *dir_entry;519#ifdef HAVE_DIRFD520int fd_used_by_opendir = dirfd(proc_fd_dir);521#else522int fd_used_by_opendir = start_fd - 1;523#endif524errno = 0;525while ((dir_entry = readdir(proc_fd_dir))) {526int fd;527if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)528continue; /* Not a number. */529if (fd != fd_used_by_opendir && fd >= start_fd &&530!_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,531fds_to_keep_len)) {532close(fd);533}534errno = 0;535}536if (errno) {537/* readdir error, revert behavior. Highly Unlikely. */538_close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,539_unsafe_closer);540}541closedir(proc_fd_dir);542}543}544545#define _close_open_fds_fallback _close_open_fds_maybe_unsafe546547#endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */548549/* We can use close_range() library function only if it's known to be550* async-signal-safe.551*552* On Linux, glibc explicitly documents it to be a thin wrapper over553* the system call, and other C libraries are likely to follow glibc.554*/555#if defined(HAVE_CLOSE_RANGE) && \556(defined(__linux__) || defined(__FreeBSD__))557#define HAVE_ASYNC_SAFE_CLOSE_RANGE558559static int560_close_range_closer(int first, int last)561{562return close_range(first, last, 0);563}564#endif565566static void567_close_open_fds(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)568{569#ifdef HAVE_ASYNC_SAFE_CLOSE_RANGE570if (_close_range_except(571start_fd, INT_MAX, fds_to_keep, fds_to_keep_len,572_close_range_closer) == 0) {573return;574}575#endif576_close_open_fds_fallback(start_fd, fds_to_keep, fds_to_keep_len);577}578579#ifdef VFORK_USABLE580/* Reset dispositions for all signals to SIG_DFL except for ignored581* signals. This way we ensure that no signal handlers can run582* after we unblock signals in a child created by vfork().583*/584static void585reset_signal_handlers(const sigset_t *child_sigmask)586{587struct sigaction sa_dfl = {.sa_handler = SIG_DFL};588for (int sig = 1; sig < _NSIG; sig++) {589/* Dispositions for SIGKILL and SIGSTOP can't be changed. */590if (sig == SIGKILL || sig == SIGSTOP) {591continue;592}593594/* There is no need to reset the disposition of signals that will595* remain blocked across execve() since the kernel will do it. */596if (sigismember(child_sigmask, sig) == 1) {597continue;598}599600struct sigaction sa;601/* C libraries usually return EINVAL for signals used602* internally (e.g. for thread cancellation), so simply603* skip errors here. */604if (sigaction(sig, NULL, &sa) == -1) {605continue;606}607608/* void *h works as these fields are both pointer types already. */609void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction :610(void *)sa.sa_handler);611if (h == SIG_IGN || h == SIG_DFL) {612continue;613}614615/* This call can't reasonably fail, but if it does, terminating616* the child seems to be too harsh, so ignore errors. */617(void) sigaction(sig, &sa_dfl, NULL);618}619}620#endif /* VFORK_USABLE */621622623/*624* This function is code executed in the child process immediately after625* (v)fork to set things up and call exec().626*627* All of the code in this function must only use async-signal-safe functions,628* listed at `man 7 signal` or629* http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.630*631* This restriction is documented at632* http://www.opengroup.org/onlinepubs/009695399/functions/fork.html.633*634* If this function is called after vfork(), even more care must be taken.635* The lack of preparations that C libraries normally take on fork(),636* as well as sharing the address space with the parent, might make even637* async-signal-safe functions vfork-unsafe. In particular, on Linux,638* set*id() and setgroups() library functions must not be called, since639* they have to interact with the library-level thread list and send640* library-internal signals to implement per-process credentials semantics641* required by POSIX but not supported natively on Linux. Another reason to642* avoid this family of functions is that sharing an address space between643* processes running with different privileges is inherently insecure.644* See https://bugs.python.org/issue35823 for discussion and references.645*646* In some C libraries, setrlimit() has the same thread list/signalling647* behavior since resource limits were per-thread attributes before648* Linux 2.6.10. Musl, as of 1.2.1, is known to have this issue649* (https://www.openwall.com/lists/musl/2020/10/15/6).650*651* If vfork-unsafe functionality is desired after vfork(), consider using652* syscall() to obtain it.653*/654Py_NO_INLINE static void655child_exec(char *const exec_array[],656char *const argv[],657char *const envp[],658const char *cwd,659int p2cread, int p2cwrite,660int c2pread, int c2pwrite,661int errread, int errwrite,662int errpipe_read, int errpipe_write,663int close_fds, int restore_signals,664int call_setsid, pid_t pgid_to_set,665gid_t gid,666Py_ssize_t extra_group_size, const gid_t *extra_groups,667uid_t uid, int child_umask,668const void *child_sigmask,669int *fds_to_keep, Py_ssize_t fds_to_keep_len,670PyObject *preexec_fn,671PyObject *preexec_fn_args_tuple)672{673int i, saved_errno, reached_preexec = 0;674PyObject *result;675const char* err_msg = "";676/* Buffer large enough to hold a hex integer. We can't malloc. */677char hex_errno[sizeof(saved_errno)*2+1];678679if (make_inheritable(fds_to_keep, fds_to_keep_len, errpipe_write) < 0)680goto error;681682/* Close parent's pipe ends. */683if (p2cwrite != -1)684POSIX_CALL(close(p2cwrite));685if (c2pread != -1)686POSIX_CALL(close(c2pread));687if (errread != -1)688POSIX_CALL(close(errread));689POSIX_CALL(close(errpipe_read));690691/* When duping fds, if there arises a situation where one of the fds is692either 0, 1 or 2, it is possible that it is overwritten (#12607). */693if (c2pwrite == 0) {694POSIX_CALL(c2pwrite = dup(c2pwrite));695/* issue32270 */696if (_Py_set_inheritable_async_safe(c2pwrite, 0, NULL) < 0) {697goto error;698}699}700while (errwrite == 0 || errwrite == 1) {701POSIX_CALL(errwrite = dup(errwrite));702/* issue32270 */703if (_Py_set_inheritable_async_safe(errwrite, 0, NULL) < 0) {704goto error;705}706}707708/* Dup fds for child.709dup2() removes the CLOEXEC flag but we must do it ourselves if dup2()710would be a no-op (issue #10806). */711if (p2cread == 0) {712if (_Py_set_inheritable_async_safe(p2cread, 1, NULL) < 0)713goto error;714}715else if (p2cread != -1)716POSIX_CALL(dup2(p2cread, 0)); /* stdin */717718if (c2pwrite == 1) {719if (_Py_set_inheritable_async_safe(c2pwrite, 1, NULL) < 0)720goto error;721}722else if (c2pwrite != -1)723POSIX_CALL(dup2(c2pwrite, 1)); /* stdout */724725if (errwrite == 2) {726if (_Py_set_inheritable_async_safe(errwrite, 1, NULL) < 0)727goto error;728}729else if (errwrite != -1)730POSIX_CALL(dup2(errwrite, 2)); /* stderr */731732/* We no longer manually close p2cread, c2pwrite, and errwrite here as733* _close_open_fds takes care when it is not already non-inheritable. */734735if (cwd)736POSIX_CALL(chdir(cwd));737738if (child_umask >= 0)739umask(child_umask); /* umask() always succeeds. */740741if (restore_signals)742_Py_RestoreSignals();743744#ifdef VFORK_USABLE745if (child_sigmask) {746reset_signal_handlers(child_sigmask);747if ((errno = pthread_sigmask(SIG_SETMASK, child_sigmask, NULL))) {748goto error;749}750}751#endif752753#ifdef HAVE_SETSID754if (call_setsid)755POSIX_CALL(setsid());756#endif757758#ifdef HAVE_SETPGID759static_assert(_Py_IS_TYPE_SIGNED(pid_t), "pid_t is unsigned");760if (pgid_to_set >= 0) {761POSIX_CALL(setpgid(0, pgid_to_set));762}763#endif764765#ifdef HAVE_SETGROUPS766if (extra_group_size > 0)767POSIX_CALL(setgroups(extra_group_size, extra_groups));768#endif /* HAVE_SETGROUPS */769770#ifdef HAVE_SETREGID771if (gid != (gid_t)-1)772POSIX_CALL(setregid(gid, gid));773#endif /* HAVE_SETREGID */774775#ifdef HAVE_SETREUID776if (uid != (uid_t)-1)777POSIX_CALL(setreuid(uid, uid));778#endif /* HAVE_SETREUID */779780781reached_preexec = 1;782if (preexec_fn != Py_None && preexec_fn_args_tuple) {783/* This is where the user has asked us to deadlock their program. */784result = PyObject_Call(preexec_fn, preexec_fn_args_tuple, NULL);785if (result == NULL) {786/* Stringifying the exception or traceback would involve787* memory allocation and thus potential for deadlock.788* We've already faced potential deadlock by calling back789* into Python in the first place, so it probably doesn't790* matter but we avoid it to minimize the possibility. */791err_msg = "Exception occurred in preexec_fn.";792errno = 0; /* We don't want to report an OSError. */793goto error;794}795/* Py_DECREF(result); - We're about to exec so why bother? */796}797798/* close FDs after executing preexec_fn, which might open FDs */799if (close_fds) {800/* TODO HP-UX could use pstat_getproc() if anyone cares about it. */801_close_open_fds(3, fds_to_keep, fds_to_keep_len);802}803804/* This loop matches the Lib/os.py _execvpe()'s PATH search when */805/* given the executable_list generated by Lib/subprocess.py. */806saved_errno = 0;807for (i = 0; exec_array[i] != NULL; ++i) {808const char *executable = exec_array[i];809if (envp) {810execve(executable, argv, envp);811} else {812execv(executable, argv);813}814if (errno != ENOENT && errno != ENOTDIR && saved_errno == 0) {815saved_errno = errno;816}817}818/* Report the first exec error, not the last. */819if (saved_errno)820errno = saved_errno;821822error:823saved_errno = errno;824/* Report the posix error to our parent process. */825/* We ignore all write() return values as the total size of our writes is826less than PIPEBUF and we cannot do anything about an error anyways.827Use _Py_write_noraise() to retry write() if it is interrupted by a828signal (fails with EINTR). */829if (saved_errno) {830char *cur;831_Py_write_noraise(errpipe_write, "OSError:", 8);832cur = hex_errno + sizeof(hex_errno);833while (saved_errno != 0 && cur != hex_errno) {834*--cur = Py_hexdigits[saved_errno % 16];835saved_errno /= 16;836}837_Py_write_noraise(errpipe_write, cur, hex_errno + sizeof(hex_errno) - cur);838_Py_write_noraise(errpipe_write, ":", 1);839if (!reached_preexec) {840/* Indicate to the parent that the error happened before exec(). */841_Py_write_noraise(errpipe_write, "noexec", 6);842}843/* We can't call strerror(saved_errno). It is not async signal safe.844* The parent process will look the error message up. */845} else {846_Py_write_noraise(errpipe_write, "SubprocessError:0:", 18);847_Py_write_noraise(errpipe_write, err_msg, strlen(err_msg));848}849}850851852/* The main purpose of this wrapper function is to isolate vfork() from both853* subprocess_fork_exec() and child_exec(). A child process created via854* vfork() executes on the same stack as the parent process while the latter is855* suspended, so this function should not be inlined to avoid compiler bugs856* that might clobber data needed by the parent later. Additionally,857* child_exec() should not be inlined to avoid spurious -Wclobber warnings from858* GCC (see bpo-35823).859*/860Py_NO_INLINE static pid_t861do_fork_exec(char *const exec_array[],862char *const argv[],863char *const envp[],864const char *cwd,865int p2cread, int p2cwrite,866int c2pread, int c2pwrite,867int errread, int errwrite,868int errpipe_read, int errpipe_write,869int close_fds, int restore_signals,870int call_setsid, pid_t pgid_to_set,871gid_t gid,872Py_ssize_t extra_group_size, const gid_t *extra_groups,873uid_t uid, int child_umask,874const void *child_sigmask,875int *fds_to_keep, Py_ssize_t fds_to_keep_len,876PyObject *preexec_fn,877PyObject *preexec_fn_args_tuple)878{879880pid_t pid;881882#ifdef VFORK_USABLE883PyThreadState *vfork_tstate_save;884if (child_sigmask) {885/* These are checked by our caller; verify them in debug builds. */886assert(uid == (uid_t)-1);887assert(gid == (gid_t)-1);888assert(extra_group_size < 0);889assert(preexec_fn == Py_None);890891/* Drop the GIL so that other threads can continue execution while this892* thread in the parent remains blocked per vfork-semantics on the893* child's exec syscall outcome. Exec does filesystem access which894* can take an arbitrarily long time. This addresses GH-104372.895*896* The vfork'ed child still runs in our address space. Per POSIX it897* must be limited to nothing but exec, but the Linux implementation898* is a little more usable. See the child_exec() comment - The child899* MUST NOT re-acquire the GIL.900*/901vfork_tstate_save = PyEval_SaveThread();902pid = vfork();903if (pid != 0) {904// Not in the child process, reacquire the GIL.905PyEval_RestoreThread(vfork_tstate_save);906}907if (pid == (pid_t)-1) {908/* If vfork() fails, fall back to using fork(). When it isn't909* allowed in a process by the kernel, vfork can return -1910* with errno EINVAL. https://bugs.python.org/issue47151. */911pid = fork();912}913} else914#endif915{916pid = fork();917}918919if (pid != 0) {920// Parent process.921return pid;922}923924/* Child process.925* See the comment above child_exec() for restrictions imposed on926* the code below.927*/928929if (preexec_fn != Py_None) {930/* We'll be calling back into Python later so we need to do this.931* This call may not be async-signal-safe but neither is calling932* back into Python. The user asked us to use hope as a strategy933* to avoid deadlock... */934PyOS_AfterFork_Child();935}936937child_exec(exec_array, argv, envp, cwd,938p2cread, p2cwrite, c2pread, c2pwrite,939errread, errwrite, errpipe_read, errpipe_write,940close_fds, restore_signals, call_setsid, pgid_to_set,941gid, extra_group_size, extra_groups,942uid, child_umask, child_sigmask,943fds_to_keep, fds_to_keep_len,944preexec_fn, preexec_fn_args_tuple);945_exit(255);946return 0; /* Dead code to avoid a potential compiler warning. */947}948949/*[clinic input]950_posixsubprocess.fork_exec as subprocess_fork_exec951args as process_args: object952executable_list: object953close_fds: bool954pass_fds as py_fds_to_keep: object(subclass_of='&PyTuple_Type')955cwd as cwd_obj: object956env as env_list: object957p2cread: int958p2cwrite: int959c2pread: int960c2pwrite: int961errread: int962errwrite: int963errpipe_read: int964errpipe_write: int965restore_signals: bool966call_setsid: bool967pgid_to_set: pid_t968gid as gid_object: object969extra_groups as extra_groups_packed: object970uid as uid_object: object971child_umask: int972preexec_fn: object973allow_vfork: bool974/975976Spawn a fresh new child process.977978Fork a child process, close parent file descriptors as appropriate in the979child and duplicate the few that are needed before calling exec() in the980child process.981982If close_fds is True, close file descriptors 3 and higher, except those listed983in the sorted tuple pass_fds.984985The preexec_fn, if supplied, will be called immediately before closing file986descriptors and exec.987988WARNING: preexec_fn is NOT SAFE if your application uses threads.989It may trigger infrequent, difficult to debug deadlocks.990991If an error occurs in the child process before the exec, it is992serialized and written to the errpipe_write fd per subprocess.py.993994Returns: the child process's PID.995996Raises: Only on an error in the parent process.997[clinic start generated code]*/998999static PyObject *1000subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,1001PyObject *executable_list, int close_fds,1002PyObject *py_fds_to_keep, PyObject *cwd_obj,1003PyObject *env_list, int p2cread, int p2cwrite,1004int c2pread, int c2pwrite, int errread,1005int errwrite, int errpipe_read, int errpipe_write,1006int restore_signals, int call_setsid,1007pid_t pgid_to_set, PyObject *gid_object,1008PyObject *extra_groups_packed,1009PyObject *uid_object, int child_umask,1010PyObject *preexec_fn, int allow_vfork)1011/*[clinic end generated code: output=7ee4f6ee5cf22b5b input=51757287ef266ffa]*/1012{1013PyObject *converted_args = NULL, *fast_args = NULL;1014PyObject *preexec_fn_args_tuple = NULL;1015gid_t *extra_groups = NULL;1016PyObject *cwd_obj2 = NULL;1017const char *cwd = NULL;1018pid_t pid = -1;1019int need_to_reenable_gc = 0;1020char *const *argv = NULL, *const *envp = NULL;1021Py_ssize_t extra_group_size = 0;1022int need_after_fork = 0;1023int saved_errno = 0;1024int *c_fds_to_keep = NULL;1025Py_ssize_t fds_to_keep_len = PyTuple_GET_SIZE(py_fds_to_keep);10261027PyInterpreterState *interp = PyInterpreterState_Get();1028if ((preexec_fn != Py_None) && interp->finalizing) {1029PyErr_SetString(PyExc_RuntimeError,1030"preexec_fn not supported at interpreter shutdown");1031return NULL;1032}1033if ((preexec_fn != Py_None) && (interp != PyInterpreterState_Main())) {1034PyErr_SetString(PyExc_RuntimeError,1035"preexec_fn not supported within subinterpreters");1036return NULL;1037}10381039if (close_fds && errpipe_write < 3) { /* precondition */1040PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3");1041return NULL;1042}1043if (_sanity_check_python_fd_sequence(py_fds_to_keep)) {1044PyErr_SetString(PyExc_ValueError, "bad value(s) in fds_to_keep");1045return NULL;1046}10471048/* We need to call gc.disable() when we'll be calling preexec_fn */1049if (preexec_fn != Py_None) {1050need_to_reenable_gc = PyGC_Disable();1051}10521053char *const *exec_array = _PySequence_BytesToCharpArray(executable_list);1054if (!exec_array)1055goto cleanup;10561057/* Convert args and env into appropriate arguments for exec() */1058/* These conversions are done in the parent process to avoid allocating1059or freeing memory in the child process. */1060if (process_args != Py_None) {1061Py_ssize_t num_args;1062/* Equivalent to: */1063/* tuple(PyUnicode_FSConverter(arg) for arg in process_args) */1064fast_args = PySequence_Fast(process_args, "argv must be a tuple");1065if (fast_args == NULL)1066goto cleanup;1067num_args = PySequence_Fast_GET_SIZE(fast_args);1068converted_args = PyTuple_New(num_args);1069if (converted_args == NULL)1070goto cleanup;1071for (Py_ssize_t arg_num = 0; arg_num < num_args; ++arg_num) {1072PyObject *borrowed_arg, *converted_arg;1073if (PySequence_Fast_GET_SIZE(fast_args) != num_args) {1074PyErr_SetString(PyExc_RuntimeError, "args changed during iteration");1075goto cleanup;1076}1077borrowed_arg = PySequence_Fast_GET_ITEM(fast_args, arg_num);1078if (PyUnicode_FSConverter(borrowed_arg, &converted_arg) == 0)1079goto cleanup;1080PyTuple_SET_ITEM(converted_args, arg_num, converted_arg);1081}10821083argv = _PySequence_BytesToCharpArray(converted_args);1084Py_CLEAR(converted_args);1085Py_CLEAR(fast_args);1086if (!argv)1087goto cleanup;1088}10891090if (env_list != Py_None) {1091envp = _PySequence_BytesToCharpArray(env_list);1092if (!envp)1093goto cleanup;1094}10951096if (cwd_obj != Py_None) {1097if (PyUnicode_FSConverter(cwd_obj, &cwd_obj2) == 0)1098goto cleanup;1099cwd = PyBytes_AsString(cwd_obj2);1100}11011102if (extra_groups_packed != Py_None) {1103#ifdef HAVE_SETGROUPS1104if (!PyList_Check(extra_groups_packed)) {1105PyErr_SetString(PyExc_TypeError,1106"setgroups argument must be a list");1107goto cleanup;1108}1109extra_group_size = PySequence_Size(extra_groups_packed);11101111if (extra_group_size < 0)1112goto cleanup;11131114if (extra_group_size > MAX_GROUPS) {1115PyErr_SetString(PyExc_ValueError, "too many extra_groups");1116goto cleanup;1117}11181119/* Deliberately keep extra_groups == NULL for extra_group_size == 0 */1120if (extra_group_size > 0) {1121extra_groups = PyMem_RawMalloc(extra_group_size * sizeof(gid_t));1122if (extra_groups == NULL) {1123PyErr_SetString(PyExc_MemoryError,1124"failed to allocate memory for group list");1125goto cleanup;1126}1127}11281129for (Py_ssize_t i = 0; i < extra_group_size; i++) {1130PyObject *elem;1131elem = PySequence_GetItem(extra_groups_packed, i);1132if (!elem)1133goto cleanup;1134if (!PyLong_Check(elem)) {1135PyErr_SetString(PyExc_TypeError,1136"extra_groups must be integers");1137Py_DECREF(elem);1138goto cleanup;1139} else {1140gid_t gid;1141if (!_Py_Gid_Converter(elem, &gid)) {1142Py_DECREF(elem);1143PyErr_SetString(PyExc_ValueError, "invalid group id");1144goto cleanup;1145}1146extra_groups[i] = gid;1147}1148Py_DECREF(elem);1149}11501151#else /* HAVE_SETGROUPS */1152PyErr_BadInternalCall();1153goto cleanup;1154#endif /* HAVE_SETGROUPS */1155}11561157gid_t gid = (gid_t)-1;1158if (gid_object != Py_None) {1159#ifdef HAVE_SETREGID1160if (!_Py_Gid_Converter(gid_object, &gid))1161goto cleanup;11621163#else /* HAVE_SETREGID */1164PyErr_BadInternalCall();1165goto cleanup;1166#endif /* HAVE_SETREUID */1167}11681169uid_t uid = (uid_t)-1;1170if (uid_object != Py_None) {1171#ifdef HAVE_SETREUID1172if (!_Py_Uid_Converter(uid_object, &uid))1173goto cleanup;11741175#else /* HAVE_SETREUID */1176PyErr_BadInternalCall();1177goto cleanup;1178#endif /* HAVE_SETREUID */1179}11801181c_fds_to_keep = PyMem_Malloc(fds_to_keep_len * sizeof(int));1182if (c_fds_to_keep == NULL) {1183PyErr_SetString(PyExc_MemoryError, "failed to malloc c_fds_to_keep");1184goto cleanup;1185}1186if (convert_fds_to_keep_to_c(py_fds_to_keep, c_fds_to_keep) < 0) {1187goto cleanup;1188}11891190/* This must be the last thing done before fork() because we do not1191* want to call PyOS_BeforeFork() if there is any chance of another1192* error leading to the cleanup: code without calling fork(). */1193if (preexec_fn != Py_None) {1194preexec_fn_args_tuple = PyTuple_New(0);1195if (!preexec_fn_args_tuple)1196goto cleanup;1197PyOS_BeforeFork();1198need_after_fork = 1;1199}12001201/* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */1202const void *old_sigmask = NULL;1203#ifdef VFORK_USABLE1204/* Use vfork() only if it's safe. See the comment above child_exec(). */1205sigset_t old_sigs;1206if (preexec_fn == Py_None && allow_vfork &&1207uid == (uid_t)-1 && gid == (gid_t)-1 && extra_group_size < 0) {1208/* Block all signals to ensure that no signal handlers are run in the1209* child process while it shares memory with us. Note that signals1210* used internally by C libraries won't be blocked by1211* pthread_sigmask(), but signal handlers installed by C libraries1212* normally service only signals originating from *within the process*,1213* so it should be sufficient to consider any library function that1214* might send such a signal to be vfork-unsafe and do not call it in1215* the child.1216*/1217sigset_t all_sigs;1218sigfillset(&all_sigs);1219if ((saved_errno = pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs))) {1220goto cleanup;1221}1222old_sigmask = &old_sigs;1223}1224#endif12251226pid = do_fork_exec(exec_array, argv, envp, cwd,1227p2cread, p2cwrite, c2pread, c2pwrite,1228errread, errwrite, errpipe_read, errpipe_write,1229close_fds, restore_signals, call_setsid, pgid_to_set,1230gid, extra_group_size, extra_groups,1231uid, child_umask, old_sigmask,1232c_fds_to_keep, fds_to_keep_len,1233preexec_fn, preexec_fn_args_tuple);12341235/* Parent (original) process */1236if (pid == (pid_t)-1) {1237/* Capture errno for the exception. */1238saved_errno = errno;1239}12401241#ifdef VFORK_USABLE1242if (old_sigmask) {1243/* vfork() semantics guarantees that the parent is blocked1244* until the child performs _exit() or execve(), so it is safe1245* to unblock signals once we're here.1246* Note that in environments where vfork() is implemented as fork(),1247* such as QEMU user-mode emulation, the parent won't be blocked,1248* but it won't share the address space with the child,1249* so it's still safe to unblock the signals.1250*1251* We don't handle errors here because this call can't fail1252* if valid arguments are given, and because there is no good1253* way for the caller to deal with a failure to restore1254* the thread signal mask. */1255(void) pthread_sigmask(SIG_SETMASK, old_sigmask, NULL);1256}1257#endif12581259if (need_after_fork)1260PyOS_AfterFork_Parent();12611262cleanup:1263if (c_fds_to_keep != NULL) {1264PyMem_Free(c_fds_to_keep);1265}12661267if (saved_errno != 0) {1268errno = saved_errno;1269/* We can't call this above as PyOS_AfterFork_Parent() calls back1270* into Python code which would see the unreturned error. */1271PyErr_SetFromErrno(PyExc_OSError);1272}12731274Py_XDECREF(preexec_fn_args_tuple);1275PyMem_RawFree(extra_groups);1276Py_XDECREF(cwd_obj2);1277if (envp)1278_Py_FreeCharPArray(envp);1279Py_XDECREF(converted_args);1280Py_XDECREF(fast_args);1281if (argv)1282_Py_FreeCharPArray(argv);1283if (exec_array)1284_Py_FreeCharPArray(exec_array);12851286if (need_to_reenable_gc) {1287PyGC_Enable();1288}12891290return pid == -1 ? NULL : PyLong_FromPid(pid);1291}12921293/* module level code ********************************************************/12941295PyDoc_STRVAR(module_doc,1296"A POSIX helper for the subprocess module.");12971298static PyMethodDef module_methods[] = {1299SUBPROCESS_FORK_EXEC_METHODDEF1300{NULL, NULL} /* sentinel */1301};13021303static PyModuleDef_Slot _posixsubprocess_slots[] = {1304{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},1305{0, NULL}1306};13071308static struct PyModuleDef _posixsubprocessmodule = {1309PyModuleDef_HEAD_INIT,1310.m_name = "_posixsubprocess",1311.m_doc = module_doc,1312.m_size = 0,1313.m_methods = module_methods,1314.m_slots = _posixsubprocess_slots,1315};13161317PyMODINIT_FUNC1318PyInit__posixsubprocess(void)1319{1320return PyModuleDef_Init(&_posixsubprocessmodule);1321}132213231324