diff options
-rw-r--r-- | include/clone_internal.h | 16 | ||||
-rw-r--r-- | nptl/allocatestack.c | 59 | ||||
-rw-r--r-- | nptl/pthread_create.c | 38 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/Makefile | 3 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/clone-internal.c | 91 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/clone3.c | 1 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/clone3.h | 67 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/spawni.c | 26 |
8 files changed, 213 insertions, 88 deletions
diff --git a/include/clone_internal.h b/include/clone_internal.h new file mode 100644 index 0000000000..4b23ef33ce --- /dev/null +++ b/include/clone_internal.h @@ -0,0 +1,16 @@ +#ifndef _CLONE3_H +#include_next <clone3.h> + +extern __typeof (clone3) __clone3; + +/* The internal wrapper of clone/clone2 and clone3. If __clone3 returns + -1 with ENOSYS, fall back to clone or clone2. */ +extern int __clone_internal (struct clone_args *__cl_args, + int (*__func) (void *__arg), void *__arg); + +#ifndef _ISOMAC +libc_hidden_proto (__clone3) +libc_hidden_proto (__clone_internal) +#endif + +#endif diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c index 9be6c42894..cfe37a3443 100644 --- a/nptl/allocatestack.c +++ b/nptl/allocatestack.c @@ -33,47 +33,6 @@ #include <kernel-features.h> #include <nptl-stack.h> -#ifndef NEED_SEPARATE_REGISTER_STACK - -/* Most architectures have exactly one stack pointer. Some have more. */ -# define STACK_VARIABLES void *stackaddr = NULL - -/* How to pass the values to the 'create_thread' function. */ -# define STACK_VARIABLES_ARGS stackaddr - -/* How to declare function which gets there parameters. */ -# define STACK_VARIABLES_PARMS void *stackaddr - -/* How to declare allocate_stack. */ -# define ALLOCATE_STACK_PARMS void **stack - -/* This is how the function is called. We do it this way to allow - other variants of the function to have more parameters. */ -# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr) - -#else - -/* We need two stacks. The kernel will place them but we have to tell - the kernel about the size of the reserved address space. */ -# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0 - -/* How to pass the values to the 'create_thread' function. */ -# define STACK_VARIABLES_ARGS stackaddr, stacksize - -/* How to declare function which gets there parameters. */ -# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize - -/* How to declare allocate_stack. */ -# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize - -/* This is how the function is called. We do it this way to allow - other variants of the function to have more parameters. */ -# define ALLOCATE_STACK(attr, pd) \ - allocate_stack (attr, pd, &stackaddr, &stacksize) - -#endif - - /* Default alignment of stack. */ #ifndef STACK_ALIGN # define STACK_ALIGN __alignof__ (long double) @@ -252,7 +211,7 @@ advise_stack_range (void *mem, size_t size, uintptr_t pd, size_t guardsize) PDP must be non-NULL. */ static int allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, - ALLOCATE_STACK_PARMS) + void **stack, size_t *stacksize) { struct pthread *pd; size_t size; @@ -603,25 +562,17 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, /* We place the thread descriptor at the end of the stack. */ *pdp = pd; -#if _STACK_GROWS_DOWN void *stacktop; -# if TLS_TCB_AT_TP +#if TLS_TCB_AT_TP /* The stack begins before the TCB and the static TLS block. */ stacktop = ((char *) (pd + 1) - tls_static_size_for_stack); -# elif TLS_DTV_AT_TP +#elif TLS_DTV_AT_TP stacktop = (char *) (pd - 1); -# endif +#endif -# ifdef NEED_SEPARATE_REGISTER_STACK + *stacksize = stacktop - pd->stackblock; *stack = pd->stackblock; - *stacksize = stacktop - *stack; -# else - *stack = stacktop; -# endif -#else - *stack = pd->stackblock; -#endif return 0; } diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c index 440adc2a6f..d8ec299cb1 100644 --- a/nptl/pthread_create.c +++ b/nptl/pthread_create.c @@ -36,6 +36,7 @@ #include "libioP.h" #include <sys/single_threaded.h> #include <version.h> +#include <clone_internal.h> #include <shlib-compat.h> @@ -227,8 +228,8 @@ late_init (void) static int _Noreturn start_thread (void *arg); static int create_thread (struct pthread *pd, const struct pthread_attr *attr, - bool *stopped_start, STACK_VARIABLES_PARMS, - bool *thread_ran) + bool *stopped_start, void *stackaddr, + size_t stacksize, bool *thread_ran) { /* Determine whether the newly created threads has to be started stopped since we have to set the scheduling parameters or set the @@ -280,14 +281,18 @@ static int create_thread (struct pthread *pd, const struct pthread_attr *attr, TLS_DEFINE_INIT_TP (tp, pd); -#ifdef __NR_clone2 -# define ARCH_CLONE __clone2 -#else -# define ARCH_CLONE __clone -#endif - if (__glibc_unlikely (ARCH_CLONE (&start_thread, STACK_VARIABLES_ARGS, - clone_flags, pd, &pd->tid, tp, &pd->tid) - == -1)) + struct clone_args args = + { + .flags = clone_flags, + .pidfd = (uintptr_t) &pd->tid, + .parent_tid = (uintptr_t) &pd->tid, + .child_tid = (uintptr_t) &pd->tid, + .stack = (uintptr_t) stackaddr, + .stack_size = stacksize, + .tls = (uintptr_t) tp, + }; + int ret = __clone_internal (&args, &start_thread, pd); + if (__glibc_unlikely (ret == -1)) return errno; /* It's started now, so if we fail below, we'll have to let it clean itself @@ -576,7 +581,8 @@ int __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) { - STACK_VARIABLES; + void *stackaddr = NULL; + size_t stacksize = 0; /* Avoid a data race in the multi-threaded case, and call the deferred initialization only once. */ @@ -600,7 +606,7 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr, } struct pthread *pd = NULL; - int err = ALLOCATE_STACK (iattr, &pd); + int err = allocate_stack (iattr, &pd, &stackaddr, &stacksize); int retval = 0; if (__glibc_unlikely (err != 0)) @@ -744,8 +750,8 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr, /* We always create the thread stopped at startup so we can notify the debugger. */ - retval = create_thread (pd, iattr, &stopped_start, - STACK_VARIABLES_ARGS, &thread_ran); + retval = create_thread (pd, iattr, &stopped_start, stackaddr, + stacksize, &thread_ran); if (retval == 0) { /* We retain ownership of PD until (a) (see CONCURRENCY NOTES @@ -776,8 +782,8 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr, } } else - retval = create_thread (pd, iattr, &stopped_start, - STACK_VARIABLES_ARGS, &thread_ran); + retval = create_thread (pd, iattr, &stopped_start, stackaddr, + stacksize, &thread_ran); /* Return to the previous signal mask, after creating the new thread. */ diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index feb8fd4ce1..ed0c0d27f4 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -65,7 +65,8 @@ sysdep_routines += adjtimex clone umount umount2 readahead sysctl \ xstat fxstat lxstat xstat64 fxstat64 lxstat64 \ fxstatat fxstatat64 \ xmknod xmknodat convert_scm_timestamps \ - closefrom_fallback + closefrom_fallback \ + clone3 clone-internal CFLAGS-gethostid.c = -fexceptions CFLAGS-tee.c = -fexceptions -fasynchronous-unwind-tables diff --git a/sysdeps/unix/sysv/linux/clone-internal.c b/sysdeps/unix/sysv/linux/clone-internal.c new file mode 100644 index 0000000000..1e7a8f6b35 --- /dev/null +++ b/sysdeps/unix/sysv/linux/clone-internal.c @@ -0,0 +1,91 @@ +/* The internal wrapper of clone and clone3. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stddef.h> +#include <errno.h> +#include <sched.h> +#include <clone_internal.h> +#include <libc-pointer-arith.h> /* For cast_to_pointer. */ +#include <stackinfo.h> /* For _STACK_GROWS_{UP,DOWN}. */ + +#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ +#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ +#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ + +#define sizeof_field(TYPE, MEMBER) sizeof ((((TYPE *)0)->MEMBER)) +#define offsetofend(TYPE, MEMBER) \ + (offsetof (TYPE, MEMBER) + sizeof_field (TYPE, MEMBER)) + +_Static_assert (__alignof (struct clone_args) == 8, + "__alignof (struct clone_args) != 8"); +_Static_assert (offsetofend (struct clone_args, tls) == CLONE_ARGS_SIZE_VER0, + "offsetofend (struct clone_args, tls) != CLONE_ARGS_SIZE_VER0"); +_Static_assert (offsetofend (struct clone_args, set_tid_size) == CLONE_ARGS_SIZE_VER1, + "offsetofend (struct clone_args, set_tid_size) != CLONE_ARGS_SIZE_VER1"); +_Static_assert (offsetofend (struct clone_args, cgroup) == CLONE_ARGS_SIZE_VER2, + "offsetofend (struct clone_args, cgroup) != CLONE_ARGS_SIZE_VER2"); +_Static_assert (sizeof (struct clone_args) == CLONE_ARGS_SIZE_VER2, + "sizeof (struct clone_args) != CLONE_ARGS_SIZE_VER2"); + +int +__clone_internal (struct clone_args *cl_args, + int (*func) (void *arg), void *arg) +{ + int ret; +#ifdef HAVE_CLONE3_WAPPER + /* Try clone3 first. */ + int saved_errno = errno; + ret = __clone3 (cl_args, sizeof (*cl_args), func, arg); + if (ret != -1 || errno != ENOSYS) + return ret; + + /* NB: Restore errno since errno may be checked against non-zero + return value. */ + __set_errno (saved_errno); +#endif + + /* Map clone3 arguments to clone arguments. NB: No need to check + invalid clone3 specific bits in flags nor exit_signal since this + is an internal function. */ + int flags = cl_args->flags | cl_args->exit_signal; + void *stack = cast_to_pointer (cl_args->stack); + +#ifdef __ia64__ + ret = __clone2 (func, stack, cl_args->stack_size, + flags, arg, + cast_to_pointer (cl_args->parent_tid), + cast_to_pointer (cl_args->tls), + cast_to_pointer (cl_args->child_tid)); +#else +# if !_STACK_GROWS_DOWN && !_STACK_GROWS_UP +# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP" +# endif + +# if _STACK_GROWS_DOWN + stack += cl_args->stack_size; +# endif + ret = __clone (func, stack, flags, arg, + cast_to_pointer (cl_args->parent_tid), + cast_to_pointer (cl_args->tls), + cast_to_pointer (cl_args->child_tid)); +#endif + return ret; +} + +libc_hidden_def (__clone_internal) diff --git a/sysdeps/unix/sysv/linux/clone3.c b/sysdeps/unix/sysv/linux/clone3.c new file mode 100644 index 0000000000..de963ef89d --- /dev/null +++ b/sysdeps/unix/sysv/linux/clone3.c @@ -0,0 +1 @@ +/* An empty placeholder. */ diff --git a/sysdeps/unix/sysv/linux/clone3.h b/sysdeps/unix/sysv/linux/clone3.h new file mode 100644 index 0000000000..1e35ff6422 --- /dev/null +++ b/sysdeps/unix/sysv/linux/clone3.h @@ -0,0 +1,67 @@ +/* The wrapper of clone3. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _CLONE3_H +#define _CLONE3_H 1 + +#include <features.h> +#include <stddef.h> +#include <bits/types.h> + +__BEGIN_DECLS + +/* The unsigned 64-bit and 8-byte aligned integer type. */ +typedef __U64_TYPE __aligned_uint64_t __attribute__ ((__aligned__ (8))); + +/* This struct should only be used in an argument to the clone3 system + call (along with its size argument). It may be extended with new + fields in the future. */ + +struct clone_args +{ + /* Flags bit mask. */ + __aligned_uint64_t flags; + /* Where to store PID file descriptor (pid_t *). */ + __aligned_uint64_t pidfd; + /* Where to store child TID, in child's memory (pid_t *). */ + __aligned_uint64_t child_tid; + /* Where to store child TID, in parent's memory (int *). */ + __aligned_uint64_t parent_tid; + /* Signal to deliver to parent on child termination */ + __aligned_uint64_t exit_signal; + /* The lowest address of stack. */ + __aligned_uint64_t stack; + /* Size of stack. */ + __aligned_uint64_t stack_size; + /* Location of new TLS. */ + __aligned_uint64_t tls; + /* Pointer to a pid_t array (since Linux 5.5). */ + __aligned_uint64_t set_tid; + /* Number of elements in set_tid (since Linux 5.5). */ + __aligned_uint64_t set_tid_size; + /* File descriptor for target cgroup of child (since Linux 5.7). */ + __aligned_uint64_t cgroup; +}; + +/* The wrapper of clone3. */ +extern int clone3 (struct clone_args *__cl_args, size_t __size, + int (*__func) (void *__arg), void *__arg); + +__END_DECLS + +#endif /* clone3.h */ diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c index f7e7353a05..6b0bade4d4 100644 --- a/sysdeps/unix/sysv/linux/spawni.c +++ b/sysdeps/unix/sysv/linux/spawni.c @@ -26,6 +26,7 @@ #include <spawn_int.h> #include <sysdep.h> #include <sys/resource.h> +#include <clone_internal.h> /* The Linux implementation of posix_spawn{p} uses the clone syscall directly with CLONE_VM and CLONE_VFORK flags and an allocated stack. The new stack @@ -53,21 +54,6 @@ normal program exit with the exit code 127. */ #define SPAWN_ERROR 127 -#ifdef __ia64__ -# define CLONE(__fn, __stackbase, __stacksize, __flags, __args) \ - __clone2 (__fn, __stackbase, __stacksize, __flags, __args, 0, 0, 0) -#else -# define CLONE(__fn, __stack, __stacksize, __flags, __args) \ - __clone (__fn, __stack, __flags, __args) -#endif - -/* Since ia64 wants the stackbase w/clone2, re-use the grows-up macro. */ -#if _STACK_GROWS_UP || defined (__ia64__) -# define STACK(__stack, __stack_size) (__stack) -#elif _STACK_GROWS_DOWN -# define STACK(__stack, __stack_size) (__stack + __stack_size) -#endif - struct posix_spawn_args { @@ -382,8 +368,14 @@ __spawnix (pid_t * pid, const char *file, need for CLONE_SETTLS. Although parent and child share the same TLS namespace, there will be no concurrent access for TLS variables (errno for instance). */ - new_pid = CLONE (__spawni_child, STACK (stack, stack_size), stack_size, - CLONE_VM | CLONE_VFORK | SIGCHLD, &args); + struct clone_args clone_args = + { + .flags = CLONE_VM | CLONE_VFORK, + .exit_signal = SIGCHLD, + .stack = (uintptr_t) stack, + .stack_size = stack_size, + }; + new_pid = __clone_internal (&clone_args, __spawni_child, &args); /* It needs to collect the case where the auxiliary process was created but failed to execute the file (due either any preparation step or |