diff options
37 files changed, 243 insertions, 184 deletions
diff --git a/csu/libc-tls.c b/csu/libc-tls.c index 06e76bd395..c3589f0a7d 100644 --- a/csu/libc-tls.c +++ b/csu/libc-tls.c @@ -24,6 +24,7 @@ #include <stdio.h> #include <sys/param.h> #include <array_length.h> +#include <list.h> #ifdef SHARED #error makefile bug, this file is for static only @@ -193,6 +194,12 @@ __libc_setup_tls (void) if (__builtin_expect (lossage != NULL, 0)) _startup_fatal (lossage); +#if THREAD_GSCOPE_IN_TCB + INIT_LIST_HEAD (&_dl_stack_used); + INIT_LIST_HEAD (&_dl_stack_user); + list_add (&THREAD_SELF->list, &_dl_stack_user); +#endif + /* Update the executable's link map with enough information to make the TLS routines happy. */ main_map->l_tls_align = align; diff --git a/elf/Makefile b/elf/Makefile index f10cc59e7c..2015383eb2 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -34,7 +34,8 @@ dl-routines = $(addprefix dl-,load lookup object reloc deps \ version profile tls origin scope \ execstack open close trampoline \ exception sort-maps lookup-direct \ - call-libc-early-init write) + call-libc-early-init write \ + thread_gscope_wait) ifeq (yes,$(use-ldconfig)) dl-routines += dl-cache endif @@ -54,7 +55,8 @@ endif all-dl-routines = $(dl-routines) $(sysdep-dl-routines) # But they are absent from the shared libc, because that code is in ld.so. elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \ - dl-sysdep dl-exception dl-reloc-static-pie + dl-sysdep dl-exception dl-reloc-static-pie \ + thread_gscope_wait # ld.so uses those routines, plus some special stuff for being the program # interpreter and operating independent of libc. diff --git a/elf/dl-support.c b/elf/dl-support.c index afbc94df54..15e6d787c9 100644 --- a/elf/dl-support.c +++ b/elf/dl-support.c @@ -183,10 +183,11 @@ ElfW(Word) _dl_stack_flags = DEFAULT_STACK_PERMS; int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable; -/* Function in libpthread to wait for termination of lookups. */ -void (*_dl_wait_lookup_done) (void); - -#if !THREAD_GSCOPE_IN_TCB +#if THREAD_GSCOPE_IN_TCB +list_t _dl_stack_used; +list_t _dl_stack_user; +int _dl_stack_cache_lock; +#else int _dl_thread_gscope_count; #endif struct dl_scope_free_list *_dl_scope_free_list; diff --git a/elf/dl-thread_gscope_wait.c b/elf/dl-thread_gscope_wait.c new file mode 100644 index 0000000000..28e1494730 --- /dev/null +++ b/elf/dl-thread_gscope_wait.c @@ -0,0 +1,2 @@ +/* By default, the dynamic linker does not use an out-of-line + __thread_gscope_wait function. */ diff --git a/elf/rtld.c b/elf/rtld.c index 5d117d0d2c..c4ffc8d4b7 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -48,6 +48,7 @@ #include <array_length.h> #include <libc-early-init.h> #include <dl-main.h> +#include <list.h> #include <assert.h> @@ -799,6 +800,9 @@ cannot allocate TLS data structures for initial thread\n"); const char *lossage = TLS_INIT_TP (tcbp); if (__glibc_unlikely (lossage != NULL)) _dl_fatal_printf ("cannot set up thread-local storage: %s\n", lossage); +#if THREAD_GSCOPE_IN_TCB + list_add (&THREAD_SELF->list, &GL (dl_stack_user)); +#endif tls_init_tp_called = true; return tcbp; @@ -1139,6 +1143,11 @@ dl_main (const ElfW(Phdr) *phdr, GL(dl_rtld_unlock_recursive) = rtld_lock_default_unlock_recursive; #endif +#if THREAD_GSCOPE_IN_TCB + INIT_LIST_HEAD (&GL (dl_stack_used)); + INIT_LIST_HEAD (&GL (dl_stack_user)); +#endif + /* The explicit initialization here is cheaper than processing the reloc in the _rtld_local definition's initializer. */ GL(dl_make_stack_executable_hook) = &_dl_make_stack_executable; @@ -2383,6 +2392,9 @@ dl_main (const ElfW(Phdr) *phdr, if (__glibc_unlikely (lossage != NULL)) _dl_fatal_printf ("cannot set up thread-local storage: %s\n", lossage); +#if THREAD_GSCOPE_IN_TCB + list_add (&THREAD_SELF->list, &GL (dl_stack_user)); +#endif } /* Make sure no new search directories have been added. */ diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c index 4b45f8c884..b7f9eeebf6 100644 --- a/nptl/allocatestack.c +++ b/nptl/allocatestack.c @@ -106,26 +106,14 @@ static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40MiBi by default. */ static size_t stack_cache_actsize; -/* Mutex protecting this variable. */ -static int stack_cache_lock = LLL_LOCK_INITIALIZER; - /* List of queued stack frames. */ static LIST_HEAD (stack_cache); -/* List of the stacks in use. */ -static LIST_HEAD (stack_used); - /* We need to record what list operations we are going to do so that, in case of an asynchronous interruption due to a fork() call, we can correct for the work. */ static uintptr_t in_flight_stack; -/* List of the threads with user provided stacks in use. No need to - initialize this, since it's done in __pthread_initialize_minimal. */ -list_t __stack_user __attribute__ ((nocommon)); -hidden_data_def (__stack_user) - - /* Check whether the stack is still used or not. */ #define FREE_P(descr) ((descr)->tid <= 0) @@ -173,7 +161,7 @@ get_cached_stack (size_t *sizep, void **memp) struct pthread *result = NULL; list_t *entry; - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Search the cache for a matching entry. We search for the smallest stack which has at least the required size. Note that @@ -206,7 +194,7 @@ get_cached_stack (size_t *sizep, void **memp) || __builtin_expect (result->stackblock_size > 4 * size, 0)) { /* Release the lock. */ - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); return NULL; } @@ -218,13 +206,13 @@ get_cached_stack (size_t *sizep, void **memp) stack_list_del (&result->list); /* And add to the list of stacks in use. */ - stack_list_add (&result->list, &stack_used); + stack_list_add (&result->list, &GL (dl_stack_used)); /* And decrease the cache size. */ stack_cache_actsize -= result->stackblock_size; /* Release the lock early. */ - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Report size and location of the stack to the caller. */ *sizep = result->stackblock_size; @@ -510,12 +498,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, /* Prepare to modify global data. */ - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* And add to the list of stacks in use. */ - list_add (&pd->list, &__stack_user); + list_add (&pd->list, &GL (dl_stack_user)); - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); } else { @@ -644,12 +632,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, /* Prepare to modify global data. */ - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* And add to the list of stacks in use. */ - stack_list_add (&pd->list, &stack_used); + stack_list_add (&pd->list, &GL (dl_stack_used)); - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* There might have been a race. Another thread might have @@ -690,12 +678,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, if (__mprotect (guard, guardsize, PROT_NONE) != 0) { mprot_error: - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Remove the thread from the list. */ stack_list_del (&pd->list); - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Get rid of the TLS block we allocated. */ _dl_deallocate_tls (TLS_TPADJ (pd), false); @@ -799,7 +787,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, void __deallocate_stack (struct pthread *pd) { - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Remove the thread from the list of threads with user defined stacks. */ @@ -815,7 +803,7 @@ __deallocate_stack (struct pthread *pd) /* Free the memory associated with the ELF TLS. */ _dl_deallocate_tls (TLS_TPADJ (pd), false); - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); } @@ -831,10 +819,10 @@ __make_stacks_executable (void **stack_endp) const size_t pagemask = ~(__getpagesize () - 1); #endif - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); list_t *runp; - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) { err = change_stack_perm (list_entry (runp, struct pthread, list) #ifdef NEED_SEPARATE_REGISTER_STACK @@ -860,7 +848,7 @@ __make_stacks_executable (void **stack_endp) break; } - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); return err; } @@ -891,8 +879,8 @@ __reclaim_stacks (void) pointers at the head of the list are inconsistent. */ list_t *l = NULL; - if (stack_used.next->prev != &stack_used) - l = &stack_used; + if (GL (dl_stack_used).next->prev != &GL (dl_stack_used)) + l = &GL (dl_stack_used); else if (stack_cache.next->prev != &stack_cache) l = &stack_cache; @@ -914,7 +902,7 @@ __reclaim_stacks (void) /* Mark all stacks except the still running one as free. */ list_t *runp; - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) { struct pthread *curp = list_entry (runp, struct pthread, list); if (curp != self) @@ -948,7 +936,7 @@ __reclaim_stacks (void) } /* Add the stack of all running threads to the cache. */ - list_splice (&stack_used, &stack_cache); + list_splice (&GL (dl_stack_used), &stack_cache); /* Remove the entry for the current thread to from the cache list and add it to the list of running threads. Which of the two @@ -956,13 +944,13 @@ __reclaim_stacks (void) stack_list_del (&self->list); /* Re-initialize the lists for all the threads. */ - INIT_LIST_HEAD (&stack_used); - INIT_LIST_HEAD (&__stack_user); + INIT_LIST_HEAD (&GL (dl_stack_used)); + INIT_LIST_HEAD (&GL (dl_stack_user)); if (__glibc_unlikely (THREAD_GETMEM (self, user_stack))) - list_add (&self->list, &__stack_user); + list_add (&self->list, &GL (dl_stack_user)); else - list_add (&self->list, &stack_used); + list_add (&self->list, &GL (dl_stack_used)); /* There is one thread running. */ __nptl_nthreads = 1; @@ -970,7 +958,7 @@ __reclaim_stacks (void) in_flight_stack = 0; /* Initialize locks. */ - stack_cache_lock = LLL_LOCK_INITIALIZER; + GL (dl_stack_cache_lock) = LLL_LOCK_INITIALIZER; __default_pthread_attr_lock = LLL_LOCK_INITIALIZER; } @@ -1083,7 +1071,7 @@ __nptl_setxid (struct xid_command *cmdp) { int signalled; int result; - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); __xidcmd = cmdp; cmdp->cntr = 0; @@ -1093,7 +1081,7 @@ __nptl_setxid (struct xid_command *cmdp) /* Iterate over the list with system-allocated threads first. */ list_t *runp; - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1103,7 +1091,7 @@ __nptl_setxid (struct xid_command *cmdp) } /* Now the list with threads using user-allocated stacks. */ - list_for_each (runp, &__stack_user) + list_for_each (runp, &GL (dl_stack_user)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1119,7 +1107,7 @@ __nptl_setxid (struct xid_command *cmdp) { signalled = 0; - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1128,7 +1116,7 @@ __nptl_setxid (struct xid_command *cmdp) signalled += setxid_signal_thread (cmdp, t); } - list_for_each (runp, &__stack_user) + list_for_each (runp, &GL (dl_stack_user)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1149,7 +1137,7 @@ __nptl_setxid (struct xid_command *cmdp) /* Clean up flags, so that no thread blocks during exit waiting for a signal which will never come. */ - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1158,7 +1146,7 @@ __nptl_setxid (struct xid_command *cmdp) setxid_unmark_thread (cmdp, t); } - list_for_each (runp, &__stack_user) + list_for_each (runp, &GL (dl_stack_user)) { struct pthread *t = list_entry (runp, struct pthread, list); if (t == self) @@ -1180,7 +1168,7 @@ __nptl_setxid (struct xid_command *cmdp) } __nptl_setxid_error (cmdp, error); - lll_unlock (stack_cache_lock, LLL_PRIVATE); + lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE); return result; } @@ -1204,75 +1192,16 @@ void attribute_hidden __pthread_init_static_tls (struct link_map *map) { - lll_lock (stack_cache_lock, LLL_PRIVATE); + lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE); /* Iterate over the list with system-allocated threads first. */ list_t *runp; - list_for_each (runp, &stack_used) + list_for_each (runp, &GL (dl_stack_used)) init_one_static_tls (list_entry (runp, struct pthread, list), map); /* Now the list with threads using user-allocated stacks. */ - list_for_each (runp, &__stack_user) + list_for_each (runp, &GL (dl_stack_user)) init_one_static_tls (list_entry (runp, struct pthread, list), map); - lll_unlock (stack_cache_lock, LLL_PRIVATE); -} - - -void -attribute_hidden -__wait_lookup_done (void) -{ - lll_lock (stack_cache_lock, LLL_PRIVATE); - - struct pthread *self = THREAD_SELF; - - /* Iterate over the list with system-allocated threads first. */ - list_t *runp; - list_for_each (runp, &stack_used) - { - struct pthread *t = list_entry (runp, struct pthread, list); - if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED) - continue; - - int *const gscope_flagp = &t->header.gscope_flag; - - /* We have to wait until this thread is done with the global - scope. First tell the thread that we are waiting and - possibly have to be woken. */ - if (atomic_compare_and_exchange_bool_acq (gscope_flagp, - THREAD_GSCOPE_FLAG_WAIT, - THREAD_GSCOPE_FLAG_USED)) - continue; - - do - futex_wait_simple ((unsigned int *) gscope_flagp, - THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE); - while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT); - } - - /* Now the list with threads using user-allocated stacks. */ - list_for_each (runp, &__stack_user) |
