Diffstat (limited to 'sysdeps/generic/dl-tls.c')
-rw-r--r--  sysdeps/generic/dl-tls.c  |  494
1 file changed, 392 insertions, 102 deletions
diff --git a/sysdeps/generic/dl-tls.c b/sysdeps/generic/dl-tls.c
index e42911acd3..d13b0f93fa 100644
--- a/sysdeps/generic/dl-tls.c
+++ b/sysdeps/generic/dl-tls.c
@@ -18,8 +18,12 @@
02111-1307 USA. */
#include <assert.h>
+#include <signal.h>
#include <stdlib.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <abort-instr.h>
#include <tls.h>
/* We don't need any of this if TLS is not supported. */
@@ -29,7 +33,31 @@
#include <ldsodefs.h>
/* Value used for dtv entries for which the allocation is delayed. */
-# define TLS_DTV_UNALLOCATE ((void *) -1l)
+# define TLS_DTV_UNALLOCATED ((void *) -1l)
+
+
+/* Out-of-memory handler. */
+static void
+__attribute__ ((__noreturn__))
+oom (void)
+{
+ static const char msg[] = "\
+cannot allocate memory for thread-local data: ABORT\n";
+
+ __libc_write (STDERR_FILENO, msg, sizeof (msg) - 1);
+
+ /* Kill ourselves. */
+ __kill (__getpid (), SIGKILL);
+
+ /* Just in case something goes wrong with the kill. */
+ while (1)
+ {
+# ifdef ABORT_INSTRUCTION
+ ABORT_INSTRUCTION;
+# endif
+ }
+}
+
size_t
@@ -40,38 +68,49 @@ _dl_next_tls_modid (void)
if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
{
- /* XXX If this method proves too costly we can optimize
- it to use a constant time method. But I don't think
- it's a problem. */
- struct link_map *runp = GL(dl_initimage_list);
- bool used[GL(dl_tls_max_dtv_idx)];
-
- assert (runp != NULL);
+ size_t disp = 0;
+ struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
+
+ /* Note that this branch will never be executed during program
+ start since there are no gaps at that time. Therefore it
+ does not matter that the dl_tls_dtv_slotinfo list is not yet
+ allocated when the function is called for the first time. */
+ result = GL(dl_tls_static_nelem);
+ assert (result < GL(dl_tls_max_dtv_idx));
do
{
- assert (runp->l_tls_modid > 0
- && runp->l_tls_modid <= GL(dl_tls_max_dtv_idx));
- used[runp->l_tls_modid - 1] = true;
- }
- while ((runp = runp->l_tls_nextimage) != GL(dl_initimage_list));
+ while (result - disp < runp->len)
+ if (runp->slotinfo[result - disp].map == NULL)
+ break;
- result = 0;
- do
- /* The information about the gaps is pessimistic. It might be
- there are actually none. */
- if (result >= GL(dl_tls_max_dtv_idx))
- {
- /* Now we know there is actually no gap. Bump the maximum
- ID number and remember that there are no gaps. */
- result = ++GL(dl_tls_max_dtv_idx);
- GL(dl_tls_dtv_gaps) = false;
+ ++result;
+ assert (result <= GL(dl_tls_max_dtv_idx) + 1);
+
+ if (result - disp < runp->len)
break;
- }
- while (used[result++]);
+
+ disp += runp->len;
+ }
+ while ((runp = runp->next) != NULL);
+
+ if (result >= GL(dl_tls_max_dtv_idx) + 1)
+ {
+ /* The new index must indeed be exactly one higher than the
+ previous high. */
+ assert (result == GL(dl_tls_max_dtv_idx) + 1);
+
+ /* There is no gap anymore. */
+ GL(dl_tls_dtv_gaps) = false;
+
+ goto nogaps;
+ }
}
else
- /* No gaps, allocate a new entry. */
- result = ++GL(dl_tls_max_dtv_idx);
+ {
+ /* No gaps, allocate a new entry. */
+ nogaps:
+ result = ++GL(dl_tls_max_dtv_idx);
+ }
return result;
}
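
To make the new gap search concrete, here is a minimal self-contained sketch of the traversal it performs over the chained slotinfo blocks. The structure layout is simplified (fixed-size blocks, only the fields the search reads) and the module assignments are invented for illustration; this is not the real glibc type.

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for dtv_slotinfo/dtv_slotinfo_list; only the
   fields the gap search touches are present.  */
struct dtv_slotinfo { void *map; };
struct dtv_slotinfo_list
{
  size_t len;                       /* Number of slots in this block.  */
  struct dtv_slotinfo_list *next;   /* Next block in the chain.  */
  struct dtv_slotinfo slotinfo[8];  /* Fixed size here for simplicity.  */
};

/* Return the first module ID >= start whose slot is unused, walking
   the chained blocks the way _dl_next_tls_modid does: disp counts the
   slots of all blocks already passed.  */
static size_t
find_gap (struct dtv_slotinfo_list *listp, size_t start)
{
  size_t result = start;
  size_t disp = 0;

  do
    {
      while (result - disp < listp->len
             && listp->slotinfo[result - disp].map != NULL)
        ++result;

      if (result - disp < listp->len)
        return result;              /* Found an unused slot: reuse it.  */

      disp += listp->len;
    }
  while ((listp = listp->next) != NULL);

  return result;                    /* No gap: one past the maximum.  */
}

int
main (void)
{
  static int dummy;
  struct dtv_slotinfo_list block = { 8, NULL, { { NULL } } };

  /* Mark modids 1, 2, 4 and 5 as used; modid 3 is the gap.  */
  block.slotinfo[1].map = &dummy;
  block.slotinfo[2].map = &dummy;
  block.slotinfo[4].map = &dummy;
  block.slotinfo[5].map = &dummy;

  printf ("first free modid: %zu\n", find_gap (&block, 1));  /* -> 3 */
  return 0;
}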
@@ -79,41 +118,39 @@ _dl_next_tls_modid (void)
void
internal_function
-_dl_determine_tlsoffset (struct link_map *lastp)
+_dl_determine_tlsoffset (void)
{
- struct link_map *runp;
- size_t max_align = 0;
+ struct dtv_slotinfo *slotinfo;
+ size_t max_align = __alignof__ (void *);
size_t offset;
+ size_t cnt;
- if (lastp == NULL)
- {
- /* None of the objects used at startup time uses TLS. We still
- have to allocate the TCB and dtv. */
- GL(dl_tls_static_size) = TLS_TCB_SIZE;
- GL(dl_tls_static_align) = TLS_TCB_ALIGN;
-
- return;
- }
+ /* The first element of the dtv slot info list is allocated. */
+ assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
+ /* At this point there is only one element in the
+ dl_tls_dtv_slotinfo_list. */
+ assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);
# if TLS_TCB_AT_TP
/* We simply start with zero. */
offset = 0;
- runp = lastp->l_tls_nextimage;
- do
+ slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
+ for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt)
{
- max_align = MAX (max_align, runp->l_tls_align);
+ assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
+
+ max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
/* Compute the offset of the next TLS block. */
- offset = roundup (offset + runp->l_tls_blocksize, runp->l_tls_align);
+ offset = roundup (offset + slotinfo[cnt].map->l_tls_blocksize,
+ slotinfo[cnt].map->l_tls_align);
/* XXX For some architectures we perhaps should store the
negative offset. */
- runp->l_tls_offset = offset;
+ slotinfo[cnt].map->l_tls_offset = offset;
}
- while ((runp = runp->l_tls_nextimage) != lastp->l_tls_nextimage);
-#if 0
/* The thread descriptor (pointed to by the thread pointer) has its
own alignment requirement. Adjust the static TLS size
and TLS offsets appropriately. */
@@ -121,34 +158,44 @@ _dl_determine_tlsoffset (struct link_map *lastp)
// XXX after the first (closest to the TCB) TLS block since this
// XXX would invalidate the offsets the linker creates for the LE
// XXX model.
- if (offset % TLS_TCB_ALIGN != 0)
- abort ();
-#endif
GL(dl_tls_static_size) = offset + TLS_TCB_SIZE;
# elif TLS_DTV_AT_TP
- struct link_map *prevp;
-
- /* The first block starts right after the TCB. */
+ /* The TLS blocks start right after the TCB. */
offset = TLS_TCB_SIZE;
- max_align = runp->l_tls_align;
- runp = lastp->l_tls_nextimage;
- runp->l_tls_offset = offset;
- prevp = runp;
- while ((runp = runp->l_tls_nextimage) != firstp)
+ /* The first block starts right after the TCB. */
+ slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
+ if (slotinfo[1].map != NULL)
{
- max_align = MAX (max_align, runp->l_tls_align);
+ size_t prev_size;
- /* Compute the offset of the next TLS block. */
- offset = roundup (offset + prevp->l_tls_blocksize, runp->l_tls_align);
+ offset = roundup (offset, slotinfo[1].map->l_tls_align);
+ slotinfo[1].map->l_tls_offset = offset;
+ max_align = slotinfo[1].map->l_tls_align;
+ prev_size = slotinfo[1].map->l_tls_blocksize;
- runp->l_tls_offset = offset;
+ for (cnt = 2; slotinfo[cnt].map != NULL; ++cnt)
+ {
+ assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
+
+ max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
+
+ /* Compute the offset of the next TLS block. */
+ offset = roundup (offset + prev_size,
+ slotinfo[cnt].map->l_tls_align);
+
+ /* XXX For some architectures we perhaps should store the
+ negative offset. */
+ slotinfo[cnt].map->l_tls_offset = offset;
+
+ prev_size = slotinfo[cnt].map->l_tls_blocksize;
+ }
- prevp = runp;
+ offset += prev_size;
}
- GL(dl_tls_static_size) = offset + prevp->l_tls_blocksize;
+ GL(dl_tls_static_size) = offset;
# else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif
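
For readers checking the offset arithmetic in the two layouts above, a hedged worked example: the module sizes and alignments below are invented, roundup is the sys/param.h macro the patch uses, and TCB_SIZE is an arbitrary stand-in for TLS_TCB_SIZE.

#include <stdio.h>
#include <sys/param.h>   /* roundup */

#define TCB_SIZE 16      /* Stand-in for TLS_TCB_SIZE.  */

struct mod { size_t size, align; };

int
main (void)
{
  /* Three hypothetical initially loaded modules.  */
  struct mod mods[] = { { 24, 8 }, { 100, 16 }, { 4, 4 } };
  size_t offset = 0;

  /* TLS_TCB_AT_TP: blocks sit below the thread pointer, so each
     module's l_tls_offset is the rounded-up end of its own block,
     counted back from the TCB.  */
  for (size_t i = 0; i < sizeof mods / sizeof mods[0]; ++i)
    {
      offset = roundup (offset + mods[i].size, mods[i].align);
      printf ("module %zu at tp - %zu\n", i + 1, offset);
    }

  /* The TCB itself is placed on top of all the blocks.  */
  printf ("static TLS size: %zu\n", offset + TCB_SIZE);
  return 0;
}

With these numbers the offsets come out as 24, 128 and 132, and the static TLS size as 148. The TLS_DTV_AT_TP branch instead rounds each block's start up from the end of the previous block, beginning right after the TCB.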
@@ -164,59 +211,100 @@ _dl_allocate_tls (void)
{
void *result;
dtv_t *dtv;
+ size_t dtv_length;
/* Allocate a correctly aligned chunk of memory. */
/* XXX For now */
assert (GL(dl_tls_static_align) <= GL(dl_pagesize));
-#ifdef MAP_ANON
-# define _dl_zerofd (-1)
-#else
-# define _dl_zerofd GL(dl_zerofd)
+# ifdef MAP_ANON
+# define _dl_zerofd (-1)
+# else
+# define _dl_zerofd GL(dl_zerofd)
if (_dl_zerofd == -1)
GL(dl_zerofd) = _dl_sysdep_open_zero_fill ();
-# define MAP_ANON 0
-#endif
+# define MAP_ANON 0
+# endif
result = __mmap (0, GL(dl_tls_static_size), PROT_READ|PROT_WRITE,
MAP_ANON|MAP_PRIVATE, _dl_zerofd, 0);
- dtv = (dtv_t *) malloc ((GL(dl_tls_max_dtv_idx) + 1) * sizeof (dtv_t));
+ /* We allocate a few more elements in the dtv than are needed for the
+ initial set of modules. This should avoid the need to expand the
+ dtv in most cases. */
+ dtv_length = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+ dtv = (dtv_t *) malloc ((dtv_length + 2) * sizeof (dtv_t));
if (result != MAP_FAILED && dtv != NULL)
{
- struct link_map *runp;
+ struct dtv_slotinfo_list *listp;
+ bool first_block = true;
+ size_t total = 0;
# if TLS_TCB_AT_TP
/* The TCB follows the TLS blocks. */
result = (char *) result + GL(dl_tls_static_size) - TLS_TCB_SIZE;
# endif
- /* XXX Fill in an correct generation number. */
- dtv[0].counter = 0;
-
- /* Initialize the memory from the initialization image list and clear
- the BSS parts. */
- if (GL(dl_initimage_list) != NULL)
+ /* This is the initial length of the dtv. */
+ dtv[0].counter = dtv_length;
+ /* Fill in the generation number. */
+ dtv[1].counter = GL(dl_tls_generation) = 0;
+ /* Initialize the rest of the dtv with zeros to indicate that
+ nothing is there yet. */
+ memset (dtv + 2, '\0', dtv_length * sizeof (dtv_t));
+
+ /* We have to prepare the dtv for all currently loaded
+ modules using TLS. For those which are dynamically loaded we
+ add the values indicating deferred allocation. */
+ listp = GL(dl_tls_dtv_slotinfo_list);
+ while (1)
{
- runp = GL(dl_initimage_list)->l_tls_nextimage;
- do
+ size_t cnt;
+
+ for (cnt = first_block ? 1 : 0; cnt < listp->len; ++cnt)
{
- assert (runp->l_tls_modid > 0);
- assert (runp->l_tls_modid <= GL(dl_tls_max_dtv_idx));
+ struct link_map *map;
+ void *dest;
+
+ /* Check for the total number of used slots. */
+ if (total + cnt >= GL(dl_tls_max_dtv_idx))
+ break;
+
+ map = listp->slotinfo[cnt].map;
+ if (map == NULL)
+ /* Unused entry. */
+ continue;
+
+ if (map->l_type == lt_loaded)
+ {
+ /* For dynamically loaded modules we simply store
+ the value indicating deferred allocation. */
+ dtv[1 + map->l_tls_modid].pointer = TLS_DTV_UNALLOCATED;
+ continue;
+ }
+
+ assert (map->l_tls_modid == cnt);
+ assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
# if TLS_TCB_AT_TP
- dtv[runp->l_tls_modid].pointer = result - runp->l_tls_offset;
+ assert (map->l_tls_offset >= map->l_tls_blocksize);
+ dest = (char *) result - map->l_tls_offset;
# elif TLS_DTV_AT_TP
- dtv[runp->l_tls_modid].pointer = result + runp->l_tls_offset;
+ dest = (char *) result + map->l_tls_offset;
# else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif
- memset (__mempcpy (dtv[runp->l_tls_modid].pointer,
- runp->l_tls_initimage,
- runp->l_tls_initimage_size),
- '\0',
- runp->l_tls_blocksize - runp->l_tls_initimage_size);
+ /* We don't have to clear the BSS part of the TLS block
+ since mmap is used to allocate the memory, which
+ guarantees zero-initialized pages. */
+ dtv[1 + cnt].pointer = memcpy (dest, map->l_tls_initimage,
+ map->l_tls_initimage_size);
}
- while ((runp = runp->l_tls_nextimage)
- != GL(dl_initimage_list)->l_tls_nextimage);
+
+ total += cnt;
+ if (total >= GL(dl_tls_max_dtv_idx))
+ break;
+
+ listp = listp->next;
+ assert (listp != NULL);
}
/* Add the dtv to the thread data structures. */
@@ -232,6 +320,7 @@ _dl_allocate_tls (void)
}
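
The dtv allocated above carries two bookkeeping slots in front of the per-module pointers: the capacity (read back later as dtv[-1].counter) and the generation counter (dtv[0].counter), with module m at dtv[m] once the thread's dtv pointer has been rebased by one element. A minimal sketch of that layout, assuming a simplified dtv_t union and an invented DTV_SURPLUS value:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified dtv_t; the real union is richer in later versions.  */
typedef union dtv { size_t counter; void *pointer; } dtv_t;

#define DTV_SURPLUS 14   /* Invented value for this sketch.  */

int
main (void)
{
  size_t max_modid = 2;                 /* Stand-in for dl_tls_max_dtv_idx.  */
  size_t dtv_length = max_modid + DTV_SURPLUS;
  dtv_t *alloc = malloc ((dtv_length + 2) * sizeof (dtv_t));
  if (alloc == NULL)
    return 1;

  alloc[0].counter = dtv_length;        /* Capacity slot.  */
  alloc[1].counter = 0;                 /* Generation slot.  */
  memset (alloc + 2, '\0', dtv_length * sizeof (dtv_t));

  /* The thread data structures store &alloc[1], so module m is
     dtv[m] and the bookkeeping sits at dtv[-1] and dtv[0].  */
  dtv_t *dtv = &alloc[1];
  printf ("capacity %zu, generation %zu, module 1 slot %p\n",
          dtv[-1].counter, dtv[0].counter, dtv[1].pointer);

  free (alloc);
  return 0;
}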
+# ifdef SHARED
/* The __tls_get_addr function has two basic forms which differ in the
arguments. The IA-64 form takes two parameters, the module ID and
offset. The form used, among others, on IA-32 takes a reference to
@@ -239,26 +328,227 @@ _dl_allocate_tls (void)
form seems to be more often used (at the moment) so we default to
it. Users of the IA-64 form have to provide adequate definitions
of the following macros. */
-# ifndef GET_ADDR_ARGS
-# define GET_ADDR_ARGS tls_index *ti
-# endif
-# ifndef GET_ADDR_MODULE
-# define GET_ADDR_MODULE ti->ti_module
-# endif
-# ifndef GET_ADDR_OFFSET
-# define GET_ADDR_OFFSET ti->ti_offset
-# endif
+# ifndef GET_ADDR_ARGS
+# define GET_ADDR_ARGS tls_index *ti
+# endif
+# ifndef GET_ADDR_MODULE
+# define GET_ADDR_MODULE ti->ti_module
+# endif
+# ifndef GET_ADDR_OFFSET
+# define GET_ADDR_OFFSET ti->ti_offset
+# endif
+/* Systems which do not have tls_index also probably have to define
+ DONT_USE_TLS_INDEX. */
+
+# ifndef __TLS_GET_ADDR
+# define __TLS_GET_ADDR __tls_get_addr
+# endif
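
The macros above exist because the ABI-level entry point comes in two shapes. A sketch of the two prototypes, with the tls_index layout as commonly defined for the IA-32-style form (field types vary by port):

/* IA-32 style: the compiler passes a pointer to a tls_index pair
   which the dynamic linker filled in via relocations.  */
typedef struct
{
  unsigned long int ti_module;   /* ID of the module defining the symbol.  */
  unsigned long int ti_offset;   /* Offset within that module's TLS block.  */
} tls_index;

extern void *__tls_get_addr (tls_index *ti);

/* IA-64 style: module ID and offset arrive as two arguments.  A port
   using it would define the macros roughly as

     # define GET_ADDR_ARGS    size_t m, size_t offset
     # define GET_ADDR_MODULE  m
     # define GET_ADDR_OFFSET  offset

   together with DONT_USE_TLS_INDEX, as noted above.  */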
+
+
+/* Return the symbol address given the map of the module it is in and
+ the symbol record. This is used in dl-sym.c. */
+void *
+internal_function
+_dl_tls_symaddr (struct link_map *map, const ElfW(Sym) *ref)
+{
+# ifndef DONT_USE_TLS_INDEX
+ tls_index tmp =
+ {
+ .ti_module = map->l_tls_modid,
+ .ti_offset = ref->st_value
+ };
+
+ return __TLS_GET_ADDR (&tmp);
+# else
+ return __TLS_GET_ADDR (map->l_tls_modid, ref->st_value);
+# endif
+}
+
+
+static void *
+allocate_and_init (struct link_map *map)
+{
+ void *newp;
+
+ newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
+ if (newp == NULL)
+ oom ();
+ /* Initialize the memory. */
+ memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
+ '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
+ return newp;
+}
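
The __mempcpy/memset pair in allocate_and_init is the standard idiom for materializing a TLS block: copy the .tdata image, then zero the trailing .tbss portion. Spelled out with plain memcpy the body would read (equivalent, assuming as the code does that l_tls_blocksize >= l_tls_initimage_size):

  /* mempcpy returns the end of the copied region, which is why the
     two calls can be nested in the version above.  */
  memcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size);
  memset ((char *) newp + map->l_tls_initimage_size, '\0',
          map->l_tls_blocksize - map->l_tls_initimage_size);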
+
+
+/* The generic dynamic and local dynamic models cannot be used in
+ statically linked applications. */
void *
__tls_get_addr (GET_ADDR_ARGS)
{
dtv_t *dtv = THREAD_DTV ();
+ struct link_map *the_map = NULL;
+ void *p;
+
+ if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
+ {
+ struct dtv_slotinfo_list *listp;
+ size_t idx;
+
+ /* The global dl_tls_dtv_slotinfo array contains for each module
+ index the generation counter that was current when the entry
+ was created. This array never shrinks so that all module indices
+ which were valid at some time can be used to access it.
+ Before the first use of a new module index in this function
+ the array was extended appropriately. Access also does not
+ have to be guarded against modifications of the array. It is
+ assumed that pointer-size values can be read atomically even
+ in SMP environments. It is possible that other threads at
+ the same time dynamically load code and therefore add to the
+ slotinfo list. This is a problem since we must not pick up
+ any information about incomplete work. The solution to this
+ is to ignore all dtv slots which were created after the one
+ we are currently interested in. We know that dynamic loading
+ for this module is complete and that this is the last load
+ operation we know finished. */
+ idx = GET_ADDR_MODULE;
+ listp = GL(dl_tls_dtv_slotinfo_list);
+ while (idx >= listp->len)
+ {
+ idx -= listp->len;
+ listp = listp->next;
+ }
- if (dtv[GET_ADDR_MODULE].pointer == TLS_DTV_UNALLOCATE)
- /* XXX */;
+ if (dtv[0].counter < listp->slotinfo[idx].gen)
+ {
+ /* The generation counter for the slot is higher than what
+ the current dtv implements. We have to update the whole
+ dtv but only those entries with a generation counter <=
+ the one for the entry we need. */
+ size_t new_gen = listp->slotinfo[idx].gen;
+ size_t total = 0;
+
+ /* We have to look through the entire dtv slotinfo list. */
+ listp = GL(dl_tls_dtv_slotinfo_list);
+ do
+ {
+ size_t cnt;
+
+ for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
+ {
+ size_t gen = listp->slotinfo[cnt].gen;
+ struct link_map *map;
+ size_t modid;
+
+ if (gen > new_gen)
+ /* This is a slot for a generation younger than
+ the one we are handling now. It might be
+ incompletely set up so ignore it. */
+ continue;
+
+ /* If the entry is older than the current dtv layout
+ we know we don't have to handle it. */
+ if (gen <= dtv[0].counter)
+ continue;
+
+ /* If there is no map this means the entry is empty. */
+ map = listp->slotinfo[cnt].map;
+ if (map == NULL)
+ {
+ /* If this modid was used at some point the memory
+ might still be allocated. */
+ if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
+ free (dtv[total + cnt].pointer);
+
+ continue;
+ }
+
+ /* Check whether the current dtv array is large enough. */
+ modid = map->l_tls_modid;
+ assert (total + cnt == modid);
+ if (dtv[-1].counter < modid)
+ {
+ /* Reallocate the dtv. */
+ dtv_t *newp;
+ size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+ size_t oldsize = dtv[-1].counter;
+
+ assert (map->l_tls_modid <= newsize);
+
+ newp = (dtv_t *) realloc (&dtv[-1],
+ (2 + newsize)
+ * sizeof (dtv_t));
+ if (newp == NULL)
+ oom ();
+
+ newp[0].counter = newsize;
+
+ /* Clear the newly allocated part. */
+ memset (newp + 2 + oldsize, '\0',
+ (newsize - oldsize) * sizeof (dtv_t));
+
+ /* Point dtv to the generation counter. */
+ dtv = &newp[1];
+
+ /* Install this new dtv in the thread data
+ structures. */
+ INSTALL_NEW_DTV (dtv);
+ }
+
+ /* If there is currently memory allocated for this
+ dtv entry, free it. */
+ /* XXX Ideally we will at some point create a memory
+ pool. */
+ if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
+ /* Note that free is called for NULL as well. We
+ deallocate even if it is this very dtv entry we are
+ supposed to load. The reason is that we use
+ memalign and not malloc. */
+ free (dtv[modid].pointer);
+
+ /* This module is loaded dynamically. We defer
+ memory allocation. */
+ dtv[modid].pointer = TLS_DTV_UNALLOCATED;
+
+ if (modid == GET_ADDR_MODULE)
+ the_map = map;
+ }
+
+ total += listp->len;
+ }
+ while ((listp = listp->next) != NULL);
- return (char *) dtv[GET_ADDR_MODULE].pointer + GET_ADDR_OFFSET;
+ /* This will be the new maximum generation counter. */
+ dtv[0].counter = new_gen;
+ }
+ }
+
+ p = dtv[GET_ADDR_MODULE].pointer;
+
+ if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
+ {
+ /* The allocation was deferred. Do it now. */
+ if (the_map == NULL)
+ {
+ /* Find the link map for this module. */
+ size_t idx = GET_ADDR_MODULE;
+ struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
+
+ while (idx >= listp->len)
+ {
+ idx -= listp->len;
+ listp = listp->next;
+ }
+
+ the_map = listp->slotinfo[idx].map;
+ }
+
+ p = dtv[GET_ADDR_MODULE].pointer = allocate_and_init (the_map);
+ }
+
+ return (char *) p + GET_ADDR_OFFSET;
}
+# endif
#endif /* use TLS */
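
To tie the slow path back to generated code: in the general dynamic TLS model an access to a __thread variable defined in another module lowers to exactly one __tls_get_addr call. A rough, hypothetical rendering of what the compiler and linker arrange (the real tls_index lives in the GOT and is filled in through DTPMOD/DTPOFF-style relocations; counter and counter_index are invented names):

/* Source level: */
extern __thread int counter;          /* Defined in a shared object.  */

/* Roughly what access to `counter' becomes in the general dynamic
   model; counter_index stands in for the GOT slot the linker creates
   and the dynamic linker fills in.  */
static tls_index counter_index;       /* { ti_module, ti_offset } */

static int *
counter_addr (void)
{
  return (int *) __tls_get_addr (&counter_index);
}

The fast path of the function above is then just the generation check plus one dtv load and the offset addition; the update and allocation code only runs the first time a thread touches a module's block or after a dlopen has bumped the generation counter.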