Diffstat (limited to 'nptl/sem_waitcommon.c')
 nptl/sem_waitcommon.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+), 0 deletions(-)
diff --git a/nptl/sem_waitcommon.c b/nptl/sem_waitcommon.c
index 30984be2d0..c75c74b506 100644
--- a/nptl/sem_waitcommon.c
+++ b/nptl/sem_waitcommon.c
@@ -119,6 +119,24 @@ do_futex_wait (struct new_sem *sem, const struct timespec *abstime)
   return err;
 }
+static int
+__attribute__ ((noinline))
+do_futex_wait64 (struct new_sem *sem, const struct __timespec64 *abstime)
+{
+  int err;
+
+#if __HAVE_64B_ATOMICS
+  err = futex_abstimed_wait_cancelable64 (
+      (unsigned int *) &sem->data + SEM_VALUE_OFFSET, 0, abstime,
+      sem->private);
+#else
+  err = futex_abstimed_wait_cancelable64 (&sem->value, SEM_NWAITERS_MASK,
+                                          abstime, sem->private);
+#endif
+
+  return err;
+}
+
 /* Fast path: Try to grab a token without blocking.  */
 static int
 __new_sem_wait_fast (struct new_sem *sem, int definitive_result)
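
The helper above takes its deadline as an absolute CLOCK_REALTIME time in the Y2038-safe struct __timespec64. As a rough illustration of how a caller might produce such a deadline from a relative timeout, here is a minimal sketch; it is not part of this patch, sem_wait_deadline64 is a hypothetical name, and it assumes the glibc-internal __clock_gettime64 interface is available:

/* Hypothetical illustration, not part of the patch: build the absolute
   CLOCK_REALTIME deadline that do_futex_wait64/__new_sem_wait_slow64
   expect, from a relative timeout given in nanoseconds.  */
static int
sem_wait_deadline64 (struct new_sem *sem, int64_t timeout_ns)
{
  struct __timespec64 abstime;

  __clock_gettime64 (CLOCK_REALTIME, &abstime);
  abstime.tv_sec += timeout_ns / 1000000000;
  abstime.tv_nsec += timeout_ns % 1000000000;
  if (abstime.tv_nsec >= 1000000000)
    {
      abstime.tv_nsec -= 1000000000;
      abstime.tv_sec++;
    }
  return __new_sem_wait_slow64 (sem, &abstime);
}
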
@@ -310,6 +328,160 @@ error:
   return err;
 }
+/* 64-bit time version.  */
+
+static int
+__attribute__ ((noinline))
+__new_sem_wait_slow64 (struct new_sem *sem, const struct __timespec64 *abstime)
+{
+  int err = 0;
+
+#if __HAVE_64B_ATOMICS
+  /* Add a waiter.  Relaxed MO is sufficient because we can rely on the
+     ordering provided by the RMW operations we use.  */
+  uint64_t d = atomic_fetch_add_relaxed (&sem->data,
+      (uint64_t) 1 << SEM_NWAITERS_SHIFT);
+
+  pthread_cleanup_push (__sem_wait_cleanup, sem);
+
+  /* Wait for a token to be available.  Retry until we can grab one.  */
+  for (;;)
+    {
+      /* If there is no token available, sleep until there is.  */
+      if ((d & SEM_VALUE_MASK) == 0)
+        {
+          err = do_futex_wait64 (sem, abstime);
+          /* A futex return value of 0 or EAGAIN is due to a real or spurious
+             wake-up, or due to a change in the number of tokens.  We retry in
+             these cases.
+             If we timed out, forward this to the caller.
+             EINTR is returned if we are interrupted by a signal; we
+             forward this to the caller.  (See futex_wait and related
+             documentation.  Before Linux 2.6.22, EINTR was also returned on
+             spurious wake-ups; we only support more recent Linux versions,
+             so do not need to consider this here.)  */
+          if (err == ETIMEDOUT || err == EINTR)
+            {
+              __set_errno (err);
+              err = -1;
+              /* Stop being registered as a waiter.  */
+              atomic_fetch_add_relaxed (&sem->data,
+                  -((uint64_t) 1 << SEM_NWAITERS_SHIFT));
+              break;
+            }
+          /* Relaxed MO is sufficient; see below.  */
+          d = atomic_load_relaxed (&sem->data);
+        }
+      else
+        {
+          /* Try to grab both a token and stop being a waiter.  We need
+             acquire MO so this synchronizes with all token providers (i.e.,
+             the RMW operation we read from or all those before it in
+             modification order; also see sem_post).  On the failure path,
+             relaxed MO is sufficient because we only eventually need the
+             up-to-date value; the futex_wait or the CAS perform the real
+             work.  */
+          if (atomic_compare_exchange_weak_acquire (&sem->data,
+              &d, d - 1 - ((uint64_t) 1 << SEM_NWAITERS_SHIFT)))
+            {
+              err = 0;
+              break;
+            }
+        }
+    }
+
+  pthread_cleanup_pop (0);
+#else
+  /* The main difference from the 64b-atomics implementation is that we need
+     to access value and nwaiters in separate steps, and that the nwaiters
+     bit in the value can temporarily be unset even if nwaiters is nonzero.
+     We work around incorrectly unsetting the nwaiters bit by letting
+     sem_wait set the bit again and waking the number of waiters that could
+     grab a token.  There are two additional properties we need to ensure:
+     (1) We make sure that whenever unsetting the bit, we see the increment
+     of nwaiters by the other thread that set the bit.  In other words, we
+     will notice if we make a mistake.
+     (2) When setting the nwaiters bit, we make sure that we see the
+     unsetting of the bit by another waiter that happened before us.  This
+     avoids having to blindly set the bit whenever we need to block on it.
+     We set/unset the bit while having incremented nwaiters (i.e., are a
+     registered waiter), and the problematic case only happens when one
+     waiter indeed followed another (i.e., nwaiters was never larger than
+     1); thus, this works similarly to a critical section using nwaiters
+     (see the MOs and related comments below).
+
+     An alternative approach would be to unset the bit after decrementing
+     nwaiters; however, that would result in needing Dekker-like
+     synchronization and thus full memory barriers.  We also would not be
+     able to prevent misspeculation, so this alternative scheme does not
+     seem beneficial.  */
+  unsigned int v;
+
+  /* Add a waiter.  We need acquire MO so this synchronizes with the release
+     MO we use when decrementing nwaiters below; it ensures that if another
+     waiter unset the bit before us, we see that and set it again.  Also see
+     property (2) above.  */
+  atomic_fetch_add_acquire (&sem->nwaiters, 1);
+
+  pthread_cleanup_push (__sem_wait_cleanup, sem);
+
+  /* Wait for a token to be available.  Retry until we can grab one.  */
+  /* We do not need any ordering wrt. this load's reads-from, so relaxed
+     MO is sufficient.  The acquire MO above ensures that in the problematic
+     case, we do see the unsetting of the bit by another waiter.  */
+  v = atomic_load_relaxed (&sem->value);
+  do
+    {
+      do
+        {
+          /* We are about to block, so make sure that the nwaiters bit is
+             set.  We need release MO on the CAS to ensure that when another
+             waiter unsets the nwaiters bit, it will also observe that we
+             incremented nwaiters in the meantime (also see the unsetting of
+             the bit below).  Relaxed MO on CAS failure is sufficient (see
+             above).  */
+          do
+            {
+              if ((v & SEM_NWAITERS_MASK) != 0)
+                break;
+            }
+          while (!atomic_compare_exchange_weak_release (&sem->value,
+              &v, v | SEM_NWAITERS_MASK));
+          /* If there is no token, wait.  */
+          if ((v >> SEM_VALUE_SHIFT) == 0)
+            {
+              /* See __HAVE_64B_ATOMICS variant.  */
+              err = do_futex_wait64 (sem, abstime);
+              if (err == ETIMEDOUT || err == EINTR)
+                {
+                  __set_errno (err);
+                  err = -1;
+                  goto error;
+                }
+              err = 0;
+              /* We blocked, so there might be a token now.  Relaxed MO is
+                 sufficient (see above).  */
+              v = atomic_load_relaxed (&sem->value);
+            }
+        }
+      /* If there is no token, we must not try to grab one.  */
+      while ((v >> SEM_VALUE_SHIFT) == 0);
+    }
+  /* Try to grab a token.  We need acquire MO so this synchronizes with
+     all token providers (i.e., the RMW operation we read from or all those
+     before it in modification order; also see sem_post).  */
+  while (!atomic_compare_exchange_weak_acquire (&sem->value,
+      &v, v - (1 << SEM_VALUE_SHIFT)));
+
+error:
+  pthread_cleanup_pop (0);
+
+  __sem_wait_32_finish (sem);
+#endif
+
+  return err;
+}
+
 /* Stop being a registered waiter (non-64b-atomics code only).  */
 #if !__HAVE_64B_ATOMICS
 static void
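
To make the memory-ordering discussion above concrete, here is a standalone sketch (not part of the patch) of the packed-word protocol that the __HAVE_64B_ATOMICS path implements: the token count lives in the low bits of one 64-bit word and the waiter count in the high bits, so taking a token and deregistering as a waiter is a single acquire CAS. All constants and helper names below are illustrative; the real SEM_NWAITERS_SHIFT and SEM_VALUE_MASK live in glibc's internal semaphore headers, and the real slow path futex-waits instead of failing.

/* Illustrative sketch of the packed value/nwaiters word; not glibc code.  */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NWAITERS_SHIFT 32
#define VALUE_MASK ((((uint64_t) 1) << NWAITERS_SHIFT) - 1)

static _Atomic uint64_t sem_data;

static void
post (void)
{
  /* Add one token; release MO pairs with the acquire CAS in the waiter.  */
  atomic_fetch_add_explicit (&sem_data, 1, memory_order_release);
}

static int
try_wait_registered (void)
{
  /* Register as a waiter (relaxed MO, as in the patch: the RMWs provide
     the ordering we rely on).  fetch_add returns the old value, so add
     the increment back to get our snapshot of the word.  */
  uint64_t d = atomic_fetch_add_explicit (&sem_data,
                                          (uint64_t) 1 << NWAITERS_SHIFT,
                                          memory_order_relaxed);
  d += (uint64_t) 1 << NWAITERS_SHIFT;

  while ((d & VALUE_MASK) != 0)
    {
      /* Take one token and drop the waiter registration in a single CAS;
         acquire MO synchronizes with the releasing post above.  On failure,
         d is refreshed and the token check runs again.  */
      if (atomic_compare_exchange_weak_explicit
          (&sem_data, &d, d - 1 - ((uint64_t) 1 << NWAITERS_SHIFT),
           memory_order_acquire, memory_order_relaxed))
        return 0;
    }

  /* No token: the real slow path would futex-wait here; the sketch just
     deregisters and reports failure.  */
  atomic_fetch_sub_explicit (&sem_data, (uint64_t) 1 << NWAITERS_SHIFT,
                             memory_order_relaxed);
  return -1;
}

int
main (void)
{
  post ();
  printf ("first wait: %d\n", try_wait_registered ());  /* prints 0 */
  printf ("second wait: %d\n", try_wait_registered ()); /* prints -1 */
  return 0;
}

Packing both counts into one word is what lets the 64b path grab a token and deregister atomically; without 64-bit atomics the two counts live in separate words, which is why the non-64b path needs the nwaiters-bit protocol and the extra acquire/release pairing described in the comments above.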