Diffstat (limited to 'nptl')
-rw-r--r--  nptl/DESIGN-condvar.txt            | 134
-rw-r--r--  nptl/Makefile                      |   6
-rw-r--r--  nptl/lowlevelcond.sym              |  16
-rw-r--r--  nptl/nptl-printers.py              |  70
-rw-r--r--  nptl/nptl_lock_constants.pysym     |  27
-rw-r--r--  nptl/pthreadP.h                    |   7
-rw-r--r--  nptl/pthread_cond_broadcast.c      |  99
-rw-r--r--  nptl/pthread_cond_common.c         | 466
-rw-r--r--  nptl/pthread_cond_destroy.c        |  82
-rw-r--r--  nptl/pthread_cond_init.c           |  28
-rw-r--r--  nptl/pthread_cond_signal.c         |  99
-rw-r--r--  nptl/pthread_cond_timedwait.c      | 268
-rw-r--r--  nptl/pthread_cond_wait.c           | 754
-rw-r--r--  nptl/pthread_condattr_getclock.c   |   2
-rw-r--r--  nptl/pthread_condattr_getpshared.c |   3
-rw-r--r--  nptl/pthread_condattr_init.c       |   4
-rw-r--r--  nptl/pthread_condattr_setclock.c   |  11
-rw-r--r--  nptl/test-cond-printers.py         |   2
-rw-r--r--  nptl/tst-cond1.c                   |   3
-rw-r--r--  nptl/tst-cond20.c                  |   5
-rw-r--r--  nptl/tst-cond22.c                  |  18
21 files changed, 1274 insertions, 830 deletions
diff --git a/nptl/DESIGN-condvar.txt b/nptl/DESIGN-condvar.txt
deleted file mode 100644
index 4845251c75..0000000000
--- a/nptl/DESIGN-condvar.txt
+++ /dev/null
@@ -1,134 +0,0 @@
-Conditional Variable pseudocode.
-================================
-
- int pthread_cond_timedwait (pthread_cond_t *cv, pthread_mutex_t *mutex);
- int pthread_cond_signal (pthread_cond_t *cv);
- int pthread_cond_broadcast (pthread_cond_t *cv);
-
-struct pthread_cond_t {
-
- unsigned int cond_lock;
-
- internal mutex
-
- uint64_t total_seq;
-
- Total number of threads using the conditional variable.
-
- uint64_t wakeup_seq;
-
- sequence number for next wakeup.
-
- uint64_t woken_seq;
-
- sequence number of last woken thread.
-
- uint32_t broadcast_seq;
-
-}
-
-
-struct cv_data {
-
- pthread_cond_t *cv;
-
- uint32_t bc_seq
-
-}
-
-
-
-cleanup_handler(cv_data)
-{
- cv = cv_data->cv;
- lll_lock(cv->lock);
-
- if (cv_data->bc_seq == cv->broadcast_seq) {
- ++cv->wakeup_seq;
- ++cv->woken_seq;
- }
-
- /* make sure no signal gets lost. */
- FUTEX_WAKE(cv->wakeup_seq, ALL);
-
- lll_unlock(cv->lock);
-}
-
-
-cond_timedwait(cv, mutex, timeout):
-{
- lll_lock(cv->lock);
- mutex_unlock(mutex);
-
- cleanup_push
-
- ++cv->total_seq;
- val = seq = cv->wakeup_seq;
- cv_data.bc = cv->broadcast_seq;
- cv_data.cv = cv;
-
- while (1) {
-
- lll_unlock(cv->lock);
-
- enable_async(&cv_data);
-
- ret = FUTEX_WAIT(cv->wakeup_seq, val, timeout);
-
- restore_async
-
- lll_lock(cv->lock);
-
- if (bc != cv->broadcast_seq)
- goto bc_out;
-
- val = cv->wakeup_seq;
-
- if (val != seq && cv->woken_seq != val) {
- ret = 0;
- break;
- }
-
- if (ret == TIMEDOUT) {
- ++cv->wakeup_seq;
- break;
- }
- }
-
- ++cv->woken_seq;
-
- bc_out:
- lll_unlock(cv->lock);
-
- cleanup_pop
-
- mutex_lock(mutex);
-
- return ret;
-}
-
-cond_signal(cv)
-{
- lll_lock(cv->lock);
-
- if (cv->total_seq > cv->wakeup_seq) {
- ++cv->wakeup_seq;
- FUTEX_WAKE(cv->wakeup_seq, 1);
- }
-
- lll_unlock(cv->lock);
-}
-
-cond_broadcast(cv)
-{
- lll_lock(cv->lock);
-
- if (cv->total_seq > cv->wakeup_seq) {
- cv->wakeup_seq = cv->total_seq;
- cv->woken_seq = cv->total_seq;
- ++cv->broadcast_seq;
- FUTEX_WAKE(cv->wakeup_seq, ALL);
- }
-
- lll_unlock(cv->lock);
-}
diff --git a/nptl/Makefile b/nptl/Makefile
index bed5babfd9..62b0951ec0 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -83,7 +83,7 @@ libpthread-routines = nptl-init vars events version pt-interp \
pthread_rwlockattr_getkind_np \
pthread_rwlockattr_setkind_np \
pthread_cond_init pthread_cond_destroy \
- pthread_cond_wait pthread_cond_timedwait \
+ pthread_cond_wait \
pthread_cond_signal pthread_cond_broadcast \
old_pthread_cond_init old_pthread_cond_destroy \
old_pthread_cond_wait old_pthread_cond_timedwait \
@@ -186,7 +186,6 @@ CFLAGS-pthread_timedjoin.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-pthread_once.c = $(uses-callbacks) -fexceptions \
-fasynchronous-unwind-tables
CFLAGS-pthread_cond_wait.c = -fexceptions -fasynchronous-unwind-tables
-CFLAGS-pthread_cond_timedwait.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-sem_wait.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-sem_timedwait.c = -fexceptions -fasynchronous-unwind-tables
@@ -307,8 +306,7 @@ test-xfail-tst-once5 = yes
# Files which must not be linked with libpthread.
tests-nolibpthread = tst-unload
-gen-as-const-headers = pthread-errnos.sym \
- lowlevelcond.sym lowlevelrwlock.sym \
+gen-as-const-headers = pthread-errnos.sym lowlevelrwlock.sym \
unwindbuf.sym \
lowlevelrobustlock.sym pthread-pi-defines.sym
diff --git a/nptl/lowlevelcond.sym b/nptl/lowlevelcond.sym
deleted file mode 100644
index 18e1adad43..0000000000
--- a/nptl/lowlevelcond.sym
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <stddef.h>
-#include <sched.h>
-#include <bits/pthreadtypes.h>
-#include <internaltypes.h>
-
---
-
-cond_lock offsetof (pthread_cond_t, __data.__lock)
-cond_futex offsetof (pthread_cond_t, __data.__futex)
-cond_nwaiters offsetof (pthread_cond_t, __data.__nwaiters)
-total_seq offsetof (pthread_cond_t, __data.__total_seq)
-wakeup_seq offsetof (pthread_cond_t, __data.__wakeup_seq)
-woken_seq offsetof (pthread_cond_t, __data.__woken_seq)
-dep_mutex offsetof (pthread_cond_t, __data.__mutex)
-broadcast_seq offsetof (pthread_cond_t, __data.__broadcast_seq)
-nwaiters_shift COND_NWAITERS_SHIFT
diff --git a/nptl/nptl-printers.py b/nptl/nptl-printers.py
index e402f232c7..76adaddd95 100644
--- a/nptl/nptl-printers.py
+++ b/nptl/nptl-printers.py
@@ -293,16 +293,6 @@ class MutexAttributesPrinter(object):
elif protocol == PTHREAD_PRIO_PROTECT:
self.values.append(('Protocol', 'Priority protect'))
-CLOCK_IDS = {
- CLOCK_REALTIME: 'CLOCK_REALTIME',
- CLOCK_MONOTONIC: 'CLOCK_MONOTONIC',
- CLOCK_PROCESS_CPUTIME_ID: 'CLOCK_PROCESS_CPUTIME_ID',
- CLOCK_THREAD_CPUTIME_ID: 'CLOCK_THREAD_CPUTIME_ID',
- CLOCK_MONOTONIC_RAW: 'CLOCK_MONOTONIC_RAW',
- CLOCK_REALTIME_COARSE: 'CLOCK_REALTIME_COARSE',
- CLOCK_MONOTONIC_COARSE: 'CLOCK_MONOTONIC_COARSE'
-}
-
class ConditionVariablePrinter(object):
"""Pretty printer for pthread_cond_t."""
@@ -313,24 +303,8 @@ class ConditionVariablePrinter(object):
cond: A gdb.value representing a pthread_cond_t.
"""
- # Since PTHREAD_COND_SHARED is an integer, we need to cast it to void *
- # to be able to compare it to the condvar's __data.__mutex member.
- #
- # While it looks like self.shared_value should be a class variable,
- # that would result in it having an incorrect size if we're loading
- # these printers through .gdbinit for a 64-bit objfile in AMD64.
- # This is because gdb initially assumes the pointer size to be 4 bytes,
- # and only sets it to 8 after loading the 64-bit objfiles. Since
- # .gdbinit runs before any objfiles are loaded, this would effectively
- # make self.shared_value have a size of 4, thus breaking later
- # comparisons with pointers whose types are looked up at runtime.
- void_ptr_type = gdb.lookup_type('void').pointer()
- self.shared_value = gdb.Value(PTHREAD_COND_SHARED).cast(void_ptr_type)
-
data = cond['__data']
- self.total_seq = data['__total_seq']
- self.mutex = data['__mutex']
- self.nwaiters = data['__nwaiters']
+ self.wrefs = data['__wrefs']
self.values = []
self.read_values()
@@ -360,7 +334,6 @@ class ConditionVariablePrinter(object):
self.read_status()
self.read_attributes()
- self.read_mutex_info()
def read_status(self):
"""Read the status of the condvar.
@@ -369,41 +342,22 @@ class ConditionVariablePrinter(object):
are waiting for it.
"""
- if self.total_seq == PTHREAD_COND_DESTROYED:
- self.values.append(('Status', 'Destroyed'))
-
- self.values.append(('Threads waiting for this condvar',
- self.nwaiters >> COND_NWAITERS_SHIFT))
+ self.values.append(('Threads known to still execute a wait function',
+ self.wrefs >> PTHREAD_COND_WREFS_SHIFT))
def read_attributes(self):
"""Read the condvar's attributes."""
- clock_id = self.nwaiters & ((1 << COND_NWAITERS_SHIFT) - 1)
-
- # clock_id must be casted to int because it's a gdb.Value
- self.values.append(('Clock ID', CLOCK_IDS[int(clock_id)]))
+ if (self.wrefs & PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0:
+ self.values.append(('Clock ID', 'CLOCK_MONOTONIC'))
+ else:
+ self.values.append(('Clock ID', 'CLOCK_REALTIME'))
- shared = (self.mutex == self.shared_value)
-
- if shared:
+ if (self.wrefs & PTHREAD_COND_SHARED_MASK) != 0:
self.values.append(('Shared', 'Yes'))
else:
self.values.append(('Shared', 'No'))
- def read_mutex_info(self):
- """Read the data of the mutex this condvar is bound to.
-
- A pthread_cond_t's __data.__mutex member is a void * which
- must be casted to pthread_mutex_t *. For shared condvars, this
- member isn't recorded and has a special value instead.
- """
-
- if self.mutex and self.mutex != self.shared_value:
- mutex_type = gdb.lookup_type('pthread_mutex_t')
- mutex = self.mutex.cast(mutex_type.pointer()).dereference()
-
- self.values.append(('Mutex', mutex))
-
class ConditionVariableAttributesPrinter(object):
"""Pretty printer for pthread_condattr_t.
@@ -453,10 +407,12 @@ class ConditionVariableAttributesPrinter(object):
created in self.children.
"""
- clock_id = self.condattr & ((1 << COND_NWAITERS_SHIFT) - 1)
+ clock_id = (self.condattr >> 1) & ((1 << COND_CLOCK_BITS) - 1)
- # clock_id must be casted to int because it's a gdb.Value
- self.values.append(('Clock ID', CLOCK_IDS[int(clock_id)]))
+ if clock_id != 0:
+ self.values.append(('Clock ID', 'CLOCK_MONOTONIC'))
+ else:
+ self.values.append(('Clock ID', 'CLOCK_REALTIME'))
if self.condattr & 1:
self.values.append(('Shared', 'Yes'))
diff --git a/nptl/nptl_lock_constants.pysym b/nptl/nptl_lock_constants.pysym
index 303ec61213..2ab3179155 100644
--- a/nptl/nptl_lock_constants.pysym
+++ b/nptl/nptl_lock_constants.pysym
@@ -44,26 +44,13 @@ PTHREAD_PRIO_NONE
PTHREAD_PRIO_INHERIT
PTHREAD_PRIO_PROTECT
--- These values are hardcoded as well:
--- Value of __mutex for shared condvars.
-PTHREAD_COND_SHARED (void *)~0l
-
--- Value of __total_seq for destroyed condvars.
-PTHREAD_COND_DESTROYED -1ull
-
--- __nwaiters encodes the number of threads waiting on a condvar
--- and the clock ID.
--- __nwaiters >> COND_NWAITERS_SHIFT gives us the number of waiters.
-COND_NWAITERS_SHIFT
-
--- Condvar clock IDs
-CLOCK_REALTIME
-CLOCK_MONOTONIC
-CLOCK_PROCESS_CPUTIME_ID
-CLOCK_THREAD_CPUTIME_ID
-CLOCK_MONOTONIC_RAW
-CLOCK_REALTIME_COARSE
-CLOCK_MONOTONIC_COARSE
+-- Condition variable
+-- FIXME Why can't macros prefixed with __ be used directly?
+PTHREAD_COND_SHARED_MASK __PTHREAD_COND_SHARED_MASK
+PTHREAD_COND_CLOCK_MONOTONIC_MASK __PTHREAD_COND_CLOCK_MONOTONIC_MASK
+COND_CLOCK_BITS
+-- These values are hardcoded:
+PTHREAD_COND_WREFS_SHIFT 3
-- Rwlock attributes
PTHREAD_RWLOCK_PREFER_READER_NP
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index 6e0dd09f4f..92a9992e1f 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -167,6 +167,13 @@ enum
#define __PTHREAD_ONCE_FORK_GEN_INCR 4
+/* Condition variable definitions. See __pthread_cond_wait_common.
+ Need to be defined here so there is one place from which
+ nptl_lock_constants can grab them. */
+#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2
+#define __PTHREAD_COND_SHARED_MASK 1
+
+
/* Internal variables. */
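
The new __wrefs field packs several things into one word: the two flag bits defined above, a third bit that pthread_cond_destroy uses as a wake-request flag, and, in the remaining bits, the count of threads still executing a wait function (hence the PTHREAD_COND_WREFS_SHIFT of 3 exported to the pretty printers). The following minimal sketch is not part of the patch and only illustrates that layout; the helper name decode_wrefs is made up for the example.

    #include <stdio.h>

    /* Bit layout of __wrefs as used by this patch:
       bit 0:   process-shared (__PTHREAD_COND_SHARED_MASK)
       bit 1:   CLOCK_MONOTONIC (__PTHREAD_COND_CLOCK_MONOTONIC_MASK)
       bit 2:   destruction wake-request flag (set by pthread_cond_destroy)
       bit 3..: number of waiters still executing a wait function.  */
    static void
    decode_wrefs (unsigned int wrefs)
    {
      printf ("waiters=%u shared=%u monotonic=%u destroy-pending=%u\n",
              wrefs >> 3, wrefs & 1, (wrefs >> 1) & 1, (wrefs >> 2) & 1);
    }

    int
    main (void)
    {
      /* Five waiters on a private, CLOCK_MONOTONIC condvar.  */
      decode_wrefs ((5u << 3) | 2u);
      return 0;
    }
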
diff --git a/nptl/pthread_cond_broadcast.c b/nptl/pthread_cond_broadcast.c
index 552fd42f60..87c07552cf 100644
--- a/nptl/pthread_cond_broadcast.c
+++ b/nptl/pthread_cond_broadcast.c
@@ -19,72 +19,71 @@
#include <endian.h>
#include <errno.h>
#include <sysdep.h>
-#include <lowlevellock.h>
+#include <futex-internal.h>
#include <pthread.h>
#include <pthreadP.h>
#include <stap-probe.h>
+#include <atomic.h>
#include <shlib-compat.h>
-#include <kernel-features.h>
+#include "pthread_cond_common.c"
+
+/* We do the following steps from __pthread_cond_signal in one critical
+ section: (1) signal all waiters in G1, (2) close G1 so that it can become
+ the new G2 and make G2 the new G1, and (3) signal all waiters in the new
+ G1. We don't need to do all these steps if there are no waiters in G1
+ and/or G2. See __pthread_cond_signal for further details. */
int
__pthread_cond_broadcast (pthread_cond_t *cond)
{
LIBC_PROBE (cond_broadcast, 1, cond);
- int pshared = (cond->__data.__mutex == (void *) ~0l)
- ? LLL_SHARED : LLL_PRIVATE;
- /* Make sure we are alone. */
- lll_lock (cond->__data.__lock, pshared);
+ unsigned int wrefs = atomic_load_relaxed (&cond->__data.__wrefs);
+ if (wrefs >> 3 == 0)
+ return 0;
+ int private = __condvar_get_private (wrefs);
+
+ __condvar_acquire_lock (cond, private);
- /* Are there any waiters to be woken? */
- if (cond->__data.__total_seq > cond->__data.__wakeup_seq)
+ unsigned long long int wseq = __condvar_load_wseq_relaxed (cond);
+ unsigned int g2 = wseq & 1;
+ unsigned int g1 = g2 ^ 1;
+ wseq >>= 1;
+ bool do_futex_wake = false;
+
+ /* Step (1): signal all waiters remaining in G1. */
+ if (cond->__data.__g_size[g1] != 0)
{
- /* Yes. Mark them all as woken. */
- cond->__data.__wakeup_seq = cond->__data.__total_seq;
- cond->__data.__woken_seq = cond->__data.__total_seq;
- cond->__data.__futex = (unsigned int) cond->__data.__total_seq * 2;
- int futex_val = cond->__data.__futex;
- /* Signal that a broadcast happened. */
- ++cond->__data.__broadcast_seq;
-
- /* We are done. */
- lll_unlock (cond->__data.__lock, pshared);
-
- /* Wake everybody. */
- pthread_mutex_t *mut = (pthread_mutex_t *) cond->__data.__mutex;
-
- /* Do not use requeue for pshared condvars. */
- if (mut == (void *) ~0l
- || PTHREAD_MUTEX_PSHARED (mut) & PTHREAD_MUTEX_PSHARED_BIT)
- goto wake_all;
-
-#if (defined lll_futex_cmp_requeue_pi \
- && defined __ASSUME_REQUEUE_PI)
- if (USE_REQUEUE_PI (mut))
- {
- if (lll_futex_cmp_requeue_pi (&cond->__data.__futex, 1, INT_MAX,
- &mut->__data.__lock, futex_val,
- LLL_PRIVATE) == 0)
- return 0;
- }
- else
-#endif
- /* lll_futex_requeue returns 0 for success and non-zero
- for errors. */
- if (!__builtin_expect (lll_futex_requeue (&cond->__data.__futex, 1,
- INT_MAX, &mut->__data.__lock,
- futex_val, LLL_PRIVATE), 0))
- return 0;
-
-wake_all:
- lll_futex_wake (&cond->__data.__futex, INT_MAX, pshared);
- return 0;
+ /* Add as many signals as the remaining size of the group. */
+ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1,
+ cond->__data.__g_size[g1] << 1);
+ cond->__data.__g_size[g1] = 0;
+
+ /* We need to wake G1 waiters before we quiesce G1 below. */
+ /* TODO Only set it if there are indeed futex waiters. We could
+ also try to move this out of the critical section in cases when
+ G2 is empty (and we don't need to quiesce). */
+ futex_wake (cond->__data.__g_signals + g1, INT_MAX, private);
}
- /* We are done. */
- lll_unlock (cond->__data.__lock, pshared);
+ /* G1 is complete. Step (2) is next unless there are no waiters in G2, in
+ which case we can stop. */
+ if (__condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private))
+ {
+ /* Step (3): Send signals to all waiters in the old G2 / new G1. */
+ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1,
+ cond->__data.__g_size[g1] << 1);
+ cond->__data.__g_size[g1] = 0;
+ /* TODO Only set it if there are indeed futex waiters. */
+ do_futex_wake = true;
+ }
+
+ __condvar_release_lock (cond, private);
+
+ if (do_futex_wake)
+ futex_wake (cond->__data.__g_signals + g1, INT_MAX, private);
return 0;
}
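
The per-group __g_signals futex words use a similar packing: the least-significant bit is the "group closed" flag set by __condvar_quiesce_and_switch_g1, and the remaining bits count the signals available to waiters of that group, which is why the code above adds __g_size[g1] << 1 (and pthread_cond_signal adds 2). A small illustration, not part of the patch, with a made-up helper name:

    #include <stdio.h>

    /* __g_signals: bit 0 = group closed, bits 1.. = available signals.  */
    static void
    decode_g_signals (unsigned int v)
    {
      printf ("signals=%u closed=%u\n", v >> 1, v & 1);
    }

    int
    main (void)
    {
      decode_g_signals (0);               /* freshly reopened group */
      decode_g_signals (3u << 1);         /* broadcast added 3 signals */
      decode_g_signals ((3u << 1) | 1);   /* group closed, 3 signals left */
      return 0;
    }
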
diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c
new file mode 100644
index 0000000000..b374396d45
--- /dev/null
+++ b/nptl/pthread_cond_common.c
@@ -0,0 +1,466 @@
+/* pthread_cond_common -- shared code for condition variable.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <atomic.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <libc-internal.h>
+
+/* We need 3 least-significant bits on __wrefs for something else. */
+#define __PTHREAD_COND_MAX_GROUP_SIZE ((unsigned) 1 << 29)
+
+#if __HAVE_64B_ATOMICS == 1
+
+static uint64_t __attribute__ ((unused))
+__condvar_load_wseq_relaxed (pthread_cond_t *cond)
+{
+ return atomic_load_relaxed (&cond->__data.__wseq);
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val)
+{
+ return atomic_fetch_add_acquire (&cond->__data.__wseq, val);
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
+{
+ return atomic_fetch_xor_release (&cond->__data.__wseq, val);
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
+{
+ return atomic_load_relaxed (&cond->__data.__g1_start);
+}
+
+static void __attribute__ ((unused))
+__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
+{
+ atomic_store_relaxed (&cond->__data.__g1_start,
+ atomic_load_relaxed (&cond->__data.__g1_start) + val);
+}
+
+#else
+
+/* We use two 64b counters: __wseq and __g1_start. They are monotonically
+ increasing and single-writer-multiple-readers counters, so we can implement
+ load, fetch-and-add, and fetch-and-xor operations even when we just have
+ 32b atomics. Values we add or xor are less than or equal to 1<<31 (*),
+ so we only have to make overflow-and-addition atomic wrt. to concurrent
+ load operations and xor operations. To do that, we split each counter into
+ two 32b values of which we reserve the MSB of each to represent an
+ overflow from the lower-order half to the higher-order half.
+
+ In the common case, the state is (higher-order / lower-order half, and . is
+ basically concatenation of the bits):
+ 0.h / 0.l = h.l
+
+ When we add a value of x that overflows (i.e., 0.l + x == 1.L), we run the
+ following steps S1-S4 (the values these represent are on the right-hand
+ side):
+ S1: 0.h / 1.L == (h+1).L
+ S2: 1.(h+1) / 1.L == (h+1).L
+ S3: 1.(h+1) / 0.L == (h+1).L
+ S4: 0.(h+1) / 0.L == (h+1).L
+ If the LSB of the higher-order half is set, readers will ignore the
+ overflow bit in the lower-order half.
+
+ To get an atomic snapshot in load operations, we exploit that the
+ higher-order half is monotonically increasing; if we load a value V from
+ it, then read the lower-order half, and then read the higher-order half
+ again and see the same value V, we know that both halves have existed in
+ the sequence of values the full counter had. This is similar to the
+ validated reads in the time-based STMs in GCC's libitm (e.g.,
+ method_ml_wt).
+
+ The xor operation needs to be an atomic read-modify-write. The write
+ itself is not an issue as it affects just the lower-order half but not bits
+ used in the add operation. To make the full fetch-and-xor atomic, we
+ exploit that concurrently, the value can increase by at most 1<<31 (*): The
+ xor operation is only called while having acquired the lock, so not more
+ than __PTHREAD_COND_MAX_GROUP_SIZE waiters can enter concurrently and thus
+ increment __wseq. Therefore, if the xor operation observes a value of
+ __wseq, then the value it applies the modification to later on can be
+ derived (see below).
+
+ One benefit of this scheme is that this makes load operations
+ obstruction-free because, unlike when simply locking the counter, readers
+ can almost always interpret a snapshot of the two halves. Readers can be
+ forced to read a new snapshot when the read is concurrent with an overflow.
+ However, overflows will happen infrequently, so load operations are
+ practically lock-free.
+
+ (*) The highest value we add is __PTHREAD_COND_MAX_GROUP_SIZE << 2 to
+ __g1_start (the two extra bits are for the lock in the two LSBs of
+ __g1_start). */
+
+typedef struct
+{
+ unsigned int low;
+ unsigned int high;
+} _condvar_lohi;
+
+static uint64_t
+__condvar_fetch_add_64_relaxed (_condvar_lohi *lh, unsigned int op)
+{
+ /* S1. Note that this is an atomic read-modify-write so it extends the
+ release sequence of release MO store at S3. */
+ unsigned int l = atomic_fetch_add_relaxed (&lh->low, op);
+ unsigned int h = atomic_load_relaxed (&lh->high);
+ uint64_t result = ((uint64_t) h << 31) | l;
+ l += op;
+ if ((l >> 31) > 0)
+ {
+ /* Overflow. Need to increment higher-order half. Note that all
+ add operations are ordered in happens-before. */
+ h++;
+ /* S2. Release MO to synchronize with the loads of the higher-order half
+ in the load operation. See __condvar_load_64_relaxed. */
+ atomic_store_release (&lh->high, h | ((unsigned int) 1 << 31));
+ l ^= (unsigned int) 1 << 31;
+ /* S3. See __condvar_load_64_relaxed. */
+ atomic_store_release (&lh->low, l);
+ /* S4. Likewise. */
+ atomic_store_release (&lh->high, h);
+ }
+ return result;
+}
+
+static uint64_t
+__condvar_load_64_relaxed (_condvar_lohi *lh)
+{
+ unsigned int h, l, h2;
+ do
+ {
+ /* This load and the second one below to the same location read from the
+ stores in the overflow handling of the add operation or the
+ initializing stores (which is a simple special case because
+ initialization always completely happens before further use).
+ Because no two stores to the higher-order half write the same value,
+ the loop ensures that if we continue to use the snapshot, this load
+ and the second one read from the same store operation. All candidate
+ store operations have release MO.
+ If we read from S2 in the first load, then we will see the value of
+ S1 on the next load (because we synchronize with S2), or a value
+ later in modification order. We correctly ignore the lower-half's
+ overflow bit in this case. If we read from S4, then we will see the
+ value of S3 in the next load (or a later value), which does not have
+ the overflow bit set anymore.
+ */
+ h = atomic_load_acquire (&lh->high);
+ /* This will read from the release sequence of S3 (i.e, either the S3
+ store or the read-modify-writes at S1 following S3 in modification
+ order). Thus, the read synchronizes with S3, and the following load
+ of the higher-order half will read from the matching S2 (or a later
+ value).
+ Thus, if we read a lower-half value here that already overflowed and
+ belongs to an increased higher-order half value, we will see the
+ latter and h and h2 will not be equal. */
+ l = atomic_load_acquire (&lh->low);
+ /* See above. */
+ h2 = atomic_load_relaxed (&lh->high);
+ }
+ while (h != h2);
+ if (((l >> 31) > 0) && ((h >> 31) > 0))
+ l ^= (unsigned int) 1 << 31;
+ return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l;
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_load_wseq_relaxed (pthread_cond_t *cond)
+{
+ return __condvar_load_64_relaxed ((_condvar_lohi *) &cond->__data.__wseq32);
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val)
+{
+ uint64_t r = __condvar_fetch_add_64_relaxed
+ ((_condvar_lohi *) &cond->__data.__wseq32, val);
+ atomic_thread_fence_acquire ();
+ return r;
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
+{
+ _condvar_lohi *lh = (_condvar_lohi *) &cond->__data.__wseq32;
+ /* First, get the current value. See __condvar_load_64_relaxed. */
+ unsigned int h, l, h2;
+ do
+ {
+ h = atomic_load_acquire (&lh->high);
+ l = atomic_load_acquire (&lh->low);
+ h2 = atomic_load_relaxed (&lh->high);
+ }
+ while (h != h2);
+ if (((l >> 31) > 0) && ((h >> 31) == 0))
+ h++;
+ h &= ~((unsigned int) 1 << 31);
+ l &= ~((unsigned int) 1 << 31);
+
+ /* Now modify. Due to the coherence rules, the prior load will read a value
+ earlier in modification order than the following fetch-xor.
+ This uses release MO to make the full operation have release semantics
+ (all other operations access the lower-order half). */
+ unsigned int l2 = atomic_fetch_xor_release (&lh->low, val)
+ & ~((unsigned int) 1 << 31);
+ if (l2 < l)
+ /* The lower-order half overflowed in the meantime. This happened exactly
+ once due to the limit on concurrent waiters (see above). */
+ h++;
+ return ((uint64_t) h << 31) + l2;
+}
+
+static uint64_t __attribute__ ((unused))
+__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
+{
+ return __condvar_load_64_relaxed
+ ((_condvar_lohi *) &cond->__data.__g1_start32);
+}
+
+static void __attribute__ ((unused))
+__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
+{
+ ignore_value (__condvar_fetch_add_64_relaxed
+ ((_condvar_lohi *) &cond->__data.__g1_start32, val));
+}
+
+#endif /* !__HAVE_64B_ATOMICS */
+
+
+/* The lock that signalers use. See pthread_cond_wait_common for uses.
+ The lock is our normal three-state lock: not acquired (0) / acquired (1) /
+ acquired-with-futex_wake-request (2). However, we need to preserve the
+ other bits in the unsigned int used for the lock, and therefore it is a
+ little more complex. */
+static void __attribute__ ((unused))
+__condvar_acquire_lock (pthread_cond_t *cond, int private)
+{
+ unsigned int s = atomic_load_relaxed (&cond->__data.__g1_orig_size);
+ while ((s & 3) == 0)
+ {
+ if (atomic_compare_exchange_weak_acquire (&cond->__data.__g1_orig_size,
+ &s, s | 1))
+ return;
+ /* TODO Spinning and back-off. */
+ }
+ /* We can't change from not acquired to acquired, so try to change to
+ acquired-with-futex-wake-request and do a futex wait if we cannot change
+ from not acquired. */
+ while (1)
+ {
+ while ((s & 3) != 2)
+ {
+ if (atomic_compare_exchange_weak_acquire
+ (&cond->__data.__g1_orig_size, &s, (s & ~(unsigned int) 3) | 2))
+ {
+ if ((s & 3) == 0)
+ return;
+ break;
+ }
+ /* TODO Back off. */
+ }
+ futex_wait_simple (&cond->__data.__g1_orig_size,
+ (s & ~(unsigned int) 3) | 2, private);
+ /* Reload so we see a recent value. */
+ s = atomic_load_relaxed (&cond->__data.__g1_orig_size);
+ }
+}
+
+/* See __condvar_acquire_lock. */
+static void __attribute__ ((unused))
+__condvar_release_lock (pthread_cond_t *cond, int private)
+{
+ if ((atomic_fetch_and_release (&cond->__data.__g1_orig_size,
+ ~(unsigned int) 3) & 3)
+ == 2)
+ futex_wake (&cond->__data.__g1_orig_size, 1, private);
+}
+
+/* Only use this when having acquired the lock. */
+static unsigned int __attribute__ ((unused))
+__condvar_get_orig_size (pthread_cond_t *cond)
+{
+ return atomic_load_relaxed (&cond->__data.__g1_orig_size) >> 2;
+}
+
+/* Only use this when having acquired the lock. */
+static void __attribute__ ((unused))
+__condvar_set_orig_size (pthread_cond_t *cond, unsigned int size)
+{
+ /* We have acquired the lock, but might get one concurrent update due to a
+ lock state change from acquired to acquired-with-futex_wake-request.
+ The store with relaxed MO is fine because there will be no further
+ changes to the lock bits nor the size, and we will subsequently release
+ the lock with release MO. */
+ unsigned int s;
+ s = (atomic_load_relaxed (&cond->__data.__g1_orig_size) & 3)
+ | (size << 2);
+ if ((atomic_exchange_relaxed (&cond->__data.__g1_orig_size, s) & 3)
+ != (s & 3))
+ atomic_store_relaxed (&cond->__data.__g1_orig_size, (size << 2) | 2);
+}
+
+/* Returns FUTEX_SHARED or FUTEX_PRIVATE based on the provided __wrefs
+ value. */
+static int __attribute__ ((unused))
+__condvar_get_private (int flags)
+{
+ if ((flags & __PTHREAD_COND_SHARED_MASK) == 0)
+ return FUTEX_PRIVATE;
+ else
+ return FUTEX_SHARED;
+}
+
+/* This closes G1 (whose index is in G1INDEX), waits for all futex waiters to
+ leave G1, converts G1 into a fresh G2, and then switches group roles so that
+ the former G2 becomes the new G1 ending at the current __wseq value when we
+ eventually make the switch (WSEQ is just an observation of __wseq by the
+ signaler).
+ If G2 is empty, it will not switch groups because then it would create an
+ empty G1 which would require switching groups again on the next signal.
+ Returns false iff groups were not switched because G2 was empty. */
+static bool __attribute__ ((unused))
+__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
+ unsigned int *g1index, int private)
+{
+ const unsigned int maxspin = 0;
+ unsigned int g1 = *g1index;
+
+ /* If there is no waiter in G2, we don't do anything. The expression may
+ look odd but remember that __g_size might hold a negative value, so
+ putting the expression this way avoids relying on implementation-defined
+ behavior.
+ Note that this works correctly for a zero-initialized condvar too. */
+ unsigned int old_orig_size = __condvar_get_orig_size (cond);
+ uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
+ if (((unsigned) (wseq - old_g1_start - old_orig_size)
+ + cond->__data.__g_size[g1 ^ 1]) == 0)
+ return false;
+
+ /* Now try to close and quiesce G1. We have to consider the following kinds
+ of waiters:
+ * Waiters from less recent groups than G1 are not affected because
+ nothing will change for them apart from __g1_start getting larger.
+ * New waiters arriving concurrently with the group switching will all go
+ into G2 until we atomically make the switch. Waiters existing in G2
+ are not affected.
+ * Waiters in G1 will be closed out immediately by setting a flag in
+ __g_signals, which will prevent waiters from blocking using a futex on
+ __g_signals and also notifies them that the group is closed. As a
+ result, they will eventually remove their group reference, allowing us
+ to finish closing G1 and switch group roles. */
+
+ /* First, set the closed flag on __g_signals. This tells waiters that are
+ about to wait that they shouldn't do that anymore. This basically
+ serves as an advance notification of the upcoming change to __g1_start;
+ waiters interpret it as if __g1_start was larger than their waiter
+ sequence position. This allows us to change __g1_start after waiting
+ for all existing waiters with group references to leave, which in turn
+ makes recovery after stealing a signal simpler because it then can be
+ skipped if __g1_start indicates that the group is closed (otherwise,
+ we would have to recover always because waiters don't know how big their
+ groups are). Relaxed MO is fine. */
+ atomic_fetch_or_relaxed (cond->__data.__g_signals + g1, 1);
+
+ /* Wait until there are no group references anymore. The fetch-or operation
+ injects us into the modification order of __g_refs; release MO ensures
+ that waiters incrementing __g_refs after our fetch-or see the previous
+ changes to __g_signals and to __g1_start that had to happen before we can
+ switch this G1 and alias with an older group (we have two groups, so
+ aliasing requires switching group roles twice). Note that nobody else
+ can have set the wake-request flag, so we do not have to act upon it.
+
+ Also note that it is harmless if older waiters or waiters from this G1
+ get a group reference after we have quiesced the group because it will
+ remain closed for them either because of the closed flag in __g_signals
+ or the later update to __g1_start. New waiters will never arrive here
+ but instead continue to go into the still current G2. */
+ unsigned r = atomic_fetch_or_release (cond->__data.__g_refs + g1, 0);
+ while ((r >> 1) > 0)
+ {
+ for (unsigned int spin = maxspin; ((r >> 1) > 0) && (spin > 0); spin--)
+ {
+ /* TODO Back off. */
+ r = atomic_load_relaxed (cond->__data.__g_refs + g1);
+ }
+ if ((r >> 1) > 0)
+ {
+ /* There is still a waiter after spinning. Set the wake-request
+ flag and block. Relaxed MO is fine because this is just about
+ this futex word. */
+ r = atomic_fetch_or_relaxed (cond->__data.__g_refs + g1, 1);
+
+ if ((r >> 1) > 0)
+ futex_wait_simple (cond->__data.__g_refs + g1, r, private);
+ /* Reload here so we eventually see the most recent value even if we
+ do not spin. */
+ r = atomic_load_relaxed (cond->__data.__g_refs + g1);
+ }
+ }
+ /* Acquire MO so that we synchronize with the release operation that waiters
+ use to decrement __g_refs and thus happen after the waiters we waited
+ for. */
+ atomic_thread_fence_acquire ();
+
+ /* Update __g1_start, which finishes closing this group. The value we add
+ will never be negative because old_orig_size can only be zero when we
+ switch groups the first time after a condvar was initialized, in which
+ case G1 will be at index 1 and we will add a value of 1. See above for
+ why this takes place after waiting for quiescence of the group.
+ Relaxed MO is fine because the change comes with no additional
+ constraints that others would have to observe. */
+ __condvar_add_g1_start_relaxed (cond,
+ (old_orig_size << 1) + (g1 == 1 ? 1 : - 1));
+
+ /* Now reopen the group, thus enabling waiters to again block using the
+ futex controlled by __g_signals. Release MO so that observers that see
+ no signals (and thus can block) also see the write __g1_start and thus
+ that this is now a new group (see __pthread_cond_wait_common for the
+ matching acquire MO loads). */
+ atomic_store_release (cond->__data.__g_signals + g1, 0);
+
+ /* At this point, the old G1 is now a valid new G2 (but not in use yet).
+ No old waiter can grab a signal or acquire a reference without
+ noticing that __g1_start is larger.
+ We can now publish the group switch by flipping the G2 index in __wseq.
+ Release MO so that this synchronizes with the acquire MO operation
+ waiters use to obtain a position in the waiter sequence. */
+ wseq = __condvar_fetch_xor_wseq_release (cond, 1) >> 1;
+ g1 ^= 1;
+ *g1index ^= 1;
+
+ /* These values are just observed by signalers, and thus protected by the
+ lock. */
+ unsigned int orig_size = wseq - (old_g1_start + old_orig_size);
+ __condvar_set_orig_size (cond, orig_size);
+ /* Use an addition to not lose track of cancellations in what was
+ previously G2. */
+ cond->__data.__g_size[g1] += orig_size;
+
+ /* The new G1's size may be zero because of cancellations during its time
+ as G2. If this happens, there are no waiters that have to receive a
+ signal, so we do not need to add any and return false. */
+ if (cond->__data.__g_size[g1] == 0)
+ return false;
+
+ return true;
+}
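
To make the G2-emptiness test at the top of __condvar_quiesce_and_switch_g1 concrete: with the already-shifted sequence values the function works with, the number of waiters that entered G2 is wseq minus the start of G1 minus G1's original size, plus G2's __g_size (which cancellations may have made "negative", i.e., a large unsigned value). A standalone illustration, not part of the patch; the function name g2_has_waiters is invented for the example:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirrors the emptiness check: returns nonzero iff G2 still has waiters.  */
    static int
    g2_has_waiters (uint64_t wseq, uint64_t g1_start, unsigned int orig_size,
                    unsigned int g2_size)
    {
      return ((unsigned int) (wseq - g1_start - orig_size) + g2_size) != 0;
    }

    int
    main (void)
    {
      /* G1 started at position 4 with 6 waiters; wseq == 10 means nobody
         has entered G2 yet.  */
      printf ("%d\n", g2_has_waiters (10, 4, 6, 0));   /* prints 0 */
      /* Two more waiters arrived after the switch, so G2 is nonempty.  */
      printf ("%d\n", g2_has_waiters (12, 4, 6, 0));   /* prints 1 */
      return 0;
    }
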
diff --git a/nptl/pthread_cond_destroy.c b/nptl/pthread_cond_destroy.c
index 1acd8042d8..5845c6a7ad 100644
--- a/nptl/pthread_cond_destroy.c
+++ b/nptl/pthread_cond_destroy.c
@@ -20,66 +20,42 @@
#include <shlib-compat.h>
#include "pthreadP.h"
#include <stap-probe.h>
-
-
+#include <atomic.h>
+#include <futex-internal.h>
+
+#include "pthread_cond_common.c"
+
+/* See __pthread_cond_wait for a high-level description of the algorithm.
+
+ A correct program must make sure that no waiters are blocked on the condvar
+ when it is destroyed, and that there are no concurrent signals or
+ broadcasts. To wake waiters reliably, the program must signal or
+ broadcast while holding the mutex or after having held the mutex. It must
+ also ensure that no signal or broadcast are still pending to unblock
+ waiters; IOW, because waiters can wake up spuriously, the program must
+ effectively ensure that destruction happens after the execution of those
+ signal or broadcast calls.
+ Thus, we can assume that all waiters that are still accessing the condvar
+ have been woken. We wait until they have confirmed to have woken up by
+ decrementing __wrefs. */
int
__pthread_cond_destroy (pthread_cond_t *cond)
{
- int pshared = (cond->__data.__mutex == (void *) ~0l)
- ? LLL_SHARED : LLL_PRIVATE;
-
LIBC_PROBE (cond_destroy, 1, cond);
- /* Make sure we are alone. */
- lll_lock (cond->__data.__lock, pshared);
-
- if (cond->__data.__total_seq > cond->__data.__wakeup_seq)
- {
- /* If there are still some waiters which have not been
- woken up, this is an application bug. */
- lll_unlock (cond->__data.__lock, pshared);
- return EBUSY;
- }
-
- /* Tell pthread_cond_*wait that this condvar is being destroyed. */
- cond->__data.__total_seq = -1ULL;
-
- /* If there are waiters which have been already signalled or
- broadcasted, but still are using the pthread_cond_t structure,
- pthread_cond_destroy needs to wait for them. */
- unsigned int nwaiters = cond->__data.__nwaiters;
-
- if (nwaiters >= (1 << COND_NWAITERS_SHIFT))
+ /* Set the wake request flag. We could also spin, but destruction that is
+ concurrent with still-active waiters is probably neither common nor
+ performance critical. Acquire MO to synchronize with waiters confirming
+ that they finished. */
+ unsigned int wrefs = atomic_fetch_or_acquire (&cond->__data.__wrefs, 4);
+ int private = __condvar_get_private (wrefs);
+ while (wrefs >> 3 != 0)
{
- /* Wake everybody on the associated mutex in case there are
- threads that have been requeued to it.
- Without this, pthread_cond_destroy could block potentially
- for a long time or forever, as it would depend on other
- thread's using the mutex.
- When all threads waiting on the mutex are woken up, pthread_cond_wait
- only waits for threads to acquire and release the internal
- condvar lock. */
- if (cond->__data.__mutex != NULL
- && cond->__data.__mutex != (void *) ~0l)
- {
- pthread_mutex_t *mut = (pthread_mutex_t *) cond->__data.__mutex;
- lll_futex_wake (&mut->__data.__lock, INT_MAX,
- PTHREAD_MUTEX_PSHARED (mut));
- }
-
- do
- {
- lll_unlock (cond->__data.__lock, pshared);
-
- lll_futex_wait (&cond->__data.__nwaiters, nwaiters, pshared);
-
- lll_lock (cond->__data.__lock, pshared);
-
- nwaiters = cond->__data.__nwaiters;
- }
- while (nwaiters >= (1 << COND_NWAITERS_SHIFT));
+ futex_wait_simple (&cond->__data.__wrefs, wrefs, private);
+ /* See above. */
+ wrefs = atomic_load_acquire (&cond->__data.__wrefs);
}
-
+ /* The memory the condvar occupies can now be reused. */
return 0;
}
versioned_symbol (libpthread, __pthread_cond_destroy,
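
The destruction requirements spelled out above amount to the usual POSIX rule: wake all waiters while holding (or after having held) the mutex, make sure they have returned from the wait function, and only then destroy. A usage sketch, not part of the patch:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
    static int done;

    static void *
    waiter (void *arg)
    {
      pthread_mutex_lock (&m);
      while (!done)                   /* Predicate loop tolerates spurious wake-ups.  */
        pthread_cond_wait (&cv, &m);
      pthread_mutex_unlock (&m);
      return NULL;
    }

    int
    main (void)
    {
      pthread_t t;
      pthread_create (&t, NULL, waiter, NULL);

      pthread_mutex_lock (&m);
      done = 1;                       /* Change the predicate ...  */
      pthread_cond_broadcast (&cv);   /* ... and wake while holding the mutex.  */
      pthread_mutex_unlock (&m);

      /* Joining guarantees the waiter has left pthread_cond_wait, so no
         thread is blocked on cv and no wake-up is still pending.  */
      pthread_join (t, NULL);
      pthread_cond_destroy (&cv);
      puts ("destroyed safely");
      return 0;
    }
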
diff --git a/nptl/pthread_cond_init.c b/nptl/pthread_cond_init.c
index 9023370278..c1eac5f779 100644
--- a/nptl/pthread_cond_init.c
+++ b/nptl/pthread_cond_init.c
@@ -19,25 +19,29 @@
#include <shlib-compat.h>
#include "pthreadP.h"
#include <stap-probe.h>
+#include <string.h>
+/* See __pthread_cond_wait for details. */
int
__pthread_cond_init (pthread_cond_t *cond, const pthread_condattr_t *cond_attr)
{
struct pthread_condattr *icond_attr = (struct pthread_condattr *) cond_attr;
- cond->__data.__lock = LLL_LOCK_INITIALIZER;
- cond->__data.__futex = 0;
- cond->__data.__nwaiters = (icond_attr != NULL
- ? ((icond_attr->value >> 1)
- & ((1 << COND_NWAITERS_SHIFT) - 1))
- : CLOCK_REALTIME);
- cond->__data.__total_seq = 0;
- cond->__data.__wakeup_seq = 0;
- cond->__data.__woken_seq = 0;
- cond->__data.__mutex = (icond_attr == NULL || (icond_attr->value & 1) == 0
- ? NULL : (void *) ~0l);
- cond->__data.__broadcast_seq = 0;
+ memset (cond, 0, sizeof (pthread_cond_t));
+
+ /* Update the pretty printers if the internal representation of icond_attr
+ is changed. */
+
+ /* A set LSB in the attribute value means PTHREAD_PROCESS_SHARED. */
+ if (icond_attr != NULL && (icond_attr->value & 1) != 0)
+ cond->__data.__wrefs |= __PTHREAD_COND_SHARED_MASK;
+ int clockid = (icond_attr != NULL
+ ? ((icond_attr->value >> 1) & ((1 << COND_CLOCK_BITS) - 1))
+ : CLOCK_REALTIME);
+ /* If 0, CLOCK_REALTIME is used; CLOCK_MONOTONIC otherwise. */
+ if (clockid != CLOCK_REALTIME)
+ cond->__data.__wrefs |= __PTHREAD_COND_CLOCK_MONOTONIC_MASK;
LIBC_PROBE (cond_init, 2, cond, cond_attr);
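
With the new representation, a condvar attribute requesting CLOCK_MONOTONIC simply causes __PTHREAD_COND_CLOCK_MONOTONIC_MASK to be set in __wrefs during initialization. A usage sketch using only standard POSIX calls, not part of the patch:

    #include <pthread.h>
    #include <time.h>

    /* Initialize *cv so that pthread_cond_timedwait measures its timeout
       against CLOCK_MONOTONIC.  */
    static int
    make_monotonic_cond (pthread_cond_t *cv)
    {
      pthread_condattr_t attr;
      int err = pthread_condattr_init (&attr);
      if (err != 0)
        return err;
      err = pthread_condattr_setclock (&attr, CLOCK_MONOTONIC);
      if (err == 0)
        err = pthread_cond_init (cv, &attr);
      pthread_condattr_destroy (&attr);
      return err;
    }

    int
    main (void)
    {
      pthread_cond_t cv;
      return make_monotonic_cond (&cv);
    }
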
diff --git a/nptl/pthread_cond_signal.c b/nptl/pthread_cond_signal.c
index b3a6d3d2a4..a95d5690af 100644
--- a/nptl/pthread_cond_signal.c
+++ b/nptl/pthread_cond_signal.c
@@ -19,62 +19,79 @@
#include <endian.h>
#include <errno.h>
#include <sysdep.h>
-#include <lowlevellock.h>
+#include <futex-internal.h>
#include <pthread.h>
#include <pthreadP.h>
+#include <atomic.h>
+#include <stdint.h>
#include <shlib-compat.h>
-#include <kernel-features.h>
#include <stap-probe.h>
+#include "pthread_cond_common.c"
+/* See __pthread_cond_wait for a high-level description of the algorithm. */
int
__pthread_cond_signal (pthread_cond_t *cond)
{
- int pshared = (cond->__data.__mutex == (void *) ~0l)
- ? LLL_SHARED : LLL_PRIVATE;
-
LIBC_PROBE (cond_signal, 1, cond);
- /* Make sure we are alone. */
- lll_lock (cond->__data.__lock, pshared);
-
- /* Are there any waiters to be woken? */
- if (cond->__data.__total_seq > cond->__data.__wakeup_seq)
+ /* First check whether there are waiters. Relaxed MO is fine for that for
+ the same reasons that relaxed MO is fine when observing __wseq (see
+ below). */
+ unsigned int wrefs = atomic_load_relaxed (&cond->__data.__wrefs);
+ if (wrefs >> 3 == 0)
+ return 0;
+ int private = __condvar_get_private (wrefs);
+
+ __condvar_acquire_lock (cond, private);
+
+ /* Load the waiter sequence number, which represents our relative ordering
+ to any waiters. Relaxed MO is sufficient for that because:
+ 1) We can pick any position that is allowed by external happens-before
+ constraints. In particular, if another __pthread_cond_wait call
+ happened before us, this waiter must be eligible for being woken by
+ us. The only way to establish such a happens-before is by signaling
+ while having acquired the mutex associated with the condvar and
+ ensuring that the signal's critical section happens after the waiter.
+ Thus, the mutex ensures that we see that waiter's __wseq increase.
+ 2) Once we pick a position, we do not need to communicate this to the
+ program via a happens-before that we set up: First, any wake-up could
+ be a spurious wake-up, so the program must not interpret a wake-up as
+ an indication that the waiter happened before a particular signal;
+ second, a program cannot detect whether a waiter has not yet been
+ woken (i.e., it cannot distinguish between a non-woken waiter and one
+ that has been woken but hasn't resumed execution yet), and thus it
+ cannot try to deduce that a signal happened before a particular
+ waiter. */
+ unsigned long long int wseq = __condvar_load_wseq_relaxed (cond);
+ unsigned int g1 = (wseq & 1) ^ 1;
+ wseq >>= 1;
+ bool do_futex_wake = false;
+
+ /* If G1 is still receiving signals, we put the signal there. If not, we
+ check if G2 has waiters, and if so, quiesce and switch G1 to the former
+ G2; if this results in a new G1 with waiters (G2 might have cancellations
+ already, see __condvar_quiesce_and_switch_g1), we put the signal in the
+ new G1. */
+ if ((cond->__data.__g_size[g1] != 0)
+ || __condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private))
{
- /* Yes. Mark one of them as woken. */
- ++cond->__data.__wakeup_seq;
- ++cond->__data.__futex;
-
-#if (defined lll_futex_cmp_requeue_pi \
- && defined __ASSUME_REQUEUE_PI)
- pthread_mutex_t *mut = cond->__data.__mutex;
-
- if (USE_REQUEUE_PI (mut)
- /* This can only really fail with a ENOSYS, since nobody can modify
- futex while we have the cond_lock. */
- && lll_futex_cmp_requeue_pi (&cond->__data.__futex, 1, 0,
- &mut->__data.__lock,
- cond->__data.__futex, pshared) == 0)
- {
- lll_unlock (cond->__data.__lock, pshared);
- return 0;
- }
- else
-#endif
- /* Wake one. */
- if (! __builtin_expect (lll_futex_wake_unlock (&cond->__data.__futex,
- 1, 1,
- &cond->__data.__lock,
- pshared), 0))
- return 0;
-
- /* Fallback if neither of them work. */
- lll_futex_wake (&cond->__data.__futex, 1, pshared);
+ /* Add a signal. Relaxed MO is fine because signaling does not need to
+ establish a happens-before relation (see above). We do not mask the
+ release-MO store when initializing a group in
+ __condvar_quiesce_and_switch_g1 because we use an atomic
+ read-modify-write and thus extend that store's release sequence. */
+ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 2);
+ cond->__data.__g_size[g1]--;
+ /* TODO Only set it if there are indeed futex waiters. */
+ do_futex_wake = true;
}
- /* We are done. */
- lll_unlock (cond->__data.__lock, pshared);
+ __condvar_release_lock (cond, private);
+
+ if (do_futex_wake)
+ futex_wake (cond->__data.__g_signals + g1, 1, private);
return 0;
}
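
Point (1) of the comment above is what makes the textbook pattern work: the waiter increments __wseq while holding the mutex, so a signal sent while holding (or after having held) that mutex is ordered after the waiter and can wake it. A minimal illustration, not part of the patch:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
    static int ready;

    static void *
    consumer (void *arg)
    {
      pthread_mutex_lock (&m);
      while (!ready)
        pthread_cond_wait (&cv, &m);
      pthread_mutex_unlock (&m);
      puts ("woken");
      return NULL;
    }

    int
    main (void)
    {
      pthread_t t;
      pthread_create (&t, NULL, consumer, NULL);
      pthread_mutex_lock (&m);
      ready = 1;                    /* Change the predicate ...  */
      pthread_cond_signal (&cv);    /* ... and signal under the mutex; this is
                                       the happens-before the comment relies on.  */
      pthread_mutex_unlock (&m);
      pthread_join (t, NULL);
      return 0;
    }
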
diff --git a/nptl/pthread_cond_timedwait.c b/nptl/pthread_cond_timedwait.c
deleted file mode 100644
index 711a51de20..0000000000
--- a/nptl/pthread_cond_timedwait.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/* Copyright (C) 2003-2016 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <endian.h>
-#include <errno.h>
-#include <sysdep.h>
-#include <lowlevellock.h>
-#include <pthread.h>
-#include <pthreadP.h>
-#include <sys/time.h>
-#include <kernel-features.h>
-
-#include <shlib-compat.h>
-
-#ifndef HAVE_CLOCK_GETTIME_VSYSCALL
-# undef INTERNAL_VSYSCALL
-# define INTERNAL_VSYSCALL INTERNAL_SYSCALL
-# undef INLINE_VSYSCALL
-# define INLINE_VSYSCALL INLINE_SYSCALL
-#else
-# include <libc-vdso.h>
-#endif
-
-/* Cleanup handler, defined in pthread_cond_wait.c. */
-extern void __condvar_cleanup (void *arg)
- __attribute__ ((visibility ("hidden")));
-
-struct _condvar_cleanup_buffer
-{
- int oldtype;
- pthread_cond_t *cond;
- pthread_mutex_t *mutex;
- unsigned int bc_seq;
-};
-
-int
-__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
- const struct timespec *abstime)
-{
- struct _pthread_cleanup_buffer buffer;
- struct _condvar_cleanup_buffer cbuffer;
- int result = 0;
-
- /* Catch invalid parameters. */
- if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
- return EINVAL;
-
- int pshared = (cond->__data.__mutex == (void *) ~0l)
- ? LLL_SHARED : LLL_PRIVATE;
-
-#if (defined lll_futex_timed_wait_requeue_pi \
- && defined __ASSUME_REQUEUE_PI)
- int pi_flag = 0;
-#endif
-
- /* Make sure we are alone. */
- lll_lock (cond->__data.__lock, pshared);
-
- /* Now we can release the mutex. */
- int err = __pthread_mutex_unlock_usercnt (mutex, 0);
- if (err)
- {
- lll_unlock (cond->__data.__lock, pshared);
- return err;
- }
-
- /* We have one new user of the condvar. */
- ++cond->__data.__total_seq;
- ++cond->__data.__futex;
- cond->__data.__nwaiters += 1 << COND_NWAITERS_SHIFT;
-
- /* Work around the fact that the kernel rejects negative timeout values
- despite them being valid. */
- if (__glibc_unlikely (abstime->tv_sec < 0))
- goto timeout;
-
- /* Remember the mutex we are using here. If there is already a
- different address store this is a bad user bug. Do not store
- anything for pshared condvars. */
- if (cond->__data.__mutex != (void *) ~0l)
- cond->__data.__mutex = mutex;
-
- /* Prepare structure passed to cancellation handler. */
- cbuffer.cond = cond;
- cbuffer.mutex = mutex;
-
- /* Before we block we enable cancellation. Therefore we have to
- install a cancellation handler. */
- __pthread_cleanup_push (&buffer, __condvar_cleanup, &cbuffer);
-
- /* The current values of the wakeup counter. The "woken" counter
- must exceed this value. */
- unsigned long long int val;
- unsigned long long int seq;
- val = seq = cond->__data.__wakeup_seq;
- /* Remember the broadcast counter. */
- cbuffer.bc_seq = cond->__data.__broadcast_seq;
-
- while (1)
- {
-#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \
- || !defined lll_futex_timed_wait_bitset)
- struct timespec rt;
- {
-# ifdef __NR_clock_gettime
- INTERNAL_SYSCALL_DECL (err);
- (void) INTERNAL_VSYSCALL (clock_gettime, err, 2,
- (cond->__data.__nwaiters
- & ((1 << COND_NWAITERS_SHIFT) - 1)),
- &rt);
- /* Convert the absolute timeout value to a relative timeout. */
- rt.tv_sec = abstime->tv_sec - rt.tv_sec;
- rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
-# else
- /* Get the current time. So far we support only one clock. */
- struct timeval tv;
- (void) __gettimeofday (&tv, NULL);
-
- /* Convert the absolute timeout value to a relative timeout. */
- rt.tv_sec = abstime->tv_sec - tv.tv_sec;
- rt.tv_nsec = abstime->tv_nsec - tv.tv_usec * 1000;
-# endif
- }
- if (rt.tv_nsec < 0)
- {
- rt.tv_nsec += 1000000000;
- --rt.tv_sec;
- }
- /* Did we already time out? */
- if (__glibc_unlikely (rt.tv_sec < 0))
- {
- if (cbuffer.bc_seq != cond->__data.__broadcast_seq)
- goto bc_out;
-
- goto timeout;
- }
-#endif
-
- unsigned int futex_val = cond->__data.__futex;
-
- /* Prepare to wait. Release the condvar futex. */
- lll_unlock (cond->__data.__lock, pshared);
-
- /* Enable asynchronous cancellation. Required by the standard. */
- cbuffer.oldtype = __pthread_enable_asynccancel ();
-
-/* REQUEUE_PI was implemented after FUTEX_CLOCK_REALTIME, so it is sufficient
- to check just the former. */
-#if (defined lll_futex_timed_wait_requeue_pi \
- && defined __ASSUME_REQUEUE_PI)
- /* If pi_flag remained 1 then it means that we had the lock and the mutex
- but a spurious waker raced ahead of us. Give back the mutex before
- going into wait again. */
- if (pi_flag)
- {
- __pthread_mutex_cond_lock_adjust (mutex);
- __pthread_mutex_unlock_usercnt (mutex, 0);
- }
- pi_flag = USE_REQUEUE_PI (mutex);
-
- if (pi_flag)
- {
- unsigned int clockbit = (cond->__data.__nwaiters & 1
- ? 0 : FUTEX_CLOCK_REALTIME);
- err = lll_futex_timed_wait_requeue_pi (&cond->__data.__futex,
- futex_val, abstime, clockbit,
- &mutex->__data.__lock,
- pshared);
- pi_flag = (err == 0);
- }
- else
-#endif
-
- {
-#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \
- || !defined lll_futex_timed_wait_bitset)
- /* Wait until woken by signal or broadcast. */
- err = lll_futex_timed_wait (&cond->__data.__futex,
- futex_val, &rt, pshared);
-#else
- unsigned int clockbit = (cond->__data.__nwaiters & 1
- ? 0 : FUTEX_CLOCK_REALTIME);
- err = lll_futex_timed_wait_bitset (&cond->__data.__futex, futex_val,
- abstime, clockbit, pshared);
-#endif
- }
-
- /* Disable asynchronous cancellation. */
- __pthread_disable_asynccancel (cbuffer.oldtype);
-
- /* We are going to look at shared data again, so get the lock. */
- lll_lock (cond->__data.__lock, pshared);
-
- /* If a broadcast happened, we are done. */
- if (cbuffer.bc_seq != cond->__data.__broadcast_seq)
- goto bc_out;
-
- /* Check whether we are eligible for wakeup. */
- val = cond->__data.__wakeup_seq;
- if (val != seq && cond->__data.__woken_seq != val)
- break;
-
- /* Not woken yet. Maybe the time expired? */
- if (__glibc_unlikely (err == -ETIMEDOUT))
- {
- timeout:
- /* Yep. Adjust the counters. */
- ++cond->__data.__wakeup_seq;
- ++cond->__data.__futex;
-
- /* The error value. */
- result = ETIMEDOUT;
- break;
- }
- }
-
- /* Another thread woken up. */
- ++cond->__data.__woken_seq;
-
- bc_out:
-
- cond->__data.__nwaiters -= 1 << COND_NWAITERS_SHIFT;
-
- /* If pthread_cond_destroy was called on this variable already,
- notify the pthread_cond_destroy caller all waiters have left
- and it can be successfully destroyed. */
- if (cond->__data.__total_seq == -1ULL
- && cond->__data.__nwaiters < (1 << COND_NWAITERS_SHIFT))
- lll_futex_wake (&cond->__data.__nwaiters, 1, pshared);
-
- /* We are done with the condvar. */
- lll_unlock (cond->__data.__lock, pshared);
-
- /* The cancellation handling is back to normal, remove the handler. */
- __pthread_cleanup_pop (&buffer, 0);
-
- /* Get the mutex before returning. */
-#if (defined lll_futex_timed_wait_requeue_pi \
- && defined __ASSUME_REQUEUE_PI)
- if (pi_flag)
- {
- __pthread_mutex_cond_lock_adjust (mutex);
- err = 0;
- }
- else
-#endif
- err = __pthread_mutex_cond_lock (mutex);
-
- return err ?: result;
-}
-
-versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
- GLIBC_2_3_2);
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c
index 3f62acc6bd..2b434026c6 100644
--- a/nptl/pthread_cond_wait.c
+++ b/nptl/pthread_cond_wait.c
@@ -19,219 +19,655 @@
#include <endian.h>
#include <errno.h>
#include <sysdep.h>
-#include <lowlevellock.h>
+#include <futex-internal.h>
#include <pthread.h>
#include <pthreadP.h>
-#include <kernel-features.h>
+#include <sys/time.h>
+#include <atomic.h>
+#include <stdint.h>
+#include <stdbool.h>
#include <shlib-compat.h>
#include <stap-probe.h>
+#include <time.h>
+
+#include "pthread_cond_common.c"
+
struct _condvar_cleanup_buffer
{
- int oldtype;
+ uint64_t wseq;
pthread_cond_t *cond;
pthread_mutex_t *mutex;
- unsigned int bc_seq;
+ int private;
};
-void
-__attribute__ ((visibility ("hidden")))
-__condvar_cleanup (void *arg)
+/* Decrease the waiter reference count. */
+static void
+__condvar_confirm_wakeup (pthread_cond_t *cond, int private)
{
- struct _condvar_cleanup_buffer *cbuffer =
- (struct _condvar_cleanup_buffer *) arg;
- unsigned int destroying;
- int pshared = (cbuffer->cond->__data.__mutex == (void *) ~0l)
- ? LLL_SHARED : LLL_PRIVATE;
+ /* If destruction is pending (i.e., the wake-request flag is nonzero) and we
+ are the last waiter (prior value of __wrefs was 1 << 3), then wake any
+ threads waiting in pthread_cond_destroy. Release MO to synchronize with
+ these threads. Don't bother clearing the wake-up request flag. */
+ if ((atomic_fetch_add_release (&cond->__data.__wrefs, -8) >> 2) == 3)
+ futex_wake (&cond->__data.__wrefs, INT_MAX, private);
+}
+
- /* We are going to modify shared data. */
- lll_lock (cbuffer->cond->__data.__lock, pshared);
+/* Cancel waiting after having registered as a waiter previously. SEQ is our
+ position and G is our group index.
+ The goal of cancellation is to make our group smaller if that is still
+ possible. If we are in a closed group, this is not possible anymore; in
+ this case, we need to send a replacement signal for the one we effectively
+ consumed because the signal should have gotten consumed by another waiter
+ instead; we must not both cancel waiting and consume a signal.
+
+ Must not be called while still holding a reference on the group.
+
+ Returns true iff we consumed a signal.
+
+ On some kinds of timeouts, we may be able to pretend that a signal we
+ effectively consumed happened before the timeout (i.e., similarly to first
+ spinning on signals before actually checking whether the timeout has
+ passed already). Doing this would allow us to skip sending a replacement
+ signal, but this case might happen rarely because the end of the timeout
+ must race with someone else sending a signal. Therefore, we don't bother
+ trying to optimize this. */
+static void
+__condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g,
+ int private)
+{
+ bool consumed_signal = false;
- if (cbuffer->bc_seq == cbuffer->cond->__data.__broadcast_seq)
+ /* No deadlock with group switching is possible here because we do not
+ hold a reference on the group. */
+ __condvar_acquire_lock (cond, private);
+
+ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
+ if (g1_start > seq)
+ {
+ /* Our group is closed, so someone provided enough signals for it.
+ Thus, we effectively consumed a signal. */
+ consumed_signal = true;
+ }
+ else
{
- /* This thread is not waiting anymore. Adjust the sequence counters
- appropriately. We do not increment WAKEUP_SEQ if this would
- bump it over the value of TOTAL_SEQ. This can happen if a thread
- was woken and then canceled. */
- if (cbuffer->cond->__data.__wakeup_seq
- < cbuffer->cond->__data.__total_seq)
+ if (g1_start + __condvar_get_orig_size (cond) <= seq)
+ {
+ /* We are in the current G2 and thus cannot have consumed a signal.
+ Reduce its effective size or handle overflow. Remember that in
+ G2, unsigned int size is zero or a negative value. */
+ if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > 0)
+ {
+ cond->__data.__g_size[g]--;
+ }
+ else
+ {
+ /* Cancellations would overflow the maximum group size. Just
+ wake up everyone spuriously to create a clean state. This
+ also means we do not consume a signal someone else sent. */
+ __condvar_release_lock (cond, private);
+ __pthread_cond_broadcast (cond);
+ return;
+ }
+ }
+ else
{
- ++cbuffer->cond->__data.__wakeup_seq;
- ++cbuffer->cond->__data.__futex;
+ /* We are in current G1. If the group's size is zero, someone put
+ a signal in the group that nobody else but us can consume. */
+ if (cond->__data.__g_size[g] == 0)
+ consumed_signal = true;
+ else
+ {
+ /* Otherwise, we decrease the size of the group. This is
+ equivalent to atomically putting in a signal just for us and
+ consuming it right away. We do not consume a signal sent
+ by someone else. We also cannot have consumed a futex
+ wake-up because if we were cancelled or timed out in a futex
+ call, the futex will wake another waiter. */
+ cond->__data.__g_size[g]--;
+ }
}
- ++cbuffer->cond->__data.__woken_seq;
}
- cbuffer->cond->__data.__nwaiters -= 1 << COND_NWAITERS_SHIFT;
+ __condvar_release_lock (cond, private);
- /* If pthread_cond_destroy was called on this variable already,
- notify the pthread_cond_destroy caller all waiters have left
- and it can be successfully destroyed. */
- destroying = 0;
- if (cbuffer->cond->__data.__total_seq == -1ULL
- && cbuffer->cond->__data.__nwaiters < (1 << COND_NWAITERS_SHIFT))
+ if (consumed_signal)
{
- lll_futex_wake (&cbuffer->cond->__data.__nwaiters, 1, pshared);
- destroying = 1;
+ /* We effectively consumed a signal even though we didn't want to.
+ Therefore, we need to send a replacement signal.
+ If we wanted to optimize this, we could do what
+ pthread_cond_signal does right in the critical section above. */
+ __pthread_cond_signal (cond);
}
+}
- /* We are done. */
- lll_unlock (cbuffer->cond->__data.__lock, pshared);
-
- /* Wake everybody to make sure no condvar signal gets lost. */
- if (! destroying)
- lll_futex_wake (&cbuffer->cond->__data.__futex, INT_MAX, pshared);
-
- /* Get the mutex before returning unless asynchronous cancellation
- is in effect. We don't try to get the mutex if we already own it. */
- if (!(USE_REQUEUE_PI (cbuffer->mutex))
- || ((cbuffer->mutex->__data.__lock & FUTEX_TID_MASK)
- != THREAD_GETMEM (THREAD_SELF, tid)))
- {
- __pthread_mutex_cond_lock (cbuffer->mutex);
- }
- else
- __pthread_mutex_cond_lock_adjust (cbuffer->mutex);
+/* Wake up any signalers that might be waiting. */
+static void
+__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private)
+{
+ /* Release MO to synchronize-with the acquire load in
+ __condvar_quiesce_and_switch_g1. */
+ if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3)
+ {
+ /* Clear the wake-up request flag before waking up. We do not need more
+ than relaxed MO and it doesn't matter if we apply this for an aliased
+ group because we wake all futex waiters right after clearing the
+ flag. */
+ atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1);
+ futex_wake (cond->__data.__g_refs + g, INT_MAX, private);
+ }
}
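
[Editorial aside, not part of the patch: the reference-count-with-wake-flag protocol that __condvar_dec_grefs implements for __g_refs can be sketched in isolation with C11 atomics. The helper names below are hypothetical, and futex_wake is stubbed out with a printf because the real call is glibc-internal.]

  /* Illustrative sketch only.  A reference count whose LSB is a wake-up
     request flag, as used for __g_refs: waiters add 2 to take a reference;
     a thread waiting for quiescence sets the LSB; the waiter that drops the
     last reference and sees the flag performs the wake-up.  */
  #include <limits.h>
  #include <stdatomic.h>
  #include <stdio.h>

  static void
  futex_wake_stub (atomic_uint *word, int nr)
  {
    printf ("futex_wake on %p for %d waiters\n", (void *) word, nr);
  }

  static void
  grefs_acquire (atomic_uint *grefs)
  {
    atomic_fetch_add_explicit (grefs, 2, memory_order_acquire);
  }

  static void
  grefs_release (atomic_uint *grefs)
  {
    /* Old value 3 means: we held the last reference (2) and the wake-up
       request flag (1) was set.  Clear the flag and wake.  */
    if (atomic_fetch_add_explicit (grefs, -2, memory_order_release) == 3)
      {
        atomic_fetch_and_explicit (grefs, ~(unsigned int) 1,
                                   memory_order_relaxed);
        futex_wake_stub (grefs, INT_MAX);
      }
  }

  int
  main (void)
  {
    atomic_uint grefs = 0;
    grefs_acquire (&grefs);
    atomic_fetch_or_explicit (&grefs, 1, memory_order_relaxed); /* Request wake.  */
    grefs_release (&grefs);  /* Last reference -> prints one wake-up.  */
    return 0;
  }

[Keeping the wake-request flag in the LSB lets the releasing thread detect "last reference and someone is waiting" with a single fetch-add, which mirrors the check against 3 in the code above.]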
+/* Clean-up for cancellation of waiters waiting for normal signals. We cancel
+ our registration as a waiter, confirm we have woken up, and re-acquire the
+ mutex. */
+static void
+__condvar_cleanup_waiting (void *arg)
+{
+ struct _condvar_cleanup_buffer *cbuffer =
+ (struct _condvar_cleanup_buffer *) arg;
+ pthread_cond_t *cond = cbuffer->cond;
+ unsigned g = cbuffer->wseq & 1;
-int
-__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)
+ __condvar_dec_grefs (cond, g, cbuffer->private);
+
+ __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private);
+ /* FIXME With the current cancellation implementation, it is possible that
+ a thread is cancelled after it has returned from a syscall. This could
+ result in a cancelled waiter consuming a futex wake-up that then
+ causes another waiter in the same group to not wake up. To work around
+ this issue until we have fixed cancellation, just add a futex wake-up
+ conservatively. */
+ futex_wake (cond->__data.__g_signals + g, 1, cbuffer->private);
+
+ __condvar_confirm_wakeup (cond, cbuffer->private);
+
+ /* XXX If locking the mutex fails, should we just stop execution? This
+ might be better than silently ignoring the error. */
+ __pthread_mutex_cond_lock (cbuffer->mutex);
+}
+
+/* This condvar implementation guarantees that all calls to signal and
+ broadcast and all of the three virtually atomic parts of each call to wait
+ (i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re-
+ acquiring the mutex) happen in some total order that is consistent with the
+ happens-before relations in the calling program. However, this order does
+ not necessarily result in additional happens-before relations being
+ established (which aligns well with spurious wake-ups being allowed).
+
+ All waiters acquire a certain position in a 64b waiter sequence (__wseq).
+ This sequence determines which waiters are allowed to consume signals.
+ A broadcast is equivalent to sending as many signals as there are unblocked
+ waiters.
+ When a signal arrives, it samples the current value of __wseq with a
+ relaxed-MO load (i.e., the position the next waiter would get). (This is
+ sufficient because it is consistent with happens-before; the caller can
+ enforce stronger ordering constraints by calling signal while holding the
+ mutex.) Only waiters with a position less than the __wseq value observed
+ by the signal are eligible to consume this signal.
+
+ This would be straightforward to implement if waiters just spun, but
+ we need to let them block using futexes. Futexes give no guarantee of
+ waking in FIFO order, so we cannot reliably wake eligible waiters if we
+ just use a single futex. Also, futex words are 32b in size, but we need
+ to distinguish more than 1<<32 states because we need to represent the
+ order of wake-up (and thus which waiters are eligible to consume signals);
+ blocking in a futex is not atomic with a waiter determining its position in
+ the waiter sequence, so we need the futex word to reliably notify waiters
+ that they should not attempt to block anymore because they have already
+ been signaled in the meantime. While an ABA issue on a 32b value will
+ be rare, ignoring it when we are aware of it is not the right thing to do
+ either.
+
+ Therefore, we use a 64b counter to represent the waiter sequence (on
+ architectures which only support 32b atomics, we use a few bits less).
+ To deal with the blocking using futexes, we maintain two groups of waiters:
+ * Group G1 consists of waiters that are all eligible to consume signals;
+ incoming signals will always signal waiters in this group until all
+ waiters in G1 have been signaled.
+ * Group G2 consists of waiters that arrive when a G1 is present and still
+ contains waiters that have not been signaled. When all waiters in G1
+ are signaled and a new signal arrives, the new signal will convert G2
+ into the new G1 and create a new G2 for future waiters.
+
+ We cannot allocate new memory because of process-shared condvars, so we
+ have just two slots of groups that change their role between G1 and G2.
+ Each has a separate futex word, a number of signals available for
+ consumption, a size (number of waiters in the group that have not been
+ signaled), and a reference count.
+
+ The group reference count is used to maintain the number of waiters that
+ are using the group's futex. Before a group can change its role, the
+ reference count must show that no waiters are using the futex anymore; this
+ prevents ABA issues on the futex word.
+
+ To represent which intervals in the waiter sequence the groups cover (and
+ thus also which group slot contains G1 or G2), we use a 64b counter to
+ designate the start position of G1 (inclusive), and a single bit in the
+ waiter sequence counter to represent which group slot currently contains
+ G2. This allows us to switch group roles atomically wrt. waiters obtaining
+ a position in the waiter sequence. The G1 start position allows waiters to
+ figure out whether they are in a group that has already been completely
+ signaled (i.e., if the current G1 starts at a later position than the
+ waiter's position). Waiters cannot determine whether they are currently
+ in G2 or G1 -- but they do not have to because all they are interested in
+ is whether there are available signals, and they always start in G2 (whose
+ group slot they know because of the bit in the waiter sequence). Signalers
+ will simply fill the right group until it is completely signaled and can
+ be closed (they do not switch group roles until they really have to, to
+ decrease the likelihood of having to wait for waiters still holding a
+ reference on the now-closed G1).
+
+ Signalers maintain the initial size of G1 to be able to determine where
+ G2 starts (G2 is always open-ended until it becomes G1). They track the
+ remaining size of a group; when waiters cancel waiting (due to PThreads
+ cancellation or timeouts), they will decrease this remaining size as well.
+
+ To implement condvar destruction requirements (i.e., that
+ pthread_cond_destroy can be called as soon as all waiters have been
+ signaled), waiters increment a reference count before starting to wait and
+ decrement it after they stopped waiting but right before they acquire the
+ mutex associated with the condvar.
+
+ pthread_cond_t thus consists of the following fields (bits that are used
+ for flags are not part of the primary value of each field but are
+ necessary to make some things atomic, or exist because there was no space
+ for them elsewhere in the data structure):
+
+ __wseq: Waiter sequence counter
+ * LSB is index of current G2.
+ * Waiters fetch-add while having acquired the mutex associated with the
+ condvar. Signalers load it and fetch-xor it concurrently.
+ __g1_start: Starting position of G1 (inclusive)
+ * LSB is index of current G2.
+ * Modified by signalers while having acquired the condvar-internal lock
+ and observed concurrently by waiters.
+ __g1_orig_size: Initial size of G1
+ * The two least-significant bits represent the condvar-internal lock.
+ * Only accessed while having acquired the condvar-internal lock.
+ __wrefs: Waiter reference counter.
+ * Bit 2 is true if waiters should run futex_wake when they remove the
+ last reference. pthread_cond_destroy uses this as futex word.
+ * Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC).
+ * Bit 0 is true iff this is a process-shared condvar.
+ * Simple reference count used by both waiters and pthread_cond_destroy.
+ (If the format of __wrefs is changed, update nptl_lock_constants.pysym
+ and the pretty printers.)
+ For each of the two groups, we have:
+ __g_refs: Futex waiter reference count.
+ * LSB is true if waiters should run futex_wake when they remove the
+ last reference.
+ * Reference count used by waiters concurrently with signalers that have
+ acquired the condvar-internal lock.
+ __g_signals: The number of signals that can still be consumed.
+ * Used as a futex word by waiters. Used concurrently by waiters and
+ signalers.
+ * LSB is true iff this group has been completely signaled (i.e., it is
+ closed).
+ __g_size: Waiters remaining in this group (i.e., which have not been
+ signaled yet).
+ * Accessed by signalers and waiters that cancel waiting (both do so only
+ when having acquired the condvar-internal lock).
+ * The size of G2 is always zero because it cannot be determined until
+ the group becomes G1.
+ * Although this is of unsigned type, we rely on using unsigned overflow
+ rules to make this hold effectively negative values too (in
+ particular, when waiters in G2 cancel waiting).
+
+ A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields
+ a condvar that has G2 starting at position 0 and a G1 that is closed.
+
+ Because waiters do not claim ownership of a group right when obtaining a
+ position in __wseq but only reference count the group when using futexes
+ to block, it can happen that a group gets closed before a waiter can
+ increment the reference count. Therefore, waiters have to check whether
+ their group is already closed using __g1_start. They also have to perform
+ this check while spinning when trying to grab a signal from __g_signals.
+ Note that for these checks, using relaxed MO to load __g1_start is
+ sufficient because if a waiter can see a sufficiently large value, it could
+ also have consumed a signal in the waiter's group.
+
+ Waiters try to grab a signal from __g_signals without holding a reference
+ count, which can lead to stealing a signal from a more recent group after
+ their own group was already closed. They cannot always detect whether they
+ in fact did so because they do not know when they stole, but they can
+ conservatively add a signal back to the group they stole from; if they
+ did so unnecessarily, all that happens is a spurious wake-up. To make this
+ even less likely, __g1_start contains the index of the current G2 too,
+ which allows waiters to check whether there is aliasing on the group slots;
+ if there wasn't, they didn't steal from the current G1, which means that the
+ G1 they stole from must already have been closed and they do not need to
+ fix anything.
+
+ It is essential that the last field in pthread_cond_t is __g_signals[1]:
+ The previous condvar used a pointer-sized field in pthread_cond_t, so a
+ PTHREAD_COND_INITIALIZER from that condvar implementation might only
+ initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes
+ in total instead of the 48 we need). __g_signals[1] is not accessed before
+ the first group switch (G2 starts at index 0), which will set its value to
+ zero after a harmless fetch-or whose return value is ignored. This
+ effectively completes initialization.
+
+
+ Limitations:
+ * This condvar isn't designed to allow for more than
+ __PTHREAD_COND_MAX_GROUP_SIZE * (1 << 31) calls to __pthread_cond_wait.
+ * More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not
+ supported.
+ * Beyond what is allowed as errors by POSIX or documented, we can also
+ return the following errors:
+ * EPERM if MUTEX is a recursive mutex and the caller doesn't own it.
+ * EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes. Unlike
+ for other errors, this can happen when we re-acquire the mutex; this
+ isn't allowed by POSIX (which requires all errors to virtually happen
+ before we release the mutex or change the condvar state), but there's
+ nothing we can do really.
+ * When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors
+ returned by __pthread_tpp_change_priority. We will already have
+ released the mutex in such cases, so the caller cannot expect to own
+ MUTEX.
+
+ Other notes:
+ * Instead of the normal mutex unlock / lock functions, we use
+ __pthread_mutex_unlock_usercnt(m, 0) / __pthread_mutex_cond_lock(m)
+ because those will not change the mutex-internal users count, so that it
+ can be detected when a condvar is still associated with a particular
+ mutex because there is a waiter blocked on this condvar using this mutex.
+*/
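
[Editorial aside, not part of the patch: a standalone sketch of the bit layouts documented in the comment above. The decoder names are hypothetical and operate on plain integers rather than on pthread_cond_t; the encodings (LSB of __wseq/__g1_start is the G2 slot; __wrefs keeps the pshared, clock, and wake-request flags in its low three bits with the reference count above them) are taken from the field list above.]

  /* Illustrative sketch only.  */
  #include <stdint.h>
  #include <stdio.h>

  /* __wseq: LSB is the index of the current G2; the waiter position is in
     the remaining bits, which is why waiters fetch-add 2 per position.  */
  static unsigned int wseq_g2_index (uint64_t wseq) { return wseq & 1; }
  static uint64_t wseq_position (uint64_t wseq) { return wseq >> 1; }

  /* __wrefs: bit 0 = process-shared, bit 1 = clock ID, bit 2 = destruction
     wake-up request; the reference count uses the bits above, which is why
     waiters add/subtract 8.  */
  static int wrefs_pshared (unsigned int w) { return w & 1; }
  static int wrefs_clock_monotonic (unsigned int w) { return (w >> 1) & 1; }
  static int wrefs_destroy_wake_requested (unsigned int w) { return (w >> 2) & 1; }
  static unsigned int wrefs_count (unsigned int w) { return w >> 3; }

  int
  main (void)
  {
    uint64_t wseq = (5 << 1) | 1;      /* Next position is 5; G2 is slot 1.  */
    unsigned int wrefs = (2 << 3) | 4; /* Two waiters; destroy wake requested.  */
    printf ("G2 slot %u, next position %llu\n", wseq_g2_index (wseq),
            (unsigned long long) wseq_position (wseq));
    printf ("wrefs: count %u, pshared %d, monotonic %d, wake requested %d\n",
            wrefs_count (wrefs), wrefs_pshared (wrefs),
            wrefs_clock_monotonic (wrefs),
            wrefs_destroy_wake_requested (wrefs));
    return 0;
  }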
+static __always_inline int
+__pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
+ const struct timespec *abstime)
{
- struct _pthread_cleanup_buffer buffer;
- struct _condvar_cleanup_buffer cbuffer;
+ const int maxspin = 0;
int err;
- int pshared = (cond->__data.__mutex == (void *) ~0l)
- ? LLL_SHARED : LLL_PRIVATE;
-
-#if (defined lll_futex_wait_requeue_pi \
- && defined __ASSUME_REQUEUE_PI)
- int pi_flag = 0;
-#endif
+ int result = 0;
LIBC_PROBE (cond_wait, 2, cond, mutex);
- /* Make sure we are alone. */
- lll_lock (cond->__data.__lock, pshared);
-
- /* Now we can release the mutex. */
+ /* Acquire a position (SEQ) in the waiter sequence (WSEQ). We use an
+ atomic operation because signals and broadcasts may update the group
+ switch without acquiring the mutex. We do not need release MO here
+ because we do not need to establish any happens-before relation with
+ signalers (see __pthread_cond_signal); modification order alone
+ establishes a total order of waiters/signals. We do need acquire MO
+ to synchronize with group reinitialization in
+ __condvar_quiesce_and_switch_g1. */
+ uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2);
+ /* Find our group's index. We always go into what was G2 when we acquired
+ our position. */
+ unsigned int g = wseq & 1;
+ uint64_t seq = wseq >> 1;
+
+ /* Increase the waiter reference count. Relaxed MO is sufficient because
+ we only need to synchronize when decrementing the reference count. */
+ unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8);
+ int private = __condvar_get_private (flags);
+
+ /* Now that we are registered as a waiter, we can release the mutex.
+ Waiting on the condvar must be atomic with releasing the mutex, so if
+ the mutex is used to establish a happens-before relation with any
+ signaler, the waiter must be visible to the latter; thus, we release the
+ mutex after registering as waiter.
+ If releasing the mutex fails, we just cancel our registration as a
+ waiter and confirm that we have woken up. */
err = __pthread_mutex_unlock_usercnt (mutex, 0);
- if (__glibc_unlikely (err))
+ if (__glibc_unlikely (err != 0))
{
- lll_unlock (cond->__data.__lock, pshared);
+ __condvar_cancel_waiting (cond, seq, g, private);
+ __condvar_confirm_wakeup (cond, private);
return err;
}
- /* We have one new user of the condvar. */
- ++cond->__data.__total_seq;
- ++cond->__data.__futex;
- cond->__data.__nwaiters += 1 << COND_NWAITERS_SHIFT;
-
- /* Remember the mutex we are using here. If there is already a
- different address store this is a bad user bug. Do not store
- anything for pshared condvars. */
- if (cond->__data.__mutex != (void *) ~0l)
- cond->__data.__mutex = mutex;
-
- /* Prepare structure passed to cancellation handler. */
- cbuffer.cond = cond;
- cbuffer.mutex = mutex;
-
- /* Before we block we enable cancellation. Therefore we have to
- install a cancellation handler. */
- __pthread_cleanup_push (&buffer, __condvar_cleanup, &cbuffer);
-
- /* The current values of the wakeup counter. The "woken" counter
- must exceed this value. */
- unsigned long long int val;
- unsigned long long int seq;
- val = seq = cond->__data.__wakeup_seq;
- /* Remember the broadcast counter. */
- cbuffer.bc_seq = cond->__data.__broadcast_seq;
+ /* Now wait until a signal is available in our group or it is closed.
+ Acquire MO so that if we observe a value of zero written after group
+ switching in __condvar_quiesce_and_switch_g1, we synchronize with that
+ store and will see the prior update of __g1_start done while switching
+ groups too. */
+ unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
do
{
- unsigned int futex_val = cond->__data.__futex;
- /* Prepare to wait. Release the condvar futex. */
- lll_unlock (cond->__data.__lock, pshared);
-
- /* Enable asynchronous cancellation. Required by the standard. */
- cbuffer.oldtype = __pthread_enable_asynccancel ();
-
-#if (defined lll_futex_wait_requeue_pi \
- && defined __ASSUME_REQUEUE_PI)
- /* If pi_flag remained 1 then it means that we had the lock and the mutex
- but a spurious waker raced ahead of us. Give back the mutex before
- going into wait again. */
- if (pi_flag)
+ while (1)
{
- __pthread_mutex_cond_lock_adjust (mutex);
- __pthread_mutex_unlock_usercnt (mutex, 0);
+ /* Spin-wait first.
+ Note that spinning first without checking whether a timeout
+ passed might lead to what looks like a spurious wake-up even
+ though we should return ETIMEDOUT (e.g., if the caller provides
+ an absolute timeout that is clearly in the past). However,
+ (1) spurious wake-ups are allowed, (2) it seems unlikely that a
+ user will (ab)use pthread_cond_wait as a check for whether a
+ point in time is in the past, and (3) spinning first without
+ having to compare against the current time seems to be the right
+ choice from a performance perspective for most use cases. */
+ unsigned int spin = maxspin;
+ while (signals == 0 && spin > 0)
+ {
+ /* Check that we are not spinning on a group that's already
+ closed. */
+ if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
+ goto done;
+
+ /* TODO Back off. */
+
+ /* Reload signals. See above for MO. */
+ signals = atomic_load_acquire (cond->__data.__g_signals + g);
+ spin--;
+ }
+
+ /* If our group will be closed as indicated by the flag on signals,
+ don't bother grabbing a signal. */
+ if (signals & 1)
+ goto done;
+
+ /* If there is an available signal, don't block. */
+ if (signals != 0)
+ break;
+
+ /* No signals available after spinning, so prepare to block.
+ We first acquire a group reference and use acquire MO for that so
+ that we synchronize with the dummy read-modify-write in
+ __condvar_quiesce_and_switch_g1 if we read from that. In turn,
+ in that case we will see the closed flag on __g_signals
+ that designates a concurrent attempt to reuse the group's slot.
+ We use acquire MO for the __g_signals check to make the
+ __g1_start check work (see spinning above).
+ Note that the group reference acquisition will not mask the
+ release MO when decrementing the reference count because we use
+ an atomic read-modify-write operation and thus extend the release
+ sequence. */
+ atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
+ if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0)
+ || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)))
+ {
+ /* Our group is closed. Wake up any signalers that might be
+ waiting. */
+ __condvar_dec_grefs (cond, g, private);
+ goto done;
+ }
+
+ // Now block.
+ struct _pthread_cleanup_buffer buffer;
+ struct _condvar_cleanup_buffer cbuffer;
+ cbuffer.wseq = wseq;
+ cbuffer.cond = cond;
+ cbuffer.mutex = mutex;
+ cbuffer.private = private;
+ __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
+
+ if (abstime == NULL)
+ {
+ /* Block without a timeout. */
+ err = futex_wait_cancelable (
+ cond->__data.__g_signals + g, 0, private);
+ }
+ else
+ {
+ /* Block, but with a timeout.
+ Work around the fact that the kernel rejects negative timeout
+ values despite them being valid. */
+ if (__glibc_unlikely (abstime->tv_sec < 0))
+ err = ETIMEDOUT;
+
+ else if ((flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0)
+ {
+ /* CLOCK_MONOTONIC is requested. */
+ struct timespec rt;
+ if (__clock_gettime (CLOCK_MONOTONIC, &rt) != 0)
+ __libc_fatal ("clock_gettime does not support "
+ "CLOCK_MONOTONIC");
+ /* Convert the absolute timeout value to a relative
+ timeout. */
+ rt.tv_sec = abstime->tv_sec - rt.tv_sec;
+ rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
+ if (rt.tv_nsec < 0)
+ {
+ rt.tv_nsec += 1000000000;
+ --rt.tv_sec;
+ }
+ /* Did we already time out? */
+ if (__glibc_unlikely (rt.tv_sec < 0))
+ err = ETIMEDOUT;
+ else
+ err = futex_reltimed_wait_cancelable
+ (cond->__data.__g_signals + g, 0, &rt, private);
+ }
+ else
+ {
+ /* Use CLOCK_REALTIME. */
+ err = futex_abstimed_wait_cancelable
+ (cond->__data.__g_signals + g, 0, abstime, private);
+ }
+ }
+
+ __pthread_cleanup_pop (&buffer, 0);
+
+ if (__glibc_unlikely (err == ETIMEDOUT))
+ {
+ __condvar_dec_grefs (cond, g, private);
+ /* If we timed out, we effectively cancel waiting. Note that
+ we have decremented __g_refs before cancellation, so that a
+ deadlock between waiting for quiescence of our group in
+ __condvar_quiesce_and_switch_g1 and us trying to acquire
+ the lock during cancellation is not possible. */
+ __condvar_cancel_waiting (cond, seq, g, private);
+ result = ETIMEDOUT;
+ goto done;
+ }
+ else
+ __condvar_dec_grefs (cond, g, private);
+
+ /* Reload signals. See above for MO. */
+ signals = atomic_load_acquire (cond->__data.__g_signals + g);
}
- pi_flag = USE_REQUEUE_PI (mutex);
- if (pi_flag)
+ }
+ /* Try to grab a signal. Use acquire MO so that we see an up-to-date value
+ of __g1_start below (see spinning above for a similar case). In
+ particular, if we steal from a more recent group, we will also see a
+ more recent __g1_start below. */
+ while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
+ &signals, signals - 2));
+
+ /* We consumed a signal but we could have consumed from a more recent group
+ that aliased with ours due to being in the same group slot. If this
+ might be the case, our group must be closed as visible through
+ __g1_start. */
+ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
+ if (seq < (g1_start >> 1))
+ {
+ /* We potentially stole a signal from a more recent group but we do not
+ know which group we really consumed from.
+ We do not care about groups older than current G1 because they are
+ closed; we could have stolen from these, but then we just add a
+ spurious wake-up for the current groups.
+ We will never steal a signal from current G2 that was really intended
+ for G2 because G2 never receives signals (until it becomes G1). We
+ could have stolen a signal from G2 that was conservatively added by a
+ previous waiter that also thought it stole a signal -- but given that
+ that signal was added unnecessarily, it's not a problem if we steal
+ it.
+ Thus, the remaining case is that we could have stolen from the current
+ G1, where "current" means the __g1_start value we observed. However,
+ if the current G1 does not have the same slot index as we do, we did
+ not steal from it and do not need to undo that. This is the reason
+ for putting a bit with G2's index into __g1_start as well. */
+ if (((g1_start & 1) ^ 1) == g)
{
- err = lll_futex_wait_requeue_pi (&cond->__data.__futex,
- futex_val, &mutex->__data.__lock,
- pshared);
-
- pi_flag = (err == 0);
+ /* We have to conservatively undo our potential mistake of stealing
+ a signal. We can stop trying to do that when the current G1
+ changes because other spinning waiters will notice this too and
+ __condvar_quiesce_and_switch_g1 has checked that there are no
+ futex waiters anymore before switching G1.
+ Relaxed MO is fine for the __g1_start load because we merely need
+ to be able to observe this fact and do not have to observe
+ anything else as well.
+ ??? Would it help to spin for a little while to see whether the
+ current G1 gets closed? This might be worthwhile if the group is
+ small or close to being closed. */
+ unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g);
+ while (__condvar_load_g1_start_relaxed (cond) == g1_start)
+ {
+ /* Try to add a signal. We don't need to acquire the lock
+ because at worst we can cause a spurious wake-up. If the
+ group is in the process of being closed (LSB is true), this
+ has an effect similar to us adding a signal. */
+ if (((s & 1) != 0)
+ || atomic_compare_exchange_weak_relaxed
+ (cond->__data.__g_signals + g, &s, s + 2))
+ {
+ /* If we added a signal, we also need to add a wake-up on
+ the futex. We also need to do that if we skipped adding
+ a signal because the group is being closed: while
+ __condvar_quiesce_and_switch_g1 could have closed
+ the group, it might still be waiting for futex waiters to
+ leave (and one of those waiters might be the one we stole
+ the signal from, which would cause it to block using the
+ futex). */
+ futex_wake (cond->__data.__g_signals + g, 1, private);
+ break;
+ }
+ /* TODO Back off. */
+ }
}
- else
-#endif
- /* Wait until woken by signal or broadcast. */
- lll_futex_wait (&cond->__data.__futex, futex_val, pshared);
-
- /* Disable asynchronous cancellation. */
- __pthread_disable_asynccancel (cbuffer.oldtype);
-
- /* We are going to look at shared data again, so get the lock. */
- lll_lock (cond->__data.__lock, pshared);
-
- /* If a broadcast happened, we are done. */
- if (cbuffer.bc_seq != cond->__data.__broadcast_seq)
- goto bc_out;
-
- /* Check whether we are eligible for wakeup. */
- val = cond->__data.__wakeup_seq;
}
- while (val == seq || cond->__data.__woken_seq == val);
- /* Another thread woken up. */
- ++cond->__data.__woken_seq;
+ done:
- bc_out:
+ /* Confirm that we have been woken. We do that before acquiring the mutex
+ to allow for execution of pthread_cond_destroy while having acquired the
+ mutex. */
+ __condvar_confirm_wakeup (cond, private);
- cond->__data.__nwaiters -= 1 << COND_NWAITERS_SHIFT;
-
- /* If pthread_cond_destroy was called on this varaible already,
- notify the pthread_cond_destroy caller all waiters have left
- and it can be successfully destroyed. */
- if (cond->__data.__total_seq == -1ULL
- && cond->__data.__nwaiters < (1 << COND_NWAITERS_SHIFT))
- lll_futex_wake (&cond->__data.__nwaiters, 1, pshared);
+ /* Woken up; now re-acquire the mutex. If this doesn't fail, return RESULT,
+ which is set to ETIMEDOUT if a timeout occurred, or zero otherwise. */
+ err = __pthread_mutex_cond_lock (mutex);
+ /* XXX Abort on errors that are disallowed by POSIX? */
+ return (err != 0) ? err : result;
+}
- /* We are done with the condvar. */
- lll_unlock (cond->__data.__lock, pshared);
- /* The cancellation handling is back to normal, remove the handler. */
- __pthread_cleanup_pop (&buffer, 0);
+/* See __pthread_cond_wait_common. */
+int
+__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)
+{
+ return __pthread_cond_wait_common (cond, mutex, NULL);
+}
- /* Get the mutex before returning. Not needed for PI. */
-#if (defined lll_futex_wait_requeue_pi \
- && defined __ASSUME_REQUEUE_PI)
- if (pi_flag)
- {
- __pthread_mutex_cond_lock_adjust (mutex);
- return 0;
- }
- else
-#endif
- return __pthread_mutex_cond_lock (mutex);
+/* See __pthread_cond_wait_common. */
+int
+__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex,
+ const struct timespec *abstime)
+{
+ /* Check parameter validity. This should also tell the compiler that
+ it can assume that abstime is not NULL. */
+ if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
+ return EINVAL;
+ return __pthread_cond_wait_common (cond, mutex, abstime);
}
versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
GLIBC_2_3_2);
+versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
+ GLIBC_2_3_2);
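
[Usage sketch, not part of the patch: the caller-side pattern the implementation above assumes, i.e., re-checking the predicate because spurious wake-ups are allowed, here with an absolute CLOCK_MONOTONIC timeout selected via pthread_condattr_setclock (see the condattr changes below). Names such as wait_ready and the global flag are illustrative only.]

  #include <pthread.h>
  #include <stdbool.h>
  #include <time.h>

  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
  static pthread_cond_t cond;
  static bool ready;

  static void
  init_cond (void)
  {
    pthread_condattr_t attr;
    pthread_condattr_init (&attr);
    /* Make pthread_cond_timedwait interpret ABSTIME against CLOCK_MONOTONIC.  */
    pthread_condattr_setclock (&attr, CLOCK_MONOTONIC);
    pthread_cond_init (&cond, &attr);
    pthread_condattr_destroy (&attr);
  }

  static int
  wait_ready (unsigned int timeout_sec)
  {
    struct timespec abstime;
    clock_gettime (CLOCK_MONOTONIC, &abstime);
    abstime.tv_sec += timeout_sec;

    int err = 0;
    pthread_mutex_lock (&lock);
    /* Re-check the predicate: spurious wake-ups are allowed.  */
    while (!ready && err == 0)
      err = pthread_cond_timedwait (&cond, &lock, &abstime);
    bool ok = ready;
    pthread_mutex_unlock (&lock);
    return ok ? 0 : err;   /* 0 on success, ETIMEDOUT on timeout.  */
  }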
diff --git a/nptl/pthread_condattr_getclock.c b/nptl/pthread_condattr_getclock.c
index d156302ffb..cecb4aa8a5 100644
--- a/nptl/pthread_condattr_getclock.c
+++ b/nptl/pthread_condattr_getclock.c
@@ -23,6 +23,6 @@ int
pthread_condattr_getclock (const pthread_condattr_t *attr, clockid_t *clock_id)
{
*clock_id = (((((const struct pthread_condattr *) attr)->value) >> 1)
- & ((1 << COND_NWAITERS_SHIFT) - 1));
+ & ((1 << COND_CLOCK_BITS) - 1));
return 0;
}
diff --git a/nptl/pthread_condattr_getpshared.c b/nptl/pthread_condattr_getpshared.c
index 5a10f3eeb0..814796690c 100644
--- a/nptl/pthread_condattr_getpshared.c
+++ b/nptl/pthread_condattr_getpshared.c
@@ -22,7 +22,8 @@
int
pthread_condattr_getpshared (const pthread_condattr_t *attr, int *pshared)
{
- *pshared = ((const struct pthread_condattr *) attr)->value & 1;
+ *pshared = (((const struct pthread_condattr *) attr)->value & 1
+ ? PTHREAD_PROCESS_SHARED : PTHREAD_PROCESS_PRIVATE);
return 0;
}
diff --git a/nptl/pthread_condattr_init.c b/nptl/pthread_condattr_init.c
index 0ce42e5dfa..6e5168dadd 100644
--- a/nptl/pthread_condattr_init.c
+++ b/nptl/pthread_condattr_init.c
@@ -23,7 +23,9 @@
int
__pthread_condattr_init (pthread_condattr_t *attr)
{
- memset (attr, '\0', sizeof (*attr));
+ struct pthread_condattr *iattr = (struct pthread_condattr *) attr;
+ /* Default is not pshared and CLOCK_REALTIME. */
+ iattr->value = CLOCK_REALTIME << 1;
return 0;
}
diff --git a/nptl/pthread_condattr_setclock.c b/nptl/pthread_condattr_setclock.c
index 25e2a176a0..3cfad84cda 100644
--- a/nptl/pthread_condattr_setclock.c
+++ b/nptl/pthread_condattr_setclock.c
@@ -18,7 +18,7 @@
#include <assert.h>
#include <errno.h>
-#include <stdbool.h>
+#include <futex-internal.h>
#include <time.h>
#include <sysdep.h>
#include "pthreadP.h"
@@ -33,12 +33,17 @@ pthread_condattr_setclock (pthread_condattr_t *attr, clockid_t clock_id)
in the pthread_cond_t structure needs to be adjusted. */
return EINVAL;
+ /* If we do not support waiting using CLOCK_MONOTONIC, return an error. */
+ if (clock_id == CLOCK_MONOTONIC
+ && !futex_supports_exact_relative_timeouts ())
+ return ENOTSUP;
+
/* Make sure the value fits in the bits we reserved. */
- assert (clock_id < (1 << COND_NWAITERS_SHIFT));
+ assert (clock_id < (1 << COND_CLOCK_BITS));
int *valuep = &((struct pthread_condattr *) attr)->value;
- *valuep = ((*valuep & ~(((1 << COND_NWAITERS_SHIFT) - 1) << 1))
+ *valuep = ((*valuep & ~(((1 << COND_CLOCK_BITS) - 1) << 1))
| (clock_id << 1));
return 0;
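
[Editorial aside, not part of the patch: the condattr value encoding these hunks manipulate, written out as standalone helpers. COND_CLOCK_BITS is defined in pthreadP.h; the value 2 below is only an assumption to keep the sketch self-contained, and the helper names are hypothetical.]

  /* Illustrative sketch only: bit 0 of the condattr value is the
     process-shared flag, and the clock ID occupies the next
     COND_CLOCK_BITS bits.  */
  #include <assert.h>

  #define COND_CLOCK_BITS 2   /* Assumption for this sketch.  */

  static int
  encode_condattr (int pshared, int clock_id)
  {
    return (pshared & 1) | (clock_id << 1);
  }

  static int
  condattr_clock (int value)
  {
    return (value >> 1) & ((1 << COND_CLOCK_BITS) - 1);
  }

  static int
  condattr_pshared (int value)
  {
    return value & 1;
  }

  int
  main (void)
  {
    int v = encode_condattr (1, 1);   /* pshared, clock ID 1 (CLOCK_MONOTONIC on Linux).  */
    assert (condattr_clock (v) == 1 && condattr_pshared (v) == 1);
    return 0;
  }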
diff --git a/nptl/test-cond-printers.py b/nptl/test-cond-printers.py
index af0e12eb97..9e807c9f2c 100644
--- a/nptl/test-cond-printers.py
+++ b/nptl/test-cond-printers.py
@@ -35,7 +35,7 @@ try:
break_at(test_source, 'Test status (destroyed)')
continue_cmd() # Go to test_status_destroyed
- test_printer(var, to_string, {'Status': 'Destroyed'})
+ test_printer(var, to_string, {'Threads known to still execute a wait function': '0'})
continue_cmd() # Exit
diff --git a/nptl/tst-cond1.c b/nptl/tst-cond1.c
index 75ab9c8d8a..509bbd0be4 100644
--- a/nptl/tst-cond1.c
+++ b/nptl/tst-cond1.c
@@ -73,6 +73,9 @@ do_test (void)
puts ("parent: wait for condition");
+ /* This test will fail on spurious wake-ups, which are allowed; however,
+ the current implementation shouldn't produce spurious wake-ups in the
+ scenario we are testing here. */
err = pthread_cond_wait (&cond, &mut);
if (err != 0)
error (EXIT_FAILURE, err, "parent: cannot wait fir signal");
diff --git a/nptl/tst-cond20.c b/nptl/tst-cond20.c
index 918c4adb51..665a66a92e 100644
--- a/nptl/tst-cond20.c
+++ b/nptl/tst-cond20.c
@@ -96,7 +96,10 @@ do_test (void)
for (i = 0; i < ROUNDS; ++i)
{
- pthread_cond_wait (&cond2, &mut);
+ /* Make sure we discard spurious wake-ups. */
+ do
+ pthread_cond_wait (&cond2, &mut);
+ while (count != N);
if (i & 1)
pthread_mutex_unlock (&mut);
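
[For reference, not part of the patch: the general form of the loop added above; a waiter must always re-check its predicate around pthread_cond_wait because spurious wake-ups are permitted. The names below are illustrative only.]

  #include <pthread.h>
  #include <stdbool.h>

  static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  static pthread_cond_t c = PTHREAD_COND_INITIALIZER;
  static bool done;

  static void
  wait_done (void)
  {
    pthread_mutex_lock (&m);
    while (!done)              /* Discard spurious wake-ups.  */
      pthread_cond_wait (&c, &m);
    pthread_mutex_unlock (&m);
  }

  static void
  signal_done (void)
  {
    pthread_mutex_lock (&m);
    done = true;
    pthread_cond_signal (&c);
    pthread_mutex_unlock (&m);
  }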
diff --git a/nptl/tst-cond22.c b/nptl/tst-cond22.c
index bd978e50ca..64f19ea0a5 100644
--- a/nptl/tst-cond22.c
+++ b/nptl/tst-cond22.c
@@ -106,10 +106,11 @@ do_test (void)
status = 1;
}
- printf ("cond = { %d, %x, %lld, %lld, %lld, %p, %u, %u }\n",
- c.__data.__lock, c.__data.__futex, c.__data.__total_seq,
- c.__data.__wakeup_seq, c.__data.__woken_seq, c.__data.__mutex,
- c.__data.__nwaiters, c.__data.__broadcast_seq);
+ printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n",
+ c.__data.__wseq, c.__data.__g1_start,
+ c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0],
+ c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1],
+ c.__data.__g1_orig_size, c.__data.__wrefs);
if (pthread_create (&th, NULL, tf, (void *) 1l) != 0)
{
@@ -148,10 +149,11 @@ do_test (void)
status = 1;
}
- printf ("cond = { %d, %x, %lld, %lld, %lld, %p, %u, %u }\n",
- c.__data.__lock, c.__data.__futex, c.__data.__total_seq,
- c.__data.__wakeup_seq, c.__data.__woken_seq, c.__data.__mutex,
- c.__data.__nwaiters, c.__data.__broadcast_seq);
+ printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n",
+ c.__data.__wseq, c.__data.__g1_start,
+ c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0],
+ c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1],
+ c.__data.__g1_orig_size, c.__data.__wrefs);
return status;
}