aboutsummaryrefslogtreecommitdiff
path: root/nptl
diff options
context:
space:
mode:
author Torvald Riegel <triegel@redhat.com> 2016-12-22 10:20:43 +0100
committer Torvald Riegel <triegel@redhat.com> 2017-01-13 17:16:07 +0100
commit 65810f0ef05e8c9e333f17a44e77808b163ca298 (patch)
tree 09928382f6015251c3b351650204f65fc8d6ec21 /nptl
parent f32941d80c7f532031061f8dd4704fab9c275cfe (diff)
download glibc-65810f0ef05e8c9e333f17a44e77808b163ca298.tar
glibc-65810f0ef05e8c9e333f17a44e77808b163ca298.tar.gz
glibc-65810f0ef05e8c9e333f17a44e77808b163ca298.tar.bz2
glibc-65810f0ef05e8c9e333f17a44e77808b163ca298.zip
robust mutexes: Fix broken x86 assembly by removing it
lll_robust_unlock on i386 and x86_64 first sets the futex word to FUTEX_WAITERS|0 before calling __lll_unlock_wake, which will set the futex word to 0. If the thread is killed between these steps, then the futex word will be FUTEX_WAITERS|0, and the kernel (at least current upstream) will not set it to FUTEX_OWNER_DIED|FUTEX_WAITERS because 0 is not equal to the TID of the crashed thread. The lll_robust_lock assembly code on i386 and x86_64 is not prepared to deal with this case because the fastpath tries to only CAS 0 to TID and not FUTEX_WAITERS|0 to TID; the slowpath simply waits until it can CAS 0 to TID or the futex_word has the FUTEX_OWNER_DIED bit set. This issue is fixed by removing the custom x86 assembly code and using the generic C code instead. However, instead of adding more duplicate code to the custom x86 lowlevellock.h, the code of the lll_robust* functions is inlined into the single call sites that exist for each of these functions in the pthread_mutex_* functions. The robust mutex paths in the latter have been slightly reorganized to make them simpler. This patch is meant to be easy to backport, so C11-style atomics are not used. [BZ #20985] * nptl/Makefile: Adapt. * nptl/pthread_mutex_cond_lock.c (LLL_ROBUST_MUTEX_LOCK): Remove. (LLL_ROBUST_MUTEX_LOCK_MODIFIER): New. * nptl/pthread_mutex_lock.c (LLL_ROBUST_MUTEX_LOCK): Remove. (LLL_ROBUST_MUTEX_LOCK_MODIFIER): New. (__pthread_mutex_lock_full): Inline lll_robust* functions and adapt. * nptl/pthread_mutex_timedlock.c (pthread_mutex_timedlock): Inline lll_robust* functions and adapt. * nptl/pthread_mutex_unlock.c (__pthread_mutex_unlock_full): Likewise. * sysdeps/nptl/lowlevellock.h (__lll_robust_lock_wait, __lll_robust_lock, lll_robust_cond_lock, __lll_robust_timedlock_wait, __lll_robust_timedlock, __lll_robust_unlock): Remove. * sysdeps/unix/sysv/linux/i386/lowlevellock.h (lll_robust_lock, lll_robust_cond_lock, lll_robust_timedlock, lll_robust_unlock): Remove. 
* sysdeps/unix/sysv/linux/x86_64/lowlevellock.h (lll_robust_lock, lll_robust_cond_lock, lll_robust_timedlock, lll_robust_unlock): Remove. * sysdeps/unix/sysv/linux/sparc/lowlevellock.h (__lll_robust_lock_wait, __lll_robust_lock, lll_robust_cond_lock, __lll_robust_timedlock_wait, __lll_robust_timedlock, __lll_robust_unlock): Remove. * nptl/lowlevelrobustlock.c: Remove file. * nptl/lowlevelrobustlock.sym: Likewise. * sysdeps/unix/sysv/linux/i386/lowlevelrobustlock.S: Likewise. * sysdeps/unix/sysv/linux/x86_64/lowlevelrobustlock.S: Likewise.
Diffstat (limited to 'nptl')
-rw-r--r-- nptl/Makefile 4
-rw-r--r-- nptl/lowlevelrobustlock.c 136
-rw-r--r-- nptl/lowlevelrobustlock.sym 6
-rw-r--r-- nptl/pthread_mutex_cond_lock.c 6
-rw-r--r-- nptl/pthread_mutex_lock.c 79
-rw-r--r-- nptl/pthread_mutex_timedlock.c 106
-rw-r--r-- nptl/pthread_mutex_unlock.c 18
7 files changed, 158 insertions, 197 deletions
diff --git a/nptl/Makefile b/nptl/Makefile
index 9d5738f270..dcb5953325 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -117,7 +117,7 @@ libpthread-routines = nptl-init vars events version pt-interp \
cleanup_defer_compat unwind \
pt-longjmp pt-cleanup\
cancellation \
- lowlevellock lowlevelrobustlock \
+ lowlevellock \
lll_timedlock_wait lll_timedwait_tid \
pt-fork pt-vfork \
$(pthread-compat-wrappers) \
@@ -309,7 +309,7 @@ tests-nolibpthread = tst-unload
gen-as-const-headers = pthread-errnos.sym \
unwindbuf.sym \
- lowlevelrobustlock.sym pthread-pi-defines.sym
+ pthread-pi-defines.sym
gen-py-const-headers := nptl_lock_constants.pysym
pretty-printers := nptl-printers.py
diff --git a/nptl/lowlevelrobustlock.c b/nptl/lowlevelrobustlock.c
deleted file mode 100644
index 3a91a6b9a7..0000000000
--- a/nptl/lowlevelrobustlock.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/* Copyright (C) 2006-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jakub@redhat.com>, 2006.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <errno.h>
-#include <sysdep.h>
-#include <lowlevellock.h>
-#include <sys/time.h>
-#include <pthreadP.h>
-#include <kernel-features.h>
-
-
-int
-__lll_robust_lock_wait (int *futex, int private)
-{
- int oldval = *futex;
- int tid = THREAD_GETMEM (THREAD_SELF, tid);
-
- /* If the futex changed meanwhile try locking again. */
- if (oldval == 0)
- goto try;
-
- do
- {
- /* If the owner died, return the present value of the futex. */
- if (__glibc_unlikely (oldval & FUTEX_OWNER_DIED))
- return oldval;
-
- /* Try to put the lock into state 'acquired, possibly with waiters'. */
- int newval = oldval | FUTEX_WAITERS;
- if (oldval != newval
- && atomic_compare_and_exchange_bool_acq (futex, newval, oldval))
- continue;
-
- /* If *futex == 2, wait until woken. */
- lll_futex_wait (futex, newval, private);
-
- try:
- ;
- }
- while ((oldval = atomic_compare_and_exchange_val_acq (futex,
- tid | FUTEX_WAITERS,
- 0)) != 0);
- return 0;
-}
-
-
-int
-__lll_robust_timedlock_wait (int *futex, const struct timespec *abstime,
- int private)
-{
- /* Reject invalid timeouts. */
- if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
- return EINVAL;
-
- int tid = THREAD_GETMEM (THREAD_SELF, tid);
- int oldval = *futex;
-
- /* If the futex changed meanwhile, try locking again. */
- if (oldval == 0)
- goto try;
-
- /* Work around the fact that the kernel rejects negative timeout values
- despite them being valid. */
- if (__glibc_unlikely (abstime->tv_sec < 0))
- return ETIMEDOUT;
-
- do
- {
-#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \
- || !defined lll_futex_timed_wait_bitset)
- struct timeval tv;
- struct timespec rt;
-
- /* Get the current time. */
- (void) __gettimeofday (&tv, NULL);
-
- /* Compute relative timeout. */
- rt.tv_sec = abstime->tv_sec - tv.tv_sec;
- rt.tv_nsec = abstime->tv_nsec - tv.tv_usec * 1000;
- if (rt.tv_nsec < 0)
- {
- rt.tv_nsec += 1000000000;
- --rt.tv_sec;
- }
-
- /* Already timed out? */
- if (rt.tv_sec < 0)
- return ETIMEDOUT;
-#endif
-
- /* If the owner died, return the present value of the futex. */
- if (__glibc_unlikely (oldval & FUTEX_OWNER_DIED))
- return oldval;
-
- /* Try to put the lock into state 'acquired, possibly with waiters'. */
- int newval = oldval | FUTEX_WAITERS;
- if (oldval != newval
- && atomic_compare_and_exchange_bool_acq (futex, newval, oldval))
- continue;
-
- /* If *futex == 2, wait until woken or timeout. */
-#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \
- || !defined lll_futex_timed_wait_bitset)
- lll_futex_timed_wait (futex, newval, &rt, private);
-#else
- int err = lll_futex_timed_wait_bitset (futex, newval, abstime,
- FUTEX_CLOCK_REALTIME, private);
- /* The futex call timed out. */
- if (err == -ETIMEDOUT)
- return -err;
-#endif
-
- try:
- ;
- }
- while ((oldval = atomic_compare_and_exchange_val_acq (futex,
- tid | FUTEX_WAITERS,
- 0)) != 0);
-
- return 0;
-}
diff --git a/nptl/lowlevelrobustlock.sym b/nptl/lowlevelrobustlock.sym
deleted file mode 100644
index 2f1e9da52b..0000000000
--- a/nptl/lowlevelrobustlock.sym
+++ /dev/null
@@ -1,6 +0,0 @@
-#include <stddef.h>
-#include <pthreadP.h>
-
---
-
-TID offsetof (struct pthread, tid)
diff --git a/nptl/pthread_mutex_cond_lock.c b/nptl/pthread_mutex_cond_lock.c
index 2ac421fd63..bd7149ef1d 100644
--- a/nptl/pthread_mutex_cond_lock.c
+++ b/nptl/pthread_mutex_cond_lock.c
@@ -11,9 +11,9 @@
lll_cond_trylock ((mutex)->__data.__lock)
#define LLL_MUTEX_TRYLOCK_ELISION(mutex) LLL_MUTEX_TRYLOCK(mutex)
-#define LLL_ROBUST_MUTEX_LOCK(mutex, id) \
- lll_robust_cond_lock ((mutex)->__data.__lock, id, \
- PTHREAD_ROBUST_MUTEX_PSHARED (mutex))
+/* We need to assume that there are other threads blocked on the futex.
+ See __pthread_mutex_lock_full for further details. */
+#define LLL_ROBUST_MUTEX_LOCK_MODIFIER FUTEX_WAITERS
#define __pthread_mutex_lock internal_function __pthread_mutex_cond_lock
#define __pthread_mutex_lock_full __pthread_mutex_cond_lock_full
#define NO_INCR
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
index 1795cc13c5..9b81f88a9f 100644
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -36,14 +36,14 @@
#define lll_trylock_elision(a,t) lll_trylock(a)
#endif
+/* Some of the following definitions differ when pthread_mutex_cond_lock.c
+ includes this file. */
#ifndef LLL_MUTEX_LOCK
# define LLL_MUTEX_LOCK(mutex) \
lll_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
# define LLL_MUTEX_TRYLOCK(mutex) \
lll_trylock ((mutex)->__data.__lock)
-# define LLL_ROBUST_MUTEX_LOCK(mutex, id) \
- lll_robust_lock ((mutex)->__data.__lock, id, \
- PTHREAD_ROBUST_MUTEX_PSHARED (mutex))
+# define LLL_ROBUST_MUTEX_LOCK_MODIFIER 0
# define LLL_MUTEX_LOCK_ELISION(mutex) \
lll_lock_elision ((mutex)->__data.__lock, (mutex)->__data.__elision, \
PTHREAD_MUTEX_PSHARED (mutex))
@@ -185,11 +185,21 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
/* This is set to FUTEX_WAITERS iff we might have shared the
FUTEX_WAITERS flag with other threads, and therefore need to keep it
set to avoid lost wake-ups. We have the same requirement in the
- simple mutex algorithm. */
- unsigned int assume_other_futex_waiters = 0;
- do
+ simple mutex algorithm.
+ We start with value zero for a normal mutex, and FUTEX_WAITERS if we
+ are building the special case mutexes for use from within condition
+ variables. */
+ unsigned int assume_other_futex_waiters = LLL_ROBUST_MUTEX_LOCK_MODIFIER;
+ while (1)
{
- again:
+ /* Try to acquire the lock through a CAS from 0 (not acquired) to
+ our TID | assume_other_futex_waiters. */
+ if (__glibc_likely ((oldval == 0)
+ && (atomic_compare_and_exchange_bool_acq
+ (&mutex->__data.__lock,
+ id | assume_other_futex_waiters, 0) == 0)))
+ break;
+
if ((oldval & FUTEX_OWNER_DIED) != 0)
{
/* The previous owner died. Try locking the mutex. */
@@ -209,7 +219,7 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
if (newval != oldval)
{
oldval = newval;
- goto again;
+ continue;
}
/* We got the mutex. */
@@ -260,24 +270,47 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
}
}
- oldval = LLL_ROBUST_MUTEX_LOCK (mutex,
- id | assume_other_futex_waiters);
- /* See above. We set FUTEX_WAITERS and might have shared this flag
- with other threads; thus, we need to preserve it. */
- assume_other_futex_waiters = FUTEX_WAITERS;
-
- if (__builtin_expect (mutex->__data.__owner
- == PTHREAD_MUTEX_NOTRECOVERABLE, 0))
+ /* We cannot acquire the mutex nor has its owner died. Thus, try
+ to block using futexes. Set FUTEX_WAITERS if necessary so that
+ other threads are aware that there are potentially threads
+ blocked on the futex. Restart if oldval changed in the
+ meantime. */
+ if ((oldval & FUTEX_WAITERS) == 0)
{
- /* This mutex is now not recoverable. */
- mutex->__data.__count = 0;
- lll_unlock (mutex->__data.__lock,
- PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
- THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
- return ENOTRECOVERABLE;
+ if (atomic_compare_and_exchange_bool_acq (&mutex->__data.__lock,
+ oldval | FUTEX_WAITERS,
+ oldval)
+ != 0)
+ {
+ oldval = mutex->__data.__lock;
+ continue;
+ }
+ oldval |= FUTEX_WAITERS;
}
+
+ /* It is now possible that we share the FUTEX_WAITERS flag with
+ another thread; therefore, update assume_other_futex_waiters so
+ that we do not forget about this when handling other cases
+ above and thus do not cause lost wake-ups. */
+ assume_other_futex_waiters |= FUTEX_WAITERS;
+
+ /* Block using the futex and reload current lock value. */
+ lll_futex_wait (&mutex->__data.__lock, oldval,
+ PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
+ oldval = mutex->__data.__lock;
+ }
+
+ /* We have acquired the mutex; check if it is still consistent. */
+ if (__builtin_expect (mutex->__data.__owner
+ == PTHREAD_MUTEX_NOTRECOVERABLE, 0))
+ {
+ /* This mutex is now not recoverable. */
+ mutex->__data.__count = 0;
+ int private = PTHREAD_ROBUST_MUTEX_PSHARED (mutex);
+ lll_unlock (mutex->__data.__lock, private);
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
+ return ENOTRECOVERABLE;
}
- while ((oldval & FUTEX_OWNER_DIED) != 0);
mutex->__data.__count = 1;
ENQUEUE_MUTEX (mutex);
diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
index 6915e829c1..ddd46fe414 100644
--- a/nptl/pthread_mutex_timedlock.c
+++ b/nptl/pthread_mutex_timedlock.c
@@ -147,9 +147,16 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
set to avoid lost wake-ups. We have the same requirement in the
simple mutex algorithm. */
unsigned int assume_other_futex_waiters = 0;
- do
+ while (1)
{
- again:
+ /* Try to acquire the lock through a CAS from 0 (not acquired) to
+ our TID | assume_other_futex_waiters. */
+ if (__glibc_likely ((oldval == 0)
+ && (atomic_compare_and_exchange_bool_acq
+ (&mutex->__data.__lock,
+ id | assume_other_futex_waiters, 0) == 0)))
+ break;
+
if ((oldval & FUTEX_OWNER_DIED) != 0)
{
/* The previous owner died. Try locking the mutex. */
@@ -162,7 +169,7 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
if (newval != oldval)
{
oldval = newval;
- goto again;
+ continue;
}
/* We got the mutex. */
@@ -209,30 +216,87 @@ pthread_mutex_timedlock (pthread_mutex_t *mutex,
}
}
- result = lll_robust_timedlock (mutex->__data.__lock, abstime,
- id | assume_other_futex_waiters,
- PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
- /* See above. We set FUTEX_WAITERS and might have shared this flag
- with other threads; thus, we need to preserve it. */
- assume_other_futex_waiters = FUTEX_WAITERS;
+ /* We are about to block; check whether the timeout is invalid. */
+ if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
+ return EINVAL;
+ /* Work around the fact that the kernel rejects negative timeout
+ values despite them being valid. */
+ if (__glibc_unlikely (abstime->tv_sec < 0))
+ return ETIMEDOUT;
+#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \
+ || !defined lll_futex_timed_wait_bitset)
+ struct timeval tv;
+ struct timespec rt;
+
+ /* Get the current time. */
+ (void) __gettimeofday (&tv, NULL);
+
+ /* Compute relative timeout. */
+ rt.tv_sec = abstime->tv_sec - tv.tv_sec;
+ rt.tv_nsec = abstime->tv_nsec - tv.tv_usec * 1000;
+ if (rt.tv_nsec < 0)
+ {
+ rt.tv_nsec += 1000000000;
+ --rt.tv_sec;
+ }
+
+ /* Already timed out? */
+ if (rt.tv_sec < 0)
+ return ETIMEDOUT;
+#endif
- if (__builtin_expect (mutex->__data.__owner
- == PTHREAD_MUTEX_NOTRECOVERABLE, 0))
+ /* We cannot acquire the mutex nor has its owner died. Thus, try
+ to block using futexes. Set FUTEX_WAITERS if necessary so that
+ other threads are aware that there are potentially threads
+ blocked on the futex. Restart if oldval changed in the
+ meantime. */
+ if ((oldval & FUTEX_WAITERS) == 0)
{
- /* This mutex is now not recoverable. */
- mutex->__data.__count = 0;
- lll_unlock (mutex->__data.__lock,
- PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
- THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
- return ENOTRECOVERABLE;
+ if (atomic_compare_and_exchange_bool_acq (&mutex->__data.__lock,
+ oldval | FUTEX_WAITERS,
+ oldval)
+ != 0)
+ {
+ oldval = mutex->__data.__lock;
+ continue;
+ }
+ oldval |= FUTEX_WAITERS;
}
- if (result == ETIMEDOUT || result == EINVAL)
- goto out;
+ /* It is now possible that we share the FUTEX_WAITERS flag with
+ another thread; therefore, update assume_other_futex_waiters so
+ that we do not forget about this when handling other cases
+ above and thus do not cause lost wake-ups. */
+ assume_other_futex_waiters |= FUTEX_WAITERS;
+
+ /* Block using the futex. */
+#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \
+ || !defined lll_futex_timed_wait_bitset)
+ lll_futex_timed_wait (&mutex->__data.__lock, oldval,
+ &rt, PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
+#else
+ int err = lll_futex_timed_wait_bitset (&mutex->__data.__lock,
+ oldval, abstime, FUTEX_CLOCK_REALTIME,
+ PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
+ /* The futex call timed out. */
+ if (err == -ETIMEDOUT)
+ return -err;
+#endif
+ /* Reload current lock value. */
+ oldval = mutex->__data.__lock;
+ }
- oldval = result;
+ /* We have acquired the mutex; check if it is still consistent. */
+ if (__builtin_expect (mutex->__data.__owner
+ == PTHREAD_MUTEX_NOTRECOVERABLE, 0))
+ {
+ /* This mutex is now not recoverable. */
+ mutex->__data.__count = 0;
+ int private = PTHREAD_ROBUST_MUTEX_PSHARED (mutex);
+ lll_unlock (mutex->__data.__lock, private);
+ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
+ return ENOTRECOVERABLE;
}
- while ((oldval & FUTEX_OWNER_DIED) != 0);
mutex->__data.__count = 1;
ENQUEUE_MUTEX (mutex);
diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c
index 130883c4bb..d3b39900bc 100644
--- a/nptl/pthread_mutex_unlock.c
+++ b/nptl/pthread_mutex_unlock.c
@@ -96,6 +96,7 @@ internal_function
__pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
{
int newowner = 0;
+ int private;
switch (PTHREAD_MUTEX_TYPE (mutex))
{
@@ -149,9 +150,14 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
/* One less user. */
--mutex->__data.__nusers;
- /* Unlock. */
- lll_robust_unlock (mutex->__data.__lock,
- PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
+ /* Unlock by setting the lock to 0 (not acquired); if the lock had
+ FUTEX_WAITERS set previously, then wake any waiters.
+ The unlock operation must be the last access to the mutex to not
+ violate the mutex destruction requirements (see __lll_unlock). */
+ private = PTHREAD_ROBUST_MUTEX_PSHARED (mutex);
+ if (__glibc_unlikely ((atomic_exchange_rel (&mutex->__data.__lock, 0)
+ & FUTEX_WAITERS) != 0))
+ lll_futex_wake (&mutex->__data.__lock, 1, private);
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
break;
@@ -234,9 +240,9 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
to not violate the mutex destruction requirements (see
lll_unlock). */
int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
- int private = (robust
- ? PTHREAD_ROBUST_MUTEX_PSHARED (mutex)
- : PTHREAD_MUTEX_PSHARED (mutex));
+ private = (robust
+ ? PTHREAD_ROBUST_MUTEX_PSHARED (mutex)
+ : PTHREAD_MUTEX_PSHARED (mutex));
/* Unlock the mutex using a CAS unless there are futex waiters or our
TID is not the value of __lock anymore, in which case we let the
kernel take care of the situation. Use release MO in the CAS to