diff options
Diffstat (limited to 'js/src/jit/x86-shared/AtomicOperations-x86-shared.h')
-rw-r--r-- | js/src/jit/x86-shared/AtomicOperations-x86-shared.h | 602 |
1 files changed, 602 insertions, 0 deletions
diff --git a/js/src/jit/x86-shared/AtomicOperations-x86-shared.h b/js/src/jit/x86-shared/AtomicOperations-x86-shared.h new file mode 100644 index 000000000..b34aba7ef --- /dev/null +++ b/js/src/jit/x86-shared/AtomicOperations-x86-shared.h @@ -0,0 +1,602 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* For overall documentation, see jit/AtomicOperations.h */ + +#ifndef jit_shared_AtomicOperations_x86_shared_h +#define jit_shared_AtomicOperations_x86_shared_h + +#include "mozilla/Assertions.h" +#include "mozilla/Types.h" + +// Lock-freedom on x86 and x64: +// +// On x86 and x64 there are atomic instructions for 8-byte accesses: +// +// Load and stores: +// - Loads and stores are single-copy atomic for up to 8 bytes +// starting with the Pentium; the store requires a post-fence for +// sequential consistency +// +// CompareExchange: +// - On x64 CMPXCHGQ can always be used +// - On x86 CMPXCHG8B can be used starting with the first Pentium +// +// Exchange: +// - On x64 XCHGQ can always be used +// - On x86 one has to use a CompareExchange loop +// +// Observe also that the JIT will not be enabled unless we have SSE2, +// which was introduced with the Pentium 4. Ergo the JIT will be able +// to use atomic instructions for up to 8 bytes on all x86 platforms +// for the primitives we care about. +// +// However, C++ compilers and libraries may not provide access to +// those 8-byte instructions directly. Clang in 32-bit mode does not +// provide 8-byte atomic primitives at all (even with eg -arch i686 +// specified). On Windows 32-bit, MSVC does not provide +// _InterlockedExchange64 since it does not map directly to an +// instruction. +// +// There are thus sundry workarounds below to handle known corner +// cases. + +#if defined(__clang__) || defined(__GNUC__) + +// The default implementation tactic for gcc/clang is to use the newer +// __atomic intrinsics added for use in C++11 <atomic>. Where that +// isn't available, we use GCC's older __sync functions instead. +// +// ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS is kept as a backward +// compatible option for older compilers: enable this to use GCC's old +// __sync functions instead of the newer __atomic functions. This +// will be required for GCC 4.6.x and earlier, and probably for Clang +// 3.1, should we need to use those versions. + +// #define ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + +// Lock-free 8-byte atomics are assumed on x86 but must be disabled in +// corner cases, see comments below and in isLockfree8(). + +# define LOCKFREE8 + +// This pertains to Clang compiling with -m32, in this case the 64-bit +// __atomic builtins are not available (observed on various Mac OS X +// versions with Apple Clang and on Linux with Clang 3.5). +// +// For now just punt: disable lock-free 8-word data. The JIT will +// call isLockfree8() to determine what to do and will stay in sync. +// (Bug 1146817 tracks the work to improve on this.) + +# if defined(__clang__) && defined(__i386) +# undef LOCKFREE8 +# endif + +inline bool +js::jit::AtomicOperations::isLockfree8() +{ +# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + MOZ_ASSERT(__atomic_always_lock_free(sizeof(int8_t), 0)); + MOZ_ASSERT(__atomic_always_lock_free(sizeof(int16_t), 0)); + MOZ_ASSERT(__atomic_always_lock_free(sizeof(int32_t), 0)); +# endif +# ifdef LOCKFREE8 +# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + MOZ_ASSERT(__atomic_always_lock_free(sizeof(int64_t), 0)); +# endif + return true; +# else + return false; +# endif +} + +inline void +js::jit::AtomicOperations::fenceSeqCst() +{ +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + __sync_synchronize(); +# else + __atomic_thread_fence(__ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::loadSeqCst(T* addr) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + // Inhibit compiler reordering with a volatile load. The x86 does + // not reorder loads with respect to subsequent loads or stores + // and no ordering barrier is required here. See more elaborate + // comments in storeSeqCst. + T v = *static_cast<T volatile*>(addr); +# else + T v; + __atomic_load(addr, &v, __ATOMIC_SEQ_CST); +# endif + return v; +} + +# ifndef LOCKFREE8 +template<> +inline int64_t +js::jit::AtomicOperations::loadSeqCst(int64_t* addr) +{ + MOZ_CRASH(); +} + +template<> +inline uint64_t +js::jit::AtomicOperations::loadSeqCst(uint64_t* addr) +{ + MOZ_CRASH(); +} +# endif // LOCKFREE8 + +template<typename T> +inline void +js::jit::AtomicOperations::storeSeqCst(T* addr, T val) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + // Inhibit compiler reordering with a volatile store. The x86 may + // reorder a store with respect to a subsequent load from a + // different location, hence there is an ordering barrier here to + // prevent that. + // + // By way of background, look to eg + // http://bartoszmilewski.com/2008/11/05/who-ordered-memory-fences-on-an-x86/ + // + // Consider: + // + // uint8_t x = 0, y = 0; // to start + // + // thread1: + // sx: AtomicOperations::store(&x, 1); + // gy: uint8_t obs1 = AtomicOperations::loadSeqCst(&y); + // + // thread2: + // sy: AtomicOperations::store(&y, 1); + // gx: uint8_t obs2 = AtomicOperations::loadSeqCst(&x); + // + // Sequential consistency requires a total global ordering of + // operations: sx-gy-sy-gx, sx-sy-gx-gy, sx-sy-gy-gx, sy-gx-sx-gy, + // sy-sx-gy-gx, or sy-sx-gx-gy. In every ordering at least one of + // sx-before-gx or sy-before-gy happens, so *at least one* of + // obs1/obs2 is 1. + // + // If AtomicOperations::{load,store}SeqCst were just volatile + // {load,store}, x86 could reorder gx/gy before each thread's + // prior load. That would permit gx-gy-sx-sy: both loads would be + // 0! Thus after a volatile store we must synchronize to ensure + // the store happens before the load. + *static_cast<T volatile*>(addr) = val; + __sync_synchronize(); +# else + __atomic_store(addr, &val, __ATOMIC_SEQ_CST); +# endif +} + +# ifndef LOCKFREE8 +template<> +inline void +js::jit::AtomicOperations::storeSeqCst(int64_t* addr, int64_t val) +{ + MOZ_CRASH(); +} + +template<> +inline void +js::jit::AtomicOperations::storeSeqCst(uint64_t* addr, uint64_t val) +{ + MOZ_CRASH(); +} +# endif // LOCKFREE8 + +template<typename T> +inline T +js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + T v; + do { + // Here I assume the compiler will not hoist the load. It + // shouldn't, because the CAS could affect* addr. + v = *addr; + } while (!__sync_bool_compare_and_swap(addr, v, val)); + return v; +# else + T v; + __atomic_exchange(addr, &val, &v, __ATOMIC_SEQ_CST); + return v; +# endif +} + +# ifndef LOCKFREE8 +template<> +inline int64_t +js::jit::AtomicOperations::exchangeSeqCst(int64_t* addr, int64_t val) +{ + MOZ_CRASH(); +} + +template<> +inline uint64_t +js::jit::AtomicOperations::exchangeSeqCst(uint64_t* addr, uint64_t val) +{ + MOZ_CRASH(); +} +# endif // LOCKFREE8 + +template<typename T> +inline T +js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_val_compare_and_swap(addr, oldval, newval); +# else + __atomic_compare_exchange(addr, &oldval, &newval, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return oldval; +# endif +} + +# ifndef LOCKFREE8 +template<> +inline int64_t +js::jit::AtomicOperations::compareExchangeSeqCst(int64_t* addr, int64_t oldval, int64_t newval) +{ + MOZ_CRASH(); +} + +template<> +inline uint64_t +js::jit::AtomicOperations::compareExchangeSeqCst(uint64_t* addr, uint64_t oldval, uint64_t newval) +{ + MOZ_CRASH(); +} +# endif // LOCKFREE8 + +template<typename T> +inline T +js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val) +{ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_fetch_and_add(addr, val); +# else + return __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val) +{ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_fetch_and_sub(addr, val); +# else + return __atomic_fetch_sub(addr, val, __ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val) +{ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_fetch_and_and(addr, val); +# else + return __atomic_fetch_and(addr, val, __ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val) +{ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_fetch_and_or(addr, val); +# else + return __atomic_fetch_or(addr, val, __ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val) +{ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + return __sync_fetch_and_xor(addr, val); +# else + return __atomic_fetch_xor(addr, val, __ATOMIC_SEQ_CST); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::loadSafeWhenRacy(T* addr) +{ + return *addr; // FIXME (1208663): not yet safe +} + +template<typename T> +inline void +js::jit::AtomicOperations::storeSafeWhenRacy(T* addr, T val) +{ + *addr = val; // FIXME (1208663): not yet safe +} + +inline void +js::jit::AtomicOperations::memcpySafeWhenRacy(void* dest, const void* src, size_t nbytes) +{ + ::memcpy(dest, src, nbytes); // FIXME (1208663): not yet safe +} + +inline void +js::jit::AtomicOperations::memmoveSafeWhenRacy(void* dest, const void* src, size_t nbytes) +{ + ::memmove(dest, src, nbytes); // FIXME (1208663): not yet safe +} + +template<size_t nbytes> +inline void +js::jit::RegionLock::acquire(void* addr) +{ +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + while (!__sync_bool_compare_and_swap(&spinlock, 0, 1)) + continue; +# else + uint32_t zero = 0; + uint32_t one = 1; + while (!__atomic_compare_exchange(&spinlock, &zero, &one, false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE)) { + zero = 0; + continue; + } +# endif +} + +template<size_t nbytes> +inline void +js::jit::RegionLock::release(void* addr) +{ + MOZ_ASSERT(AtomicOperations::loadSeqCst(&spinlock) == 1, "releasing unlocked region lock"); +# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS + __sync_sub_and_fetch(&spinlock, 1); // Should turn into LOCK XADD +# else + uint32_t zero = 0; + __atomic_store(&spinlock, &zero, __ATOMIC_SEQ_CST); +# endif +} + +# undef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS +# undef LOCKFREE8 + +#elif defined(_MSC_VER) + +// On 32-bit CPUs there is no 64-bit XCHG instruction, one must +// instead use a loop with CMPXCHG8B. Since MSVC provides +// _InterlockedExchange64 only if it maps directly to XCHG, the +// workaround must be manual. + +# define HAVE_EXCHANGE64 + +# if !_WIN64 +# undef HAVE_EXCHANGE64 +# endif + +// Below, _ReadWriteBarrier is a compiler directive, preventing +// reordering of instructions and reuse of memory values across it. + +inline bool +js::jit::AtomicOperations::isLockfree8() +{ + // See general comments at the start of this file. + // + // The MSDN docs suggest very strongly that if code is compiled for + // Pentium or better the 64-bit primitives will be lock-free, see + // eg the "Remarks" secion of the page for _InterlockedCompareExchange64, + // currently here: + // https://msdn.microsoft.com/en-us/library/ttk2z1ws%28v=vs.85%29.aspx + // + // But I've found no way to assert that at compile time or run time, + // there appears to be no WinAPI is_lock_free() test. + return true; +} + +inline void +js::jit::AtomicOperations::fenceSeqCst() +{ + _ReadWriteBarrier(); +# if JS_BITS_PER_WORD == 32 + // If configured for SSE2+ we can use the MFENCE instruction, available + // through the _mm_mfence intrinsic. But for non-SSE2 systems we have + // to do something else. Linux uses "lock add [esp], 0", so why not? + __asm lock add [esp], 0; +# else + _mm_mfence(); +# endif +} + +template<typename T> +inline T +js::jit::AtomicOperations::loadSeqCst(T* addr) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); + _ReadWriteBarrier(); + T v = *addr; + _ReadWriteBarrier(); + return v; +} + +template<typename T> +inline void +js::jit::AtomicOperations::storeSeqCst(T* addr, T val) +{ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); + _ReadWriteBarrier(); + *addr = val; + fenceSeqCst(); +} + +# define MSC_EXCHANGEOP(T, U, xchgop) \ + template<> inline T \ + js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val) { \ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); \ + return (T)xchgop((U volatile*)addr, (U)val); \ + } + +# define MSC_EXCHANGEOP_CAS(T, U, cmpxchg) \ + template<> inline T \ + js::jit::AtomicOperations::exchangeSeqCst(T* addr, T newval) { \ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); \ + T oldval; \ + do { \ + _ReadWriteBarrier(); \ + oldval = *addr; \ + } while (!cmpxchg((U volatile*)addr, (U)newval, (U)oldval)); \ + return oldval; \ + } + +MSC_EXCHANGEOP(int8_t, char, _InterlockedExchange8) +MSC_EXCHANGEOP(uint8_t, char, _InterlockedExchange8) +MSC_EXCHANGEOP(int16_t, short, _InterlockedExchange16) +MSC_EXCHANGEOP(uint16_t, short, _InterlockedExchange16) +MSC_EXCHANGEOP(int32_t, long, _InterlockedExchange) +MSC_EXCHANGEOP(uint32_t, long, _InterlockedExchange) +# ifdef HAVE_EXCHANGE64 +MSC_EXCHANGEOP(int64_t, __int64, _InterlockedExchange64) +MSC_EXCHANGEOP(uint64_t, __int64, _InterlockedExchange64) +# else +MSC_EXCHANGEOP_CAS(int64_t, __int64, _InterlockedCompareExchange64) +MSC_EXCHANGEOP_CAS(uint64_t, __int64, _InterlockedCompareExchange64) +# endif + +# undef MSC_EXCHANGEOP +# undef MSC_EXCHANGEOP_CAS + +# define MSC_CAS(T, U, cmpxchg) \ + template<> inline T \ + js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval) { \ + MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); \ + return (T)cmpxchg((U volatile*)addr, (U)newval, (U)oldval); \ + } + +MSC_CAS(int8_t, char, _InterlockedCompareExchange8) +MSC_CAS(uint8_t, char, _InterlockedCompareExchange8) +MSC_CAS(int16_t, short, _InterlockedCompareExchange16) +MSC_CAS(uint16_t, short, _InterlockedCompareExchange16) +MSC_CAS(int32_t, long, _InterlockedCompareExchange) +MSC_CAS(uint32_t, long, _InterlockedCompareExchange) +MSC_CAS(int64_t, __int64, _InterlockedCompareExchange64) +MSC_CAS(uint64_t, __int64, _InterlockedCompareExchange64) + +# undef MSC_CAS + +# define MSC_FETCHADDOP(T, U, xadd) \ + template<> inline T \ + js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val) { \ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \ + return (T)xadd((U volatile*)addr, (U)val); \ + } \ + template<> inline T \ + js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val) { \ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \ + return (T)xadd((U volatile*)addr, -(U)val); \ + } + +MSC_FETCHADDOP(int8_t, char, _InterlockedExchangeAdd8) +MSC_FETCHADDOP(uint8_t, char, _InterlockedExchangeAdd8) +MSC_FETCHADDOP(int16_t, short, _InterlockedExchangeAdd16) +MSC_FETCHADDOP(uint16_t, short, _InterlockedExchangeAdd16) +MSC_FETCHADDOP(int32_t, long, _InterlockedExchangeAdd) +MSC_FETCHADDOP(uint32_t, long, _InterlockedExchangeAdd) + +# undef MSC_FETCHADDOP + +# define MSC_FETCHBITOP(T, U, andop, orop, xorop) \ + template<> inline T \ + js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val) { \ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \ + return (T)andop((U volatile*)addr, (U)val); \ + } \ + template<> inline T \ + js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val) { \ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \ + return (T)orop((U volatile*)addr, (U)val); \ + } \ + template<> inline T \ + js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val) { \ + static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \ + return (T)xorop((U volatile*)addr, (U)val); \ + } + +MSC_FETCHBITOP(int8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8) +MSC_FETCHBITOP(uint8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8) +MSC_FETCHBITOP(int16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16) +MSC_FETCHBITOP(uint16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16) +MSC_FETCHBITOP(int32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor) +MSC_FETCHBITOP(uint32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor) + +# undef MSC_FETCHBITOP + +template<typename T> +inline T +js::jit::AtomicOperations::loadSafeWhenRacy(T* addr) +{ + return *addr; // FIXME (1208663): not yet safe +} + +template<typename T> +inline void +js::jit::AtomicOperations::storeSafeWhenRacy(T* addr, T val) +{ + *addr = val; // FIXME (1208663): not yet safe +} + +inline void +js::jit::AtomicOperations::memcpySafeWhenRacy(void* dest, const void* src, size_t nbytes) +{ + ::memcpy(dest, src, nbytes); // FIXME (1208663): not yet safe +} + +inline void +js::jit::AtomicOperations::memmoveSafeWhenRacy(void* dest, const void* src, size_t nbytes) +{ + ::memmove(dest, src, nbytes); // FIXME (1208663): not yet safe +} + +template<size_t nbytes> +inline void +js::jit::RegionLock::acquire(void* addr) +{ + while (_InterlockedCompareExchange((long*)&spinlock, /*newval=*/1, /*oldval=*/0) == 1) + continue; +} + +template<size_t nbytes> +inline void +js::jit::RegionLock::release(void* addr) +{ + MOZ_ASSERT(AtomicOperations::loadSeqCst(&spinlock) == 1, "releasing unlocked region lock"); + _InterlockedExchange((long*)&spinlock, 0); +} + +# undef HAVE_EXCHANGE64 + +#elif defined(ENABLE_SHARED_ARRAY_BUFFER) + +# error "Either disable JS shared memory at compile time, use GCC, Clang, or MSVC, or add code here" + +#endif // platform + +#endif // jit_shared_AtomicOperations_x86_shared_h |