diff options
Diffstat (limited to 'mozglue/build/SSE.cpp')
-rw-r--r-- | mozglue/build/SSE.cpp | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/mozglue/build/SSE.cpp b/mozglue/build/SSE.cpp new file mode 100644 index 000000000..8ad228684 --- /dev/null +++ b/mozglue/build/SSE.cpp @@ -0,0 +1,206 @@ +/* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* compile-time and runtime tests for whether to use SSE instructions */ + +#include "SSE.h" + +#ifdef HAVE_CPUID_H +// cpuid.h is available on gcc 4.3 and higher on i386 and x86_64 +#include <cpuid.h> +#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64)) +// MSVC 2005 or newer on x86-32 or x86-64 +#include <intrin.h> +#endif + +namespace { + +// SSE.h has parallel #ifs which declare MOZILLA_SSE_HAVE_CPUID_DETECTION. +// We can't declare these functions in the header file, however, because +// <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to +// include both SSE.h and <windows.h>. + +#ifdef HAVE_CPUID_H + +enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 }; + +static bool +has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits) +{ + unsigned int regs[4]; + unsigned int eax, ebx, ecx, edx; + unsigned max = __get_cpuid_max(0, NULL); + if (level > max) + return false; + __cpuid_count(level, 0, eax, ebx, ecx, edx); + regs[0] = eax; + regs[1] = ebx; + regs[2] = ecx; + regs[3] = edx; + return (regs[reg] & bits) == bits; +} + +#if !defined(MOZILLA_PRESUME_AVX) +static uint64_t xgetbv(uint32_t xcr) { + uint32_t eax, edx; + __asm__ ( ".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr)); + return (uint64_t)(edx) << 32 | eax; +} +#endif + +#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64)) + +enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 }; + +static bool +has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits) +{ + // Check that the level in question is supported. + int regs[4]; + __cpuid(regs, level & 0x80000000u); + if (unsigned(regs[0]) < level) + return false; + + // "The __cpuid intrinsic clears the ECX register before calling the cpuid instruction." + __cpuid(regs, level); + return (unsigned(regs[reg]) & bits) == bits; +} + +#if !defined(MOZILLA_PRESUME_AVX) +static uint64_t xgetbv(uint32_t xcr) { return _xgetbv(xcr); } +#endif + +#elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && (defined(__i386) || defined(__x86_64__)) + +enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 }; + +#ifdef __i386 +static void +moz_cpuid(int CPUInfo[4], int InfoType) +{ + asm ( + "xchg %esi, %ebx\n" + "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0) + "cpuid\n" + "movl %eax, (%edi)\n" + "movl %ebx, 4(%edi)\n" + "movl %ecx, 8(%edi)\n" + "movl %edx, 12(%edi)\n" + "xchg %esi, %ebx\n" + : + : "a"(InfoType), // %eax + "D"(CPUInfo) // %edi + : "%ecx", "%edx", "%esi" + ); +} +#else +static void +moz_cpuid(int CPUInfo[4], int InfoType) +{ + asm ( + "xchg %rsi, %rbx\n" + "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0) + "cpuid\n" + "movl %eax, (%rdi)\n" + "movl %ebx, 4(%rdi)\n" + "movl %ecx, 8(%rdi)\n" + "movl %edx, 12(%rdi)\n" + "xchg %rsi, %rbx\n" + : + : "a"(InfoType), // %eax + "D"(CPUInfo) // %rdi + : "%ecx", "%edx", "%rsi" + ); +} +#endif + +static bool +has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits) +{ + // Check that the level in question is supported. + volatile int regs[4]; + moz_cpuid((int *)regs, level & 0x80000000u); + if (unsigned(regs[0]) < level) + return false; + + moz_cpuid((int *)regs, level); + return (unsigned(regs[reg]) & bits) == bits; +} + +#endif // end CPUID declarations + +} // namespace + +namespace mozilla { + +namespace sse_private { + +#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) + +#if !defined(MOZILLA_PRESUME_MMX) + bool mmx_enabled = has_cpuid_bits(1u, edx, (1u<<23)); +#endif + +#if !defined(MOZILLA_PRESUME_SSE) + bool sse_enabled = has_cpuid_bits(1u, edx, (1u<<25)); +#endif + +#if !defined(MOZILLA_PRESUME_SSE2) + bool sse2_enabled = has_cpuid_bits(1u, edx, (1u<<26)); +#endif + +#if !defined(MOZILLA_PRESUME_SSE3) + bool sse3_enabled = has_cpuid_bits(1u, ecx, (1u<<0)); +#endif + +#if !defined(MOZILLA_PRESUME_SSSE3) + bool ssse3_enabled = has_cpuid_bits(1u, ecx, (1u<<9)); +#endif + +#if !defined(MOZILLA_PRESUME_SSE4A) + bool sse4a_enabled = has_cpuid_bits(0x80000001u, ecx, (1u<<6)); +#endif + +#if !defined(MOZILLA_PRESUME_SSE4_1) + bool sse4_1_enabled = has_cpuid_bits(1u, ecx, (1u<<19)); +#endif + +#if !defined(MOZILLA_PRESUME_SSE4_2) + bool sse4_2_enabled = has_cpuid_bits(1u, ecx, (1u<<20)); +#endif + +#if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2) + static bool has_avx() + { +#if defined(MOZILLA_PRESUME_AVX) + return true; +#else + const unsigned AVX = 1u << 28; + const unsigned OSXSAVE = 1u << 27; + const unsigned XSAVE = 1u << 26; + + const unsigned XMM_STATE = 1u << 1; + const unsigned YMM_STATE = 1u << 2; + const unsigned AVX_STATE = XMM_STATE | YMM_STATE; + + return has_cpuid_bits(1u, ecx, AVX | OSXSAVE | XSAVE) && + // ensure the OS supports XSAVE of YMM registers + (xgetbv(0) & AVX_STATE) == AVX_STATE; +#endif // MOZILLA_PRESUME_AVX + } +#endif // !MOZILLA_PRESUME_AVX || !MOZILLA_PRESUME_AVX2 + +#if !defined(MOZILLA_PRESUME_AVX) + bool avx_enabled = has_avx(); +#endif + +#if !defined(MOZILLA_PRESUME_AVX2) + bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u<<5)); +#endif + +#endif + +} // namespace sse_private +} // namespace mozilla |