/* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* compile-time and runtime tests for whether to use SSE instructions */

#include "SSE.h"

#ifdef HAVE_CPUID_H
// cpuid.h is available on gcc 4.3 and higher on i386 and x86_64
#include <cpuid.h>
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
// MSVC 2005 or newer on x86-32 or x86-64
#include <intrin.h>
#endif

namespace {

// SSE.h has parallel #ifs which declare MOZILLA_SSE_HAVE_CPUID_DETECTION.
// We can't declare these functions in the header file, however, because
// <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to
// include both SSE.h and <windows.h>.
//
// Each branch below provides the same two helpers:
//
//   has_cpuid_bits(level, reg, bits)
//       Returns true when CPUID leaf `level` (sub-leaf 0) is supported and
//       every bit of `bits` is set in the output register selected by `reg`.
//       Returns false when the leaf is not supported.
//
//   xgetbv(xcr)   (only when MOZILLA_PRESUME_AVX is not defined)
//       Returns the 64-bit value of extended control register `xcr`.

#ifdef HAVE_CPUID_H

// Index of each CPUID output register inside the regs[] arrays used below.
enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };

static bool
has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
{
    unsigned int regs[4];
    unsigned int eax, ebx, ecx, edx;

    // __get_cpuid_max() reports the highest leaf of the range selected by
    // its first argument: 0 for basic leaves, 0x80000000 for extended ones.
    // Query the range that `level` belongs to; asking for the basic range
    // unconditionally would reject every extended leaf (e.g. 0x80000001,
    // used for SSE4a), because an extended leaf number is always larger
    // than the highest basic leaf.
    unsigned max = __get_cpuid_max(level & 0x80000000u, NULL);
    if (level > max)
        return false;

    // Sub-leaf (ECX input) is always 0; that is all the callers need.
    __cpuid_count(level, 0, eax, ebx, ecx, edx);
    regs[0] = eax;
    regs[1] = ebx;
    regs[2] = ecx;
    regs[3] = edx;
    return (regs[reg] & bits) == bits;
}

#if !defined(MOZILLA_PRESUME_AVX)
static uint64_t
xgetbv(uint32_t xcr)
{
    uint32_t eax, edx;
    // 0x0f 0x01 0xd0 is the encoding of the XGETBV instruction; it is
    // emitted as raw bytes rather than by mnemonic. The register index is
    // passed in ECX and the result comes back in EDX:EAX.
    __asm__ ( ".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr));
    return (uint64_t)(edx) << 32 | eax;
}
#endif

#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))

// Index of each CPUID output register inside the regs[] arrays used below.
enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };

static bool
has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
{
    // Check that the level in question is supported. Leaf 0 (or 0x80000000
    // for extended leaves) returns the highest supported leaf of its range
    // in EAX.
    int regs[4];
    __cpuid(regs, level & 0x80000000u);
    if (unsigned(regs[0]) < level)
        return false;

    // "The __cpuid intrinsic clears the ECX register before calling the
    // cpuid instruction."
    __cpuid(regs, level);
    return (unsigned(regs[reg]) & bits) == bits;
}

#if !defined(MOZILLA_PRESUME_AVX)
static uint64_t
xgetbv(uint32_t xcr)
{
    return _xgetbv(xcr);
}
#endif

#elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && (defined(__i386) || defined(__x86_64__))

// Index of each CPUID output register inside the regs[] arrays used below.
enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };

// moz_cpuid(CPUInfo, InfoType): execute CPUID for leaf InfoType, sub-leaf 0,
// and store EAX, EBX, ECX, EDX into CPUInfo[0..3] in that order. EBX is
// swapped into ESI/RSI around the instruction and swapped back afterwards,
// so its original value is restored on exit.
//
// NOTE(review): the asm stores through the CPUInfo pointer without listing
// a "memory" clobber, and overwrites EAX although it is declared as an
// input-only operand — confirm this is acceptable for the compilers that
// build this branch.
// NOTE(review): register names are written with a single '%' even though the
// asm statement has operands; GCC's extended asm expects '%%' in that case —
// confirm which compilers actually reach this branch.
#ifdef __i386
static void
moz_cpuid(int CPUInfo[4], int InfoType)
{
    asm (
        "xchg %esi, %ebx\n"
        "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0)
        "cpuid\n"
        "movl %eax, (%edi)\n"
        "movl %ebx, 4(%edi)\n"
        "movl %ecx, 8(%edi)\n"
        "movl %edx, 12(%edi)\n"
        "xchg %esi, %ebx\n"
        :
        : "a"(InfoType), // %eax
          "D"(CPUInfo) // %edi
        : "%ecx", "%edx", "%esi"
    );
}
#else
static void
moz_cpuid(int CPUInfo[4], int InfoType)
{
    asm (
        "xchg %rsi, %rbx\n"
        "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0)
        "cpuid\n"
        "movl %eax, (%rdi)\n"
        "movl %ebx, 4(%rdi)\n"
        "movl %ecx, 8(%rdi)\n"
        "movl %edx, 12(%rdi)\n"
        "xchg %rsi, %rbx\n"
        :
        : "a"(InfoType), // %eax
          "D"(CPUInfo) // %rdi
        : "%ecx", "%edx", "%rsi"
    );
}
#endif

static bool
has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
{
    // Check that the level in question is supported.
    // regs is volatile presumably so that the compiler re-reads the values
    // that moz_cpuid's asm writes behind its back — verify before removing.
    volatile int regs[4];
    moz_cpuid((int *)regs, level & 0x80000000u);
    if (unsigned(regs[0]) < level)
        return false;

    moz_cpuid((int *)regs, level);
    return (unsigned(regs[reg]) & bits) == bits;
}

#if !defined(MOZILLA_PRESUME_AVX)
// has_avx() below calls xgetbv() whenever MOZILLA_PRESUME_AVX is not defined,
// so this branch has to provide it just like the other two do.
static uint64_t
xgetbv(uint32_t xcr)
{
    uint32_t eax, edx;
    // 0x0f 0x01 0xd0 is the encoding of the XGETBV instruction; it is
    // emitted as raw bytes rather than by mnemonic. The register index is
    // passed in ECX and the result comes back in EDX:EAX.
    asm ( ".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr));
    return (uint64_t)(edx) << 32 | eax;
}
#endif

#endif // end CPUID declarations

} // namespace

namespace mozilla {

namespace sse_private {

// One flag per instruction-set extension, computed once during static
// initialization from CPUID. A flag is only defined here when the matching
// MOZILLA_PRESUME_* macro is not defined.

#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)

#if !defined(MOZILLA_PRESUME_MMX)
  bool mmx_enabled = has_cpuid_bits(1u, edx, (1u<<23));
#endif

#if !defined(MOZILLA_PRESUME_SSE)
  bool sse_enabled = has_cpuid_bits(1u, edx, (1u<<25));
#endif

#if !defined(MOZILLA_PRESUME_SSE2)
  bool sse2_enabled = has_cpuid_bits(1u, edx, (1u<<26));
#endif

#if !defined(MOZILLA_PRESUME_SSE3)
  bool sse3_enabled = has_cpuid_bits(1u, ecx, (1u<<0));
#endif

#if !defined(MOZILLA_PRESUME_SSSE3)
  bool ssse3_enabled = has_cpuid_bits(1u, ecx, (1u<<9));
#endif

#if !defined(MOZILLA_PRESUME_SSE4A)
  // SSE4a is reported in an extended leaf, unlike the other flags here.
  bool sse4a_enabled = has_cpuid_bits(0x80000001u, ecx, (1u<<6));
#endif

#if !defined(MOZILLA_PRESUME_SSE4_1)
  bool sse4_1_enabled = has_cpuid_bits(1u, ecx, (1u<<19));
#endif

#if !defined(MOZILLA_PRESUME_SSE4_2)
  bool sse4_2_enabled = has_cpuid_bits(1u, ecx, (1u<<20));
#endif

#if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2)
  // Returns true when AVX can be used: either it is presumed at compile
  // time, or the CPU reports AVX, OSXSAVE and XSAVE in CPUID leaf 1 ECX and
  // XCR0 has both the XMM and YMM state bits set.
  static bool has_avx()
  {
#if defined(MOZILLA_PRESUME_AVX)
      return true;
#else
      const unsigned AVX = 1u << 28;
      const unsigned OSXSAVE = 1u << 27;
      const unsigned XSAVE = 1u << 26;

      const unsigned XMM_STATE = 1u << 1;
      const unsigned YMM_STATE = 1u << 2;
      const unsigned AVX_STATE = XMM_STATE | YMM_STATE;

      // The CPUID test comes first so that xgetbv is only executed when
      // OSXSAVE is reported.
      return has_cpuid_bits(1u, ecx, AVX | OSXSAVE | XSAVE) &&
          // ensure the OS supports XSAVE of YMM registers
          (xgetbv(0) & AVX_STATE) == AVX_STATE;
#endif // MOZILLA_PRESUME_AVX
  }
#endif // !MOZILLA_PRESUME_AVX || !MOZILLA_PRESUME_AVX2

#if !defined(MOZILLA_PRESUME_AVX)
  bool avx_enabled = has_avx();
#endif

#if !defined(MOZILLA_PRESUME_AVX2)
  bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u<<5));
#endif

#endif

} // namespace sse_private
} // namespace mozilla