summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndre Heider <[email protected]>2013-07-23 19:08:45 +0200
committerJosé Fonseca <[email protected]>2013-07-23 23:12:58 +0100
commit0acf3a8407fbeaa9d61731ce83fbe6de529e303c (patch)
tree6b52f6855eeead08bdc7e7449d19e675b27dda59
parent5a7bdd4b4173958c53109517b7c95f1039623e7e (diff)
gallium/util: Fix detection of AVX cpu caps
For AVX it's not sufficient to only rely on the cpuid flags. If the CPU supports these extensions, but the OS doesn't, issuing these insns will trigger an undefined opcode exception. In addition to the AVX cpuid bit we also need to: * test cpuid for OSXSAVE support * XGETBV to check if the OS saves/restores AVX regs on context switches See "Detecting Availability and Support" at http://software.intel.com/en-us/articles/introduction-to-intel-advanced-vector-extensions Signed-off-by: Andre Heider <[email protected]> Reviewed-by: Roland Scheidegger <[email protected]> Reviewed-by: José Fonseca <[email protected]>
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.c27
1 files changed, 25 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index b118fc8ae36..588fc7c7292 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -67,7 +67,7 @@
#if defined(PIPE_OS_WINDOWS)
#include <windows.h>
-#if defined(MSVC)
+#if defined(PIPE_CC_MSVC)
#include <intrin.h>
#endif
#endif
@@ -211,6 +211,27 @@ cpuid(uint32_t ax, uint32_t *p)
p[3] = 0;
#endif
}
+
+static INLINE uint64_t xgetbv(void)
+{
+#if defined(PIPE_CC_GCC)
+ uint32_t eax, edx;
+
+ __asm __volatile (
+ ".byte 0x0f, 0x01, 0xd0" // xgetbv isn't supported on gcc < 4.4
+ : "=a"(eax),
+ "=d"(edx)
+ : "c"(0)
+ );
+
+ return ((uint64_t)edx << 32) | eax;
+#elif defined(PIPE_CC_MSVC) && defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
+ return _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+#else
+ return 0;
+#endif
+
+}
#endif /* X86 or X86_64 */
void
@@ -284,7 +305,9 @@ util_cpu_detect(void)
util_cpu_caps.has_sse4_1 = (regs2[2] >> 19) & 1;
util_cpu_caps.has_sse4_2 = (regs2[2] >> 20) & 1;
util_cpu_caps.has_popcnt = (regs2[2] >> 23) & 1;
- util_cpu_caps.has_avx = (regs2[2] >> 28) & 1;
+ util_cpu_caps.has_avx = ((regs2[2] >> 28) & 1) && // AVX
+ ((regs2[2] >> 27) & 1) && // OSXSAVE
+ ((xgetbv() & 6) == 6); // XMM & YMM
util_cpu_caps.has_f16c = (regs2[2] >> 29) & 1;
util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */