summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJosé Fonseca <[email protected]>2013-07-10 07:56:17 +0100
committerJosé Fonseca <[email protected]>2013-07-10 07:56:17 +0100
commitb042aae70d4c1790a42033d23816bc092347d846 (patch)
treebaf16f9502727f3aba240e1b89cea8345871ea5f
parent045bf0db5215789bdaa9043c9a3075d3c3a71d64 (diff)
util/u_math: Use xmmintrin.h whenever possible.
It seems __builtin_ia32_ldmxcsr is only available on gcc, and only when -msse is used. xmmintrin.h/pmmintrin.h provide portable intrinsics, but with gcc these too are only available when -msse/-msse3 are set. The scons build always sets -msse on x86 builds, but the autotools build doesn't seem to. We could try to get this working on gcc x86 without -msse by emitting assembly, but I believe that in this day and age we really should be building Mesa with -msse and -msse2.
-rw-r--r--src/gallium/auxiliary/util/u_math.c26
1 files changed, 17 insertions, 9 deletions
diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c
index 2487bc74b25..f3fe392babe 100644
--- a/src/gallium/auxiliary/util/u_math.c
+++ b/src/gallium/auxiliary/util/u_math.c
@@ -27,9 +27,17 @@
+#include "pipe/p_config.h"
#include "util/u_math.h"
#include "util/u_cpu_detect.h"
+#if defined(PIPE_ARCH_SSE)
+#include <xmmintrin.h>
+/* This is defined in pmmintrin.h, but that header can only be included when
+ * -msse3 is used, so just define it here to avoid requiring -msse3. */
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#endif
+
/** 2^x, for x in [-1.0, 1.0) */
float pow2_table[POW2_TABLE_SIZE];
@@ -81,9 +89,9 @@ util_fpstate_get(void)
{
unsigned mxcsr = 0;
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+#if defined(PIPE_ARCH_SSE)
if (util_cpu_caps.has_sse) {
- mxcsr = __builtin_ia32_stmxcsr();
+ mxcsr = _mm_getcsr();
}
#endif
@@ -99,13 +107,13 @@ util_fpstate_get(void)
unsigned
util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)
{
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-#define MXCSR_DAZ (1 << 6) /* Enable denormals are zero mode */
-#define MXCSR_FTZ (1 << 15) /* Enable flush to zero mode */
+#if defined(PIPE_ARCH_SSE)
if (util_cpu_caps.has_sse) {
- current_mxcsr |= MXCSR_FTZ;
+ /* Enable flush to zero mode */
+ current_mxcsr |= _MM_FLUSH_ZERO_MASK;
if (util_cpu_caps.has_sse3) {
- current_mxcsr |= MXCSR_DAZ;
+ /* Enable denormals are zero mode */
+ current_mxcsr |= _MM_DENORMALS_ZERO_MASK;
}
util_fpstate_set(current_mxcsr);
}
@@ -121,9 +129,9 @@ util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)
void
util_fpstate_set(unsigned mxcsr)
{
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+#if defined(PIPE_ARCH_SSE)
if (util_cpu_caps.has_sse) {
- __builtin_ia32_ldmxcsr(mxcsr);
+ _mm_setcsr(mxcsr);
}
#endif
}