diff options
author | José Fonseca <[email protected]> | 2013-07-10 07:56:17 +0100 |
---|---|---|
committer | José Fonseca <[email protected]> | 2013-07-10 07:56:17 +0100 |
commit | b042aae70d4c1790a42033d23816bc092347d846 (patch) | |
tree | baf16f9502727f3aba240e1b89cea8345871ea5f | |
parent | 045bf0db5215789bdaa9043c9a3075d3c3a71d64 (diff) |
util/u_math: Use xmmintrin.h whenever possible.
It seems __builtin_ia32_ldmxcsr is only available on gcc and only when
-msse is used. xmmintrin.h/pmmintrin.h provide portable intrinsics, but
these too are only available with gcc when -msse/-msse3 are set.
scons build always sets -msse on x86 builds, but autotools doesn't seem
to.
We could try to get this working on gcc x86 without -msse by emitting
assembly, but I believe that in this day and age we really should be
building Mesa with -msse and -msse2.
-rw-r--r-- | src/gallium/auxiliary/util/u_math.c | 26 |
1 files changed, 17 insertions, 9 deletions
```diff
diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c
index 2487bc74b25..f3fe392babe 100644
--- a/src/gallium/auxiliary/util/u_math.c
+++ b/src/gallium/auxiliary/util/u_math.c
@@ -27,9 +27,17 @@
 
 
 
+#include "pipe/p_config.h"
 #include "util/u_math.h"
 #include "util/u_cpu_detect.h"
 
+#if defined(PIPE_ARCH_SSE)
+#include <xmmintrin.h>
+/* This is defined in pmmintrin.h, but it can only be included when -msse3 is
+ * used, so just define it here to avoid further. */
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#endif
+
 
 /** 2^x, for x in [-1.0, 1.0) */
 float pow2_table[POW2_TABLE_SIZE];
@@ -81,9 +89,9 @@ util_fpstate_get(void)
 {
    unsigned mxcsr = 0;
 
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+#if defined(PIPE_ARCH_SSE)
    if (util_cpu_caps.has_sse) {
-      mxcsr = __builtin_ia32_stmxcsr();
+      mxcsr = _mm_getcsr();
    }
 #endif
 
@@ -99,13 +107,13 @@ util_fpstate_get(void)
 unsigned
 util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)
 {
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-#define MXCSR_DAZ (1 << 6) /* Enable denormals are zero mode */
-#define MXCSR_FTZ (1 << 15) /* Enable flush to zero mode */
+#if defined(PIPE_ARCH_SSE)
    if (util_cpu_caps.has_sse) {
-      current_mxcsr |= MXCSR_FTZ;
+      /* Enable flush to zero mode */
+      current_mxcsr |= _MM_FLUSH_ZERO_MASK;
       if (util_cpu_caps.has_sse3) {
-         current_mxcsr |= MXCSR_DAZ;
+         /* Enable denormals are zero mode */
+         current_mxcsr |= _MM_DENORMALS_ZERO_MASK;
       }
       util_fpstate_set(current_mxcsr);
    }
@@ -121,9 +129,9 @@ util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)
 void
 util_fpstate_set(unsigned mxcsr)
 {
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+#if defined(PIPE_ARCH_SSE)
    if (util_cpu_caps.has_sse) {
-      __builtin_ia32_ldmxcsr(mxcsr);
+      _mm_setcsr(mxcsr);
    }
 #endif
 }
```