diff options
author | Dylan Baker <[email protected]> | 2018-09-07 13:09:23 -0700 |
---|---|---|
committer | Dylan Baker <[email protected]> | 2018-09-20 05:52:23 -0700 |
commit | 0abce6d7700ee42eb00c787732ec1fdefe250d03 (patch) | |
tree | 382bf7596a0d44e7df8010d1beb370ae1b5d1cbf | |
parent | b8b3517a49555b5127776272848d8689327db960 (diff) |
utils/u_math: break dependency on gallium/utils
Currently u_math needs gallium utils for CPU detection. Most of what
u_math uses out of u_cpu_detect is duplicated in src/mesa/x86
(surprise!), so I've reworked it as much as possible to use the
x86/common_x86_features.h macros instead of the gallium ones. The mesa
implementation is header-only, with no external dependencies.
One small function was copied over rather than shared, because promoting
u_cpu_detect is itself a fairly hefty undertaking (it depends on
u_debug); copying the function fixes the bug for now.
bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107870
Tested-by: Vinson Lee <[email protected]>
-rw-r--r-- | src/util/u_math.c | 43 |
1 files changed, 38 insertions, 5 deletions
diff --git a/src/util/u_math.c b/src/util/u_math.c index c58af911be7..bf0c398eeec 100644 --- a/src/util/u_math.c +++ b/src/util/u_math.c @@ -29,7 +29,7 @@ #include "pipe/p_config.h" #include "util/u_math.h" -#include "util/u_cpu_detect.h" +#include "x86/common_x86_features.h" #if defined(PIPE_ARCH_SSE) #include <xmmintrin.h> @@ -90,7 +90,7 @@ util_fpstate_get(void) unsigned mxcsr = 0; #if defined(PIPE_ARCH_SSE) - if (util_cpu_caps.has_sse) { + if (cpu_has_xmm) { mxcsr = _mm_getcsr(); } #endif @@ -98,6 +98,31 @@ util_fpstate_get(void) return mxcsr; } +/* TODO: this was copied from u_cpu_detection. It's another case of duplication + * between gallium and core mesa, and it would be nice to get rid of that + * duplication as well. + */ +#if defined(PIPE_ARCH_X86) +PIPE_ALIGN_STACK static inline bool sse2_has_daz(void) +{ + struct { + uint32_t pad1[7]; + uint32_t mxcsr_mask; + uint32_t pad2[128-8]; + } PIPE_ALIGN_VAR(16) fxarea; + + fxarea.mxcsr_mask = 0; +#if defined(PIPE_CC_GCC) + __asm __volatile ("fxsave %0" : "+m" (fxarea)); +#elif defined(PIPE_CC_MSVC) || defined(PIPE_CC_ICL) + _fxsave(&fxarea); +#else + fxarea.mxcsr_mask = 0; +#endif + return !!(fxarea.mxcsr_mask & (1 << 6)); +} +#endif + /** * Make sure that the fp treats the denormalized floating * point numbers as zero. @@ -108,13 +133,21 @@ unsigned util_fpstate_set_denorms_to_zero(unsigned current_mxcsr) { #if defined(PIPE_ARCH_SSE) - if (util_cpu_caps.has_sse) { + if (cpu_has_xmm) { /* Enable flush to zero mode */ current_mxcsr |= _MM_FLUSH_ZERO_MASK; - if (util_cpu_caps.has_daz) { + /* x86_64 cpus always have daz, as do cpus with sse3 in fact, there's + * basically only a handful of very early pentium 4's that have sse2 but + * not daz. 
+ */ +# if !defined(PIPE_ARCH_x86_64) && !defined(PIPE_ARCH_SSSE3) + if (sse2_has_daz()) { +# endif /* Enable denormals are zero mode */ current_mxcsr |= _MM_DENORMALS_ZERO_MASK; +# if !defined(PIPE_ARCH_x86_64) && !defined(PIPE_ARCH_SSSE3) } +#endif util_fpstate_set(current_mxcsr); } #endif @@ -130,7 +163,7 @@ void util_fpstate_set(unsigned mxcsr) { #if defined(PIPE_ARCH_SSE) - if (util_cpu_caps.has_sse) { + if (cpu_has_xmm) { _mm_setcsr(mxcsr); } #endif |