util: try much harder to set DAZ flag

While so far this only causes some harmless test failures, there's lots more cpus with DAZ. All 64bit capable ones can do it (particularly relevant for AMD cpus as they supported sse3 very very late) but if really necessary we can check support for that for real with some more magic. (In fact just about ANY cpu with sse2 can support DAZ, I believe the only exception are first gen P4 (Willamette) and from those only early steppings which can't do it it's almost like intel forgot to add it... - a real pity though docs say you can't just try to set it as they will throw a GPF.) While this was meant to address https://bugs.freedesktop.org/show_bug.cgi?id=67672 it does not fix it. Most likely the tests need fixing as I don't think there's any guarantee about denorm handling in the reference math library functions if the flags aren't set to standard values. Nevertheless enabling DAZ on all cpus which can do it should be the right thing to do. Reviewed-by: Jose Fonseca <[email protected]>
author: Roland Scheidegger <[email protected]> 2013-08-06 03:30:37 +0200
committer: Roland Scheidegger <[email protected]> 2013-08-08 18:55:57 +0200
commit: 883987503fc79691398eb024f37480ff083805a3 (patch)
tree: 5b806f083a13532fe26c9fa30fb390917a271be8
parent: e3b5e2db1b189092522c43ba789c4f244ddb7c60 (diff)
3 files changed, 31 insertions, 1 deletions
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 588fc7c7292..c58a3dd07fc 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -230,8 +230,28 @@ static INLINE uint64_t xgetbv(void)
 #else
    return 0;
 #endif
+}
+
 
+#if defined(PIPE_ARCH_X86)
+static INLINE boolean sse2_has_daz(void)
+{
+   struct {
+      uint32_t pad1[7];
+      uint32_t mxcsr_mask;
+      uint32_t pad2[128-8];
+   } PIPE_ALIGN_VAR(16) fxarea;
+
+   fxarea.mxcsr_mask = 0;
+#if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO))
+   asm volatile ("fxsave %0" :: "m" (fxarea));
+#elif (defined(PIPE_CC_MSVC) || defined(PIPE_CC_ICL))
+   _fxsave(fxarea);
+#endif
+   return !!(fxarea.mxcsr_mask & (1 << 6));
 }
+#endif
+
 #endif /* X86 or X86_64 */
 
 void
@@ -310,6 +330,12 @@ util_cpu_detect(void)
                                     ((xgetbv() & 6) == 6);    // XMM & YMM
          util_cpu_caps.has_f16c   = (regs2[2] >> 29) & 1;
          util_cpu_caps.has_mmx2   = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */
+#if defined(PIPE_ARCH_X86_64)
+         util_cpu_caps.has_daz = 1;
+#else
+         util_cpu_caps.has_daz = util_cpu_caps.has_sse3 ||
+            (util_cpu_caps.has_sse2 && sse2_has_daz());
+#endif
 
          cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
          if (cacheline > 0)
@@ -368,9 +394,12 @@ util_cpu_detect(void)
       debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
       debug_printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2);
       debug_printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx);
+      debug_printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c);
+      debug_printf("util_cpu_caps.has_popcnt = %u\n", util_cpu_caps.has_popcnt);
       debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
       debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
       debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
+      debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
    }
 #endif
 
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h
index f9cd6475e45..cc3e0ce0344 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -68,6 +68,7 @@ struct util_cpu_caps {
    unsigned has_3dnow:1;
    unsigned has_3dnow_ext:1;
    unsigned has_altivec:1;
+   unsigned has_daz:1;
 };
 
 extern struct util_cpu_caps
diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c
index f3fe392babe..6981ee93912 100644
--- a/src/gallium/auxiliary/util/u_math.c
+++ b/src/gallium/auxiliary/util/u_math.c
@@ -111,7 +111,7 @@ util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)
    if (util_cpu_caps.has_sse) {
       /* Enable flush to zero mode */
       current_mxcsr |= _MM_FLUSH_ZERO_MASK;
-      if (util_cpu_caps.has_sse3) {
+      if (util_cpu_caps.has_daz) {
          /* Enable denormals are zero mode */
          current_mxcsr |= _MM_DENORMALS_ZERO_MASK;
       }
author	Roland Scheidegger <[email protected]>	2013-08-06 03:30:37 +0200
committer	Roland Scheidegger <[email protected]>	2013-08-08 18:55:57 +0200
commit	883987503fc79691398eb024f37480ff083805a3 (patch)
tree	5b806f083a13532fe26c9fa30fb390917a271be8
parent	e3b5e2db1b189092522c43ba789c4f244ddb7c60 (diff)