summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/auxiliary/util/u_cpu_detect.c 29
-rw-r--r-- src/gallium/auxiliary/util/u_cpu_detect.h 1
-rw-r--r-- src/gallium/auxiliary/util/u_math.c 2
3 files changed, 31 insertions, 1 deletion
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 588fc7c7292..c58a3dd07fc 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -230,8 +230,28 @@ static INLINE uint64_t xgetbv(void)
#else
return 0;
#endif
+}
+
+#if defined(PIPE_ARCH_X86)
+static INLINE boolean sse2_has_daz(void) /* nonzero if FXSAVE's MXCSR_MASK has bit 6 (DAZ) set */
+{
+ struct { /* 512-byte FXSAVE image; only the MXCSR_MASK word (byte offset 28) is named */
+ uint32_t pad1[7];
+ uint32_t mxcsr_mask;
+ uint32_t pad2[128-8];
+ } PIPE_ALIGN_VAR(16) fxarea;
+
+ fxarea.mxcsr_mask = 0; /* stays 0 (reported as "no DAZ") if neither compiler branch below runs */
+#if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO))
+ asm volatile ("fxsave %0" : "+m" (fxarea)); /* fxsave WRITES fxarea, so it must be an in/out operand */
+#elif (defined(PIPE_CC_MSVC) || defined(PIPE_CC_ICL))
+ _fxsave(&fxarea); /* the intrinsic takes a pointer to the save area, not the struct by value */
+#endif
+ return !!(fxarea.mxcsr_mask & (1 << 6)); /* bit 6 of MXCSR_MASK advertises the DAZ control bit */
+}
+#endif
+
#endif /* X86 or X86_64 */
void
@@ -310,6 +330,12 @@ util_cpu_detect(void)
((xgetbv() & 6) == 6); // XMM & YMM
util_cpu_caps.has_f16c = (regs2[2] >> 29) & 1;
util_cpu_caps.has_mmx2 = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */
+#if defined(PIPE_ARCH_X86_64)
+ util_cpu_caps.has_daz = 1;
+#else
+ util_cpu_caps.has_daz = util_cpu_caps.has_sse3 ||
+ (util_cpu_caps.has_sse2 && sse2_has_daz());
+#endif
cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
if (cacheline > 0)
@@ -368,9 +394,12 @@ util_cpu_detect(void)
debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
debug_printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2);
debug_printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx);
+ debug_printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c);
+ debug_printf("util_cpu_caps.has_popcnt = %u\n", util_cpu_caps.has_popcnt);
debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
+ debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
}
#endif
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h
index f9cd6475e45..cc3e0ce0344 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -68,6 +68,7 @@ struct util_cpu_caps {
unsigned has_3dnow:1;
unsigned has_3dnow_ext:1;
unsigned has_altivec:1;
+ unsigned has_daz:1;
};
extern struct util_cpu_caps
diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c
index f3fe392babe..6981ee93912 100644
--- a/src/gallium/auxiliary/util/u_math.c
+++ b/src/gallium/auxiliary/util/u_math.c
@@ -111,7 +111,7 @@ util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)
if (util_cpu_caps.has_sse) {
/* Enable flush to zero mode */
current_mxcsr |= _MM_FLUSH_ZERO_MASK;
- if (util_cpu_caps.has_sse3) {
+ if (util_cpu_caps.has_daz) {
/* Enable denormals are zero mode */
current_mxcsr |= _MM_DENORMALS_ZERO_MASK;
}