summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
author: Jose Fonseca <[email protected]> 2016-04-01 11:06:30 +0100
committer: Jose Fonseca <[email protected]> 2016-04-03 22:08:57 +0100
commit: 324451e73fae17c2844f24c7e02000bddc260e78 (patch)
tree: 1d0b6cea499cf222f3fe20d50eab2be41e0e71aa /src/gallium/auxiliary
parent: 5fa31a4aba5bce5b42ba4188119eafa0f6c3a7f1 (diff)
gallivm: Add debug option to force SSE2.
For simulating less capable machines.

Reviewed-by: Roland Scheidegger <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_init.c 25
1 file changed, 14 insertions, 11 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index ab55be4c439..6e08ac48d72 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -399,6 +399,20 @@ lp_build_init(void)
util_cpu_detect();
+ /* For simulating less capable machines */
+#ifdef DEBUG
+ if (debug_get_bool_option("LP_FORCE_SSE2", FALSE)) {
+ assert(util_cpu_caps.has_sse2);
+ util_cpu_caps.has_sse3 = 0;
+ util_cpu_caps.has_ssse3 = 0;
+ util_cpu_caps.has_sse4_1 = 0;
+ util_cpu_caps.has_sse4_2 = 0;
+ util_cpu_caps.has_avx = 0;
+ util_cpu_caps.has_avx2 = 0;
+ util_cpu_caps.has_f16c = 0;
+ }
+#endif
+
/* AMD Bulldozer AVX's throughput is the same as SSE2; and because using
* 8-wide vector needs more floating ops than 4-wide (due to padding), it is
* actually more efficient to use 4-wide vectors on this processor.
@@ -456,17 +470,6 @@ lp_build_init(void)
gallivm_initialized = TRUE;
-#if 0
- /* For simulating less capable machines */
- util_cpu_caps.has_sse3 = 0;
- util_cpu_caps.has_ssse3 = 0;
- util_cpu_caps.has_sse4_1 = 0;
- util_cpu_caps.has_sse4_2 = 0;
- util_cpu_caps.has_avx = 0;
- util_cpu_caps.has_avx2 = 0;
- util_cpu_caps.has_f16c = 0;
-#endif
-
return TRUE;
}