gallium: implement reciprocal sqrt() with C code for now.

Some conformance lighting tests fail with the SSE rsqrt instruction, which only computes a low-precision approximation of 1/sqrt(x).
author: Brian Paul <[email protected]> 2008-04-18 11:15:18 -0600
committer: Brian Paul <[email protected]> 2008-04-18 11:46:00 -0600
commit: 19218e2195f3dffc9403f16a742ba8c63edbf8b4 (patch)
tree: 161803e974ee2b7147646575dd6fc578fa439c3b
parent: 26c27f6636069ca849a740c3969c577d841484e2 (diff)
1 files changed, 30 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 0a3a7559ca4..6f785be3f56 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -36,6 +36,8 @@
 
 #if defined(__i386__) || defined(__386__)
 
+#define HIGH_PRECISION 1  /* for 1/sqrt() */
+
 #define DUMP_SSE  0
 
 #if DUMP_SSE
@@ -1137,16 +1139,44 @@ emit_rcp (
       make_xmm( xmm_src ) );
 }
 
+#if HIGH_PRECISION
+static void XSTDCALL
+rsqrt4f(
+   float *store )
+{
+#ifdef WIN32
+   store[0] = 1.0F / (float) sqrt( (double) store[0] );
+   store[1] = 1.0F / (float) sqrt( (double) store[1] );
+   store[2] = 1.0F / (float) sqrt( (double) store[2] );
+   store[3] = 1.0F / (float) sqrt( (double) store[3] );
+#else
+   const unsigned X = TEMP_R0 * 16;
+   store[X + 0] = 1.0F / sqrt( store[X + 0] );
+   store[X + 1] = 1.0F / sqrt( store[X + 1] );
+   store[X + 2] = 1.0F / sqrt( store[X + 2] );
+   store[X + 3] = 1.0F / sqrt( store[X + 3] );
+#endif
+}
+#endif
+
 static void
 emit_rsqrt(
    struct x86_function *func,
    unsigned xmm_dst,
    unsigned xmm_src )
 {
+#if HIGH_PRECISION
+   emit_func_call_dst_src(
+      func,
+      xmm_dst,
+      xmm_src,
+      rsqrt4f );
+#else
    emit_rsqrtps(
       func,
       make_xmm( xmm_dst ),
       make_xmm( xmm_src ) );
+#endif
 }
 
 static void
author	Brian Paul <[email protected]>	2008-04-18 11:15:18 -0600
committer	Brian Paul <[email protected]>	2008-04-18 11:46:00 -0600
commit	19218e2195f3dffc9403f16a742ba8c63edbf8b4 (patch)
tree	161803e974ee2b7147646575dd6fc578fa439c3b
parent	26c27f6636069ca849a740c3969c577d841484e2 (diff)