From 9311bc253e0942af621b7efab0549ed75c0ce4a8 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Sun, 10 Jul 2005 11:23:10 +0000
Subject: Add runtime compiled x87 implementations for most of the remaining
 opcodes.  When enabled via environment vars, gears runs and almost looks
 right but other apps are still quite buggy.

---
 src/mesa/tnl/t_vb_arbprogram.c | 53 +++++++++++++++++++++++++++---------------
 1 file changed, 34 insertions(+), 19 deletions(-)

(limited to 'src/mesa/tnl/t_vb_arbprogram.c')

diff --git a/src/mesa/tnl/t_vb_arbprogram.c b/src/mesa/tnl/t_vb_arbprogram.c
index 508233cf316..13041949d17 100644
--- a/src/mesa/tnl/t_vb_arbprogram.c
+++ b/src/mesa/tnl/t_vb_arbprogram.c
@@ -110,6 +110,10 @@ static GLfloat ApproxPower(GLfloat x, GLfloat y)
    return (GLfloat) _mesa_pow(x, y);
 }
 
+static GLfloat rough_approx_log2_0_1(GLfloat x)
+{
+   return LOG2(x);
+}
 
 
 
@@ -273,13 +277,11 @@ static void do_EXP( struct arb_vp_machine *m, union instruction op )
    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
    GLfloat tmp = arg0[0];
    GLfloat flr_tmp = FLOORF(tmp);
+   GLfloat frac_tmp = tmp - flr_tmp;
 
-   /* KW: nvvertexec has an optimized version of this which is pretty
-    * hard to understand/validate, but avoids the RoughApproxExp2.
-    */
-   result[0] = (GLfloat) (1 << (int)flr_tmp);
-   result[1] = tmp - flr_tmp;
-   result[2] = RoughApproxExp2(tmp);
+   result[0] = ldexpf(1.0, (int)flr_tmp);
+   result[1] = frac_tmp;
+   result[2] = ldexpf(rough_approx_log2_0_1(frac_tmp), (int)flr_tmp);
    result[3] = 1.0F;
 }
 
@@ -322,18 +324,22 @@ static void do_LIT( struct arb_vp_machine *m, union instruction op )
 {
    GLfloat *result = m->File[0][op.alu.dst];
    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
-
-   const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
    GLfloat tmp[4];
 
-   tmp[0] = MAX2(arg0[0], 0.0F);
-   tmp[1] = MAX2(arg0[1], 0.0F);
-   tmp[3] = CLAMP(arg0[3], -(128.0F - epsilon), (128.0F - epsilon));
+   tmp[0] = 1.0;
+   tmp[1] = 0.0;
+   tmp[2] = 0.0;
+   tmp[3] = 1.0;
 
-   result[0] = 1.0;
-   result[1] = tmp[0];
-   result[2] = (tmp[0] > 0.0) ? RoughApproxPower(tmp[1], tmp[3]) : 0.0F;
-   result[3] = 1.0;
+   if (arg0[0] > 0.0) {
+      tmp[1] = arg0[0];
+
+      if (arg0[1] > 0.0) {
+	 tmp[2] = RoughApproxPower(arg0[1], arg0[3]);
+      }
+   }
+
+   COPY_4V(result, tmp);
 }
 
 
@@ -353,7 +359,7 @@ static void do_LOG( struct arb_vp_machine *m, union instruction op )
 
    result[0] = (GLfloat) (exponent - 1);
    result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
-   result[2] = result[0] + LOG2(result[1]);
+   result[2] = exponent + LOG2(mantissa);
    result[3] = 1.0;
 }
 
@@ -1201,9 +1207,6 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage)
    p = (struct tnl_compiled_program *)program->TnlData;
    assert(p);
 
-   /* Initialize regs where necessary:
-    */
-   ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1);
 
    m->nr_inputs = m->nr_outputs = 0;
 
@@ -1386,6 +1389,15 @@ static GLboolean init_vertex_program( GLcontext *ctx,
 
    m->File[0] = ALIGN_MALLOC(REG_MAX * sizeof(GLfloat) * 4, 16);
 
+   /* Initialize regs where necessary:
+    */
+   ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1);
+   ASSIGN_4V(m->File[0][REG_ONES], 1, 1, 1, 1);
+   ASSIGN_4V(m->File[0][REG_SWZ], -1, 1, 0, 0);
+   ASSIGN_4V(m->File[0][REG_NEG], -1, -1, -1, -1);
+   ASSIGN_4V(m->File[0][REG_LIT], 1, 0, 0, 1);
+   ASSIGN_4V(m->File[0][REG_LIT2], 1, .5, .2, 1); /* debug value */
+
    if (_mesa_getenv("MESA_EXPERIMENTAL"))
       m->try_codegen = 1;
 
@@ -1402,6 +1414,9 @@ static GLboolean init_vertex_program( GLcontext *ctx,
    if (ctx->_MaintainTnlProgram)
       _mesa_allow_light_in_model( ctx, GL_FALSE );
 
+   m->fpucntl_rnd_neg = RND_NEG_FPU; /* const value */
+   m->fpucntl_restore = RESTORE_FPU; /* const value */
+
    return GL_TRUE;
 }
 
-- 
cgit v1.2.3