From 92fcbf6e7bc622dcace226bb70ff6d5cdbdbaecb Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Fri, 15 Feb 2008 20:07:18 +0900
Subject: Code reorganization: s/aux/auxiliary/.

"aux" is a reserved name on Windows (X_X)
---
 src/gallium/auxiliary/tgsi/Makefile              |    3 +
 src/gallium/auxiliary/tgsi/exec/Makefile         |    3 +
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c      | 2485 ++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h      |  239 +++
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c      | 2378 +++++++++++++++++++++
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h      |   26 +
 src/gallium/auxiliary/tgsi/util/tgsi_build.c     | 1371 ++++++++++++
 src/gallium/auxiliary/tgsi/util/tgsi_build.h     |  320 +++
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c      | 1581 ++++++++++++++
 src/gallium/auxiliary/tgsi/util/tgsi_dump.h      |   28 +
 src/gallium/auxiliary/tgsi/util/tgsi_parse.c     |  319 +++
 src/gallium/auxiliary/tgsi/util/tgsi_parse.h     |  121 ++
 src/gallium/auxiliary/tgsi/util/tgsi_transform.c |  199 ++
 src/gallium/auxiliary/tgsi/util/tgsi_transform.h |   93 +
 src/gallium/auxiliary/tgsi/util/tgsi_util.c      |  274 +++
 src/gallium/auxiliary/tgsi/util/tgsi_util.h      |   70 +
 16 files changed, 9510 insertions(+)
 create mode 100644 src/gallium/auxiliary/tgsi/Makefile
 create mode 100644 src/gallium/auxiliary/tgsi/exec/Makefile
 create mode 100644 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
 create mode 100644 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
 create mode 100755 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
 create mode 100755 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_build.c
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_build.h
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump.c
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump.h
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_parse.c
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_parse.h
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_transform.c
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_transform.h
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_util.c
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_util.h

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
new file mode 100644
index 00000000000..12a8bd0409e
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -0,0 +1,3 @@
+default:
+	cd ../.. ; make
+
diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile
new file mode 100644
index 00000000000..eb8b14e0e89
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/exec/Makefile
@@ -0,0 +1,3 @@
+default:
+	cd ../../.. ; make
+
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
new file mode 100644
index 00000000000..a8f64c2287f
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -0,0 +1,2485 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * TGSI interpretor/executor.
+ *
+ * Flow control information:
+ *
+ * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
+ * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
+ * care since a condition may be true for some quad components but false
+ * for other components.
+ *
+ * We basically execute all statements (even if they're in the part of
+ * an IF/ELSE clause that's "not taken") and use a special mask to
+ * control writing to destination registers.  This is the ExecMask.
+ * See store_dest().
+ *
+ * The ExecMask is computed from three other masks (CondMask, LoopMask and
+ * ContMask) which are controlled by the flow control instructions (namely:
+ * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
+ *
+ *
+ * Authors:
+ *   Michal Krol
+ *   Brian Paul
+ */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi_exec.h"
+
+#define TILE_TOP_LEFT     0
+#define TILE_TOP_RIGHT    1
+#define TILE_BOTTOM_LEFT  2
+#define TILE_BOTTOM_RIGHT 3
+
+/*
+ * Shorthand locations of various utility registers (_I = Index, _C = Channel)
+ */
+#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
+#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
+#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
+#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
+#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
+#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
+#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
+#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
+#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
+#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
+#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
+#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
+#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
+#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
+#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
+#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
+#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
+#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
+#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
+#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
+#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
+#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
+#define TEMP_R0            TGSI_EXEC_TEMP_R0
+
+#define FOR_EACH_CHANNEL(CHAN)\
+   for (CHAN = 0; CHAN < 4; CHAN++)
+
+#define IS_CHANNEL_ENABLED(INST, CHAN)\
+   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define IS_CHANNEL_ENABLED2(INST, CHAN)\
+   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
+   FOR_EACH_CHANNEL( CHAN )\
+      if (IS_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
+   FOR_EACH_CHANNEL( CHAN )\
+      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
+
+
+/** The execution mask depends on the conditional mask and the loop mask */
+#define UPDATE_EXEC_MASK(MACH) \
+      MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
+
+
+#define CHAN_X  0
+#define CHAN_Y  1
+#define CHAN_Z  2
+#define CHAN_W  3
+
+
+
+static void
+tgsi_exec_prepare( struct tgsi_exec_machine *mach )
+{
+   struct tgsi_exec_labels *labels = &mach->Labels;
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction *instructions;
+   struct tgsi_full_declaration *declarations;
+   uint maxInstructions = 10, numInstructions = 0;
+   uint maxDeclarations = 10, numDeclarations = 0;
+   uint k;
+   uint instno = 0;
+
+   mach->ImmLimit = 0;
+   labels->count = 0;
+
+   declarations = (struct tgsi_full_declaration *)
+      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
+
+   instructions = (struct tgsi_full_instruction *)
+      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
+
+   k = tgsi_parse_init( &parse, mach->Tokens );
+   if (k != TGSI_PARSE_OK) {
+      debug_printf("Problem parsing!\n");
+      return;
+   }
+
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      uint pointer = parse.Position;
+      uint i;
+
+      tgsi_parse_token( &parse );
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         /* save expanded declaration */
+         if (numDeclarations == maxDeclarations) {
+            declarations = REALLOC(declarations,
+                                   maxDeclarations
+                                   * sizeof(struct tgsi_full_declaration),
+                                   (maxDeclarations + 10)
+                                   * sizeof(struct tgsi_full_declaration));
+            maxDeclarations += 10;
+         }
+         memcpy(declarations + numDeclarations,
+                &parse.FullToken.FullDeclaration,
+                sizeof(declarations[0]));
+         numDeclarations++;
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         {
+            uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+            assert( size % 4 == 0 );
+            assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
+
+            for( i = 0; i < size; i++ ) {
+               mach->Imms[mach->ImmLimit + i / 4][i % 4] = parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+            }
+            mach->ImmLimit += size / 4;
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         assert( labels->count < 128 );
+
+         labels->labels[labels->count][0] = instno;
+         labels->labels[labels->count][1] = pointer;
+         labels->count++;
+
+         /* save expanded instruction */
+         if (numInstructions == maxInstructions) {
+            instructions = REALLOC(instructions,
+                                   maxInstructions
+                                   * sizeof(struct tgsi_full_instruction),
+                                   (maxInstructions + 10)
+                                   * sizeof(struct tgsi_full_instruction));
+            maxInstructions += 10;
+         }
+         memcpy(instructions + numInstructions,
+                &parse.FullToken.FullInstruction,
+                sizeof(instructions[0]));
+         numInstructions++;
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+   tgsi_parse_free (&parse);
+
+   if (mach->Declarations) {
+      FREE( mach->Declarations );
+   }
+   mach->Declarations = declarations;
+   mach->NumDeclarations = numDeclarations;
+
+   if (mach->Instructions) {
+      FREE( mach->Instructions );
+   }
+   mach->Instructions = instructions;
+   mach->NumInstructions = numInstructions;
+}
+
+
+/**
+ * Initialize machine state by expanding tokens to full instructions,
+ * allocating temporary storage, setting up constants, etc.
+ * After this, we can call tgsi_exec_machine_run() many times.
+ */
+void
+tgsi_exec_machine_init(
+   struct tgsi_exec_machine *mach,
+   const struct tgsi_token *tokens,
+   uint numSamplers,
+   struct tgsi_sampler *samplers)
+{
+   uint i, k;
+   struct tgsi_parse_context parse;
+
+#if 0
+   tgsi_dump(tokens, 0);
+#endif
+
+   mach->Tokens = tokens;
+
+   mach->Samplers = samplers;
+
+   k = tgsi_parse_init (&parse, mach->Tokens);
+   if (k != TGSI_PARSE_OK) {
+      debug_printf( "Problem parsing!\n" );
+      return;
+   }
+
+   mach->Processor = parse.FullHeader.Processor.Processor;
+   tgsi_parse_free (&parse);
+
+   mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
+   mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
+
+   /* Setup constants. */
+   for( i = 0; i < 4; i++ ) {
+      mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
+      mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
+      mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
+      mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
+      mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
+      mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
+      mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
+      mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
+   }
+
+   tgsi_exec_prepare( mach );
+}
+
+
+void
+tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
+{
+   if (mach->Instructions) {
+      FREE(mach->Instructions);
+      mach->Instructions = NULL;
+      mach->NumInstructions = 0;
+   }
+   if (mach->Declarations) {
+      FREE(mach->Declarations);
+      mach->Declarations = NULL;
+      mach->NumDeclarations = 0;
+   }
+}
+
+
+static void
+micro_abs(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) fabs( (double) src->f[0] );
+   dst->f[1] = (float) fabs( (double) src->f[1] );
+   dst->f[2] = (float) fabs( (double) src->f[2] );
+   dst->f[3] = (float) fabs( (double) src->f[3] );
+}
+
+static void
+micro_add(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] + src1->f[0];
+   dst->f[1] = src0->f[1] + src1->f[1];
+   dst->f[2] = src0->f[2] + src1->f[2];
+   dst->f[3] = src0->f[3] + src1->f[3];
+}
+
+static void
+micro_iadd(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] + src1->i[0];
+   dst->i[1] = src0->i[1] + src1->i[1];
+   dst->i[2] = src0->i[2] + src1->i[2];
+   dst->i[3] = src0->i[3] + src1->i[3];
+}
+
+static void
+micro_and(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] & src1->u[0];
+   dst->u[1] = src0->u[1] & src1->u[1];
+   dst->u[2] = src0->u[2] & src1->u[2];
+   dst->u[3] = src0->u[3] & src1->u[3];
+}
+
+static void
+micro_ceil(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) ceil( (double) src->f[0] );
+   dst->f[1] = (float) ceil( (double) src->f[1] );
+   dst->f[2] = (float) ceil( (double) src->f[2] );
+   dst->f[3] = (float) ceil( (double) src->f[3] );
+}
+
+static void
+micro_cos(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) cos( (double) src->f[0] );
+   dst->f[1] = (float) cos( (double) src->f[1] );
+   dst->f[2] = (float) cos( (double) src->f[2] );
+   dst->f[3] = (float) cos( (double) src->f[3] );
+}
+
+static void
+micro_ddx(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] =
+   dst->f[1] =
+   dst->f[2] =
+   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
+}
+
+static void
+micro_ddy(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] =
+   dst->f[1] =
+   dst->f[2] =
+   dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
+}
+
+static void
+micro_div(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] / src1->f[0];
+   dst->f[1] = src0->f[1] / src1->f[1];
+   dst->f[2] = src0->f[2] / src1->f[2];
+   dst->f[3] = src0->f[3] / src1->f[3];
+}
+
+static void
+micro_udiv(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] / src1->u[0];
+   dst->u[1] = src0->u[1] / src1->u[1];
+   dst->u[2] = src0->u[2] / src1->u[2];
+   dst->u[3] = src0->u[3] / src1->u[3];
+}
+
+static void
+micro_eq(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
+   dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
+   dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
+   dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
+}
+
+static void
+micro_ieq(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
+   dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
+   dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
+   dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
+}
+
+static void
+micro_exp2(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src)
+{
+   dst->f[0] = (float) pow( 2.0, (double) src->f[0] );
+   dst->f[1] = (float) pow( 2.0, (double) src->f[1] );
+   dst->f[2] = (float) pow( 2.0, (double) src->f[2] );
+   dst->f[3] = (float) pow( 2.0, (double) src->f[3] );
+}
+
+static void
+micro_f2it(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->i[0] = (int) src->f[0];
+   dst->i[1] = (int) src->f[1];
+   dst->i[2] = (int) src->f[2];
+   dst->i[3] = (int) src->f[3];
+}
+
+static void
+micro_f2ut(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->u[0] = (uint) src->f[0];
+   dst->u[1] = (uint) src->f[1];
+   dst->u[2] = (uint) src->f[2];
+   dst->u[3] = (uint) src->f[3];
+}
+
+static void
+micro_flr(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) floor( (double) src->f[0] );
+   dst->f[1] = (float) floor( (double) src->f[1] );
+   dst->f[2] = (float) floor( (double) src->f[2] );
+   dst->f[3] = (float) floor( (double) src->f[3] );
+}
+
+static void
+micro_frc(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = src->f[0] - (float) floor( (double) src->f[0] );
+   dst->f[1] = src->f[1] - (float) floor( (double) src->f[1] );
+   dst->f[2] = src->f[2] - (float) floor( (double) src->f[2] );
+   dst->f[3] = src->f[3] - (float) floor( (double) src->f[3] );
+}
+
+static void
+micro_ge(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
+   dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
+   dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
+   dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
+}
+
+static void
+micro_i2f(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) src->i[0];
+   dst->f[1] = (float) src->i[1];
+   dst->f[2] = (float) src->i[2];
+   dst->f[3] = (float) src->i[3];
+}
+
+static void
+micro_lg2(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) log( (double) src->f[0] ) * 1.442695f;
+   dst->f[1] = (float) log( (double) src->f[1] ) * 1.442695f;
+   dst->f[2] = (float) log( (double) src->f[2] ) * 1.442695f;
+   dst->f[3] = (float) log( (double) src->f[3] ) * 1.442695f;
+}
+
+static void
+micro_lt(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
+   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
+   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
+   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
+}
+
+static void
+micro_ilt(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
+   dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
+   dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
+   dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
+}
+
+static void
+micro_ult(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
+   dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
+   dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
+   dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
+}
+
+static void
+micro_max(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
+   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
+   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
+   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
+}
+
+static void
+micro_imax(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
+   dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
+   dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
+   dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
+}
+
+static void
+micro_umax(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
+   dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
+   dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
+   dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
+}
+
+static void
+micro_min(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
+   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
+   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
+   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
+}
+
+static void
+micro_imin(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
+   dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
+   dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
+   dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
+}
+
+static void
+micro_umin(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
+   dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
+   dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
+   dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
+}
+
+static void
+micro_umod(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] % src1->u[0];
+   dst->u[1] = src0->u[1] % src1->u[1];
+   dst->u[2] = src0->u[2] % src1->u[2];
+   dst->u[3] = src0->u[3] % src1->u[3];
+}
+
+static void
+micro_mul(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] * src1->f[0];
+   dst->f[1] = src0->f[1] * src1->f[1];
+   dst->f[2] = src0->f[2] * src1->f[2];
+   dst->f[3] = src0->f[3] * src1->f[3];
+}
+
+static void
+micro_imul(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] * src1->i[0];
+   dst->i[1] = src0->i[1] * src1->i[1];
+   dst->i[2] = src0->i[2] * src1->i[2];
+   dst->i[3] = src0->i[3] * src1->i[3];
+}
+
+static void
+micro_imul64(
+   union tgsi_exec_channel *dst0,
+   union tgsi_exec_channel *dst1,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst1->i[0] = src0->i[0] * src1->i[0];
+   dst1->i[1] = src0->i[1] * src1->i[1];
+   dst1->i[2] = src0->i[2] * src1->i[2];
+   dst1->i[3] = src0->i[3] * src1->i[3];
+   dst0->i[0] = 0;
+   dst0->i[1] = 0;
+   dst0->i[2] = 0;
+   dst0->i[3] = 0;
+}
+
+static void
+micro_umul64(
+   union tgsi_exec_channel *dst0,
+   union tgsi_exec_channel *dst1,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst1->u[0] = src0->u[0] * src1->u[0];
+   dst1->u[1] = src0->u[1] * src1->u[1];
+   dst1->u[2] = src0->u[2] * src1->u[2];
+   dst1->u[3] = src0->u[3] * src1->u[3];
+   dst0->u[0] = 0;
+   dst0->u[1] = 0;
+   dst0->u[2] = 0;
+   dst0->u[3] = 0;
+}
+
+static void
+micro_movc(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2 )
+{
+   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
+   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
+   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
+   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
+}
+
+static void
+micro_neg(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = -src->f[0];
+   dst->f[1] = -src->f[1];
+   dst->f[2] = -src->f[2];
+   dst->f[3] = -src->f[3];
+}
+
+static void
+micro_ineg(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->i[0] = -src->i[0];
+   dst->i[1] = -src->i[1];
+   dst->i[2] = -src->i[2];
+   dst->i[3] = -src->i[3];
+}
+
+static void
+micro_not(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->u[0] = ~src->u[0];
+   dst->u[1] = ~src->u[1];
+   dst->u[2] = ~src->u[2];
+   dst->u[3] = ~src->u[3];
+}
+
+static void
+micro_or(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] | src1->u[0];
+   dst->u[1] = src0->u[1] | src1->u[1];
+   dst->u[2] = src0->u[2] | src1->u[2];
+   dst->u[3] = src0->u[3] | src1->u[3];
+}
+
+static void
+micro_pow(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = (float) pow( (double) src0->f[0], (double) src1->f[0] );
+   dst->f[1] = (float) pow( (double) src0->f[1], (double) src1->f[1] );
+   dst->f[2] = (float) pow( (double) src0->f[2], (double) src1->f[2] );
+   dst->f[3] = (float) pow( (double) src0->f[3], (double) src1->f[3] );
+}
+
+static void
+micro_rnd(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) floor( (double) (src->f[0] + 0.5f) );
+   dst->f[1] = (float) floor( (double) (src->f[1] + 0.5f) );
+   dst->f[2] = (float) floor( (double) (src->f[2] + 0.5f) );
+   dst->f[3] = (float) floor( (double) (src->f[3] + 0.5f) );
+}
+
+static void
+micro_shl(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] << src1->i[0];
+   dst->i[1] = src0->i[1] << src1->i[1];
+   dst->i[2] = src0->i[2] << src1->i[2];
+   dst->i[3] = src0->i[3] << src1->i[3];
+}
+
+static void
+micro_ishr(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] >> src1->i[0];
+   dst->i[1] = src0->i[1] >> src1->i[1];
+   dst->i[2] = src0->i[2] >> src1->i[2];
+   dst->i[3] = src0->i[3] >> src1->i[3];
+}
+
+static void
+micro_trunc(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0 )
+{
+   dst->f[0] = (float) (int) src0->f[0];
+   dst->f[1] = (float) (int) src0->f[1];
+   dst->f[2] = (float) (int) src0->f[2];
+   dst->f[3] = (float) (int) src0->f[3];
+}
+
+static void
+micro_ushr(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] >> src1->u[0];
+   dst->u[1] = src0->u[1] >> src1->u[1];
+   dst->u[2] = src0->u[2] >> src1->u[2];
+   dst->u[3] = src0->u[3] >> src1->u[3];
+}
+
+static void
+micro_sin(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) sin( (double) src->f[0] );
+   dst->f[1] = (float) sin( (double) src->f[1] );
+   dst->f[2] = (float) sin( (double) src->f[2] );
+   dst->f[3] = (float) sin( (double) src->f[3] );
+}
+
+static void
+micro_sqrt( union tgsi_exec_channel *dst,
+            const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) sqrt( (double) src->f[0] );
+   dst->f[1] = (float) sqrt( (double) src->f[1] );
+   dst->f[2] = (float) sqrt( (double) src->f[2] );
+   dst->f[3] = (float) sqrt( (double) src->f[3] );
+}
+
+static void
+micro_sub(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] - src1->f[0];
+   dst->f[1] = src0->f[1] - src1->f[1];
+   dst->f[2] = src0->f[2] - src1->f[2];
+   dst->f[3] = src0->f[3] - src1->f[3];
+}
+
+static void
+micro_u2f(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) src->u[0];
+   dst->f[1] = (float) src->u[1];
+   dst->f[2] = (float) src->u[2];
+   dst->f[3] = (float) src->u[3];
+}
+
+static void
+micro_xor(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] ^ src1->u[0];
+   dst->u[1] = src0->u[1] ^ src1->u[1];
+   dst->u[2] = src0->u[2] ^ src1->u[2];
+   dst->u[3] = src0->u[3] ^ src1->u[3];
+}
+
+static void
+fetch_src_file_channel(
+   const struct tgsi_exec_machine *mach,
+   const uint file,
+   const uint swizzle,
+   const union tgsi_exec_channel *index,
+   union tgsi_exec_channel *chan )
+{
+   switch( swizzle ) {
+   case TGSI_EXTSWIZZLE_X:
+   case TGSI_EXTSWIZZLE_Y:
+   case TGSI_EXTSWIZZLE_Z:
+   case TGSI_EXTSWIZZLE_W:
+      switch( file ) {
+      case TGSI_FILE_CONSTANT:
+         chan->f[0] = mach->Consts[index->i[0]][swizzle];
+         chan->f[1] = mach->Consts[index->i[1]][swizzle];
+         chan->f[2] = mach->Consts[index->i[2]][swizzle];
+         chan->f[3] = mach->Consts[index->i[3]][swizzle];
+         break;
+
+      case TGSI_FILE_INPUT:
+         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
+         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
+         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
+         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
+         break;
+
+      case TGSI_FILE_TEMPORARY:
+         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
+         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
+         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
+         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
+         break;
+
+      case TGSI_FILE_IMMEDIATE:
+         assert( index->i[0] < (int) mach->ImmLimit );
+         chan->f[0] = mach->Imms[index->i[0]][swizzle];
+         assert( index->i[1] < (int) mach->ImmLimit );
+         chan->f[1] = mach->Imms[index->i[1]][swizzle];
+         assert( index->i[2] < (int) mach->ImmLimit );
+         chan->f[2] = mach->Imms[index->i[2]][swizzle];
+         assert( index->i[3] < (int) mach->ImmLimit );
+         chan->f[3] = mach->Imms[index->i[3]][swizzle];
+         break;
+
+      case TGSI_FILE_ADDRESS:
+         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
+         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
+         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
+         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
+         break;
+
+      case TGSI_FILE_OUTPUT:
+         /* vertex/fragment output vars can be read too */
+         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
+         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
+         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
+         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
+         break;
+
+      default:
+         assert( 0 );
+      }
+      break;
+
+   case TGSI_EXTSWIZZLE_ZERO:
+      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
+      break;
+
+   case TGSI_EXTSWIZZLE_ONE:
+      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
+      break;
+
+   default:
+      assert( 0 );
+   }
+}
+
+static void
+fetch_source(
+   const struct tgsi_exec_machine *mach,
+   union tgsi_exec_channel *chan,
+   const struct tgsi_full_src_register *reg,
+   const uint chan_index )
+{
+   union tgsi_exec_channel index;
+   uint swizzle;
+
+   index.i[0] =
+   index.i[1] =
+   index.i[2] =
+   index.i[3] = reg->SrcRegister.Index;
+
+   if (reg->SrcRegister.Indirect) {
+      union tgsi_exec_channel index2;
+      union tgsi_exec_channel indir_index;
+
+      index2.i[0] =
+      index2.i[1] =
+      index2.i[2] =
+      index2.i[3] = reg->SrcRegisterInd.Index;
+
+      swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
+      fetch_src_file_channel(
+         mach,
+         reg->SrcRegisterInd.File,
+         swizzle,
+         &index2,
+         &indir_index );
+
+      index.i[0] += indir_index.i[0];
+      index.i[1] += indir_index.i[1];
+      index.i[2] += indir_index.i[2];
+      index.i[3] += indir_index.i[3];
+   }
+
+   if( reg->SrcRegister.Dimension ) {
+      switch( reg->SrcRegister.File ) {
+      case TGSI_FILE_INPUT:
+         index.i[0] *= 17;
+         index.i[1] *= 17;
+         index.i[2] *= 17;
+         index.i[3] *= 17;
+         break;
+      case TGSI_FILE_CONSTANT:
+         index.i[0] *= 4096;
+         index.i[1] *= 4096;
+         index.i[2] *= 4096;
+         index.i[3] *= 4096;
+         break;
+      default:
+         assert( 0 );
+      }
+
+      index.i[0] += reg->SrcRegisterDim.Index;
+      index.i[1] += reg->SrcRegisterDim.Index;
+      index.i[2] += reg->SrcRegisterDim.Index;
+      index.i[3] += reg->SrcRegisterDim.Index;
+
+      if (reg->SrcRegisterDim.Indirect) {
+         union tgsi_exec_channel index2;
+         union tgsi_exec_channel indir_index;
+
+         index2.i[0] =
+         index2.i[1] =
+         index2.i[2] =
+         index2.i[3] = reg->SrcRegisterDimInd.Index;
+
+         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
+         fetch_src_file_channel(
+            mach,
+            reg->SrcRegisterDimInd.File,
+            swizzle,
+            &index2,
+            &indir_index );
+
+         index.i[0] += indir_index.i[0];
+         index.i[1] += indir_index.i[1];
+         index.i[2] += indir_index.i[2];
+         index.i[3] += indir_index.i[3];
+      }
+   }
+
+   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+   fetch_src_file_channel(
+      mach,
+      reg->SrcRegister.File,
+      swizzle,
+      &index,
+      chan );
+
+   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
+   case TGSI_UTIL_SIGN_CLEAR:
+      micro_abs( chan, chan );
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      micro_abs( chan, chan );
+      micro_neg( chan, chan );
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      micro_neg( chan, chan );
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+      break;
+   }
+
+   if (reg->SrcRegisterExtMod.Complement) {
+      micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
+   }
+}
+
+static void
+store_dest(
+   struct tgsi_exec_machine *mach,
+   const union tgsi_exec_channel *chan,
+   const struct tgsi_full_dst_register *reg,
+   const struct tgsi_full_instruction *inst,
+   uint chan_index )
+{
+   union tgsi_exec_channel *dst;
+
+   switch( reg->DstRegister.File ) {
+   case TGSI_FILE_NULL:
+      return;
+
+   case TGSI_FILE_OUTPUT:
+      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
+                           + reg->DstRegister.Index].xyzw[chan_index];
+      break;
+
+   case TGSI_FILE_TEMPORARY:
+      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
+      break;
+
+   case TGSI_FILE_ADDRESS:
+      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
+      break;
+
+   default:
+      assert( 0 );
+      return;
+   }
+
+   switch (inst->Instruction.Saturate)
+   {
+   case TGSI_SAT_NONE:
+      if (mach->ExecMask & 0x1)
+         dst->i[0] = chan->i[0];
+      if (mach->ExecMask & 0x2)
+         dst->i[1] = chan->i[1];
+      if (mach->ExecMask & 0x4)
+         dst->i[2] = chan->i[2];
+      if (mach->ExecMask & 0x8)
+         dst->i[3] = chan->i[3];
+      break;
+
+   case TGSI_SAT_ZERO_ONE:
+      /* XXX need to obey ExecMask here */
+      micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+      micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+      break;
+
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      assert( 0 );
+      break;
+
+   default:
+      assert( 0 );
+   }
+}
+
+#define FETCH(VAL,INDEX,CHAN)\
+    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
+
+#define STORE(VAL,INDEX,CHAN)\
+    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
+
+
+/**
+ * Execute ARB-style KIL which is predicated by a src register.
+ * Kill fragment if any of the four values is less than zero.
+ */
+static void
+exec_kilp(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   uint uniquemask;
+   uint chan_index;
+   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
+   union tgsi_exec_channel r[1];
+
+   /* This mask stores component bits that were already tested. Note that
+    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
+    * tested. */
+   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+
+   for (chan_index = 0; chan_index < 4; chan_index++)
+   {
+      uint swizzle;
+      uint i;
+
+      /* unswizzle channel */
+      swizzle = tgsi_util_get_full_src_register_extswizzle (
+                        &inst->FullSrcRegisters[0],
+                        chan_index);
+
+      /* check if the component has not been already tested */
+      if (uniquemask & (1 << swizzle))
+         continue;
+      uniquemask |= 1 << swizzle;
+
+      FETCH(&r[0], 0, chan_index);
+      for (i = 0; i < 4; i++)
+         if (r[0].f[i] < 0.0f)
+            kilmask |= 1 << i;
+   }
+
+   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
+}
+
+
+/*
+ * Fetch a texel using STR texture coordinates.
+ */
+static void
+fetch_texel( struct tgsi_sampler *sampler,
+             const union tgsi_exec_channel *s,
+             const union tgsi_exec_channel *t,
+             const union tgsi_exec_channel *p,
+             float lodbias,  /* XXX should be float[4] */
+             union tgsi_exec_channel *r,
+             union tgsi_exec_channel *g,
+             union tgsi_exec_channel *b,
+             union tgsi_exec_channel *a )
+{
+   uint j;
+   float rgba[NUM_CHANNELS][QUAD_SIZE];
+
+   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
+
+   for (j = 0; j < 4; j++) {
+      r->f[j] = rgba[0][j];
+      g->f[j] = rgba[1][j];
+      b->f[j] = rgba[2][j];
+      a->f[j] = rgba[3][j];
+   }
+}
+
+
+static void
+exec_tex(struct tgsi_exec_machine *mach,
+         const struct tgsi_full_instruction *inst,
+         boolean biasLod)
+{
+   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   union tgsi_exec_channel r[8];
+   uint chan_index;
+   float lodBias;
+
+   /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
+
+   switch (inst->InstructionExtTexture.Texture) {
+   case TGSI_TEXTURE_1D:
+
+      FETCH(&r[0], 0, CHAN_X);
+
+      switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
+      case TGSI_EXTSWIZZLE_W:
+         FETCH(&r[1], 0, CHAN_W);
+         micro_div( &r[0], &r[0], &r[1] );
+         break;
+
+      case TGSI_EXTSWIZZLE_ONE:
+         break;
+
+      default:
+         assert (0);
+      }
+
+      if (biasLod) {
+         FETCH(&r[1], 0, CHAN_W);
+         lodBias = r[2].f[0];
+      }
+      else
+         lodBias = 0.0;
+
+      fetch_texel(&mach->Samplers[unit],
+                  &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
+                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+      break;
+
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 0, CHAN_Z);
+
+      switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
+      case TGSI_EXTSWIZZLE_W:
+         FETCH(&r[3], 0, CHAN_W);
+         micro_div( &r[0], &r[0], &r[3] );
+         micro_div( &r[1], &r[1], &r[3] );
+         micro_div( &r[2], &r[2], &r[3] );
+         break;
+
+      case TGSI_EXTSWIZZLE_ONE:
+         break;
+
+      default:
+         assert (0);
+      }
+
+      if (biasLod) {
+         FETCH(&r[3], 0, CHAN_W);
+         lodBias = r[3].f[0];
+      }
+      else
+         lodBias = 0.0;
+
+      fetch_texel(&mach->Samplers[unit],
+                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
+                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
+      break;
+
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 0, CHAN_Z);
+
+      switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
+      case TGSI_EXTSWIZZLE_W:
+         FETCH(&r[3], 0, CHAN_W);
+         micro_div( &r[0], &r[0], &r[3] );
+         micro_div( &r[1], &r[1], &r[3] );
+         micro_div( &r[2], &r[2], &r[3] );
+         break;
+
+      case TGSI_EXTSWIZZLE_ONE:
+         break;
+
+      default:
+         assert (0);
+      }
+
+      if (biasLod) {
+         FETCH(&r[3], 0, CHAN_W);
+         lodBias = r[3].f[0];
+      }
+      else
+         lodBias = 0.0;
+
+      fetch_texel(&mach->Samplers[unit],
+                  &r[0], &r[1], &r[2], lodBias,
+                  &r[0], &r[1], &r[2], &r[3]);
+      break;
+
+   default:
+      assert (0);
+   }
+
+   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+      STORE( &r[chan_index], 0, chan_index );
+   }
+}
+
+
+/**
+ * Evaluate a constant-valued coefficient at the position of the
+ * current quad.
+ */
+static void
+eval_constant_coef(
+   struct tgsi_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan )
+{
+   unsigned i;
+
+   for( i = 0; i < QUAD_SIZE; i++ ) {
+      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
+   }
+}
+
+/**
+ * Evaluate a linear-valued coefficient at the position of the
+ * current quad.
+ */
+static void
+eval_linear_coef(
+   struct tgsi_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan )
+{
+   const float x = mach->QuadPos.xyzw[0].f[0];
+   const float y = mach->QuadPos.xyzw[1].f[0];
+   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
+   const float dady = mach->InterpCoefs[attrib].dady[chan];
+   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
+   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
+   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
+   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
+   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
+}
+
+/**
+ * Evaluate a perspective-valued coefficient at the position of the
+ * current quad.
+ */
+static void
+eval_perspective_coef(
+   struct tgsi_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan )
+{
+   const float x = mach->QuadPos.xyzw[0].f[0];
+   const float y = mach->QuadPos.xyzw[1].f[0];
+   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
+   const float dady = mach->InterpCoefs[attrib].dady[chan];
+   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
+   const float *w = mach->QuadPos.xyzw[3].f;
+   /* divide by W here */
+   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
+   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
+   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
+   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
+}
+
+
+typedef void (* eval_coef_func)(
+   struct tgsi_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan );
+
+static void
+exec_declaration(
+   struct tgsi_exec_machine *mach,
+   const struct tgsi_full_declaration *decl )
+{
+   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
+      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+         unsigned first, last, mask;
+         eval_coef_func eval;
+
+         assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
+
+         first = decl->u.DeclarationRange.First;
+         last = decl->u.DeclarationRange.Last;
+         mask = decl->Declaration.UsageMask;
+
+         switch( decl->Interpolation.Interpolate ) {
+         case TGSI_INTERPOLATE_CONSTANT:
+            eval = eval_constant_coef;
+            break;
+
+         case TGSI_INTERPOLATE_LINEAR:
+            eval = eval_linear_coef;
+            break;
+
+         case TGSI_INTERPOLATE_PERSPECTIVE:
+            eval = eval_perspective_coef;
+            break;
+
+         default:
+            assert( 0 );
+         }
+
+         if( mask == TGSI_WRITEMASK_XYZW ) {
+            unsigned i, j;
+
+            for( i = first; i <= last; i++ ) {
+               for( j = 0; j < NUM_CHANNELS; j++ ) {
+                  eval( mach, i, j );
+               }
+            }
+         }
+         else {
+            unsigned i, j;
+
+            for( j = 0; j < NUM_CHANNELS; j++ ) {
+               if( mask & (1 << j) ) {
+                  for( i = first; i <= last; i++ ) {
+                     eval( mach, i, j );
+                  }
+               }
+            }
+         }
+      }
+   }
+}
+
+static void
+exec_instruction(
+   struct tgsi_exec_machine *mach,
+   const struct tgsi_full_instruction *inst,
+   int *pc )
+{
+   uint chan_index;
+   union tgsi_exec_channel r[8];
+
+   (*pc)++;
+
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ARL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 FETCH( &r[0], 0, chan_index );
+	 micro_f2it( &r[0], &r[0] );
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MOV:
+   /* TGSI_OPCODE_SWZ */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LIT:
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+	 FETCH( &r[0], 0, CHAN_X );
+	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+	    micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+	    STORE( &r[0], 0, CHAN_Y );
+	 }
+
+	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+	    FETCH( &r[1], 0, CHAN_Y );
+	    micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+
+	    FETCH( &r[2], 0, CHAN_W );
+	    micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
+	    micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
+	    micro_pow( &r[1], &r[1], &r[2] );
+	    micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+	    STORE( &r[0], 0, CHAN_Z );
+	 }
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_RCP:
+   /* TGSI_OPCODE_RECIP */
+      FETCH( &r[0], 0, CHAN_X );
+      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_RSQ:
+   /* TGSI_OPCODE_RECIPSQRT */
+      FETCH( &r[0], 0, CHAN_X );
+      micro_sqrt( &r[0], &r[0] );
+      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_EXP:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_LOG:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_MUL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
+      {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         micro_mul( &r[0], &r[0], &r[1] );
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_ADD:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_add( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DP3:
+   /* TGSI_OPCODE_DOT3 */
+      FETCH( &r[0], 0, CHAN_X );
+      FETCH( &r[1], 1, CHAN_X );
+      micro_mul( &r[0], &r[0], &r[1] );
+
+      FETCH( &r[1], 0, CHAN_Y );
+      FETCH( &r[2], 1, CHAN_Y );
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FETCH( &r[1], 0, CHAN_Z );
+      FETCH( &r[2], 1, CHAN_Z );
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+    case TGSI_OPCODE_DP4:
+    /* TGSI_OPCODE_DOT4 */
+       FETCH(&r[0], 0, CHAN_X);
+       FETCH(&r[1], 1, CHAN_X);
+
+       micro_mul( &r[0], &r[0], &r[1] );
+
+       FETCH(&r[1], 0, CHAN_Y);
+       FETCH(&r[2], 1, CHAN_Y);
+
+       micro_mul( &r[1], &r[1], &r[2] );
+       micro_add( &r[0], &r[0], &r[1] );
+
+       FETCH(&r[1], 0, CHAN_Z);
+       FETCH(&r[2], 1, CHAN_Z);
+
+       micro_mul( &r[1], &r[1], &r[2] );
+       micro_add( &r[0], &r[0], &r[1] );
+
+       FETCH(&r[1], 0, CHAN_W);
+       FETCH(&r[2], 1, CHAN_W);
+
+       micro_mul( &r[1], &r[1], &r[2] );
+       micro_add( &r[0], &r[0], &r[1] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DST:
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+	 FETCH( &r[0], 0, CHAN_Y );
+	 FETCH( &r[1], 1, CHAN_Y);
+	 micro_mul( &r[0], &r[0], &r[1] );
+	 STORE( &r[0], 0, CHAN_Y );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+	 FETCH( &r[0], 0, CHAN_Z );
+	 STORE( &r[0], 0, CHAN_Z );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+	 FETCH( &r[0], 1, CHAN_W );
+	 STORE( &r[0], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MIN:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         /* XXX use micro_min()?? */
+         micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_MAX:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         /* XXX use micro_max()?? */
+         micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
+
+         STORE(&r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLT:
+   /* TGSI_OPCODE_SETLT */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SGE:
+   /* TGSI_OPCODE_SETGE */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MAD:
+   /* TGSI_OPCODE_MADD */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_mul( &r[0], &r[0], &r[1] );
+         FETCH( &r[1], 2, chan_index );
+         micro_add( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SUB:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         micro_sub( &r[0], &r[0], &r[1] );
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_LERP:
+   /* TGSI_OPCODE_LRP */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+         FETCH(&r[2], 2, chan_index);
+
+         micro_sub( &r[1], &r[1], &r[2] );
+         micro_mul( &r[0], &r[0], &r[1] );
+         micro_add( &r[0], &r[0], &r[2] );
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_CND:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CND0:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_DOT2ADD:
+      /* TGSI_OPCODE_DP2A */
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_INDEX:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_NEGATE:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_FRAC:
+   /* TGSI_OPCODE_FRC */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_frc( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CLAMP:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_FLOOR:
+   /* TGSI_OPCODE_FLR */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_flr( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_ROUND:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_rnd( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_EXPBASE2:
+    /* TGSI_OPCODE_EX2 */
+      FETCH(&r[0], 0, CHAN_X);
+
+      micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LOGBASE2:
+   /* TGSI_OPCODE_LG2 */
+      FETCH( &r[0], 0, CHAN_X );
+      micro_lg2( &r[0], &r[0] );
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_POWER:
+      /* TGSI_OPCODE_POW */
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 1, CHAN_X);
+
+      micro_pow( &r[0], &r[0], &r[1] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CROSSPRODUCT:
+      /* TGSI_OPCODE_XPD */
+      FETCH(&r[0], 0, CHAN_Y);
+      FETCH(&r[1], 1, CHAN_Z);
+
+      micro_mul( &r[2], &r[0], &r[1] );
+
+      FETCH(&r[3], 0, CHAN_Z);
+      FETCH(&r[4], 1, CHAN_Y);
+
+      micro_mul( &r[5], &r[3], &r[4] );
+      micro_sub( &r[2], &r[2], &r[5] );
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+         STORE( &r[2], 0, CHAN_X );
+      }
+
+      FETCH(&r[2], 1, CHAN_X);
+
+      micro_mul( &r[3], &r[3], &r[2] );
+
+      FETCH(&r[5], 0, CHAN_X);
+
+      micro_mul( &r[1], &r[1], &r[5] );
+      micro_sub( &r[3], &r[3], &r[1] );
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+         STORE( &r[3], 0, CHAN_Y );
+      }
+
+      micro_mul( &r[5], &r[5], &r[4] );
+      micro_mul( &r[0], &r[0], &r[2] );
+      micro_sub( &r[5], &r[5], &r[0] );
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         STORE( &r[5], 0, CHAN_Z );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+    case TGSI_OPCODE_MULTIPLYMATRIX:
+       assert (0);
+       break;
+
+    case TGSI_OPCODE_ABS:
+       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+          FETCH(&r[0], 0, chan_index);
+
+          micro_abs( &r[0], &r[0] );
+
+          STORE(&r[0], 0, chan_index);
+       }
+       break;
+
+   case TGSI_OPCODE_RCC:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_DPH:
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 1, CHAN_X);
+
+      micro_mul( &r[0], &r[0], &r[1] );
+
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 1, CHAN_Y);
+
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FETCH(&r[1], 0, CHAN_Z);
+      FETCH(&r[2], 1, CHAN_Z);
+
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FETCH(&r[1], 1, CHAN_W);
+
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_COS:
+      FETCH(&r[0], 0, CHAN_X);
+
+      micro_cos( &r[0], &r[0] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DDX:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_ddx( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DDY:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_ddy( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_KILP:
+      exec_kilp (mach, inst);
+      break;
+
+   case TGSI_OPCODE_KIL:
+      /* for enabled ExecMask bits, set the killed bit */
+      mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
+      break;
+
+   case TGSI_OPCODE_PK2H:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_PK2US:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_PK4B:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_PK4UB:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_RFL:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_SEQ:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_eq( &r[0], &r[0], &r[1],
+                   &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
+                   &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SFL:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_SGT:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SIN:
+      FETCH( &r[0], 0, CHAN_X );
+      micro_sin( &r[0], &r[0] );
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLE:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SNE:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_STR:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TEX:
+      /* simple texture lookup */
+      /* src[0] = texcoord */
+      /* src[1] = sampler unit */
+      exec_tex(mach, inst, FALSE);
+      break;
+
+   case TGSI_OPCODE_TXB:
+      /* Texture lookup with lod bias */
+      /* src[0] = texcoord (src[0].w = LOD bias) */
+      /* src[1] = sampler unit */
+      exec_tex(mach, inst, TRUE);
+      break;
+
+   case TGSI_OPCODE_TXD:
+      /* Texture lookup with explict partial derivatives */
+      /* src[0] = texcoord */
+      /* src[1] = d[strq]/dx */
+      /* src[2] = d[strq]/dy */
+      /* src[3] = sampler unit */
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TXL:
+      /* Texture lookup with explit LOD */
+      /* src[0] = texcoord (src[0].w = LOD) */
+      /* src[1] = sampler unit */
+      exec_tex(mach, inst, TRUE);
+      break;
+
+   case TGSI_OPCODE_UP2H:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_UP2US:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_UP4B:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_UP4UB:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_X2D:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_ARA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_ARR:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_BRA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CAL:
+      /* skip the call if no execution channels are enabled */
+      if (mach->ExecMask) {
+         /* do the call */
+
+         /* push the Cond, Loop, Cont stacks */
+         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+
+         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
+         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
+
+         /* note that PC was already incremented above */
+         mach->CallStack[mach->CallStackTop++] = *pc;
+         *pc = inst->InstructionExtLabel.Label;
+      }
+      break;
+
+   case TGSI_OPCODE_RET:
+      mach->FuncMask &= ~mach->ExecMask;
+      UPDATE_EXEC_MASK(mach);
+
+      if (mach->ExecMask == 0x0) {
+         /* really return now (otherwise, keep executing */
+
+         if (mach->CallStackTop == 0) {
+            /* returning from main() */
+            *pc = -1;
+            return;
+         }
+         *pc = mach->CallStack[--mach->CallStackTop];
+
+         /* pop the Cond, Loop, Cont stacks */
+         assert(mach->CondStackTop > 0);
+         mach->CondMask = mach->CondStack[--mach->CondStackTop];
+         assert(mach->LoopStackTop > 0);
+         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+         assert(mach->ContStackTop > 0);
+         mach->ContMask = mach->ContStack[--mach->ContStackTop];
+         assert(mach->FuncStackTop > 0);
+         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
+
+         UPDATE_EXEC_MASK(mach);
+      }
+      break;
+
+   case TGSI_OPCODE_SSG:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CMP:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+         FETCH(&r[2], 2, chan_index);
+
+         micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_SCS:
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+         FETCH( &r[0], 0, CHAN_X );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
+         micro_cos( &r[1], &r[0] );
+         STORE( &r[1], 0, CHAN_X );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+         micro_sin( &r[1], &r[0] );
+         STORE( &r[1], 0, CHAN_Y );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_NRM:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_DIV:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_DP2:
+      FETCH( &r[0], 0, CHAN_X );
+      FETCH( &r[1], 1, CHAN_X );
+      micro_mul( &r[0], &r[0], &r[1] );
+
+      FETCH( &r[1], 0, CHAN_Y );
+      FETCH( &r[2], 1, CHAN_Y );
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_IF:
+      /* push CondMask */
+      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+      FETCH( &r[0], 0, CHAN_X );
+      /* update CondMask */
+      if( ! r[0].u[0] ) {
+         mach->CondMask &= ~0x1;
+      }
+      if( ! r[0].u[1] ) {
+         mach->CondMask &= ~0x2;
+      }
+      if( ! r[0].u[2] ) {
+         mach->CondMask &= ~0x4;
+      }
+      if( ! r[0].u[3] ) {
+         mach->CondMask &= ~0x8;
+      }
+      UPDATE_EXEC_MASK(mach);
+      /* Todo: If CondMask==0, jump to ELSE */
+      break;
+
+   case TGSI_OPCODE_ELSE:
+      /* invert CondMask wrt previous mask */
+      {
+         uint prevMask;
+         assert(mach->CondStackTop > 0);
+         prevMask = mach->CondStack[mach->CondStackTop - 1];
+         mach->CondMask = ~mach->CondMask & prevMask;
+         UPDATE_EXEC_MASK(mach);
+         /* Todo: If CondMask==0, jump to ENDIF */
+      }
+      break;
+
+   case TGSI_OPCODE_ENDIF:
+      /* pop CondMask */
+      assert(mach->CondStackTop > 0);
+      mach->CondMask = mach->CondStack[--mach->CondStackTop];
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_END:
+      /* halt execution */
+      *pc = -1;
+      break;
+
+   case TGSI_OPCODE_REP:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_ENDREP:
+       assert (0);
+       break;
+
+   case TGSI_OPCODE_PUSHA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_POPA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CEIL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_ceil( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_I2F:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_i2f( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_NOT:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_not( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_TRUNC:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_trunc( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SHL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_shl( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SHR:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_ishr( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_AND:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_and( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_OR:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_or( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MOD:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_XOR:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_xor( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SAD:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TXF:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TXQ:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_EMIT:
+      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
+      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+      break;
+
+   case TGSI_OPCODE_ENDPRIM:
+      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
+      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
+      break;
+
+   case TGSI_OPCODE_LOOP:
+      /* fall-through (for now) */
+   case TGSI_OPCODE_BGNLOOP2:
+      /* push LoopMask and ContMasks */
+      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+      break;
+
+   case TGSI_OPCODE_ENDLOOP:
+      /* fall-through (for now at least) */
+   case TGSI_OPCODE_ENDLOOP2:
+      /* Restore ContMask, but don't pop */
+      assert(mach->ContStackTop > 0);
+      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
+      if (mach->LoopMask) {
+         /* repeat loop: jump to instruction just past BGNLOOP */
+         *pc = inst->InstructionExtLabel.Label + 1;
+      }
+      else {
+         /* exit loop: pop LoopMask */
+         assert(mach->LoopStackTop > 0);
+         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+         /* pop ContMask */
+         assert(mach->ContStackTop > 0);
+         mach->ContMask = mach->ContStack[--mach->ContStackTop];
+      }
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_BRK:
+      /* turn off loop channels for each enabled exec channel */
+      mach->LoopMask &= ~mach->ExecMask;
+      /* Todo: if mach->LoopMask == 0, jump to end of loop */
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_CONT:
+      /* turn off cont channels for each enabled exec channel */
+      mach->ContMask &= ~mach->ExecMask;
+      /* Todo: if mach->LoopMask == 0, jump to end of loop */
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_BGNSUB:
+      /* no-op */
+      break;
+
+   case TGSI_OPCODE_ENDSUB:
+      /* no-op */
+      break;
+
+   case TGSI_OPCODE_NOISE1:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOISE2:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOISE3:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOISE4:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOP:
+      break;
+
+   default:
+      assert( 0 );
+   }
+}
+
+
+/**
+ * Run TGSI interpreter.
+ * \return bitmask of "alive" quad components
+ */
+uint
+tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
+{
+   uint i;
+   int pc = 0;
+
+   mach->CondMask = 0xf;
+   mach->LoopMask = 0xf;
+   mach->ContMask = 0xf;
+   mach->FuncMask = 0xf;
+   mach->ExecMask = 0xf;
+
+   mach->CondStackTop = 0; /* temporarily subvert this assertion */
+   assert(mach->CondStackTop == 0);
+   assert(mach->LoopStackTop == 0);
+   assert(mach->ContStackTop == 0);
+   assert(mach->CallStackTop == 0);
+
+   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
+   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
+
+   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
+      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
+      mach->Primitives[0] = 0;
+   }
+
+
+   /* execute declarations (interpolants) */
+   for (i = 0; i < mach->NumDeclarations; i++) {
+      exec_declaration( mach, mach->Declarations+i );
+   }
+
+   /* execute instructions, until pc is set to -1 */
+   while (pc != -1) {
+      assert(pc < mach->NumInstructions);
+      exec_instruction( mach, mach->Instructions + pc, &pc );
+   }
+
+#if 0
+   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
+   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
+      /*
+       * Scale back depth component.
+       */
+      for (i = 0; i < 4; i++)
+         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
+   }
+#endif
+
+   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+}
+
+
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
new file mode 100644
index 00000000000..1fb66ee960f
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
@@ -0,0 +1,239 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#if !defined TGSI_EXEC_H
+#define TGSI_EXEC_H
+
+#include "pipe/p_compiler.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+#define NUM_CHANNELS 4  /* R,G,B,A */
+#define QUAD_SIZE    4  /* 4 pixel/quad */
+
+/**
+  * Registers may be treated as float, signed int or unsigned int.
+  */
+union tgsi_exec_channel
+{
+   float    f[QUAD_SIZE];
+   int      i[QUAD_SIZE];
+   unsigned u[QUAD_SIZE];
+};
+
+/**
+  * A vector[RGBA] of channels[4 pixels]
+  */
+struct tgsi_exec_vector
+{
+   union tgsi_exec_channel xyzw[NUM_CHANNELS];
+};
+
+/**
+ * For fragment programs, information for computing fragment input
+ * values from plane equation of the triangle/line.
+ */
+struct tgsi_interp_coef
+{
+   float a0[NUM_CHANNELS];	/* in an xyzw layout */
+   float dadx[NUM_CHANNELS];
+   float dady[NUM_CHANNELS];
+};
+
+
+struct softpipe_tile_cache;  /**< Opaque to TGSI */
+
+/**
+ * Information for sampling textures, which must be implemented
+ * by code outside the TGSI executor.
+ */
+struct tgsi_sampler
+{
+   const struct pipe_sampler_state *state;
+   struct pipe_texture *texture;
+   /** Get samples for four fragments in a quad */
+   void (*get_samples)(struct tgsi_sampler *sampler,
+                       const float s[QUAD_SIZE],
+                       const float t[QUAD_SIZE],
+                       const float p[QUAD_SIZE],
+                       float lodbias,
+                       float rgba[NUM_CHANNELS][QUAD_SIZE]);
+   void *pipe; /*XXX temporary*/
+   struct softpipe_tile_cache *cache;
+};
+
+/**
+ * For branching/calling subroutines.
+ */
+struct tgsi_exec_labels
+{
+   unsigned labels[128][2];
+   unsigned count;
+};
+
+/*
+ * Locations of various utility registers (_I = Index, _C = Channel)
+ */
+#define TGSI_EXEC_TEMP_00000000_I   32
+#define TGSI_EXEC_TEMP_00000000_C   0
+
+#define TGSI_EXEC_TEMP_7FFFFFFF_I   32
+#define TGSI_EXEC_TEMP_7FFFFFFF_C   1
+
+#define TGSI_EXEC_TEMP_80000000_I   32
+#define TGSI_EXEC_TEMP_80000000_C   2
+
+#define TGSI_EXEC_TEMP_FFFFFFFF_I   32
+#define TGSI_EXEC_TEMP_FFFFFFFF_C   3
+
+#define TGSI_EXEC_TEMP_ONE_I        33
+#define TGSI_EXEC_TEMP_ONE_C        0
+
+#define TGSI_EXEC_TEMP_TWO_I        33
+#define TGSI_EXEC_TEMP_TWO_C        1
+
+#define TGSI_EXEC_TEMP_128_I        33
+#define TGSI_EXEC_TEMP_128_C        2
+
+#define TGSI_EXEC_TEMP_MINUS_128_I  33
+#define TGSI_EXEC_TEMP_MINUS_128_C  3
+
+#define TGSI_EXEC_TEMP_KILMASK_I    34
+#define TGSI_EXEC_TEMP_KILMASK_C    0
+
+#define TGSI_EXEC_TEMP_OUTPUT_I     34
+#define TGSI_EXEC_TEMP_OUTPUT_C     1
+
+#define TGSI_EXEC_TEMP_PRIMITIVE_I  34
+#define TGSI_EXEC_TEMP_PRIMITIVE_C  2
+
+#define TGSI_EXEC_TEMP_R0           35
+
+#define TGSI_EXEC_NUM_TEMPS   (32 + 4)
+#define TGSI_EXEC_NUM_ADDRS   1
+#define TGSI_EXEC_NUM_IMMEDIATES  256
+
+#define TGSI_EXEC_MAX_COND_NESTING  10
+#define TGSI_EXEC_MAX_LOOP_NESTING  10
+#define TGSI_EXEC_MAX_CALL_NESTING  10
+
+/**
+ * Run-time virtual machine state for executing TGSI shader.
+ */
+struct tgsi_exec_machine
+{
+   /*
+    * 32 program temporaries
+    * 4  internal temporaries
+    * 1  address
+    * 1  temporary of padding to align to 16 bytes
+    */
+   struct tgsi_exec_vector       _Temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_ADDRS + 1];
+
+   /*
+    * This will point to _Temps after aligning to 16B boundary.
+    */
+   struct tgsi_exec_vector       *Temps;
+   struct tgsi_exec_vector       *Addrs;
+
+   struct tgsi_sampler           *Samplers;
+
+   float                         Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
+   unsigned                      ImmLimit;
+   float                         (*Consts)[4];
+   struct tgsi_exec_vector       *Inputs;
+   struct tgsi_exec_vector       *Outputs;
+   const struct tgsi_token       *Tokens;
+   unsigned                      Processor;
+
+   /* GEOMETRY processor only. */
+   unsigned                      *Primitives;
+
+   /* FRAGMENT processor only. */
+   const struct tgsi_interp_coef *InterpCoefs;
+   struct tgsi_exec_vector       QuadPos;
+
+   /* Conditional execution masks */
+   uint CondMask;  /**< For IF/ELSE/ENDIF */
+   uint LoopMask;  /**< For BGNLOOP/ENDLOOP */
+   uint ContMask;  /**< For loop CONT statements */
+   uint FuncMask;  /**< For function calls */
+   uint ExecMask;  /**< = CondMask & LoopMask */
+
+   /** Condition mask stack (for nested conditionals) */
+   uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
+   int CondStackTop;
+
+   /** Loop mask stack (for nested loops) */
+   uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
+   int LoopStackTop;
+
+   /** Loop continue mask stack (see comments in tgsi_exec.c) */
+   uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
+   int ContStackTop;
+
+   /** Function execution mask stack (for executing subroutine code) */
+   uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
+   int FuncStackTop;
+
+   /** Function call stack for saving/restoring the program counter */
+   uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
+   int CallStackTop;
+
+   struct tgsi_full_instruction *Instructions;
+   uint NumInstructions;
+
+   struct tgsi_full_declaration *Declarations;
+   uint NumDeclarations;
+
+   struct tgsi_exec_labels Labels;
+};
+
+
+void
+tgsi_exec_machine_init(
+   struct tgsi_exec_machine *mach,
+   const struct tgsi_token *tokens,
+   unsigned numSamplers,
+   struct tgsi_sampler *samplers);
+
+uint
+tgsi_exec_machine_run(
+   struct tgsi_exec_machine *mach );
+
+
+void
+tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
+
+
+#if defined __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* TGSI_EXEC_H */
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
new file mode 100755
index 00000000000..593464db3ed
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -0,0 +1,2378 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi_exec.h"
+#include "tgsi_sse2.h"
+
+#include "x86/rtasm/x86sse.h"
+
+#if defined(__i386__) || defined(__386__)
+
+#define DUMP_SSE  0
+
+#if DUMP_SSE
+
+static void
+_print_reg(
+   struct x86_reg reg )
+{
+   if (reg.mod != mod_REG) 
+      debug_printf( "[" );
+      
+   switch( reg.file ) {
+   case file_REG32:
+      switch( reg.idx ) {
+      case reg_AX:
+         debug_printf( "EAX" );
+         break;
+      case reg_CX:
+         debug_printf( "ECX" );
+         break;
+      case reg_DX:
+         debug_printf( "EDX" );
+         break;
+      case reg_BX:
+         debug_printf( "EBX" );
+         break;
+      case reg_SP:
+         debug_printf( "ESP" );
+         break;
+      case reg_BP:
+         debug_printf( "EBP" );
+         break;
+      case reg_SI:
+         debug_printf( "ESI" );
+         break;
+      case reg_DI:
+         debug_printf( "EDI" );
+         break;
+      }
+      break;
+   case file_MMX:
+      assert( 0 );
+      break;
+   case file_XMM:
+      debug_printf( "XMM%u", reg.idx );
+      break;
+   case file_x87:
+      assert( 0 );
+      break;
+   }
+
+   if (reg.mod == mod_DISP8 ||
+       reg.mod == mod_DISP32)
+      debug_printf("+%d", reg.disp);
+
+   if (reg.mod != mod_REG) 
+      debug_printf( "]" );
+}
+
+static void
+_fill(
+   const char  *op )
+{
+   unsigned count = 10 - strlen( op );
+
+   while( count-- ) {
+      debug_printf( " " );
+   }
+}
+
+#define DUMP_START() debug_printf( "\nsse-dump start ----------------" )
+#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" )
+#define DUMP( OP ) debug_printf( "\n%s", OP )
+#define DUMP_I( OP, I ) do {\
+   debug_printf( "\n%s", OP );\
+   _fill( OP );\
+   debug_printf( "%u", I ); } while( 0 )
+#define DUMP_R( OP, R0 ) do {\
+   debug_printf( "\n%s", OP );\
+   _fill( OP );\
+   _print_reg( R0 ); } while( 0 )
+#define DUMP_RR( OP, R0, R1 ) do {\
+   debug_printf( "\n%s", OP );\
+   _fill( OP );\
+   _print_reg( R0 );\
+   debug_printf( ", " );\
+   _print_reg( R1 ); } while( 0 )
+#define DUMP_RRI( OP, R0, R1, I ) do {\
+   debug_printf( "\n%s", OP );\
+   _fill( OP );\
+   _print_reg( R0 );\
+   debug_printf( ", " );\
+   _print_reg( R1 );\
+   debug_printf( ", " );\
+   debug_printf( "%u", I ); } while( 0 )
+
+#else
+
+#define DUMP_START()
+#define DUMP_END()
+#define DUMP( OP )
+#define DUMP_I( OP, I )
+#define DUMP_R( OP, R0 )
+#define DUMP_RR( OP, R0, R1 )
+#define DUMP_RRI( OP, R0, R1, I )
+
+#endif
+
+#define FOR_EACH_CHANNEL( CHAN )\
+   for( CHAN = 0; CHAN < 4; CHAN++ )
+
+#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
+   FOR_EACH_CHANNEL( CHAN )\
+      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
+
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+#define TEMP_R0   TGSI_EXEC_TEMP_R0
+
+/**
+ * X86 utility functions.
+ */
+
+static struct x86_reg
+make_xmm(
+   unsigned xmm )
+{
+   return x86_make_reg(
+      file_XMM,
+      (enum x86_reg_name) xmm );
+}
+
+/**
+ * X86 register mapping helpers.
+ */
+
+static struct x86_reg
+get_const_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_CX );
+}
+
+static struct x86_reg
+get_input_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_AX );
+}
+
+static struct x86_reg
+get_output_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_DX );
+}
+
+static struct x86_reg
+get_temp_base( void )
+{
+#ifdef WIN32
+   return x86_make_reg(
+      file_REG32,
+      reg_BX );
+#else
+   return x86_make_reg(
+      file_REG32,
+      reg_SI );
+#endif
+}
+
+static struct x86_reg
+get_coef_base( void )
+{
+   return get_output_base();
+}
+
+/**
+ * Data access helpers.
+ */
+
+static struct x86_reg
+get_argument(
+   unsigned index )
+{
+   return x86_make_disp(
+      x86_make_reg( file_REG32, reg_SP ),
+      (index + 1) * 4 );
+}
+
+static struct x86_reg
+get_const(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_const_base(),
+      (vec * 4 + chan) * 4 );
+}
+
+static struct x86_reg
+get_input(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_input_base(),
+      (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_output(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_output_base(),
+      (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_temp(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_temp_base(),
+      (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_coef(
+   unsigned vec,
+   unsigned chan,
+   unsigned member )
+{
+   return x86_make_disp(
+      get_coef_base(),
+      ((vec * 3 + member) * 4 + chan) * 4 );
+}
+
+/**
+ * X86 rtasm wrappers.
+ */
+
+static void
+emit_addps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "ADDPS", dst, src );
+   sse_addps( func, dst, src );
+}
+
+static void
+emit_andnps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "ANDNPS", dst, src );
+   sse_andnps( func, dst, src );
+}
+
+static void
+emit_andps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "ANDPS", dst, src );
+   sse_andps( func, dst, src );
+}
+
+static void
+emit_call(
+   struct x86_function  *func,
+   void                 (* addr)() )
+{
+   struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+
+   DUMP_I( "CALL", addr );
+   x86_mov_reg_imm( func, ecx, (unsigned long) addr );
+   x86_call( func, ecx );
+}
+
+static void
+emit_cmpps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src,
+   enum sse_cc          cc )
+{
+   DUMP_RRI( "CMPPS", dst, src, cc );
+   sse_cmpps( func, dst, src, cc );
+}
+
+static void
+emit_cvttps2dq(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "CVTTPS2DQ", dst, src );
+   sse2_cvttps2dq( func, dst, src );
+}
+
+static void
+emit_maxps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "MAXPS", dst, src );
+   sse_maxps( func, dst, src );
+}
+
+static void
+emit_minps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "MINPS", dst, src );
+   sse_minps( func, dst, src );
+}
+
+static void
+emit_mov(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "MOV", dst, src );
+   x86_mov( func, dst, src );
+}
+
+static void
+emit_movaps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "MOVAPS", dst, src );
+   sse_movaps( func, dst, src );
+}
+
+static void
+emit_movss(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "MOVSS", dst, src );
+   sse_movss( func, dst, src );
+}
+
+static void
+emit_movups(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "MOVUPS", dst, src );
+   sse_movups( func, dst, src );
+}
+
+static void
+emit_mulps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "MULPS", dst, src );
+   sse_mulps( func, dst, src );
+}
+
+static void
+emit_or(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "OR", dst, src );
+   x86_or( func, dst, src );
+}
+
+static void
+emit_orps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "ORPS", dst, src );
+   sse_orps( func, dst, src );
+}
+
+static void
+emit_pmovmskb(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "PMOVMSKB", dst, src );
+   sse_pmovmskb( func, dst, src );
+}
+
+static void
+emit_pop(
+   struct x86_function  *func,
+   struct x86_reg       dst )
+{
+   DUMP_R( "POP", dst );
+   x86_pop( func, dst );
+}
+
+static void
+emit_push(
+   struct x86_function  *func,
+   struct x86_reg       dst )
+{
+   DUMP_R( "PUSH", dst );
+   x86_push( func, dst );
+}
+
+static void
+emit_rcpps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "RCPPS", dst, src );
+   sse2_rcpps( func, dst, src );
+}
+
+#ifdef WIN32
+static void
+emit_retw(
+   struct x86_function  *func,
+   unsigned             size )
+{
+   DUMP_I( "RET", size );
+   x86_retw( func, size );
+}
+#else
+static void
+emit_ret(
+   struct x86_function  *func )
+{
+   DUMP( "RET" );
+   x86_ret( func );
+}
+#endif
+
+static void
+emit_rsqrtps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "RSQRTPS", dst, src );
+   sse_rsqrtps( func, dst, src );
+}
+
+static void
+emit_shufps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src,
+   unsigned char        shuf )
+{
+   DUMP_RRI( "SHUFPS", dst, src, shuf );
+   sse_shufps( func, dst, src, shuf );
+}
+
+static void
+emit_subps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "SUBPS", dst, src );
+   sse_subps( func, dst, src );
+}
+
+static void
+emit_xorps(
+   struct x86_function  *func,
+   struct x86_reg       dst,
+   struct x86_reg       src )
+{
+   DUMP_RR( "XORPS", dst, src );
+   sse_xorps( func, dst, src );
+}
+
+/**
+ * Data fetch helpers.
+ */
+
+static void
+emit_const(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_movss(
+      func,
+      make_xmm( xmm ),
+      get_const( vec, chan ) );
+   emit_shufps(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ),
+      SHUF( 0, 0, 0, 0 ) );
+}
+
+static void
+emit_inputf(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_movups(
+      func,
+      make_xmm( xmm ),
+      get_input( vec, chan ) );
+}
+
+static void
+emit_output(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_movups(
+      func,
+      get_output( vec, chan ),
+      make_xmm( xmm ) );
+}
+
+static void
+emit_tempf(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_movaps(
+      func,
+      make_xmm( xmm ),
+      get_temp( vec, chan ) );
+}
+
+static void
+emit_coef(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan,
+   unsigned member )
+{
+   emit_movss(
+      func,
+      make_xmm( xmm ),
+      get_coef( vec, chan, member ) );
+   emit_shufps(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ),
+      SHUF( 0, 0, 0, 0 ) );
+}
+
+/**
+ * Data store helpers.
+ */
+
+static void
+emit_inputs(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_movups(
+      func,
+      get_input( vec, chan ),
+      make_xmm( xmm ) );
+}
+
+static void
+emit_temps(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_movaps(
+      func,
+      get_temp( vec, chan ),
+      make_xmm( xmm ) );
+}
+
+static void
+emit_addrs(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_temps(
+      func,
+      xmm,
+      vec + TGSI_EXEC_NUM_TEMPS,
+      chan );
+}
+
+/**
+ * Coefficent fetch helpers.
+ */
+
+static void
+emit_coef_a0(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_coef(
+      func,
+      xmm,
+      vec,
+      chan,
+      0 );
+}
+
+static void
+emit_coef_dadx(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_coef(
+      func,
+      xmm,
+      vec,
+      chan,
+      1 );
+}
+
+static void
+emit_coef_dady(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_coef(
+      func,
+      xmm,
+      vec,
+      chan,
+      2 );
+}
+
+/**
+ * Function call helpers.
+ */
+
+static void
+emit_push_gp(
+   struct x86_function *func )
+{
+   emit_push(
+      func,
+      get_const_base() );
+   emit_push(
+      func,
+      get_input_base() );
+   emit_push(
+      func,
+      get_output_base() );
+
+   /* It is important on non-win32 platforms that temp base is pushed last.
+    */
+   emit_push(
+      func,
+      get_temp_base() );
+}
+
+static void
+emit_pop_gp(
+   struct x86_function *func )
+{
+   /* Restore GP registers in a reverse order.
+    */
+   emit_pop(
+      func,
+      get_temp_base() );
+   emit_pop(
+      func,
+      get_output_base() );
+   emit_pop(
+      func,
+      get_input_base() );
+   emit_pop(
+      func,
+      get_const_base() );
+}
+
+static void
+emit_func_call_dst(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   void (*code)() )
+{
+   emit_movaps(
+      func,
+      get_temp( TEMP_R0, 0 ),
+      make_xmm( xmm_dst ) );
+
+   emit_push_gp(
+      func );
+
+#ifdef WIN32
+   emit_push(
+      func,
+      get_temp( TEMP_R0, 0 ) );
+#endif
+
+   emit_call(
+      func,
+      code );
+
+   emit_pop_gp(
+      func );
+
+   emit_movaps(
+      func,
+      make_xmm( xmm_dst ),
+      get_temp( TEMP_R0, 0 ) );
+}
+
+static void
+emit_func_call_dst_src(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src,
+   void (*code)() )
+{
+   emit_movaps(
+      func,
+      get_temp( TEMP_R0, 1 ),
+      make_xmm( xmm_src ) );
+
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      code );
+}
+
+/**
+ * Low-level instruction translators.
+ */
+
+static void
+emit_abs(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   emit_andps(
+      func,
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_7FFFFFFF_I,
+         TGSI_EXEC_TEMP_7FFFFFFF_C ) );
+}
+
+static void
+emit_add(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   emit_addps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void XSTDCALL
+cos4f(
+   float *store )
+{
+#ifdef WIN32
+   store[0] = (float) cos( (double) store[0] );
+   store[1] = (float) cos( (double) store[1] );
+   store[2] = (float) cos( (double) store[2] );
+   store[3] = (float) cos( (double) store[3] );
+#else
+   const unsigned X = TEMP_R0 * 16;
+   store[X + 0] = cosf( store[X + 0] );
+   store[X + 1] = cosf( store[X + 1] );
+   store[X + 2] = cosf( store[X + 2] );
+   store[X + 3] = cosf( store[X + 3] );
+#endif
+}
+
+static void
+emit_cos(
+   struct x86_function *func,
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      cos4f );
+}
+
+static void XSTDCALL
+ex24f(
+   float *store )
+{
+#ifdef WIN32
+   store[0] = (float) pow( 2.0, (double) store[0] );
+   store[1] = (float) pow( 2.0, (double) store[1] );
+   store[2] = (float) pow( 2.0, (double) store[2] );
+   store[3] = (float) pow( 2.0, (double) store[3] );
+#else
+   const unsigned X = TEMP_R0 * 16;
+   store[X + 0] = powf( 2.0f, store[X + 0] );
+   store[X + 1] = powf( 2.0f, store[X + 1] );
+   store[X + 2] = powf( 2.0f, store[X + 2] );
+   store[X + 3] = powf( 2.0f, store[X + 3] );
+#endif
+}
+
+static void
+emit_ex2(
+   struct x86_function *func,
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      ex24f );
+}
+
+static void
+emit_f2it(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   emit_cvttps2dq(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ) );
+}
+
+static void XSTDCALL
+flr4f(
+   float *store )
+{
+#ifdef WIN32
+   const unsigned X = 0;
+#else
+   const unsigned X = TEMP_R0 * 16;
+#endif
+   store[X + 0] = (float) floor( (double) store[X + 0] );
+   store[X + 1] = (float) floor( (double) store[X + 1] );
+   store[X + 2] = (float) floor( (double) store[X + 2] );
+   store[X + 3] = (float) floor( (double) store[X + 3] );
+}
+
+static void
+emit_flr(
+   struct x86_function *func,
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      flr4f );
+}
+
+static void XSTDCALL
+frc4f(
+   float *store )
+{
+#ifdef WIN32
+   const unsigned X = 0;
+#else
+   const unsigned X = TEMP_R0 * 16;
+#endif
+   store[X + 0] -= (float) floor( (double) store[X + 0] );
+   store[X + 1] -= (float) floor( (double) store[X + 1] );
+   store[X + 2] -= (float) floor( (double) store[X + 2] );
+   store[X + 3] -= (float) floor( (double) store[X + 3] );
+}
+
+static void
+emit_frc(
+   struct x86_function *func,
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      frc4f );
+}
+
+static void XSTDCALL
+lg24f(
+   float *store )
+{
+#ifdef WIN32
+   const unsigned X = 0;
+#else
+   const unsigned X = TEMP_R0 * 16;
+#endif
+   store[X + 0] = LOG2( store[X + 0] );
+   store[X + 1] = LOG2( store[X + 1] );
+   store[X + 2] = LOG2( store[X + 2] );
+   store[X + 3] = LOG2( store[X + 3] );
+}
+
+static void
+emit_lg2(
+   struct x86_function *func,
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      lg24f );
+}
+
+static void
+emit_MOV(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   emit_movups(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void
+emit_mul (struct x86_function *func,
+          unsigned xmm_dst,
+          unsigned xmm_src)
+{
+   emit_mulps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void
+emit_neg(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   emit_xorps(
+      func,
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_80000000_I,
+         TGSI_EXEC_TEMP_80000000_C ) );
+}
+
+static void XSTDCALL
+pow4f(
+   float *store )
+{
+#ifdef WIN32
+   store[0] = (float) pow( (double) store[0], (double) store[4] );
+   store[1] = (float) pow( (double) store[1], (double) store[5] );
+   store[2] = (float) pow( (double) store[2], (double) store[6] );
+   store[3] = (float) pow( (double) store[3], (double) store[7] );
+#else
+   const unsigned X = TEMP_R0 * 16;
+   store[X + 0] = powf( store[X + 0], store[X + 4] );
+   store[X + 1] = powf( store[X + 1], store[X + 5] );
+   store[X + 2] = powf( store[X + 2], store[X + 6] );
+   store[X + 3] = powf( store[X + 3], store[X + 7] );
+#endif
+}
+
+static void
+emit_pow(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   emit_func_call_dst_src(
+      func,
+      xmm_dst,
+      xmm_src,
+      pow4f );
+}
+
+static void
+emit_rcp (
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   emit_rcpps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void
+emit_rsqrt(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   emit_rsqrtps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void
+emit_setsign(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   emit_orps(
+      func,
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_80000000_I,
+         TGSI_EXEC_TEMP_80000000_C ) );
+}
+
+static void XSTDCALL
+sin4f(
+   float *store )
+{
+#ifdef WIN32
+   store[0] = (float) sin( (double) store[0] );
+   store[1] = (float) sin( (double) store[1] );
+   store[2] = (float) sin( (double) store[2] );
+   store[3] = (float) sin( (double) store[3] );
+#else
+   const unsigned X = TEMP_R0 * 16;
+   store[X + 0] = sinf( store[X + 0] );
+   store[X + 1] = sinf( store[X + 1] );
+   store[X + 2] = sinf( store[X + 2] );
+   store[X + 3] = sinf( store[X + 3] );
+#endif
+}
+
+static void
+emit_sin (struct x86_function *func,
+          unsigned xmm_dst)
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      sin4f );
+}
+
+static void
+emit_sub(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   emit_subps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+/**
+ * Register fetch.
+ */
+
+static void
+emit_fetch(
+   struct x86_function *func,
+   unsigned xmm,
+   const struct tgsi_full_src_register *reg,
+   const unsigned chan_index )
+{
+   unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+
+   switch( swizzle ) {
+   case TGSI_EXTSWIZZLE_X:
+   case TGSI_EXTSWIZZLE_Y:
+   case TGSI_EXTSWIZZLE_Z:
+   case TGSI_EXTSWIZZLE_W:
+      switch( reg->SrcRegister.File ) {
+      case TGSI_FILE_CONSTANT:
+         emit_const(
+            func,
+            xmm,
+            reg->SrcRegister.Index,
+            swizzle );
+         break;
+
+      case TGSI_FILE_INPUT:
+         emit_inputf(
+            func,
+            xmm,
+            reg->SrcRegister.Index,
+            swizzle );
+         break;
+
+      case TGSI_FILE_TEMPORARY:
+         emit_tempf(
+            func,
+            xmm,
+            reg->SrcRegister.Index,
+            swizzle );
+         break;
+
+      default:
+         assert( 0 );
+      }
+      break;
+
+   case TGSI_EXTSWIZZLE_ZERO:
+      emit_tempf(
+         func,
+         xmm,
+         TGSI_EXEC_TEMP_00000000_I,
+         TGSI_EXEC_TEMP_00000000_C );
+      break;
+
+   case TGSI_EXTSWIZZLE_ONE:
+      emit_tempf(
+         func,
+         xmm,
+         TGSI_EXEC_TEMP_ONE_I,
+         TGSI_EXEC_TEMP_ONE_C );
+      break;
+
+   default:
+      assert( 0 );
+   }
+
+   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
+   case TGSI_UTIL_SIGN_CLEAR:
+      emit_abs( func, xmm );
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      emit_setsign( func, xmm );
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      emit_neg( func, xmm );
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+      break;
+   }
+}
+
+#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
+   emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
+
+/**
+ * Register store.
+ */
+
+static void
+emit_store(
+   struct x86_function *func,
+   unsigned xmm,
+   const struct tgsi_full_dst_register *reg,
+   const struct tgsi_full_instruction *inst,
+   unsigned chan_index )
+{
+   switch( reg->DstRegister.File ) {
+   case TGSI_FILE_OUTPUT:
+      emit_output(
+         func,
+         xmm,
+         reg->DstRegister.Index,
+         chan_index );
+      break;
+
+   case TGSI_FILE_TEMPORARY:
+      emit_temps(
+         func,
+         xmm,
+         reg->DstRegister.Index,
+         chan_index );
+      break;
+
+   case TGSI_FILE_ADDRESS:
+      emit_addrs(
+         func,
+         xmm,
+         reg->DstRegister.Index,
+         chan_index );
+      break;
+
+   default:
+      assert( 0 );
+   }
+
+   switch( inst->Instruction.Saturate ) {
+   case TGSI_SAT_NONE:
+      break;
+
+   case TGSI_SAT_ZERO_ONE:
+//      assert( 0 );
+      break;
+
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      assert( 0 );
+      break;
+   }
+}
+
+#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
+   emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
+
+/**
+ * High-level instruction translators.
+ */
+
+static void
+emit_kil(
+   struct x86_function *func,
+   const struct tgsi_full_src_register *reg )
+{
+   unsigned uniquemask;
+   unsigned registers[4];
+   unsigned nextregister = 0;
+   unsigned firstchan = ~0;
+   unsigned chan_index;
+
+   /* This mask stores component bits that were already tested. Note that
+    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
+    * tested. */
+   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+
+   FOR_EACH_CHANNEL( chan_index ) {
+      unsigned swizzle;
+
+      /* unswizzle channel */
+      swizzle = tgsi_util_get_full_src_register_extswizzle(
+         reg,
+         chan_index );
+
+      /* check if the component has not been already tested */
+      if( !(uniquemask & (1 << swizzle)) ) {
+         uniquemask |= 1 << swizzle;
+
+         /* allocate register */
+         registers[chan_index] = nextregister;
+         emit_fetch(
+            func,
+            nextregister,
+            reg,
+            chan_index );
+         nextregister++;
+
+         /* mark the first channel used */
+         if( firstchan == ~0 ) {
+            firstchan = chan_index;
+         }
+      }
+   }
+
+   emit_push(
+      func,
+      x86_make_reg( file_REG32, reg_AX ) );
+   emit_push(
+      func,
+      x86_make_reg( file_REG32, reg_DX ) );
+
+   FOR_EACH_CHANNEL( chan_index ) {
+      if( uniquemask & (1 << chan_index) ) {
+         emit_cmpps(
+            func,
+            make_xmm( registers[chan_index] ),
+            get_temp(
+               TGSI_EXEC_TEMP_00000000_I,
+               TGSI_EXEC_TEMP_00000000_C ),
+            cc_LessThan );
+
+         if( chan_index == firstchan ) {
+            emit_pmovmskb(
+               func,
+               x86_make_reg( file_REG32, reg_AX ),
+               make_xmm( registers[chan_index] ) );
+         }
+         else {
+            emit_pmovmskb(
+               func,
+               x86_make_reg( file_REG32, reg_DX ),
+               make_xmm( registers[chan_index] ) );
+            emit_or(
+               func,
+               x86_make_reg( file_REG32, reg_AX ),
+               x86_make_reg( file_REG32, reg_DX ) );
+         }
+      }
+   }
+
+   emit_or(
+      func,
+      get_temp(
+         TGSI_EXEC_TEMP_KILMASK_I,
+         TGSI_EXEC_TEMP_KILMASK_C ),
+      x86_make_reg( file_REG32, reg_AX ) );
+
+   emit_pop(
+      func,
+      x86_make_reg( file_REG32, reg_DX ) );
+   emit_pop(
+      func,
+      x86_make_reg( file_REG32, reg_AX ) );
+}
+
+static void
+emit_setcc(
+   struct x86_function *func,
+   struct tgsi_full_instruction *inst,
+   enum sse_cc cc )
+{
+   unsigned chan_index;
+
+   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+      FETCH( func, *inst, 0, 0, chan_index );
+      FETCH( func, *inst, 1, 1, chan_index );
+      emit_cmpps(
+         func,
+         make_xmm( 0 ),
+         make_xmm( 1 ),
+         cc );
+      emit_andps(
+         func,
+         make_xmm( 0 ),
+         get_temp(
+            TGSI_EXEC_TEMP_ONE_I,
+            TGSI_EXEC_TEMP_ONE_C ) );
+      STORE( func, *inst, 0, 0, chan_index );
+   }
+}
+
+static void
+emit_cmp(
+   struct x86_function *func,
+   struct tgsi_full_instruction *inst )
+{
+   unsigned chan_index;
+
+   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+      FETCH( func, *inst, 0, 0, chan_index );
+      FETCH( func, *inst, 1, 1, chan_index );
+      FETCH( func, *inst, 2, 2, chan_index );
+      emit_cmpps(
+         func,
+         make_xmm( 0 ),
+         get_temp(
+            TGSI_EXEC_TEMP_00000000_I,
+            TGSI_EXEC_TEMP_00000000_C ),
+         cc_LessThan );
+      emit_andps(
+         func,
+         make_xmm( 1 ),
+         make_xmm( 0 ) );
+      emit_andnps(
+         func,
+         make_xmm( 0 ),
+         make_xmm( 2 ) );
+      emit_orps(
+         func,
+         make_xmm( 0 ),
+         make_xmm( 1 ) );
+      STORE( func, *inst, 0, 0, chan_index );
+   }
+}
+
+static int
+emit_instruction(
+   struct x86_function *func,
+   struct tgsi_full_instruction *inst )
+{
+   unsigned chan_index;
+
+   switch( inst->Instruction.Opcode ) {
+   case TGSI_OPCODE_ARL:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_f2it( func, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MOV:
+   /* TGSI_OPCODE_SWZ */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LIT:
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+         emit_tempf(
+            func,
+            0,
+            TGSI_EXEC_TEMP_ONE_I,
+            TGSI_EXEC_TEMP_ONE_C);
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
+            STORE( func, *inst, 0, 0, CHAN_X );
+         }
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+            STORE( func, *inst, 0, 0, CHAN_W );
+         }
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+            FETCH( func, *inst, 0, 0, CHAN_X );
+            emit_maxps(
+               func,
+               make_xmm( 0 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_00000000_I,
+                  TGSI_EXEC_TEMP_00000000_C ) );
+            STORE( func, *inst, 0, 0, CHAN_Y );
+         }
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+            FETCH( func, *inst, 1, 0, CHAN_Y );
+            emit_maxps(
+               func,
+               make_xmm( 1 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_00000000_I,
+                  TGSI_EXEC_TEMP_00000000_C ) );
+            FETCH( func, *inst, 2, 0, CHAN_W );
+            emit_minps(
+               func,
+               make_xmm( 2 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_128_I,
+                  TGSI_EXEC_TEMP_128_C ) );
+            emit_maxps(
+               func,
+               make_xmm( 2 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_MINUS_128_I,
+                  TGSI_EXEC_TEMP_MINUS_128_C ) );
+            emit_pow( func, 1, 2 );
+            FETCH( func, *inst, 0, 0, CHAN_X );
+            emit_xorps(
+               func,
+               make_xmm( 2 ),
+               make_xmm( 2 ) );
+            emit_cmpps(
+               func,
+               make_xmm( 2 ),
+               make_xmm( 0 ),
+               cc_LessThanEqual );
+            emit_andps(
+               func,
+               make_xmm( 2 ),
+               make_xmm( 1 ) );
+            STORE( func, *inst, 2, 0, CHAN_Z );
+         }
+      }
+      break;
+
+   case TGSI_OPCODE_RCP:
+   /* TGSI_OPCODE_RECIP */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_rcp( func, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_RSQ:
+   /* TGSI_OPCODE_RECIPSQRT */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_rsqrt( func, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_EXP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_LOG:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_MUL:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         emit_mul( func, 0, 1 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_ADD:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         emit_add( func, 0, 1 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DP3:
+   /* TGSI_OPCODE_DOT3 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_mul( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Y );
+      FETCH( func, *inst, 2, 1, CHAN_Y );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Z );
+      FETCH( func, *inst, 2, 1, CHAN_Z );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DP4:
+   /* TGSI_OPCODE_DOT4 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_mul( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Y );
+      FETCH( func, *inst, 2, 1, CHAN_Y );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Z );
+      FETCH( func, *inst, 2, 1, CHAN_Z );
+      emit_mul(func, 1, 2 );
+      emit_add(func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_W );
+      FETCH( func, *inst, 2, 1, CHAN_W );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DST:
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         emit_tempf(
+            func,
+            0,
+            TGSI_EXEC_TEMP_ONE_I,
+            TGSI_EXEC_TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         FETCH( func, *inst, 0, 0, CHAN_Y );
+         FETCH( func, *inst, 1, 1, CHAN_Y );
+         emit_mul( func, 0, 1 );
+         STORE( func, *inst, 0, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+         FETCH( func, *inst, 0, 0, CHAN_Z );
+         STORE( func, *inst, 0, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+         FETCH( func, *inst, 0, 1, CHAN_W );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MIN:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         emit_minps(
+            func,
+            make_xmm( 0 ),
+            make_xmm( 1 ) );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MAX:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         emit_maxps(
+            func,
+            make_xmm( 0 ),
+            make_xmm( 1 ) );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLT:
+   /* TGSI_OPCODE_SETLT */
+      emit_setcc( func, inst, cc_LessThan );
+      break;
+
+   case TGSI_OPCODE_SGE:
+   /* TGSI_OPCODE_SETGE */
+      emit_setcc( func, inst, cc_NotLessThan );
+      break;
+
+   case TGSI_OPCODE_MAD:
+   /* TGSI_OPCODE_MADD */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         FETCH( func, *inst, 2, 2, chan_index );
+         emit_mul( func, 0, 1 );
+         emit_add( func, 0, 2 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SUB:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         emit_sub( func, 0, 1 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LERP:
+   /* TGSI_OPCODE_LRP */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         FETCH( func, *inst, 2, 2, chan_index );
+         emit_sub( func, 1, 2 );
+         emit_mul( func, 0, 1 );
+         emit_add( func, 0, 2 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CND:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CND0:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DOT2ADD:
+   /* TGSI_OPCODE_DP2A */
+      return 0;
+      break;
+
+   case TGSI_OPCODE_INDEX:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_NEGATE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_FRAC:
+   /* TGSI_OPCODE_FRC */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_frc( func, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CLAMP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_FLOOR:
+   /* TGSI_OPCODE_FLR */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_flr( func, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_ROUND:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_EXPBASE2:
+   /* TGSI_OPCODE_EX2 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_ex2( func, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LOGBASE2:
+   /* TGSI_OPCODE_LG2 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_lg2( func, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_POWER:
+   /* TGSI_OPCODE_POW */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_pow( func, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CROSSPRODUCT:
+   /* TGSI_OPCODE_XPD */
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+         FETCH( func, *inst, 1, 1, CHAN_Z );
+         FETCH( func, *inst, 3, 0, CHAN_Z );
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         FETCH( func, *inst, 0, 0, CHAN_Y );
+         FETCH( func, *inst, 4, 1, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         emit_MOV( func, 2, 0 );
+         emit_mul( func, 2, 1 );
+         emit_MOV( func, 5, 3 );
+         emit_mul( func, 5, 4 );
+         emit_sub( func, 2, 5 );
+         STORE( func, *inst, 2, 0, CHAN_X );
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         FETCH( func, *inst, 2, 1, CHAN_X );
+         FETCH( func, *inst, 5, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         emit_mul( func, 3, 2 );
+         emit_mul( func, 1, 5 );
+         emit_sub( func, 3, 1 );
+         STORE( func, *inst, 3, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+         emit_mul( func, 5, 4 );
+         emit_mul( func, 0, 2 );
+         emit_sub( func, 5, 0 );
+         STORE( func, *inst, 5, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+         FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MULTIPLYMATRIX:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ABS:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_abs( func, 0) ;
+
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_RCC:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DPH:
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_mul( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Y );
+      FETCH( func, *inst, 2, 1, CHAN_Y );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Z );
+      FETCH( func, *inst, 2, 1, CHAN_Z );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 1, CHAN_W );
+      emit_add( func, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_COS:
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_cos( func, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DDX:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DDY:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_KIL:
+      emit_kil( func, &inst->FullSrcRegisters[0] );
+      break;
+
+   case TGSI_OPCODE_PK2H:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK2US:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK4B:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK4UB:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_RFL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SEQ:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SFL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SGT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SIN:
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_sin( func, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SNE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_STR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TEX:
+      emit_tempf(
+         func,
+         0,
+         TGSI_EXEC_TEMP_ONE_I,
+         TGSI_EXEC_TEMP_ONE_C );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_TXD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP2H:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP2US:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP4B:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP4UB:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_X2D:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ARA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ARR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_BRA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CAL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_RET:
+   case TGSI_OPCODE_END:
+#ifdef WIN32
+      emit_retw( func, 16 );
+#else
+      emit_ret( func );
+#endif
+      break;
+
+   case TGSI_OPCODE_SSG:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CMP:
+      emit_cmp (func, inst);
+      break;
+
+   case TGSI_OPCODE_SCS:
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         FETCH( func, *inst, 0, 0, CHAN_X );
+         emit_cos( func, 0 );
+         STORE( func, *inst, 0, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         FETCH( func, *inst, 0, 0, CHAN_Y );
+         emit_sin( func, 0 );
+         STORE( func, *inst, 0, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+         FETCH( func, *inst, 0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C );
+         STORE( func, *inst, 0, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+         FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_TXB:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_NRM:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DIV:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DP2:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TXL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_BRK:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_IF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_LOOP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_REP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ELSE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDIF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDLOOP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDREP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PUSHA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_POPA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CEIL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_I2F:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_NOT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TRUNC:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SHL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SHR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_AND:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_OR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_MOD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_XOR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SAD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TXF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TXQ:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CONT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_EMIT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDPRIM:
+      return 0;
+      break;
+
+   default:
+      return 0;
+   }
+   
+   return 1;
+}
+
+static void
+emit_declaration(
+   struct x86_function *func,
+   struct tgsi_full_declaration *decl )
+{
+   if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+      unsigned first, last, mask;
+      unsigned i, j;
+
+      assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
+
+      first = decl->u.DeclarationRange.First;
+      last = decl->u.DeclarationRange.Last;
+      mask = decl->Declaration.UsageMask;
+
+      /* Do not touch WPOS.xy */
+      if( first == 0 ) {
+         mask &= ~TGSI_WRITEMASK_XY;
+         if( mask == TGSI_WRITEMASK_NONE ) {
+            first++;
+         }
+      }
+
+      for( i = first; i <= last; i++ ) {
+         for( j = 0; j < NUM_CHANNELS; j++ ) {
+            if( mask & (1 << j) ) {
+               switch( decl->Interpolation.Interpolate ) {
+               case TGSI_INTERPOLATE_CONSTANT:
+                  emit_coef_a0( func, 0, i, j );
+                  emit_inputs( func, 0, i, j );
+                  break;
+
+               case TGSI_INTERPOLATE_LINEAR:
+                  emit_inputf( func, 0, 0, TGSI_SWIZZLE_X );
+                  emit_coef_dadx( func, 1, i, j );
+                  emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y );
+                  emit_coef_dady( func, 3, i, j );
+                  emit_mul( func, 0, 1 );    /* x * dadx */
+                  emit_coef_a0( func, 4, i, j );
+                  emit_mul( func, 2, 3 );    /* y * dady */
+                  emit_add( func, 0, 4 );    /* x * dadx + a0 */
+                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
+                  emit_inputs( func, 0, i, j );
+                  break;
+
+               case TGSI_INTERPOLATE_PERSPECTIVE:
+                  emit_inputf( func, 0, 0, TGSI_SWIZZLE_X );
+                  emit_coef_dadx( func, 1, i, j );
+                  emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y );
+                  emit_coef_dady( func, 3, i, j );
+                  emit_mul( func, 0, 1 );    /* x * dadx */
+                  emit_inputf( func, 4, 0, TGSI_SWIZZLE_W );
+                  emit_coef_a0( func, 5, i, j );
+                  emit_rcp( func, 4, 4 );    /* 1.0 / w */
+                  emit_mul( func, 2, 3 );    /* y * dady */
+                  emit_add( func, 0, 5 );    /* x * dadx + a0 */
+                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
+                  emit_mul( func, 0, 4 );    /* (x * dadx + y * dady + a0) / w */
+                  emit_inputs( func, 0, i, j );
+                  break;
+
+               default:
+                  assert( 0 );
+		  break;
+               }
+            }
+         }
+      }
+   }
+}
+
+unsigned
+tgsi_emit_sse2(
+   struct tgsi_token *tokens,
+   struct x86_function *func )
+{
+   struct tgsi_parse_context parse;
+   unsigned ok = 1;
+
+   DUMP_START();
+
+   func->csr = func->store;
+
+   emit_mov(
+      func,
+      get_input_base(),
+      get_argument( 0 ) );
+   emit_mov(
+      func,
+      get_output_base(),
+      get_argument( 1 ) );
+   emit_mov(
+      func,
+      get_const_base(),
+      get_argument( 2 ) );
+   emit_mov(
+      func,
+      get_temp_base(),
+      get_argument( 3 ) );
+
+   tgsi_parse_init( &parse, tokens );
+
+   while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         ok = emit_instruction(
+	    func,
+	    &parse.FullToken.FullInstruction );
+
+	 if (!ok) {
+	    debug_printf("failed to translate tgsi opcode %d\n", 
+			 parse.FullToken.FullInstruction.Instruction.Opcode );
+	 }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* XXX implement this */
+	 ok = 0;
+	 debug_printf("failed to emit immediate value\n");
+	 break;
+
+      default:
+         assert( 0 );
+	 ok = 0;
+	 break;
+      }
+   }
+
+   tgsi_parse_free( &parse );
+
+   DUMP_END();
+
+   return ok;
+}
+
+/**
+ * Fragment shaders are responsible for interpolating shader inputs. Because on
+ * x86 we have only 4 GP registers, and here we have 5 shader arguments (input,
+ * output, const, temp and coef), the code is split into two phases --
+ * DECLARATION and INSTRUCTION phase.
+ * GP register holding the output argument is aliased with the coeff argument,
+ * as outputs are not needed in the DECLARATION phase.
+ */
+unsigned
+tgsi_emit_sse2_fs(
+   struct tgsi_token *tokens,
+   struct x86_function *func )
+{
+   struct tgsi_parse_context parse;
+   boolean instruction_phase = FALSE;
+
+   DUMP_START();
+
+   func->csr = func->store;
+
+   /* DECLARATION phase, do not load output argument. */
+   emit_mov(
+      func,
+      get_input_base(),
+      get_argument( 0 ) );
+   emit_mov(
+      func,
+      get_const_base(),
+      get_argument( 2 ) );
+   emit_mov(
+      func,
+      get_temp_base(),
+      get_argument( 3 ) );
+   emit_mov(
+      func,
+      get_coef_base(),
+      get_argument( 4 ) );
+
+   tgsi_parse_init( &parse, tokens );
+
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         emit_declaration(
+            func,
+            &parse.FullToken.FullDeclaration );
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if( !instruction_phase ) {
+            /* INSTRUCTION phase, overwrite coeff with output. */
+            instruction_phase = TRUE;
+            emit_mov(
+               func,
+               get_output_base(),
+               get_argument( 1 ) );
+         }
+         emit_instruction(
+            func,
+            &parse.FullToken.FullInstruction );
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* XXX implement this */
+         assert(0);
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+
+   tgsi_parse_free( &parse );
+
+   DUMP_END();
+
+   return 1;
+}
+
+#endif /* i386 */
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
new file mode 100755
index 00000000000..9bee3717665
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
@@ -0,0 +1,26 @@
+#if !defined TGSI_SSE2_H
+#define TGSI_SSE2_H
+
+#if defined __cplusplus
+extern "C" {
+#endif // defined __cplusplus
+
+struct tgsi_token;
+struct x86_function;
+
+unsigned
+tgsi_emit_sse2(
+   struct tgsi_token *tokens,
+   struct x86_function *function );
+
+unsigned
+tgsi_emit_sse2_fs(
+   struct tgsi_token *tokens,
+   struct x86_function *function );
+
+#if defined __cplusplus
+} // extern "C"
+#endif // defined __cplusplus
+
+#endif // !defined TGSI_SSE2_H
+
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
new file mode 100644
index 00000000000..a00ff1c2a5f
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
@@ -0,0 +1,1371 @@
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_build.h"
+#include "tgsi_parse.h"
+
+/*
+ * version
+ */
+
+struct tgsi_version
+tgsi_build_version( void )
+{
+   struct tgsi_version  version;
+
+   version.MajorVersion = 1;
+   version.MinorVersion = 1;
+   version.Padding = 0;
+
+   return version;
+}
+
+/*
+ * header
+ */
+
+struct tgsi_header
+tgsi_build_header( void )
+{
+   struct tgsi_header header;
+
+   header.HeaderSize = 1;
+   header.BodySize = 0;
+
+   return header;
+}
+
+static void
+header_headersize_grow( struct tgsi_header *header )
+{
+   assert( header->HeaderSize < 0xFF );
+   assert( header->BodySize == 0 );
+
+   header->HeaderSize++;
+}
+
+static void
+header_bodysize_grow( struct tgsi_header *header )
+{
+   assert( header->BodySize < 0xFFFFFF );
+
+   header->BodySize++;
+}
+
+struct tgsi_processor
+tgsi_default_processor( void )
+{
+   struct tgsi_processor processor;
+
+   processor.Processor = TGSI_PROCESSOR_FRAGMENT;
+   processor.Padding = 0;
+
+   return processor;
+}
+
+struct tgsi_processor
+tgsi_build_processor(
+   unsigned type,
+   struct tgsi_header *header )
+{
+   struct tgsi_processor processor;
+
+   processor = tgsi_default_processor();
+   processor.Processor = type;
+
+   header_headersize_grow( header );
+
+   return processor;
+}
+
+/*
+ * declaration
+ */
+
+struct tgsi_declaration
+tgsi_default_declaration( void )
+{
+   struct tgsi_declaration declaration;
+
+   declaration.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   declaration.Size = 1;
+   declaration.File = TGSI_FILE_NULL;
+   declaration.Declare = TGSI_DECLARE_RANGE;
+   declaration.UsageMask = TGSI_WRITEMASK_XYZW;
+   declaration.Interpolate = 0;
+   declaration.Semantic = 0;
+   declaration.Padding = 0;
+   declaration.Extended = 0;
+
+   return declaration;
+}
+
+struct tgsi_declaration
+tgsi_build_declaration(
+   unsigned file,
+   unsigned declare,
+   unsigned usage_mask,
+   unsigned interpolate,
+   unsigned semantic,
+   struct tgsi_header *header )
+{
+   struct tgsi_declaration declaration;
+
+   assert( file <= TGSI_FILE_IMMEDIATE );
+   assert( declare <= TGSI_DECLARE_MASK );
+
+   declaration = tgsi_default_declaration();
+   declaration.File = file;
+   declaration.Declare = declare;
+   declaration.UsageMask = usage_mask;
+   declaration.Interpolate = interpolate;
+   declaration.Semantic = semantic;
+
+   header_bodysize_grow( header );
+
+   return declaration;
+}
+
+static void
+declaration_grow(
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header )
+{
+   assert( declaration->Size < 0xFF );
+
+   declaration->Size++;
+
+   header_bodysize_grow( header );
+}
+
+struct tgsi_full_declaration
+tgsi_default_full_declaration( void )
+{
+   struct tgsi_full_declaration  full_declaration;
+
+   full_declaration.Declaration  = tgsi_default_declaration();
+   full_declaration.Interpolation = tgsi_default_declaration_interpolation();
+   full_declaration.Semantic = tgsi_default_declaration_semantic();
+
+   return full_declaration;
+}
+
+unsigned
+tgsi_build_full_declaration(
+   const struct tgsi_full_declaration *full_decl,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize )
+{
+   unsigned size = 0;
+   struct tgsi_declaration *declaration;
+
+   if( maxsize <= size )
+     return 0;
+   declaration = (struct tgsi_declaration *) &tokens[size];
+   size++;
+
+   *declaration = tgsi_build_declaration(
+      full_decl->Declaration.File,
+      full_decl->Declaration.Declare,
+      full_decl->Declaration.UsageMask,
+      full_decl->Declaration.Interpolate,
+      full_decl->Declaration.Semantic,
+      header );
+
+   switch( full_decl->Declaration.Declare )  {
+   case TGSI_DECLARE_RANGE:
+   {
+      struct tgsi_declaration_range *dr;
+
+      if( maxsize <= size )
+         return 0;
+      dr = (struct tgsi_declaration_range *) &tokens[size];
+      size++;
+
+      *dr = tgsi_build_declaration_range(
+         full_decl->u.DeclarationRange.First,
+         full_decl->u.DeclarationRange.Last,
+         declaration,
+         header );
+      break;
+    }
+
+   case TGSI_DECLARE_MASK:
+   {
+      struct tgsi_declaration_mask *dm;
+
+      if( maxsize <= size )
+         return 0;
+      dm = (struct tgsi_declaration_mask  *) &tokens[size];
+      size++;
+
+      *dm = tgsi_build_declaration_mask(
+         full_decl->u.DeclarationMask.Mask,
+         declaration,
+         header );
+      break;
+   }
+
+   default:
+      assert( 0 );
+   }
+
+   if( full_decl->Declaration.Interpolate ) {
+      struct tgsi_declaration_interpolation *di;
+
+      if( maxsize <= size )
+         return  0;
+      di = (struct tgsi_declaration_interpolation *) &tokens[size];
+      size++;
+
+      *di = tgsi_build_declaration_interpolation(
+         full_decl->Interpolation.Interpolate,
+         declaration,
+         header );
+   }
+
+   if( full_decl->Declaration.Semantic ) {
+      struct tgsi_declaration_semantic *ds;
+
+      if( maxsize <= size )
+         return  0;
+      ds = (struct tgsi_declaration_semantic *) &tokens[size];
+      size++;
+
+      *ds = tgsi_build_declaration_semantic(
+         full_decl->Semantic.SemanticName,
+         full_decl->Semantic.SemanticIndex,
+         declaration,
+         header );
+   }
+
+   return size;
+}
+
+struct tgsi_declaration_range
+tgsi_build_declaration_range(
+   unsigned first,
+   unsigned last,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header )
+{
+   struct tgsi_declaration_range declaration_range;
+
+   assert( last >= first );
+   assert( last <= 0xFFFF );
+
+   declaration_range.First = first;
+   declaration_range.Last = last;
+
+   declaration_grow( declaration, header );
+
+   return declaration_range;
+}
+
+struct tgsi_declaration_mask
+tgsi_build_declaration_mask(
+   unsigned mask,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header )
+{
+   struct tgsi_declaration_mask  declaration_mask;
+
+   declaration_mask.Mask = mask;
+
+   declaration_grow( declaration, header );
+
+   return declaration_mask;
+}
+
+struct tgsi_declaration_interpolation
+tgsi_default_declaration_interpolation( void )
+{
+   struct tgsi_declaration_interpolation di;
+
+   di.Interpolate = TGSI_INTERPOLATE_CONSTANT;
+   di.Padding = 0;
+
+   return di;
+}
+
+struct tgsi_declaration_interpolation
+tgsi_build_declaration_interpolation(
+   unsigned interpolate,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header )
+{
+   struct tgsi_declaration_interpolation di;
+
+   assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE );
+
+   di = tgsi_default_declaration_interpolation();
+   di.Interpolate = interpolate;
+
+   declaration_grow( declaration, header );
+
+   return di;
+}
+
+struct tgsi_declaration_semantic
+tgsi_default_declaration_semantic( void )
+{
+   struct tgsi_declaration_semantic ds;
+
+   ds.SemanticName = TGSI_SEMANTIC_POSITION;
+   ds.SemanticIndex = 0;
+   ds.Padding = 0;
+
+   return ds;
+}
+
+struct tgsi_declaration_semantic
+tgsi_build_declaration_semantic(
+   unsigned semantic_name,
+   unsigned semantic_index,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header )
+{
+   struct tgsi_declaration_semantic ds;
+
+   assert( semantic_name <= TGSI_SEMANTIC_COUNT );
+   assert( semantic_index <= 0xFFFF );
+
+   ds = tgsi_default_declaration_semantic();
+   ds.SemanticName = semantic_name;
+   ds.SemanticIndex = semantic_index;
+
+   declaration_grow( declaration, header );
+
+   return ds;
+}
+
+/*
+ * immediate
+ */
+
+struct tgsi_immediate
+tgsi_default_immediate( void )
+{
+   struct tgsi_immediate immediate;
+
+   immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
+   immediate.Size = 1;
+   immediate.DataType = TGSI_IMM_FLOAT32;
+   immediate.Padding = 0;
+   immediate.Extended = 0;
+
+   return immediate;
+}
+
+struct tgsi_immediate
+tgsi_build_immediate(
+   struct tgsi_header *header )
+{
+   struct tgsi_immediate immediate;
+
+   immediate = tgsi_default_immediate();
+
+   header_bodysize_grow( header );
+
+   return immediate;
+}
+
+struct tgsi_full_immediate
+tgsi_default_full_immediate( void )
+{
+   struct tgsi_full_immediate fullimm;
+
+   fullimm.Immediate = tgsi_default_immediate();
+   fullimm.u.Pointer = (void *) 0;
+
+   return fullimm;
+}
+
+static void
+immediate_grow(
+   struct tgsi_immediate *immediate,
+   struct tgsi_header *header )
+{
+   assert( immediate->Size < 0xFF );
+
+   immediate->Size++;
+
+   header_bodysize_grow( header );
+}
+
+struct tgsi_immediate_float32
+tgsi_build_immediate_float32(
+   float value,
+   struct tgsi_immediate *immediate,
+   struct tgsi_header *header )
+{
+   struct tgsi_immediate_float32 immediate_float32;
+
+   immediate_float32.Float = value;
+
+   immediate_grow( immediate, header );
+
+   return immediate_float32;
+}
+
+unsigned
+tgsi_build_full_immediate(
+   const struct tgsi_full_immediate *full_imm,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize )
+{
+   unsigned size = 0, i;
+   struct tgsi_immediate *immediate;
+
+   if( maxsize <= size )
+      return 0;
+   immediate = (struct tgsi_immediate *) &tokens[size];
+   size++;
+
+   *immediate = tgsi_build_immediate( header );
+
+   for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) {
+      struct tgsi_immediate_float32 *if32;
+
+      if( maxsize <= size )
+         return  0;
+      if32 = (struct tgsi_immediate_float32 *) &tokens[size];
+      size++;
+
+      *if32 = tgsi_build_immediate_float32(
+         full_imm->u.ImmediateFloat32[i].Float,
+         immediate,
+         header );
+   }
+
+   return size;
+}
+
+/*
+ * instruction
+ */
+
+struct tgsi_instruction
+tgsi_default_instruction( void )
+{
+   struct tgsi_instruction instruction;
+
+   instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
+   instruction.Size = 1;
+   instruction.Opcode = TGSI_OPCODE_MOV;
+   instruction.Saturate = TGSI_SAT_NONE;
+   instruction.NumDstRegs = 1;
+   instruction.NumSrcRegs = 1;
+   instruction.Padding  = 0;
+   instruction.Extended = 0;
+
+   return instruction;
+}
+
+struct tgsi_instruction
+tgsi_build_instruction(
+   unsigned opcode,
+   unsigned saturate,
+   unsigned num_dst_regs,
+   unsigned num_src_regs,
+   struct tgsi_header *header )
+{
+   struct tgsi_instruction instruction;
+
+   assert (opcode <= TGSI_OPCODE_LAST);
+   assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE);
+   assert (num_dst_regs <= 3);
+   assert (num_src_regs <= 15);
+
+   instruction = tgsi_default_instruction();
+   instruction.Opcode = opcode;
+   instruction.Saturate = saturate;
+   instruction.NumDstRegs = num_dst_regs;
+   instruction.NumSrcRegs = num_src_regs;
+
+   header_bodysize_grow( header );
+
+   return instruction;
+}
+
+static void
+instruction_grow(
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   assert (instruction->Size <   0xFF);
+
+   instruction->Size++;
+
+   header_bodysize_grow( header );
+}
+
+struct tgsi_full_instruction
+tgsi_default_full_instruction( void )
+{
+   struct tgsi_full_instruction full_instruction;
+   unsigned i;
+
+   full_instruction.Instruction = tgsi_default_instruction();
+   full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv();
+   full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label();
+   full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture();
+   for( i = 0;  i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) {
+      full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register();
+   }
+   for( i = 0;  i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) {
+      full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register();
+   }
+
+   return full_instruction;
+}
+
+unsigned
+tgsi_build_full_instruction(
+   const struct tgsi_full_instruction *full_inst,
+   struct  tgsi_token *tokens,
+   struct  tgsi_header *header,
+   unsigned  maxsize )
+{
+   unsigned size = 0;
+   unsigned i;
+   struct tgsi_instruction *instruction;
+   struct tgsi_token *prev_token;
+
+   if( maxsize <= size )
+      return 0;
+   instruction = (struct tgsi_instruction *) &tokens[size];
+   size++;
+
+   *instruction = tgsi_build_instruction(
+      full_inst->Instruction.Opcode,
+      full_inst->Instruction.Saturate,
+      full_inst->Instruction.NumDstRegs,
+      full_inst->Instruction.NumSrcRegs,
+      header );
+   prev_token = (struct tgsi_token  *) instruction;
+
+   if( tgsi_compare_instruction_ext_nv(
+         full_inst->InstructionExtNv,
+         tgsi_default_instruction_ext_nv() ) ) {
+      struct tgsi_instruction_ext_nv *instruction_ext_nv;
+
+      if( maxsize <= size )
+         return 0;
+      instruction_ext_nv =
+         (struct  tgsi_instruction_ext_nv *) &tokens[size];
+      size++;
+
+      *instruction_ext_nv  = tgsi_build_instruction_ext_nv(
+         full_inst->InstructionExtNv.Precision,
+         full_inst->InstructionExtNv.CondDstIndex,
+         full_inst->InstructionExtNv.CondFlowIndex,
+         full_inst->InstructionExtNv.CondMask,
+         full_inst->InstructionExtNv.CondSwizzleX,
+         full_inst->InstructionExtNv.CondSwizzleY,
+         full_inst->InstructionExtNv.CondSwizzleZ,
+         full_inst->InstructionExtNv.CondSwizzleW,
+         full_inst->InstructionExtNv.CondDstUpdate,
+         full_inst->InstructionExtNv.CondFlowEnable,
+         prev_token,
+         instruction,
+         header );
+      prev_token = (struct tgsi_token  *) instruction_ext_nv;
+   }
+
+   if( tgsi_compare_instruction_ext_label(
+         full_inst->InstructionExtLabel,
+         tgsi_default_instruction_ext_label() ) ) {
+      struct tgsi_instruction_ext_label *instruction_ext_label;
+
+      if( maxsize <= size )
+         return 0;
+      instruction_ext_label =
+         (struct  tgsi_instruction_ext_label *) &tokens[size];
+      size++;
+
+      *instruction_ext_label = tgsi_build_instruction_ext_label(
+         full_inst->InstructionExtLabel.Label,
+         prev_token,
+         instruction,
+         header );
+      prev_token = (struct tgsi_token  *) instruction_ext_label;
+   }
+
+   if( tgsi_compare_instruction_ext_texture(
+         full_inst->InstructionExtTexture,
+         tgsi_default_instruction_ext_texture() ) ) {
+      struct tgsi_instruction_ext_texture *instruction_ext_texture;
+
+      if( maxsize <= size )
+         return 0;
+      instruction_ext_texture =
+         (struct  tgsi_instruction_ext_texture *) &tokens[size];
+      size++;
+
+      *instruction_ext_texture = tgsi_build_instruction_ext_texture(
+         full_inst->InstructionExtTexture.Texture,
+         prev_token,
+         instruction,
+         header   );
+      prev_token = (struct tgsi_token  *) instruction_ext_texture;
+   }
+
+   for( i = 0;  i <   full_inst->Instruction.NumDstRegs; i++ ) {
+      const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i];
+      struct tgsi_dst_register *dst_register;
+      struct tgsi_token *prev_token;
+
+      if( maxsize <= size )
+         return 0;
+      dst_register = (struct tgsi_dst_register *) &tokens[size];
+      size++;
+
+      *dst_register = tgsi_build_dst_register(
+         reg->DstRegister.File,
+         reg->DstRegister.WriteMask,
+         reg->DstRegister.Index,
+         instruction,
+         header );
+      prev_token = (struct tgsi_token  *) dst_register;
+
+      if( tgsi_compare_dst_register_ext_concode(
+            reg->DstRegisterExtConcode,
+            tgsi_default_dst_register_ext_concode() ) ) {
+         struct tgsi_dst_register_ext_concode *dst_register_ext_concode;
+
+         if( maxsize <= size )
+            return 0;
+         dst_register_ext_concode =
+            (struct  tgsi_dst_register_ext_concode *) &tokens[size];
+         size++;
+
+         *dst_register_ext_concode =   tgsi_build_dst_register_ext_concode(
+            reg->DstRegisterExtConcode.CondMask,
+            reg->DstRegisterExtConcode.CondSwizzleX,
+            reg->DstRegisterExtConcode.CondSwizzleY,
+            reg->DstRegisterExtConcode.CondSwizzleZ,
+            reg->DstRegisterExtConcode.CondSwizzleW,
+            reg->DstRegisterExtConcode.CondSrcIndex,
+            prev_token,
+            instruction,
+            header );
+         prev_token = (struct tgsi_token  *) dst_register_ext_concode;
+      }
+
+      if( tgsi_compare_dst_register_ext_modulate(
+            reg->DstRegisterExtModulate,
+            tgsi_default_dst_register_ext_modulate() ) ) {
+         struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate;
+
+         if( maxsize <= size )
+            return 0;
+         dst_register_ext_modulate =
+            (struct  tgsi_dst_register_ext_modulate *) &tokens[size];
+         size++;
+
+         *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate(
+            reg->DstRegisterExtModulate.Modulate,
+            prev_token,
+            instruction,
+            header );
+         prev_token = (struct tgsi_token  *) dst_register_ext_modulate;
+      }
+   }
+
+   for( i = 0;  i < full_inst->Instruction.NumSrcRegs; i++ ) {
+      const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i];
+      struct tgsi_src_register *src_register;
+      struct tgsi_token *prev_token;
+
+      if( maxsize <= size )
+         return 0;
+      src_register = (struct tgsi_src_register *)  &tokens[size];
+      size++;
+
+      *src_register = tgsi_build_src_register(
+         reg->SrcRegister.File,
+         reg->SrcRegister.SwizzleX,
+         reg->SrcRegister.SwizzleY,
+         reg->SrcRegister.SwizzleZ,
+         reg->SrcRegister.SwizzleW,
+         reg->SrcRegister.Negate,
+         reg->SrcRegister.Indirect,
+         reg->SrcRegister.Dimension,
+         reg->SrcRegister.Index,
+         instruction,
+         header );
+      prev_token = (struct tgsi_token  *) src_register;
+
+      if( tgsi_compare_src_register_ext_swz(
+            reg->SrcRegisterExtSwz,
+            tgsi_default_src_register_ext_swz() ) ) {
+         struct tgsi_src_register_ext_swz *src_register_ext_swz;
+
+         if( maxsize <= size )
+            return 0;
+         src_register_ext_swz =
+            (struct  tgsi_src_register_ext_swz *) &tokens[size];
+         size++;
+
+         *src_register_ext_swz = tgsi_build_src_register_ext_swz(
+            reg->SrcRegisterExtSwz.ExtSwizzleX,
+            reg->SrcRegisterExtSwz.ExtSwizzleY,
+            reg->SrcRegisterExtSwz.ExtSwizzleZ,
+            reg->SrcRegisterExtSwz.ExtSwizzleW,
+            reg->SrcRegisterExtSwz.NegateX,
+            reg->SrcRegisterExtSwz.NegateY,
+            reg->SrcRegisterExtSwz.NegateZ,
+            reg->SrcRegisterExtSwz.NegateW,
+            reg->SrcRegisterExtSwz.ExtDivide,
+            prev_token,
+            instruction,
+            header );
+         prev_token = (struct tgsi_token  *) src_register_ext_swz;
+      }
+
+      if( tgsi_compare_src_register_ext_mod(
+            reg->SrcRegisterExtMod,
+            tgsi_default_src_register_ext_mod() ) ) {
+         struct tgsi_src_register_ext_mod *src_register_ext_mod;
+
+         if( maxsize <= size )
+            return 0;
+         src_register_ext_mod =
+            (struct  tgsi_src_register_ext_mod *) &tokens[size];
+         size++;
+
+         *src_register_ext_mod = tgsi_build_src_register_ext_mod(
+            reg->SrcRegisterExtMod.Complement,
+            reg->SrcRegisterExtMod.Bias,
+            reg->SrcRegisterExtMod.Scale2X,
+            reg->SrcRegisterExtMod.Absolute,
+            reg->SrcRegisterExtMod.Negate,
+            prev_token,
+            instruction,
+            header );
+         prev_token = (struct tgsi_token  *) src_register_ext_mod;
+      }
+
+      if( reg->SrcRegister.Indirect ) {
+         struct  tgsi_src_register *ind;
+
+         if( maxsize <= size )
+            return 0;
+         ind = (struct tgsi_src_register *) &tokens[size];
+         size++;
+
+         *ind = tgsi_build_src_register(
+            reg->SrcRegisterInd.File,
+            reg->SrcRegisterInd.SwizzleX,
+            reg->SrcRegisterInd.SwizzleY,
+            reg->SrcRegisterInd.SwizzleZ,
+            reg->SrcRegisterInd.SwizzleW,
+            reg->SrcRegisterInd.Negate,
+            reg->SrcRegisterInd.Indirect,
+            reg->SrcRegisterInd.Dimension,
+            reg->SrcRegisterInd.Index,
+            instruction,
+            header );
+      }
+
+      if( reg->SrcRegister.Dimension ) {
+         struct  tgsi_dimension *dim;
+
+         assert( !reg->SrcRegisterDim.Dimension );
+
+         if( maxsize <= size )
+            return 0;
+         dim = (struct tgsi_dimension *) &tokens[size];
+         size++;
+
+         *dim = tgsi_build_dimension(
+            reg->SrcRegisterDim.Indirect,
+            reg->SrcRegisterDim.Index,
+            instruction,
+            header );
+
+         if( reg->SrcRegisterDim.Indirect ) {
+            struct tgsi_src_register *ind;
+
+            if( maxsize <= size )
+               return 0;
+            ind = (struct tgsi_src_register *) &tokens[size];
+            size++;
+
+            *ind = tgsi_build_src_register(
+               reg->SrcRegisterDimInd.File,
+               reg->SrcRegisterDimInd.SwizzleX,
+               reg->SrcRegisterDimInd.SwizzleY,
+               reg->SrcRegisterDimInd.SwizzleZ,
+               reg->SrcRegisterDimInd.SwizzleW,
+               reg->SrcRegisterDimInd.Negate,
+               reg->SrcRegisterDimInd.Indirect,
+               reg->SrcRegisterDimInd.Dimension,
+               reg->SrcRegisterDimInd.Index,
+               instruction,
+               header );
+         }
+      }
+   }
+
+   return size;
+}
+
+struct tgsi_instruction_ext_nv
+tgsi_default_instruction_ext_nv( void )
+{
+   struct tgsi_instruction_ext_nv instruction_ext_nv;
+
+   instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV;
+   instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT;
+   instruction_ext_nv.CondDstIndex = 0;
+   instruction_ext_nv.CondFlowIndex = 0;
+   instruction_ext_nv.CondMask = TGSI_CC_TR;
+   instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X;
+   instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y;
+   instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z;
+   instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W;
+   instruction_ext_nv.CondDstUpdate = 0;
+   instruction_ext_nv.CondFlowEnable = 0;
+   instruction_ext_nv.Padding = 0;
+   instruction_ext_nv.Extended = 0;
+
+   return instruction_ext_nv;
+}
+
+union token_u32
+{
+   unsigned u32;
+};
+
+unsigned
+tgsi_compare_instruction_ext_nv(
+   struct tgsi_instruction_ext_nv a,
+   struct tgsi_instruction_ext_nv b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_instruction_ext_nv
+tgsi_build_instruction_ext_nv(
+   unsigned precision,
+   unsigned cond_dst_index,
+   unsigned cond_flow_index,
+   unsigned cond_mask,
+   unsigned cond_swizzle_x,
+   unsigned cond_swizzle_y,
+   unsigned cond_swizzle_z,
+   unsigned cond_swizzle_w,
+   unsigned cond_dst_update,
+   unsigned cond_flow_update,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_instruction_ext_nv instruction_ext_nv;
+
+   instruction_ext_nv = tgsi_default_instruction_ext_nv();
+   instruction_ext_nv.Precision = precision;
+   instruction_ext_nv.CondDstIndex = cond_dst_index;
+   instruction_ext_nv.CondFlowIndex = cond_flow_index;
+   instruction_ext_nv.CondMask = cond_mask;
+   instruction_ext_nv.CondSwizzleX = cond_swizzle_x;
+   instruction_ext_nv.CondSwizzleY = cond_swizzle_y;
+   instruction_ext_nv.CondSwizzleZ = cond_swizzle_z;
+   instruction_ext_nv.CondSwizzleW = cond_swizzle_w;
+   instruction_ext_nv.CondDstUpdate = cond_dst_update;
+   instruction_ext_nv.CondFlowEnable = cond_flow_update;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return instruction_ext_nv;
+}
+
+struct tgsi_instruction_ext_label
+tgsi_default_instruction_ext_label( void )
+{
+   struct tgsi_instruction_ext_label instruction_ext_label;
+
+   instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL;
+   instruction_ext_label.Label = 0;
+   instruction_ext_label.Padding = 0;
+   instruction_ext_label.Extended = 0;
+
+   return instruction_ext_label;
+}
+
+unsigned
+tgsi_compare_instruction_ext_label(
+   struct tgsi_instruction_ext_label a,
+   struct tgsi_instruction_ext_label b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_instruction_ext_label
+tgsi_build_instruction_ext_label(
+   unsigned label,
+   struct tgsi_token  *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_instruction_ext_label instruction_ext_label;
+
+   instruction_ext_label = tgsi_default_instruction_ext_label();
+   instruction_ext_label.Label = label;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return instruction_ext_label;
+}
+
+struct tgsi_instruction_ext_texture
+tgsi_default_instruction_ext_texture( void )
+{
+   struct tgsi_instruction_ext_texture instruction_ext_texture;
+
+   instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE;
+   instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN;
+   instruction_ext_texture.Padding = 0;
+   instruction_ext_texture.Extended = 0;
+
+   return instruction_ext_texture;
+}
+
+unsigned
+tgsi_compare_instruction_ext_texture(
+   struct tgsi_instruction_ext_texture a,
+   struct tgsi_instruction_ext_texture b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_instruction_ext_texture
+tgsi_build_instruction_ext_texture(
+   unsigned texture,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_instruction_ext_texture instruction_ext_texture;
+
+   instruction_ext_texture = tgsi_default_instruction_ext_texture();
+   instruction_ext_texture.Texture = texture;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return instruction_ext_texture;
+}
+
+struct tgsi_src_register
+tgsi_default_src_register( void )
+{
+   struct tgsi_src_register src_register;
+
+   src_register.File = TGSI_FILE_NULL;
+   src_register.SwizzleX = TGSI_SWIZZLE_X;
+   src_register.SwizzleY = TGSI_SWIZZLE_Y;
+   src_register.SwizzleZ = TGSI_SWIZZLE_Z;
+   src_register.SwizzleW = TGSI_SWIZZLE_W;
+   src_register.Negate = 0;
+   src_register.Indirect = 0;
+   src_register.Dimension = 0;
+   src_register.Index = 0;
+   src_register.Extended = 0;
+
+   return src_register;
+}
+
+struct tgsi_src_register
+tgsi_build_src_register(
+   unsigned file,
+   unsigned swizzle_x,
+   unsigned swizzle_y,
+   unsigned swizzle_z,
+   unsigned swizzle_w,
+   unsigned negate,
+   unsigned indirect,
+   unsigned dimension,
+   int index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_src_register   src_register;
+
+   assert( file <= TGSI_FILE_IMMEDIATE );
+   assert( swizzle_x <= TGSI_SWIZZLE_W );
+   assert( swizzle_y <= TGSI_SWIZZLE_W );
+   assert( swizzle_z <= TGSI_SWIZZLE_W );
+   assert( swizzle_w <= TGSI_SWIZZLE_W );
+   assert( negate <= 1 );
+   assert( index >= -0x8000 && index <= 0x7FFF );
+
+   src_register = tgsi_default_src_register();
+   src_register.File = file;
+   src_register.SwizzleX = swizzle_x;
+   src_register.SwizzleY = swizzle_y;
+   src_register.SwizzleZ = swizzle_z;
+   src_register.SwizzleW = swizzle_w;
+   src_register.Negate = negate;
+   src_register.Indirect = indirect;
+   src_register.Dimension = dimension;
+   src_register.Index = index;
+
+   instruction_grow( instruction, header );
+
+   return src_register;
+}
+
+struct tgsi_full_src_register
+tgsi_default_full_src_register( void )
+{
+   struct tgsi_full_src_register full_src_register;
+
+   full_src_register.SrcRegister = tgsi_default_src_register();
+   full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz();
+   full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod();
+   full_src_register.SrcRegisterInd = tgsi_default_src_register();
+   full_src_register.SrcRegisterDim = tgsi_default_dimension();
+   full_src_register.SrcRegisterDimInd = tgsi_default_src_register();
+
+   return full_src_register;
+}
+
+struct tgsi_src_register_ext_swz
+tgsi_default_src_register_ext_swz( void )
+{
+   struct tgsi_src_register_ext_swz src_register_ext_swz;
+
+   src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ;
+   src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X;
+   src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y;
+   src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z;
+   src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W;
+   src_register_ext_swz.NegateX = 0;
+   src_register_ext_swz.NegateY = 0;
+   src_register_ext_swz.NegateZ = 0;
+   src_register_ext_swz.NegateW = 0;
+   src_register_ext_swz.ExtDivide = TGSI_EXTSWIZZLE_ONE;
+   src_register_ext_swz.Padding = 0;
+   src_register_ext_swz.Extended = 0;
+
+   return src_register_ext_swz;
+}
+
+unsigned
+tgsi_compare_src_register_ext_swz(
+   struct tgsi_src_register_ext_swz a,
+   struct tgsi_src_register_ext_swz b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_src_register_ext_swz
+tgsi_build_src_register_ext_swz(
+   unsigned ext_swizzle_x,
+   unsigned ext_swizzle_y,
+   unsigned ext_swizzle_z,
+   unsigned ext_swizzle_w,
+   unsigned negate_x,
+   unsigned negate_y,
+   unsigned negate_z,
+   unsigned negate_w,
+   unsigned ext_divide,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_src_register_ext_swz src_register_ext_swz;
+
+   assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE );
+   assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE );
+   assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE );
+   assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE );
+   assert( negate_x <= 1 );
+   assert( negate_y <= 1 );
+   assert( negate_z <= 1 );
+   assert( negate_w <= 1 );
+   assert( ext_divide <= TGSI_EXTSWIZZLE_ONE );
+
+   src_register_ext_swz = tgsi_default_src_register_ext_swz();
+   src_register_ext_swz.ExtSwizzleX = ext_swizzle_x;
+   src_register_ext_swz.ExtSwizzleY = ext_swizzle_y;
+   src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z;
+   src_register_ext_swz.ExtSwizzleW = ext_swizzle_w;
+   src_register_ext_swz.NegateX = negate_x;
+   src_register_ext_swz.NegateY = negate_y;
+   src_register_ext_swz.NegateZ = negate_z;
+   src_register_ext_swz.NegateW = negate_w;
+   src_register_ext_swz.ExtDivide = ext_divide;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return src_register_ext_swz;
+}
+
+struct tgsi_src_register_ext_mod
+tgsi_default_src_register_ext_mod( void )
+{
+   struct tgsi_src_register_ext_mod src_register_ext_mod;
+
+   src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD;
+   src_register_ext_mod.Complement = 0;
+   src_register_ext_mod.Bias = 0;
+   src_register_ext_mod.Scale2X = 0;
+   src_register_ext_mod.Absolute = 0;
+   src_register_ext_mod.Negate = 0;
+   src_register_ext_mod.Padding = 0;
+   src_register_ext_mod.Extended = 0;
+
+   return src_register_ext_mod;
+}
+
+unsigned
+tgsi_compare_src_register_ext_mod(
+   struct tgsi_src_register_ext_mod a,
+   struct tgsi_src_register_ext_mod b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_src_register_ext_mod
+tgsi_build_src_register_ext_mod(
+   unsigned complement,
+   unsigned bias,
+   unsigned scale_2x,
+   unsigned absolute,
+   unsigned negate,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_src_register_ext_mod src_register_ext_mod;
+
+   assert( complement <= 1 );
+   assert( bias <= 1 );
+   assert( scale_2x <= 1 );
+   assert( absolute <= 1 );
+   assert( negate <= 1 );
+
+   src_register_ext_mod = tgsi_default_src_register_ext_mod();
+   src_register_ext_mod.Complement = complement;
+   src_register_ext_mod.Bias = bias;
+   src_register_ext_mod.Scale2X = scale_2x;
+   src_register_ext_mod.Absolute = absolute;
+   src_register_ext_mod.Negate = negate;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return src_register_ext_mod;
+}
+
+struct tgsi_dimension
+tgsi_default_dimension( void )
+{
+   struct tgsi_dimension dimension;
+
+   dimension.Indirect = 0;
+   dimension.Dimension = 0;
+   dimension.Padding = 0;
+   dimension.Index = 0;
+   dimension.Extended = 0;
+
+   return dimension;
+}
+
+struct tgsi_dimension
+tgsi_build_dimension(
+   unsigned indirect,
+   unsigned index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_dimension dimension;
+
+   dimension = tgsi_default_dimension();
+   dimension.Indirect = indirect;
+   dimension.Index = index;
+
+   instruction_grow( instruction, header );
+
+   return dimension;
+}
+
+struct tgsi_dst_register
+tgsi_default_dst_register( void )
+{
+   struct tgsi_dst_register dst_register;
+
+   dst_register.File = TGSI_FILE_NULL;
+   dst_register.WriteMask = TGSI_WRITEMASK_XYZW;
+   dst_register.Indirect = 0;
+   dst_register.Dimension = 0;
+   dst_register.Index = 0;
+   dst_register.Padding = 0;
+   dst_register.Extended = 0;
+
+   return dst_register;
+}
+
+struct tgsi_dst_register
+tgsi_build_dst_register(
+   unsigned file,
+   unsigned mask,
+   int index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_dst_register dst_register;
+
+   assert( file <= TGSI_FILE_IMMEDIATE );
+   assert( mask <= TGSI_WRITEMASK_XYZW );
+   assert( index >= -32768 && index <= 32767 );
+
+   dst_register = tgsi_default_dst_register();
+   dst_register.File = file;
+   dst_register.WriteMask = mask;
+   dst_register.Index = index;
+
+   instruction_grow( instruction, header );
+
+   return dst_register;
+}
+
+struct tgsi_full_dst_register
+tgsi_default_full_dst_register( void )
+{
+   struct tgsi_full_dst_register full_dst_register;
+
+   full_dst_register.DstRegister = tgsi_default_dst_register();
+   full_dst_register.DstRegisterExtConcode =
+      tgsi_default_dst_register_ext_concode();
+   full_dst_register.DstRegisterExtModulate =
+      tgsi_default_dst_register_ext_modulate();
+
+   return full_dst_register;
+}
+
+struct tgsi_dst_register_ext_concode
+tgsi_default_dst_register_ext_concode( void )
+{
+   struct tgsi_dst_register_ext_concode dst_register_ext_concode;
+
+   dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE;
+   dst_register_ext_concode.CondMask = TGSI_CC_TR;
+   dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X;
+   dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y;
+   dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z;
+   dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W;
+   dst_register_ext_concode.CondSrcIndex = 0;
+   dst_register_ext_concode.Padding = 0;
+   dst_register_ext_concode.Extended = 0;
+
+   return dst_register_ext_concode;
+}
+
+unsigned
+tgsi_compare_dst_register_ext_concode(
+   struct tgsi_dst_register_ext_concode a,
+   struct tgsi_dst_register_ext_concode b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_dst_register_ext_concode
+tgsi_build_dst_register_ext_concode(
+   unsigned cc,
+   unsigned swizzle_x,
+   unsigned swizzle_y,
+   unsigned swizzle_z,
+   unsigned swizzle_w,
+   int index,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_dst_register_ext_concode dst_register_ext_concode;
+
+   assert( cc <= TGSI_CC_FL );
+   assert( swizzle_x <= TGSI_SWIZZLE_W );
+   assert( swizzle_y <= TGSI_SWIZZLE_W );
+   assert( swizzle_z <= TGSI_SWIZZLE_W );
+   assert( swizzle_w <= TGSI_SWIZZLE_W );
+   assert( index >= -32768 && index <= 32767 );
+
+   dst_register_ext_concode = tgsi_default_dst_register_ext_concode();
+   dst_register_ext_concode.CondMask = cc;
+   dst_register_ext_concode.CondSwizzleX = swizzle_x;
+   dst_register_ext_concode.CondSwizzleY = swizzle_y;
+   dst_register_ext_concode.CondSwizzleZ = swizzle_z;
+   dst_register_ext_concode.CondSwizzleW = swizzle_w;
+   dst_register_ext_concode.CondSrcIndex = index;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return dst_register_ext_concode;
+}
+
+struct tgsi_dst_register_ext_modulate
+tgsi_default_dst_register_ext_modulate( void )
+{
+   struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
+
+   dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE;
+   dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X;
+   dst_register_ext_modulate.Padding = 0;
+   dst_register_ext_modulate.Extended = 0;
+
+   return dst_register_ext_modulate;
+}
+
+unsigned
+tgsi_compare_dst_register_ext_modulate(
+   struct tgsi_dst_register_ext_modulate a,
+   struct tgsi_dst_register_ext_modulate b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_dst_register_ext_modulate
+tgsi_build_dst_register_ext_modulate(
+   unsigned modulate,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
+
+   assert( modulate <= TGSI_MODULATE_EIGHTH );
+
+   dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate();
+   dst_register_ext_modulate.Modulate = modulate;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return dst_register_ext_modulate;
+}
+
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
new file mode 100644
index 00000000000..116c78abf34
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
@@ -0,0 +1,320 @@
+#if !defined TGSI_BUILD_H
+#define TGSI_BUILD_H
+
+#if defined __cplusplus
+extern "C" {
+#endif // defined __cplusplus
+
+/*
+ * version
+ */
+
+struct tgsi_version
+tgsi_build_version( void );
+
+/*
+ * header
+ */
+
+struct tgsi_header
+tgsi_build_header( void );
+
+struct tgsi_processor
+tgsi_default_processor( void );
+
+struct tgsi_processor
+tgsi_build_processor(
+   unsigned processor,
+   struct tgsi_header *header );
+
+/*
+ * declaration
+ */
+
+struct tgsi_declaration
+tgsi_default_declaration( void );
+
+struct tgsi_declaration
+tgsi_build_declaration(
+   unsigned file,
+   unsigned declare,
+   unsigned usage_mask,
+   unsigned interpolate,
+   unsigned semantic,
+   struct tgsi_header *header );
+
+struct tgsi_full_declaration
+tgsi_default_full_declaration( void );
+
+unsigned
+tgsi_build_full_declaration(
+   const struct tgsi_full_declaration *full_decl,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize );
+
+struct tgsi_declaration_range
+tgsi_build_declaration_range(
+   unsigned first,
+   unsigned last,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header );
+
+struct tgsi_declaration_mask
+tgsi_build_declaration_mask(
+   unsigned mask,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header );
+
+struct tgsi_declaration_interpolation
+tgsi_default_declaration_interpolation( void );
+
+struct tgsi_declaration_interpolation
+tgsi_build_declaration_interpolation(
+   unsigned interpolate,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header );
+
+struct tgsi_declaration_semantic
+tgsi_default_declaration_semantic( void );
+
+struct tgsi_declaration_semantic
+tgsi_build_declaration_semantic(
+   unsigned semantic_name,
+   unsigned semantic_index,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header );
+
+/*
+ * immediate
+ */
+
+struct tgsi_immediate
+tgsi_default_immediate( void );
+
+struct tgsi_immediate
+tgsi_build_immediate(
+   struct tgsi_header *header );
+
+struct tgsi_full_immediate
+tgsi_default_full_immediate( void );
+
+struct tgsi_immediate_float32
+tgsi_build_immediate_float32(
+   float value,
+   struct tgsi_immediate *immediate,
+   struct tgsi_header *header );
+
+unsigned
+tgsi_build_full_immediate(
+   const struct tgsi_full_immediate *full_imm,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize );
+
+/*
+ * instruction
+ */
+
+struct tgsi_instruction
+tgsi_default_instruction( void );
+
+struct tgsi_instruction
+tgsi_build_instruction(
+   unsigned opcode,
+   unsigned saturate,
+   unsigned num_dst_regs,
+   unsigned num_src_regs,
+   struct tgsi_header *header );
+
+struct tgsi_full_instruction
+tgsi_default_full_instruction( void );
+
+unsigned
+tgsi_build_full_instruction(
+   const struct tgsi_full_instruction *full_inst,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize );
+
+struct tgsi_instruction_ext_nv
+tgsi_default_instruction_ext_nv( void );
+
+unsigned
+tgsi_compare_instruction_ext_nv(
+   struct tgsi_instruction_ext_nv a,
+   struct tgsi_instruction_ext_nv b );
+
+struct tgsi_instruction_ext_nv
+tgsi_build_instruction_ext_nv(
+   unsigned precision,
+   unsigned cond_dst_index,
+   unsigned cond_flow_index,
+   unsigned cond_mask,
+   unsigned cond_swizzle_x,
+   unsigned cond_swizzle_y,
+   unsigned cond_swizzle_z,
+   unsigned cond_swizzle_w,
+   unsigned cond_dst_update,
+   unsigned cond_flow_update,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_instruction_ext_label
+tgsi_default_instruction_ext_label( void );
+
+unsigned
+tgsi_compare_instruction_ext_label(
+   struct tgsi_instruction_ext_label a,
+   struct tgsi_instruction_ext_label b );
+
+struct tgsi_instruction_ext_label
+tgsi_build_instruction_ext_label(
+   unsigned label,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_instruction_ext_texture
+tgsi_default_instruction_ext_texture( void );
+
+unsigned
+tgsi_compare_instruction_ext_texture(
+   struct tgsi_instruction_ext_texture a,
+   struct tgsi_instruction_ext_texture b );
+
+struct tgsi_instruction_ext_texture
+tgsi_build_instruction_ext_texture(
+   unsigned texture,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_src_register
+tgsi_default_src_register( void );
+
+struct tgsi_src_register
+tgsi_build_src_register(
+   unsigned file,
+   unsigned swizzle_x,
+   unsigned swizzle_y,
+   unsigned swizzle_z,
+   unsigned swizzle_w,
+   unsigned negate,
+   unsigned indirect,
+   unsigned dimension,
+   int index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_full_src_register
+tgsi_default_full_src_register( void );
+
+struct tgsi_src_register_ext_swz
+tgsi_default_src_register_ext_swz( void );
+
+unsigned
+tgsi_compare_src_register_ext_swz(
+   struct tgsi_src_register_ext_swz a,
+   struct tgsi_src_register_ext_swz b );
+
+struct tgsi_src_register_ext_swz
+tgsi_build_src_register_ext_swz(
+   unsigned ext_swizzle_x,
+   unsigned ext_swizzle_y,
+   unsigned ext_swizzle_z,
+   unsigned ext_swizzle_w,
+   unsigned negate_x,
+   unsigned negate_y,
+   unsigned negate_z,
+   unsigned negate_w,
+   unsigned ext_divide,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_src_register_ext_mod
+tgsi_default_src_register_ext_mod( void );
+
+unsigned
+tgsi_compare_src_register_ext_mod(
+   struct tgsi_src_register_ext_mod a,
+   struct tgsi_src_register_ext_mod b );
+
+struct tgsi_src_register_ext_mod
+tgsi_build_src_register_ext_mod(
+   unsigned complement,
+   unsigned bias,
+   unsigned scale_2x,
+   unsigned absolute,
+   unsigned negate,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_dimension
+tgsi_default_dimension( void );
+
+struct tgsi_dimension
+tgsi_build_dimension(
+   unsigned indirect,
+   unsigned index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_dst_register
+tgsi_default_dst_register( void );
+
+struct tgsi_dst_register
+tgsi_build_dst_register(
+   unsigned file,
+   unsigned mask,
+   int index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_full_dst_register
+tgsi_default_full_dst_register( void );
+
+struct tgsi_dst_register_ext_concode
+tgsi_default_dst_register_ext_concode( void );
+
+unsigned
+tgsi_compare_dst_register_ext_concode(
+   struct tgsi_dst_register_ext_concode a,
+   struct tgsi_dst_register_ext_concode b );
+
+struct tgsi_dst_register_ext_concode
+tgsi_build_dst_register_ext_concode(
+   unsigned cc,
+   unsigned swizzle_x,
+   unsigned swizzle_y,
+   unsigned swizzle_z,
+   unsigned swizzle_w,
+   int index,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_dst_register_ext_modulate
+tgsi_default_dst_register_ext_modulate( void );
+
+unsigned
+tgsi_compare_dst_register_ext_modulate(
+   struct tgsi_dst_register_ext_modulate a,
+   struct tgsi_dst_register_ext_modulate b );
+
+struct tgsi_dst_register_ext_modulate
+tgsi_build_dst_register_ext_modulate(
+   unsigned modulate,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+#if defined __cplusplus
+} // extern "C"
+#endif // defined __cplusplus
+
+#endif // !defined TGSI_BUILD_H
+
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
new file mode 100644
index 00000000000..b5c54847e0b
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -0,0 +1,1581 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <stdio.h> 
+
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_dump.h"
+#include "tgsi_parse.h"
+#include "tgsi_build.h"
+
+struct gen_dump
+{
+   unsigned tabs;
+   void  (* write)(
+               struct gen_dump   *dump,
+               const void        *data,
+               unsigned          size );
+};
+
+struct text_dump
+{
+   struct gen_dump   base;
+   char              *text;
+   unsigned          length;
+   unsigned          capacity;
+};
+
+static void
+_text_dump_write(
+   struct gen_dump   *dump,
+   const void        *data,
+   unsigned          size )
+{
+   struct text_dump  *td = (struct text_dump *) dump;
+   unsigned          new_length = td->length + size;
+
+   if( new_length >= td->capacity ) {
+      unsigned new_capacity = td->capacity;
+
+      do {
+         if( new_capacity == 0 ) {
+            new_capacity = 256;
+         }
+         else {
+            new_capacity *= 2;
+         }
+      } while( new_length >= new_capacity );
+      td->text = (char *) REALLOC(
+         td->text,
+         td->capacity,
+         new_capacity );
+      td->capacity = new_capacity;
+   }
+   memcpy(
+      &td->text[td->length],
+      data,
+      size );
+   td->length = new_length;
+   td->text[td->length] = '\0';
+}
+
+struct file_dump
+{
+   struct gen_dump   base;
+   FILE              *file;
+};
+
+static void
+_file_dump_write(
+   struct gen_dump   *dump,
+   const void        *data,
+   unsigned          size )
+{
+   struct file_dump  *fd = (struct file_dump *) dump;
+
+#if 0
+   fwrite( data, 1, size, fd->file );
+#else
+   {
+      unsigned i;
+
+      for (i = 0; i < size; i++ ) {
+         fprintf( fd->file, "%c", ((const char *) data)[i] );
+      }
+   }
+#endif
+}
+
+static void
+gen_dump_str(
+   struct gen_dump   *dump,
+   const char        *str )
+{
+   unsigned i;
+   size_t   len = strlen( str );
+
+   for (i = 0; i < len; i++) {
+      dump->write( dump, &str[i], 1 );
+      if (str[i] == '\n') {
+         unsigned i;
+
+         for (i = 0; i < dump->tabs; i++) {
+            dump->write( dump, "    ", 4 );
+         }
+      }
+   }
+}
+
+static void
+gen_dump_chr(
+   struct gen_dump   *dump,
+   const char        chr )
+{
+   dump->write( dump, &chr, 1 );
+}
+
+static void
+gen_dump_uix(
+   struct gen_dump   *dump,
+   const unsigned    ui )
+{
+   char  str[36];
+
+   sprintf( str, "0x%x", ui );
+   gen_dump_str( dump, str );
+}
+
+static void
+gen_dump_uid(
+   struct gen_dump   *dump,
+   const unsigned    ui )
+{
+   char  str[16];
+
+   sprintf( str, "%u", ui );
+   gen_dump_str( dump, str );
+}
+
+static void
+gen_dump_sid(
+   struct gen_dump   *dump,
+   const int         si )
+{
+   char  str[16];
+
+   sprintf( str, "%d", si );
+   gen_dump_str( dump, str );
+}
+
+static void
+gen_dump_flt(
+   struct gen_dump   *dump,
+   const float       flt )
+{
+   char  str[48];
+
+   sprintf( str, "%10.4f", flt );
+   gen_dump_str( dump, str );
+}
+
+static void
+gen_dump_enum(
+   struct gen_dump   *dump,
+   const unsigned    e,
+   const char        **enums,
+   const unsigned    enums_count )
+{
+   if (e >= enums_count) {
+      gen_dump_uid( dump, e );
+   }
+   else {
+      gen_dump_str( dump, enums[e] );
+   }
+}
+
+static void
+gen_dump_tab(
+   struct gen_dump   *dump )
+{
+   ++dump->tabs;
+}
+
+static void
+gen_dump_untab(
+   struct gen_dump   *dump )
+{
+   assert( dump->tabs > 0 );
+
+   --dump->tabs;
+}
+
+#define TXT(S)          gen_dump_str( dump, S )
+#define CHR(C)          gen_dump_chr( dump, C )
+#define UIX(I)          gen_dump_uix( dump, I )
+#define UID(I)          gen_dump_uid( dump, I )
+#define SID(I)          gen_dump_sid( dump, I )
+#define FLT(F)          gen_dump_flt( dump, F )
+#define TAB()           gen_dump_tab( dump )
+#define UNT()           gen_dump_untab( dump )
+#define ENM(E,ENUMS)    gen_dump_enum( dump, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
+
+static const char *TGSI_PROCESSOR_TYPES[] =
+{
+   "PROCESSOR_FRAGMENT",
+   "PROCESSOR_VERTEX",
+   "PROCESSOR_GEOMETRY"
+};
+
+static const char *TGSI_PROCESSOR_TYPES_SHORT[] =
+{
+   "FRAG",
+   "VERT",
+   "GEOM"
+};
+
+static const char *TGSI_TOKEN_TYPES[] =
+{
+   "TOKEN_TYPE_DECLARATION",
+   "TOKEN_TYPE_IMMEDIATE",
+   "TOKEN_TYPE_INSTRUCTION"
+};
+
+static const char *TGSI_FILES[] =
+{
+   "FILE_NULL",
+   "FILE_CONSTANT",
+   "FILE_INPUT",
+   "FILE_OUTPUT",
+   "FILE_TEMPORARY",
+   "FILE_SAMPLER",
+   "FILE_ADDRESS",
+   "FILE_IMMEDIATE"
+};
+
+static const char *TGSI_FILES_SHORT[] =
+{
+   "NULL",
+   "CONST",
+   "IN",
+   "OUT",
+   "TEMP",
+   "SAMP",
+   "ADDR",
+   "IMM"
+};
+
+static const char *TGSI_DECLARES[] =
+{
+   "DECLARE_RANGE",
+   "DECLARE_MASK"
+};
+
+static const char *TGSI_INTERPOLATES[] =
+{
+   "INTERPOLATE_CONSTANT",
+   "INTERPOLATE_LINEAR",
+   "INTERPOLATE_PERSPECTIVE",
+   "INTERPOLATE_ATTRIB"
+};
+
+static const char *TGSI_INTERPOLATES_SHORT[] =
+{
+   "CONSTANT",
+   "LINEAR",
+   "PERSPECTIVE",
+   "ATTRIB"
+};
+
+static const char *TGSI_SEMANTICS[] =
+{
+   "SEMANTIC_POSITION",
+   "SEMANTIC_COLOR",
+   "SEMANTIC_BCOLOR",
+   "SEMANTIC_FOG",
+   "SEMANTIC_PSIZE",
+   "SEMANTIC_GENERIC,"
+};
+
+static const char *TGSI_SEMANTICS_SHORT[] =
+{
+   "POSITION",
+   "COLOR",
+   "BCOLOR",
+   "FOG",
+   "PSIZE",
+   "GENERIC",
+};
+
+static const char *TGSI_IMMS[] =
+{
+   "IMM_FLOAT32"
+};
+
+static const char *TGSI_IMMS_SHORT[] =
+{
+   "FLT32"
+};
+
+static const char *TGSI_OPCODES[] =
+{
+   "OPCODE_ARL",
+   "OPCODE_MOV",
+   "OPCODE_LIT",
+   "OPCODE_RCP",
+   "OPCODE_RSQ",
+   "OPCODE_EXP",
+   "OPCODE_LOG",
+   "OPCODE_MUL",
+   "OPCODE_ADD",
+   "OPCODE_DP3",
+   "OPCODE_DP4",
+   "OPCODE_DST",
+   "OPCODE_MIN",
+   "OPCODE_MAX",
+   "OPCODE_SLT",
+   "OPCODE_SGE",
+   "OPCODE_MAD",
+   "OPCODE_SUB",
+   "OPCODE_LERP",
+   "OPCODE_CND",
+   "OPCODE_CND0",
+   "OPCODE_DOT2ADD",
+   "OPCODE_INDEX",
+   "OPCODE_NEGATE",
+   "OPCODE_FRAC",
+   "OPCODE_CLAMP",
+   "OPCODE_FLOOR",
+   "OPCODE_ROUND",
+   "OPCODE_EXPBASE2",
+   "OPCODE_LOGBASE2",
+   "OPCODE_POWER",
+   "OPCODE_CROSSPRODUCT",
+   "OPCODE_MULTIPLYMATRIX",
+   "OPCODE_ABS",
+   "OPCODE_RCC",
+   "OPCODE_DPH",
+   "OPCODE_COS",
+   "OPCODE_DDX",
+   "OPCODE_DDY",
+   "OPCODE_KILP",
+   "OPCODE_PK2H",
+   "OPCODE_PK2US",
+   "OPCODE_PK4B",
+   "OPCODE_PK4UB",
+   "OPCODE_RFL",
+   "OPCODE_SEQ",
+   "OPCODE_SFL",
+   "OPCODE_SGT",
+   "OPCODE_SIN",
+   "OPCODE_SLE",
+   "OPCODE_SNE",
+   "OPCODE_STR",
+   "OPCODE_TEX",
+   "OPCODE_TXD",
+   "OPCODE_UP2H",
+   "OPCODE_UP2US",
+   "OPCODE_UP4B",
+   "OPCODE_UP4UB",
+   "OPCODE_X2D",
+   "OPCODE_ARA",
+   "OPCODE_ARR",
+   "OPCODE_BRA",
+   "OPCODE_CAL",
+   "OPCODE_RET",
+   "OPCODE_SSG",
+   "OPCODE_CMP",
+   "OPCODE_SCS",
+   "OPCODE_TXB",
+   "OPCODE_NRM",
+   "OPCODE_DIV",
+   "OPCODE_DP2",
+   "OPCODE_TXL",
+   "OPCODE_BRK",
+   "OPCODE_IF",
+   "OPCODE_LOOP",
+   "OPCODE_REP",
+   "OPCODE_ELSE",
+   "OPCODE_ENDIF",
+   "OPCODE_ENDLOOP",
+   "OPCODE_ENDREP",
+   "OPCODE_PUSHA",
+   "OPCODE_POPA",
+   "OPCODE_CEIL",
+   "OPCODE_I2F",
+   "OPCODE_NOT",
+   "OPCODE_TRUNC",
+   "OPCODE_SHL",
+   "OPCODE_SHR",
+   "OPCODE_AND",
+   "OPCODE_OR",
+   "OPCODE_MOD",
+   "OPCODE_XOR",
+   "OPCODE_SAD",
+   "OPCODE_TXF",
+   "OPCODE_TXQ",
+   "OPCODE_CONT",
+   "OPCODE_EMIT",
+   "OPCODE_ENDPRIM",
+   "OPCODE_BGNLOOP2",
+   "OPCODE_BGNSUB",
+   "OPCODE_ENDLOOP2",
+   "OPCODE_ENDSUB",
+   "OPCODE_NOISE1",
+   "OPCODE_NOISE2",
+   "OPCODE_NOISE3",
+   "OPCODE_NOISE4",
+   "OPCODE_NOP",
+   "OPCODE_TEXBEM",
+   "OPCODE_TEXBEML",
+   "OPCODE_TEXREG2AR",
+   "OPCODE_TEXM3X2PAD",
+   "OPCODE_TEXM3X2TEX",
+   "OPCODE_TEXM3X3PAD",
+   "OPCODE_TEXM3X3TEX",
+   "OPCODE_TEXM3X3SPEC",
+   "OPCODE_TEXM3X3VSPEC",
+   "OPCODE_TEXREG2GB",
+   "OPCODE_TEXREG2RGB",
+   "OPCODE_TEXDP3TEX",
+   "OPCODE_TEXDP3",
+   "OPCODE_TEXM3X3",
+   "OPCODE_TEXM3X2DEPTH",
+   "OPCODE_TEXDEPTH",
+   "OPCODE_BEM",
+   "OPCODE_M4X3",
+   "OPCODE_M3X4",
+   "OPCODE_M3X3",
+   "OPCODE_M3X2",
+   "OPCODE_NRM4",
+   "OPCODE_CALLNZ",
+   "OPCODE_IFC",
+   "OPCODE_BREAKC",
+   "OPCODE_KIL",
+   "OPCODE_END"
+};
+
+static const char *TGSI_OPCODES_SHORT[] =
+{
+   "ARL",
+   "MOV",
+   "LIT",
+   "RCP",
+   "RSQ",
+   "EXP",
+   "LOG",
+   "MUL",
+   "ADD",
+   "DP3",
+   "DP4",
+   "DST",
+   "MIN",
+   "MAX",
+   "SLT",
+   "SGE",
+   "MAD",
+   "SUB",
+   "LERP",
+   "CND",
+   "CND0",
+   "DOT2ADD",
+   "INDEX",
+   "NEGATE",
+   "FRAC",
+   "CLAMP",
+   "FLOOR",
+   "ROUND",
+   "EXPBASE2",
+   "LOGBASE2",
+   "POWER",
+   "CROSSPRODUCT",
+   "MULTIPLYMATRIX",
+   "ABS",
+   "RCC",
+   "DPH",
+   "COS",
+   "DDX",
+   "DDY",
+   "KILP",
+   "PK2H",
+   "PK2US",
+   "PK4B",
+   "PK4UB",
+   "RFL",
+   "SEQ",
+   "SFL",
+   "SGT",
+   "SIN",
+   "SLE",
+   "SNE",
+   "STR",
+   "TEX",
+   "TXD",
+   "UP2H",
+   "UP2US",
+   "UP4B",
+   "UP4UB",
+   "X2D",
+   "ARA",
+   "ARR",
+   "BRA",
+   "CAL",
+   "RET",
+   "SSG",
+   "CMP",
+   "SCS",
+   "TXB",
+   "NRM",
+   "DIV",
+   "DP2",
+   "TXL",
+   "BRK",
+   "IF",
+   "LOOP",
+   "REP",
+   "ELSE",
+   "ENDIF",
+   "ENDLOOP",
+   "ENDREP",
+   "PUSHA",
+   "POPA",
+   "CEIL",
+   "I2F",
+   "NOT",
+   "TRUNC",
+   "SHL",
+   "SHR",
+   "AND",
+   "OR",
+   "MOD",
+   "XOR",
+   "SAD",
+   "TXF",
+   "TXQ",
+   "CONT",
+   "EMIT",
+   "ENDPRIM",
+   "BGNLOOP2",
+   "BGNSUB",
+   "ENDLOOP2",
+   "ENDSUB",
+   "NOISE1",
+   "NOISE2",
+   "NOISE3",
+   "NOISE4",
+   "NOP",
+   "TEXBEM",
+   "TEXBEML",
+   "TEXREG2AR",
+   "TEXM3X2PAD",
+   "TEXM3X2TEX",
+   "TEXM3X3PAD",
+   "TEXM3X3TEX",
+   "TEXM3X3SPEC",
+   "TEXM3X3VSPEC",
+   "TEXREG2GB",
+   "TEXREG2RGB",
+   "TEXDP3TEX",
+   "TEXDP3",
+   "TEXM3X3",
+   "TEXM3X2DEPTH",
+   "TEXDEPTH",
+   "BEM",
+   "M4X3",
+   "M3X4",
+   "M3X3",
+   "M3X2",
+   "NRM4",
+   "CALLNZ",
+   "IFC",
+   "BREAKC",
+   "KIL",
+   "END"
+};
+
+static const char *TGSI_SATS[] =
+{
+   "SAT_NONE",
+   "SAT_ZERO_ONE",
+   "SAT_MINUS_PLUS_ONE"
+};
+
+static const char *TGSI_INSTRUCTION_EXTS[] =
+{
+   "INSTRUCTION_EXT_TYPE_NV",
+   "INSTRUCTION_EXT_TYPE_LABEL",
+   "INSTRUCTION_EXT_TYPE_TEXTURE"
+};
+
+static const char *TGSI_PRECISIONS[] =
+{
+   "PRECISION_DEFAULT",
+   "TGSI_PRECISION_FLOAT32",
+   "TGSI_PRECISION_FLOAT16",
+   "TGSI_PRECISION_FIXED12"
+};
+
+static const char *TGSI_CCS[] =
+{
+   "CC_GT",
+   "CC_EQ",
+   "CC_LT",
+   "CC_UN",
+   "CC_GE",
+   "CC_LE",
+   "CC_NE",
+   "CC_TR",
+   "CC_FL"
+};
+
+static const char *TGSI_SWIZZLES[] =
+{
+   "SWIZZLE_X",
+   "SWIZZLE_Y",
+   "SWIZZLE_Z",
+   "SWIZZLE_W"
+};
+
+static const char *TGSI_SWIZZLES_SHORT[] =
+{
+   "x",
+   "y",
+   "z",
+   "w"
+};
+
+static const char *TGSI_TEXTURES[] =
+{
+   "TEXTURE_UNKNOWN",
+   "TEXTURE_1D",
+   "TEXTURE_2D",
+   "TEXTURE_3D",
+   "TEXTURE_CUBE",
+   "TEXTURE_RECT",
+   "TEXTURE_SHADOW1D",
+   "TEXTURE_SHADOW2D",
+   "TEXTURE_SHADOWRECT"
+};
+
+static const char *TGSI_SRC_REGISTER_EXTS[] =
+{
+   "SRC_REGISTER_EXT_TYPE_SWZ",
+   "SRC_REGISTER_EXT_TYPE_MOD"
+};
+
+static const char *TGSI_EXTSWIZZLES[] =
+{
+   "EXTSWIZZLE_X",
+   "EXTSWIZZLE_Y",
+   "EXTSWIZZLE_Z",
+   "EXTSWIZZLE_W",
+   "EXTSWIZZLE_ZERO",
+   "EXTSWIZZLE_ONE"
+};
+
+static const char *TGSI_EXTSWIZZLES_SHORT[] =
+{
+   "x",
+   "y",
+   "z",
+   "w",
+   "0",
+   "1"
+};
+
+static const char *TGSI_WRITEMASKS[] =
+{
+   "0",
+   "WRITEMASK_X",
+   "WRITEMASK_Y",
+   "WRITEMASK_XY",
+   "WRITEMASK_Z",
+   "WRITEMASK_XZ",
+   "WRITEMASK_YZ",
+   "WRITEMASK_XYZ",
+   "WRITEMASK_W",
+   "WRITEMASK_XW",
+   "WRITEMASK_YW",
+   "WRITEMASK_XYW",
+   "WRITEMASK_ZW",
+   "WRITEMASK_XZW",
+   "WRITEMASK_YZW",
+   "WRITEMASK_XYZW"
+};
+
+static const char *TGSI_DST_REGISTER_EXTS[] =
+{
+   "DST_REGISTER_EXT_TYPE_CONDCODE",
+   "DST_REGISTER_EXT_TYPE_MODULATE"
+};
+
+static const char *TGSI_MODULATES[] =
+{
+   "MODULATE_1X",
+   "MODULATE_2X",
+   "MODULATE_4X",
+   "MODULATE_8X",
+   "MODULATE_HALF",
+   "MODULATE_QUARTER",
+   "MODULATE_EIGHTH"
+};
+
+static void
+dump_declaration_short(
+   struct gen_dump               *dump,
+   struct tgsi_full_declaration  *decl )
+{
+   TXT( "\nDCL " );
+   ENM( decl->Declaration.File, TGSI_FILES_SHORT );
+
+   switch( decl->Declaration.Declare ) {
+   case TGSI_DECLARE_RANGE:
+      CHR( '[' );
+      UID( decl->u.DeclarationRange.First );
+      if( decl->u.DeclarationRange.First != decl->u.DeclarationRange.Last ) {
+         TXT( ".." );
+         UID( decl->u.DeclarationRange.Last );
+      }
+      CHR( ']' );
+      break;
+   default:
+      assert( 0 );
+   }
+
+   if( decl->Declaration.UsageMask != TGSI_WRITEMASK_XYZW ) {
+      CHR( '.' );
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
+         CHR( 'x' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) {
+         CHR( 'y' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) {
+         CHR( 'z' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) {
+         CHR( 'w' );
+      }
+   }
+
+   if( decl->Declaration.Interpolate ) {
+      TXT( ", " );
+      ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT );
+   }
+
+   if( decl->Declaration.Semantic ) {
+      TXT( ", " );
+      ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT );
+      CHR( '[' );
+      UID( decl->Semantic.SemanticIndex );
+      CHR( ']' );
+   }
+}
+
+static void
+dump_declaration_verbose(
+   struct gen_dump               *dump,
+   struct tgsi_full_declaration  *decl,
+   unsigned                      ignored,
+   unsigned                      deflt,
+   struct tgsi_full_declaration  *fd )
+{
+   TXT( "\nFile       : " );
+   ENM( decl->Declaration.File, TGSI_FILES );
+   TXT( "\nDeclare    : " );
+   ENM( decl->Declaration.Declare, TGSI_DECLARES );
+   if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) {
+      TXT( "\nUsageMask  : " );
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
+         CHR( 'X' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) {
+         CHR( 'Y' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) {
+         CHR( 'Z' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) {
+         CHR( 'W' );
+      }
+   }
+   if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) {
+      TXT( "\nInterpolate: " );
+      UID( decl->Declaration.Interpolate );
+   }
+   if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) {
+      TXT( "\nSemantic   : " );
+      UID( decl->Declaration.Semantic );
+   }
+   if( ignored ) {
+      TXT( "\nPadding    : " );
+      UIX( decl->Declaration.Padding );
+   }
+
+   CHR( '\n' );
+   switch( decl->Declaration.Declare ) {
+   case TGSI_DECLARE_RANGE:
+      TXT( "\nFirst: " );
+      UID( decl->u.DeclarationRange.First );
+      TXT( "\nLast : " );
+      UID( decl->u.DeclarationRange.Last );
+      break;
+
+   case TGSI_DECLARE_MASK:
+      TXT( "\nMask: " );
+      UIX( decl->u.DeclarationMask.Mask );
+      break;
+
+   default:
+      assert( 0 );
+   }
+
+   if( decl->Declaration.Interpolate ) {
+      CHR( '\n' );
+      TXT( "\nInterpolate: " );
+      ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES );
+      if( ignored ) {
+         TXT( "\nPadding    : " );
+         UIX( decl->Interpolation.Padding );
+      }
+   }
+
+   if( decl->Declaration.Semantic ) {
+      CHR( '\n' );
+      TXT( "\nSemanticName : " );
+      ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS );
+      TXT( "\nSemanticIndex: " );
+      UID( decl->Semantic.SemanticIndex );
+      if( ignored ) {
+         TXT( "\nPadding      : " );
+         UIX( decl->Semantic.Padding );
+      }
+   }
+}
+
+static void
+dump_immediate_short(
+   struct gen_dump            *dump,
+   struct tgsi_full_immediate *imm )
+{
+   unsigned i;
+
+   TXT( "\nIMM " );
+   ENM( imm->Immediate.DataType, TGSI_IMMS_SHORT );
+
+   TXT( " { " );
+   for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+      switch( imm->Immediate.DataType ) {
+      case TGSI_IMM_FLOAT32:
+         FLT( imm->u.ImmediateFloat32[i].Float );
+         break;
+
+      default:
+         assert( 0 );
+      }
+
+      if( i < imm->Immediate.Size - 2 ) {
+         TXT( ", " );
+      }
+   }
+   TXT( " }" );
+}
+
+static void
+dump_immediate_verbose(
+   struct gen_dump            *dump,
+   struct tgsi_full_immediate *imm,
+   unsigned                   ignored )
+{
+   unsigned i;
+
+   TXT( "\nDataType   : " );
+   ENM( imm->Immediate.DataType, TGSI_IMMS );
+   if( ignored ) {
+      TXT( "\nPadding    : " );
+      UIX( imm->Immediate.Padding );
+   }
+
+   for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+      CHR( '\n' );
+      switch( imm->Immediate.DataType ) {
+      case TGSI_IMM_FLOAT32:
+         TXT( "\nFloat: " );
+         FLT( imm->u.ImmediateFloat32[i].Float );
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+}
+
+static void
+dump_instruction_short(
+   struct gen_dump               *dump,
+   struct tgsi_full_instruction  *inst,
+   unsigned                      instno )
+{
+   unsigned i;
+   boolean  first_reg = TRUE;
+
+   CHR( '\n' );
+   UID( instno );
+   CHR( ':' );
+   ENM( inst->Instruction.Opcode, TGSI_OPCODES_SHORT );
+
+   switch( inst->Instruction.Saturate ) {
+   case TGSI_SAT_NONE:
+      break;
+   case TGSI_SAT_ZERO_ONE:
+      TXT( "_SAT" );
+      break;
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      TXT( "_SAT[-1,1]" );
+      break;
+   default:
+      assert( 0 );
+   }
+
+   for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+
+      if( !first_reg ) {
+         CHR( ',' );
+      }
+      CHR( ' ' );
+
+      ENM( dst->DstRegister.File, TGSI_FILES_SHORT );
+
+      CHR( '[' );
+      SID( dst->DstRegister.Index );
+      CHR( ']' );
+
+      if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) {
+         CHR( '.' );
+         if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_X ) {
+            CHR( 'x' );
+         }
+         if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Y ) {
+            CHR( 'y' );
+         }
+         if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Z ) {
+            CHR( 'z' );
+         }
+         if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_W ) {
+            CHR( 'w' );
+         }
+      }
+
+      first_reg = FALSE;
+   }
+
+   for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+
+      if( !first_reg ) {
+         CHR( ',' );
+      }
+      CHR( ' ' );
+
+      if( src->SrcRegisterExtMod.Complement ) {
+         TXT( "(1 - " );
+      }
+      if( src->SrcRegisterExtMod.Negate  ) {
+         CHR( '-' );
+      }
+      if( src->SrcRegisterExtMod.Absolute ) {
+         CHR( '|' );
+      }
+      if( src->SrcRegister.Negate ) {
+         CHR( '-' );
+      }
+
+      ENM( src->SrcRegister.File, TGSI_FILES_SHORT );
+
+      CHR( '[' );
+      SID( src->SrcRegister.Index );
+      CHR( ']' );
+
+      if (src->SrcRegister.Extended) {
+         if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
+             src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
+             src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
+             src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
+            CHR( '.' );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT );
+         }
+      }
+      else if( src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
+               src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
+               src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
+               src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ) {
+         CHR( '.' );
+         ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES_SHORT );
+         ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES_SHORT );
+         ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES_SHORT );
+         ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES_SHORT );
+      }
+
+      if( src->SrcRegisterExtMod.Absolute ) {
+         CHR( '|' );
+      }
+      if( src->SrcRegisterExtMod.Complement ) {
+         CHR( ')' );
+      }
+
+      first_reg = FALSE;
+   }
+
+   switch( inst->Instruction.Opcode ) {
+   case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_BGNLOOP2:
+   case TGSI_OPCODE_ENDLOOP2:
+   case TGSI_OPCODE_CAL:
+      TXT( " :" );
+      UID( inst->InstructionExtLabel.Label );
+      break;
+   }
+}
+
+static void
+dump_instruction_verbose(
+   struct gen_dump               *dump,
+   struct tgsi_full_instruction  *inst,
+   unsigned                      ignored,
+   unsigned                      deflt,
+   struct tgsi_full_instruction  *fi )
+{
+   unsigned i;
+
+   TXT( "\nOpcode     : " );
+   ENM( inst->Instruction.Opcode, TGSI_OPCODES );
+   if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) {
+      TXT( "\nSaturate   : " );
+      ENM( inst->Instruction.Saturate, TGSI_SATS );
+   }
+   if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) {
+      TXT( "\nNumDstRegs : " );
+      UID( inst->Instruction.NumDstRegs );
+   }
+   if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) {
+      TXT( "\nNumSrcRegs : " );
+      UID( inst->Instruction.NumSrcRegs );
+   }
+   if( ignored ) {
+      TXT( "\nPadding    : " );
+      UIX( inst->Instruction.Padding );
+   }
+
+   if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) {
+      CHR( '\n' );
+      TXT( "\nType          : " );
+      ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS );
+      if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) {
+         TXT( "\nPrecision     : " );
+         ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS );
+      }
+      if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) {
+         TXT( "\nCondDstIndex  : " );
+         UID( inst->InstructionExtNv.CondDstIndex );
+      }
+      if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) {
+         TXT( "\nCondFlowIndex : " );
+         UID( inst->InstructionExtNv.CondFlowIndex );
+      }
+      if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) {
+         TXT( "\nCondMask      : " );
+         ENM( inst->InstructionExtNv.CondMask, TGSI_CCS );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) {
+         TXT( "\nCondSwizzleX  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) {
+         TXT( "\nCondSwizzleY  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) {
+         TXT( "\nCondSwizzleZ  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) {
+         TXT( "\nCondSwizzleW  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) {
+         TXT( "\nCondDstUpdate : " );
+         UID( inst->InstructionExtNv.CondDstUpdate );
+      }
+      if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) {
+         TXT( "\nCondFlowEnable: " );
+         UID( inst->InstructionExtNv.CondFlowEnable );
+      }
+      if( ignored ) {
+         TXT( "\nPadding       : " );
+         UIX( inst->InstructionExtNv.Padding );
+         if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) {
+            TXT( "\nExtended      : " );
+            UID( inst->InstructionExtNv.Extended );
+         }
+      }
+   }
+
+   if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) {
+      CHR( '\n' );
+      TXT( "\nType    : " );
+      ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS );
+      if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) {
+         TXT( "\nLabel   : " );
+         UID( inst->InstructionExtLabel.Label );
+      }
+      if( ignored ) {
+         TXT( "\nPadding : " );
+         UIX( inst->InstructionExtLabel.Padding );
+         if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) {
+            TXT( "\nExtended: " );
+            UID( inst->InstructionExtLabel.Extended );
+         }
+      }
+   }
+
+   if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) {
+      CHR( '\n' );
+      TXT( "\nType    : " );
+      ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS );
+      if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) {
+         TXT( "\nTexture : " );
+         ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES );
+      }
+      if( ignored ) {
+         TXT( "\nPadding : " );
+         UIX( inst->InstructionExtTexture.Padding );
+         if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) {
+            TXT( "\nExtended: " );
+            UID( inst->InstructionExtTexture.Extended );
+         }
+      }
+   }
+
+   for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+      struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i];
+
+      CHR( '\n' );
+      TXT( "\nFile     : " );
+      ENM( dst->DstRegister.File, TGSI_FILES );
+      if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) {
+         TXT( "\nWriteMask: " );
+         ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS );
+      }
+      if( ignored ) {
+         if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) {
+            TXT( "\nIndirect : " );
+            UID( dst->DstRegister.Indirect );
+         }
+         if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) {
+            TXT( "\nDimension: " );
+            UID( dst->DstRegister.Dimension );
+         }
+      }
+      if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) {
+         TXT( "\nIndex    : " );
+         SID( dst->DstRegister.Index );
+      }
+      if( ignored ) {
+         TXT( "\nPadding  : " );
+         UIX( dst->DstRegister.Padding );
+         if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) {
+            TXT( "\nExtended : " );
+            UID( dst->DstRegister.Extended );
+         }
+      }
+
+      if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) {
+         CHR( '\n' );
+         TXT( "\nType        : " );
+         ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS );
+         if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) {
+            TXT( "\nCondMask    : " );
+            ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) {
+            TXT( "\nCondSwizzleX: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) {
+            TXT( "\nCondSwizzleY: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) {
+            TXT( "\nCondSwizzleZ: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) {
+            TXT( "\nCondSwizzleW: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) {
+            TXT( "\nCondSrcIndex: " );
+            UID( dst->DstRegisterExtConcode.CondSrcIndex );
+         }
+         if( ignored ) {
+            TXT( "\nPadding     : " );
+            UIX( dst->DstRegisterExtConcode.Padding );
+            if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) {
+               TXT( "\nExtended    : " );
+               UID( dst->DstRegisterExtConcode.Extended );
+            }
+         }
+      }
+
+      if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) {
+         CHR( '\n' );
+         TXT( "\nType    : " );
+         ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
+         if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) {
+            TXT( "\nModulate: " );
+            ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES );
+         }
+         if( ignored ) {
+            TXT( "\nPadding : " );
+            UIX( dst->DstRegisterExtModulate.Padding );
+            if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) {
+               TXT( "\nExtended: " );
+               UID( dst->DstRegisterExtModulate.Extended );
+            }
+         }
+      }
+   }
+
+   for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+      struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i];
+
+      CHR( '\n' );
+      TXT( "\nFile     : ");
+      ENM( src->SrcRegister.File, TGSI_FILES );
+      if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) {
+         TXT( "\nSwizzleX : " );
+         ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) {
+         TXT( "\nSwizzleY : " );
+         ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) {
+         TXT( "\nSwizzleZ : " );
+         ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) {
+         TXT( "\nSwizzleW : " );
+         ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) {
+         TXT( "\nNegate   : " );
+         UID( src->SrcRegister.Negate );
+      }
+      if( ignored ) {
+         if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) {
+            TXT( "\nIndirect : " );
+            UID( src->SrcRegister.Indirect );
+         }
+         if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) {
+            TXT( "\nDimension: " );
+            UID( src->SrcRegister.Dimension );
+         }
+      }
+      if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) {
+         TXT( "\nIndex    : " );
+         SID( src->SrcRegister.Index );
+      }
+      if( ignored ) {
+         if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) {
+            TXT( "\nExtended : " );
+            UID( src->SrcRegister.Extended );
+         }
+      }
+
+      if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) {
+         CHR( '\n' );
+         TXT( "\nType       : " );
+         ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) {
+            TXT( "\nExtSwizzleX: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) {
+            TXT( "\nExtSwizzleY: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) {
+            TXT( "\nExtSwizzleZ: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) {
+            TXT( "\nExtSwizzleW: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) {
+            TXT( "\nNegateX   : " );
+            UID( src->SrcRegisterExtSwz.NegateX );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) {
+            TXT( "\nNegateY   : " );
+            UID( src->SrcRegisterExtSwz.NegateY );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) {
+            TXT( "\nNegateZ   : " );
+            UID( src->SrcRegisterExtSwz.NegateZ );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) {
+            TXT( "\nNegateW   : " );
+            UID( src->SrcRegisterExtSwz.NegateW );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.ExtDivide != src->SrcRegisterExtSwz.ExtDivide ) {
+            TXT( "\nExtDivide  : " );
+            ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES );
+         }
+         if( ignored ) {
+            TXT( "\nPadding   : " );
+            UIX( src->SrcRegisterExtSwz.Padding );
+            if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) {
+               TXT( "\nExtended   : " );
+               UID( src->SrcRegisterExtSwz.Extended );
+            }
+         }
+      }
+
+      if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
+         CHR( '\n' );
+         TXT( "\nType     : " );
+         ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
+         if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) {
+            TXT( "\nComplement: " );
+            UID( src->SrcRegisterExtMod.Complement );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) {
+            TXT( "\nBias     : " );
+            UID( src->SrcRegisterExtMod.Bias );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) {
+            TXT( "\nScale2X   : " );
+            UID( src->SrcRegisterExtMod.Scale2X );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) {
+            TXT( "\nAbsolute  : " );
+            UID( src->SrcRegisterExtMod.Absolute );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) {
+            TXT( "\nNegate   : " );
+            UID( src->SrcRegisterExtMod.Negate );
+         }
+         if( ignored ) {
+            TXT( "\nPadding   : " );
+            UIX( src->SrcRegisterExtMod.Padding );
+            if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) {
+               TXT( "\nExtended  : " );
+               UID( src->SrcRegisterExtMod.Extended );
+            }
+         }
+      }
+   }
+}
+
+static void
+dump_gen(
+   struct gen_dump         *dump,
+   const struct tgsi_token *tokens,
+   unsigned                flags )
+{
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction fi;
+   struct tgsi_full_declaration fd;
+   unsigned verbose = flags & TGSI_DUMP_VERBOSE;
+   unsigned ignored = !(flags & TGSI_DUMP_NO_IGNORED);
+   unsigned deflt = !(flags & TGSI_DUMP_NO_DEFAULT);
+   unsigned instno = 0;
+
+   dump->tabs = 0;
+
+   /* sanity check */
+   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
+
+   tgsi_parse_init( &parse, tokens );
+
+   TXT( "tgsi-dump begin -----------------" );
+
+   CHR( '\n' );
+   ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT );
+   CHR( ' ' );
+   UID( parse.FullVersion.Version.MajorVersion );
+   CHR( '.' );
+   UID( parse.FullVersion.Version.MinorVersion );
+
+   if( verbose ) {
+      TXT( "\nMajorVersion: " );
+      UID( parse.FullVersion.Version.MajorVersion );
+      TXT( "\nMinorVersion: " );
+      UID( parse.FullVersion.Version.MinorVersion );
+      CHR( '\n' );
+
+      TXT( "\nHeaderSize: " );
+      UID( parse.FullHeader.Header.HeaderSize );
+      TXT( "\nBodySize  : " );
+      UID( parse.FullHeader.Header.BodySize );
+      TXT( "\nProcessor : " );
+      ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES );
+      CHR( '\n' );
+   }
+
+   fi = tgsi_default_full_instruction();
+   fd = tgsi_default_full_declaration();
+
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         dump_declaration_short(
+            dump,
+            &parse.FullToken.FullDeclaration );
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         dump_immediate_short(
+            dump,
+            &parse.FullToken.FullImmediate );
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         dump_instruction_short(
+            dump,
+            &parse.FullToken.FullInstruction,
+            instno );
+         instno++;
+         break;
+
+      default:
+         assert( 0 );
+      }
+
+      if( verbose ) {
+         TXT( "\nType       : " );
+         ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES );
+         if( ignored ) {
+            TXT( "\nSize       : " );
+            UID( parse.FullToken.Token.Size );
+            if( deflt || parse.FullToken.Token.Extended ) {
+               TXT( "\nExtended   : " );
+               UID( parse.FullToken.Token.Extended );
+            }
+         }
+
+         switch( parse.FullToken.Token.Type ) {
+         case TGSI_TOKEN_TYPE_DECLARATION:
+            dump_declaration_verbose(
+               dump,
+               &parse.FullToken.FullDeclaration,
+               ignored,
+               deflt,
+               &fd );
+            break;
+
+         case TGSI_TOKEN_TYPE_IMMEDIATE:
+            dump_immediate_verbose(
+               dump,
+               &parse.FullToken.FullImmediate,
+               ignored );
+            break;
+
+         case TGSI_TOKEN_TYPE_INSTRUCTION:
+            dump_instruction_verbose(
+               dump,
+               &parse.FullToken.FullInstruction,
+               ignored,
+               deflt,
+               &fi );
+            break;
+
+         default:
+            assert( 0 );
+         }
+
+         CHR( '\n' );
+      }
+   }
+
+   TXT( "\ntgsi-dump end -------------------\n" );
+
+   tgsi_parse_free( &parse );
+}
+
+
+static void
+sanity_checks(void)
+{
+   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
+   assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0);
+}
+
+
+void
+tgsi_dump(
+   const struct tgsi_token *tokens,
+   unsigned                flags )
+{
+   struct file_dump  dump;
+
+   sanity_checks();
+
+   dump.base.write = _file_dump_write;
+#if 0
+   {
+      static unsigned   counter = 0;
+      char              buffer[64];
+      sprintf( buffer, "tgsi-dump-%.4u.txt", counter++ );
+      dump.file = fopen( buffer, "wt" );
+   }
+#else
+   dump.file = stderr;
+#endif
+
+   dump_gen(
+      &dump.base,
+      tokens,
+      flags );
+
+#if 0
+   fclose( dump.file );
+#endif
+}
+
+void
+tgsi_dump_str(
+   char                    **str,
+   const struct tgsi_token *tokens,
+   unsigned                flags )
+{
+   struct text_dump  dump;
+
+   dump.base.write = _text_dump_write;
+   dump.text = NULL;
+   dump.length = 0;
+   dump.capacity = 0;
+
+   dump_gen(
+      &dump.base,
+      tokens,
+      flags );
+
+   *str = dump.text;
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
new file mode 100644
index 00000000000..1adc9db2519
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
@@ -0,0 +1,28 @@
+#if !defined TGSI_DUMP_H
+#define TGSI_DUMP_H
+
+#if defined __cplusplus
+extern "C" {
+#endif // defined __cplusplus
+
+#define TGSI_DUMP_VERBOSE       1
+#define TGSI_DUMP_NO_IGNORED    2
+#define TGSI_DUMP_NO_DEFAULT    4
+
+void
+tgsi_dump(
+   const struct tgsi_token *tokens,
+   unsigned                flags );
+
+void
+tgsi_dump_str(
+   char                    **str,
+   const struct tgsi_token *tokens,
+   unsigned                flags );
+
+#if defined __cplusplus
+} // extern "C"
+#endif // defined __cplusplus
+
+#endif // !defined TGSI_DUMP_H
+
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
new file mode 100644
index 00000000000..bf6b89ce564
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
@@ -0,0 +1,319 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_parse.h"
+#include "tgsi_build.h"
+
+void
+tgsi_full_token_init(
+   union tgsi_full_token *full_token )
+{
+   full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION;
+}
+
+void
+tgsi_full_token_free(
+   union tgsi_full_token *full_token )
+{
+   if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) {
+      FREE( full_token->FullImmediate.u.Pointer );
+   }
+}
+
+unsigned
+tgsi_parse_init(
+   struct tgsi_parse_context *ctx,
+   const struct tgsi_token *tokens )
+{
+   ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0];
+   if( ctx->FullVersion.Version.MajorVersion > 1 ) {
+      return TGSI_PARSE_ERROR;
+   }
+
+   ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1];
+   if( ctx->FullHeader.Header.HeaderSize >= 2 ) {
+      ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2];
+   }
+   else {
+      ctx->FullHeader.Processor = tgsi_default_processor();
+   }
+
+   ctx->Tokens = tokens;
+   ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize;
+
+   tgsi_full_token_init( &ctx->FullToken );
+
+   return TGSI_PARSE_OK;
+}
+
+void
+tgsi_parse_free(
+   struct tgsi_parse_context *ctx )
+{
+   tgsi_full_token_free( &ctx->FullToken );
+}
+
+boolean
+tgsi_parse_end_of_tokens(
+   struct tgsi_parse_context *ctx )
+{
+   return ctx->Position >=
+      1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize;
+}
+
+static void
+next_token(
+   struct tgsi_parse_context *ctx,
+   void *token )
+{
+   assert( !tgsi_parse_end_of_tokens( ctx ) );
+
+   *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++];
+}
+
+void
+tgsi_parse_token(
+   struct tgsi_parse_context *ctx )
+{
+   struct tgsi_token token;
+   unsigned i;
+
+   tgsi_full_token_free( &ctx->FullToken );
+   tgsi_full_token_init( &ctx->FullToken );
+
+   next_token( ctx, &token );
+
+   switch( token.Type ) {
+   case TGSI_TOKEN_TYPE_DECLARATION:
+   {
+      struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration;
+
+      *decl = tgsi_default_full_declaration();
+      decl->Declaration = *(struct tgsi_declaration *) &token;
+
+      switch( decl->Declaration.Type ) {
+      case TGSI_DECLARE_RANGE:
+         next_token( ctx, &decl->u.DeclarationRange );
+         break;
+
+      case TGSI_DECLARE_MASK:
+         next_token( ctx, &decl->u.DeclarationMask );
+         break;
+
+      default:
+         assert (0);
+      }
+
+      if( decl->Declaration.Interpolate ) {
+         next_token( ctx, &decl->Interpolation );
+      }
+
+      if( decl->Declaration.Semantic ) {
+         next_token( ctx, &decl->Semantic );
+      }
+
+      break;
+   }
+
+   case TGSI_TOKEN_TYPE_IMMEDIATE:
+   {
+      struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate;
+
+      *imm = tgsi_default_full_immediate();
+      imm->Immediate = *(struct tgsi_immediate *) &token;
+
+      assert( !imm->Immediate.Extended );
+
+      switch (imm->Immediate.DataType) {
+      case TGSI_IMM_FLOAT32:
+         imm->u.Pointer = MALLOC(
+            sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) );
+         for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+            next_token( ctx, &imm->u.ImmediateFloat32[i] );
+         }
+         break;
+
+      default:
+         assert( 0 );
+      }
+
+      break;
+   }
+
+   case TGSI_TOKEN_TYPE_INSTRUCTION:
+   {
+      struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction;
+      unsigned extended;
+
+      *inst = tgsi_default_full_instruction();
+      inst->Instruction = *(struct tgsi_instruction *) &token;
+
+      extended = inst->Instruction.Extended;
+
+      while( extended ) {
+         struct tgsi_src_register_ext token;
+
+         next_token( ctx, &token );
+
+         switch( token.Type ) {
+         case TGSI_INSTRUCTION_EXT_TYPE_NV:
+            inst->InstructionExtNv =
+               *(struct tgsi_instruction_ext_nv *) &token;
+            break;
+
+         case TGSI_INSTRUCTION_EXT_TYPE_LABEL:
+            inst->InstructionExtLabel =
+               *(struct tgsi_instruction_ext_label *) &token;
+            break;
+
+         case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE:
+            inst->InstructionExtTexture =
+               *(struct tgsi_instruction_ext_texture *) &token;
+            break;
+
+         default:
+            assert( 0 );
+         }
+
+         extended = token.Extended;
+      }
+
+      assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
+
+      for(  i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+         unsigned extended;
+
+         next_token( ctx, &inst->FullDstRegisters[i].DstRegister );
+
+         /*
+          * No support for indirect or multi-dimensional addressing.
+          */
+         assert( !inst->FullDstRegisters[i].DstRegister.Indirect );
+         assert( !inst->FullDstRegisters[i].DstRegister.Dimension );
+
+         extended = inst->FullDstRegisters[i].DstRegister.Extended;
+
+         while( extended ) {
+            struct tgsi_src_register_ext token;
+
+            next_token( ctx, &token );
+
+            switch( token.Type ) {
+            case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE:
+               inst->FullDstRegisters[i].DstRegisterExtConcode =
+                  *(struct tgsi_dst_register_ext_concode *) &token;
+               break;
+
+            case TGSI_DST_REGISTER_EXT_TYPE_MODULATE:
+               inst->FullDstRegisters[i].DstRegisterExtModulate =
+                  *(struct tgsi_dst_register_ext_modulate *) &token;
+               break;
+
+            default:
+               assert( 0 );
+            }
+
+            extended = token.Extended;
+         }
+      }
+
+      assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS );
+
+      for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+         unsigned extended;
+
+         next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister );
+
+         extended = inst->FullSrcRegisters[i].SrcRegister.Extended;
+
+         while( extended ) {
+            struct tgsi_src_register_ext token;
+
+            next_token( ctx, &token );
+
+            switch( token.Type ) {
+            case TGSI_SRC_REGISTER_EXT_TYPE_SWZ:
+               inst->FullSrcRegisters[i].SrcRegisterExtSwz =
+                  *(struct tgsi_src_register_ext_swz *) &token;
+               break;
+
+            case TGSI_SRC_REGISTER_EXT_TYPE_MOD:
+               inst->FullSrcRegisters[i].SrcRegisterExtMod =
+                  *(struct tgsi_src_register_ext_mod *) &token;
+               break;
+
+            default:
+               assert( 0 );
+            }
+
+            extended = token.Extended;
+         }
+
+         if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) {
+            next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd );
+
+            /*
+             * No support for indirect or multi-dimensional addressing.
+             */
+            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
+            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
+            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
+         }
+
+         if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) {
+            next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim );
+
+            /*
+             * No support for multi-dimensional addressing.
+             */
+            assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension );
+            assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended );
+
+            if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) {
+               next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd );
+
+               /*
+               * No support for indirect or multi-dimensional addressing.
+               */
+               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
+               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
+               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
+            }
+         }
+      }
+
+      break;
+   }
+
+   default:
+      assert( 0 );
+   }
+}
+
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
new file mode 100644
index 00000000000..9372da8d5d1
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -0,0 +1,121 @@
+#if !defined TGSI_PARSE_H
+#define TGSI_PARSE_H
+
+#if defined __cplusplus
+extern "C" {
+#endif // defined __cplusplus
+
+struct tgsi_full_version
+{
+   struct tgsi_version  Version;
+};
+
+struct tgsi_full_header
+{
+   struct tgsi_header      Header;
+   struct tgsi_processor   Processor;
+};
+
+struct tgsi_full_dst_register
+{
+   struct tgsi_dst_register               DstRegister;
+   struct tgsi_dst_register_ext_concode   DstRegisterExtConcode;
+   struct tgsi_dst_register_ext_modulate  DstRegisterExtModulate;
+};
+
+struct tgsi_full_src_register
+{
+   struct tgsi_src_register         SrcRegister;
+   struct tgsi_src_register_ext_swz SrcRegisterExtSwz;
+   struct tgsi_src_register_ext_mod SrcRegisterExtMod;
+   struct tgsi_src_register         SrcRegisterInd;
+   struct tgsi_dimension            SrcRegisterDim;
+   struct tgsi_src_register         SrcRegisterDimInd;
+};
+
+struct tgsi_full_declaration
+{
+   struct tgsi_declaration Declaration;
+   union
+   {
+      struct tgsi_declaration_range DeclarationRange;
+      struct tgsi_declaration_mask  DeclarationMask;
+   } u;
+   struct tgsi_declaration_interpolation  Interpolation;
+   struct tgsi_declaration_semantic       Semantic;
+};
+
+struct tgsi_full_immediate
+{
+   struct tgsi_immediate   Immediate;
+   union
+   {
+      void                          *Pointer;
+      struct tgsi_immediate_float32 *ImmediateFloat32;
+   } u;
+};
+
+#define TGSI_FULL_MAX_DST_REGISTERS 2
+#define TGSI_FULL_MAX_SRC_REGISTERS 3
+
+struct tgsi_full_instruction
+{
+   struct tgsi_instruction             Instruction;
+   struct tgsi_instruction_ext_nv      InstructionExtNv;
+   struct tgsi_instruction_ext_label   InstructionExtLabel;
+   struct tgsi_instruction_ext_texture InstructionExtTexture;
+   struct tgsi_full_dst_register       FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
+   struct tgsi_full_src_register       FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
+};
+
+union tgsi_full_token
+{
+   struct tgsi_token             Token;
+   struct tgsi_full_declaration  FullDeclaration;
+   struct tgsi_full_immediate    FullImmediate;
+   struct tgsi_full_instruction  FullInstruction;
+};
+
+void
+tgsi_full_token_init(
+   union tgsi_full_token *full_token );
+
+void
+tgsi_full_token_free(
+   union tgsi_full_token *full_token );
+
+struct tgsi_parse_context
+{
+   const struct tgsi_token    *Tokens;
+   unsigned                   Position;
+   struct tgsi_full_version   FullVersion;
+   struct tgsi_full_header    FullHeader;
+   union tgsi_full_token      FullToken;
+};
+
+#define TGSI_PARSE_OK      0
+#define TGSI_PARSE_ERROR   1
+
+unsigned
+tgsi_parse_init(
+   struct tgsi_parse_context *ctx,
+   const struct tgsi_token *tokens );
+
+void
+tgsi_parse_free(
+   struct tgsi_parse_context *ctx );
+
+boolean
+tgsi_parse_end_of_tokens(
+   struct tgsi_parse_context *ctx );
+
+void
+tgsi_parse_token(
+   struct tgsi_parse_context *ctx );
+
+#if defined __cplusplus
+} // extern "C"
+#endif // defined __cplusplus
+
+#endif // !defined TGSI_PARSE_H
+
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_transform.c b/src/gallium/auxiliary/tgsi/util/tgsi_transform.c
new file mode 100644
index 00000000000..357f77b05a6
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_transform.c
@@ -0,0 +1,199 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * TGSI program transformation utility.
+ *
+ * Authors:  Brian Paul
+ */
+
+
+#include "tgsi_transform.h"
+
+
+
+static void
+emit_instruction(struct tgsi_transform_context *ctx,
+                 const struct tgsi_full_instruction *inst)
+{
+   uint ti = ctx->ti;
+
+   ti += tgsi_build_full_instruction(inst,
+                                     ctx->tokens_out + ti,
+                                     ctx->header,
+                                     ctx->max_tokens_out - ti);
+   ctx->ti = ti;
+}
+
+
+static void
+emit_declaration(struct tgsi_transform_context *ctx,
+                 const struct tgsi_full_declaration *decl)
+{
+   uint ti = ctx->ti;
+
+   ti += tgsi_build_full_declaration(decl,
+                                     ctx->tokens_out + ti,
+                                     ctx->header,
+                                     ctx->max_tokens_out - ti);
+   ctx->ti = ti;
+}
+
+
+static void
+emit_immediate(struct tgsi_transform_context *ctx,
+               const struct tgsi_full_immediate *imm)
+{
+   uint ti = ctx->ti;
+
+   ti += tgsi_build_full_immediate(imm,
+                                   ctx->tokens_out + ti,
+                                   ctx->header,
+                                   ctx->max_tokens_out - ti);
+   ctx->ti = ti;
+}
+
+
+
+/**
+ * Apply user-defined transformations to the input shader to produce
+ * the output shader.
+ * For example, a register search-and-replace operation could be applied
+ * by defining a transform_instruction() callback that examined and changed
+ * the instruction src/dest regs.
+ *
+ * \return number of tokens emitted
+ */
+int
+tgsi_transform_shader(const struct tgsi_token *tokens_in,
+                      struct tgsi_token *tokens_out,
+                      uint max_tokens_out,
+                      struct tgsi_transform_context *ctx)
+{
+   uint procType;
+
+   /* input shader */
+   struct tgsi_parse_context parse;
+
+   /* output shader */
+   struct tgsi_processor *processor;
+
+
+   /**
+    ** callback context init
+    **/
+   ctx->emit_instruction = emit_instruction;
+   ctx->emit_declaration = emit_declaration;
+   ctx->emit_immediate = emit_immediate;
+   ctx->tokens_out = tokens_out;
+   ctx->max_tokens_out = max_tokens_out;
+
+
+   /**
+    ** Setup to begin parsing input shader
+    **/
+   if (tgsi_parse_init( &parse, tokens_in ) != TGSI_PARSE_OK) {
+      debug_printf("tgsi_parse_init() failed in tgsi_transform_shader()!\n");
+      return -1;
+   }
+   procType = parse.FullHeader.Processor.Processor;
+   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
+          procType == TGSI_PROCESSOR_VERTEX ||
+          procType == TGSI_PROCESSOR_GEOMETRY);
+
+
+   /**
+    **  Setup output shader
+    **/
+   *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version();
+
+   ctx->header = (struct tgsi_header *) (tokens_out + 1);
+   *ctx->header = tgsi_build_header();
+
+   processor = (struct tgsi_processor *) (tokens_out + 2);
+   *processor = tgsi_build_processor( procType, ctx->header );
+
+   ctx->ti = 3;
+
+
+   /**
+    ** Loop over incoming program tokens/instructions
+    */
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         {
+            struct tgsi_full_instruction *fullinst
+               = &parse.FullToken.FullInstruction;
+
+            if (ctx->transform_instruction)
+               ctx->transform_instruction(ctx, fullinst);
+            else
+               ctx->emit_instruction(ctx, fullinst);
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         {
+            struct tgsi_full_declaration *fulldecl
+               = &parse.FullToken.FullDeclaration;
+
+            if (ctx->transform_declaration)
+               ctx->transform_declaration(ctx, fulldecl);
+            else
+               ctx->emit_declaration(ctx, fulldecl);
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         {
+            struct tgsi_full_immediate *fullimm
+               = &parse.FullToken.FullImmediate;
+
+            if (ctx->transform_immediate)
+               ctx->transform_immediate(ctx, fullimm);
+            else
+               ctx->emit_immediate(ctx, fullimm);
+         }
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+
+   if (ctx->epilog) {
+      ctx->epilog(ctx);
+   }
+
+   tgsi_parse_free (&parse);
+
+   return ctx->ti;
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_transform.h b/src/gallium/auxiliary/tgsi/util/tgsi_transform.h
new file mode 100644
index 00000000000..fcf85d603be
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_transform.h
@@ -0,0 +1,93 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_TRANSFORM_H
+#define TGSI_TRANSFORM_H
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_build.h"
+
+
+
+/**
+ * Subclass this to add caller-specific data
+ */
+struct tgsi_transform_context
+{
+/**** PUBLIC ***/
+
+   /**
+    * User-defined callbacks invoked per instruction.
+    */
+   void (*transform_instruction)(struct tgsi_transform_context *ctx,
+                                 struct tgsi_full_instruction *inst);
+
+   void (*transform_declaration)(struct tgsi_transform_context *ctx,
+                                 struct tgsi_full_declaration *decl);
+
+   void (*transform_immediate)(struct tgsi_transform_context *ctx,
+                               struct tgsi_full_immediate *imm);
+
+   /**
+    * Called at end of input program to allow caller to append extra
+    * instructions.  Return number of tokens emitted.
+    */
+   void (*epilog)(struct tgsi_transform_context *ctx);
+
+
+/*** PRIVATE ***/
+
+   /**
+    * These are setup by tgsi_transform_shader() and cannot be overridden.
+    * Meant to be called from in the above user callback functions.
+    */
+   void (*emit_instruction)(struct tgsi_transform_context *ctx,
+                            const struct tgsi_full_instruction *inst);
+   void (*emit_declaration)(struct tgsi_transform_context *ctx,
+                            const struct tgsi_full_declaration *decl);
+   void (*emit_immediate)(struct tgsi_transform_context *ctx,
+                          const struct tgsi_full_immediate *imm);
+
+   struct tgsi_header *header;
+   uint max_tokens_out;
+   struct tgsi_token *tokens_out;
+   uint ti;
+};
+
+
+
+extern int
+tgsi_transform_shader(const struct tgsi_token *tokens_in,
+                      struct tgsi_token *tokens_out,
+                      uint max_tokens_out,
+                      struct tgsi_transform_context *ctx);
+
+
+#endif /* TGSI_TRANSFORM_H */
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c
new file mode 100644
index 00000000000..4cdd89182ad
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.c
@@ -0,0 +1,274 @@
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_parse.h"
+#include "tgsi_build.h"
+#include "tgsi_util.h"
+
+union pointer_hack
+{
+   void *pointer;
+   unsigned long long uint64;
+};
+
+void *
+tgsi_align_128bit(
+   void *unaligned )
+{
+   union pointer_hack ph;
+
+   ph.uint64 = 0;
+   ph.pointer = unaligned;
+   ph.uint64 = (ph.uint64 + 15) & ~15;
+   return ph.pointer;
+}
+
+unsigned
+tgsi_util_get_src_register_swizzle(
+   const struct tgsi_src_register *reg,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      return reg->SwizzleX;
+   case 1:
+      return reg->SwizzleY;
+   case 2:
+      return reg->SwizzleZ;
+   case 3:
+      return reg->SwizzleW;
+   default:
+      assert( 0 );
+   }
+   return 0;
+}
+
+unsigned
+tgsi_util_get_src_register_extswizzle(
+   const struct tgsi_src_register_ext_swz *reg,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      return reg->ExtSwizzleX;
+   case 1:
+      return reg->ExtSwizzleY;
+   case 2:
+      return reg->ExtSwizzleZ;
+   case 3:
+      return reg->ExtSwizzleW;
+   default:
+      assert( 0 );
+   }
+   return 0;
+}
+
+unsigned
+tgsi_util_get_full_src_register_extswizzle(
+   const struct tgsi_full_src_register  *reg,
+   unsigned component )
+{
+   unsigned swizzle;
+
+   /*
+    * First, calculate  the   extended swizzle for a given channel. This will give
+    * us either a channel index into the simple swizzle or  a constant 1 or   0.
+    */
+   swizzle = tgsi_util_get_src_register_extswizzle(
+      &reg->SrcRegisterExtSwz,
+      component );
+
+   assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
+   assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
+   assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
+   assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
+   assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
+   assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
+
+   /*
+    * Second, calculate the simple  swizzle  for   the   unswizzled channel index.
+    * Leave the constants intact, they are   not   affected by the   simple swizzle.
+    */
+   if( swizzle <= TGSI_SWIZZLE_W ) {
+      swizzle = tgsi_util_get_src_register_swizzle(
+         &reg->SrcRegister,
+         component );
+   }
+
+   return swizzle;
+}
+
+void
+tgsi_util_set_src_register_swizzle(
+   struct tgsi_src_register *reg,
+   unsigned swizzle,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      reg->SwizzleX = swizzle;
+      break;
+   case 1:
+      reg->SwizzleY = swizzle;
+      break;
+   case 2:
+      reg->SwizzleZ = swizzle;
+      break;
+   case 3:
+      reg->SwizzleW = swizzle;
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+void
+tgsi_util_set_src_register_extswizzle(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned swizzle,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      reg->ExtSwizzleX = swizzle;
+      break;
+   case 1:
+      reg->ExtSwizzleY = swizzle;
+      break;
+   case 2:
+      reg->ExtSwizzleZ = swizzle;
+      break;
+   case 3:
+      reg->ExtSwizzleW = swizzle;
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+unsigned
+tgsi_util_get_src_register_extnegate(
+   const  struct tgsi_src_register_ext_swz *reg,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      return reg->NegateX;
+   case 1:
+      return reg->NegateY;
+   case 2:
+      return reg->NegateZ;
+   case 3:
+      return reg->NegateW;
+   default:
+      assert( 0 );
+   }
+   return 0;
+}
+
+void
+tgsi_util_set_src_register_extnegate(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned negate,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      reg->NegateX = negate;
+      break;
+   case 1:
+      reg->NegateY = negate;
+      break;
+   case 2:
+      reg->NegateZ = negate;
+      break;
+   case 3:
+      reg->NegateW = negate;
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+unsigned
+tgsi_util_get_full_src_register_sign_mode(
+   const struct  tgsi_full_src_register *reg,
+   unsigned component )
+{
+   unsigned sign_mode;
+
+   if( reg->SrcRegisterExtMod.Absolute ) {
+      /* Consider only the post-abs negation. */
+
+      if( reg->SrcRegisterExtMod.Negate ) {
+         sign_mode = TGSI_UTIL_SIGN_SET;
+      }
+      else {
+         sign_mode = TGSI_UTIL_SIGN_CLEAR;
+      }
+   }
+   else {
+      /* Accumulate the three negations. */
+
+      unsigned negate;
+
+      negate = reg->SrcRegister.Negate;
+      if( tgsi_util_get_src_register_extnegate( &reg->SrcRegisterExtSwz, component ) ) {
+         negate = !negate;
+      }
+      if( reg->SrcRegisterExtMod.Negate ) {
+         negate = !negate;
+      }
+
+      if( negate ) {
+         sign_mode = TGSI_UTIL_SIGN_TOGGLE;
+      }
+      else {
+         sign_mode = TGSI_UTIL_SIGN_KEEP;
+      }
+   }
+
+   return sign_mode;
+}
+
+void
+tgsi_util_set_full_src_register_sign_mode(
+   struct tgsi_full_src_register *reg,
+   unsigned sign_mode )
+{
+   reg->SrcRegisterExtSwz.NegateX = 0;
+   reg->SrcRegisterExtSwz.NegateY = 0;
+   reg->SrcRegisterExtSwz.NegateZ = 0;
+   reg->SrcRegisterExtSwz.NegateW = 0;
+
+   switch (sign_mode)
+   {
+   case TGSI_UTIL_SIGN_CLEAR:
+      reg->SrcRegister.Negate = 0;
+      reg->SrcRegisterExtMod.Absolute = 1;
+      reg->SrcRegisterExtMod.Negate = 0;
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      reg->SrcRegister.Negate = 0;
+      reg->SrcRegisterExtMod.Absolute = 1;
+      reg->SrcRegisterExtMod.Negate = 1;
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      reg->SrcRegister.Negate = 1;
+      reg->SrcRegisterExtMod.Absolute = 0;
+      reg->SrcRegisterExtMod.Negate = 0;
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+      reg->SrcRegister.Negate = 0;
+      reg->SrcRegisterExtMod.Absolute = 0;
+      reg->SrcRegisterExtMod.Negate = 0;
+      break;
+
+   default:
+      assert( 0 );
+   }
+}
+
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.h b/src/gallium/auxiliary/tgsi/util/tgsi_util.h
new file mode 100644
index 00000000000..ef14446f0e4
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.h
@@ -0,0 +1,70 @@
+#if !defined TGSI_UTIL_H
+#define TGSI_UTIL_H
+
+#if defined __cplusplus
+extern "C" {
+#endif // defined __cplusplus
+
+void *
+tgsi_align_128bit(
+   void *unaligned );
+
+unsigned
+tgsi_util_get_src_register_swizzle(
+   const struct tgsi_src_register *reg,
+   unsigned component );
+
+unsigned
+tgsi_util_get_src_register_extswizzle(
+   const struct tgsi_src_register_ext_swz *reg,
+   unsigned component);
+
+unsigned
+tgsi_util_get_full_src_register_extswizzle(
+   const struct tgsi_full_src_register *reg,
+   unsigned component );
+
+void
+tgsi_util_set_src_register_swizzle(
+   struct tgsi_src_register *reg,
+   unsigned swizzle,
+   unsigned component );
+
+void
+tgsi_util_set_src_register_extswizzle(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned swizzle,
+   unsigned component );
+
+unsigned
+tgsi_util_get_src_register_extnegate(
+   const struct tgsi_src_register_ext_swz *reg,
+   unsigned component );
+
+void
+tgsi_util_set_src_register_extnegate(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned negate,
+   unsigned component );
+
+#define TGSI_UTIL_SIGN_CLEAR    0   /* Force positive */
+#define TGSI_UTIL_SIGN_SET      1   /* Force negative */
+#define TGSI_UTIL_SIGN_TOGGLE   2   /* Negate */
+#define TGSI_UTIL_SIGN_KEEP     3   /* No change */
+
+unsigned
+tgsi_util_get_full_src_register_sign_mode(
+   const struct tgsi_full_src_register *reg,
+   unsigned component );
+
+void
+tgsi_util_set_full_src_register_sign_mode(
+   struct tgsi_full_src_register *reg,
+   unsigned sign_mode );
+
+#if defined __cplusplus
+} // extern "C"
+#endif // defined __cplusplus
+
+#endif // !defined TGSI_UTIL_H
+
-- 
cgit v1.2.3


From c179bc990108a8ea691ceab03fd68a12396ab538 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Fri, 15 Feb 2008 13:39:24 +0000
Subject: tgsi:  pass through failure to sse-codegenerate for fragment programs
 too.

In particular, will fallback to interpreted execution for shaders with
TEX instructions.
---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 79209575bc4..62c6a69c63f 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -2308,6 +2308,7 @@ tgsi_emit_sse2_fs(
 {
    struct tgsi_parse_context parse;
    boolean instruction_phase = FALSE;
+   unsigned ok = 1;
 
    DUMP_START();
 
@@ -2333,7 +2334,7 @@ tgsi_emit_sse2_fs(
 
    tgsi_parse_init( &parse, tokens );
 
-   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+   while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
       tgsi_parse_token( &parse );
 
       switch( parse.FullToken.Token.Type ) {
@@ -2352,17 +2353,18 @@ tgsi_emit_sse2_fs(
                get_output_base(),
                get_argument( 1 ) );
          }
-         emit_instruction(
+         ok = emit_instruction(
             func,
             &parse.FullToken.FullInstruction );
          break;
 
       case TGSI_TOKEN_TYPE_IMMEDIATE:
          /* XXX implement this */
-         assert(0);
+	 ok = 0;
          break;
 
       default:
+	 ok = 0;
          assert( 0 );
       }
    }
@@ -2371,7 +2373,7 @@ tgsi_emit_sse2_fs(
 
    DUMP_END();
 
-   return 1;
+   return ok;
 }
 
 #endif /* i386 */
-- 
cgit v1.2.3


From bfd5916eafb9a97ad10f1d4a8738e7dcb02e04f4 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Mon, 18 Feb 2008 14:25:04 +0900
Subject: Code reorganization: split gallium and mesa makefiles.

In other words, don't build src/gallium source code from within src/mesa/Makefile.

Also, allow to customize which gallium auxiliary dirs, driver driver, winsys
dirs get built from the config/* files.
---
 configs/default                          |  12 +++-
 configs/linux-cell                       |   3 +
 configs/linux-dri                        |   6 +-
 configs/linux-llvm                       |   2 +
 src/gallium/Makefile.template            |   1 +
 src/gallium/auxiliary/Makefile           |   6 +-
 src/gallium/auxiliary/cso_cache/Makefile |  18 +++++
 src/gallium/auxiliary/draw/Makefile      |  41 ++++++++++-
 src/gallium/auxiliary/tgsi/Makefile      |  24 ++++++-
 src/gallium/auxiliary/tgsi/exec/Makefile |   3 -
 src/gallium/auxiliary/util/Makefile      |  20 ++++++
 src/gallium/drivers/Makefile             |   6 +-
 src/gallium/winsys/Makefile              |  20 ++++++
 src/gallium/winsys/dri/Makefile.template |   6 +-
 src/gallium/winsys/xlib/Makefile         | 113 +++++++++++++++++++++++++++++++
 src/mesa/Makefile                        |  74 ++------------------
 src/mesa/sources                         |  56 +--------------
 17 files changed, 264 insertions(+), 147 deletions(-)
 create mode 100644 src/gallium/auxiliary/cso_cache/Makefile
 delete mode 100644 src/gallium/auxiliary/tgsi/exec/Makefile
 create mode 100644 src/gallium/auxiliary/util/Makefile
 create mode 100644 src/gallium/winsys/Makefile
 create mode 100644 src/gallium/winsys/xlib/Makefile

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/configs/default b/configs/default
index 25a87e66a1b..7f659725cba 100644
--- a/configs/default
+++ b/configs/default
@@ -60,13 +60,21 @@ GLW_SOURCES = GLwDrawA.c
 
 # Directories to build
 LIB_DIR = lib
-SRC_DIRS = gallium mesa glu glut/glx glw
+SRC_DIRS = gallium mesa gallium/winsys glu glut/glx glw
 GLU_DIRS = sgi
-DRIVER_DIRS = x11 osmesa
+DRIVER_DIRS = 
 # Which subdirs under $(TOP)/progs/ to enter:
 PROGRAM_DIRS = demos redbook samples glsl xdemos
 
 
+# Gallium directories and 
+GALLIUM_AUXILIARY_DIRS = draw cso_cache pipebuffer tgsi util
+GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a)
+GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple failover
+GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVER_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a)
+GALLIUM_WINSYS_DIRS = xlib
+
+
 # Library/program dependencies
 #EXTRA_LIB_PATH ?=
 GL_LIB_DEPS     = $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread
diff --git a/configs/linux-cell b/configs/linux-cell
index 09f62fc1ff0..807ede75a7e 100644
--- a/configs/linux-cell
+++ b/configs/linux-cell
@@ -5,6 +5,9 @@ include $(TOP)/configs/default
 CONFIG_NAME = linux-cell
 
 
+GALLIUM_DRIVER_DIRS += cell
+
+
 # Compiler and flags
 CC = ppu32-gcc
 CXX = ppu32-g++
diff --git a/configs/linux-dri b/configs/linux-dri
index e6135856fc1..68afcb1d28d 100644
--- a/configs/linux-dri
+++ b/configs/linux-dri
@@ -54,15 +54,13 @@ USING_EGL=0
 
 # Directories
 ifeq ($(USING_EGL), 1)
-SRC_DIRS = egl glx/x11 gallium mesa glu glut/glx glw
+SRC_DIRS := egl $(SRC_DIRS)
 PROGRAM_DIRS = egl
-else
-SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw
-PROGRAM_DIRS =
 endif
 
 DRIVER_DIRS = dri
 WINDOW_SYSTEM=dri
+GALLIUM_WINSYS_DIRS = dri
 
 # gamma are missing because they have not been converted to use the new
 # interface.
diff --git a/configs/linux-llvm b/configs/linux-llvm
index 915189f4625..a8889321a02 100644
--- a/configs/linux-llvm
+++ b/configs/linux-llvm
@@ -5,6 +5,8 @@ include $(TOP)/configs/linux
 
 CONFIG_NAME = linux-llvm
 
+GALLIUM_AUXILIARY_DIRS += llvm
+
 OPT_FLAGS = -g -ansi -pedantic
 DEFINES += -DDEBUG -DDEBUG_MATH -DMESA_LLVM=1
 
diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template
index 83b25f9b47c..6ad58c205c2 100644
--- a/src/gallium/Makefile.template
+++ b/src/gallium/Makefile.template
@@ -20,6 +20,7 @@ INCLUDES = \
 	-I$(TOP)/src/gallium/auxiliary \
 	-I$(TOP)/src/gallium/drivers \
 	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/mesa/main \
 	-I$(TOP)/include \
         $(DRIVER_INCLUDES)
 
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index da68498aa1f..eaa0f2fe4e8 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -2,11 +2,7 @@ TOP = ../../..
 include $(TOP)/configs/current
 
 
-ifeq ($(CONFIG_NAME), linux-llvm)
-LLVM_DIR = llvm
-endif
-
-SUBDIRS = pipebuffer $(LLVM_DIR)
+SUBDIRS = $(GALLIUM_AUXILIARY_DIRS)
 
 
 default: subdirs
diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile
new file mode 100644
index 00000000000..8248b097fde
--- /dev/null
+++ b/src/gallium/auxiliary/cso_cache/Makefile
@@ -0,0 +1,18 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = cso_cache
+
+DRIVER_SOURCES = \
+	cso_cache.c \
+	cso_hash.c
+
+C_SOURCES = \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES = 
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index fe9b150f304..c56b63d85b8 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -1,2 +1,39 @@
-default:
-	cd ../../../mesa ; make
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = draw
+
+DRIVER_SOURCES = \
+	draw_clip.c \
+	draw_vs_exec.c \
+	draw_vs_sse.c \
+	draw_vs_llvm.c \
+	draw_context.c\
+	draw_cull.c \
+	draw_debug.c \
+	draw_flatshade.c \
+	draw_offset.c \
+	draw_prim.c \
+	draw_stipple.c \
+	draw_twoside.c \
+	draw_unfilled.c \
+	draw_validate.c \
+	draw_vbuf.c \
+	draw_vertex.c \
+	draw_vertex_cache.c \
+	draw_vertex_fetch.c \
+	draw_vertex_shader.c \
+	draw_vf.c \
+	draw_vf_generic.c \
+	draw_vf_sse.c \
+	draw_wide_prims.c
+
+C_SOURCES = \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES = 
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index 12a8bd0409e..c10ab396d8d 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -1,3 +1,23 @@
-default:
-	cd ../.. ; make
+
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = tgsi
+
+DRIVER_SOURCES = \
+	exec/tgsi_exec.c \
+	exec/tgsi_sse2.c \
+	util/tgsi_build.c \
+	util/tgsi_dump.c \
+	util/tgsi_parse.c \
+	util/tgsi_util.c
+
+C_SOURCES = \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES = 
+
+include ../../Makefile.template
+
+symlinks:
 
diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile
deleted file mode 100644
index eb8b14e0e89..00000000000
--- a/src/gallium/auxiliary/tgsi/exec/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-default:
-	cd ../../.. ; make
-
diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile
new file mode 100644
index 00000000000..b8cb148c4f5
--- /dev/null
+++ b/src/gallium/auxiliary/util/Makefile
@@ -0,0 +1,20 @@
+
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = util
+
+DRIVER_SOURCES = \
+	p_debug.c \
+	p_tile.c \
+	p_util.c
+
+C_SOURCES = \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES = 
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile
index c0345a9cb54..8dcfb18a16f 100644
--- a/src/gallium/drivers/Makefile
+++ b/src/gallium/drivers/Makefile
@@ -2,11 +2,7 @@ TOP = ../../..
 include $(TOP)/configs/current
 
 
-ifeq ($(CONFIG_NAME), linux-cell)
-CELL_DIR = cell
-endif
-
-SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR)
+SUBDIRS = softpipe i915simple i965simple failover
 
 
 default: subdirs
diff --git a/src/gallium/winsys/Makefile b/src/gallium/winsys/Makefile
new file mode 100644
index 00000000000..3dc5ea57441
--- /dev/null
+++ b/src/gallium/winsys/Makefile
@@ -0,0 +1,20 @@
+TOP = ../../..
+include $(TOP)/configs/current
+
+
+SUBDIRS = $(GALLIUM_WINSYS_DIRS)
+
+
+default: subdirs
+
+
+subdirs:
+	@for dir in $(SUBDIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE)) || exit 1 ; \
+		fi \
+	done
+
+
+clean:
+	rm -f `find . -name \*.[oa]`
diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template
index 2a261ed6694..65a93bd53e3 100644
--- a/src/gallium/winsys/dri/Makefile.template
+++ b/src/gallium/winsys/dri/Makefile.template
@@ -1,7 +1,9 @@
 # -*-makefile-*-
 
-MESA_MODULES = $(TOP)/src/mesa/libmesa.a
-
+MESA_MODULES = \
+	$(TOP)/src/mesa/libmesa.a \
+	$(GALLIUM_AUXILIARIES)
+	
 COMMON_GALLIUM_SOURCES = \
         $(TOP)/src/mesa/drivers/dri/common/utils.c \
         $(TOP)/src/mesa/drivers/dri/common/vblank.c \
diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile
new file mode 100644
index 00000000000..2664ac47ceb
--- /dev/null
+++ b/src/gallium/winsys/xlib/Makefile
@@ -0,0 +1,113 @@
+# src/mesa/Makefile
+
+TOP = ../../../..
+include $(TOP)/configs/current
+
+
+INCLUDE_DIRS = \
+	-I$(TOP)/include \
+	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/mesa/main \
+	-I$(TOP)/src/gallium/include \
+	-I$(TOP)/src/gallium/drivers \
+	-I$(TOP)/src/gallium/auxiliary
+
+X11_DRIVER_SOURCES = \
+	glxapi.c	\
+	fakeglx.c	\
+	xfonts.c	\
+	xm_api.c	\
+	xm_winsys.c	\
+	xm_winsys_aub.c	\
+	brw_aub.c
+
+
+GL_MAJOR = 1
+GL_MINOR = 5
+GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY)
+
+
+PIPE_LIB = \
+	$(GALLIUM_DRIVERS) \
+	$(TOP)/src/mesa/libglapi.a \
+	$(TOP)/src/mesa/libmesa.a \
+	$(GALLIUM_AUXILIARIES)
+
+ifeq ($(CONFIG_NAME), linux-cell)
+CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a
+CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a
+endif
+
+ifeq ($(CONFIG_NAME), linux-llvm)
+LLVM_LIB = $(TOP)/src/gallium/auxiliary/llvm/libgallivm.a
+endif
+
+
+.SUFFIXES : .cpp
+
+.c.o:
+	$(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
+
+.cpp.o:
+	$(CXX) -c $(INCLUDE_DIRS) $(CXXFLAGS) $< -o $@
+
+.S.o:
+	$(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
+
+
+default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME)
+
+
+######################################################################
+# Stand-alone Mesa libGL and libOSMesa
+STAND_ALONE_DRIVER_SOURCES = \
+	$(X11_DRIVER_SOURCES)
+
+STAND_ALONE_DRIVER_OBJECTS = $(STAND_ALONE_DRIVER_SOURCES:.c=.o)
+
+STAND_ALONE_OBJECTS = \
+	$(STAND_ALONE_DRIVER_OBJECTS)
+
+# Make the GL library
+$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(LLVM_LIB) $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU)
+	$(TOP)/bin/mklib -o $(GL_LIB) \
+		-linker "$(CC)" \
+		-major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \
+		-install $(TOP)/$(LIB_DIR) \
+		$(MKLIB_OPTIONS) $(STAND_ALONE_OBJECTS) \
+		--start-group $(PIPE_LIB) $(LLVM_LIB) --end-group $(CELL_LIB) $(CELL_LIB_SPU) $(GL_LIB_DEPS)
+
+
+######################################################################
+# Generic stuff
+
+depend: $(ALL_SOURCES)
+	@ echo "running $(MKDEP)"
+	@ rm -f depend  # workaround oops on gutsy?!?
+	@ touch depend
+	@ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \
+		> /dev/null 2>/dev/null
+
+
+install: default
+	$(INSTALL) -d $(INSTALL_DIR)/include/GL
+	$(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR)
+	$(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL
+	@if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \
+		$(INSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \
+	fi
+
+## NOT INSTALLED YET:
+## $(INSTALL) -d $(INSTALL_DIR)/include/GLES
+## $(INSTALL) -m 644 include/GLES/*.h $(INSTALL_DIR)/include/GLES
+
+
+# Emacs tags
+tags:
+	etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h
+
+clean:
+	-rm -f *.o
+
+
+include depend
diff --git a/src/mesa/Makefile b/src/mesa/Makefile
index c8cb2b592fe..86a9cf0b88c 100644
--- a/src/mesa/Makefile
+++ b/src/mesa/Makefile
@@ -11,19 +11,6 @@ GL_MINOR = 5
 GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY)
 
 
-PIPE_LIB = \
-	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
-	$(TOP)/src/gallium/drivers/i965simple/libi965simple.a
-
-ifeq ($(CONFIG_NAME), linux-cell)
-CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a
-CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a
-endif
-
-ifeq ($(CONFIG_NAME), linux-llvm)
-LLVM_LIB = $(TOP)/src/gallium/auxiliary/llvm/libgallivm.a
-endif
-
 .SUFFIXES : .cpp
 
 .c.o:
@@ -36,33 +23,14 @@ endif
 	$(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
 
 
-# Figure out what to make here
-default:
-	@if [ "${DRIVER_DIRS}" = "dri" ] ; then \
-		$(MAKE) linux-solo ; \
-	elif [ "${DRIVER_DIRS}" = "osmesa" ] ; then \
-		$(MAKE) osmesa-only ; \
-	elif [ "$(DRIVER_DIRS)" = "beos" ]; then \
-		$(MAKE) beos ; \
-	elif [ "$(DRIVER_DIRS)" = "directfb" ]; then \
-		$(MAKE) directfb ; \
-	elif [ "$(DRIVER_DIRS)" = "fbdev osmesa" ]; then \
-		$(MAKE) fbdev ; $(MAKE) osmesa-only ; \
-	else \
-		$(MAKE) stand-alone ; \
-	fi
-
+default: depend subdirs libmesa.a
 
-######################################################################
-# BeOS driver target
-
-beos: depend subdirs libmesa.a
-	cd drivers/beos; $(MAKE)
+ifneq ($(DRIVER_DIRS),dri)
+default: libglapi.a
+endif
 
 
 ######################################################################
-# Linux DRI drivers
-
 # Make archive of core object files
 libmesa.a: $(SOLO_OBJECTS)
 	@ $(TOP)/bin/mklib -o mesa -static $(SOLO_OBJECTS);
@@ -70,32 +38,8 @@ libmesa.a: $(SOLO_OBJECTS)
 		mimeset -f "$@" ; \
 	fi
 
-linux-solo: depend subdirs libmesa.a
-	cd $(TOP)/src/gallium/winsys/dri ; $(MAKE)
-
-
-#####################################################################
-# Stand-alone Mesa libGL, no built-in drivers (DirectFB)
-
-libgl-core: $(CORE_OBJECTS)
-	@ $(TOP)/bin/mklib -o $(GL_LIB) \
-		-major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \
-		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) $(CORE_OBJECTS) \
-		$(GL_LIB_DEPS)
-
-directfb: depend subdirs libgl-core
-	cd drivers/directfb ; $(MAKE)
-
-
-#####################################################################
-# fbdev Mesa driver (libGL.so)
-
-fbdev: $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) $(COMMON_DRIVER_OBJECTS)
-	@ $(TOP)/bin/mklib -o $(GL_LIB) \
-		-major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \
-		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
-		$(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) \
-		$(COMMON_DRIVER_OBJECTS) $(GL_LIB_DEPS)
+libglapi.a: $(GLAPI_OBJECTS)
+	@ $(TOP)/bin/mklib -o glapi -static $(GLAPI_OBJECTS)
 
 
 ######################################################################
@@ -176,9 +120,6 @@ install: default
 	@if [ -e $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME) ]; then \
 		$(INSTALL) $(TOP)/$(LIB_DIR)/libOSMesa* $(INSTALL_DIR)/$(LIB_DIR); \
 	fi
-	@if [ "${DRIVER_DIRS}" = "dri" ] ; then \
-		cd $(TOP)/gallium/winsys/dri ; $(MAKE) install ; \
-	fi
 
 ## NOT INSTALLED YET:
 ## $(INSTALL) -d $(INSTALL_DIR)/include/GLES
@@ -192,9 +133,8 @@ tags:
 clean:
 	-rm -f */*.o
 	-rm -f */*/*.o
-	-rm -f depend depend.bak libmesa.a
+	-rm -f depend depend.bak libmesa.a libglapi.a
 	-rm -f drivers/*/*.o
-	(cd drivers/dri && $(MAKE) clean)
 	(cd x86 && $(MAKE) clean)
 	(cd x86-64 && $(MAKE) clean)
 
diff --git a/src/mesa/sources b/src/mesa/sources
index f83d247a1e2..0d185fd5f3f 100644
--- a/src/mesa/sources
+++ b/src/mesa/sources
@@ -156,51 +156,6 @@ VF_SOURCES = \
 	vf/vf_generic.c \
 	vf/vf_sse.c
 
-
-DRAW_SOURCES = \
-	$(TOP)/src/gallium/auxiliary/draw/draw_clip.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_vs_exec.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_vs_sse.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_vs_llvm.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_context.c\
-	$(TOP)/src/gallium/auxiliary/draw/draw_cull.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_debug.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_flatshade.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_offset.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_prim.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_stipple.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_twoside.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_unfilled.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_validate.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_vbuf.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_vertex.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_vertex_cache.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_vertex_fetch.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_vertex_shader.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_vf.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_vf_generic.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_vf_sse.c \
-	$(TOP)/src/gallium/auxiliary/draw/draw_wide_prims.c
-
-TGSIEXEC_SOURCES = \
-	$(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c \
-	$(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
-
-TGSIUTIL_SOURCES = \
-	$(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_build.c \
-	$(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_dump.c \
-	$(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_parse.c \
-	$(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_util.c
-
-STATECACHE_SOURCES = \
-	$(TOP)/src/gallium/auxiliary/cso_cache/cso_hash.c \
-	$(TOP)/src/gallium/auxiliary/cso_cache/cso_cache.c
-
-PIPEUTIL_SOURCES = \
-	$(TOP)/src/gallium/auxiliary/util/p_debug.c \
-	$(TOP)/src/gallium/auxiliary/util/p_tile.c \
-	$(TOP)/src/gallium/auxiliary/util/p_util.c
-
 STATETRACKER_SOURCES = \
 	state_tracker/st_atom.c \
 	state_tracker/st_atom_blend.c \
@@ -229,7 +184,7 @@ STATETRACKER_SOURCES = \
 	state_tracker/st_cb_readpixels.c \
 	state_tracker/st_cb_strings.c \
 	state_tracker/st_cb_texture.c \
-        state_tracker/st_cache.c \
+	state_tracker/st_cache.c \
 	state_tracker/st_context.c \
 	state_tracker/st_debug.c \
 	state_tracker/st_draw.c \
@@ -333,15 +288,6 @@ SPARC_API =			\
 __COMMON_DRIVER_SOURCES =			\
 	drivers/common/driverfuncs.c
 
-X11_DRIVER_SOURCES =		\
-	$(TOP)/src/gallium/winsys/xlib/glxapi.c	\
-	$(TOP)/src/gallium/winsys/xlib/fakeglx.c	\
-	$(TOP)/src/gallium/winsys/xlib/xfonts.c	\
-	$(TOP)/src/gallium/winsys/xlib/xm_api.c	\
-	$(TOP)/src/gallium/winsys/xlib/xm_winsys.c	\
-	$(TOP)/src/gallium/winsys/xlib/xm_winsys_aub.c	\
-	$(TOP)/src/gallium/winsys/xlib/brw_aub.c
-
 OSMESA_DRIVER_SOURCES = \
 	drivers/osmesa/osmesa.c
 
-- 
cgit v1.2.3


From 33ceb6716a2166db75659fa66d85fb4cfb9633c7 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Mon, 18 Feb 2008 10:52:44 +0000
Subject: Update scons build for new code layout.

---
 SConstruct                                  |   7 +-
 src/SConscript                              |   7 ++
 src/gallium/SConscript                      |  23 +++-
 src/gallium/auxiliary/cso_cache/SConscript  |  10 ++
 src/gallium/auxiliary/draw/SConscript       |  31 +++++
 src/gallium/auxiliary/pipebuffer/SConscript |  14 +++
 src/gallium/auxiliary/tgsi/SConscript       |  14 +++
 src/gallium/auxiliary/util/SConscript       |  11 ++
 src/gallium/drivers/failover/SConscript     |  13 +++
 src/gallium/winsys/SConscript               |  10 ++
 src/gallium/winsys/dri/SConscript           |  51 +++++++++
 src/gallium/winsys/dri/intel/SConscript     |  14 +--
 src/gallium/winsys/xlib/SConscript          |  28 +++++
 src/mesa/SConscript                         | 170 ++--------------------------
 src/mesa/drivers/dri/SConscript             |  48 --------
 15 files changed, 229 insertions(+), 222 deletions(-)
 create mode 100644 src/SConscript
 create mode 100644 src/gallium/auxiliary/cso_cache/SConscript
 create mode 100644 src/gallium/auxiliary/draw/SConscript
 create mode 100644 src/gallium/auxiliary/pipebuffer/SConscript
 create mode 100644 src/gallium/auxiliary/tgsi/SConscript
 create mode 100644 src/gallium/auxiliary/util/SConscript
 create mode 100644 src/gallium/drivers/failover/SConscript
 create mode 100644 src/gallium/winsys/SConscript
 create mode 100644 src/gallium/winsys/dri/SConscript
 create mode 100644 src/gallium/winsys/xlib/SConscript
 delete mode 100644 src/mesa/drivers/dri/SConscript

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/SConstruct b/SConstruct
index 22a4072c933..1126aff21b5 100644
--- a/SConstruct
+++ b/SConstruct
@@ -108,7 +108,10 @@ env.Append(CPPPATH = [
 	'#/include',
 	'#/src/mesa',
 	'#/src/mesa/main',
-	'#/src/mesa/pipe',
+	'#/src/gallium/include/pipe',
+	'#/src/gallium/include',
+	'#/src/gallium/auxiliary',
+	'#/src/gallium/drivers',
 ])
 
 
@@ -222,7 +225,7 @@ build_dir = os.path.join(build_topdir, build_subdir)
 # http://www.scons.org/wiki/SimultaneousVariantBuilds
 
 SConscript(
-	'src/mesa/SConscript',
+	'src/SConscript',
 	build_dir = build_dir,
 	duplicate = 0 # http://www.scons.org/doc/0.97/HTML/scons-user/x2261.html
 )
diff --git a/src/SConscript b/src/SConscript
new file mode 100644
index 00000000000..5b099438940
--- /dev/null
+++ b/src/SConscript
@@ -0,0 +1,7 @@
+Import('*')
+
+SConscript([
+	'gallium/SConscript',
+	'mesa/SConscript',
+	'gallium/winsys/SConscript',
+])
diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index d9c20e01007..a835f6d6614 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -1,9 +1,24 @@
 Import('*')
 
-#env = env.Clone()
+env = env.Clone()
+
+auxiliaries = []
+
+Export('auxiliaries')
+
 
 SConscript([
-	'softpipe/SConscript',
-	'i915simple/SConscript',
-	'i965simple/SConscript',
+	# NOTE: order matters!
+	'auxiliary/util/SConscript',
+	'auxiliary/tgsi/SConscript',
+	'auxiliary/cso_cache/SConscript',
+	'auxiliary/draw/SConscript',
+	#'auxiliary/llvm/SConscript',
+	'auxiliary/pipebuffer/SConscript',
+
+	'drivers/softpipe/SConscript',
+	'drivers/i915simple/SConscript',
+	'drivers/i965simple/SConscript',
+	'drivers/failover/SConscript',
+	#'drivers/cell/SConscript',
 ])
diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript
new file mode 100644
index 00000000000..9751881613e
--- /dev/null
+++ b/src/gallium/auxiliary/cso_cache/SConscript
@@ -0,0 +1,10 @@
+Import('*')
+
+cso_cache = env.ConvenienceLibrary(
+	target = 'cso_cache',
+	source = [
+		'cso_cache.c',
+		'cso_hash.c',
+	])
+
+auxiliaries.insert(0, cso_cache)
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
new file mode 100644
index 00000000000..8e3a8caa74c
--- /dev/null
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -0,0 +1,31 @@
+Import('*')
+
+draw = env.ConvenienceLibrary(
+	target = 'draw',
+	source = [
+		'draw_clip.c',
+		'draw_vs_exec.c',
+		'draw_vs_sse.c',
+		'draw_vs_llvm.c',
+		'draw_context.c',
+		'draw_cull.c',
+		'draw_debug.c',
+		'draw_flatshade.c',
+		'draw_offset.c',
+		'draw_prim.c',
+		'draw_stipple.c',
+		'draw_twoside.c',
+		'draw_unfilled.c',
+		'draw_validate.c',
+		'draw_vbuf.c',
+		'draw_vertex.c',
+		'draw_vertex_cache.c',
+		'draw_vertex_fetch.c',
+		'draw_vertex_shader.c',
+		'draw_vf.c',
+		'draw_vf_generic.c',
+		'draw_vf_sse.c',
+		'draw_wide_prims.c',
+	])
+
+auxiliaries.insert(0, draw)
diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript
new file mode 100644
index 00000000000..3d41fd84a6c
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/SConscript
@@ -0,0 +1,14 @@
+Import('*')
+
+pipebuffer = env.ConvenienceLibrary(
+	target = 'pipebuffer',
+	source = [
+		'pb_buffer_fenced.c',
+		'pb_buffer_malloc.c',
+		'pb_bufmgr_fenced.c',
+		'pb_bufmgr_mm.c',
+		'pb_bufmgr_pool.c',
+		'pb_winsys.c',
+	])
+
+auxiliaries.insert(0, pipebuffer)
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
new file mode 100644
index 00000000000..8464bfe9444
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -0,0 +1,14 @@
+Import('*')
+
+tgsi = env.ConvenienceLibrary(
+	target = 'tgsi',
+	source = [
+		'exec/tgsi_exec.c',
+		'exec/tgsi_sse2.c',
+		'util/tgsi_build.c',
+		'util/tgsi_dump.c',
+		'util/tgsi_parse.c',
+		'util/tgsi_util.c',
+	])
+
+auxiliaries.insert(0, tgsi)
diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript
new file mode 100644
index 00000000000..b126cf44d6f
--- /dev/null
+++ b/src/gallium/auxiliary/util/SConscript
@@ -0,0 +1,11 @@
+Import('*')
+
+util = env.ConvenienceLibrary(
+	target = 'util',
+	source = [
+		'p_debug.c',
+		'p_tile.c',
+		'p_util.c',
+	])
+
+auxiliaries.insert(0, util)
diff --git a/src/gallium/drivers/failover/SConscript b/src/gallium/drivers/failover/SConscript
new file mode 100644
index 00000000000..f8e9b1b4911
--- /dev/null
+++ b/src/gallium/drivers/failover/SConscript
@@ -0,0 +1,13 @@
+Import('*')
+
+env = env.Clone()
+
+failover = env.ConvenienceLibrary(
+	target = 'failover',
+	source = [
+		'fo_state.c',
+		'fo_state_emit.c',
+		'fo_context.c',
+	])
+
+Export('failover')
diff --git a/src/gallium/winsys/SConscript b/src/gallium/winsys/SConscript
new file mode 100644
index 00000000000..32215d8d586
--- /dev/null
+++ b/src/gallium/winsys/SConscript
@@ -0,0 +1,10 @@
+Import('*')
+
+if dri:
+	SConscript([
+		'dri/SConscript',
+	])
+else:
+	SConscript([
+		'xlib/SConscript',
+	])
diff --git a/src/gallium/winsys/dri/SConscript b/src/gallium/winsys/dri/SConscript
new file mode 100644
index 00000000000..8c56ce917c7
--- /dev/null
+++ b/src/gallium/winsys/dri/SConscript
@@ -0,0 +1,51 @@
+Import('*')
+
+drienv = env.Clone()
+
+drienv.Replace(CPPPATH = [
+	'#src/mesa/drivers/dri/common',
+	'#include',
+	'#include/GL/internal',
+	'#src/gallium/include',
+	'#src/gallium/auxiliary',
+	'#src/gallium/drivers',
+	'#src/mesa',
+	'#src/mesa/main',
+	'#src/mesa/glapi',
+	'#src/mesa/math',
+	'#src/mesa/transform',
+	'#src/mesa/shader',
+	'#src/mesa/swrast',
+	'#src/mesa/swrast_setup',
+	'#src/egl/main',
+	'#src/egl/drivers/dri',
+])
+
+drienv.ParseConfig('pkg-config --cflags --libs libdrm')
+
+COMMON_GALLIUM_SOURCES = [
+	'#src/mesa/drivers/dri/common/utils.c',
+	'#src/mesa/drivers/dri/common/vblank.c',
+	'#src/mesa/drivers/dri/common/dri_util.c',
+	'#src/mesa/drivers/dri/common/xmlconfig.c',
+]
+
+COMMON_BM_SOURCES = [
+	'#src/mesa/drivers/dri/common/dri_bufmgr.c',
+	'#src/mesa/drivers/dri/common/dri_drmpool.c',
+]
+
+Export([
+	'drienv',
+	'COMMON_GALLIUM_SOURCES',
+	'COMMON_BM_SOURCES',
+])
+
+# TODO: Installation
+#install: $(LIBNAME)
+#	$(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR)
+#	$(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR)
+
+SConscript([
+	'intel/SConscript',
+])
diff --git a/src/gallium/winsys/dri/intel/SConscript b/src/gallium/winsys/dri/intel/SConscript
index a7cc10450e3..525ba580e8e 100644
--- a/src/gallium/winsys/dri/intel/SConscript
+++ b/src/gallium/winsys/dri/intel/SConscript
@@ -9,11 +9,6 @@ env.Append(CPPPATH = [
 
 #MINIGLX_SOURCES = server/intel_dri.c
 
-pipe_drivers = [
-	softpipe,
-	i915simple
-]
-
 DRIVER_SOURCES = [
 	'intel_winsys_pipe.c',
 	'intel_winsys_softpipe.c',
@@ -31,11 +26,14 @@ sources = \
 	COMMON_BM_SOURCES + \
 	DRIVER_SOURCES
 
-# DRIVER_DEFINES = -I../intel $(shell pkg-config libdrm --atleast-version=2.3.1 \
-#				&& echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP")
+drivers = [
+	softpipe,
+	i915simple
+]
 
+# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
 env.SharedLibrary(
 	target ='i915tex_dri.so',
 	source = sources,
-	LIBS = pipe_drivers + env['LIBS'],
+	LIBS = mesa + drivers + auxiliaries + env['LIBS'],
 )
\ No newline at end of file
diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript
new file mode 100644
index 00000000000..f8aa5ef945d
--- /dev/null
+++ b/src/gallium/winsys/xlib/SConscript
@@ -0,0 +1,28 @@
+#######################################################################
+# SConscript for xlib winsys
+
+Import('*')
+
+
+sources = [
+	'glxapi.c',
+	'fakeglx.c',
+	'xfonts.c',
+	'xm_api.c',
+	'xm_winsys.c',
+	'xm_winsys_aub.c',
+	'brw_aub.c',
+]
+	
+drivers = [
+	softpipe,
+	i915simple,
+	i965simple,
+]
+
+# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
+env.SharedLibrary(
+	target ='GL',
+	source = sources,
+	LIBS = glapi + mesa + drivers + auxiliaries + env['LIBS'],
+)
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index faf8c84872c..a828133580a 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -1,7 +1,5 @@
 #######################################################################
-# SConscript for mesa
-#
-# TODO: Split this into per-module SConscripts 
+# SConscript for Mesa
 
 
 Import('*')
@@ -116,53 +114,6 @@ VF_SOURCES = [
 	'vf/vf_sse.c',
 ]
 
-DRAW_SOURCES = [
-	'pipe/draw/draw_clip.c',
-	'pipe/draw/draw_context.c',
-	'pipe/draw/draw_cull.c',
-	'pipe/draw/draw_debug.c',
-	'pipe/draw/draw_flatshade.c',
-	'pipe/draw/draw_offset.c',
-	'pipe/draw/draw_prim.c',
-	'pipe/draw/draw_stipple.c',
-	'pipe/draw/draw_twoside.c',
-	'pipe/draw/draw_unfilled.c',
-	'pipe/draw/draw_validate.c',
-	'pipe/draw/draw_vbuf.c',
-	'pipe/draw/draw_vertex.c',
-	'pipe/draw/draw_vertex_cache.c',
-	'pipe/draw/draw_vertex_fetch.c',
-	'pipe/draw/draw_vertex_shader.c',
-	'pipe/draw/draw_vertex_shader_llvm.c',
-	'pipe/draw/draw_vf.c',
-	'pipe/draw/draw_vf_generic.c',
-	'pipe/draw/draw_vf_sse.c',
-	'pipe/draw/draw_wide_prims.c',
-]
-
-TGSIEXEC_SOURCES = [
-	'pipe/tgsi/exec/tgsi_exec.c',
-	'pipe/tgsi/exec/tgsi_sse2.c',
-]
-
-TGSIUTIL_SOURCES = [
-	'pipe/tgsi/util/tgsi_build.c',
-	'pipe/tgsi/util/tgsi_dump.c',
-	'pipe/tgsi/util/tgsi_parse.c',
-	'pipe/tgsi/util/tgsi_util.c',
-]
-
-STATECACHE_SOURCES = [
-	'pipe/cso_cache/cso_hash.c',
-	'pipe/cso_cache/cso_cache.c',
-]
-
-PIPEUTIL_SOURCES = [
-	'pipe/util/p_debug.c',
-	'pipe/util/p_tile.c',
-	'pipe/util/p_util.c',
-]
-
 STATETRACKER_SOURCES = [
 	'state_tracker/st_atom.c',
 	'state_tracker/st_atom_blend.c',
@@ -311,126 +262,25 @@ else:
 	ASM_SOURCES = []
 	API_SOURCES = []
 
-
-#######################################################################
-# Driver sources
-
-
-X11_DRIVER_SOURCES = [
-	'pipe/xlib/glxapi.c',
-	'pipe/xlib/fakeglx.c',
-	'pipe/xlib/xfonts.c',
-	'pipe/xlib/xm_api.c',
-	'pipe/xlib/xm_winsys.c',
-	'pipe/xlib/xm_winsys_aub.c',
-	'pipe/xlib/brw_aub.c',
-]
-
-OSMESA_DRIVER_SOURCES = [
-	'drivers/osmesa/osmesa.c',
-]
-
-GLIDE_DRIVER_SOURCES = [
-	'drivers/glide/fxapi.c',
-	'drivers/glide/fxdd.c',
-	'drivers/glide/fxddspan.c',
-	'drivers/glide/fxddtex.c',
-	'drivers/glide/fxsetup.c',
-	'drivers/glide/fxtexman.c',
-	'drivers/glide/fxtris.c',
-	'drivers/glide/fxvb.c',
-	'drivers/glide/fxglidew.c',
-	'drivers/glide/fxg.c',
-]
-
-SVGA_DRIVER_SOURCES = [
-	'drivers/svga/svgamesa.c',
-	'drivers/svga/svgamesa8.c',
-	'drivers/svga/svgamesa15.c',
-	'drivers/svga/svgamesa16.c',
-	'drivers/svga/svgamesa24.c',
-	'drivers/svga/svgamesa32.c',
-]
-
-FBDEV_DRIVER_SOURCES = [
-	'drivers/fbdev/glfbdev.c',
-]
-
-
-### All the core C sources
-
 SOLO_SOURCES = \
 	MAIN_SOURCES + \
 	MATH_SOURCES + \
 	VBO_SOURCES + \
 	VF_SOURCES + \
-	DRAW_SOURCES + \
-	TGSIEXEC_SOURCES + \
-	TGSIUTIL_SOURCES + \
-	PIPEUTIL_SOURCES + \
-	STATECACHE_SOURCES + \
 	STATETRACKER_SOURCES + \
 	SHADER_SOURCES + \
 	ASM_SOURCES + \
 	SLANG_SOURCES
 
-CORE_SOURCES = \
-	GLAPI_SOURCES + API_SOURCES + \
-	SOLO_SOURCES
-
-ALL_SOURCES = \
-	GLAPI_SOURCES + API_SOURCES + \
-	SOLO_SOURCES + \
-	ASM_SOURCES + \
-	X11_DRIVER_SOURCES + \
-	FBDEV_DRIVER_SOURCES + \
-	OSMESA_DRIVER_SOURCES
-
-
-######################################################################
-# Gallium sources
-
-SConscript([
-	'pipe/SConscript',
-])
-
-
-######################################################################
-# libGL
+mesa = env.ConvenienceLibrary(
+	target = 'mesa',
+	source = SOLO_SOURCES,
+)
+Export('mesa')
 
 if not dri:
-	STAND_ALONE_DRIVER_SOURCES = \
-		CORE_SOURCES + \
-		X11_DRIVER_SOURCES
-	
-	Import(
-		'softpipe', 
-		'i915simple',
-		'i965simple'
+	glapi = env.ConvenienceLibrary(
+		target = 'glapi',
+		source = GLAPI_SOURCES + API_SOURCES,
 	)
-	
-	pipe_drivers = [
-		softpipe,
-		i965simple
-	]
-	
-	env.SharedLibrary(
-		target ='GL',
-		source = STAND_ALONE_DRIVER_SOURCES,
-		LIBS = [softpipe, i965simple] + env['LIBS'],
-	)
-
-
-######################################################################
-# Driver sources
-
-if dri:
-	mesa = env.ConvenienceLibrary(
-		target = 'mesa',
-		source = SOLO_SOURCES,
-	)
-	env.Prepend(LIBS = [mesa])
-
-	SConscript([
-		'drivers/dri/SConscript',
-	])
+	Export('glapi')
diff --git a/src/mesa/drivers/dri/SConscript b/src/mesa/drivers/dri/SConscript
deleted file mode 100644
index d32bd086696..00000000000
--- a/src/mesa/drivers/dri/SConscript
+++ /dev/null
@@ -1,48 +0,0 @@
-Import('*')
-
-drienv = env.Clone()
-
-drienv.Replace(CPPPATH = [
-	'#src/mesa/drivers/dri/common',
-	'#include',
-	'#include/GL/internal',
-	'#src/mesa',
-	'#src/mesa/main',
-	'#src/mesa/glapi',
-	'#src/mesa/math',
-	'#src/mesa/transform',
-	'#src/mesa/shader',
-	'#src/mesa/swrast',
-	'#src/mesa/swrast_setup',
-	'#src/egl/main',
-	'#src/egl/drivers/dri',
-])
-
-drienv.ParseConfig('pkg-config --cflags --libs libdrm')
-
-COMMON_GALLIUM_SOURCES = [
-	'../common/utils.c',
-	'../common/vblank.c',
-	'../common/dri_util.c',
-	'../common/xmlconfig.c',
-]
-
-COMMON_BM_SOURCES = [
-	'../common/dri_bufmgr.c',
-	'../common/dri_drmpool.c',
-]
-
-Export([
-	'drienv',
-	'COMMON_GALLIUM_SOURCES',
-	'COMMON_BM_SOURCES',
-])
-
-# TODO: Installation
-#install: $(LIBNAME)
-#	$(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR)
-#	$(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR)
-
-SConscript([
-	'intel_winsys/SConscript',
-])
-- 
cgit v1.2.3


From aceeb80d4f706980aaf71b8e098d4c6718d8ac90 Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Mon, 18 Feb 2008 16:19:05 -0700
Subject: gallium: antialiased line drawing

New draw/prim stage: draw_aaline.  When installed, lines are replaced by
textured quads to do antialiasing.  The current user-defined fragment shader
is modified to do a texture fetch and modulate fragment alpha.
---
 src/gallium/auxiliary/draw/Makefile             |   1 +
 src/gallium/auxiliary/draw/draw_aaline.c        | 832 ++++++++++++++++++++++++
 src/gallium/auxiliary/draw/draw_context.c       |  22 +
 src/gallium/auxiliary/draw/draw_context.h       |  21 +-
 src/gallium/auxiliary/draw/draw_prim.c          |   7 +
 src/gallium/auxiliary/draw/draw_private.h       |  12 +-
 src/gallium/auxiliary/draw/draw_validate.c      |   8 +-
 src/gallium/auxiliary/tgsi/Makefile             |   1 +
 src/gallium/drivers/softpipe/sp_context.c       |   3 +
 src/gallium/drivers/softpipe/sp_state_derived.c |  17 +-
 10 files changed, 914 insertions(+), 10 deletions(-)
 create mode 100644 src/gallium/auxiliary/draw/draw_aaline.c

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index c56b63d85b8..c8000cbe9cb 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -4,6 +4,7 @@ include $(TOP)/configs/current
 LIBNAME = draw
 
 DRIVER_SOURCES = \
+	draw_aaline.c \
 	draw_clip.c \
 	draw_vs_exec.c \
 	draw_vs_sse.c \
diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c
new file mode 100644
index 00000000000..f1fee4f8ba0
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_aaline.c
@@ -0,0 +1,832 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * AA line stage:  AA lines are converted to texture mapped triangles.
+ *
+ * Authors:  Brian Paul
+ */
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/util/tgsi_transform.h"
+#include "tgsi/util/tgsi_dump.h"
+
+#include "draw_context.h"
+#include "draw_private.h"
+
+
+/**
+ * Max texture level for the alpha texture used for antialiasing
+ */
+#define MAX_TEXTURE_LEVEL  5   /* 32 x 32 */
+
+
+/**
+ * Subclass of pipe_shader_state to carry extra fragment shader info.
+ */
+struct aaline_fragment_shader
+{
+   struct pipe_shader_state state;
+   void *driver_fs;
+   void *aaline_fs;
+   void *aapoint_fs; /* not yet */
+   void *sprite_fs; /* not yet */
+};
+
+
+/**
+ * Subclass of draw_stage
+ */
+struct aaline_stage
+{
+   struct draw_stage stage;
+
+   float half_line_width;
+
+   /** For AA lines, this is the vertex attrib slot for the new texcoords */
+   uint tex_slot;
+
+   void *sampler_cso;
+   struct pipe_texture *texture;
+   uint sampler_unit;
+
+
+   /*
+    * Currently bound state
+    */
+   struct aaline_fragment_shader *fs;
+   struct {
+      void *sampler[PIPE_MAX_SAMPLERS];
+      struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+   } state;
+
+   /*
+    * Driver interface/override functions
+    */
+   void * (*driver_create_fs_state)(struct pipe_context *,
+                                    const struct pipe_shader_state *);
+   void (*driver_bind_fs_state)(struct pipe_context *, void *);
+   void (*driver_delete_fs_state)(struct pipe_context *, void *);
+
+   void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *);
+
+   void (*driver_set_sampler_texture)(struct pipe_context *,
+                                      unsigned sampler,
+                                      struct pipe_texture *);
+
+   struct pipe_context *pipe;
+};
+
+
+
+/**
+ * Subclass of tgsi_transform_context, used for transforming the
+ * user's fragment shader to add the special AA instructions.
+ */
+struct aa_transform_context {
+   struct tgsi_transform_context base;
+   uint tempsUsed;  /**< bitmask */
+   int colorOutput; /**< which output is the primary color */
+   int maxSampler;  /**< max sampler index found */
+   int maxInput, maxGeneric;  /**< max input index found */
+   int colorTemp, texTemp;  /**< temp registers */
+   boolean firstInstruction;
+};
+
+
+/**
+ * TGSI declaration transform callback.
+ * Look for a free sampler, a free input attrib, and two free temp regs.
+ */
+static void
+aa_transform_decl(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_declaration *decl)
+{
+   struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
+
+   if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
+       decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
+       decl->Semantic.SemanticIndex == 0) {
+      aactx->colorOutput = decl->u.DeclarationRange.First;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
+      if (decl->u.DeclarationRange.Last > aactx->maxSampler)
+         aactx->maxSampler = decl->u.DeclarationRange.Last + 1;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      if (decl->u.DeclarationRange.Last > aactx->maxInput)
+         aactx->maxInput = decl->u.DeclarationRange.Last;
+      if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC  &&
+          decl->Semantic.SemanticIndex > aactx->maxGeneric) {
+         aactx->maxGeneric = decl->Semantic.SemanticIndex;
+      }
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      uint i;
+      for (i = decl->u.DeclarationRange.First;
+           i <= decl->u.DeclarationRange.Last; i++) {
+         aactx->tempsUsed |= (1 << i);
+      }
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+
+/**
+ * TGSI instruction transform callback.
+ * Replace writes to result.color w/ a temp reg.
+ * Upon END instruction, insert texture sampling code for antialiasing.
+ */
+static void
+aa_transform_inst(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_instruction *inst)
+{
+   struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
+
+   if (aactx->firstInstruction) {
+      /* emit our new declarations before the first instruction */
+
+      struct tgsi_full_declaration decl;
+      uint i;
+
+      /* find two free temp regs */
+      for (i = 0; i < 32; i++) {
+         if ((aactx->tempsUsed & (1 << i)) == 0) {
+            /* found a free temp */
+            if (aactx->colorTemp < 0)
+               aactx->colorTemp  = i;
+            else if (aactx->texTemp < 0)
+               aactx->texTemp  = i;
+            else
+               break;
+         }
+      }
+      assert(aactx->colorTemp >= 0);
+      assert(aactx->texTemp >= 0);
+
+      /* declare new generic input/texcoord */
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_INPUT;
+      decl.Declaration.Semantic = 1;
+      decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
+      decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
+      decl.Declaration.Interpolate = 1;
+      /* XXX this could be linear... */
+      decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+      decl.u.DeclarationRange.First = 
+      decl.u.DeclarationRange.Last = aactx->maxInput + 1;
+      ctx->emit_declaration(ctx, &decl);
+
+      /* declare new sampler */
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_SAMPLER;
+      decl.u.DeclarationRange.First = 
+      decl.u.DeclarationRange.Last = aactx->maxSampler + 1;
+      ctx->emit_declaration(ctx, &decl);
+
+      /* declare new temp regs */
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_TEMPORARY;
+      decl.u.DeclarationRange.First = 
+      decl.u.DeclarationRange.Last = aactx->texTemp;
+      ctx->emit_declaration(ctx, &decl);
+
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_TEMPORARY;
+      decl.u.DeclarationRange.First = 
+      decl.u.DeclarationRange.Last = aactx->colorTemp;
+      ctx->emit_declaration(ctx, &decl);
+
+      aactx->firstInstruction = FALSE;
+   }
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_END &&
+       aactx->colorOutput != -1) {
+      struct tgsi_full_instruction newInst;
+
+      /* TEX */
+      newInst = tgsi_default_full_instruction();
+      newInst.Instruction.Opcode = TGSI_OPCODE_TEX;
+      newInst.Instruction.NumDstRegs = 1;
+      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp;
+      newInst.Instruction.NumSrcRegs = 2;
+      newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
+      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+      newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1;
+      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+      newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->maxSampler + 1;
+
+      ctx->emit_instruction(ctx, &newInst);
+
+      /* MOV rgb */
+      newInst = tgsi_default_full_instruction();
+      newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
+      newInst.Instruction.NumDstRegs = 1;
+      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+      newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
+      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
+      newInst.Instruction.NumSrcRegs = 1;
+      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
+      ctx->emit_instruction(ctx, &newInst);
+
+      /* MUL alpha */
+      newInst = tgsi_default_full_instruction();
+      newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
+      newInst.Instruction.NumDstRegs = 1;
+      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
+      newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
+      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+      newInst.Instruction.NumSrcRegs = 2;
+      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
+      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp;
+      ctx->emit_instruction(ctx, &newInst);
+
+      /* END */
+      newInst = tgsi_default_full_instruction();
+      newInst.Instruction.Opcode = TGSI_OPCODE_END;
+      newInst.Instruction.NumDstRegs = 0;
+      newInst.Instruction.NumSrcRegs = 0;
+      ctx->emit_instruction(ctx, &newInst);
+   }
+   else {
+      /* Not an END instruction.
+       * Look for writes to result.color and replace with colorTemp reg.
+       */
+      uint i;
+
+      for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+         struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+         if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
+             dst->DstRegister.Index == aactx->colorOutput) {
+            dst->DstRegister.File = TGSI_FILE_TEMPORARY;
+            dst->DstRegister.Index = aactx->colorTemp;
+         }
+      }
+
+      ctx->emit_instruction(ctx, inst);
+   }
+}
+
+
+/**
+ * Generate the frag shader we'll use for drawing AA lines.
+ * This will be the user's shader plus some texture/modulate instructions.
+ */
+static void
+generate_aaline_fs(struct aaline_stage *aaline)
+{
+   const struct pipe_shader_state *orig_fs = &aaline->fs->state;
+   struct draw_context *draw = aaline->stage.draw;
+   struct pipe_shader_state aaline_fs;
+   struct aa_transform_context transform;
+
+#define MAX 1000
+
+   aaline_fs = *orig_fs; /* copy to init */
+   aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
+
+   memset(&transform, 0, sizeof(transform));
+   transform.colorOutput = -1;
+   transform.maxSampler = -1;
+   transform.maxInput = -1;
+   transform.maxGeneric = -1;
+   transform.colorTemp = -1;
+   transform.texTemp = -1;
+   transform.firstInstruction = TRUE;
+   transform.base.transform_instruction = aa_transform_inst;
+   transform.base.transform_declaration = aa_transform_decl;
+
+   tgsi_transform_shader(orig_fs->tokens,
+                         (struct tgsi_token *) aaline_fs.tokens,
+                         MAX, &transform.base);
+
+#if 0 /* DEBUG */
+   tgsi_dump(orig_fs->tokens, 0);
+   tgsi_dump(aaline_fs.tokens, 0);
+#endif
+
+   aaline_fs.input_semantic_name[aaline_fs.num_inputs] = TGSI_SEMANTIC_GENERIC;
+   aaline_fs.input_semantic_index[aaline_fs.num_inputs] = transform.maxGeneric + 1;
+   aaline_fs.num_inputs++;
+
+   aaline->fs->aaline_fs
+      = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs);
+
+   /* advertise the extra post-transform vertex attributes which will have
+    * the texcoords.
+    */
+   draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
+   draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1;
+}
+
+
+/**
+ * Create the texture map we'll use for antialiasing the lines.
+ */
+static void
+aaline_create_texture(struct aaline_stage *aaline)
+{
+   struct pipe_context *pipe = aaline->pipe;
+   struct pipe_texture texTemp;
+   uint level;
+
+   memset(&texTemp, 0, sizeof(texTemp));
+   texTemp.target = PIPE_TEXTURE_2D;
+   texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */
+   texTemp.last_level = MAX_TEXTURE_LEVEL;
+   texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL;
+   texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL;
+   texTemp.depth[0] = 1;
+   texTemp.cpp = 1;
+
+   aaline->texture = pipe->texture_create(pipe, &texTemp);
+
+   /* Fill in mipmap images.
+    * Basically each level is solid opaque, except for the outermost
+    * texels which are zero.  Special case the 1x1 and 2x2 levels.
+    */
+   for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
+      struct pipe_surface *surface;
+      const uint size = aaline->texture->width[level];
+      ubyte *data;
+      uint i, j;
+
+      assert(aaline->texture->width[level] == aaline->texture->height[level]);
+
+      surface = pipe->get_tex_surface(pipe, aaline->texture, 0, level, 0);
+      data = pipe_surface_map(surface);
+
+      for (i = 0; i < size; i++) {
+         for (j = 0; j < size; j++) {
+            uint d;
+            if (size == 1) {
+               d = 255;
+            }
+            else if (size == 2) {
+               d = 200; /* tuneable */
+            }
+            else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) {
+               d = 0;
+            }
+            else {
+               d = 255;
+            }
+            data[i * surface->pitch + j] = d;
+         }
+      }
+
+      /* unmap */
+      pipe_surface_unmap(surface);
+      pipe_surface_reference(&surface, NULL);
+   }
+}
+
+
+/**
+ * Create the sampler CSO that'll be used for antialiasing.
+ * By using a mipmapped texture, we don't have to generate a different
+ * texture image for each line size.
+ */
+static void
+aaline_create_sampler(struct aaline_stage *aaline)
+{
+   struct pipe_sampler_state sampler;
+   struct pipe_context *pipe = aaline->pipe;
+
+   memset(&sampler, 0, sizeof(sampler));
+   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
+   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.normalized_coords = 1;
+   sampler.min_lod = 0.0f;
+   sampler.max_lod = MAX_TEXTURE_LEVEL;
+
+   aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
+}
+
+
+/**
+ * When we're about to draw our first AA line in a batch, this function is
+ * called to tell the driver to bind our modified fragment shader.
+ */
+static void
+bind_aaline_fragment_shader(struct aaline_stage *aaline)
+{
+   if (!aaline->fs->aaline_fs) {
+      generate_aaline_fs(aaline);
+   }
+   aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs);
+}
+
+
+
+static INLINE struct aaline_stage *
+aaline_stage( struct draw_stage *stage )
+{
+   return (struct aaline_stage *) stage;
+}
+
+
+static void
+passthrough_point(struct draw_stage *stage, struct prim_header *header)
+{
+   stage->next->point(stage->next, header);
+}
+
+
+static void
+passthrough_tri(struct draw_stage *stage, struct prim_header *header)
+{
+   stage->next->tri(stage->next, header);
+}
+
+
+/**
+ * Draw a wide line by drawing a quad, using geometry which will
+ * fullfill GL's antialiased line requirements.
+ */
+static void
+aaline_line(struct draw_stage *stage, struct prim_header *header)
+{
+   const struct aaline_stage *aaline = aaline_stage(stage);
+   const float half_width = aaline->half_line_width;
+   struct prim_header tri;
+   struct vertex_header *v[8];
+   uint texPos = aaline->tex_slot;
+   float *pos, *tex;
+   float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0];
+   float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1];
+   float a = atan2(dy, dx);
+   float c_a = cos(a), s_a = sin(a);
+   uint i;
+
+   /* XXX the ends of lines aren't quite perfect yet, but probably passable */
+   dx = 0.5 * half_width;
+   dy = half_width;
+
+   /* allocate/dup new verts */
+   for (i = 0; i < 8; i++) {
+      v[i] = dup_vert(stage, header->v[i/4], i);
+   }
+
+   /*
+    * Quad strip for line from v0 to v1 (*=endpoints):
+    *
+    *  1   3                     5   7
+    *  +---+---------------------+---+
+    *  |                             |
+    *  | *v0                     v1* |
+    *  |                             |
+    *  +---+---------------------+---+
+    *  0   2                     4   6
+    */
+
+   /* new verts */
+   pos = v[0]->data[0];
+   pos[0] += (-dx * c_a -  dy * s_a);
+   pos[1] += (-dx * s_a +  dy * c_a);
+
+   pos = v[1]->data[0];
+   pos[0] += (-dx * c_a - -dy * s_a);
+   pos[1] += (-dx * s_a + -dy * c_a);
+
+   pos = v[2]->data[0];
+   pos[0] += ( dx * c_a -  dy * s_a);
+   pos[1] += ( dx * s_a +  dy * c_a);
+
+   pos = v[3]->data[0];
+   pos[0] += ( dx * c_a - -dy * s_a);
+   pos[1] += ( dx * s_a + -dy * c_a);
+
+   pos = v[4]->data[0];
+   pos[0] += (-dx * c_a -  dy * s_a);
+   pos[1] += (-dx * s_a +  dy * c_a);
+
+   pos = v[5]->data[0];
+   pos[0] += (-dx * c_a - -dy * s_a);
+   pos[1] += (-dx * s_a + -dy * c_a);
+
+   pos = v[6]->data[0];
+   pos[0] += ( dx * c_a -  dy * s_a);
+   pos[1] += ( dx * s_a +  dy * c_a);
+
+   pos = v[7]->data[0];
+   pos[0] += ( dx * c_a - -dy * s_a);
+   pos[1] += ( dx * s_a + -dy * c_a);
+
+   /* new texcoords */
+   tex = v[0]->data[texPos];
+   ASSIGN_4V(tex, 0, 0, 0, 1);
+
+   tex = v[1]->data[texPos];
+   ASSIGN_4V(tex, 0, 1, 0, 1);
+
+   tex = v[2]->data[texPos];
+   ASSIGN_4V(tex, .5, 0, 0, 1);
+
+   tex = v[3]->data[texPos];
+   ASSIGN_4V(tex, .5, 1, 0, 1);
+
+   tex = v[4]->data[texPos];
+   ASSIGN_4V(tex, .5, 0, 0, 1);
+
+   tex = v[5]->data[texPos];
+   ASSIGN_4V(tex, .5, 1, 0, 1);
+
+   tex = v[6]->data[texPos];
+   ASSIGN_4V(tex, 1, 0, 0, 1);
+
+   tex = v[7]->data[texPos];
+   ASSIGN_4V(tex, 1, 1, 0, 1);
+
+   /* emit 6 tris for the quad strip */
+   tri.v[0] = v[2];  tri.v[1] = v[1];  tri.v[2] = v[0];
+   stage->next->tri( stage->next, &tri );
+
+   tri.v[0] = v[3];  tri.v[1] = v[1];  tri.v[2] = v[2];
+   stage->next->tri( stage->next, &tri );
+
+   tri.v[0] = v[4];  tri.v[1] = v[3];  tri.v[2] = v[2];
+   stage->next->tri( stage->next, &tri );
+
+   tri.v[0] = v[5];  tri.v[1] = v[3];  tri.v[2] = v[4];
+   stage->next->tri( stage->next, &tri );
+
+   tri.v[0] = v[6];  tri.v[1] = v[5];  tri.v[2] = v[4];
+   stage->next->tri( stage->next, &tri );
+
+   tri.v[0] = v[7];  tri.v[1] = v[5];  tri.v[2] = v[6];
+   stage->next->tri( stage->next, &tri );
+}
+
+
+static void
+aaline_first_line(struct draw_stage *stage, struct prim_header *header)
+{
+   auto struct aaline_stage *aaline = aaline_stage(stage);
+   struct draw_context *draw = stage->draw;
+   struct pipe_context *pipe = aaline->pipe;
+
+   assert(draw->rasterizer->line_smooth);
+
+   if (draw->rasterizer->line_width <= 3.0)
+      aaline->half_line_width = 1.5f;
+   else
+      aaline->half_line_width = 0.5f * draw->rasterizer->line_width;
+
+   aaline->tex_slot = draw->num_vs_outputs;
+   assert(aaline->tex_slot > 0); /* output[0] is vertex pos */
+   draw->extra_vp_outputs.slot = aaline->tex_slot;
+
+   /*
+    * Bind our fragprog, sampler and texture
+    */
+   bind_aaline_fragment_shader(aaline);
+
+   aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, aaline->sampler_cso);
+   aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, aaline->texture);
+
+   /* now really draw first line */
+   stage->line = aaline_line;
+   stage->line(stage, header);
+}
+
+
+static void
+aaline_flush(struct draw_stage *stage, unsigned flags)
+{
+   struct draw_context *draw = stage->draw;
+   struct aaline_stage *aaline = aaline_stage(stage);
+   struct pipe_context *pipe = aaline->pipe;
+
+   stage->line = aaline_first_line;
+   stage->next->flush( stage->next, flags );
+
+   /* restore original frag shader */
+   aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs);
+
+   /* XXX restore original texture, sampler state */
+   aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit,
+                                 aaline->state.sampler[aaline->sampler_unit]);
+   aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit,
+                                 aaline->state.texture[aaline->sampler_unit]);
+
+   draw->extra_vp_outputs.slot = 0;
+}
+
+
+static void
+aaline_reset_stipple_counter(struct draw_stage *stage)
+{
+   stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void
+aaline_destroy(struct draw_stage *stage)
+{
+   draw_free_temp_verts( stage );
+   FREE( stage );
+}
+
+
+static struct aaline_stage *
+draw_aaline_stage(struct draw_context *draw)
+{
+   struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage);
+
+   draw_alloc_temp_verts( &aaline->stage, 8 );
+
+   aaline->stage.draw = draw;
+   aaline->stage.next = NULL;
+   aaline->stage.point = passthrough_point;
+   aaline->stage.line = aaline_first_line;
+   aaline->stage.tri = passthrough_tri;
+   aaline->stage.flush = aaline_flush;
+   aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter;
+   aaline->stage.destroy = aaline_destroy;
+
+   return aaline;
+}
+
+
+/*
+ * XXX temporary? solution to mapping a pipe_context to a aaline_stage.
+ */
+
+#define MAX_CONTEXTS 10
+
+static struct pipe_context *Pipe[MAX_CONTEXTS];
+static struct aaline_stage *Stage[MAX_CONTEXTS];
+static uint NumContexts;
+
+static void
+add_aa_pipe_context(struct pipe_context *pipe, struct aaline_stage *aa)
+{
+   assert(NumContexts < MAX_CONTEXTS);
+   Pipe[NumContexts] = pipe;
+   Stage[NumContexts] = aa;
+   NumContexts++;
+}
+
+static struct aaline_stage *
+aaline_stage_from_pipe(struct pipe_context *pipe)
+{
+   uint i;
+   for (i = 0; i < NumContexts; i++) {
+      if (Pipe[i] == pipe)
+         return Stage[i];
+   }
+   assert(0);
+   return NULL;
+}
+
+
+/**
+ * This function overrides the driver's create_fs_state() function and
+ * will typically be called by the state tracker.
+ */
+static void *
+aaline_create_fs_state(struct pipe_context *pipe,
+                       const struct pipe_shader_state *fs)
+{
+   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
+   struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader);
+
+   if (aafs) {
+      aafs->state = *fs;
+
+      /* pass-through */
+      aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs);
+   }
+
+   return aafs;
+}
+
+
+static void
+aaline_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
+   struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;
+   /* save current */
+   aaline->fs = aafs;
+   /* pass-through */
+   aaline->driver_bind_fs_state(aaline->pipe, aafs->driver_fs);
+}
+
+
+static void
+aaline_delete_fs_state(struct pipe_context *pipe, void *fs)
+{
+   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
+   struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;
+   /* pass-through */
+   aaline->driver_delete_fs_state(aaline->pipe, aafs->driver_fs);
+   FREE(aafs);
+}
+
+
+static void
+aaline_bind_sampler_state(struct pipe_context *pipe,
+                          unsigned unit, void *sampler)
+{
+   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
+   /* save current */
+   aaline->state.sampler[unit] = sampler;
+   /* pass-through */
+   aaline->driver_bind_sampler_state(aaline->pipe, unit, sampler);
+}
+
+
+static void
+aaline_set_sampler_texture(struct pipe_context *pipe,
+                           unsigned sampler, struct pipe_texture *texture)
+{
+   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
+   /* save current */
+   aaline->state.texture[sampler] = texture;
+   /* pass-through */
+   aaline->driver_set_sampler_texture(aaline->pipe, sampler, texture);
+}
+
+
+/**
+ * Called by drivers that want to install this AA line prim stage
+ * into the draw module's pipeline.  This will not be used if the
+ * hardware has native support for AA lines.
+ */
+void
+draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
+{
+   struct aaline_stage *aaline;
+
+   /*
+    * Create / install AA line drawing / prim stage
+    */
+   aaline = draw_aaline_stage( draw );
+   assert(aaline);
+   draw->pipeline.aaline = &aaline->stage;
+
+   aaline->pipe = pipe;
+
+   /* create special texture, sampler state */
+   aaline_create_texture(aaline);
+   aaline_create_sampler(aaline);
+
+   /* save original driver functions */
+   aaline->driver_create_fs_state = pipe->create_fs_state;
+   aaline->driver_bind_fs_state = pipe->bind_fs_state;
+   aaline->driver_delete_fs_state = pipe->delete_fs_state;
+
+   aaline->driver_bind_sampler_state = pipe->bind_sampler_state;
+   aaline->driver_set_sampler_texture = pipe->set_sampler_texture;
+
+   /* override the driver's functions */
+   pipe->create_fs_state = aaline_create_fs_state;
+   pipe->bind_fs_state = aaline_bind_fs_state;
+   pipe->delete_fs_state = aaline_delete_fs_state;
+
+   pipe->bind_sampler_state = aaline_bind_sampler_state;
+   pipe->set_sampler_texture = aaline_set_sampler_texture;
+
+   add_aa_pipe_context(pipe, aaline);
+}
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 4be38303169..a7f3b4aecb2 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -103,6 +103,8 @@ void draw_destroy( struct draw_context *draw )
    draw->pipeline.flatshade->destroy( draw->pipeline.flatshade );
    draw->pipeline.cull->destroy( draw->pipeline.cull );
    draw->pipeline.validate->destroy( draw->pipeline.validate );
+   if (draw->pipeline.aaline)
+      draw->pipeline.aaline->destroy( draw->pipeline.aaline );
    if (draw->pipeline.rasterize)
       draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
    tgsi_exec_machine_free_data(&draw->machine);
@@ -239,6 +241,26 @@ draw_convert_wide_lines(struct draw_context *draw, boolean enable)
 }
 
 
+/**
+ * The draw module may sometimes generate vertices with extra attributes
+ * (such as texcoords for AA lines).  The driver can call this function
+ * to find those attributes.
+ */
+int
+draw_find_vs_output(struct draw_context *draw,
+                    uint semantic_name, uint semantic_index)
+{
+   /* XXX there may be more than one extra vertex attrib.
+    * For example, simulated gl_FragCoord and gl_PointCoord.
+    */
+   if (draw->extra_vp_outputs.semantic_name == semantic_name &&
+       draw->extra_vp_outputs.semantic_index == semantic_index) {
+      return draw->extra_vp_outputs.slot;
+   }
+   return 0;
+}
+
+
 /**
  * Allocate space for temporary post-transform vertices, such as for clipping.
  */
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index ddeb184497a..82035aa8ada 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -41,6 +41,7 @@
 #include "pipe/p_state.h"
 
 
+struct pipe_context;
 struct vertex_buffer;
 struct vertex_info;
 struct draw_context;
@@ -93,6 +94,20 @@ void draw_convert_wide_points(struct draw_context *draw, boolean enable);
 
 void draw_convert_wide_lines(struct draw_context *draw, boolean enable);
 
+boolean draw_use_sse(struct draw_context *draw);
+
+void
+draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe);
+
+
+int
+draw_find_vs_output(struct draw_context *draw,
+                    uint semantic_name, uint semantic_index);
+
+
+/*
+ * Vertex shader functions
+ */
 
 struct draw_vertex_shader *
 draw_create_vertex_shader(struct draw_context *draw,
@@ -102,7 +117,11 @@ void draw_bind_vertex_shader(struct draw_context *draw,
 void draw_delete_vertex_shader(struct draw_context *draw,
                                struct draw_vertex_shader *dvs);
 
-boolean draw_use_sse(struct draw_context *draw);
+
+
+/*
+ * Vertex data functions
+ */
 
 void draw_set_vertex_buffer(struct draw_context *draw,
 			    unsigned attr,
diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c
index 51e2242719a..dd9a8488637 100644
--- a/src/gallium/auxiliary/draw/draw_prim.c
+++ b/src/gallium/auxiliary/draw/draw_prim.c
@@ -121,11 +121,15 @@ static void draw_prim_queue_flush( struct draw_context *draw )
 
 void draw_do_flush( struct draw_context *draw, unsigned flags )
 {
+   static boolean flushing = FALSE;
+
    if (0)
       debug_printf("Flushing with %d verts, %d prims\n",
                    draw->vs.queue_nr,
                    draw->pq.queue_nr );
 
+   if (!flushing) {
+      flushing = TRUE;
 
    if (flags >= DRAW_FLUSH_SHADER_QUEUE) {
       if (draw->vs.queue_nr)
@@ -146,6 +150,9 @@ void draw_do_flush( struct draw_context *draw, unsigned flags )
 	 }
       }    
    }
+
+      flushing = FALSE;
+   }
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index bc11259cb21..dd75f9aefc3 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -48,6 +48,7 @@
 #include "tgsi/exec/tgsi_exec.h"
 
 
+struct pipe_context;
 struct gallivm_prog;
 struct gallivm_cpu_engine;
 
@@ -179,6 +180,7 @@ struct draw_context
       struct draw_stage *offset;
       struct draw_stage *unfilled;
       struct draw_stage *stipple;
+      struct draw_stage *aaline;
       struct draw_stage *wide;
       struct draw_stage *rasterize;
    } pipeline;
@@ -212,9 +214,17 @@ struct draw_context
    unsigned nr_planes;
 
    boolean convert_wide_points; /**< convert wide points to tris? */
-   boolean convert_wide_lines;  /**< convert side lines to tris? */
+   boolean convert_wide_lines;  /**< convert wide lines to tris? */
    boolean use_sse;
 
+   /* If a prim stage introduces new vertex attributes, they'll be stored here
+    */
+   struct {
+      uint semantic_name;
+      uint semantic_index;
+      int slot;
+   } extra_vp_outputs;
+
    unsigned reduced_prim;
 
    /** TGSI program interpreter runtime state */
diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c
index 4375ebabbc4..97e40e372b8 100644
--- a/src/gallium/auxiliary/draw/draw_validate.c
+++ b/src/gallium/auxiliary/draw/draw_validate.c
@@ -58,7 +58,13 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
     * shorter pipelines for lines & points.
     */
 
-   if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines) ||
+   if (draw->rasterizer->line_smooth && draw->pipeline.aaline) {
+      draw->pipeline.aaline->next = next;
+      next = draw->pipeline.aaline;
+   }
+
+   if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines
+        && !draw->rasterizer->line_smooth) ||
        (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) ||
        draw->rasterizer->point_sprite) {
       draw->pipeline.wide->next = next;
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index c10ab396d8d..8bb62b2a0a5 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -10,6 +10,7 @@ DRIVER_SOURCES = \
 	util/tgsi_build.c \
 	util/tgsi_dump.c \
 	util/tgsi_parse.c \
+	util/tgsi_transform.c \
 	util/tgsi_util.c
 
 C_SOURCES = \
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 5e98f190bbf..254c6adca44 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -327,6 +327,9 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys,
       draw_set_rasterize_stage(softpipe->draw, softpipe->setup);
    }
 
+   /* enable aaline stage */
+   draw_install_aaline_stage(softpipe->draw, &softpipe->pipe);
+
    sp_init_surface_functions(softpipe);
 
    return &softpipe->pipe;
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index 9d8fd8b750f..f9f2c5eaa8f 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -44,7 +44,8 @@
  * condition that users shouldn't hit anyway.
  */
 static int
-find_vs_output(const struct pipe_shader_state *vs,
+find_vs_output(struct softpipe_context *sp,
+               const struct pipe_shader_state *vs,
                uint semantic_name,
                uint semantic_index)
 {
@@ -54,7 +55,9 @@ find_vs_output(const struct pipe_shader_state *vs,
           vs->output_semantic_index[i] == semantic_index)
          return i;
    }
-   return 0;
+
+   /* See if the draw module is introducing a new attribute... */
+   return draw_find_vs_output(sp->draw, semantic_name, semantic_index);
 }
 
 
@@ -111,24 +114,24 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
          int src;
          switch (fs->input_semantic_name[i]) {
          case TGSI_SEMANTIC_POSITION:
-            src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0);
+            src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_POSITION, 0);
             draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
             break;
 
          case TGSI_SEMANTIC_COLOR:
-            src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, 
+            src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_COLOR, 
                                  fs->input_semantic_index[i]);
             draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
             break;
 
          case TGSI_SEMANTIC_FOG:
-            src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0);
+            src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_FOG, 0);
             draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
             break;
 
          case TGSI_SEMANTIC_GENERIC:
             /* this includes texcoords and varying vars */
-            src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC,
+            src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_GENERIC,
                                  fs->input_semantic_index[i]);
             draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
             break;
@@ -138,7 +141,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
          }
       }
 
-      softpipe->psize_slot = find_vs_output(vs, TGSI_SEMANTIC_PSIZE, 0);
+      softpipe->psize_slot = find_vs_output(softpipe, vs, TGSI_SEMANTIC_PSIZE, 0);
       if (softpipe->psize_slot > 0) {
          draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
                                softpipe->psize_slot);
-- 
cgit v1.2.3


From f430d95a36d55141cd9ef911aab70364ce4a4108 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Tue, 19 Feb 2008 12:52:28 +0900
Subject: Use gallium's rtasm module.

---
 src/gallium/auxiliary/draw/draw_private.h   |  2 +-
 src/gallium/auxiliary/draw/draw_vf.c        | 10 +++-------
 src/gallium/auxiliary/draw/draw_vf_sse.c    |  2 +-
 src/gallium/auxiliary/draw/draw_vs_sse.c    |  2 +-
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c |  2 +-
 src/gallium/drivers/softpipe/sp_fs_sse.c    |  2 +-
 src/mesa/state_tracker/st_program.h         |  1 -
 7 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index dd75f9aefc3..6c7e860861e 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -44,7 +44,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 
-#include "x86/rtasm/x86sse.h"
+#include "rtasm/rtasm_x86sse.h"
 #include "tgsi/exec/tgsi_exec.h"
 
 
diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c
index dc3a5ecd219..901ff20a7e7 100644
--- a/src/gallium/auxiliary/draw/draw_vf.c
+++ b/src/gallium/auxiliary/draw/draw_vf.c
@@ -30,6 +30,7 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_util.h"
+#include "rtasm/rtasm_execmem.h"
 
 #include "draw_vf.h"
 
@@ -37,11 +38,6 @@
 #define DRAW_VF_DBG 0
 
 
-/* TODO: remove this */
-extern void 
-_mesa_exec_free( void *addr );
-
-
 static boolean match_fastpath( struct draw_vertex_fetch *vf,
 				 const struct draw_vf_fastpath *fp)
 {
@@ -414,12 +410,12 @@ void draw_vf_destroy( struct draw_vertex_fetch *vf )
       FREE(fp->attr);
 
       /* KW: At the moment, fp->func is constrained to be allocated by
-       * _mesa_exec_alloc(), as the hardwired fastpaths in
+       * rtasm_exec_alloc(), as the hardwired fastpaths in
        * t_vertex_generic.c are handled specially.  It would be nice
        * to unify them, but this probably won't change until this
        * module gets another overhaul.
        */
-      //_mesa_exec_free((void *) fp->func);
+      //rtasm_exec_free((void *) fp->func);
       FREE(fp);
    }
    
diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c
index 1ad2ae756dd..1e889deeea8 100644
--- a/src/gallium/auxiliary/draw/draw_vf_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vf_sse.c
@@ -35,7 +35,7 @@
 
 #if defined(USE_SSE_ASM)
 
-#include "x86/rtasm/x86sse.h"
+#include "rtasm/rtasm_x86sse.h"
 #include "x86/common_x86_asm.h"
 
 
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 27bc66812c3..11ef0c503dc 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -41,7 +41,7 @@
 #include "draw_private.h"
 #include "draw_context.h"
 
-#include "x86/rtasm/x86sse.h"
+#include "rtasm/rtasm_x86sse.h"
 #include "tgsi/exec/tgsi_sse2.h"
 
 
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 62c6a69c63f..29a7f842ed8 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -32,7 +32,7 @@
 #include "tgsi_exec.h"
 #include "tgsi_sse2.h"
 
-#include "x86/rtasm/x86sse.h"
+#include "rtasm/rtasm_x86sse.h"
 
 #if defined(__i386__) || defined(__386__)
 
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index d90066e0257..b18772f4e6d 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -42,7 +42,7 @@
 
 #if defined(__i386__) || defined(__386__)
 
-#include "x86/rtasm/x86sse.h"
+#include "rtasm/rtasm_x86sse.h"
 
 /* Surely this should be defined somewhere in a tgsi header:
  */
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index ea1dde4a7a3..25cf3e94a8f 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -36,7 +36,6 @@
 
 #include "mtypes.h"
 #include "pipe/p_shader_tokens.h"
-#include "x86/rtasm/x86sse.h"
 
 
 #define ST_MAX_SHADER_TOKENS 1024
-- 
cgit v1.2.3


From 90b2beb661f630966788a6e909dc759c99e38973 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Tue, 19 Feb 2008 13:27:13 +0900
Subject: Simplify makefile boilerplate code.

Don't define ASM_SOURCES variable globally -- reserve that variable to be defined
locally by makefiles, together with C_SOURCES and CPP_SOURCES.
---
 configs/beos                              |  4 ++--
 configs/default                           |  2 +-
 configs/freebsd-dri                       |  2 +-
 configs/freebsd-dri-amd64                 |  4 ++--
 configs/freebsd-dri-x86                   |  4 ++--
 configs/linux-directfb                    |  4 ++--
 configs/linux-dri                         |  2 +-
 configs/linux-dri-ppc                     |  2 +-
 configs/linux-dri-x86                     |  4 ++--
 configs/linux-dri-x86-64                  |  4 ++--
 configs/linux-dri-xcb                     |  2 +-
 configs/linux-icc                         |  4 ++--
 configs/linux-icc-static                  |  4 ++--
 configs/linux-indirect                    |  2 +-
 configs/linux-solo                        |  2 +-
 configs/linux-solo-x86                    |  4 ++--
 configs/linux-sparc                       |  4 ++--
 configs/linux-x86                         |  4 ++--
 configs/linux-x86-64                      |  4 ++--
 configs/linux-x86-glide                   |  4 ++--
 configs/sunos5-gcc                        |  4 ++--
 src/gallium/auxiliary/cso_cache/Makefile  |  7 +------
 src/gallium/auxiliary/draw/Makefile       |  7 +------
 src/gallium/auxiliary/pipebuffer/Makefile |  8 +-------
 src/gallium/auxiliary/rtasm/Makefile      |  8 +-------
 src/gallium/auxiliary/tgsi/Makefile       |  8 +-------
 src/gallium/auxiliary/util/Makefile       |  8 +-------
 src/gallium/drivers/failover/Makefile     |  9 +--------
 src/gallium/drivers/i915simple/Makefile   |  9 +--------
 src/gallium/drivers/i965simple/Makefile   | 25 +++++++------------------
 src/gallium/drivers/softpipe/Makefile     |  9 +--------
 src/gallium/winsys/dri/Makefile.template  |  5 +++--
 src/glx/x11/Makefile                      |  6 +++---
 src/mesa/sources                          |  6 +++---
 34 files changed, 60 insertions(+), 125 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/configs/beos b/configs/beos
index 2b74af739d0..c6e972789a5 100644
--- a/configs/beos
+++ b/configs/beos
@@ -26,8 +26,8 @@ ifeq ($(CPU), x86)
 		-DUSE_3DNOW_ASM \
 		-DUSE_SSE_ASM
 	
-	ASM_SOURCES = $(X86_SOURCES)
-	ASM_API = $(X86_API)
+	MESA_ASM_SOURCES = $(X86_SOURCES)
+	GLAPI_ASM_SOURCES = $(X86_API)
 
 	CC = gcc
 	CXX = g++
diff --git a/configs/default b/configs/default
index c9be5ec3e33..48ddd29282b 100644
--- a/configs/default
+++ b/configs/default
@@ -51,7 +51,7 @@ OSMESA_LIB_NAME = lib$(OSMESA_LIB).so
 
 
 # Optional assembly language optimization files for libGL
-ASM_SOURCES = 
+MESA_ASM_SOURCES = 
 
 # GLw widget sources (Append "GLwMDrawA.c" here and add -lXm to GLW_LIB_DEPS in
 # order to build the Motif widget too)
diff --git a/configs/freebsd-dri b/configs/freebsd-dri
index 67d253b8695..6fc1abbc802 100644
--- a/configs/freebsd-dri
+++ b/configs/freebsd-dri
@@ -22,7 +22,7 @@ CFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) -Wmissing-prototypes -std=c99 -
 
 CXXFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(DEFINES) -Wall -ansi -pedantic $(ASM_FLAGS) $(X11_INCLUDES) 
 
-ASM_SOURCES = 
+MESA_ASM_SOURCES = 
 
 # Library/program dependencies
 LIBDRM_CFLAGS = `pkg-config --cflags libdrm`
diff --git a/configs/freebsd-dri-amd64 b/configs/freebsd-dri-amd64
index 39341b9701a..bb6c361398a 100644
--- a/configs/freebsd-dri-amd64
+++ b/configs/freebsd-dri-amd64
@@ -6,5 +6,5 @@ include $(TOP)/configs/freebsd-dri
 CONFIG_NAME = freebsd-dri-x86-64
 
 ASM_FLAGS = -DUSE_X86_64_ASM
-ASM_SOURCES = $(X86-64_SOURCES)
-ASM_API = $(X86-64_API)
+MESA_ASM_SOURCES = $(X86-64_SOURCES)
+GLAPI_ASM_SOURCES = $(X86-64_API)
diff --git a/configs/freebsd-dri-x86 b/configs/freebsd-dri-x86
index af0d27ff47d..9475437fc5b 100644
--- a/configs/freebsd-dri-x86
+++ b/configs/freebsd-dri-x86
@@ -9,5 +9,5 @@ CONFIG_NAME = freebsd-dri-x86
 PIC_FLAGS = 
 
 ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM
-ASM_SOURCES = $(X86_SOURCES)
-ASM_API = $(X86_API)
+MESA_ASM_SOURCES = $(X86_SOURCES)
+GLAPI_ASM_SOURCES = $(X86_API)
diff --git a/configs/linux-directfb b/configs/linux-directfb
index dff27f78503..2ed94fe2750 100644
--- a/configs/linux-directfb
+++ b/configs/linux-directfb
@@ -17,8 +17,8 @@ HAVE_X86 = $(shell uname -m | grep 'i[3-6]86' >/dev/null && echo yes)
 ifeq ($(HAVE_X86), yes)
      CFLAGS   += -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM
      CXXFLAGS += -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM
-     ASM_SOURCES = $(X86_SOURCES)
-     ASM_API = $(X86_API)
+     MESA_ASM_SOURCES = $(X86_SOURCES)
+     GLAPI_ASM_SOURCES = $(X86_API)
 endif
 
 # Directories
diff --git a/configs/linux-dri b/configs/linux-dri
index c45b600013c..67e60cbd4c5 100644
--- a/configs/linux-dri
+++ b/configs/linux-dri
@@ -33,7 +33,7 @@ CFLAGS = -Wall -Wmissing-prototypes -std=c99 -ffast-math \
 CXXFLAGS = -Wall $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES)
 
 
-ASM_SOURCES = 
+MESA_ASM_SOURCES = 
 
 # Library/program dependencies
 EXTRA_LIB_PATH=-L/usr/X11R6/lib
diff --git a/configs/linux-dri-ppc b/configs/linux-dri-ppc
index fb87688065f..a3a3ca83cb3 100644
--- a/configs/linux-dri-ppc
+++ b/configs/linux-dri-ppc
@@ -9,7 +9,7 @@ OPT_FLAGS = -Os -mcpu=603
 PIC_FLAGS = -fPIC
 
 ASM_FLAGS = -DUSE_PPC_ASM -DUSE_VMX_ASM
-ASM_SOURCES = $(PPC_SOURCES)
+MESA_ASM_SOURCES = $(PPC_SOURCES)
 
 # Build only the drivers for cards that exist on PowerPC.  At some point MGA
 # will be added, but not yet.
diff --git a/configs/linux-dri-x86 b/configs/linux-dri-x86
index b196004e58f..ec8242dd68f 100644
--- a/configs/linux-dri-x86
+++ b/configs/linux-dri-x86
@@ -12,6 +12,6 @@ PIC_FLAGS =
 ARCH_FLAGS = -m32
 
 ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM
-ASM_SOURCES = $(X86_SOURCES)
-ASM_API = $(X86_API)
+MESA_ASM_SOURCES = $(X86_SOURCES)
+GLAPI_ASM_SOURCES = $(X86_API)
 
diff --git a/configs/linux-dri-x86-64 b/configs/linux-dri-x86-64
index 821ab3e3366..bb56de375a1 100644
--- a/configs/linux-dri-x86-64
+++ b/configs/linux-dri-x86-64
@@ -8,8 +8,8 @@ CONFIG_NAME = linux-dri-x86-64
 ARCH_FLAGS = -m64
 
 ASM_FLAGS = -DUSE_X86_64_ASM
-ASM_SOURCES = $(X86-64_SOURCES)
-ASM_API = $(X86-64_API)
+MESA_ASM_SOURCES = $(X86-64_SOURCES)
+GLAPI_ASM_SOURCES = $(X86-64_API)
 
 LIB_DIR = lib64
 
diff --git a/configs/linux-dri-xcb b/configs/linux-dri-xcb
index ea4bdf1864c..fbf9b9b2687 100644
--- a/configs/linux-dri-xcb
+++ b/configs/linux-dri-xcb
@@ -33,7 +33,7 @@ CFLAGS = -Wall -Wmissing-prototypes $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) \
 CXXFLAGS = -Wall $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES)
 
 
-ASM_SOURCES = 
+MESA_ASM_SOURCES = 
 
 # Library/program dependencies
 EXTRA_LIB_PATH=$(shell pkg-config --libs-only-L x11)
diff --git a/configs/linux-icc b/configs/linux-icc
index 978a45af70b..d90a1dab3d4 100644
--- a/configs/linux-icc
+++ b/configs/linux-icc
@@ -16,7 +16,7 @@ GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lm -lpthread
 GLUT_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm
 APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -lm
 
-ASM_SOURCES = $(X86_SOURCES)
-ASM_API = $(X86_API)
+MESA_ASM_SOURCES = $(X86_SOURCES)
+GLAPI_ASM_SOURCES = $(X86_API)
 
 
diff --git a/configs/linux-icc-static b/configs/linux-icc-static
index 0c957568c22..384db3bfe48 100644
--- a/configs/linux-icc-static
+++ b/configs/linux-icc-static
@@ -23,5 +23,5 @@ GL_LIB_DEPS =
 GLUT_LIB_DEPS =
 APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm -lpthread -lcxa -lunwind
 
-ASM_SOURCES = $(X86_SOURCES)
-ASM_API = $(X86_API)
+MESA_ASM_SOURCES = $(X86_SOURCES)
+GLAPI_ASM_SOURCES = $(X86_API)
diff --git a/configs/linux-indirect b/configs/linux-indirect
index bd33345ed70..0c4805ea87c 100644
--- a/configs/linux-indirect
+++ b/configs/linux-indirect
@@ -34,7 +34,7 @@ CFLAGS   = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) \
 CXXFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES)
 
 
-ASM_SOURCES = 
+MESA_ASM_SOURCES = 
 
 # Library/program dependencies
 EXTRA_LIB_PATH=-L/usr/X11R6/lib
diff --git a/configs/linux-solo b/configs/linux-solo
index d49b9722282..3145e127757 100644
--- a/configs/linux-solo
+++ b/configs/linux-solo
@@ -33,7 +33,7 @@ CFLAGS   = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) \
 CXXFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES)
 
 
-ASM_SOURCES = 
+MESA_ASM_SOURCES = 
 
 # Library/program dependencies
 DRI_LIB_DEPS = -lm -lpthread -lexpat -ldl -L$(TOP)/$(LIB_DIR) $(PCIACCESS_LIB)
diff --git a/configs/linux-solo-x86 b/configs/linux-solo-x86
index 13cab376587..5f5aa09c826 100644
--- a/configs/linux-solo-x86
+++ b/configs/linux-solo-x86
@@ -9,5 +9,5 @@ CONFIG_NAME = linux-solo-x86
 PIC_FLAGS = 
 
 ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM
-ASM_SOURCES = $(X86_SOURCES)
-ASM_API = $(X86_API)
+MESA_ASM_SOURCES = $(X86_SOURCES)
+GLAPI_ASM_SOURCES = $(X86_API)
diff --git a/configs/linux-sparc b/configs/linux-sparc
index 9925afc19b6..346d438c283 100644
--- a/configs/linux-sparc
+++ b/configs/linux-sparc
@@ -5,5 +5,5 @@ include $(TOP)/configs/linux
 CONFIG_NAME = linux-sparc
 
 #ASM_FLAGS = -DUSE_SPARC_ASM
-#ASM_SOURCES = $(SPARC_SOURCES)
-#ASM_API = $(SPARC_API)
+#MESA_ASM_SOURCES = $(SPARC_SOURCES)
+#GLAPI_ASM_SOURCES = $(SPARC_API)
diff --git a/configs/linux-x86 b/configs/linux-x86
index 18fa06101de..a4cf4e8d624 100644
--- a/configs/linux-x86
+++ b/configs/linux-x86
@@ -5,5 +5,5 @@ include $(TOP)/configs/linux
 CONFIG_NAME = linux-x86
 
 ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM
-ASM_SOURCES = $(X86_SOURCES)
-ASM_API = $(X86_API)
+MESA_ASM_SOURCES = $(X86_SOURCES)
+GLAPI_ASM_SOURCES = $(X86_API)
diff --git a/configs/linux-x86-64 b/configs/linux-x86-64
index 67c03918362..c2441e09d07 100644
--- a/configs/linux-x86-64
+++ b/configs/linux-x86-64
@@ -6,8 +6,8 @@ CONFIG_NAME = linux-x86-64
 
 ARCH_FLAGS = -m64
 
-ASM_SOURCES = $(X86-64_SOURCES)
-ASM_API = $(X86-64_API)
+MESA_ASM_SOURCES = $(X86-64_SOURCES)
+GLAPI_ASM_SOURCES = $(X86-64_API)
 ASM_FLAGS = -DUSE_X86_64_ASM
 
 LIB_DIR = lib64
diff --git a/configs/linux-x86-glide b/configs/linux-x86-glide
index f2f8aeea60e..b963fbdc66b 100644
--- a/configs/linux-x86-glide
+++ b/configs/linux-x86-glide
@@ -15,8 +15,8 @@ CXXFLAGS = -Wall -O3 -ansi -pedantic -fPIC -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199
 GLUT_CFLAGS = -fexceptions
 
 
-ASM_SOURCES = $(X86_SOURCES)
-ASM_API = $(X86_API)
+MESA_ASM_SOURCES = $(X86_SOURCES)
+GLAPI_ASM_SOURCES = $(X86_API)
 
 # Library/program dependencies
 GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -L/usr/local/glide/lib -lglide3x -lm -lpthread
diff --git a/configs/sunos5-gcc b/configs/sunos5-gcc
index 77b293c5452..3fa13d0496f 100644
--- a/configs/sunos5-gcc
+++ b/configs/sunos5-gcc
@@ -16,8 +16,8 @@ ARCH_FLAGS ?=
 
 DEFINES = -D_REENTRANT -DUSE_XSHM
 
-ASM_SOURCES = $(SPARC_SOURCES)
-ASM_API = $(SPARC_API)
+MESA_ASM_SOURCES = $(SPARC_SOURCES)
+GLAPI_ASM_SOURCES = $(SPARC_API)
 ASM_FLAGS = -DUSE_SPARC_ASM
 
 CFLAGS   = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) \
diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile
index 8248b097fde..3e49266163b 100644
--- a/src/gallium/auxiliary/cso_cache/Makefile
+++ b/src/gallium/auxiliary/cso_cache/Makefile
@@ -3,15 +3,10 @@ include $(TOP)/configs/current
 
 LIBNAME = cso_cache
 
-DRIVER_SOURCES = \
+C_SOURCES = \
 	cso_cache.c \
 	cso_hash.c
 
-C_SOURCES = \
-	$(DRIVER_SOURCES)
-
-ASM_SOURCES = 
-
 include ../../Makefile.template
 
 symlinks:
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index c8000cbe9cb..1ee9eca0ca5 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -3,7 +3,7 @@ include $(TOP)/configs/current
 
 LIBNAME = draw
 
-DRIVER_SOURCES = \
+C_SOURCES = \
 	draw_aaline.c \
 	draw_clip.c \
 	draw_vs_exec.c \
@@ -29,11 +29,6 @@ DRIVER_SOURCES = \
 	draw_vf_sse.c \
 	draw_wide_prims.c
 
-C_SOURCES = \
-	$(DRIVER_SOURCES)
-
-ASM_SOURCES = 
-
 include ../../Makefile.template
 
 symlinks:
diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile
index 588629e8701..a9fa518c674 100644
--- a/src/gallium/auxiliary/pipebuffer/Makefile
+++ b/src/gallium/auxiliary/pipebuffer/Makefile
@@ -1,10 +1,9 @@
-
 TOP = ../../../..
 include $(TOP)/configs/current
 
 LIBNAME = pipebuffer
 
-DRIVER_SOURCES = \
+C_SOURCES = \
 	pb_buffer_fenced.c \
 	pb_buffer_malloc.c \
 	pb_bufmgr_fenced.c \
@@ -12,11 +11,6 @@ DRIVER_SOURCES = \
 	pb_bufmgr_pool.c \
 	pb_winsys.c
 
-C_SOURCES = \
-	$(DRIVER_SOURCES)
-
-ASM_SOURCES = 
-
 include ../../Makefile.template
 
 symlinks:
diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile
index edfae2a204f..bc339d2aa6e 100644
--- a/src/gallium/auxiliary/rtasm/Makefile
+++ b/src/gallium/auxiliary/rtasm/Makefile
@@ -1,18 +1,12 @@
-
 TOP = ../../../..
 include $(TOP)/configs/current
 
 LIBNAME = rtasm
 
-DRIVER_SOURCES = \
+C_SOURCES = \
 	rtasm_execmem.c \
 	rtasm_x86sse.c
 
-C_SOURCES = \
-	$(DRIVER_SOURCES)
-
-ASM_SOURCES = 
-
 include ../../Makefile.template
 
 symlinks:
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index 8bb62b2a0a5..71f64b747c2 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -1,10 +1,9 @@
-
 TOP = ../../../..
 include $(TOP)/configs/current
 
 LIBNAME = tgsi
 
-DRIVER_SOURCES = \
+C_SOURCES = \
 	exec/tgsi_exec.c \
 	exec/tgsi_sse2.c \
 	util/tgsi_build.c \
@@ -13,11 +12,6 @@ DRIVER_SOURCES = \
 	util/tgsi_transform.c \
 	util/tgsi_util.c
 
-C_SOURCES = \
-	$(DRIVER_SOURCES)
-
-ASM_SOURCES = 
-
 include ../../Makefile.template
 
 symlinks:
diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile
index 7cc2aa44f96..906a46d6b4e 100644
--- a/src/gallium/auxiliary/util/Makefile
+++ b/src/gallium/auxiliary/util/Makefile
@@ -1,20 +1,14 @@
-
 TOP = ../../../..
 include $(TOP)/configs/current
 
 LIBNAME = util
 
-DRIVER_SOURCES = \
+C_SOURCES = \
 	p_debug.c \
 	p_tile.c \
 	p_util.c \
 	u_mm.c
 
-C_SOURCES = \
-	$(DRIVER_SOURCES)
-
-ASM_SOURCES = 
-
 include ../../Makefile.template
 
 symlinks:
diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile
index 14389bd0551..f08b8df07a1 100644
--- a/src/gallium/drivers/failover/Makefile
+++ b/src/gallium/drivers/failover/Makefile
@@ -1,20 +1,13 @@
-
 TOP = ../../../..
 include $(TOP)/configs/current
 
 LIBNAME = failover
 
-DRIVER_SOURCES = \
+C_SOURCES = \
 	fo_state.c \
 	fo_state_emit.c \
 	fo_context.c 
 
-C_SOURCES = \
-	$(COMMON_SOURCES) \
-	$(DRIVER_SOURCES)
-
-ASM_SOURCES = 
-
 include ../../Makefile.template
 
 symlinks:
diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile
index ee22ba86f9b..2a75f5d57ce 100644
--- a/src/gallium/drivers/i915simple/Makefile
+++ b/src/gallium/drivers/i915simple/Makefile
@@ -1,10 +1,9 @@
-
 TOP = ../../../..
 include $(TOP)/configs/current
 
 LIBNAME = i915simple
 
-DRIVER_SOURCES = \
+C_SOURCES = \
 	i915_blit.c \
 	i915_clear.c \
 	i915_flush.c \
@@ -26,12 +25,6 @@ DRIVER_SOURCES = \
 	i915_fpc_translate.c \
 	i915_surface.c 
 
-C_SOURCES = \
-	$(COMMON_SOURCES) \
-	$(DRIVER_SOURCES)
-
-ASM_SOURCES = 
-
 include ../../Makefile.template
 
 symlinks:
diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile
index 1dec1f97495..cc8580836ce 100644
--- a/src/gallium/drivers/i965simple/Makefile
+++ b/src/gallium/drivers/i965simple/Makefile
@@ -1,14 +1,13 @@
-
 TOP = ../../../..
 include $(TOP)/configs/current
 
 LIBNAME = i965simple
 
-DRIVER_SOURCES = \
-        brw_blit.c \
-        brw_flush.c \
-        brw_strings.c \
-        brw_surface.c \
+C_SOURCES = \
+	brw_blit.c \
+	brw_flush.c \
+	brw_strings.c \
+	brw_surface.c \
 	brw_cc.c \
 	brw_clip.c \
 	brw_clip_line.c \
@@ -31,8 +30,8 @@ DRIVER_SOURCES = \
 	brw_sf.c \
 	brw_sf_emit.c \
 	brw_sf_state.c \
-        brw_shader_info.c \
-        brw_state.c \
+	brw_shader_info.c \
+	brw_state.c \
 	brw_state_batch.c \
 	brw_state_cache.c \
 	brw_state_pool.c \
@@ -51,16 +50,6 @@ DRIVER_SOURCES = \
 	brw_wm_state.c \
 	brw_wm_surface_state.c
 
-C_SOURCES = \
-	$(COMMON_SOURCES) \
-	$(COMMON_BM_SOURCES) \
-	$(MINIGLX_SOURCES) \
-	$(DRIVER_SOURCES)
-
-ASM_SOURCES =
-
-DRIVER_DEFINES = -I.
-
 include ../../Makefile.template
 
 symlinks:
diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile
index 5479daf8ea2..539ffb77f55 100644
--- a/src/gallium/drivers/softpipe/Makefile
+++ b/src/gallium/drivers/softpipe/Makefile
@@ -1,10 +1,9 @@
-
 TOP = ../../../..
 include $(TOP)/configs/current
 
 LIBNAME = softpipe
 
-DRIVER_SOURCES = \
+C_SOURCES = \
 	sp_fs_exec.c \
 	sp_fs_sse.c \
 	sp_fs_llvm.c \
@@ -41,12 +40,6 @@ DRIVER_SOURCES = \
 	sp_tile_cache.c \
 	sp_surface.c 
 
-C_SOURCES = \
-	$(COMMON_SOURCES) \
-	$(DRIVER_SOURCES)
-
-ASM_SOURCES = 
-
 include ../../Makefile.template
 
 symlinks:
diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template
index 65a93bd53e3..3bc1fdd4d40 100644
--- a/src/gallium/winsys/dri/Makefile.template
+++ b/src/gallium/winsys/dri/Makefile.template
@@ -25,8 +25,9 @@ WINOBJ=
 WINLIB=
 INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES)
 
-OBJECTS = $(C_SOURCES:.c=.o) \
-	  $(ASM_SOURCES:.S=.o) 
+OBJECTS = \
+	$(C_SOURCES:.c=.o) \
+	$(ASM_SOURCES:.S=.o) 
 
 else
 # miniglx
diff --git a/src/glx/x11/Makefile b/src/glx/x11/Makefile
index 5f74fcff060..b404727f08b 100644
--- a/src/glx/x11/Makefile
+++ b/src/glx/x11/Makefile
@@ -35,7 +35,7 @@ SOURCES = \
 
 include $(TOP)/src/mesa/sources
 
-MESA_ASM_API = $(addprefix $(TOP)/src/mesa/, $(ASM_API))
+MESA_GLAPI_ASM_SOURCES = $(addprefix $(TOP)/src/mesa/, $(GLAPI_ASM_SOURCES))
 MESA_GLAPI_SOURCES = $(addprefix $(TOP)/src/mesa/, $(GLAPI_SOURCES))
 MESA_GLAPI_OBJECTS = $(addprefix $(TOP)/src/mesa/, $(GLAPI_OBJECTS))
 
@@ -70,11 +70,11 @@ $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME):  $(OBJECTS) Makefile
 		-install $(TOP)/$(LIB_DIR) $(GL_LIB_DEPS) $(OBJECTS)
 
 
-depend: $(SOURCES) $(MESA_GLAPI_SOURCES) $(MESA_ASM_API) Makefile
+depend: $(SOURCES) $(MESA_GLAPI_SOURCES) $(MESA_GLAPI_ASM_SOURCES) Makefile
 	rm -f depend
 	touch depend
 	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(SOURCES) \
-		$(MESA_GLAPI_SOURCES) $(MESA_ASM_API) 
+		$(MESA_GLAPI_SOURCES) $(MESA_GLAPI_ASM_SOURCES) 
 
 
 # Emacs tags
diff --git a/src/mesa/sources b/src/mesa/sources
index 0d185fd5f3f..9e56694893e 100644
--- a/src/mesa/sources
+++ b/src/mesa/sources
@@ -320,7 +320,7 @@ FBDEV_DRIVER_SOURCES =			\
 ALL_SOURCES = \
 	$(GLAPI_SOURCES)	\
 	$(SOLO_SOURCES)		\
-	$(ASM_SOURCES)		\
+	$(MESA_ASM_SOURCES)		\
 	$(COMMON_DRIVER_SOURCES)\
 	$(X11_DRIVER_SOURCES)	\
 	$(FBDEV_DRIVER_SOURCES) \
@@ -353,11 +353,11 @@ CORE_SOURCES = \
 
 SOLO_OBJECTS = \
 	$(SOLO_SOURCES:.c=.o) \
-	$(ASM_SOURCES:.S=.o)
+	$(MESA_ASM_SOURCES:.S=.o)
 
 GLAPI_OBJECTS = \
 	$(GLAPI_SOURCES:.c=.o) \
-	$(ASM_API:.S=.o)
+	$(GLAPI_ASM_SOURCES:.S=.o)
 
 CORE_OBJECTS = $(SOLO_OBJECTS) $(GLAPI_OBJECTS)
 
-- 
cgit v1.2.3


From 1eaf7b775ba0dacff8a3debd7c0f260970e5a61d Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Fri, 15 Feb 2008 18:54:00 +0000
Subject: tgsi: print debug messages on failure to codegenerate

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 29a7f842ed8..779b901f2b8 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -2356,11 +2356,17 @@ tgsi_emit_sse2_fs(
          ok = emit_instruction(
             func,
             &parse.FullToken.FullInstruction );
+
+	 if (!ok) {
+	    debug_printf("failed to translate tgsi opcode %d\n", 
+			 parse.FullToken.FullInstruction.Instruction.Opcode );
+	 }
          break;
 
       case TGSI_TOKEN_TYPE_IMMEDIATE:
          /* XXX implement this */
 	 ok = 0;
+	 debug_printf("failed to emit immediate value\n");
          break;
 
       default:
-- 
cgit v1.2.3


From e8de5c70e3370e9112a5facc870075eea60c4c46 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Sat, 23 Feb 2008 14:14:54 +0900
Subject: Bring in several forgotten MSVC fixes.

---
 src/gallium/auxiliary/pipebuffer/pb_buffer.h    | 1 +
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 4 ++--
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c     | 2 +-
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c     | 4 +++-
 src/gallium/drivers/softpipe/sp_fs_exec.c       | 4 ++--
 src/gallium/drivers/softpipe/sp_fs_llvm.c       | 2 +-
 src/gallium/include/pipe/p_compiler.h           | 8 ++++++++
 7 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 97beb5f72a9..f5b5f4052f8 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -166,6 +166,7 @@ static INLINE void
 pb_destroy(struct pb_buffer *buf)
 {
    assert(buf);
+   assert(buf->vtbl);
    buf->vtbl->destroy(buf);
 }
 
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index ff4fd123f36..66256f3fa70 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -192,8 +192,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr,
    }
    
    /* Some sanity checks */
-   assert(0 <= mm_buf->block->ofs && mm_buf->block->ofs < mm->size);
-   assert(size <= mm_buf->block->size && mm_buf->block->ofs + mm_buf->block->size <= mm->size);
+   assert(0 <= (unsigned)mm_buf->block->ofs && (unsigned)mm_buf->block->ofs < mm->size);
+   assert(size <= (unsigned)mm_buf->block->size && (unsigned)mm_buf->block->ofs + (unsigned)mm_buf->block->size <= mm->size);
    
    _glthread_UNLOCK_MUTEX(mm->mutex);
    return SUPER(mm_buf);
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index d7b18dc9c59..ac524414001 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -2455,7 +2455,7 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
 
    /* execute instructions, until pc is set to -1 */
    while (pc != -1) {
-      assert(pc < mach->NumInstructions);
+      assert(pc < (int) mach->NumInstructions);
       exec_instruction( mach, mach->Instructions + pc, &pc );
    }
 
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index b5c54847e0b..ff74e6117c4 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -299,7 +299,8 @@ static const char *TGSI_SEMANTICS[] =
    "SEMANTIC_BCOLOR",
    "SEMANTIC_FOG",
    "SEMANTIC_PSIZE",
-   "SEMANTIC_GENERIC,"
+   "SEMANTIC_GENERIC",
+   "SEMANTIC_NORMAL"
 };
 
 static const char *TGSI_SEMANTICS_SHORT[] =
@@ -310,6 +311,7 @@ static const char *TGSI_SEMANTICS_SHORT[] =
    "FOG",
    "PSIZE",
    "GENERIC",
+   "NORMAL"
 };
 
 static const char *TGSI_IMMS[] =
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 8cb0534342d..d5bd7a702f1 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -81,7 +81,7 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef,
 
 
 static void
-exec_prepare( struct sp_fragment_shader *base,
+exec_prepare( const struct sp_fragment_shader *base,
 	      struct tgsi_exec_machine *machine,
 	      struct tgsi_sampler *samplers )
 {
@@ -98,7 +98,7 @@ exec_prepare( struct sp_fragment_shader *base,
  * interface:
  */
 static unsigned 
-exec_run( struct sp_fragment_shader *base,
+exec_run( const struct sp_fragment_shader *base,
 	  struct tgsi_exec_machine *machine,
 	  struct quad_header *quad )
 {
diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c
index 22da4714533..34b2b7d4e24 100644
--- a/src/gallium/drivers/softpipe/sp_fs_llvm.c
+++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c
@@ -146,7 +146,7 @@ shade_quad_llvm(struct quad_stage *qs,
 
 
 unsigned 
-run_llvm_fs( struct sp_fragment_shader *base,
+run_llvm_fs( const struct sp_fragment_shader *base,
 	     struct foo *machine )
 {
 }
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index ab527f2afe3..91f3d2ac2d9 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -42,6 +42,14 @@
 #endif
 
 
+#if defined(__MSC__)
+
+/* Avoid 'expression is always true' warning */
+#pragma warning(disable: 4296)
+
+#endif /* __MSC__ */
+
+
 typedef unsigned int       uint;
 typedef unsigned char      ubyte;
 typedef unsigned char      boolean;
-- 
cgit v1.2.3


From 1d77d6caf647424f9c1c481145be0465e96c9e3e Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Sat, 23 Feb 2008 16:15:29 -0700
Subject: gallium: added new tgsi_scan.c / tgsi_scan_shader() function

Used to get information about registers, instructions used in a shader.
---
 src/gallium/auxiliary/tgsi/Makefile         |   1 +
 src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 117 ++++++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/util/tgsi_scan.h |  57 ++++++++++++++
 3 files changed, 175 insertions(+)
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_scan.c
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_scan.h

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index 71f64b747c2..5555639b705 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -9,6 +9,7 @@ C_SOURCES = \
 	util/tgsi_build.c \
 	util/tgsi_dump.c \
 	util/tgsi_parse.c \
+	util/tgsi_scan.c \
 	util/tgsi_transform.c \
 	util/tgsi_util.c
 
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
new file mode 100644
index 00000000000..4b99ac37cce
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -0,0 +1,117 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * TGSI program scan utility.
+ * Used to determine which registers and instructions are used by a shader.
+ *
+ * Authors:  Brian Paul
+ */
+
+
+#include "tgsi_scan.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_build.h"
+
+
+
+
+/**
+ */
+void
+tgsi_scan_shader(const struct tgsi_token *tokens,
+                 struct tgsi_shader_info *info)
+{
+   uint procType;
+   struct tgsi_parse_context parse;
+
+   memset(info, 0, sizeof(*info));
+
+   /**
+    ** Setup to begin parsing input shader
+    **/
+   if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
+      debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n");
+      return;
+   }
+   procType = parse.FullHeader.Processor.Processor;
+   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
+          procType == TGSI_PROCESSOR_VERTEX ||
+          procType == TGSI_PROCESSOR_GEOMETRY);
+
+
+   /**
+    ** Loop over incoming program tokens/instructions
+    */
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         {
+            struct tgsi_full_instruction *fullinst
+               = &parse.FullToken.FullInstruction;
+
+            assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
+            info->opcode_count[fullinst->Instruction.Opcode]++;
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         {
+            struct tgsi_full_declaration *fulldecl
+               = &parse.FullToken.FullDeclaration;
+            uint file = fulldecl->Declaration.File;
+            uint i;
+            for (i = fulldecl->u.DeclarationRange.First;
+                 i <= fulldecl->u.DeclarationRange.Last;
+                 i++) {
+               info->file_mask[file] |= (1 << i);
+               info->file_count[file]++;
+
+               /* special case */
+               if (procType == TGSI_PROCESSOR_FRAGMENT &&
+                   file == TGSI_FILE_OUTPUT &&
+                   fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+                  info->writes_z = TRUE;
+               }
+            }
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         info->immediate_count++;
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+
+   tgsi_parse_free (&parse);
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
new file mode 100644
index 00000000000..757446437c9
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_SCAN_H
+#define TGSI_SCAN_H
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+
+
+/**
+ * Shader summary info
+ */
+struct tgsi_shader_info
+{
+   uint file_mask[TGSI_FILE_COUNT];  /**< bitmask of declared registers */
+   uint file_count[TGSI_FILE_COUNT];  /**< number of declared registers */
+
+   uint immediate_count; /**< number of immediates declared */
+
+   uint opcode_count[TGSI_OPCODE_LAST];  /**< opcode histogram */
+
+   boolean writes_z;  /**< does fragment shader write Z value? */
+};
+
+
+extern void
+tgsi_scan_shader(const struct tgsi_token *tokens,
+                 struct tgsi_shader_info *info);
+
+
+#endif /* TGSI_SCAN_H */
-- 
cgit v1.2.3


From fdcb9260eea8f9b9deaeeade2a46cffbf3dcaa59 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Sun, 24 Feb 2008 17:58:05 +0900
Subject: Add new files.

---
 src/gallium/auxiliary/draw/SConscript   | 3 +++
 src/gallium/auxiliary/tgsi/SConscript   | 2 ++
 src/gallium/drivers/softpipe/SConscript | 3 +++
 3 files changed, 8 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index 8e3a8caa74c..3302dc44f78 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -3,6 +3,8 @@ Import('*')
 draw = env.ConvenienceLibrary(
 	target = 'draw',
 	source = [
+		'draw_aaline.c',
+		'draw_aapoint.c',
 		'draw_clip.c',
 		'draw_vs_exec.c',
 		'draw_vs_sse.c',
@@ -13,6 +15,7 @@ draw = env.ConvenienceLibrary(
 		'draw_flatshade.c',
 		'draw_offset.c',
 		'draw_prim.c',
+		'draw_pstipple.c',
 		'draw_stipple.c',
 		'draw_twoside.c',
 		'draw_unfilled.c',
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 8464bfe9444..4632dcc0728 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -8,6 +8,8 @@ tgsi = env.ConvenienceLibrary(
 		'util/tgsi_build.c',
 		'util/tgsi_dump.c',
 		'util/tgsi_parse.c',
+		'util/tgsi_scan.c',
+		'util/tgsi_transform.c',
 		'util/tgsi_util.c',
 	])
 
diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript
index d581ee8d3ca..4c1a6d5df0b 100644
--- a/src/gallium/drivers/softpipe/SConscript
+++ b/src/gallium/drivers/softpipe/SConscript
@@ -5,6 +5,9 @@ env = env.Clone()
 softpipe = env.ConvenienceLibrary(
 	target = 'softpipe',
 	source = [
+		'sp_fs_exec.c',
+		'sp_fs_sse.c',
+		'sp_fs_llvm.c',
 		'sp_clear.c',
 		'sp_context.c',
 		'sp_draw_arrays.c',
-- 
cgit v1.2.3


From 1410b7bb509ef37c41043b173bc1047257483af0 Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Tue, 26 Feb 2008 10:12:17 -0700
Subject: gallium: collect more shader info in tgsi_scan_shader()

Now getting input/output semantic info so we can eventually remove those
fields from pipe_shader_state.
---
 src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 20 ++++++++++++++++++++
 src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 11 +++++++++++
 2 files changed, 31 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index 4b99ac37cce..a1cc681492f 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -69,6 +69,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
     */
    while( !tgsi_parse_end_of_tokens( &parse ) ) {
 
+      info->num_tokens++;
+
       tgsi_parse_token( &parse );
 
       switch( parse.FullToken.Token.Type ) {
@@ -91,9 +93,27 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
             for (i = fulldecl->u.DeclarationRange.First;
                  i <= fulldecl->u.DeclarationRange.Last;
                  i++) {
+
+               /* only first 32 regs will appear in this bitfield */
                info->file_mask[file] |= (1 << i);
                info->file_count[file]++;
 
+               if (file == TGSI_FILE_INPUT) {
+                  info->input_semantic_name[info->num_inputs]
+                     = fulldecl->Semantic.SemanticName;
+                  info->input_semantic_index[info->num_inputs]
+                     = fulldecl->Semantic.SemanticIndex;
+                  info->num_inputs++;
+               }
+
+               if (file == TGSI_FILE_OUTPUT) {
+                  info->output_semantic_name[info->num_outputs]
+                     = fulldecl->Semantic.SemanticName;
+                  info->output_semantic_index[info->num_outputs]
+                     = fulldecl->Semantic.SemanticIndex;
+                  info->num_outputs++;
+               }
+
                /* special case */
                if (procType == TGSI_PROCESSOR_FRAGMENT &&
                    file == TGSI_FILE_OUTPUT &&
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
index 757446437c9..dc6dfd6d0b3 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
@@ -30,6 +30,7 @@
 
 
 #include "pipe/p_util.h"
+#include "pipe/p_state.h"
 #include "pipe/p_shader_tokens.h"
 
 
@@ -38,6 +39,16 @@
  */
 struct tgsi_shader_info
 {
+   uint num_tokens;
+
+   /* XXX eventually remove the corresponding fields from pipe_shader_state: */
+   ubyte num_inputs;
+   ubyte num_outputs;
+   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
+   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
+   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
+
    uint file_mask[TGSI_FILE_COUNT];  /**< bitmask of declared registers */
    uint file_count[TGSI_FILE_COUNT];  /**< number of declared registers */
 
-- 
cgit v1.2.3


From 36aa9cf781440ce685930586cbf53248cf9c0dc2 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Tue, 26 Feb 2008 20:32:42 +0100
Subject: gallium: Print texture target for short dumps.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index ff74e6117c4..59be14a748c 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -664,6 +664,19 @@ static const char *TGSI_TEXTURES[] =
    "TEXTURE_SHADOWRECT"
 };
 
+static const char *TGSI_TEXTURES_SHORT[] =
+{
+   "UNKNOWN",
+   "1D",
+   "2D",
+   "3D",
+   "CUBE",
+   "RECT",
+   "SHADOW1D",
+   "SHADOW2D",
+   "SHADOWRECT"
+};
+
 static const char *TGSI_SRC_REGISTER_EXTS[] =
 {
    "SRC_REGISTER_EXT_TYPE_SWZ",
@@ -1037,6 +1050,11 @@ dump_instruction_short(
       first_reg = FALSE;
    }
 
+   if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) {
+      TXT( ", " );
+      ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES_SHORT );
+   }
+
    switch( inst->Instruction.Opcode ) {
    case TGSI_OPCODE_IF:
    case TGSI_OPCODE_ELSE:
-- 
cgit v1.2.3


From 9a8a5d7c2fe7f32c8d15bc0a77f86e1f2f995ffe Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Wed, 27 Feb 2008 16:42:15 +0900
Subject: gallium: Replace // comments.

---
 src/gallium/auxiliary/gallivm/gallivm.h      | 2 +-
 src/gallium/auxiliary/gallivm/gallivm_p.h    | 6 +++---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c  | 2 +-
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h  | 8 ++++----
 src/gallium/auxiliary/tgsi/util/tgsi_build.h | 8 ++++----
 src/gallium/auxiliary/tgsi/util/tgsi_dump.h  | 8 ++++----
 src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 8 ++++----
 src/gallium/auxiliary/tgsi/util/tgsi_util.h  | 8 ++++----
 8 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h
index 92da4bca7f4..57912a952fb 100644
--- a/src/gallium/auxiliary/gallivm/gallivm.h
+++ b/src/gallium/auxiliary/gallivm/gallivm.h
@@ -97,7 +97,7 @@ void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee);
 #endif /* MESA_LLVM */
 
 #if defined __cplusplus
-} // extern "C"
+}
 #endif
 
 #endif
diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h
index cfe7b1901b3..ebf3e11cd56 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_p.h
+++ b/src/gallium/auxiliary/gallivm/gallivm_p.h
@@ -32,7 +32,7 @@ struct gallivm_ir {
    int num_components;
    int   num_consts;
 
-   //FIXME: this might not be enough for some shaders
+   /* FIXME: this might not be enough for some shaders */
    struct gallivm_interpolate interpolators[32*4];
    int   num_interp;
 };
@@ -46,7 +46,7 @@ struct gallivm_prog {
 
    int   num_consts;
 
-   //FIXME: this might not be enough for some shaders
+   /* FIXME: this might not be enough for some shaders */
    struct gallivm_interpolate interpolators[32*4];
    int   num_interp;
 };
@@ -104,7 +104,7 @@ static INLINE int gallivm_w_swizzle(int swizzle)
 #endif /* MESA_LLVM */
 
 #if defined __cplusplus
-} // extern "C"
+}
 #endif
 
 #endif
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 779b901f2b8..57ccd86be4c 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -1268,7 +1268,7 @@ emit_store(
       break;
 
    case TGSI_SAT_ZERO_ONE:
-//      assert( 0 );
+      /* assert( 0 ); */
       break;
 
    case TGSI_SAT_MINUS_PLUS_ONE:
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
index 9bee3717665..63b8ef39112 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
@@ -3,7 +3,7 @@
 
 #if defined __cplusplus
 extern "C" {
-#endif // defined __cplusplus
+#endif
 
 struct tgsi_token;
 struct x86_function;
@@ -19,8 +19,8 @@ tgsi_emit_sse2_fs(
    struct x86_function *function );
 
 #if defined __cplusplus
-} // extern "C"
-#endif // defined __cplusplus
+}
+#endif
 
-#endif // !defined TGSI_SSE2_H
+#endif /* TGSI_SSE2_H */
 
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
index 116c78abf34..607860e7fc5 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
@@ -3,7 +3,7 @@
 
 #if defined __cplusplus
 extern "C" {
-#endif // defined __cplusplus
+#endif
 
 /*
  * version
@@ -313,8 +313,8 @@ tgsi_build_dst_register_ext_modulate(
    struct tgsi_header *header );
 
 #if defined __cplusplus
-} // extern "C"
-#endif // defined __cplusplus
+}
+#endif
 
-#endif // !defined TGSI_BUILD_H
+#endif /* TGSI_BUILD_H */
 
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
index 1adc9db2519..b983b382268 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
@@ -3,7 +3,7 @@
 
 #if defined __cplusplus
 extern "C" {
-#endif // defined __cplusplus
+#endif
 
 #define TGSI_DUMP_VERBOSE       1
 #define TGSI_DUMP_NO_IGNORED    2
@@ -21,8 +21,8 @@ tgsi_dump_str(
    unsigned                flags );
 
 #if defined __cplusplus
-} // extern "C"
-#endif // defined __cplusplus
+}
+#endif
 
-#endif // !defined TGSI_DUMP_H
+#endif /* TGSI_DUMP_H */
 
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
index 9372da8d5d1..5ccb5bfcf68 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -3,7 +3,7 @@
 
 #if defined __cplusplus
 extern "C" {
-#endif // defined __cplusplus
+#endif
 
 struct tgsi_full_version
 {
@@ -114,8 +114,8 @@ tgsi_parse_token(
    struct tgsi_parse_context *ctx );
 
 #if defined __cplusplus
-} // extern "C"
-#endif // defined __cplusplus
+}
+#endif
 
-#endif // !defined TGSI_PARSE_H
+#endif /* TGSI_PARSE_H */
 
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.h b/src/gallium/auxiliary/tgsi/util/tgsi_util.h
index ef14446f0e4..45f5f0be0e4 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.h
@@ -3,7 +3,7 @@
 
 #if defined __cplusplus
 extern "C" {
-#endif // defined __cplusplus
+#endif
 
 void *
 tgsi_align_128bit(
@@ -63,8 +63,8 @@ tgsi_util_set_full_src_register_sign_mode(
    unsigned sign_mode );
 
 #if defined __cplusplus
-} // extern "C"
-#endif // defined __cplusplus
+}
+#endif
 
-#endif // !defined TGSI_UTIL_H
+#endif /* TGSI_UTIL_H */
 
-- 
cgit v1.2.3


From 31358282d4bd5a9708dba7be059dcff02233b4e1 Mon Sep 17 00:00:00 2001
From: Brian <brian@i915.localnet.net>
Date: Wed, 27 Feb 2008 09:15:15 -0700
Subject: gallium: better debug messages

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 57ccd86be4c..0da3b0bdb75 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -2268,7 +2268,7 @@ tgsi_emit_sse2(
 	    &parse.FullToken.FullInstruction );
 
 	 if (!ok) {
-	    debug_printf("failed to translate tgsi opcode %d\n", 
+	    debug_printf("failed to translate tgsi opcode %d to SSE\n", 
 			 parse.FullToken.FullInstruction.Instruction.Opcode );
 	 }
          break;
@@ -2276,7 +2276,7 @@ tgsi_emit_sse2(
       case TGSI_TOKEN_TYPE_IMMEDIATE:
          /* XXX implement this */
 	 ok = 0;
-	 debug_printf("failed to emit immediate value\n");
+	 debug_printf("failed to emit immediate value to SSE\n");
 	 break;
 
       default:
@@ -2358,7 +2358,7 @@ tgsi_emit_sse2_fs(
             &parse.FullToken.FullInstruction );
 
 	 if (!ok) {
-	    debug_printf("failed to translate tgsi opcode %d\n", 
+	    debug_printf("failed to translate tgsi opcode %d to SSE\n", 
 			 parse.FullToken.FullInstruction.Instruction.Opcode );
 	 }
          break;
@@ -2366,7 +2366,7 @@ tgsi_emit_sse2_fs(
       case TGSI_TOKEN_TYPE_IMMEDIATE:
          /* XXX implement this */
 	 ok = 0;
-	 debug_printf("failed to emit immediate value\n");
+	 debug_printf("failed to emit immediate value to SSE\n");
          break;
 
       default:
-- 
cgit v1.2.3


From fb40c5a9c7dc91c03f80780e0a09be0cade98705 Mon Sep 17 00:00:00 2001
From: Brian <brian@i915.localnet.net>
Date: Wed, 27 Feb 2008 15:06:04 -0700
Subject: gallium: added uses_kill field to tgsi_shader_info

---
 src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 3 +++
 src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index a1cc681492f..a973aeb62f9 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -133,5 +133,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
       }
    }
 
+   info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] ||
+                      info->opcode_count[TGSI_OPCODE_KILP]);
+
    tgsi_parse_free (&parse);
 }
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
index dc6dfd6d0b3..d10d300c4d9 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
@@ -57,6 +57,7 @@ struct tgsi_shader_info
    uint opcode_count[TGSI_OPCODE_LAST];  /**< opcode histogram */
 
    boolean writes_z;  /**< does fragment shader write Z value? */
+   boolean uses_kill;  /**< KIL or KILP instruction used? */
 };
 
 
-- 
cgit v1.2.3


From 3197ad5a56ee94773f974ac727b316c5adfe1b6f Mon Sep 17 00:00:00 2001
From: Brian <brian@i915.localnet.net>
Date: Wed, 27 Feb 2008 15:47:24 -0700
Subject: gallium: added file_max[] array to tgsi_shader_info

Records the highest index of a declared register.
---
 src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 5 ++++-
 src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index a973aeb62f9..8c886edc65a 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -46,10 +46,12 @@ void
 tgsi_scan_shader(const struct tgsi_token *tokens,
                  struct tgsi_shader_info *info)
 {
-   uint procType;
+   uint procType, i;
    struct tgsi_parse_context parse;
 
    memset(info, 0, sizeof(*info));
+   for (i = 0; i < TGSI_FILE_COUNT; i++)
+      info->file_max[i] = -1;
 
    /**
     ** Setup to begin parsing input shader
@@ -97,6 +99,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                /* only first 32 regs will appear in this bitfield */
                info->file_mask[file] |= (1 << i);
                info->file_count[file]++;
+               info->file_max[file] = MAX2(info->file_max[file], i);
 
                if (file == TGSI_FILE_INPUT) {
                   info->input_semantic_name[info->num_inputs]
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
index d10d300c4d9..563ee900fa5 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
@@ -51,6 +51,7 @@ struct tgsi_shader_info
 
    uint file_mask[TGSI_FILE_COUNT];  /**< bitmask of declared registers */
    uint file_count[TGSI_FILE_COUNT];  /**< number of declared registers */
+   int file_max[TGSI_FILE_COUNT];  /**< highest index of declared registers */
 
    uint immediate_count; /**< number of immediates declared */
 
-- 
cgit v1.2.3


From 679b6cf0a0e662513c8d7732049c44916e0e9e86 Mon Sep 17 00:00:00 2001
From: Brian <brian@i915.localnet.net>
Date: Wed, 27 Feb 2008 16:00:04 -0700
Subject: gallium: include p_compiler.h instead of p_util.h

---
 src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
index 563ee900fa5..0530bc6b510 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
@@ -29,7 +29,7 @@
 #define TGSI_SCAN_H
 
 
-#include "pipe/p_util.h"
+#include "pipe/p_compiler.h"
 #include "pipe/p_state.h"
 #include "pipe/p_shader_tokens.h"
 
-- 
cgit v1.2.3


From 7df26d76d2a37e9e828296bfbcf7cec04bfbe233 Mon Sep 17 00:00:00 2001
From: Brian <brian@i915.localnet.net>
Date: Wed, 27 Feb 2008 16:01:35 -0700
Subject: gallium: include p_util.h

---
 src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index 8c886edc65a..b7bbff1689f 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -37,6 +37,7 @@
 #include "tgsi/util/tgsi_parse.h"
 #include "tgsi/util/tgsi_build.h"
 
+#include "pipe/p_util.h"
 
 
 
-- 
cgit v1.2.3


From 510bc3535c4af68db71e5ffd19f3e21e10ec6004 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Thu, 28 Feb 2008 11:23:01 +0900
Subject: gallium: Fix sign/unsign comparison.

---
 src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index b7bbff1689f..cf6de1e82f3 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -100,7 +100,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                /* only first 32 regs will appear in this bitfield */
                info->file_mask[file] |= (1 << i);
                info->file_count[file]++;
-               info->file_max[file] = MAX2(info->file_max[file], i);
+               info->file_max[file] = MAX2(info->file_max[file], (int)i);
 
                if (file == TGSI_FILE_INPUT) {
                   info->input_semantic_name[info->num_inputs]
-- 
cgit v1.2.3


From e280bd50cc46ddbf5ac7fbaafc934d1048d77ba2 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Thu, 28 Feb 2008 21:25:54 +0900
Subject: gallium: Fix MSVC warnings.

---
 src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index cf6de1e82f3..ea4a72967d6 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -104,17 +104,17 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
 
                if (file == TGSI_FILE_INPUT) {
                   info->input_semantic_name[info->num_inputs]
-                     = fulldecl->Semantic.SemanticName;
+                     = (ubyte)fulldecl->Semantic.SemanticName;
                   info->input_semantic_index[info->num_inputs]
-                     = fulldecl->Semantic.SemanticIndex;
+                     = (ubyte)fulldecl->Semantic.SemanticIndex;
                   info->num_inputs++;
                }
 
                if (file == TGSI_FILE_OUTPUT) {
                   info->output_semantic_name[info->num_outputs]
-                     = fulldecl->Semantic.SemanticName;
+                     = (ubyte)fulldecl->Semantic.SemanticName;
                   info->output_semantic_index[info->num_outputs]
-                     = fulldecl->Semantic.SemanticIndex;
+                     = (ubyte)fulldecl->Semantic.SemanticIndex;
                   info->num_outputs++;
                }
 
-- 
cgit v1.2.3


From feb02084a88ca6e23c34fa06e963765c890f0b65 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 12 Mar 2008 11:37:02 +0100
Subject: tgsi: Dump source register divide component.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 59be14a748c..a393c65da6f 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -1047,6 +1047,11 @@ dump_instruction_short(
          CHR( ')' );
       }
 
+      if (src->SrcRegisterExtSwz.ExtDivide != TGSI_EXTSWIZZLE_ONE) {
+         CHR( '/' );
+         ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES_SHORT );
+      }
+
       first_reg = FALSE;
    }
 
-- 
cgit v1.2.3


From e5b1a53c9f9ad247272415e0e21e83cfe00728a9 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 12 Mar 2008 15:29:39 +0100
Subject: tgsi: Dump TXP opcode.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index a393c65da6f..1913d482f1a 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -459,7 +459,8 @@ static const char *TGSI_OPCODES[] =
    "OPCODE_IFC",
    "OPCODE_BREAKC",
    "OPCODE_KIL",
-   "OPCODE_END"
+   "OPCODE_END",
+   "OPCODE_TXP"
 };
 
 static const char *TGSI_OPCODES_SHORT[] =
@@ -597,7 +598,8 @@ static const char *TGSI_OPCODES_SHORT[] =
    "IFC",
    "BREAKC",
    "KIL",
-   "END"
+   "END",
+   "TXP"
 };
 
 static const char *TGSI_SATS[] =
-- 
cgit v1.2.3


From ba75e82b6ebaf88dd2e4a8f764b2d296d715bf8a Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 12 Mar 2008 16:41:25 +0100
Subject: tgsi: Remove ExtDivide field from existence. Implement OPCODE_TXP.

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c        | 46 +++++++---------------
 src/gallium/auxiliary/tgsi/util/tgsi_build.c       |  5 ---
 src/gallium/auxiliary/tgsi/util/tgsi_build.h       |  1 -
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c        |  9 -----
 .../drivers/i915simple/i915_fpc_translate.c        | 12 +++---
 src/gallium/include/pipe/p_shader_tokens.h         | 13 +-----
 6 files changed, 21 insertions(+), 65 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index ac524414001..f2ed9e0353e 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -1220,7 +1220,8 @@ fetch_texel( struct tgsi_sampler *sampler,
 static void
 exec_tex(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst,
-         boolean biasLod)
+         boolean biasLod,
+         boolean projected)
 {
    const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
    union tgsi_exec_channel r[8];
@@ -1234,17 +1235,9 @@ exec_tex(struct tgsi_exec_machine *mach,
 
       FETCH(&r[0], 0, CHAN_X);
 
-      switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
-      case TGSI_EXTSWIZZLE_W:
+      if (projected) {
          FETCH(&r[1], 0, CHAN_W);
          micro_div( &r[0], &r[0], &r[1] );
-         break;
-
-      case TGSI_EXTSWIZZLE_ONE:
-         break;
-
-      default:
-         assert (0);
       }
 
       if (biasLod) {
@@ -1266,19 +1259,11 @@ exec_tex(struct tgsi_exec_machine *mach,
       FETCH(&r[1], 0, CHAN_Y);
       FETCH(&r[2], 0, CHAN_Z);
 
-      switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
-      case TGSI_EXTSWIZZLE_W:
+      if (projected) {
          FETCH(&r[3], 0, CHAN_W);
          micro_div( &r[0], &r[0], &r[3] );
          micro_div( &r[1], &r[1], &r[3] );
          micro_div( &r[2], &r[2], &r[3] );
-         break;
-
-      case TGSI_EXTSWIZZLE_ONE:
-         break;
-
-      default:
-         assert (0);
       }
 
       if (biasLod) {
@@ -1300,19 +1285,11 @@ exec_tex(struct tgsi_exec_machine *mach,
       FETCH(&r[1], 0, CHAN_Y);
       FETCH(&r[2], 0, CHAN_Z);
 
-      switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
-      case TGSI_EXTSWIZZLE_W:
+      if (projected) {
          FETCH(&r[3], 0, CHAN_W);
          micro_div( &r[0], &r[0], &r[3] );
          micro_div( &r[1], &r[1], &r[3] );
          micro_div( &r[2], &r[2], &r[3] );
-         break;
-
-      case TGSI_EXTSWIZZLE_ONE:
-         break;
-
-      default:
-         assert (0);
       }
 
       if (biasLod) {
@@ -2007,14 +1984,14 @@ exec_instruction(
       /* simple texture lookup */
       /* src[0] = texcoord */
       /* src[1] = sampler unit */
-      exec_tex(mach, inst, FALSE);
+      exec_tex(mach, inst, FALSE, FALSE);
       break;
 
    case TGSI_OPCODE_TXB:
       /* Texture lookup with lod bias */
       /* src[0] = texcoord (src[0].w = LOD bias) */
       /* src[1] = sampler unit */
-      exec_tex(mach, inst, TRUE);
+      exec_tex(mach, inst, TRUE, FALSE);
       break;
 
    case TGSI_OPCODE_TXD:
@@ -2030,7 +2007,14 @@ exec_instruction(
       /* Texture lookup with explit LOD */
       /* src[0] = texcoord (src[0].w = LOD) */
       /* src[1] = sampler unit */
-      exec_tex(mach, inst, TRUE);
+      exec_tex(mach, inst, TRUE, FALSE);
+      break;
+
+   case TGSI_OPCODE_TXP:
+      /* Texture lookup with projection */
+      /* src[0] = texcoord (src[0].w = projection) */
+      /* src[1] = sampler unit */
+      exec_tex(mach, inst, FALSE, TRUE);
       break;
 
    case TGSI_OPCODE_UP2H:
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
index a00ff1c2a5f..9c883ab7043 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
@@ -719,7 +719,6 @@ tgsi_build_full_instruction(
             reg->SrcRegisterExtSwz.NegateY,
             reg->SrcRegisterExtSwz.NegateZ,
             reg->SrcRegisterExtSwz.NegateW,
-            reg->SrcRegisterExtSwz.ExtDivide,
             prev_token,
             instruction,
             header );
@@ -1057,7 +1056,6 @@ tgsi_default_src_register_ext_swz( void )
    src_register_ext_swz.NegateY = 0;
    src_register_ext_swz.NegateZ = 0;
    src_register_ext_swz.NegateW = 0;
-   src_register_ext_swz.ExtDivide = TGSI_EXTSWIZZLE_ONE;
    src_register_ext_swz.Padding = 0;
    src_register_ext_swz.Extended = 0;
 
@@ -1084,7 +1082,6 @@ tgsi_build_src_register_ext_swz(
    unsigned negate_y,
    unsigned negate_z,
    unsigned negate_w,
-   unsigned ext_divide,
    struct tgsi_token *prev_token,
    struct tgsi_instruction *instruction,
    struct tgsi_header *header )
@@ -1099,7 +1096,6 @@ tgsi_build_src_register_ext_swz(
    assert( negate_y <= 1 );
    assert( negate_z <= 1 );
    assert( negate_w <= 1 );
-   assert( ext_divide <= TGSI_EXTSWIZZLE_ONE );
 
    src_register_ext_swz = tgsi_default_src_register_ext_swz();
    src_register_ext_swz.ExtSwizzleX = ext_swizzle_x;
@@ -1110,7 +1106,6 @@ tgsi_build_src_register_ext_swz(
    src_register_ext_swz.NegateY = negate_y;
    src_register_ext_swz.NegateZ = negate_z;
    src_register_ext_swz.NegateW = negate_w;
-   src_register_ext_swz.ExtDivide = ext_divide;
 
    prev_token->Extended = 1;
    instruction_grow( instruction, header );
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
index 607860e7fc5..80bffc4ae71 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
@@ -229,7 +229,6 @@ tgsi_build_src_register_ext_swz(
    unsigned negate_y,
    unsigned negate_z,
    unsigned negate_w,
-   unsigned ext_divide,
    struct tgsi_token *prev_token,
    struct tgsi_instruction *instruction,
    struct tgsi_header *header );
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 1913d482f1a..ceb407b884b 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -1049,11 +1049,6 @@ dump_instruction_short(
          CHR( ')' );
       }
 
-      if (src->SrcRegisterExtSwz.ExtDivide != TGSI_EXTSWIZZLE_ONE) {
-         CHR( '/' );
-         ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES_SHORT );
-      }
-
       first_reg = FALSE;
    }
 
@@ -1368,10 +1363,6 @@ dump_instruction_verbose(
             TXT( "\nNegateW   : " );
             UID( src->SrcRegisterExtSwz.NegateW );
          }
-         if( deflt || fs->SrcRegisterExtSwz.ExtDivide != src->SrcRegisterExtSwz.ExtDivide ) {
-            TXT( "\nExtDivide  : " );
-            ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES );
-         }
          if( ignored ) {
             TXT( "\nPadding   : " );
             UIX( src->SrcRegisterExtSwz.Padding );
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
index c2d9a93c6c8..7b4fca5db1b 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -872,19 +872,17 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_TEX:
-      if (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide
-          == TGSI_EXTSWIZZLE_W) {
-         emit_tex(p, inst, T0_TEXLDP);
-      }
-      else {
-         emit_tex(p, inst, T0_TEXLD);
-      }
+      emit_tex(p, inst, T0_TEXLD);
       break;
 
    case TGSI_OPCODE_TXB:
       emit_tex(p, inst, T0_TEXLDB);
       break;
 
+   case TGSI_OPCODE_TXP:
+      emit_tex(p, inst, T0_TEXLDP);
+      break;
+
    case TGSI_OPCODE_XPD:
       /* Cross product:
        *      result.x = src0.y * src1.z - src0.z * src1.y;
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index f0bb43bc5ba..210169f54f2 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -655,9 +655,6 @@ struct tgsi_src_register_ext
  *
  * NegateX, NegateY, NegateZ and NegateW negate individual components of the
  * source register.
- *
- * ExtDivide specifies which component is used to divide all components of the
- * source register.
  */
 
 struct tgsi_src_register_ext_swz
@@ -671,15 +668,7 @@ struct tgsi_src_register_ext_swz
    unsigned NegateY      : 1;    /* BOOL */
    unsigned NegateZ      : 1;    /* BOOL */
    unsigned NegateW      : 1;    /* BOOL */
-
-   /*
-    * XXX: Do not use. This field has been depricated.
-    * XXX: If using in conjunction with OPCODE_TEX, please use OPCODE_TXP
-    * XXX: and, if needed, perform a swizzle on the texture coordinate.
-    */
-   unsigned ExtDivide    : 4;    /* TGSI_EXTSWIZZLE_ */
-   
-   unsigned Padding      : 3;
+   unsigned Padding      : 7;
    unsigned Extended     : 1;    /* BOOL */
 };
 
-- 
cgit v1.2.3


From 5ba2f0a507b8d413eea2eb7da09c304ab5a8d3f1 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Thu, 13 Mar 2008 09:57:01 +0000
Subject: tgsi: bump MAX_SRC_REGS to 4, for TXD

---
 src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
index 5ccb5bfcf68..40083728d64 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -56,7 +56,7 @@ struct tgsi_full_immediate
 };
 
 #define TGSI_FULL_MAX_DST_REGISTERS 2
-#define TGSI_FULL_MAX_SRC_REGISTERS 3
+#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */
 
 struct tgsi_full_instruction
 {
-- 
cgit v1.2.3


From ddb4e5cbaced3e96117a97fe362ab890794f5ab7 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Thu, 13 Mar 2008 10:01:38 +0000
Subject: tgsi: replace erroneous use of FETCH with emit_tempf

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 0da3b0bdb75..4e80597b3f3 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -1821,7 +1821,11 @@ emit_instruction(
          STORE( func, *inst, 5, 0, CHAN_Z );
       }
       IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
-         FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
+	 emit_tempf(
+	    func,
+	    0,
+	    TGSI_EXEC_TEMP_ONE_I,
+	    TGSI_EXEC_TEMP_ONE_C );
          STORE( func, *inst, 0, 0, CHAN_W );
       }
       break;
@@ -2021,11 +2025,19 @@ emit_instruction(
          STORE( func, *inst, 0, 0, CHAN_Y );
       }
       IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
-         FETCH( func, *inst, 0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C );
+	 emit_tempf(
+	    func,
+	    0,
+	    TGSI_EXEC_TEMP_00000000_I,
+	    TGSI_EXEC_TEMP_00000000_C );
          STORE( func, *inst, 0, 0, CHAN_Z );
       }
       IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
-         FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
+	 emit_tempf(
+	    func,
+	    0,
+	    TGSI_EXEC_TEMP_ONE_I,
+	    TGSI_EXEC_TEMP_ONE_C );
          STORE( func, *inst, 0, 0, CHAN_W );
       }
       break;
-- 
cgit v1.2.3


From bcb454e7a6e2f7efae114321c65bf98e91d5892f Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Thu, 13 Mar 2008 18:12:36 +0100
Subject: tgsi: Drop pre-ps_2_0 opcodes.

---
 src/gallium/auxiliary/gallivm/tgsitollvm.cpp |  68 -------------
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c  |  46 ++-------
 src/gallium/include/pipe/p_shader_tokens.h   | 147 ++++++++++++---------------
 3 files changed, 71 insertions(+), 190 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index d2b747ffd1c..ab9e7a06fba 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -588,40 +588,6 @@ translate_instruction(llvm::Module *module,
       break;
    case TGSI_OPCODE_NOP:
       break;
-   case TGSI_OPCODE_TEXBEM:
-      break;
-   case TGSI_OPCODE_TEXBEML:
-      break;
-   case TGSI_OPCODE_TEXREG2AR:
-      break;
-   case TGSI_OPCODE_TEXM3X2PAD:
-      break;
-   case TGSI_OPCODE_TEXM3X2TEX:
-      break;
-   case TGSI_OPCODE_TEXM3X3PAD:
-      break;
-   case TGSI_OPCODE_TEXM3X3TEX:
-      break;
-   case TGSI_OPCODE_TEXM3X3SPEC:
-      break;
-   case TGSI_OPCODE_TEXM3X3VSPEC:
-      break;
-   case TGSI_OPCODE_TEXREG2GB:
-      break;
-   case TGSI_OPCODE_TEXREG2RGB:
-      break;
-   case TGSI_OPCODE_TEXDP3TEX:
-      break;
-   case TGSI_OPCODE_TEXDP3:
-      break;
-   case TGSI_OPCODE_TEXM3X3:
-      break;
-   case TGSI_OPCODE_TEXM3X2DEPTH:
-      break;
-   case TGSI_OPCODE_TEXDEPTH:
-      break;
-   case TGSI_OPCODE_BEM:
-      break;
    case TGSI_OPCODE_M4X3:
       break;
    case TGSI_OPCODE_M3X4:
@@ -981,40 +947,6 @@ translate_instructionir(llvm::Module *module,
       break;
    case TGSI_OPCODE_NOP:
       break;
-   case TGSI_OPCODE_TEXBEM:
-      break;
-   case TGSI_OPCODE_TEXBEML:
-      break;
-   case TGSI_OPCODE_TEXREG2AR:
-      break;
-   case TGSI_OPCODE_TEXM3X2PAD:
-      break;
-   case TGSI_OPCODE_TEXM3X2TEX:
-      break;
-   case TGSI_OPCODE_TEXM3X3PAD:
-      break;
-   case TGSI_OPCODE_TEXM3X3TEX:
-      break;
-   case TGSI_OPCODE_TEXM3X3SPEC:
-      break;
-   case TGSI_OPCODE_TEXM3X3VSPEC:
-      break;
-   case TGSI_OPCODE_TEXREG2GB:
-      break;
-   case TGSI_OPCODE_TEXREG2RGB:
-      break;
-   case TGSI_OPCODE_TEXDP3TEX:
-      break;
-   case TGSI_OPCODE_TEXDP3:
-      break;
-   case TGSI_OPCODE_TEXM3X3:
-      break;
-   case TGSI_OPCODE_TEXM3X2DEPTH:
-      break;
-   case TGSI_OPCODE_TEXDEPTH:
-      break;
-   case TGSI_OPCODE_BEM:
-      break;
    case TGSI_OPCODE_M4X3:
       break;
    case TGSI_OPCODE_M3X4:
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index ceb407b884b..aa782787009 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -324,7 +324,7 @@ static const char *TGSI_IMMS_SHORT[] =
    "FLT32"
 };
 
-static const char *TGSI_OPCODES[] =
+static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] =
 {
    "OPCODE_ARL",
    "OPCODE_MOV",
@@ -380,6 +380,7 @@ static const char *TGSI_OPCODES[] =
    "OPCODE_STR",
    "OPCODE_TEX",
    "OPCODE_TXD",
+   "OPCODE_TXP",
    "OPCODE_UP2H",
    "OPCODE_UP2US",
    "OPCODE_UP4B",
@@ -433,23 +434,6 @@ static const char *TGSI_OPCODES[] =
    "OPCODE_NOISE3",
    "OPCODE_NOISE4",
    "OPCODE_NOP",
-   "OPCODE_TEXBEM",
-   "OPCODE_TEXBEML",
-   "OPCODE_TEXREG2AR",
-   "OPCODE_TEXM3X2PAD",
-   "OPCODE_TEXM3X2TEX",
-   "OPCODE_TEXM3X3PAD",
-   "OPCODE_TEXM3X3TEX",
-   "OPCODE_TEXM3X3SPEC",
-   "OPCODE_TEXM3X3VSPEC",
-   "OPCODE_TEXREG2GB",
-   "OPCODE_TEXREG2RGB",
-   "OPCODE_TEXDP3TEX",
-   "OPCODE_TEXDP3",
-   "OPCODE_TEXM3X3",
-   "OPCODE_TEXM3X2DEPTH",
-   "OPCODE_TEXDEPTH",
-   "OPCODE_BEM",
    "OPCODE_M4X3",
    "OPCODE_M3X4",
    "OPCODE_M3X3",
@@ -459,11 +443,10 @@ static const char *TGSI_OPCODES[] =
    "OPCODE_IFC",
    "OPCODE_BREAKC",
    "OPCODE_KIL",
-   "OPCODE_END",
-   "OPCODE_TXP"
+   "OPCODE_END"
 };
 
-static const char *TGSI_OPCODES_SHORT[] =
+static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] =
 {
    "ARL",
    "MOV",
@@ -519,6 +502,7 @@ static const char *TGSI_OPCODES_SHORT[] =
    "STR",
    "TEX",
    "TXD",
+   "TXP",
    "UP2H",
    "UP2US",
    "UP4B",
@@ -572,23 +556,6 @@ static const char *TGSI_OPCODES_SHORT[] =
    "NOISE3",
    "NOISE4",
    "NOP",
-   "TEXBEM",
-   "TEXBEML",
-   "TEXREG2AR",
-   "TEXM3X2PAD",
-   "TEXM3X2TEX",
-   "TEXM3X3PAD",
-   "TEXM3X3TEX",
-   "TEXM3X3SPEC",
-   "TEXM3X3VSPEC",
-   "TEXREG2GB",
-   "TEXREG2RGB",
-   "TEXDP3TEX",
-   "TEXDP3",
-   "TEXM3X3",
-   "TEXM3X2DEPTH",
-   "TEXDEPTH",
-   "BEM",
    "M4X3",
    "M3X4",
    "M3X3",
@@ -598,8 +565,7 @@ static const char *TGSI_OPCODES_SHORT[] =
    "IFC",
    "BREAKC",
    "KIL",
-   "END",
-   "TXP"
+   "END"
 };
 
 static const char *TGSI_SATS[] =
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index e3947f663be..f05e1a34b32 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -227,22 +227,22 @@ struct tgsi_immediate_float32
 #define TGSI_OPCODE_STR                 51
 #define TGSI_OPCODE_TEX                 52
 #define TGSI_OPCODE_TXD                 53
-#define TGSI_OPCODE_TXP                 134
-#define TGSI_OPCODE_UP2H                54
-#define TGSI_OPCODE_UP2US               55
-#define TGSI_OPCODE_UP4B                56
-#define TGSI_OPCODE_UP4UB               57
-#define TGSI_OPCODE_X2D                 58
+#define TGSI_OPCODE_TXP                 54
+#define TGSI_OPCODE_UP2H                55
+#define TGSI_OPCODE_UP2US               56
+#define TGSI_OPCODE_UP4B                57
+#define TGSI_OPCODE_UP4UB               58
+#define TGSI_OPCODE_X2D                 59
 
 /*
  * GL_NV_vertex_program2
  */
-#define TGSI_OPCODE_ARA                 59
-#define TGSI_OPCODE_ARR                 60
-#define TGSI_OPCODE_BRA                 61
-#define TGSI_OPCODE_CAL                 62
-#define TGSI_OPCODE_RET                 63
-#define TGSI_OPCODE_SSG                 64
+#define TGSI_OPCODE_ARA                 60
+#define TGSI_OPCODE_ARR                 61
+#define TGSI_OPCODE_BRA                 62
+#define TGSI_OPCODE_CAL                 63
+#define TGSI_OPCODE_RET                 64
+#define TGSI_OPCODE_SSG                 65
 
 /*
  * GL_ARB_vertex_program
@@ -253,9 +253,9 @@ struct tgsi_immediate_float32
 /*
  * GL_ARB_fragment_program
  */
-#define TGSI_OPCODE_CMP                 65
-#define TGSI_OPCODE_SCS                 66
-#define TGSI_OPCODE_TXB                 67
+#define TGSI_OPCODE_CMP                 66
+#define TGSI_OPCODE_SCS                 67
+#define TGSI_OPCODE_TXB                 68
 
 /*
  * GL_NV_fragment_program_option
@@ -265,19 +265,19 @@ struct tgsi_immediate_float32
 /*
  * GL_NV_fragment_program2
  */
-#define TGSI_OPCODE_NRM                 68
-#define TGSI_OPCODE_DIV                 69
-#define TGSI_OPCODE_DP2                 70
+#define TGSI_OPCODE_NRM                 69
+#define TGSI_OPCODE_DIV                 70
+#define TGSI_OPCODE_DP2                 71
 #define TGSI_OPCODE_DP2A                TGSI_OPCODE_DOT2ADD
-#define TGSI_OPCODE_TXL                 71
-#define TGSI_OPCODE_BRK                 72
-#define TGSI_OPCODE_IF                  73
-#define TGSI_OPCODE_LOOP                74
-#define TGSI_OPCODE_REP                 75
-#define TGSI_OPCODE_ELSE                76
-#define TGSI_OPCODE_ENDIF               77
-#define TGSI_OPCODE_ENDLOOP             78
-#define TGSI_OPCODE_ENDREP              79
+#define TGSI_OPCODE_TXL                 72
+#define TGSI_OPCODE_BRK                 73
+#define TGSI_OPCODE_IF                  74
+#define TGSI_OPCODE_LOOP                75
+#define TGSI_OPCODE_REP                 76
+#define TGSI_OPCODE_ELSE                77
+#define TGSI_OPCODE_ENDIF               78
+#define TGSI_OPCODE_ENDLOOP             79
+#define TGSI_OPCODE_ENDREP              80
 
 /*
  * GL_NV_vertex_program2_option
@@ -286,26 +286,26 @@ struct tgsi_immediate_float32
 /*
  * GL_NV_vertex_program3
  */
-#define TGSI_OPCODE_PUSHA               80
-#define TGSI_OPCODE_POPA                81
+#define TGSI_OPCODE_PUSHA               81
+#define TGSI_OPCODE_POPA                82
 
 /*
  * GL_NV_gpu_program4
  */
-#define TGSI_OPCODE_CEIL                82
-#define TGSI_OPCODE_I2F                 83
-#define TGSI_OPCODE_NOT                 84
-#define TGSI_OPCODE_TRUNC               85
-#define TGSI_OPCODE_SHL                 86
-#define TGSI_OPCODE_SHR                 87
-#define TGSI_OPCODE_AND                 88
-#define TGSI_OPCODE_OR                  89
-#define TGSI_OPCODE_MOD                 90
-#define TGSI_OPCODE_XOR                 91
-#define TGSI_OPCODE_SAD                 92
-#define TGSI_OPCODE_TXF                 93
-#define TGSI_OPCODE_TXQ                 94
-#define TGSI_OPCODE_CONT                95
+#define TGSI_OPCODE_CEIL                83
+#define TGSI_OPCODE_I2F                 84
+#define TGSI_OPCODE_NOT                 85
+#define TGSI_OPCODE_TRUNC               86
+#define TGSI_OPCODE_SHL                 87
+#define TGSI_OPCODE_SHR                 88
+#define TGSI_OPCODE_AND                 89
+#define TGSI_OPCODE_OR                  90
+#define TGSI_OPCODE_MOD                 91
+#define TGSI_OPCODE_XOR                 92
+#define TGSI_OPCODE_SAD                 93
+#define TGSI_OPCODE_TXF                 94
+#define TGSI_OPCODE_TXQ                 95
+#define TGSI_OPCODE_CONT                96
 
 /*
  * GL_NV_vertex_program4
@@ -321,70 +321,53 @@ struct tgsi_immediate_float32
  * GL_NV_geometry_program4
  */
 /* Same as GL_NV_gpu_program4 */
-#define TGSI_OPCODE_EMIT                96
-#define TGSI_OPCODE_ENDPRIM             97
+#define TGSI_OPCODE_EMIT                97
+#define TGSI_OPCODE_ENDPRIM             98
 
 /*
  * GLSL
  */
-#define TGSI_OPCODE_BGNLOOP2            98
-#define TGSI_OPCODE_BGNSUB              99
-#define TGSI_OPCODE_ENDLOOP2            100
-#define TGSI_OPCODE_ENDSUB              101
+#define TGSI_OPCODE_BGNLOOP2            99
+#define TGSI_OPCODE_BGNSUB              100
+#define TGSI_OPCODE_ENDLOOP2            101
+#define TGSI_OPCODE_ENDSUB              102
 #define TGSI_OPCODE_INT                 TGSI_OPCODE_TRUNC
-#define TGSI_OPCODE_NOISE1              102
-#define TGSI_OPCODE_NOISE2              103
-#define TGSI_OPCODE_NOISE3              104
-#define TGSI_OPCODE_NOISE4              105
-#define TGSI_OPCODE_NOP                 106
+#define TGSI_OPCODE_NOISE1              103
+#define TGSI_OPCODE_NOISE2              104
+#define TGSI_OPCODE_NOISE3              105
+#define TGSI_OPCODE_NOISE4              106
+#define TGSI_OPCODE_NOP                 107
 
 /*
  * ps_1_1
  */
 #define TGSI_OPCODE_TEXKILL             TGSI_OPCODE_KILP
-#define TGSI_OPCODE_TEXBEM              107
-#define TGSI_OPCODE_TEXBEML             108
-#define TGSI_OPCODE_TEXREG2AR           109
-#define TGSI_OPCODE_TEXM3X2PAD          110
-#define TGSI_OPCODE_TEXM3X2TEX          111
-#define TGSI_OPCODE_TEXM3X3PAD          112
-#define TGSI_OPCODE_TEXM3X3TEX          113
-#define TGSI_OPCODE_TEXM3X3SPEC         114
-#define TGSI_OPCODE_TEXM3X3VSPEC        115
 
 /*
  * ps_1_2
  */
-#define TGSI_OPCODE_TEXREG2GB           116
-#define TGSI_OPCODE_TEXREG2RGB          117
-#define TGSI_OPCODE_TEXDP3TEX           118
-#define TGSI_OPCODE_TEXDP3              119
-#define TGSI_OPCODE_TEXM3X3             120
 /* CMP - use TGSI_OPCODE_CND0 */
 
 /*
  * ps_1_3
  */
-#define TGSI_OPCODE_TEXM3X2DEPTH        121
 /* CMP - use TGSI_OPCODE_CND0 */
 
 /*
  * ps_1_4
  */
 #define TGSI_OPCODE_TEXLD               TGSI_OPCODE_TEX
-#define TGSI_OPCODE_TEXDEPTH            122
-#define TGSI_OPCODE_BEM                 123
 
 /*
  * ps_2_0
  */
 #define TGSI_OPCODE_M4X4                TGSI_OPCODE_MULTIPLYMATRIX
-#define TGSI_OPCODE_M4X3                124
-#define TGSI_OPCODE_M3X4                125
-#define TGSI_OPCODE_M3X3                126
-#define TGSI_OPCODE_M3X2                127
+#define TGSI_OPCODE_M4X3                108
+#define TGSI_OPCODE_M3X4                109
+#define TGSI_OPCODE_M3X3                110
+#define TGSI_OPCODE_M3X2                111
 #define TGSI_OPCODE_CRS                 TGSI_OPCODE_XPD
-#define TGSI_OPCODE_NRM4                128
+#define TGSI_OPCODE_NRM4                112
 #define TGSI_OPCODE_SINCOS              TGSI_OPCODE_SCS
 #define TGSI_OPCODE_TEXLDB              TGSI_OPCODE_TXB
 #define TGSI_OPCODE_DP2ADD              TGSI_OPCODE_DP2A
@@ -393,10 +376,10 @@ struct tgsi_immediate_float32
  * ps_2_x
  */
 #define TGSI_OPCODE_CALL                TGSI_OPCODE_CAL
-#define TGSI_OPCODE_CALLNZ              129
-#define TGSI_OPCODE_IFC                 130
+#define TGSI_OPCODE_CALLNZ              113
+#define TGSI_OPCODE_IFC                 114
 #define TGSI_OPCODE_BREAK               TGSI_OPCODE_BRK
-#define TGSI_OPCODE_BREAKC              131
+#define TGSI_OPCODE_BREAKC              115
 #define TGSI_OPCODE_DSX                 TGSI_OPCODE_DDX
 #define TGSI_OPCODE_DSY                 TGSI_OPCODE_DDY
 #define TGSI_OPCODE_TEXLDD              TGSI_OPCODE_TXD
@@ -417,10 +400,10 @@ struct tgsi_immediate_float32
  * vs_2_x
  */
 
-#define TGSI_OPCODE_KIL                 132  /* unpredicated kill */
-#define TGSI_OPCODE_END                 133  /* aka HALT */
+#define TGSI_OPCODE_KIL                 116  /* unpredicated kill */
+#define TGSI_OPCODE_END                 117  /* aka HALT */
 
-#define TGSI_OPCODE_LAST                135
+#define TGSI_OPCODE_LAST                118
 
 #define TGSI_SAT_NONE            0  /* do not saturate */
 #define TGSI_SAT_ZERO_ONE        1  /* clamp to [0,1] */
-- 
cgit v1.2.3


From d34bc880a4d17420ec20d422dcb461783457c473 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Fri, 14 Mar 2008 08:42:08 +0000
Subject: tgsi: add debug_printf version of tgsi_dump

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 22 ++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/util/tgsi_dump.h |  4 ++++
 2 files changed, 26 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index aa782787009..7d292778ad5 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -1563,3 +1563,25 @@ tgsi_dump_str(
 
    *str = dump.text;
 }
+
+
+void tgsi_debug_dump( struct tgsi_token *tokens )
+{
+   char *str, *p;
+
+   tgsi_dump_str( &str, tokens, 0 );
+
+   p = str;
+   while (p != NULL)
+   {
+      char *end = strchr( p, '\n' );
+      if (end != NULL)
+      {
+         *end++ = '\0';
+      }
+      debug_printf( "%s\n", p );
+      p = end;
+   }
+
+   FREE( str );
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
index b983b382268..51d79a0362e 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
@@ -20,6 +20,10 @@ tgsi_dump_str(
    const struct tgsi_token *tokens,
    unsigned                flags );
 
+/* Dump to debug_printf()
+ */
+void tgsi_debug_dump( struct tgsi_token *tokens );
+
 #if defined __cplusplus
 }
 #endif
-- 
cgit v1.2.3


From 9de9e1fe8c3f87fe672aed074348f07107fa3cec Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 14 Mar 2008 11:24:28 -0600
Subject: gallium: print warning rather than assert(0) for LOG/EXP opcodes

Glean vertProg1 runs all the way through, rather than aborting.
---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 34 +++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index f2ed9e0353e..ad871d2bdfb 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -1516,11 +1516,41 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_EXP:
-      assert (0);
+      printf("TGSI: EXP opcode not implemented\n");
+      /* from ARB_v_p:
+      tmp = ScalarLoad(op0);
+      result.x = 2^floor(tmp);
+      result.y = tmp - floor(tmp);
+      result.z = RoughApprox2ToX(tmp);
+      result.w = 1.0;
+      */
+#if 0
+      /* something like this: */
+      FETCH( &r[0], 0, CHAN_X );
+      micro_exp2( &r[0], &r[0] );
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+#endif
       break;
 
    case TGSI_OPCODE_LOG:
-      assert (0);
+      printf("TGSI: LOG opcode not implemented\n");
+      /* from ARB_v_p:
+      tmp = fabs(ScalarLoad(op0));
+      result.x = floor(log2(tmp));
+      result.y = tmp / 2^(floor(log2(tmp)));
+      result.z = RoughApproxLog2(tmp);
+      result.w = 1.0;
+      */
+#if 0
+      /* something like this: */
+      FETCH( &r[0], 0, CHAN_X );
+      micro_lg2( &r[0], &r[0] );
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+#endif
       break;
 
    case TGSI_OPCODE_MUL:
-- 
cgit v1.2.3


From f23207ca57095b620febaf723815cc3eef3e87bd Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Fri, 14 Mar 2008 21:44:06 +0100
Subject: tgsi: Use debug_printf().

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index ad871d2bdfb..78e7dec5690 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -1516,7 +1516,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_EXP:
-      printf("TGSI: EXP opcode not implemented\n");
+      debug_printf("TGSI: EXP opcode not implemented\n");
       /* from ARB_v_p:
       tmp = ScalarLoad(op0);
       result.x = 2^floor(tmp);
@@ -1535,7 +1535,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_LOG:
-      printf("TGSI: LOG opcode not implemented\n");
+      debug_printf("TGSI: LOG opcode not implemented\n");
       /* from ARB_v_p:
       tmp = fabs(ScalarLoad(op0));
       result.x = floor(log2(tmp));
-- 
cgit v1.2.3


From 648e26aa95b519f1f4abc429b5a23abaf4a5195b Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Mon, 24 Mar 2008 16:24:32 -0600
Subject: gallium: added tgsi_num_tokens() function to return number of tokens
 in token array.

Maybe move to a different file someday.
---
 src/gallium/auxiliary/tgsi/util/tgsi_parse.c | 13 +++++++++++++
 src/gallium/auxiliary/tgsi/util/tgsi_parse.h |  4 ++++
 2 files changed, 17 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
index bf6b89ce564..c3526cb71ff 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
@@ -317,3 +317,16 @@ tgsi_parse_token(
    }
 }
 
+
+unsigned
+tgsi_num_tokens(const struct tgsi_token *tokens)
+{
+   struct tgsi_parse_context ctx;
+   if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) {
+      unsigned len = (ctx.FullHeader.Header.HeaderSize +
+                      ctx.FullHeader.Header.BodySize +
+                      1);
+      return len;
+   }
+   return 0;
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
index 40083728d64..a98e88e3437 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -113,6 +113,10 @@ void
 tgsi_parse_token(
    struct tgsi_parse_context *ctx );
 
+unsigned
+tgsi_num_tokens(const struct tgsi_token *tokens);
+
+
 #if defined __cplusplus
 }
 #endif
-- 
cgit v1.2.3


From 2483062f10e93fbbc5e3f629627b9e8addcc3f84 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 7 Apr 2008 11:44:34 +0200
Subject: tgsi: Dump semantics before interpolator.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 7d292778ad5..ad9770e4742 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -746,17 +746,19 @@ dump_declaration_short(
       }
    }
 
-   if( decl->Declaration.Interpolate ) {
+   if (decl->Declaration.Semantic) {
       TXT( ", " );
-      ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT );
+      ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT );
+      if (decl->Semantic.SemanticIndex != 0) {
+         CHR( '[' );
+         UID( decl->Semantic.SemanticIndex );
+         CHR( ']' );
+      }
    }
 
-   if( decl->Declaration.Semantic ) {
+   if (decl->Declaration.Interpolate) {
       TXT( ", " );
-      ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT );
-      CHR( '[' );
-      UID( decl->Semantic.SemanticIndex );
-      CHR( ']' );
+      ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT );
    }
 }
 
-- 
cgit v1.2.3


From f41cc50ef063e9a56dc2d0fc6564a0c0f4344b83 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 7 Apr 2008 11:46:57 +0200
Subject: tgsi: Dump processor type and version as a single token.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index ad9770e4742..1d6e8b155d7 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -1402,7 +1402,6 @@ dump_gen(
 
    CHR( '\n' );
    ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT );
-   CHR( ' ' );
    UID( parse.FullVersion.Version.MajorVersion );
    CHR( '.' );
    UID( parse.FullVersion.Version.MinorVersion );
-- 
cgit v1.2.3


From 9e8a85ef670dfb7d356d6066bad4710683a07fd9 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 11 Apr 2008 13:21:39 -0600
Subject: gallium: fix SCS codegen (sin scalar src comes from X, not Y)

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 4e80597b3f3..35c63fecccb 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -2020,7 +2020,7 @@ emit_instruction(
          STORE( func, *inst, 0, 0, CHAN_X );
       }
       IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
-         FETCH( func, *inst, 0, 0, CHAN_Y );
+         FETCH( func, *inst, 0, 0, CHAN_X );
          emit_sin( func, 0 );
          STORE( func, *inst, 0, 0, CHAN_Y );
       }
-- 
cgit v1.2.3


From 097301395d33d57d19bc942f236b4a8c912cc0cc Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 11 Apr 2008 13:44:47 -0600
Subject: gallium: comments

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 31 +++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 35c63fecccb..93813f36cbe 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -548,6 +548,12 @@ emit_xorps(
  * Data fetch helpers.
  */
 
+/**
+ * Copy a shader constant to xmm register
+ * \param xmm  the destination xmm register
+ * \param vec  the src const buffer index
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ */
 static void
 emit_const(
    struct x86_function *func,
@@ -566,6 +572,12 @@ emit_const(
       SHUF( 0, 0, 0, 0 ) );
 }
 
+/**
+ * Copy a shader input to xmm register
+ * \param xmm  the destination xmm register
+ * \param vec  the src input attrib
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ */
 static void
 emit_inputf(
    struct x86_function *func,
@@ -579,6 +591,12 @@ emit_inputf(
       get_input( vec, chan ) );
 }
 
+/**
+ * Store an xmm register to a shader output
+ * \param xmm  the source xmm register
+ * \param vec  the dest output attrib
+ * \param chan  src dest channel to store (X, Y, Z or W)
+ */
 static void
 emit_output(
    struct x86_function *func,
@@ -592,6 +610,12 @@ emit_output(
       make_xmm( xmm ) );
 }
 
+/**
+ * Copy a shader temporary to xmm register
+ * \param xmm  the destination xmm register
+ * \param vec  the src temp register
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ */
 static void
 emit_tempf(
    struct x86_function *func,
@@ -605,6 +629,13 @@ emit_tempf(
       get_temp( vec, chan ) );
 }
 
+/**
+ * Load an xmm register with an input attrib coefficient (a0, dadx or dady)
+ * \param xmm  the destination xmm register
+ * \param vec  the src input/attribute coefficient index
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ * \param member  0=a0, 1=dadx, 2=dady
+ */
 static void
 emit_coef(
    struct x86_function *func,
-- 
cgit v1.2.3


From e3cf0cd6a9f3f072594e5712763b98ce7e579bcf Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 11 Apr 2008 14:18:07 -0600
Subject: gallium: implement immediates (aka literals) for SSE fragment shaders

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 82 +++++++++++++++++++++++++++--
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h |  4 +-
 src/gallium/drivers/softpipe/sp_fs_sse.c    | 10 ++--
 3 files changed, 88 insertions(+), 8 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 93813f36cbe..748c53dbfc9 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -225,6 +225,15 @@ get_coef_base( void )
    return get_output_base();
 }
 
+static struct x86_reg
+get_immediate_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_DI );
+}
+
+
 /**
  * Data access helpers.
  */
@@ -238,6 +247,16 @@ get_argument(
       (index + 1) * 4 );
 }
 
+static struct x86_reg
+get_immediate(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_immediate_base(),
+      (vec * 4 + chan) * 4 );
+}
+
 static struct x86_reg
 get_const(
    unsigned vec,
@@ -572,6 +591,25 @@ emit_const(
       SHUF( 0, 0, 0, 0 ) );
 }
 
+static void
+emit_immediate(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_movss(
+      func,
+      make_xmm( xmm ),
+      get_immediate( vec, chan ) );
+   emit_shufps(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ),
+      SHUF( 0, 0, 0, 0 ) );
+}
+
+
 /**
  * Copy a shader input to xmm register
  * \param xmm  the destination xmm register
@@ -1191,6 +1229,14 @@ emit_fetch(
             swizzle );
          break;
 
+      case TGSI_FILE_IMMEDIATE:
+         emit_immediate(
+            func,
+            xmm,
+            reg->SrcRegister.Index,
+            swizzle );
+         break;
+
       case TGSI_FILE_INPUT:
          emit_inputf(
             func,
@@ -2343,15 +2389,21 @@ tgsi_emit_sse2(
  * DECLARATION and INSTRUCTION phase.
  * GP register holding the output argument is aliased with the coeff argument,
  * as outputs are not needed in the DECLARATION phase.
+ *
+ * \param tokens  the TGSI input shader
+ * \param func  the output SSE code/function
+ * \param immediates  buffer to place immediates, later passed to SSE func
  */
 unsigned
 tgsi_emit_sse2_fs(
    struct tgsi_token *tokens,
-   struct x86_function *func )
+   struct x86_function *func,
+   float (*immediates)[4])
 {
    struct tgsi_parse_context parse;
    boolean instruction_phase = FALSE;
    unsigned ok = 1;
+   uint num_immediates = 0;
 
    DUMP_START();
 
@@ -2362,6 +2414,7 @@ tgsi_emit_sse2_fs(
       func,
       get_input_base(),
       get_argument( 0 ) );
+   /* skipping outputs argument here */
    emit_mov(
       func,
       get_const_base(),
@@ -2374,6 +2427,10 @@ tgsi_emit_sse2_fs(
       func,
       get_coef_base(),
       get_argument( 4 ) );
+   emit_mov(
+      func,
+      get_immediate_base(),
+      get_argument( 5 ) );
 
    tgsi_parse_init( &parse, tokens );
 
@@ -2407,9 +2464,26 @@ tgsi_emit_sse2_fs(
          break;
 
       case TGSI_TOKEN_TYPE_IMMEDIATE:
-         /* XXX implement this */
-	 ok = 0;
-	 debug_printf("failed to emit immediate value to SSE\n");
+         /* simply copy the immediate values into the next immediates[] slot */
+         {
+            const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+            uint i;
+            assert(size <= 4);
+            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
+            for( i = 0; i < size; i++ ) {
+               immediates[num_immediates][i] =
+		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+            }
+#if 0
+            debug_printf("SSE FS immediate[%d] = %f %f %f %f\n",
+                   num_immediates,
+                   immediates[num_immediates][0],
+                   immediates[num_immediates][1],
+                   immediates[num_immediates][2],
+                   immediates[num_immediates][3]);
+#endif
+            num_immediates++;
+         }
          break;
 
       default:
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
index 63b8ef39112..fde06047fe8 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
@@ -16,7 +16,9 @@ tgsi_emit_sse2(
 unsigned
 tgsi_emit_sse2_fs(
    struct tgsi_token *tokens,
-   struct x86_function *function );
+   struct x86_function *function,
+   float (*immediates)[4]
+ );
 
 #if defined __cplusplus
 }
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 8095d662ee6..9c7948264fe 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -51,7 +51,8 @@ typedef void (XSTDCALL *codegen_function)(
    struct tgsi_exec_vector *output,
    float (*constant)[4],
    struct tgsi_exec_vector *temporary,
-   const struct tgsi_interp_coef *coef
+   const struct tgsi_interp_coef *coef,
+   float (*immediates)[4]
    //, const struct tgsi_exec_vector *quadPos
  );
 
@@ -60,6 +61,7 @@ struct sp_sse_fragment_shader {
    struct sp_fragment_shader base;
    struct x86_function             sse2_program;
    codegen_function func;
+   float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
 };
 
 
@@ -96,7 +98,8 @@ fs_sse_run( struct sp_fragment_shader *base,
 		 machine->Outputs,
 		 machine->Consts,
 		 machine->Temps,
-		 machine->InterpCoefs
+		 machine->InterpCoefs,
+                 shader->immediates
 		 //	 , &machine->QuadPos
       );
 
@@ -129,7 +132,8 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe,
 
    x86_init_func( &shader->sse2_program );
    
-   if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program )) {
+   if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program,
+                           shader->immediates)) {
       FREE(shader);
       return NULL;
    }
-- 
cgit v1.2.3


From 7c2416f06e518bc1491fe13e145dcc9487d75449 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 11 Apr 2008 15:02:21 -0600
Subject: gallium: handle TGSI immediates in SSE code for vertex shaders

---
 src/gallium/auxiliary/draw/draw_vs_sse.c    |  9 ++++++---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 23 +++++++++++++++++++----
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h |  4 +++-
 3 files changed, 28 insertions(+), 8 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index f40d65df08f..13394129bc8 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -50,13 +50,15 @@ typedef void (XSTDCALL *codegen_function) (
    const struct tgsi_exec_vector *input,
    struct tgsi_exec_vector *output,
    float (*constant)[4],
-   struct tgsi_exec_vector *temporary );
+   struct tgsi_exec_vector *temporary,
+   float (*immediates)[4] );
 
 
 struct draw_sse_vertex_shader {
    struct draw_vertex_shader base;
    struct x86_function sse2_program;
    codegen_function func;
+   float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
 };
 
 
@@ -149,7 +151,8 @@ vs_sse_run( struct draw_vertex_shader *base,
       shader->func(machine->Inputs,
                    machine->Outputs,
                    machine->Consts,
-                   machine->Temps );
+                   machine->Temps,
+                   shader->immediates);
    }
 
 
@@ -243,7 +246,7 @@ draw_create_vs_sse(struct draw_context *draw,
    x86_init_func( &vs->sse2_program );
 
    if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
-			&vs->sse2_program )) 
+			&vs->sse2_program, vs->immediates )) 
       goto fail;
       
    vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 748c53dbfc9..e55fae6047b 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -2316,10 +2316,12 @@ emit_declaration(
 unsigned
 tgsi_emit_sse2(
    struct tgsi_token *tokens,
-   struct x86_function *func )
+   struct x86_function *func,
+   float (*immediates)[4] )
 {
    struct tgsi_parse_context parse;
    unsigned ok = 1;
+   uint num_immediates = 0;
 
    DUMP_START();
 
@@ -2341,6 +2343,10 @@ tgsi_emit_sse2(
       func,
       get_temp_base(),
       get_argument( 3 ) );
+   emit_mov(
+      func,
+      get_immediate_base(),
+      get_argument( 4 ) );
 
    tgsi_parse_init( &parse, tokens );
 
@@ -2363,9 +2369,18 @@ tgsi_emit_sse2(
          break;
 
       case TGSI_TOKEN_TYPE_IMMEDIATE:
-         /* XXX implement this */
-	 ok = 0;
-	 debug_printf("failed to emit immediate value to SSE\n");
+         /* simply copy the immediate values into the next immediates[] slot */
+         {
+            const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+            uint i;
+            assert(size <= 4);
+            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
+            for( i = 0; i < size; i++ ) {
+               immediates[num_immediates][i] =
+		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+            }
+            num_immediates++;
+         }
 	 break;
 
       default:
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
index fde06047fe8..d1190727d03 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
@@ -11,7 +11,9 @@ struct x86_function;
 unsigned
 tgsi_emit_sse2(
    struct tgsi_token *tokens,
-   struct x86_function *function );
+   struct x86_function *function,
+   float (*immediates)[4]
+ );
 
 unsigned
 tgsi_emit_sse2_fs(
-- 
cgit v1.2.3


From 593cf5a6b55eb9b490a2aee2c3850d2d493fc4df Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 11 Apr 2008 15:27:00 -0600
Subject: gallium: merge the tgsi_emit_sse2() and tgsi_emit_sse2_fs()
 functions.

The two functions were mostly the same.  We can look at the shader header
info to determine if it's a vertex or fragment shader.
---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 201 +++++++++++-----------------
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h |   7 -
 src/gallium/drivers/softpipe/sp_fs_sse.c    |   4 +-
 3 files changed, 80 insertions(+), 132 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index e55fae6047b..c37e201b2bc 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -2313,104 +2313,25 @@ emit_declaration(
    }
 }
 
-unsigned
-tgsi_emit_sse2(
-   struct tgsi_token *tokens,
-   struct x86_function *func,
-   float (*immediates)[4] )
-{
-   struct tgsi_parse_context parse;
-   unsigned ok = 1;
-   uint num_immediates = 0;
-
-   DUMP_START();
-
-   func->csr = func->store;
-
-   emit_mov(
-      func,
-      get_input_base(),
-      get_argument( 0 ) );
-   emit_mov(
-      func,
-      get_output_base(),
-      get_argument( 1 ) );
-   emit_mov(
-      func,
-      get_const_base(),
-      get_argument( 2 ) );
-   emit_mov(
-      func,
-      get_temp_base(),
-      get_argument( 3 ) );
-   emit_mov(
-      func,
-      get_immediate_base(),
-      get_argument( 4 ) );
-
-   tgsi_parse_init( &parse, tokens );
-
-   while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
-      tgsi_parse_token( &parse );
-
-      switch( parse.FullToken.Token.Type ) {
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         break;
-
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         ok = emit_instruction(
-	    func,
-	    &parse.FullToken.FullInstruction );
-
-	 if (!ok) {
-	    debug_printf("failed to translate tgsi opcode %d to SSE\n", 
-			 parse.FullToken.FullInstruction.Instruction.Opcode );
-	 }
-         break;
-
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         /* simply copy the immediate values into the next immediates[] slot */
-         {
-            const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
-            uint i;
-            assert(size <= 4);
-            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
-            for( i = 0; i < size; i++ ) {
-               immediates[num_immediates][i] =
-		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
-            }
-            num_immediates++;
-         }
-	 break;
-
-      default:
-         assert( 0 );
-	 ok = 0;
-	 break;
-      }
-   }
-
-   tgsi_parse_free( &parse );
-
-   DUMP_END();
-
-   return ok;
-}
 
 /**
- * Fragment shaders are responsible for interpolating shader inputs. Because on
- * x86 we have only 4 GP registers, and here we have 5 shader arguments (input,
- * output, const, temp and coef), the code is split into two phases --
- * DECLARATION and INSTRUCTION phase.
- * GP register holding the output argument is aliased with the coeff argument,
- * as outputs are not needed in the DECLARATION phase.
+ * Translate a TGSI vertex/fragment shader to SSE2 code.
+ * Slightly different things are done for vertex vs. fragment shaders.
+ *
+ * Note that fragment shaders are responsible for interpolating shader
+ * inputs. Because on x86 we have only 4 GP registers, and here we
+ * have 5 shader arguments (input, output, const, temp and coef), the
+ * code is split into two phases -- DECLARATION and INSTRUCTION phase.
+ * GP register holding the output argument is aliased with the coeff
+ * argument, as outputs are not needed in the DECLARATION phase.
  *
  * \param tokens  the TGSI input shader
  * \param func  the output SSE code/function
  * \param immediates  buffer to place immediates, later passed to SSE func
+ * \param return  1 for success, 0 if translation failed
  */
 unsigned
-tgsi_emit_sse2_fs(
+tgsi_emit_sse2(
    struct tgsi_token *tokens,
    struct x86_function *func,
    float (*immediates)[4])
@@ -2424,50 +2345,84 @@ tgsi_emit_sse2_fs(
 
    func->csr = func->store;
 
-   /* DECLARATION phase, do not load output argument. */
-   emit_mov(
-      func,
-      get_input_base(),
-      get_argument( 0 ) );
-   /* skipping outputs argument here */
-   emit_mov(
-      func,
-      get_const_base(),
-      get_argument( 2 ) );
-   emit_mov(
-      func,
-      get_temp_base(),
-      get_argument( 3 ) );
-   emit_mov(
-      func,
-      get_coef_base(),
-      get_argument( 4 ) );
-   emit_mov(
-      func,
-      get_immediate_base(),
-      get_argument( 5 ) );
-
    tgsi_parse_init( &parse, tokens );
 
+   /*
+    * Different function args for vertex/fragment shaders:
+    */
+   if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+      /* DECLARATION phase, do not load output argument. */
+      emit_mov(
+         func,
+         get_input_base(),
+         get_argument( 0 ) );
+      /* skipping outputs argument here */
+      emit_mov(
+         func,
+         get_const_base(),
+         get_argument( 2 ) );
+      emit_mov(
+         func,
+         get_temp_base(),
+         get_argument( 3 ) );
+      emit_mov(
+         func,
+         get_coef_base(),
+         get_argument( 4 ) );
+      emit_mov(
+         func,
+         get_immediate_base(),
+         get_argument( 5 ) );
+   }
+   else {
+      assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
+
+      emit_mov(
+         func,
+         get_input_base(),
+         get_argument( 0 ) );
+      emit_mov(
+         func,
+         get_output_base(),
+         get_argument( 1 ) );
+      emit_mov(
+         func,
+         get_const_base(),
+         get_argument( 2 ) );
+      emit_mov(
+         func,
+         get_temp_base(),
+         get_argument( 3 ) );
+      emit_mov(
+         func,
+         get_immediate_base(),
+         get_argument( 4 ) );
+   }
+
    while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
       tgsi_parse_token( &parse );
 
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_DECLARATION:
-         emit_declaration(
-            func,
-            &parse.FullToken.FullDeclaration );
+         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+            emit_declaration(
+               func,
+               &parse.FullToken.FullDeclaration );
+         }
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
-         if( !instruction_phase ) {
-            /* INSTRUCTION phase, overwrite coeff with output. */
-            instruction_phase = TRUE;
-            emit_mov(
-               func,
-               get_output_base(),
-               get_argument( 1 ) );
+         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+            if( !instruction_phase ) {
+               /* INSTRUCTION phase, overwrite coeff with output. */
+               instruction_phase = TRUE;
+               emit_mov(
+                  func,
+                  get_output_base(),
+                  get_argument( 1 ) );
+            }
          }
+
          ok = emit_instruction(
             func,
             &parse.FullToken.FullInstruction );
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
index d1190727d03..d56bf7f98ab 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
@@ -15,13 +15,6 @@ tgsi_emit_sse2(
    float (*immediates)[4]
  );
 
-unsigned
-tgsi_emit_sse2_fs(
-   struct tgsi_token *tokens,
-   struct x86_function *function,
-   float (*immediates)[4]
- );
-
 #if defined __cplusplus
 }
 #endif
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 9c7948264fe..5ef02a71426 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -132,8 +132,8 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe,
 
    x86_init_func( &shader->sse2_program );
    
-   if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program,
-                           shader->immediates)) {
+   if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program,
+                        shader->immediates)) {
       FREE(shader);
       return NULL;
    }
-- 
cgit v1.2.3


From 45cc4f546e034adff39c42032a47147e6ab91a08 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sat, 12 Apr 2008 22:59:17 +0200
Subject: tgsi: Fix source register short dump code.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 62 +++++++++++++++--------------
 1 file changed, 32 insertions(+), 30 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 1d6e8b155d7..cb3573ceb66 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -968,18 +968,18 @@ dump_instruction_short(
       }
       CHR( ' ' );
 
-      if( src->SrcRegisterExtMod.Complement ) {
-         TXT( "(1 - " );
-      }
-      if( src->SrcRegisterExtMod.Negate  ) {
-         CHR( '-' );
-      }
-      if( src->SrcRegisterExtMod.Absolute ) {
+      if (src->SrcRegisterExtMod.Negate)
+         TXT( "-(" );
+      if (src->SrcRegisterExtMod.Absolute)
          CHR( '|' );
-      }
-      if( src->SrcRegister.Negate ) {
+      if (src->SrcRegisterExtMod.Scale2X)
+         TXT( "2*(" );
+      if (src->SrcRegisterExtMod.Bias)
+         CHR( '(' );
+      if (src->SrcRegisterExtMod.Complement)
+         TXT( "1-(" );
+      if (src->SrcRegister.Negate)
          CHR( '-' );
-      }
 
       ENM( src->SrcRegister.File, TGSI_FILES_SHORT );
 
@@ -987,35 +987,37 @@ dump_instruction_short(
       SID( src->SrcRegister.Index );
       CHR( ']' );
 
-      if (src->SrcRegister.Extended) {
-         if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
-             src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
-             src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
-             src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
-            CHR( '.' );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT );
-         }
-      }
-      else if( src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
-               src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
-               src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
-               src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ) {
+      if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
+          src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
+          src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
+          src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) {
          CHR( '.' );
          ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES_SHORT );
          ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES_SHORT );
          ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES_SHORT );
          ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES_SHORT );
       }
+      if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
+          src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
+          src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
+          src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
+         CHR( '.' );
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT );
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT );
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT );
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT );
+      }
 
-      if( src->SrcRegisterExtMod.Absolute ) {
+      if (src->SrcRegisterExtMod.Complement)
+         CHR( ')' );
+      if (src->SrcRegisterExtMod.Bias)
+         TXT( ")-.5" );
+      if (src->SrcRegisterExtMod.Scale2X)
+         CHR( ')' );
+      if (src->SrcRegisterExtMod.Absolute)
          CHR( '|' );
-      }
-      if( src->SrcRegisterExtMod.Complement ) {
+      if (src->SrcRegisterExtMod.Negate)
          CHR( ')' );
-      }
 
       first_reg = FALSE;
    }
-- 
cgit v1.2.3


From 5b8fa518476868530d748ce6d03674e9cca3d89f Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Mon, 14 Apr 2008 23:55:36 +0900
Subject: gallium: Don't assume snprintf are always available.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c        |  9 ++--
 src/gallium/auxiliary/util/p_debug.c               |  7 +--
 src/gallium/auxiliary/util/u_snprintf.c            | 16 +++---
 src/gallium/auxiliary/util/u_string.h              | 63 ++++++++++++++++++++++
 .../drivers/i915simple/i915_fpc_translate.c        |  3 +-
 src/gallium/drivers/i915simple/i915_screen.c       |  3 +-
 src/gallium/drivers/i965simple/brw_screen.c        |  3 +-
 src/gallium/include/pipe/p_format.h                |  4 +-
 src/gallium/include/pipe/p_util.h                  |  8 ---
 9 files changed, 88 insertions(+), 28 deletions(-)
 create mode 100644 src/gallium/auxiliary/util/u_string.h

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index cb3573ceb66..ff6a2c4194d 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -30,6 +30,7 @@
 #include "pipe/p_debug.h"
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
+#include "util/u_string.h"
 #include "tgsi_dump.h"
 #include "tgsi_parse.h"
 #include "tgsi_build.h"
@@ -147,7 +148,7 @@ gen_dump_uix(
 {
    char  str[36];
 
-   sprintf( str, "0x%x", ui );
+   util_snprintf( str, sizeof(str), "0x%x", ui );
    gen_dump_str( dump, str );
 }
 
@@ -158,7 +159,7 @@ gen_dump_uid(
 {
    char  str[16];
 
-   sprintf( str, "%u", ui );
+   util_snprintf( str, sizeof(str), "%u", ui );
    gen_dump_str( dump, str );
 }
 
@@ -169,7 +170,7 @@ gen_dump_sid(
 {
    char  str[16];
 
-   sprintf( str, "%d", si );
+   util_snprintf( str, sizeof(str), "%d", si );
    gen_dump_str( dump, str );
 }
 
@@ -180,7 +181,7 @@ gen_dump_flt(
 {
    char  str[48];
 
-   sprintf( str, "%10.4f", flt );
+   util_snprintf( str, sizeof(str), "%10.4f", flt );
    gen_dump_str( dump, str );
 }
 
diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c
index 090e3b7794d..f9366467cd5 100644
--- a/src/gallium/auxiliary/util/p_debug.c
+++ b/src/gallium/auxiliary/util/p_debug.c
@@ -39,6 +39,7 @@
 #include "pipe/p_compiler.h" 
 #include "pipe/p_util.h" 
 #include "pipe/p_debug.h" 
+#include "util/u_string.h" 
 
 
 #ifdef WIN32
@@ -60,7 +61,7 @@ void _debug_vprintf(const char *format, va_list ap)
    /* EngDebugPrint does not handle float point arguments, so we need to use
     * our own vsnprintf implementation */
    char buf[512 + 1];
-   vsnprintf(buf, sizeof(buf), format, ap);
+   util_vsnprintf(buf, sizeof(buf), format, ap);
    _EngDebugPrint("%s", buf);
 #else
    /* TODO: Implement debug print for WINCE */
@@ -311,7 +312,7 @@ debug_dump_enum(const struct debug_named_value *names,
       ++names;
    }
 
-   snprintf(rest, sizeof(rest), "0x%08lx", value);
+   util_snprintf(rest, sizeof(rest), "0x%08lx", value);
    return rest;
 }
 
@@ -344,7 +345,7 @@ debug_dump_flags(const struct debug_named_value *names,
       else
 	 first = 0;
       
-      snprintf(rest, sizeof(rest), "0x%08lx", value);
+      util_snprintf(rest, sizeof(rest), "0x%08lx", value);
       strncat(output, rest, sizeof(output));
    }
    
diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c
index 48426abcb7a..c4f4bbd30c2 100644
--- a/src/gallium/auxiliary/util/u_snprintf.c
+++ b/src/gallium/auxiliary/util/u_snprintf.c
@@ -166,8 +166,8 @@
 #include <config.h>
 #else
 #ifdef WIN32
-#define vsnprintf rpl_vsnprintf
-#define snprintf rpl_snprintf
+#define vsnprintf util_vsnprintf
+#define snprintf util_snprintf
 #define HAVE_VSNPRINTF 0
 #define HAVE_SNPRINTF 0
 #define HAVE_VASPRINTF 1 /* not needed */
@@ -445,7 +445,7 @@ static UINTMAX_T myround(LDOUBLE);
 static LDOUBLE mypow10(int);
 
 int
-rpl_vsnprintf(char *str, size_t size, const char *format, va_list args)
+util_vsnprintf(char *str, size_t size, const char *format, va_list args)
 {
 	LDOUBLE fvalue;
 	INTMAX_T value;
@@ -1404,7 +1404,7 @@ mymemcpy(void *dst, void *src, size_t len)
 #endif	/* NEED_MYMEMCPY */
 
 int
-rpl_vasprintf(char **ret, const char *format, va_list ap)
+util_vasprintf(char **ret, const char *format, va_list ap)
 {
 	size_t size;
 	int len;
@@ -1422,10 +1422,10 @@ rpl_vasprintf(char **ret, const char *format, va_list ap)
 #if !HAVE_SNPRINTF
 #if HAVE_STDARG_H
 int
-rpl_snprintf(char *str, size_t size, const char *format, ...)
+util_snprintf(char *str, size_t size, const char *format, ...)
 #else
 int
-rpl_snprintf(va_alist) va_dcl
+util_snprintf(va_alist) va_dcl
 #endif	/* HAVE_STDARG_H */
 {
 #if !HAVE_STDARG_H
@@ -1449,10 +1449,10 @@ rpl_snprintf(va_alist) va_dcl
 #if !HAVE_ASPRINTF
 #if HAVE_STDARG_H
 int
-rpl_asprintf(char **ret, const char *format, ...)
+util_asprintf(char **ret, const char *format, ...)
 #else
 int
-rpl_asprintf(va_alist) va_dcl
+util_asprintf(va_alist) va_dcl
 #endif	/* HAVE_STDARG_H */
 {
 #if !HAVE_STDARG_H
diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h
new file mode 100644
index 00000000000..b99d4e8021c
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_string.h
@@ -0,0 +1,63 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Platform independent functions for string manipulation.
+ * 
+ * @author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+#ifndef U_STRING_H_
+#define U_STRING_H_
+
+#ifndef WIN32
+#include <stdio.h>
+#endif
+#include <stddef.h>
+#include <stdarg.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+   
+#ifdef WIN32
+int util_vsnprintf(char *, size_t, const char *, va_list);
+int util_snprintf(char *str, size_t size, const char *format, ...);
+#else
+#define util_vsnprintf vsnprintf
+#define util_snprintf snprintf
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_STRING_H_ */
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
index 7b4fca5db1b..3ccf74c72c4 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -33,6 +33,7 @@
 #include "i915_fpc.h"
 
 #include "pipe/p_shader_tokens.h"
+#include "util/u_string.h"
 #include "tgsi/util/tgsi_parse.h"
 #include "tgsi/util/tgsi_dump.h"
 
@@ -122,7 +123,7 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
 
    debug_printf("i915_program_error: ");
    va_start( args, msg );  
-   vsprintf( buffer, msg, args );
+   util_vsnprintf( buffer, sizeof(buffer), msg, args );
    va_end( args );
    debug_printf(buffer);
    debug_printf("\n");
diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c
index 839b98c0cec..9ae594ce54c 100644
--- a/src/gallium/drivers/i915simple/i915_screen.c
+++ b/src/gallium/drivers/i915simple/i915_screen.c
@@ -28,6 +28,7 @@
 
 #include "pipe/p_util.h"
 #include "pipe/p_winsys.h"
+#include "util/u_string.h"
 
 #include "i915_reg.h"
 #include "i915_context.h"
@@ -78,7 +79,7 @@ i915_get_name( struct pipe_screen *pscreen )
       break;
    }
 
-   sprintf(buffer, "i915 (chipset: %s)", chipset);
+   util_snprintf(buffer, sizeof(buffer), "i915 (chipset: %s)", chipset);
    return buffer;
 }
 
diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c
index 5be369fe521..6845c7abde2 100644
--- a/src/gallium/drivers/i965simple/brw_screen.c
+++ b/src/gallium/drivers/i965simple/brw_screen.c
@@ -28,6 +28,7 @@
 
 #include "pipe/p_util.h"
 #include "pipe/p_winsys.h"
+#include "util/u_string.h"
 
 #include "brw_context.h"
 #include "brw_screen.h"
@@ -66,7 +67,7 @@ brw_get_name( struct pipe_screen *screen )
       break;
    }
 
-   sprintf(buffer, "i965 (chipset: %s)", chipset);
+   util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset);
    return buffer;
 }
 
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 9e0f91f202b..ef9e3a3d6c0 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -28,7 +28,7 @@
 #ifndef PIPE_FORMAT_H
 #define PIPE_FORMAT_H
 
-#include <stdio.h> /* for sprintf */
+#include "util/u_string.h"
 
 #include "p_compiler.h"
 #include "p_debug.h"
@@ -367,7 +367,7 @@ static INLINE char *pf_sprint_name( char *str, enum pipe_format format )
                strcat( str, "S" );
                break;
             }
-            sprintf( &str[strlen( str )], "%u", size * scale );
+            util_snprintf( &str[strlen( str )], 32, "%u", size * scale );
          }
          if (i != 0) {
             strcat( str, "_" );
diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h
index 8e3aaee496f..dbca080a4b9 100644
--- a/src/gallium/include/pipe/p_util.h
+++ b/src/gallium/include/pipe/p_util.h
@@ -138,14 +138,6 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size )
 #define GETENV( X ) debug_get_option( X, NULL )
 
 
-#ifdef WIN32
-int rpl_vsnprintf(char *, size_t, const char *, va_list);
-int rpl_snprintf(char *str, size_t size, const char *format, ...);
-#define vsnprintf rpl_vsnprintf
-#define snprintf rpl_snprintf
-#endif
-
-
 /**
  * Return memory on given byte alignment
  */
-- 
cgit v1.2.3


From 3c4f1ba5a2edefd69b2c47abaf534fb3af3f259d Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Tue, 15 Apr 2008 17:08:10 +0900
Subject: gallium: Eliminate stdio file usage. Remove unused stuff.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 334 +++-------------------------
 src/gallium/auxiliary/tgsi/util/tgsi_dump.h |  10 -
 2 files changed, 34 insertions(+), 310 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index ff6a2c4194d..26bfc2051f2 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -25,8 +25,6 @@
  * 
  **************************************************************************/
 
-#include <stdio.h> 
-
 #include "pipe/p_debug.h"
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
@@ -35,196 +33,28 @@
 #include "tgsi_parse.h"
 #include "tgsi_build.h"
 
-struct gen_dump
-{
-   unsigned tabs;
-   void  (* write)(
-               struct gen_dump   *dump,
-               const void        *data,
-               unsigned          size );
-};
-
-struct text_dump
-{
-   struct gen_dump   base;
-   char              *text;
-   unsigned          length;
-   unsigned          capacity;
-};
-
-static void
-_text_dump_write(
-   struct gen_dump   *dump,
-   const void        *data,
-   unsigned          size )
-{
-   struct text_dump  *td = (struct text_dump *) dump;
-   unsigned          new_length = td->length + size;
-
-   if( new_length >= td->capacity ) {
-      unsigned new_capacity = td->capacity;
-
-      do {
-         if( new_capacity == 0 ) {
-            new_capacity = 256;
-         }
-         else {
-            new_capacity *= 2;
-         }
-      } while( new_length >= new_capacity );
-      td->text = (char *) REALLOC(
-         td->text,
-         td->capacity,
-         new_capacity );
-      td->capacity = new_capacity;
-   }
-   memcpy(
-      &td->text[td->length],
-      data,
-      size );
-   td->length = new_length;
-   td->text[td->length] = '\0';
-}
-
-struct file_dump
-{
-   struct gen_dump   base;
-   FILE              *file;
-};
-
-static void
-_file_dump_write(
-   struct gen_dump   *dump,
-   const void        *data,
-   unsigned          size )
-{
-   struct file_dump  *fd = (struct file_dump *) dump;
-
-#if 0
-   fwrite( data, 1, size, fd->file );
-#else
-   {
-      unsigned i;
-
-      for (i = 0; i < size; i++ ) {
-         fprintf( fd->file, "%c", ((const char *) data)[i] );
-      }
-   }
-#endif
-}
-
-static void
-gen_dump_str(
-   struct gen_dump   *dump,
-   const char        *str )
-{
-   unsigned i;
-   size_t   len = strlen( str );
-
-   for (i = 0; i < len; i++) {
-      dump->write( dump, &str[i], 1 );
-      if (str[i] == '\n') {
-         unsigned i;
-
-         for (i = 0; i < dump->tabs; i++) {
-            dump->write( dump, "    ", 4 );
-         }
-      }
-   }
-}
-
-static void
-gen_dump_chr(
-   struct gen_dump   *dump,
-   const char        chr )
-{
-   dump->write( dump, &chr, 1 );
-}
-
-static void
-gen_dump_uix(
-   struct gen_dump   *dump,
-   const unsigned    ui )
-{
-   char  str[36];
-
-   util_snprintf( str, sizeof(str), "0x%x", ui );
-   gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_uid(
-   struct gen_dump   *dump,
-   const unsigned    ui )
-{
-   char  str[16];
-
-   util_snprintf( str, sizeof(str), "%u", ui );
-   gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_sid(
-   struct gen_dump   *dump,
-   const int         si )
-{
-   char  str[16];
-
-   util_snprintf( str, sizeof(str), "%d", si );
-   gen_dump_str( dump, str );
-}
-
 static void
-gen_dump_flt(
-   struct gen_dump   *dump,
-   const float       flt )
-{
-   char  str[48];
-
-   util_snprintf( str, sizeof(str), "%10.4f", flt );
-   gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_enum(
-   struct gen_dump   *dump,
+dump_enum(
    const unsigned    e,
    const char        **enums,
    const unsigned    enums_count )
 {
    if (e >= enums_count) {
-      gen_dump_uid( dump, e );
+      debug_printf( "%u", e );
    }
    else {
-      gen_dump_str( dump, enums[e] );
+      debug_printf( "%s", enums[e] );
    }
 }
 
-static void
-gen_dump_tab(
-   struct gen_dump   *dump )
-{
-   ++dump->tabs;
-}
-
-static void
-gen_dump_untab(
-   struct gen_dump   *dump )
-{
-   assert( dump->tabs > 0 );
-
-   --dump->tabs;
-}
-
-#define TXT(S)          gen_dump_str( dump, S )
-#define CHR(C)          gen_dump_chr( dump, C )
-#define UIX(I)          gen_dump_uix( dump, I )
-#define UID(I)          gen_dump_uid( dump, I )
-#define SID(I)          gen_dump_sid( dump, I )
-#define FLT(F)          gen_dump_flt( dump, F )
-#define TAB()           gen_dump_tab( dump )
-#define UNT()           gen_dump_untab( dump )
-#define ENM(E,ENUMS)    gen_dump_enum( dump, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
+#define EOL()           debug_printf( "\n" )
+#define TXT(S)          debug_printf( "%s", S )
+#define CHR(C)          debug_printf( "%c", C )
+#define UIX(I)          debug_printf( "0x%x", I )
+#define UID(I)          debug_printf( "%u", I )
+#define SID(I)          debug_printf( "%d", I )
+#define FLT(F)          debug_printf( "%10.4f", F )
+#define ENM(E,ENUMS)    dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
 
 static const char *TGSI_PROCESSOR_TYPES[] =
 {
@@ -711,7 +541,6 @@ static const char *TGSI_MODULATES[] =
 
 static void
 dump_declaration_short(
-   struct gen_dump               *dump,
    struct tgsi_full_declaration  *decl )
 {
    TXT( "\nDCL " );
@@ -765,7 +594,6 @@ dump_declaration_short(
 
 static void
 dump_declaration_verbose(
-   struct gen_dump               *dump,
    struct tgsi_full_declaration  *decl,
    unsigned                      ignored,
    unsigned                      deflt,
@@ -803,7 +631,7 @@ dump_declaration_verbose(
       UIX( decl->Declaration.Padding );
    }
 
-   CHR( '\n' );
+   EOL();
    switch( decl->Declaration.Declare ) {
    case TGSI_DECLARE_RANGE:
       TXT( "\nFirst: " );
@@ -822,7 +650,7 @@ dump_declaration_verbose(
    }
 
    if( decl->Declaration.Interpolate ) {
-      CHR( '\n' );
+      EOL();
       TXT( "\nInterpolate: " );
       ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES );
       if( ignored ) {
@@ -832,7 +660,7 @@ dump_declaration_verbose(
    }
 
    if( decl->Declaration.Semantic ) {
-      CHR( '\n' );
+      EOL();
       TXT( "\nSemanticName : " );
       ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS );
       TXT( "\nSemanticIndex: " );
@@ -846,7 +674,6 @@ dump_declaration_verbose(
 
 static void
 dump_immediate_short(
-   struct gen_dump            *dump,
    struct tgsi_full_immediate *imm )
 {
    unsigned i;
@@ -874,7 +701,6 @@ dump_immediate_short(
 
 static void
 dump_immediate_verbose(
-   struct gen_dump            *dump,
    struct tgsi_full_immediate *imm,
    unsigned                   ignored )
 {
@@ -888,7 +714,7 @@ dump_immediate_verbose(
    }
 
    for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
-      CHR( '\n' );
+      EOL();
       switch( imm->Immediate.DataType ) {
       case TGSI_IMM_FLOAT32:
          TXT( "\nFloat: " );
@@ -903,14 +729,13 @@ dump_immediate_verbose(
 
 static void
 dump_instruction_short(
-   struct gen_dump               *dump,
    struct tgsi_full_instruction  *inst,
    unsigned                      instno )
 {
    unsigned i;
    boolean  first_reg = TRUE;
 
-   CHR( '\n' );
+   EOL();
    UID( instno );
    CHR( ':' );
    ENM( inst->Instruction.Opcode, TGSI_OPCODES_SHORT );
@@ -1042,7 +867,6 @@ dump_instruction_short(
 
 static void
 dump_instruction_verbose(
-   struct gen_dump               *dump,
    struct tgsi_full_instruction  *inst,
    unsigned                      ignored,
    unsigned                      deflt,
@@ -1070,7 +894,7 @@ dump_instruction_verbose(
    }
 
    if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) {
-      CHR( '\n' );
+      EOL();
       TXT( "\nType          : " );
       ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS );
       if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) {
@@ -1124,7 +948,7 @@ dump_instruction_verbose(
    }
 
    if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) {
-      CHR( '\n' );
+      EOL();
       TXT( "\nType    : " );
       ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS );
       if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) {
@@ -1142,7 +966,7 @@ dump_instruction_verbose(
    }
 
    if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) {
-      CHR( '\n' );
+      EOL();
       TXT( "\nType    : " );
       ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS );
       if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) {
@@ -1163,7 +987,7 @@ dump_instruction_verbose(
       struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
       struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i];
 
-      CHR( '\n' );
+      EOL();
       TXT( "\nFile     : " );
       ENM( dst->DstRegister.File, TGSI_FILES );
       if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) {
@@ -1194,7 +1018,7 @@ dump_instruction_verbose(
       }
 
       if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) {
-         CHR( '\n' );
+         EOL();
          TXT( "\nType        : " );
          ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS );
          if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) {
@@ -1232,7 +1056,7 @@ dump_instruction_verbose(
       }
 
       if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) {
-         CHR( '\n' );
+         EOL();
          TXT( "\nType    : " );
          ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
          if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) {
@@ -1254,7 +1078,7 @@ dump_instruction_verbose(
       struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
       struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i];
 
-      CHR( '\n' );
+      EOL();
       TXT( "\nFile     : ");
       ENM( src->SrcRegister.File, TGSI_FILES );
       if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) {
@@ -1299,7 +1123,7 @@ dump_instruction_verbose(
       }
 
       if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) {
-         CHR( '\n' );
+         EOL();
          TXT( "\nType       : " );
          ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
          if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) {
@@ -1345,7 +1169,7 @@ dump_instruction_verbose(
       }
 
       if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
-         CHR( '\n' );
+         EOL();
          TXT( "\nType     : " );
          ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
          if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) {
@@ -1380,9 +1204,8 @@ dump_instruction_verbose(
    }
 }
 
-static void
-dump_gen(
-   struct gen_dump         *dump,
+void
+tgsi_dump(
    const struct tgsi_token *tokens,
    unsigned                flags )
 {
@@ -1394,16 +1217,16 @@ dump_gen(
    unsigned deflt = !(flags & TGSI_DUMP_NO_DEFAULT);
    unsigned instno = 0;
 
-   dump->tabs = 0;
-
-   /* sanity check */
+   /* sanity checks */
    assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
+   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
+   assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0);
 
    tgsi_parse_init( &parse, tokens );
 
    TXT( "tgsi-dump begin -----------------" );
 
-   CHR( '\n' );
+   EOL();
    ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT );
    UID( parse.FullVersion.Version.MajorVersion );
    CHR( '.' );
@@ -1414,7 +1237,7 @@ dump_gen(
       UID( parse.FullVersion.Version.MajorVersion );
       TXT( "\nMinorVersion: " );
       UID( parse.FullVersion.Version.MinorVersion );
-      CHR( '\n' );
+      EOL();
 
       TXT( "\nHeaderSize: " );
       UID( parse.FullHeader.Header.HeaderSize );
@@ -1422,7 +1245,7 @@ dump_gen(
       UID( parse.FullHeader.Header.BodySize );
       TXT( "\nProcessor : " );
       ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES );
-      CHR( '\n' );
+      EOL();
    }
 
    fi = tgsi_default_full_instruction();
@@ -1434,19 +1257,16 @@ dump_gen(
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_DECLARATION:
          dump_declaration_short(
-            dump,
             &parse.FullToken.FullDeclaration );
          break;
 
       case TGSI_TOKEN_TYPE_IMMEDIATE:
          dump_immediate_short(
-            dump,
             &parse.FullToken.FullImmediate );
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
          dump_instruction_short(
-            dump,
             &parse.FullToken.FullInstruction,
             instno );
          instno++;
@@ -1471,7 +1291,6 @@ dump_gen(
          switch( parse.FullToken.Token.Type ) {
          case TGSI_TOKEN_TYPE_DECLARATION:
             dump_declaration_verbose(
-               dump,
                &parse.FullToken.FullDeclaration,
                ignored,
                deflt,
@@ -1480,14 +1299,12 @@ dump_gen(
 
          case TGSI_TOKEN_TYPE_IMMEDIATE:
             dump_immediate_verbose(
-               dump,
                &parse.FullToken.FullImmediate,
                ignored );
             break;
 
          case TGSI_TOKEN_TYPE_INSTRUCTION:
             dump_instruction_verbose(
-               dump,
                &parse.FullToken.FullInstruction,
                ignored,
                deflt,
@@ -1498,7 +1315,7 @@ dump_gen(
             assert( 0 );
          }
 
-         CHR( '\n' );
+         EOL();
       }
    }
 
@@ -1506,86 +1323,3 @@ dump_gen(
 
    tgsi_parse_free( &parse );
 }
-
-
-static void
-sanity_checks(void)
-{
-   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
-   assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0);
-}
-
-
-void
-tgsi_dump(
-   const struct tgsi_token *tokens,
-   unsigned                flags )
-{
-   struct file_dump  dump;
-
-   sanity_checks();
-
-   dump.base.write = _file_dump_write;
-#if 0
-   {
-      static unsigned   counter = 0;
-      char              buffer[64];
-      sprintf( buffer, "tgsi-dump-%.4u.txt", counter++ );
-      dump.file = fopen( buffer, "wt" );
-   }
-#else
-   dump.file = stderr;
-#endif
-
-   dump_gen(
-      &dump.base,
-      tokens,
-      flags );
-
-#if 0
-   fclose( dump.file );
-#endif
-}
-
-void
-tgsi_dump_str(
-   char                    **str,
-   const struct tgsi_token *tokens,
-   unsigned                flags )
-{
-   struct text_dump  dump;
-
-   dump.base.write = _text_dump_write;
-   dump.text = NULL;
-   dump.length = 0;
-   dump.capacity = 0;
-
-   dump_gen(
-      &dump.base,
-      tokens,
-      flags );
-
-   *str = dump.text;
-}
-
-
-void tgsi_debug_dump( struct tgsi_token *tokens )
-{
-   char *str, *p;
-
-   tgsi_dump_str( &str, tokens, 0 );
-
-   p = str;
-   while (p != NULL)
-   {
-      char *end = strchr( p, '\n' );
-      if (end != NULL)
-      {
-         *end++ = '\0';
-      }
-      debug_printf( "%s\n", p );
-      p = end;
-   }
-
-   FREE( str );
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
index 51d79a0362e..beb0155d56c 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
@@ -14,16 +14,6 @@ tgsi_dump(
    const struct tgsi_token *tokens,
    unsigned                flags );
 
-void
-tgsi_dump_str(
-   char                    **str,
-   const struct tgsi_token *tokens,
-   unsigned                flags );
-
-/* Dump to debug_printf()
- */
-void tgsi_debug_dump( struct tgsi_token *tokens );
-
 #if defined __cplusplus
 }
 #endif
-- 
cgit v1.2.3


From 938ec19210c5b4e19dcb2b606c9ade415f2c1f84 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Thu, 17 Apr 2008 14:19:03 +0100
Subject: tsgi: make Consts const

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
index 45c49dd007c..92e2e5e9859 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
@@ -166,7 +166,7 @@ struct tgsi_exec_machine
 
    float                         Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
    unsigned                      ImmLimit;
-   float                         (*Consts)[4];
+   const float                   (*Consts)[4];
    struct tgsi_exec_vector       *Inputs;
    struct tgsi_exec_vector       *Outputs;
    const struct tgsi_token       *Tokens;
-- 
cgit v1.2.3


From c5f0158a9179463593d63b33cf3b5490167faac9 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Fri, 18 Apr 2008 17:35:32 +0100
Subject: tgsi: add const qualifier to tokens on sse emit

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +-
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index c37e201b2bc..0a3a7559ca4 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -2332,7 +2332,7 @@ emit_declaration(
  */
 unsigned
 tgsi_emit_sse2(
-   struct tgsi_token *tokens,
+   const struct tgsi_token *tokens,
    struct x86_function *func,
    float (*immediates)[4])
 {
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
index d56bf7f98ab..063287dc5e9 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
@@ -10,7 +10,7 @@ struct x86_function;
 
 unsigned
 tgsi_emit_sse2(
-   struct tgsi_token *tokens,
+   const struct tgsi_token *tokens,
    struct x86_function *function,
    float (*immediates)[4]
  );
-- 
cgit v1.2.3


From 19218e2195f3dffc9403f16a742ba8c63edbf8b4 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 18 Apr 2008 11:15:18 -0600
Subject: gallium: implement recip sqrt() with C code for now.

Some conformance lighting tests fail with the SSE rsqrt instruction.
---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 30 +++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 0a3a7559ca4..6f785be3f56 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -36,6 +36,8 @@
 
 #if defined(__i386__) || defined(__386__)
 
+#define HIGH_PRECISION 1  /* for 1/sqrt() */
+
 #define DUMP_SSE  0
 
 #if DUMP_SSE
@@ -1137,16 +1139,44 @@ emit_rcp (
       make_xmm( xmm_src ) );
 }
 
+#if HIGH_PRECISION
+static void XSTDCALL
+rsqrt4f(
+   float *store )
+{
+#ifdef WIN32
+   store[0] = 1.0F / (float) sqrt( (double) store[0] );
+   store[1] = 1.0F / (float) sqrt( (double) store[1] );
+   store[2] = 1.0F / (float) sqrt( (double) store[2] );
+   store[3] = 1.0F / (float) sqrt( (double) store[3] );
+#else
+   const unsigned X = TEMP_R0 * 16;
+   store[X + 0] = 1.0F / sqrt( store[X + 0] );
+   store[X + 1] = 1.0F / sqrt( store[X + 1] );
+   store[X + 2] = 1.0F / sqrt( store[X + 2] );
+   store[X + 3] = 1.0F / sqrt( store[X + 3] );
+#endif
+}
+#endif
+
 static void
 emit_rsqrt(
    struct x86_function *func,
    unsigned xmm_dst,
    unsigned xmm_src )
 {
+#if HIGH_PRECISION
+   emit_func_call_dst_src(
+      func,
+      xmm_dst,
+      xmm_src,
+      rsqrt4f );
+#else
    emit_rsqrtps(
       func,
       make_xmm( xmm_dst ),
       make_xmm( xmm_src ) );
+#endif
 }
 
 static void
-- 
cgit v1.2.3


From e430d885e0d819172068805b1492cb6f10eb5d7f Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 18 Apr 2008 11:15:53 -0600
Subject: gallium: a few comments

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 6f785be3f56..d47935e9828 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -1598,20 +1598,25 @@ emit_instruction(
             STORE( func, *inst, 0, 0, CHAN_Y );
          }
          if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+            /* XMM[1] = SrcReg[0].yyyy */
             FETCH( func, *inst, 1, 0, CHAN_Y );
+            /* XMM[1] = max(XMM[1], 0) */
             emit_maxps(
                func,
                make_xmm( 1 ),
                get_temp(
                   TGSI_EXEC_TEMP_00000000_I,
                   TGSI_EXEC_TEMP_00000000_C ) );
+            /* XMM[2] = SrcReg[0].wwww */
             FETCH( func, *inst, 2, 0, CHAN_W );
+            /* XMM[2] = min(XMM[2], 128.0) */
             emit_minps(
                func,
                make_xmm( 2 ),
                get_temp(
                   TGSI_EXEC_TEMP_128_I,
                   TGSI_EXEC_TEMP_128_C ) );
+            /* XMM[2] = max(XMM[2], -128.0) */
             emit_maxps(
                func,
                make_xmm( 2 ),
-- 
cgit v1.2.3


From 615cdd3a535bb71754baa8b37e79b85af01854dd Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Mon, 21 Apr 2008 12:39:59 +0100
Subject: tgsi: use new float math funcs, drop local disassembly code

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c |  88 ++---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 591 +++++++---------------------
 2 files changed, 178 insertions(+), 501 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 78e7dec5690..29e104bbd17 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -287,10 +287,10 @@ micro_abs(
    union tgsi_exec_channel *dst,
    const union tgsi_exec_channel *src )
 {
-   dst->f[0] = (float) fabs( (double) src->f[0] );
-   dst->f[1] = (float) fabs( (double) src->f[1] );
-   dst->f[2] = (float) fabs( (double) src->f[2] );
-   dst->f[3] = (float) fabs( (double) src->f[3] );
+   dst->f[0] = fabsf( src->f[0] );
+   dst->f[1] = fabsf( src->f[1] );
+   dst->f[2] = fabsf( src->f[2] );
+   dst->f[3] = fabsf( src->f[3] );
 }
 
 static void
@@ -334,10 +334,10 @@ micro_ceil(
    union tgsi_exec_channel *dst,
    const union tgsi_exec_channel *src )
 {
-   dst->f[0] = (float) ceil( (double) src->f[0] );
-   dst->f[1] = (float) ceil( (double) src->f[1] );
-   dst->f[2] = (float) ceil( (double) src->f[2] );
-   dst->f[3] = (float) ceil( (double) src->f[3] );
+   dst->f[0] = ceilf( src->f[0] );
+   dst->f[1] = ceilf( src->f[1] );
+   dst->f[2] = ceilf( src->f[2] );
+   dst->f[3] = ceilf( src->f[3] );
 }
 
 static void
@@ -345,10 +345,10 @@ micro_cos(
    union tgsi_exec_channel *dst,
    const union tgsi_exec_channel *src )
 {
-   dst->f[0] = (float) cos( (double) src->f[0] );
-   dst->f[1] = (float) cos( (double) src->f[1] );
-   dst->f[2] = (float) cos( (double) src->f[2] );
-   dst->f[3] = (float) cos( (double) src->f[3] );
+   dst->f[0] = cosf( src->f[0] );
+   dst->f[1] = cosf( src->f[1] );
+   dst->f[2] = cosf( src->f[2] );
+   dst->f[3] = cosf( src->f[3] );
 }
 
 static void
@@ -430,10 +430,10 @@ micro_exp2(
    union tgsi_exec_channel *dst,
    const union tgsi_exec_channel *src)
 {
-   dst->f[0] = (float) pow( 2.0, (double) src->f[0] );
-   dst->f[1] = (float) pow( 2.0, (double) src->f[1] );
-   dst->f[2] = (float) pow( 2.0, (double) src->f[2] );
-   dst->f[3] = (float) pow( 2.0, (double) src->f[3] );
+   dst->f[0] = powf( 2.0f, src->f[0] );
+   dst->f[1] = powf( 2.0f, src->f[1] );
+   dst->f[2] = powf( 2.0f, src->f[2] );
+   dst->f[3] = powf( 2.0f, src->f[3] );
 }
 
 static void
@@ -463,10 +463,10 @@ micro_flr(
    union tgsi_exec_channel *dst,
    const union tgsi_exec_channel *src )
 {
-   dst->f[0] = (float) floor( (double) src->f[0] );
-   dst->f[1] = (float) floor( (double) src->f[1] );
-   dst->f[2] = (float) floor( (double) src->f[2] );
-   dst->f[3] = (float) floor( (double) src->f[3] );
+   dst->f[0] = floorf( src->f[0] );
+   dst->f[1] = floorf( src->f[1] );
+   dst->f[2] = floorf( src->f[2] );
+   dst->f[3] = floorf( src->f[3] );
 }
 
 static void
@@ -474,10 +474,10 @@ micro_frc(
    union tgsi_exec_channel *dst,
    const union tgsi_exec_channel *src )
 {
-   dst->f[0] = src->f[0] - (float) floor( (double) src->f[0] );
-   dst->f[1] = src->f[1] - (float) floor( (double) src->f[1] );
-   dst->f[2] = src->f[2] - (float) floor( (double) src->f[2] );
-   dst->f[3] = src->f[3] - (float) floor( (double) src->f[3] );
+   dst->f[0] = src->f[0] - floorf( src->f[0] );
+   dst->f[1] = src->f[1] - floorf( src->f[1] );
+   dst->f[2] = src->f[2] - floorf( src->f[2] );
+   dst->f[3] = src->f[3] - floorf( src->f[3] );
 }
 
 static void
@@ -510,10 +510,10 @@ micro_lg2(
    union tgsi_exec_channel *dst,
    const union tgsi_exec_channel *src )
 {
-   dst->f[0] = (float) log( (double) src->f[0] ) * 1.442695f;
-   dst->f[1] = (float) log( (double) src->f[1] ) * 1.442695f;
-   dst->f[2] = (float) log( (double) src->f[2] ) * 1.442695f;
-   dst->f[3] = (float) log( (double) src->f[3] ) * 1.442695f;
+   dst->f[0] = logf( src->f[0] ) * 1.442695f;
+   dst->f[1] = logf( src->f[1] ) * 1.442695f;
+   dst->f[2] = logf( src->f[2] ) * 1.442695f;
+   dst->f[3] = logf( src->f[3] ) * 1.442695f;
 }
 
 static void
@@ -764,10 +764,10 @@ micro_pow(
    const union tgsi_exec_channel *src0,
    const union tgsi_exec_channel *src1 )
 {
-   dst->f[0] = (float) pow( (double) src0->f[0], (double) src1->f[0] );
-   dst->f[1] = (float) pow( (double) src0->f[1], (double) src1->f[1] );
-   dst->f[2] = (float) pow( (double) src0->f[2], (double) src1->f[2] );
-   dst->f[3] = (float) pow( (double) src0->f[3], (double) src1->f[3] );
+   dst->f[0] = powf( src0->f[0], src1->f[0] );
+   dst->f[1] = powf( src0->f[1], src1->f[1] );
+   dst->f[2] = powf( src0->f[2], src1->f[2] );
+   dst->f[3] = powf( src0->f[3], src1->f[3] );
 }
 
 static void
@@ -775,10 +775,10 @@ micro_rnd(
    union tgsi_exec_channel *dst,
    const union tgsi_exec_channel *src )
 {
-   dst->f[0] = (float) floor( (double) (src->f[0] + 0.5f) );
-   dst->f[1] = (float) floor( (double) (src->f[1] + 0.5f) );
-   dst->f[2] = (float) floor( (double) (src->f[2] + 0.5f) );
-   dst->f[3] = (float) floor( (double) (src->f[3] + 0.5f) );
+   dst->f[0] = floorf( src->f[0] + 0.5f );
+   dst->f[1] = floorf( src->f[1] + 0.5f );
+   dst->f[2] = floorf( src->f[2] + 0.5f );
+   dst->f[3] = floorf( src->f[3] + 0.5f );
 }
 
 static void
@@ -833,20 +833,20 @@ micro_sin(
    union tgsi_exec_channel *dst,
    const union tgsi_exec_channel *src )
 {
-   dst->f[0] = (float) sin( (double) src->f[0] );
-   dst->f[1] = (float) sin( (double) src->f[1] );
-   dst->f[2] = (float) sin( (double) src->f[2] );
-   dst->f[3] = (float) sin( (double) src->f[3] );
+   dst->f[0] = sinf( src->f[0] );
+   dst->f[1] = sinf( src->f[1] );
+   dst->f[2] = sinf( src->f[2] );
+   dst->f[3] = sinf( src->f[3] );
 }
 
 static void
 micro_sqrt( union tgsi_exec_channel *dst,
             const union tgsi_exec_channel *src )
 {
-   dst->f[0] = (float) sqrt( (double) src->f[0] );
-   dst->f[1] = (float) sqrt( (double) src->f[1] );
-   dst->f[2] = (float) sqrt( (double) src->f[2] );
-   dst->f[3] = (float) sqrt( (double) src->f[3] );
+   dst->f[0] = sqrtf( src->f[0] );
+   dst->f[1] = sqrtf( src->f[1] );
+   dst->f[2] = sqrtf( src->f[2] );
+   dst->f[3] = sqrtf( src->f[3] );
 }
 
 static void
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index d47935e9828..c3295a27fff 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -38,113 +38,6 @@
 
 #define HIGH_PRECISION 1  /* for 1/sqrt() */
 
-#define DUMP_SSE  0
-
-#if DUMP_SSE
-
-static void
-_print_reg(
-   struct x86_reg reg )
-{
-   if (reg.mod != mod_REG) 
-      debug_printf( "[" );
-      
-   switch( reg.file ) {
-   case file_REG32:
-      switch( reg.idx ) {
-      case reg_AX:
-         debug_printf( "EAX" );
-         break;
-      case reg_CX:
-         debug_printf( "ECX" );
-         break;
-      case reg_DX:
-         debug_printf( "EDX" );
-         break;
-      case reg_BX:
-         debug_printf( "EBX" );
-         break;
-      case reg_SP:
-         debug_printf( "ESP" );
-         break;
-      case reg_BP:
-         debug_printf( "EBP" );
-         break;
-      case reg_SI:
-         debug_printf( "ESI" );
-         break;
-      case reg_DI:
-         debug_printf( "EDI" );
-         break;
-      }
-      break;
-   case file_MMX:
-      assert( 0 );
-      break;
-   case file_XMM:
-      debug_printf( "XMM%u", reg.idx );
-      break;
-   case file_x87:
-      assert( 0 );
-      break;
-   }
-
-   if (reg.mod == mod_DISP8 ||
-       reg.mod == mod_DISP32)
-      debug_printf("+%d", reg.disp);
-
-   if (reg.mod != mod_REG) 
-      debug_printf( "]" );
-}
-
-static void
-_fill(
-   const char  *op )
-{
-   unsigned count = 10 - strlen( op );
-
-   while( count-- ) {
-      debug_printf( " " );
-   }
-}
-
-#define DUMP_START() debug_printf( "\nsse-dump start ----------------" )
-#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" )
-#define DUMP( OP ) debug_printf( "\n%s", OP )
-#define DUMP_I( OP, I ) do {\
-   debug_printf( "\n%s", OP );\
-   _fill( OP );\
-   debug_printf( "%u", I ); } while( 0 )
-#define DUMP_R( OP, R0 ) do {\
-   debug_printf( "\n%s", OP );\
-   _fill( OP );\
-   _print_reg( R0 ); } while( 0 )
-#define DUMP_RR( OP, R0, R1 ) do {\
-   debug_printf( "\n%s", OP );\
-   _fill( OP );\
-   _print_reg( R0 );\
-   debug_printf( ", " );\
-   _print_reg( R1 ); } while( 0 )
-#define DUMP_RRI( OP, R0, R1, I ) do {\
-   debug_printf( "\n%s", OP );\
-   _fill( OP );\
-   _print_reg( R0 );\
-   debug_printf( ", " );\
-   _print_reg( R1 );\
-   debug_printf( ", " );\
-   debug_printf( "%u", I ); } while( 0 )
-
-#else
-
-#define DUMP_START()
-#define DUMP_END()
-#define DUMP( OP )
-#define DUMP_I( OP, I )
-#define DUMP_R( OP, R0 )
-#define DUMP_RR( OP, R0, R1 )
-#define DUMP_RRI( OP, R0, R1, I )
-
-#endif
 
 #define FOR_EACH_CHANNEL( CHAN )\
    for( CHAN = 0; CHAN < 4; CHAN++ )
@@ -310,200 +203,6 @@ get_coef(
       ((vec * 3 + member) * 4 + chan) * 4 );
 }
 
-/**
- * X86 rtasm wrappers.
- */
-
-static void
-emit_addps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "ADDPS", dst, src );
-   sse_addps( func, dst, src );
-}
-
-static void
-emit_andnps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "ANDNPS", dst, src );
-   sse_andnps( func, dst, src );
-}
-
-static void
-emit_andps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "ANDPS", dst, src );
-   sse_andps( func, dst, src );
-}
-
-static void
-emit_call(
-   struct x86_function  *func,
-   void                 (* addr)() )
-{
-   struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-
-   DUMP_I( "CALL", addr );
-   x86_mov_reg_imm( func, ecx, (unsigned long) addr );
-   x86_call( func, ecx );
-}
-
-static void
-emit_cmpps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src,
-   enum sse_cc          cc )
-{
-   DUMP_RRI( "CMPPS", dst, src, cc );
-   sse_cmpps( func, dst, src, cc );
-}
-
-static void
-emit_cvttps2dq(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "CVTTPS2DQ", dst, src );
-   sse2_cvttps2dq( func, dst, src );
-}
-
-static void
-emit_maxps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "MAXPS", dst, src );
-   sse_maxps( func, dst, src );
-}
-
-static void
-emit_minps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "MINPS", dst, src );
-   sse_minps( func, dst, src );
-}
-
-static void
-emit_mov(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "MOV", dst, src );
-   x86_mov( func, dst, src );
-}
-
-static void
-emit_movaps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "MOVAPS", dst, src );
-   sse_movaps( func, dst, src );
-}
-
-static void
-emit_movss(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "MOVSS", dst, src );
-   sse_movss( func, dst, src );
-}
-
-static void
-emit_movups(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "MOVUPS", dst, src );
-   sse_movups( func, dst, src );
-}
-
-static void
-emit_mulps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "MULPS", dst, src );
-   sse_mulps( func, dst, src );
-}
-
-static void
-emit_or(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "OR", dst, src );
-   x86_or( func, dst, src );
-}
-
-static void
-emit_orps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "ORPS", dst, src );
-   sse_orps( func, dst, src );
-}
-
-static void
-emit_pmovmskb(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "PMOVMSKB", dst, src );
-   sse_pmovmskb( func, dst, src );
-}
-
-static void
-emit_pop(
-   struct x86_function  *func,
-   struct x86_reg       dst )
-{
-   DUMP_R( "POP", dst );
-   x86_pop( func, dst );
-}
-
-static void
-emit_push(
-   struct x86_function  *func,
-   struct x86_reg       dst )
-{
-   DUMP_R( "PUSH", dst );
-   x86_push( func, dst );
-}
-
-static void
-emit_rcpps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "RCPPS", dst, src );
-   sse2_rcpps( func, dst, src );
-}
 
 #ifdef WIN32
 static void
@@ -511,7 +210,6 @@ emit_retw(
    struct x86_function  *func,
    unsigned             size )
 {
-   DUMP_I( "RET", size );
    x86_retw( func, size );
 }
 #else
@@ -519,51 +217,10 @@ static void
 emit_ret(
    struct x86_function  *func )
 {
-   DUMP( "RET" );
    x86_ret( func );
 }
 #endif
 
-static void
-emit_rsqrtps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "RSQRTPS", dst, src );
-   sse_rsqrtps( func, dst, src );
-}
-
-static void
-emit_shufps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src,
-   unsigned char        shuf )
-{
-   DUMP_RRI( "SHUFPS", dst, src, shuf );
-   sse_shufps( func, dst, src, shuf );
-}
-
-static void
-emit_subps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "SUBPS", dst, src );
-   sse_subps( func, dst, src );
-}
-
-static void
-emit_xorps(
-   struct x86_function  *func,
-   struct x86_reg       dst,
-   struct x86_reg       src )
-{
-   DUMP_RR( "XORPS", dst, src );
-   sse_xorps( func, dst, src );
-}
 
 /**
  * Data fetch helpers.
@@ -582,11 +239,11 @@ emit_const(
    unsigned vec,
    unsigned chan )
 {
-   emit_movss(
+   sse_movss(
       func,
       make_xmm( xmm ),
       get_const( vec, chan ) );
-   emit_shufps(
+   sse_shufps(
       func,
       make_xmm( xmm ),
       make_xmm( xmm ),
@@ -600,11 +257,11 @@ emit_immediate(
    unsigned vec,
    unsigned chan )
 {
-   emit_movss(
+   sse_movss(
       func,
       make_xmm( xmm ),
       get_immediate( vec, chan ) );
-   emit_shufps(
+   sse_shufps(
       func,
       make_xmm( xmm ),
       make_xmm( xmm ),
@@ -625,7 +282,7 @@ emit_inputf(
    unsigned vec,
    unsigned chan )
 {
-   emit_movups(
+   sse_movups(
       func,
       make_xmm( xmm ),
       get_input( vec, chan ) );
@@ -644,7 +301,7 @@ emit_output(
    unsigned vec,
    unsigned chan )
 {
-   emit_movups(
+   sse_movups(
       func,
       get_output( vec, chan ),
       make_xmm( xmm ) );
@@ -663,7 +320,7 @@ emit_tempf(
    unsigned vec,
    unsigned chan )
 {
-   emit_movaps(
+   sse_movaps(
       func,
       make_xmm( xmm ),
       get_temp( vec, chan ) );
@@ -684,11 +341,11 @@ emit_coef(
    unsigned chan,
    unsigned member )
 {
-   emit_movss(
+   sse_movss(
       func,
       make_xmm( xmm ),
       get_coef( vec, chan, member ) );
-   emit_shufps(
+   sse_shufps(
       func,
       make_xmm( xmm ),
       make_xmm( xmm ),
@@ -706,7 +363,7 @@ emit_inputs(
    unsigned vec,
    unsigned chan )
 {
-   emit_movups(
+   sse_movups(
       func,
       get_input( vec, chan ),
       make_xmm( xmm ) );
@@ -719,7 +376,7 @@ emit_temps(
    unsigned vec,
    unsigned chan )
 {
-   emit_movaps(
+   sse_movaps(
       func,
       get_temp( vec, chan ),
       make_xmm( xmm ) );
@@ -796,39 +453,39 @@ static void
 emit_push_gp(
    struct x86_function *func )
 {
-   emit_push(
+   x86_push(
       func,
       get_const_base() );
-   emit_push(
+   x86_push(
       func,
       get_input_base() );
-   emit_push(
+   x86_push(
       func,
       get_output_base() );
 
    /* It is important on non-win32 platforms that temp base is pushed last.
     */
-   emit_push(
+   x86_push(
       func,
       get_temp_base() );
 }
 
 static void
-emit_pop_gp(
+x86_pop_gp(
    struct x86_function *func )
 {
    /* Restore GP registers in a reverse order.
     */
-   emit_pop(
+   x86_pop(
       func,
       get_temp_base() );
-   emit_pop(
+   x86_pop(
       func,
       get_output_base() );
-   emit_pop(
+   x86_pop(
       func,
       get_input_base() );
-   emit_pop(
+   x86_pop(
       func,
       get_const_base() );
 }
@@ -839,7 +496,7 @@ emit_func_call_dst(
    unsigned xmm_dst,
    void (*code)() )
 {
-   emit_movaps(
+   sse_movaps(
       func,
       get_temp( TEMP_R0, 0 ),
       make_xmm( xmm_dst ) );
@@ -848,19 +505,22 @@ emit_func_call_dst(
       func );
 
 #ifdef WIN32
-   emit_push(
+   x86_push(
       func,
       get_temp( TEMP_R0, 0 ) );
 #endif
 
-   emit_call(
-      func,
-      code );
+   {
+      struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+
+      x86_mov_reg_imm( func, ecx, (unsigned long) code );
+      x86_call( func, ecx );
+   }
 
-   emit_pop_gp(
+   x86_pop_gp(
       func );
 
-   emit_movaps(
+   sse_movaps(
       func,
       make_xmm( xmm_dst ),
       get_temp( TEMP_R0, 0 ) );
@@ -873,7 +533,7 @@ emit_func_call_dst_src(
    unsigned xmm_src,
    void (*code)() )
 {
-   emit_movaps(
+   sse_movaps(
       func,
       get_temp( TEMP_R0, 1 ),
       make_xmm( xmm_src ) );
@@ -893,7 +553,7 @@ emit_abs(
    struct x86_function *func,
    unsigned xmm )
 {
-   emit_andps(
+   sse_andps(
       func,
       make_xmm( xmm ),
       get_temp(
@@ -907,7 +567,7 @@ emit_add(
    unsigned xmm_dst,
    unsigned xmm_src )
 {
-   emit_addps(
+   sse_addps(
       func,
       make_xmm( xmm_dst ),
       make_xmm( xmm_src ) );
@@ -918,17 +578,15 @@ cos4f(
    float *store )
 {
 #ifdef WIN32
-   store[0] = (float) cos( (double) store[0] );
-   store[1] = (float) cos( (double) store[1] );
-   store[2] = (float) cos( (double) store[2] );
-   store[3] = (float) cos( (double) store[3] );
+   const unsigned X = 0;
 #else
    const unsigned X = TEMP_R0 * 16;
+#endif
+
    store[X + 0] = cosf( store[X + 0] );
    store[X + 1] = cosf( store[X + 1] );
    store[X + 2] = cosf( store[X + 2] );
    store[X + 3] = cosf( store[X + 3] );
-#endif
 }
 
 static void
@@ -947,17 +605,14 @@ ex24f(
    float *store )
 {
 #ifdef WIN32
-   store[0] = (float) pow( 2.0, (double) store[0] );
-   store[1] = (float) pow( 2.0, (double) store[1] );
-   store[2] = (float) pow( 2.0, (double) store[2] );
-   store[3] = (float) pow( 2.0, (double) store[3] );
+   const unsigned X = 0;
 #else
    const unsigned X = TEMP_R0 * 16;
+#endif
    store[X + 0] = powf( 2.0f, store[X + 0] );
    store[X + 1] = powf( 2.0f, store[X + 1] );
    store[X + 2] = powf( 2.0f, store[X + 2] );
    store[X + 3] = powf( 2.0f, store[X + 3] );
-#endif
 }
 
 static void
@@ -976,7 +631,7 @@ emit_f2it(
    struct x86_function *func,
    unsigned xmm )
 {
-   emit_cvttps2dq(
+   sse2_cvttps2dq(
       func,
       make_xmm( xmm ),
       make_xmm( xmm ) );
@@ -991,10 +646,10 @@ flr4f(
 #else
    const unsigned X = TEMP_R0 * 16;
 #endif
-   store[X + 0] = (float) floor( (double) store[X + 0] );
-   store[X + 1] = (float) floor( (double) store[X + 1] );
-   store[X + 2] = (float) floor( (double) store[X + 2] );
-   store[X + 3] = (float) floor( (double) store[X + 3] );
+   store[X + 0] = floorf( store[X + 0] );
+   store[X + 1] = floorf( store[X + 1] );
+   store[X + 2] = floorf( store[X + 2] );
+   store[X + 3] = floorf( store[X + 3] );
 }
 
 static void
@@ -1017,10 +672,10 @@ frc4f(
 #else
    const unsigned X = TEMP_R0 * 16;
 #endif
-   store[X + 0] -= (float) floor( (double) store[X + 0] );
-   store[X + 1] -= (float) floor( (double) store[X + 1] );
-   store[X + 2] -= (float) floor( (double) store[X + 2] );
-   store[X + 3] -= (float) floor( (double) store[X + 3] );
+   store[X + 0] -= floorf( store[X + 0] );
+   store[X + 1] -= floorf( store[X + 1] );
+   store[X + 2] -= floorf( store[X + 2] );
+   store[X + 3] -= floorf( store[X + 3] );
 }
 
 static void
@@ -1066,7 +721,7 @@ emit_MOV(
    unsigned xmm_dst,
    unsigned xmm_src )
 {
-   emit_movups(
+   sse_movups(
       func,
       make_xmm( xmm_dst ),
       make_xmm( xmm_src ) );
@@ -1077,7 +732,7 @@ emit_mul (struct x86_function *func,
           unsigned xmm_dst,
           unsigned xmm_src)
 {
-   emit_mulps(
+   sse_mulps(
       func,
       make_xmm( xmm_dst ),
       make_xmm( xmm_src ) );
@@ -1088,7 +743,7 @@ emit_neg(
    struct x86_function *func,
    unsigned xmm )
 {
-   emit_xorps(
+   sse_xorps(
       func,
       make_xmm( xmm ),
       get_temp(
@@ -1101,17 +756,14 @@ pow4f(
    float *store )
 {
 #ifdef WIN32
-   store[0] = (float) pow( (double) store[0], (double) store[4] );
-   store[1] = (float) pow( (double) store[1], (double) store[5] );
-   store[2] = (float) pow( (double) store[2], (double) store[6] );
-   store[3] = (float) pow( (double) store[3], (double) store[7] );
+   const unsigned X = 0;
 #else
    const unsigned X = TEMP_R0 * 16;
+#endif
    store[X + 0] = powf( store[X + 0], store[X + 4] );
    store[X + 1] = powf( store[X + 1], store[X + 5] );
    store[X + 2] = powf( store[X + 2], store[X + 6] );
    store[X + 3] = powf( store[X + 3], store[X + 7] );
-#endif
 }
 
 static void
@@ -1133,7 +785,11 @@ emit_rcp (
    unsigned xmm_dst,
    unsigned xmm_src )
 {
-   emit_rcpps(
+   /* On Intel CPUs at least, this is only accurate to 12 bits -- not
+    * good enough.  Need to either emit a proper divide or use the
+    * iterative technique described below in emit_rsqrt().
+    */
+   sse2_rcpps(
       func,
       make_xmm( xmm_dst ),
       make_xmm( xmm_src ) );
@@ -1145,17 +801,14 @@ rsqrt4f(
    float *store )
 {
 #ifdef WIN32
-   store[0] = 1.0F / (float) sqrt( (double) store[0] );
-   store[1] = 1.0F / (float) sqrt( (double) store[1] );
-   store[2] = 1.0F / (float) sqrt( (double) store[2] );
-   store[3] = 1.0F / (float) sqrt( (double) store[3] );
+   const unsigned X = 0;
 #else
    const unsigned X = TEMP_R0 * 16;
-   store[X + 0] = 1.0F / sqrt( store[X + 0] );
-   store[X + 1] = 1.0F / sqrt( store[X + 1] );
-   store[X + 2] = 1.0F / sqrt( store[X + 2] );
-   store[X + 3] = 1.0F / sqrt( store[X + 3] );
 #endif
+   store[X + 0] = 1.0F / sqrtf( store[X + 0] );
+   store[X + 1] = 1.0F / sqrtf( store[X + 1] );
+   store[X + 2] = 1.0F / sqrtf( store[X + 2] );
+   store[X + 3] = 1.0F / sqrtf( store[X + 3] );
 }
 #endif
 
@@ -1166,12 +819,41 @@ emit_rsqrt(
    unsigned xmm_src )
 {
 #if HIGH_PRECISION
+#if 1
    emit_func_call_dst_src(
       func,
       xmm_dst,
       xmm_src,
       rsqrt4f );
 #else
+   /* Although rsqrtps() and rcpps() are low precision on some/all SSE
+    * implementations, it is possible to improve its precision at
+    * fairly low cost, using a newton/raphson step, as below:
+    * 
+    * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
+    * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
+    *
+    * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
+    */
+   /* This is some code that woudl do the above for a scalar 'a'.  We
+    * obviously are interested in a vector version:
+    *
+    * movss   xmm3, a;
+    * movss   xmm1, half;
+    * movss   xmm2, three;
+    * rsqrtss xmm0, xmm3;
+    * mulss   xmm3, xmm0;
+    * mulss   xmm1, xmm0;
+    * mulss   xmm3, xmm0;
+    * subss   xmm2, xmm3;
+    * mulss   xmm1, xmm2;
+    * movss   x,    xmm1;
+    */
+#endif
+#else
+   /* On Intel CPUs at least, this is only accurate to 12 bits -- not
+    * good enough.
+    */
    emit_rsqrtps(
       func,
       make_xmm( xmm_dst ),
@@ -1184,7 +866,7 @@ emit_setsign(
    struct x86_function *func,
    unsigned xmm )
 {
-   emit_orps(
+   sse_orps(
       func,
       make_xmm( xmm ),
       get_temp(
@@ -1197,17 +879,14 @@ sin4f(
    float *store )
 {
 #ifdef WIN32
-   store[0] = (float) sin( (double) store[0] );
-   store[1] = (float) sin( (double) store[1] );
-   store[2] = (float) sin( (double) store[2] );
-   store[3] = (float) sin( (double) store[3] );
+   const unsigned X = 0;
 #else
    const unsigned X = TEMP_R0 * 16;
+#endif
    store[X + 0] = sinf( store[X + 0] );
    store[X + 1] = sinf( store[X + 1] );
    store[X + 2] = sinf( store[X + 2] );
    store[X + 3] = sinf( store[X + 3] );
-#endif
 }
 
 static void
@@ -1226,7 +905,7 @@ emit_sub(
    unsigned xmm_dst,
    unsigned xmm_src )
 {
-   emit_subps(
+   sse_subps(
       func,
       make_xmm( xmm_dst ),
       make_xmm( xmm_src ) );
@@ -1435,16 +1114,16 @@ emit_kil(
       }
    }
 
-   emit_push(
+   x86_push(
       func,
       x86_make_reg( file_REG32, reg_AX ) );
-   emit_push(
+   x86_push(
       func,
       x86_make_reg( file_REG32, reg_DX ) );
 
    FOR_EACH_CHANNEL( chan_index ) {
       if( uniquemask & (1 << chan_index) ) {
-         emit_cmpps(
+         sse_cmpps(
             func,
             make_xmm( registers[chan_index] ),
             get_temp(
@@ -1453,17 +1132,17 @@ emit_kil(
             cc_LessThan );
 
          if( chan_index == firstchan ) {
-            emit_pmovmskb(
+            sse_pmovmskb(
                func,
                x86_make_reg( file_REG32, reg_AX ),
                make_xmm( registers[chan_index] ) );
          }
          else {
-            emit_pmovmskb(
+            sse_pmovmskb(
                func,
                x86_make_reg( file_REG32, reg_DX ),
                make_xmm( registers[chan_index] ) );
-            emit_or(
+            x86_or(
                func,
                x86_make_reg( file_REG32, reg_AX ),
                x86_make_reg( file_REG32, reg_DX ) );
@@ -1471,17 +1150,17 @@ emit_kil(
       }
    }
 
-   emit_or(
+   x86_or(
       func,
       get_temp(
          TGSI_EXEC_TEMP_KILMASK_I,
          TGSI_EXEC_TEMP_KILMASK_C ),
       x86_make_reg( file_REG32, reg_AX ) );
 
-   emit_pop(
+   x86_pop(
       func,
       x86_make_reg( file_REG32, reg_DX ) );
-   emit_pop(
+   x86_pop(
       func,
       x86_make_reg( file_REG32, reg_AX ) );
 }
@@ -1497,12 +1176,12 @@ emit_setcc(
    FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
       FETCH( func, *inst, 0, 0, chan_index );
       FETCH( func, *inst, 1, 1, chan_index );
-      emit_cmpps(
+      sse_cmpps(
          func,
          make_xmm( 0 ),
          make_xmm( 1 ),
          cc );
-      emit_andps(
+      sse_andps(
          func,
          make_xmm( 0 ),
          get_temp(
@@ -1523,22 +1202,22 @@ emit_cmp(
       FETCH( func, *inst, 0, 0, chan_index );
       FETCH( func, *inst, 1, 1, chan_index );
       FETCH( func, *inst, 2, 2, chan_index );
-      emit_cmpps(
+      sse_cmpps(
          func,
          make_xmm( 0 ),
          get_temp(
             TGSI_EXEC_TEMP_00000000_I,
             TGSI_EXEC_TEMP_00000000_C ),
          cc_LessThan );
-      emit_andps(
+      sse_andps(
          func,
          make_xmm( 1 ),
          make_xmm( 0 ) );
-      emit_andnps(
+      sse_andnps(
          func,
          make_xmm( 0 ),
          make_xmm( 2 ) );
-      emit_orps(
+      sse_orps(
          func,
          make_xmm( 0 ),
          make_xmm( 1 ) );
@@ -1589,7 +1268,7 @@ emit_instruction(
           IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
          if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
             FETCH( func, *inst, 0, 0, CHAN_X );
-            emit_maxps(
+            sse_maxps(
                func,
                make_xmm( 0 ),
                get_temp(
@@ -1601,7 +1280,7 @@ emit_instruction(
             /* XMM[1] = SrcReg[0].yyyy */
             FETCH( func, *inst, 1, 0, CHAN_Y );
             /* XMM[1] = max(XMM[1], 0) */
-            emit_maxps(
+            sse_maxps(
                func,
                make_xmm( 1 ),
                get_temp(
@@ -1610,14 +1289,14 @@ emit_instruction(
             /* XMM[2] = SrcReg[0].wwww */
             FETCH( func, *inst, 2, 0, CHAN_W );
             /* XMM[2] = min(XMM[2], 128.0) */
-            emit_minps(
+            sse_minps(
                func,
                make_xmm( 2 ),
                get_temp(
                   TGSI_EXEC_TEMP_128_I,
                   TGSI_EXEC_TEMP_128_C ) );
             /* XMM[2] = max(XMM[2], -128.0) */
-            emit_maxps(
+            sse_maxps(
                func,
                make_xmm( 2 ),
                get_temp(
@@ -1625,16 +1304,16 @@ emit_instruction(
                   TGSI_EXEC_TEMP_MINUS_128_C ) );
             emit_pow( func, 1, 2 );
             FETCH( func, *inst, 0, 0, CHAN_X );
-            emit_xorps(
+            sse_xorps(
                func,
                make_xmm( 2 ),
                make_xmm( 2 ) );
-            emit_cmpps(
+            sse_cmpps(
                func,
                make_xmm( 2 ),
                make_xmm( 0 ),
                cc_LessThanEqual );
-            emit_andps(
+            sse_andps(
                func,
                make_xmm( 2 ),
                make_xmm( 1 ) );
@@ -1756,7 +1435,7 @@ emit_instruction(
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 0, 0, chan_index );
          FETCH( func, *inst, 1, 1, chan_index );
-         emit_minps(
+         sse_minps(
             func,
             make_xmm( 0 ),
             make_xmm( 1 ) );
@@ -1768,7 +1447,7 @@ emit_instruction(
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 0, 0, chan_index );
          FETCH( func, *inst, 1, 1, chan_index );
-         emit_maxps(
+         sse_maxps(
             func,
             make_xmm( 0 ),
             make_xmm( 1 ) );
@@ -2376,8 +2055,6 @@ tgsi_emit_sse2(
    unsigned ok = 1;
    uint num_immediates = 0;
 
-   DUMP_START();
-
    func->csr = func->store;
 
    tgsi_parse_init( &parse, tokens );
@@ -2387,24 +2064,24 @@ tgsi_emit_sse2(
     */
    if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
       /* DECLARATION phase, do not load output argument. */
-      emit_mov(
+      x86_mov(
          func,
          get_input_base(),
          get_argument( 0 ) );
       /* skipping outputs argument here */
-      emit_mov(
+      x86_mov(
          func,
          get_const_base(),
          get_argument( 2 ) );
-      emit_mov(
+      x86_mov(
          func,
          get_temp_base(),
          get_argument( 3 ) );
-      emit_mov(
+      x86_mov(
          func,
          get_coef_base(),
          get_argument( 4 ) );
-      emit_mov(
+      x86_mov(
          func,
          get_immediate_base(),
          get_argument( 5 ) );
@@ -2412,23 +2089,23 @@ tgsi_emit_sse2(
    else {
       assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
 
-      emit_mov(
+      x86_mov(
          func,
          get_input_base(),
          get_argument( 0 ) );
-      emit_mov(
+      x86_mov(
          func,
          get_output_base(),
          get_argument( 1 ) );
-      emit_mov(
+      x86_mov(
          func,
          get_const_base(),
          get_argument( 2 ) );
-      emit_mov(
+      x86_mov(
          func,
          get_temp_base(),
          get_argument( 3 ) );
-      emit_mov(
+      x86_mov(
          func,
          get_immediate_base(),
          get_argument( 4 ) );
@@ -2451,7 +2128,7 @@ tgsi_emit_sse2(
             if( !instruction_phase ) {
                /* INSTRUCTION phase, overwrite coeff with output. */
                instruction_phase = TRUE;
-               emit_mov(
+               x86_mov(
                   func,
                   get_output_base(),
                   get_argument( 1 ) );
@@ -2463,8 +2140,10 @@ tgsi_emit_sse2(
             &parse.FullToken.FullInstruction );
 
 	 if (!ok) {
-	    debug_printf("failed to translate tgsi opcode %d to SSE\n", 
-			 parse.FullToken.FullInstruction.Instruction.Opcode );
+	    debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", 
+			 parse.FullToken.FullInstruction.Instruction.Opcode,
+                         parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
+                         "vertex shader" : "fragment shader");
 	 }
          break;
 
@@ -2499,8 +2178,6 @@ tgsi_emit_sse2(
 
    tgsi_parse_free( &parse );
 
-   DUMP_END();
-
    return ok;
 }
 
-- 
cgit v1.2.3


From e29583afcb238cf7a70089cfdf50a69ca277c53a Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Tue, 22 Apr 2008 00:14:08 +0900
Subject: gallium: Include dependent header.

---
 src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
index a98e88e3437..da0121c482e 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -1,6 +1,8 @@
 #if !defined TGSI_PARSE_H
 #define TGSI_PARSE_H
 
+#include "pipe/p_shader_tokens.h"
+
 #if defined __cplusplus
 extern "C" {
 #endif
-- 
cgit v1.2.3


From 2325d1959783aaf57178a86d8a0b28f168761e13 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Fri, 25 Apr 2008 14:20:02 +0100
Subject: tgsi: fix compile when HIGH_PRECISION not defined

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index c3295a27fff..559370e613a 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -854,7 +854,7 @@ emit_rsqrt(
    /* On Intel CPUs at least, this is only accurate to 12 bits -- not
     * good enough.
     */
-   emit_rsqrtps(
+   sse_rsqrtps(
       func,
       make_xmm( xmm_dst ),
       make_xmm( xmm_src ) );
-- 
cgit v1.2.3


From a94aad297d9804688f888a5112326104a5d00e07 Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Fri, 25 Apr 2008 18:46:29 -0600
Subject: use PIPE_ARCH_X86

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 559370e613a..9061e00b635 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -34,7 +34,7 @@
 
 #include "rtasm/rtasm_x86sse.h"
 
-#if defined(__i386__) || defined(__386__)
+#ifdef PIPE_ARCH_X86
 
 #define HIGH_PRECISION 1  /* for 1/sqrt() */
 
@@ -2181,4 +2181,4 @@ tgsi_emit_sse2(
    return ok;
 }
 
-#endif /* i386 */
+#endif /* PIPE_ARCH_X86 */
-- 
cgit v1.2.3


From a8e39b6f5a1fedf2f8719e1adb8802ebbfc09688 Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Fri, 25 Apr 2008 19:25:26 -0600
Subject: gallium: fix broken SGT, SLE

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 29e104bbd17..d55f907c0d3 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -516,6 +516,20 @@ micro_lg2(
    dst->f[3] = logf( src->f[3] ) * 1.442695f;
 }
 
+static void
+micro_le(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
+   dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
+   dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
+   dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
+}
+
 static void
 micro_lt(
    union tgsi_exec_channel *dst,
@@ -1975,7 +1989,7 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
          STORE( &r[0], 0, chan_index );
       }
       break;
@@ -1992,7 +2006,7 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
          STORE( &r[0], 0, chan_index );
       }
       break;
-- 
cgit v1.2.3


From 58d3dff0d3115ddd5397b7f77b5bcf4f9ca616b6 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@ubuntu-vbox.(none)>
Date: Mon, 28 Apr 2008 18:50:27 +0200
Subject: gallium: Generate SSE code to swizzle and unswizzle vs inputs and
 outputs.

Change SSE_SWIZZLES #define to 0 to disable it.
---
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |   3 +-
 src/gallium/auxiliary/draw/draw_vs_sse.c           |  52 ++++++--
 src/gallium/auxiliary/rtasm/rtasm_x86sse.c         |  14 ++
 src/gallium/auxiliary/rtasm/rtasm_x86sse.h         |   2 +
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c        | 142 ++++++++++++++++++++-
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h        |   4 +-
 src/gallium/drivers/softpipe/sp_fs_sse.c           |   2 +-
 7 files changed, 204 insertions(+), 15 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index f0763dad8d7..4ec20493c4c 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -109,9 +109,10 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
    struct draw_context *draw = fpme->draw;
    struct draw_vertex_shader *shader = draw->vertex_shader;
    unsigned opt = fpme->opt;
+   unsigned alloc_count = align_int( fetch_count, 4 );
 
    struct vertex_header *pipeline_verts = 
-      (struct vertex_header *)MALLOC(fpme->vertex_size * fetch_count);
+      (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
 
    if (!pipeline_verts) {
       /* Not much we can do here - just skip the rendering.
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index b1e9f671147..07f85bc448f 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -47,14 +47,29 @@
 #include "tgsi/util/tgsi_parse.h"
 
 #define SSE_MAX_VERTICES 4
+#define SSE_SWIZZLES 1
 
+#if SSE_SWIZZLES
+typedef void (XSTDCALL *codegen_function) (
+   const struct tgsi_exec_vector *input,
+   struct tgsi_exec_vector *output,
+   float (*constant)[4],
+   struct tgsi_exec_vector *temporary,
+   float (*immediates)[4],
+   const float (*aos_input)[4],
+   uint num_inputs,
+   uint input_stride,
+   float (*aos_output)[4],
+   uint num_outputs,
+   uint output_stride );
+#else
 typedef void (XSTDCALL *codegen_function) (
    const struct tgsi_exec_vector *input,
    struct tgsi_exec_vector *output,
    float (*constant)[4],
    struct tgsi_exec_vector *temporary,
    float (*immediates)[4] );
-
+#endif
 
 struct draw_sse_vertex_shader {
    struct draw_vertex_shader base;
@@ -91,12 +106,31 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
 {
    struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
    struct tgsi_exec_machine *machine = shader->machine;
-   unsigned int i, j;
-   unsigned slot;
+   unsigned int i;
 
    for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
       unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
 
+#if SSE_SWIZZLES
+      /* run compiled shader
+       */
+      shader->func(machine->Inputs,
+		   machine->Outputs,
+		   (float (*)[4])constants,
+		   machine->Temps,
+		   shader->immediates,
+                   input,
+                   base->info.num_inputs,
+                   input_stride,
+                   output,
+                   base->info.num_outputs,
+                   output_stride );
+
+      input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
+      output = (float (*)[4])((char *)output + output_stride * max_vertices);
+#else
+      unsigned int j, slot;
+
       /* Swizzle inputs.  
        */
       for (j = 0; j < max_vertices; j++) {
@@ -105,10 +139,10 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
             machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
             machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
             machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
-         }
+         } 
 
 	 input = (const float (*)[4])((const char *)input + input_stride);
-      } 
+      }
 
       /* run compiled shader
        */
@@ -118,7 +152,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
 		   machine->Temps,
 		   shader->immediates);
 
-
       /* Unswizzle all output results.  
        */
       for (j = 0; j < max_vertices; j++) {
@@ -127,10 +160,11 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
             output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
             output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
             output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
-         }
+         } 
 
 	 output = (float (*)[4])((char *)output + output_stride);
-      } 
+      }
+#endif
    }
 }
 
@@ -176,7 +210,7 @@ draw_create_vs_sse(struct draw_context *draw,
    x86_init_func( &vs->sse2_program );
 
    if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
-			&vs->sse2_program, vs->immediates )) 
+			&vs->sse2_program, vs->immediates, SSE_SWIZZLES )) 
       goto fail;
       
    vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index e6cbe9967fa..d7e22305573 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -853,6 +853,20 @@ void sse_shufps( struct x86_function *p,
    emit_1ub(p, shuf); 
 }
 
+void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   emit_2ub( p, X86_TWOB, 0x15 );
+   emit_modrm( p, dst, src );
+}
+
+void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR( dst, src );
+   emit_2ub( p, X86_TWOB, 0x14 );
+   emit_modrm( p, dst, src );
+}
+
 void sse_cmpps( struct x86_function *p,
 		struct x86_reg dst,
 		struct x86_reg src,
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 1962b07bc5b..ad79b1facf8 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -203,6 +203,8 @@ void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src
 void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                  unsigned char shuf );
+void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
 void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 9061e00b635..86ca16c246b 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -1788,7 +1788,6 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_RET:
-   case TGSI_OPCODE_END:
 #ifdef WIN32
       emit_retw( func, 16 );
 #else
@@ -1796,6 +1795,9 @@ emit_instruction(
 #endif
       break;
 
+   case TGSI_OPCODE_END:
+      break;
+
    case TGSI_OPCODE_SSG:
       return 0;
       break;
@@ -2027,6 +2029,127 @@ emit_declaration(
    }
 }
 
+static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
+{
+   struct x86_reg soa_input;
+   struct x86_reg aos_input;
+   struct x86_reg num_inputs;
+   struct x86_reg temp;
+   unsigned char *inner_loop;
+
+   soa_input = x86_make_reg( file_REG32, reg_AX );
+   aos_input = x86_make_reg( file_REG32, reg_BX );
+   num_inputs = x86_make_reg( file_REG32, reg_CX );
+   temp = x86_make_reg( file_REG32, reg_DX );
+
+   /* Save EBX */
+   x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
+
+   x86_mov( func, soa_input, get_argument( soa + 1 ) );
+   x86_mov( func, aos_input, get_argument( aos + 1 ) );
+   x86_mov( func, num_inputs, get_argument( num + 1 ) );
+
+   inner_loop = x86_get_label( func );
+
+   x86_mov( func, temp, get_argument( stride + 1 ) );
+   x86_push( func, aos_input );
+   sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
+   sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
+   x86_add( func, aos_input, temp );
+   sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
+   sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
+   x86_add( func, aos_input, temp );
+   sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
+   sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
+   x86_add( func, aos_input, temp );
+   sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
+   sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
+   x86_pop( func, aos_input );
+
+   sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
+   sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
+   sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 );
+   sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd );
+   sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 );
+   sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd );
+
+   sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) );
+   sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) );
+   sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) );
+   sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) );
+
+   /* Advance to next input */
+   x86_mov_reg_imm( func, temp, 16 );
+   x86_add( func, aos_input, temp );
+   x86_mov_reg_imm( func, temp, 64 );
+   x86_add( func, soa_input, temp );
+   x86_dec( func, num_inputs );
+   x86_jcc( func, cc_NE, inner_loop );
+
+   /* Restore EBX */
+   x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
+}
+
+static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
+{
+   struct x86_reg soa_output;
+   struct x86_reg aos_output;
+   struct x86_reg num_outputs;
+   struct x86_reg temp;
+   unsigned char *inner_loop;
+
+   soa_output = x86_make_reg( file_REG32, reg_AX );
+   aos_output = x86_make_reg( file_REG32, reg_BX );
+   num_outputs = x86_make_reg( file_REG32, reg_CX );
+   temp = x86_make_reg( file_REG32, reg_DX );
+
+   /* Save EBX */
+   x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
+
+   x86_mov( func, soa_output, get_argument( soa + 1 ) );
+   x86_mov( func, aos_output, get_argument( aos + 1 ) );
+   x86_mov( func, num_outputs, get_argument( num + 1 ) );
+
+   inner_loop = x86_get_label( func );
+
+   sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) );
+   sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) );
+   sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) );
+   sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) );
+
+   sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
+   sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
+   sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) );
+   sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) );
+   sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
+   sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
+
+   x86_mov( func, temp, get_argument( stride + 1 ) );
+   x86_push( func, aos_output );
+   sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
+   sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
+   x86_add( func, aos_output, temp );
+   sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
+   sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
+   x86_add( func, aos_output, temp );
+   sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
+   sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
+   x86_add( func, aos_output, temp );
+   sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
+   sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
+   x86_pop( func, aos_output );
+
+   /* Advance to next output */
+   x86_mov_reg_imm( func, temp, 16 );
+   x86_add( func, aos_output, temp );
+   x86_mov_reg_imm( func, temp, 64 );
+   x86_add( func, soa_output, temp );
+   x86_dec( func, num_outputs );
+   x86_jcc( func, cc_NE, inner_loop );
+
+   /* Restore EBX */
+   x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
+}
 
 /**
  * Translate a TGSI vertex/fragment shader to SSE2 code.
@@ -2048,7 +2171,8 @@ unsigned
 tgsi_emit_sse2(
    const struct tgsi_token *tokens,
    struct x86_function *func,
-   float (*immediates)[4])
+   float (*immediates)[4],
+   boolean do_swizzles )
 {
    struct tgsi_parse_context parse;
    boolean instruction_phase = FALSE;
@@ -2089,6 +2213,9 @@ tgsi_emit_sse2(
    else {
       assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
 
+      if (do_swizzles)
+         aos_to_soa( func, 5, 0, 6, 7 );
+
       x86_mov(
          func,
          get_input_base(),
@@ -2176,6 +2303,17 @@ tgsi_emit_sse2(
       }
    }
 
+   if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
+      if (do_swizzles)
+         soa_to_aos( func, 8, 1, 9, 10 );
+   }
+
+#ifdef WIN32
+   emit_retw( func, 16 );
+#else
+   emit_ret( func );
+#endif
+
    tgsi_parse_free( &parse );
 
    return ok;
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
index 063287dc5e9..e66d1152836 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
@@ -12,8 +12,8 @@ unsigned
 tgsi_emit_sse2(
    const struct tgsi_token *tokens,
    struct x86_function *function,
-   float (*immediates)[4]
- );
+   float (*immediates)[4],
+   boolean do_swizzles );
 
 #if defined __cplusplus
 }
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index f857d26143e..4d569e1e22f 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -133,7 +133,7 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe,
    x86_init_func( &shader->sse2_program );
    
    if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program,
-                        shader->immediates)) {
+                        shader->immediates, FALSE )) {
       FREE(shader);
       return NULL;
    }
-- 
cgit v1.2.3


From 9fb444f191015b44498a5c83d762519ccc98ed55 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Mon, 28 Apr 2008 18:43:27 +0100
Subject: tsgi: add a makefile

---
 src/gallium/auxiliary/tgsi/exec/Makefile | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 src/gallium/auxiliary/tgsi/exec/Makefile

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile
new file mode 100644
index 00000000000..451911a3545
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/exec/Makefile
@@ -0,0 +1,2 @@
+default:
+	cd .. ; make
-- 
cgit v1.2.3


From c4917c62311522df902003d77b146fc677c09a4e Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Mon, 28 Apr 2008 18:50:31 +0100
Subject: tgsi: make loop structure clearer, use x86_lea for increments

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 134 ++++++++++++++--------------
 1 file changed, 68 insertions(+), 66 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 86ca16c246b..1138f599972 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -2049,40 +2049,41 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num,
    x86_mov( func, aos_input, get_argument( aos + 1 ) );
    x86_mov( func, num_inputs, get_argument( num + 1 ) );
 
+   /* do */
    inner_loop = x86_get_label( func );
-
-   x86_mov( func, temp, get_argument( stride + 1 ) );
-   x86_push( func, aos_input );
-   sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
-   sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
-   x86_add( func, aos_input, temp );
-   sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
-   sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
-   x86_add( func, aos_input, temp );
-   sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
-   sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
-   x86_add( func, aos_input, temp );
-   sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
-   sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
-   x86_pop( func, aos_input );
-
-   sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
-   sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
-   sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 );
-   sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd );
-   sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 );
-   sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd );
-
-   sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) );
-   sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) );
-   sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) );
-   sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) );
-
-   /* Advance to next input */
-   x86_mov_reg_imm( func, temp, 16 );
-   x86_add( func, aos_input, temp );
-   x86_mov_reg_imm( func, temp, 64 );
-   x86_add( func, soa_input, temp );
+   {
+      x86_mov( func, temp, get_argument( stride + 1 ) );
+      x86_push( func, aos_input );
+      sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
+      sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
+      x86_add( func, aos_input, temp );
+      sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
+      sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
+      x86_add( func, aos_input, temp );
+      sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
+      sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
+      x86_add( func, aos_input, temp );
+      sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
+      sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
+      x86_pop( func, aos_input );
+
+      sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
+      sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
+      sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 );
+      sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd );
+      sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 );
+      sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd );
+
+      sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) );
+      sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) );
+      sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) );
+      sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) );
+
+      /* Advance to next input */
+      x86_lea( func, aos_input, x86_make_disp(aos_input, 16) );
+      x86_lea( func, soa_input, x86_make_disp(soa_input, 64) );
+   }
+   /* while --num_inputs */
    x86_dec( func, num_inputs );
    x86_jcc( func, cc_NE, inner_loop );
 
@@ -2110,40 +2111,41 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
    x86_mov( func, aos_output, get_argument( aos + 1 ) );
    x86_mov( func, num_outputs, get_argument( num + 1 ) );
 
+   /* do */
    inner_loop = x86_get_label( func );
-
-   sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) );
-   sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) );
-   sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) );
-   sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) );
-
-   sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
-   sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
-   sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) );
-   sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) );
-   sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
-   sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
-
-   x86_mov( func, temp, get_argument( stride + 1 ) );
-   x86_push( func, aos_output );
-   sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
-   sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
-   x86_add( func, aos_output, temp );
-   sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
-   sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
-   x86_add( func, aos_output, temp );
-   sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
-   sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
-   x86_add( func, aos_output, temp );
-   sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
-   sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
-   x86_pop( func, aos_output );
-
-   /* Advance to next output */
-   x86_mov_reg_imm( func, temp, 16 );
-   x86_add( func, aos_output, temp );
-   x86_mov_reg_imm( func, temp, 64 );
-   x86_add( func, soa_output, temp );
+   {
+      sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) );
+      sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) );
+      sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) );
+      sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) );
+
+      sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
+      sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
+      sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) );
+      sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) );
+      sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
+      sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
+
+      x86_mov( func, temp, get_argument( stride + 1 ) );
+      x86_push( func, aos_output );
+      sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
+      sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
+      x86_add( func, aos_output, temp );
+      sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
+      sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
+      x86_add( func, aos_output, temp );
+      sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
+      sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
+      x86_add( func, aos_output, temp );
+      sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
+      sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
+      x86_pop( func, aos_output );
+
+      /* Advance to next output */
+      x86_lea( func, aos_output, x86_make_disp(aos_output, 16) );
+      x86_lea( func, soa_output, x86_make_disp(soa_output, 64) );
+   }
+   /* while --num_outputs */
    x86_dec( func, num_outputs );
    x86_jcc( func, cc_NE, inner_loop );
 
-- 
cgit v1.2.3


From 1e4217e1b857c6a3c5da7d1eceb74683bf0b9a00 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Thu, 1 May 2008 14:51:18 +0100
Subject: tgsi: use ESI instead of EBX on non-win32 platforms

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 1138f599972..4587b0930aa 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -36,7 +36,7 @@
 
 #ifdef PIPE_ARCH_X86
 
-#define HIGH_PRECISION 1  /* for 1/sqrt() */
+#define HIGH_PRECISION 0  /* for 1/sqrt() */
 
 
 #define FOR_EACH_CHANNEL( CHAN )\
@@ -2038,12 +2038,12 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num,
    unsigned char *inner_loop;
 
    soa_input = x86_make_reg( file_REG32, reg_AX );
-   aos_input = x86_make_reg( file_REG32, reg_BX );
+   aos_input = get_temp_base(); /* BX or SI */
    num_inputs = x86_make_reg( file_REG32, reg_CX );
    temp = x86_make_reg( file_REG32, reg_DX );
 
    /* Save EBX */
-   x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
+   x86_push( func, aos_input );
 
    x86_mov( func, soa_input, get_argument( soa + 1 ) );
    x86_mov( func, aos_input, get_argument( aos + 1 ) );
@@ -2088,7 +2088,7 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num,
    x86_jcc( func, cc_NE, inner_loop );
 
    /* Restore EBX */
-   x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
+   x86_pop( func, aos_input );
 }
 
 static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
@@ -2100,12 +2100,12 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
    unsigned char *inner_loop;
 
    soa_output = x86_make_reg( file_REG32, reg_AX );
-   aos_output = x86_make_reg( file_REG32, reg_BX );
+   aos_output = get_temp_base(); /* BX or SI */
    num_outputs = x86_make_reg( file_REG32, reg_CX );
    temp = x86_make_reg( file_REG32, reg_DX );
 
    /* Save EBX */
-   x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
+   x86_push( func, aos_output );
 
    x86_mov( func, soa_output, get_argument( soa + 1 ) );
    x86_mov( func, aos_output, get_argument( aos + 1 ) );
@@ -2150,7 +2150,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
    x86_jcc( func, cc_NE, inner_loop );
 
    /* Restore EBX */
-   x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
+   x86_pop( func, aos_output );
 }
 
 /**
-- 
cgit v1.2.3


From 7810e7f623a47978cdd1a167cc9e6b743d56d949 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Thu, 1 May 2008 15:13:46 +0100
Subject: tgsi: use x86_fn_arg instead of get_argument() -- it knows about
 push/pops to the stack

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 63 ++++++++++++++++-------------
 1 file changed, 34 insertions(+), 29 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 4587b0930aa..5e133a96634 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -133,14 +133,6 @@ get_immediate_base( void )
  * Data access helpers.
  */
 
-static struct x86_reg
-get_argument(
-   unsigned index )
-{
-   return x86_make_disp(
-      x86_make_reg( file_REG32, reg_SP ),
-      (index + 1) * 4 );
-}
 
 static struct x86_reg
 get_immediate(
@@ -2045,14 +2037,14 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num,
    /* Save EBX */
    x86_push( func, aos_input );
 
-   x86_mov( func, soa_input, get_argument( soa + 1 ) );
-   x86_mov( func, aos_input, get_argument( aos + 1 ) );
-   x86_mov( func, num_inputs, get_argument( num + 1 ) );
+   x86_mov( func, soa_input, x86_fn_arg( func, soa ) );
+   x86_mov( func, aos_input, x86_fn_arg( func, aos ) );
+   x86_mov( func, num_inputs, x86_fn_arg( func, num ) );
 
    /* do */
    inner_loop = x86_get_label( func );
    {
-      x86_mov( func, temp, get_argument( stride + 1 ) );
+      x86_mov( func, temp, x86_fn_arg( func, stride ) );
       x86_push( func, aos_input );
       sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
       sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
@@ -2107,9 +2099,9 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
    /* Save EBX */
    x86_push( func, aos_output );
 
-   x86_mov( func, soa_output, get_argument( soa + 1 ) );
-   x86_mov( func, aos_output, get_argument( aos + 1 ) );
-   x86_mov( func, num_outputs, get_argument( num + 1 ) );
+   x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
+   x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
+   x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
 
    /* do */
    inner_loop = x86_get_label( func );
@@ -2126,7 +2118,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
       sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
       sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
 
-      x86_mov( func, temp, get_argument( stride + 1 ) );
+      x86_mov( func, temp, x86_fn_arg( func, stride ) );
       x86_push( func, aos_output );
       sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
       sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
@@ -2185,6 +2177,13 @@ tgsi_emit_sse2(
 
    tgsi_parse_init( &parse, tokens );
 
+   /* Can't just use EDI without save/restoring it:
+    */
+   x86_push(
+      func,
+      get_immediate_base() );
+
+
    /*
     * Different function args for vertex/fragment shaders:
     */
@@ -2193,51 +2192,51 @@ tgsi_emit_sse2(
       x86_mov(
          func,
          get_input_base(),
-         get_argument( 0 ) );
+         x86_fn_arg( func, 1 ) );
       /* skipping outputs argument here */
       x86_mov(
          func,
          get_const_base(),
-         get_argument( 2 ) );
+         x86_fn_arg( func, 3 ) );
       x86_mov(
          func,
          get_temp_base(),
-         get_argument( 3 ) );
+         x86_fn_arg( func, 4 ) );
       x86_mov(
          func,
          get_coef_base(),
-         get_argument( 4 ) );
+         x86_fn_arg( func, 5 ) );
       x86_mov(
          func,
          get_immediate_base(),
-         get_argument( 5 ) );
+         x86_fn_arg( func, 6 ) );
    }
    else {
       assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
 
       if (do_swizzles)
-         aos_to_soa( func, 5, 0, 6, 7 );
+         aos_to_soa( func, 6, 1, 7, 8 );
 
       x86_mov(
          func,
          get_input_base(),
-         get_argument( 0 ) );
+         x86_fn_arg( func, 1 ) );
       x86_mov(
          func,
          get_output_base(),
-         get_argument( 1 ) );
+         x86_fn_arg( func, 2 ) );
       x86_mov(
          func,
          get_const_base(),
-         get_argument( 2 ) );
+         x86_fn_arg( func, 3 ) );
       x86_mov(
          func,
          get_temp_base(),
-         get_argument( 3 ) );
+         x86_fn_arg( func, 4 ) );
       x86_mov(
          func,
          get_immediate_base(),
-         get_argument( 4 ) );
+         x86_fn_arg( func, 5 ) );
    }
 
    while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
@@ -2260,7 +2259,7 @@ tgsi_emit_sse2(
                x86_mov(
                   func,
                   get_output_base(),
-                  get_argument( 1 ) );
+                  x86_fn_arg( func, 2 ) );
             }
          }
 
@@ -2307,9 +2306,15 @@ tgsi_emit_sse2(
 
    if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
       if (do_swizzles)
-         soa_to_aos( func, 8, 1, 9, 10 );
+         soa_to_aos( func, 9, 2, 10, 11 );
    }
 
+   /* Can't just use EDI without save/restoring it:
+    */
+   x86_pop(
+      func,
+      get_immediate_base() );
+
 #ifdef WIN32
    emit_retw( func, 16 );
 #else
-- 
cgit v1.2.3


From 47aa416821b69d3afa33c79ec8cb8499688a0e8e Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Thu, 1 May 2008 15:27:53 +0100
Subject: tgsi: use EBX everywhere, be sure to push/pop it

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 5e133a96634..06df3dbb05b 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -103,15 +103,9 @@ get_output_base( void )
 static struct x86_reg
 get_temp_base( void )
 {
-#ifdef WIN32
    return x86_make_reg(
       file_REG32,
       reg_BX );
-#else
-   return x86_make_reg(
-      file_REG32,
-      reg_SI );
-#endif
 }
 
 static struct x86_reg
@@ -2177,12 +2171,16 @@ tgsi_emit_sse2(
 
    tgsi_parse_init( &parse, tokens );
 
-   /* Can't just use EDI without save/restoring it:
+   /* Can't just use EDI, EBX without save/restoring them:
     */
    x86_push(
       func,
       get_immediate_base() );
 
+   x86_push(
+      func,
+      get_temp_base() );
+
 
    /*
     * Different function args for vertex/fragment shaders:
@@ -2309,8 +2307,12 @@ tgsi_emit_sse2(
          soa_to_aos( func, 9, 2, 10, 11 );
    }
 
-   /* Can't just use EDI without save/restoring it:
+   /* Can't just use EBX, EDI without save/restoring them:
     */
+   x86_pop(
+      func,
+      get_temp_base() );
+
    x86_pop(
       func,
       get_immediate_base() );
-- 
cgit v1.2.3


From afe67db8038855d9f7b4ce46b610701c55736c1f Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Thu, 1 May 2008 15:36:14 +0100
Subject: tgsi: add some const qualifiers to immediate pointers

---
 src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
index da0121c482e..15e76feb7ca 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -52,8 +52,8 @@ struct tgsi_full_immediate
    struct tgsi_immediate   Immediate;
    union
    {
-      void                          *Pointer;
-      struct tgsi_immediate_float32 *ImmediateFloat32;
+      const void                          *Pointer;
+      const struct tgsi_immediate_float32 *ImmediateFloat32;
    } u;
 };
 
-- 
cgit v1.2.3


From 419f3c447520d1dc95c529afa693ffe3fffe5560 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Thu, 1 May 2008 15:45:51 +0100
Subject: tgsi: restore HIGH_PRECISION setting

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 06df3dbb05b..45453c34cee 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -36,7 +36,7 @@
 
 #ifdef PIPE_ARCH_X86
 
-#define HIGH_PRECISION 0  /* for 1/sqrt() */
+#define HIGH_PRECISION 1  /* for 1/sqrt() */
 
 
 #define FOR_EACH_CHANNEL( CHAN )\
-- 
cgit v1.2.3


From fb3623b235f5caa9d76e656b1e5eda797c7c73eb Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Thu, 1 May 2008 20:41:03 +0100
Subject: rtasm: fix labels after (not so) recent change to allow dynamic fn
 growth

Using char * for labels doesn't work if you realloc the function
during assembly and free the old storage...
---
 src/gallium/auxiliary/rtasm/rtasm_x86sse.c      | 49 +++++++++----------------
 src/gallium/auxiliary/rtasm/rtasm_x86sse.h      | 14 +++----
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c     |  4 +-
 src/gallium/auxiliary/translate/translate_sse.c |  2 +-
 4 files changed, 28 insertions(+), 41 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 40f6f973d69..e69251f0720 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -347,9 +347,9 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg )
    return x86_make_reg( reg.file, reg.idx );
 }
 
-unsigned char *x86_get_label( struct x86_function *p )
+int x86_get_label( struct x86_function *p )
 {
-   return p->csr;
+   return p->csr - p->store;
 }
 
 
@@ -361,17 +361,22 @@ unsigned char *x86_get_label( struct x86_function *p )
 
 void x86_jcc( struct x86_function *p,
 	      enum x86_cc cc,
-	      unsigned char *label )
+	      int label )
 {
-   intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2);
+   int offset = label - (x86_get_label(p) + 2);
    DUMP_I(cc);
    
+   if (offset < 0) {
+      int amt = p->csr - p->store;
+      assert(amt > -offset);
+   }
+
    if (offset <= 127 && offset >= -128) {
       emit_1ub(p, 0x70 + cc);
       emit_1b(p, (char) offset);
    }
    else {
-      offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 6);
+      offset = label - (x86_get_label(p) + 6);
       emit_2ub(p, 0x0f, 0x80 + cc);
       emit_1i(p, offset);
    }
@@ -379,8 +384,8 @@ void x86_jcc( struct x86_function *p,
 
 /* Always use a 32bit offset for forward jumps:
  */
-unsigned char *x86_jcc_forward( struct x86_function *p,
-			  enum x86_cc cc )
+int x86_jcc_forward( struct x86_function *p,
+                     enum x86_cc cc )
 {
    DUMP_I(cc);
    emit_2ub(p, 0x0f, 0x80 + cc);
@@ -388,7 +393,7 @@ unsigned char *x86_jcc_forward( struct x86_function *p,
    return x86_get_label(p);
 }
 
-unsigned char *x86_jmp_forward( struct x86_function *p)
+int x86_jmp_forward( struct x86_function *p)
 {
    DUMP();
    emit_1ub(p, 0xe9);
@@ -396,7 +401,7 @@ unsigned char *x86_jmp_forward( struct x86_function *p)
    return x86_get_label(p);
 }
 
-unsigned char *x86_call_forward( struct x86_function *p)
+int x86_call_forward( struct x86_function *p)
 {
    DUMP();
 
@@ -408,42 +413,24 @@ unsigned char *x86_call_forward( struct x86_function *p)
 /* Fixup offset from forward jump:
  */
 void x86_fixup_fwd_jump( struct x86_function *p,
-			 unsigned char *fixup )
+			 int fixup )
 {
-   *(int *)(fixup - 4) = pointer_to_intptr( x86_get_label(p) ) - pointer_to_intptr( fixup );
+   *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup;
 }
 
-void x86_jmp( struct x86_function *p, unsigned char *label)
+void x86_jmp( struct x86_function *p, int label)
 {
    DUMP_I( label );
    emit_1ub(p, 0xe9);
-   emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4);
-}
-
-#if 0
-static unsigned char *cptr( void (*label)() )
-{
-   return (unsigned char *) label;
+   emit_1i(p, label - x86_get_label(p) - 4);
 }
 
-/* This doesn't work once we start reallocating & copying the
- * generated code on buffer fills, because the call is relative to the
- * current pc.
- */
-void x86_call( struct x86_function *p, void (*label)())
-{
-   DUMP_I( label );
-   emit_1ub(p, 0xe8);
-   emit_1i(p, cptr(label) - x86_get_label(p) - 4);
-}
-#else
 void x86_call( struct x86_function *p, struct x86_reg reg)
 {
    DUMP_R( reg );
    emit_1ub(p, 0xff);
    emit_modrm_noreg(p, 2, reg);
 }
-#endif
 
 
 /* michal:
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index ad79b1facf8..eacaeeaf6fe 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -124,23 +124,23 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
 
 /* Labels, jumps and fixup:
  */
-unsigned char *x86_get_label( struct x86_function *p );
+int x86_get_label( struct x86_function *p );
 
 void x86_jcc( struct x86_function *p,
 	      enum x86_cc cc,
-	      unsigned char *label );
+	      int label );
 
-unsigned char *x86_jcc_forward( struct x86_function *p,
+int x86_jcc_forward( struct x86_function *p,
 			  enum x86_cc cc );
 
-unsigned char *x86_jmp_forward( struct x86_function *p);
+int x86_jmp_forward( struct x86_function *p);
 
-unsigned char *x86_call_forward( struct x86_function *p);
+int x86_call_forward( struct x86_function *p);
 
 void x86_fixup_fwd_jump( struct x86_function *p,
-			 unsigned char *fixup );
+			 int fixup );
 
-void x86_jmp( struct x86_function *p, unsigned char *label );
+void x86_jmp( struct x86_function *p, int label );
 
 /* void x86_call( struct x86_function *p, void (*label)() ); */
 void x86_call( struct x86_function *p, struct x86_reg reg);
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 45453c34cee..07db3292b44 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -2021,7 +2021,7 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num,
    struct x86_reg aos_input;
    struct x86_reg num_inputs;
    struct x86_reg temp;
-   unsigned char *inner_loop;
+   int inner_loop;
 
    soa_input = x86_make_reg( file_REG32, reg_AX );
    aos_input = get_temp_base(); /* BX or SI */
@@ -2083,7 +2083,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
    struct x86_reg aos_output;
    struct x86_reg num_outputs;
    struct x86_reg temp;
-   unsigned char *inner_loop;
+   int inner_loop;
 
    soa_output = x86_make_reg( file_REG32, reg_AX );
    aos_output = get_temp_base(); /* BX or SI */
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index f590d48b787..a54ac5a82ff 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -404,7 +404,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
    struct x86_reg srcEAX       = x86_make_reg(file_REG32, reg_CX);
    struct x86_reg countEBP     = x86_make_reg(file_REG32, reg_BP);
    struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI);
-   uint8_t *fixup, *label;
+   int fixup, label;
    unsigned j;
 
    p->func = func;
-- 
cgit v1.2.3


From f067c6c452bdd5f5cc6b0f6b2f79fb3fc1162822 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Thu, 1 May 2008 20:45:15 +0100
Subject: tgsi: remove some bogus win vs. linux crud

Pass arguments properly in linux now.  Still need to change this to use
a single calling convention on both platforms.
---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 118 +++++++++++-----------------
 1 file changed, 46 insertions(+), 72 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 07db3292b44..a67bc8567b5 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -441,19 +441,13 @@ emit_push_gp(
 {
    x86_push(
       func,
-      get_const_base() );
+      x86_make_reg( file_REG32, reg_AX) );
    x86_push(
       func,
-      get_input_base() );
+      x86_make_reg( file_REG32, reg_CX) );
    x86_push(
       func,
-      get_output_base() );
-
-   /* It is important on non-win32 platforms that temp base is pushed last.
-    */
-   x86_push(
-      func,
-      get_temp_base() );
+      x86_make_reg( file_REG32, reg_DX) );
 }
 
 static void
@@ -464,16 +458,13 @@ x86_pop_gp(
     */
    x86_pop(
       func,
-      get_temp_base() );
+      x86_make_reg( file_REG32, reg_DX) );
    x86_pop(
       func,
-      get_output_base() );
+      x86_make_reg( file_REG32, reg_CX) );
    x86_pop(
       func,
-      get_input_base() );
-   x86_pop(
-      func,
-      get_const_base() );
+      x86_make_reg( file_REG32, reg_AX) );
 }
 
 static void
@@ -490,19 +481,23 @@ emit_func_call_dst(
    emit_push_gp(
       func );
 
-#ifdef WIN32
-   x86_push(
-      func,
-      get_temp( TEMP_R0, 0 ) );
-#endif
-
    {
       struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
 
+      x86_lea(
+         func,
+         ecx,
+         get_temp( TEMP_R0, 0 ) );
+
+      x86_push( func, ecx );
       x86_mov_reg_imm( func, ecx, (unsigned long) code );
       x86_call( func, ecx );
+#ifndef WIN32
+      x86_pop(func, ecx ); 
+#endif
    }
 
+
    x86_pop_gp(
       func );
 
@@ -563,11 +558,7 @@ static void XSTDCALL
 cos4f(
    float *store )
 {
-#ifdef WIN32
    const unsigned X = 0;
-#else
-   const unsigned X = TEMP_R0 * 16;
-#endif
 
    store[X + 0] = cosf( store[X + 0] );
    store[X + 1] = cosf( store[X + 1] );
@@ -590,11 +581,8 @@ static void XSTDCALL
 ex24f(
    float *store )
 {
-#ifdef WIN32
    const unsigned X = 0;
-#else
-   const unsigned X = TEMP_R0 * 16;
-#endif
+
    store[X + 0] = powf( 2.0f, store[X + 0] );
    store[X + 1] = powf( 2.0f, store[X + 1] );
    store[X + 2] = powf( 2.0f, store[X + 2] );
@@ -627,11 +615,8 @@ static void XSTDCALL
 flr4f(
    float *store )
 {
-#ifdef WIN32
    const unsigned X = 0;
-#else
-   const unsigned X = TEMP_R0 * 16;
-#endif
+
    store[X + 0] = floorf( store[X + 0] );
    store[X + 1] = floorf( store[X + 1] );
    store[X + 2] = floorf( store[X + 2] );
@@ -653,11 +638,8 @@ static void XSTDCALL
 frc4f(
    float *store )
 {
-#ifdef WIN32
    const unsigned X = 0;
-#else
-   const unsigned X = TEMP_R0 * 16;
-#endif
+
    store[X + 0] -= floorf( store[X + 0] );
    store[X + 1] -= floorf( store[X + 1] );
    store[X + 2] -= floorf( store[X + 2] );
@@ -679,11 +661,8 @@ static void XSTDCALL
 lg24f(
    float *store )
 {
-#ifdef WIN32
    const unsigned X = 0;
-#else
-   const unsigned X = TEMP_R0 * 16;
-#endif
+
    store[X + 0] = LOG2( store[X + 0] );
    store[X + 1] = LOG2( store[X + 1] );
    store[X + 2] = LOG2( store[X + 2] );
@@ -741,11 +720,8 @@ static void XSTDCALL
 pow4f(
    float *store )
 {
-#ifdef WIN32
    const unsigned X = 0;
-#else
-   const unsigned X = TEMP_R0 * 16;
-#endif
+
    store[X + 0] = powf( store[X + 0], store[X + 4] );
    store[X + 1] = powf( store[X + 1], store[X + 5] );
    store[X + 2] = powf( store[X + 2], store[X + 6] );
@@ -786,11 +762,8 @@ static void XSTDCALL
 rsqrt4f(
    float *store )
 {
-#ifdef WIN32
    const unsigned X = 0;
-#else
-   const unsigned X = TEMP_R0 * 16;
-#endif
+
    store[X + 0] = 1.0F / sqrtf( store[X + 0] );
    store[X + 1] = 1.0F / sqrtf( store[X + 1] );
    store[X + 2] = 1.0F / sqrtf( store[X + 2] );
@@ -864,11 +837,8 @@ static void XSTDCALL
 sin4f(
    float *store )
 {
-#ifdef WIN32
    const unsigned X = 0;
-#else
-   const unsigned X = TEMP_R0 * 16;
-#endif
+
    store[X + 0] = sinf( store[X + 0] );
    store[X + 1] = sinf( store[X + 1] );
    store[X + 2] = sinf( store[X + 2] );
@@ -2015,40 +1985,40 @@ emit_declaration(
    }
 }
 
-static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
+static void aos_to_soa( struct x86_function *func, 
+                        uint arg_aos,
+                        uint arg_soa, 
+                        uint arg_num, 
+                        uint arg_stride )
 {
-   struct x86_reg soa_input;
-   struct x86_reg aos_input;
-   struct x86_reg num_inputs;
-   struct x86_reg temp;
+   struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX );
+   struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX );
+   struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX );
+   struct x86_reg stride = x86_make_reg( file_REG32, reg_DX );
    int inner_loop;
 
-   soa_input = x86_make_reg( file_REG32, reg_AX );
-   aos_input = get_temp_base(); /* BX or SI */
-   num_inputs = x86_make_reg( file_REG32, reg_CX );
-   temp = x86_make_reg( file_REG32, reg_DX );
 
    /* Save EBX */
-   x86_push( func, aos_input );
+   x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
 
-   x86_mov( func, soa_input, x86_fn_arg( func, soa ) );
-   x86_mov( func, aos_input, x86_fn_arg( func, aos ) );
-   x86_mov( func, num_inputs, x86_fn_arg( func, num ) );
+   x86_mov( func, aos_input,  x86_fn_arg( func, arg_aos ) );
+   x86_mov( func, soa_input,  x86_fn_arg( func, arg_soa ) );
+   x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
+   x86_mov( func, stride,     x86_fn_arg( func, arg_stride ) );
 
    /* do */
    inner_loop = x86_get_label( func );
    {
-      x86_mov( func, temp, x86_fn_arg( func, stride ) );
       x86_push( func, aos_input );
       sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
       sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
-      x86_add( func, aos_input, temp );
+      x86_add( func, aos_input, stride );
       sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
       sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
-      x86_add( func, aos_input, temp );
+      x86_add( func, aos_input, stride );
       sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
       sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
-      x86_add( func, aos_input, temp );
+      x86_add( func, aos_input, stride );
       sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
       sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
       x86_pop( func, aos_input );
@@ -2086,7 +2056,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
    int inner_loop;
 
    soa_output = x86_make_reg( file_REG32, reg_AX );
-   aos_output = get_temp_base(); /* BX or SI */
+   aos_output = x86_make_reg( file_REG32, reg_BX );
    num_outputs = x86_make_reg( file_REG32, reg_CX );
    temp = x86_make_reg( file_REG32, reg_DX );
 
@@ -2213,7 +2183,11 @@ tgsi_emit_sse2(
       assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
 
       if (do_swizzles)
-         aos_to_soa( func, 6, 1, 7, 8 );
+         aos_to_soa( func, 
+                     6,         /* aos_input */
+                     1,         /* machine->input */
+                     7,         /* num_inputs */
+                     8 );       /* input_stride */
 
       x86_mov(
          func,
-- 
cgit v1.2.3


From 7a4313b63bcd06318437d384875472e7139070a1 Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Thu, 1 May 2008 18:42:01 -0600
Subject: gallium: implement TGSI_OPCODE_LOG

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 30 +++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index d55f907c0d3..4b33d7e8bd1 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -1549,22 +1549,24 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_LOG:
-      debug_printf("TGSI: LOG opcode not implemented\n");
-      /* from ARB_v_p:
-      tmp = fabs(ScalarLoad(op0));
-      result.x = floor(log2(tmp));
-      result.y = tmp / 2^(floor(log2(tmp)));
-      result.z = RoughApproxLog2(tmp);
-      result.w = 1.0;
-      */
-#if 0
-      /* something like this: */
       FETCH( &r[0], 0, CHAN_X );
-      micro_lg2( &r[0], &r[0] );
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
+      micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
+      micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
+      micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+	 STORE( &r[0], 0, CHAN_X );
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+         micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
+         micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
+	 STORE( &r[0], 0, CHAN_Y );
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+	 STORE( &r[1], 0, CHAN_Z );
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
       }
-#endif
       break;
 
    case TGSI_OPCODE_MUL:
-- 
cgit v1.2.3


From 3b63bc8ac6db7af4077f12cfd44876a9d43cc6ec Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Thu, 1 May 2008 18:49:20 -0600
Subject: gallium: implement TGSI_OPCODE_EXP

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 4b33d7e8bd1..5d5125f7cbe 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -1530,22 +1530,23 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_EXP:
-      debug_printf("TGSI: EXP opcode not implemented\n");
-      /* from ARB_v_p:
-      tmp = ScalarLoad(op0);
-      result.x = 2^floor(tmp);
-      result.y = tmp - floor(tmp);
-      result.z = RoughApprox2ToX(tmp);
-      result.w = 1.0;
-      */
-#if 0
-      /* something like this: */
       FETCH( &r[0], 0, CHAN_X );
-      micro_exp2( &r[0], &r[0] );
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
+      micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+         micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
+	 STORE( &r[2], 0, CHAN_X );        /* store r2 */
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+         micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
+	 STORE( &r[2], 0, CHAN_Y );        /* store r2 */
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
+	 STORE( &r[2], 0, CHAN_Z );        /* store r2 */
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
       }
-#endif
       break;
 
    case TGSI_OPCODE_LOG:
-- 
cgit v1.2.3


From 869b0836c1c4339de91c9918ae07926c846a004c Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Thu, 1 May 2008 18:56:20 -0600
Subject: gallium: temporarily disable broken SSE2 code for ARL opcode

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index a67bc8567b5..2fd76a3072d 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -1190,11 +1190,16 @@ emit_instruction(
 
    switch( inst->Instruction.Opcode ) {
    case TGSI_OPCODE_ARL:
+#if 0
+      /* XXX this isn't working properly (see glean vertProg1 test) */
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 0, 0, chan_index );
          emit_f2it( func, 0 );
          STORE( func, *inst, 0, 0, chan_index );
       }
+#else
+      return 0;
+#endif
       break;
 
    case TGSI_OPCODE_MOV:
-- 
cgit v1.2.3


From 9d151a2517de3f83d676624a21b4f73d5accecbe Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Thu, 1 May 2008 16:39:54 +0200
Subject: tgsi: Dump destination register modulate modifier.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 26bfc2051f2..4c65ffd7807 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -767,6 +767,31 @@ dump_instruction_short(
       SID( dst->DstRegister.Index );
       CHR( ']' );
 
+      switch (dst->DstRegisterExtModulate.Modulate) {
+      case TGSI_MODULATE_1X:
+         break;
+      case TGSI_MODULATE_2X:
+         TXT( "_2X" );
+         break;
+      case TGSI_MODULATE_4X:
+         TXT( "_4X" );
+         break;
+      case TGSI_MODULATE_8X:
+         TXT( "_8X" );
+         break;
+      case TGSI_MODULATE_HALF:
+         TXT( "_D2" );
+         break;
+      case TGSI_MODULATE_QUARTER:
+         TXT( "_D4" );
+         break;
+      case TGSI_MODULATE_EIGHTH:
+         TXT( "_D8" );
+         break;
+      default:
+         assert( 0 );
+      }
+
       if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) {
          CHR( '.' );
          if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_X ) {
-- 
cgit v1.2.3


From 36f93c5e5159ebd99a5a4504efccdf6c5bf40716 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Fri, 2 May 2008 10:20:53 +0200
Subject: tgsi: Fix build on Win32.

---
 src/gallium/auxiliary/tgsi/util/tgsi_parse.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
index c3526cb71ff..5bea7738401 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
@@ -43,7 +43,7 @@ tgsi_full_token_free(
    union tgsi_full_token *full_token )
 {
    if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) {
-      FREE( full_token->FullImmediate.u.Pointer );
+      FREE( (void *) full_token->FullImmediate.u.Pointer );
    }
 }
 
@@ -156,7 +156,7 @@ tgsi_parse_token(
          imm->u.Pointer = MALLOC(
             sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) );
          for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
-            next_token( ctx, &imm->u.ImmediateFloat32[i] );
+            next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] );
          }
          break;
 
-- 
cgit v1.2.3


From a1cb0c2b915532e934b5d37bd0c550b1bfcc77ba Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Fri, 2 May 2008 11:13:58 +0200
Subject: tgsi: Do not assume IN and OUT registers are declared sequentially.

---
 src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index ea4a72967d6..65650ed22a4 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -103,18 +103,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                info->file_max[file] = MAX2(info->file_max[file], (int)i);
 
                if (file == TGSI_FILE_INPUT) {
-                  info->input_semantic_name[info->num_inputs]
-                     = (ubyte)fulldecl->Semantic.SemanticName;
-                  info->input_semantic_index[info->num_inputs]
-                     = (ubyte)fulldecl->Semantic.SemanticIndex;
+                  info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
+                  info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
                   info->num_inputs++;
                }
 
                if (file == TGSI_FILE_OUTPUT) {
-                  info->output_semantic_name[info->num_outputs]
-                     = (ubyte)fulldecl->Semantic.SemanticName;
-                  info->output_semantic_index[info->num_outputs]
-                     = (ubyte)fulldecl->Semantic.SemanticIndex;
+                  info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
+                  info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
                   info->num_outputs++;
                }
 
@@ -137,6 +133,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
       }
    }
 
+   assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs );
+   assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs );
+
    info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] ||
                       info->opcode_count[TGSI_OPCODE_KILP]);
 
-- 
cgit v1.2.3


From 17058e07469f2dc5b47b4f820bd5a31b7ed9177c Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Fri, 2 May 2008 16:02:18 +0200
Subject: tgsi: Implement fast rsqrtf. Not tested, inactive.

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c |  6 ++++
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 10 +++++--
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 43 +++++++++++++++++------------
 3 files changed, 40 insertions(+), 19 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 5d5125f7cbe..826b432f09e 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -88,6 +88,10 @@
 #define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
 #define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
 #define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
+#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
+#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
+#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
+#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
 #define TEMP_R0            TGSI_EXEC_TEMP_R0
 
 #define FOR_EACH_CHANNEL(CHAN)\
@@ -262,6 +266,8 @@ tgsi_exec_machine_init(
       mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
       mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
       mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
+      mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
+      mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
    }
 }
 
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
index 92e2e5e9859..19bd78df3d3 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
@@ -133,9 +133,15 @@ struct tgsi_exec_labels
 #define TGSI_EXEC_TEMP_PRIMITIVE_I  34
 #define TGSI_EXEC_TEMP_PRIMITIVE_C  2
 
-#define TGSI_EXEC_TEMP_R0           35
+#define TGSI_EXEC_TEMP_THREE_I      34
+#define TGSI_EXEC_TEMP_THREE_C      3
 
-#define TGSI_EXEC_NUM_TEMPS   (32 + 4)
+#define TGSI_EXEC_TEMP_HALF_I       35
+#define TGSI_EXEC_TEMP_HALF_C       0
+
+#define TGSI_EXEC_TEMP_R0           36
+
+#define TGSI_EXEC_NUM_TEMPS   (32 + 5)
 #define TGSI_EXEC_NUM_ADDRS   1
 #define TGSI_EXEC_NUM_IMMEDIATES  256
 
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 2fd76a3072d..dbf002130bb 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -36,7 +36,11 @@
 
 #ifdef PIPE_ARCH_X86
 
-#define HIGH_PRECISION 1  /* for 1/sqrt() */
+/* for 1/sqrt() 
+ * 
+ * This costs about 100fps (close to 10%) in gears:
+ */
+#define HIGH_PRECISION 1  
 
 
 #define FOR_EACH_CHANNEL( CHAN )\
@@ -794,20 +798,25 @@ emit_rsqrt(
     *
     * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
     */
-   /* This is some code that woudl do the above for a scalar 'a'.  We
-    * obviously are interested in a vector version:
-    *
-    * movss   xmm3, a;
-    * movss   xmm1, half;
-    * movss   xmm2, three;
-    * rsqrtss xmm0, xmm3;
-    * mulss   xmm3, xmm0;
-    * mulss   xmm1, xmm0;
-    * mulss   xmm3, xmm0;
-    * subss   xmm2, xmm3;
-    * mulss   xmm1, xmm2;
-    * movss   x,    xmm1;
-    */
+   {
+      struct x86_reg dst = make_xmm( xmm_dst );
+      struct x86_reg src = make_xmm( xmm_src );
+      struct x86_reg tmp0 = make_xmm( 2 );
+      struct x86_reg tmp1 = make_xmm( 3 );
+
+      assert( xmm_dst != xmm_src );
+      assert( xmm_dst != 2 && xmm_dst != 3 );
+      assert( xmm_src != 2 && xmm_src != 3 );
+
+      sse_movaps(  func, dst,  get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
+      sse_movaps(  func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
+      sse_rsqrtps( func, tmp1, src  );
+      sse_mulps(   func, src,  tmp1 );
+      sse_mulps(   func, dst,  tmp1 );
+      sse_mulps(   func, src,  tmp1 );
+      sse_subps(   func, tmp0, src  );
+      sse_mulps(   func, dst,  tmp0 );
+   }
 #endif
 #else
    /* On Intel CPUs at least, this is only accurate to 12 bits -- not
@@ -1295,9 +1304,9 @@ emit_instruction(
    case TGSI_OPCODE_RSQ:
    /* TGSI_OPCODE_RECIPSQRT */
       FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_rsqrt( func, 0, 0 );
+      emit_rsqrt( func, 1, 0 );
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
+         STORE( func, *inst, 1, 0, chan_index );
       }
       break;
 
-- 
cgit v1.2.3


From 6c15a70b75b1625b69790f98f2f44e9ae4435f6a Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@ubuntu-vbox.(none)>
Date: Fri, 2 May 2008 16:12:55 +0200
Subject: tgsi: Enable fast high precision rsqrt.

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 22 ----------------------
 1 file changed, 22 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index dbf002130bb..b6b05944be7 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -761,20 +761,6 @@ emit_rcp (
       make_xmm( xmm_src ) );
 }
 
-#if HIGH_PRECISION
-static void XSTDCALL
-rsqrt4f(
-   float *store )
-{
-   const unsigned X = 0;
-
-   store[X + 0] = 1.0F / sqrtf( store[X + 0] );
-   store[X + 1] = 1.0F / sqrtf( store[X + 1] );
-   store[X + 2] = 1.0F / sqrtf( store[X + 2] );
-   store[X + 3] = 1.0F / sqrtf( store[X + 3] );
-}
-#endif
-
 static void
 emit_rsqrt(
    struct x86_function *func,
@@ -782,13 +768,6 @@ emit_rsqrt(
    unsigned xmm_src )
 {
 #if HIGH_PRECISION
-#if 1
-   emit_func_call_dst_src(
-      func,
-      xmm_dst,
-      xmm_src,
-      rsqrt4f );
-#else
    /* Although rsqrtps() and rcpps() are low precision on some/all SSE
     * implementations, it is possible to improve its precision at
     * fairly low cost, using a newton/raphson step, as below:
@@ -817,7 +796,6 @@ emit_rsqrt(
       sse_subps(   func, tmp0, src  );
       sse_mulps(   func, dst,  tmp0 );
    }
-#endif
 #else
    /* On Intel CPUs at least, this is only accurate to 12 bits -- not
     * good enough.
-- 
cgit v1.2.3


From cff8d3bdcbf78b57b52a2f60c54e5a3cae286137 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 2 May 2008 08:22:25 -0600
Subject: gallium: remove ^M (CR) chars

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 46 ++++++++++++++---------------
 1 file changed, 23 insertions(+), 23 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index b6b05944be7..8018bd7fa45 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -36,11 +36,11 @@
 
 #ifdef PIPE_ARCH_X86
 
-/* for 1/sqrt() 
- * 
- * This costs about 100fps (close to 10%) in gears:
- */
-#define HIGH_PRECISION 1  
+/* for 1/sqrt()
+ *
+ * This costs about 100fps (close to 10%) in gears:
+ */
+#define HIGH_PRECISION 1
 
 
 #define FOR_EACH_CHANNEL( CHAN )\
@@ -777,24 +777,24 @@ emit_rsqrt(
     *
     * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
     */
-   {
-      struct x86_reg dst = make_xmm( xmm_dst );
-      struct x86_reg src = make_xmm( xmm_src );
-      struct x86_reg tmp0 = make_xmm( 2 );
-      struct x86_reg tmp1 = make_xmm( 3 );
-
-      assert( xmm_dst != xmm_src );
-      assert( xmm_dst != 2 && xmm_dst != 3 );
-      assert( xmm_src != 2 && xmm_src != 3 );
-
-      sse_movaps(  func, dst,  get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
-      sse_movaps(  func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
-      sse_rsqrtps( func, tmp1, src  );
-      sse_mulps(   func, src,  tmp1 );
-      sse_mulps(   func, dst,  tmp1 );
-      sse_mulps(   func, src,  tmp1 );
-      sse_subps(   func, tmp0, src  );
-      sse_mulps(   func, dst,  tmp0 );
+   {
+      struct x86_reg dst = make_xmm( xmm_dst );
+      struct x86_reg src = make_xmm( xmm_src );
+      struct x86_reg tmp0 = make_xmm( 2 );
+      struct x86_reg tmp1 = make_xmm( 3 );
+
+      assert( xmm_dst != xmm_src );
+      assert( xmm_dst != 2 && xmm_dst != 3 );
+      assert( xmm_src != 2 && xmm_src != 3 );
+
+      sse_movaps(  func, dst,  get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
+      sse_movaps(  func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
+      sse_rsqrtps( func, tmp1, src  );
+      sse_mulps(   func, src,  tmp1 );
+      sse_mulps(   func, dst,  tmp1 );
+      sse_mulps(   func, src,  tmp1 );
+      sse_subps(   func, tmp0, src  );
+      sse_mulps(   func, dst,  tmp0 );
    }
 #else
    /* On Intel CPUs at least, this is only accurate to 12 bits -- not
-- 
cgit v1.2.3


From 9671f7ae476cadb46f9f8f9d0363f24aabaf9f87 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Sat, 17 May 2008 10:30:21 -0600
Subject: gallium: in drivers, make copy of tokens passed to
 pipe->create_vs/fs_state()

The caller can then free the token array immediately.
---
 src/gallium/auxiliary/draw/draw_vs_exec.c        | 11 ++--
 src/gallium/auxiliary/draw/draw_vs_llvm.c        |  7 +-
 src/gallium/auxiliary/draw/draw_vs_sse.c         |  5 +-
 src/gallium/auxiliary/tgsi/util/tgsi_parse.c     | 15 +++++
 src/gallium/auxiliary/tgsi/util/tgsi_parse.h     | 31 ++++++++-
 src/gallium/drivers/cell/ppu/cell_state_shader.c | 84 +++++++++++++++---------
 src/gallium/drivers/i915simple/i915_state.c      |  5 +-
 src/gallium/drivers/i965simple/brw_state.c       | 19 +++---
 src/gallium/drivers/softpipe/sp_fs_exec.c        |  5 +-
 src/gallium/drivers/softpipe/sp_state.h          |  7 +-
 src/gallium/drivers/softpipe/sp_state_fs.c       |  5 +-
 11 files changed, 137 insertions(+), 57 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 54a2b2ab040..7a02f6334b1 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -39,6 +39,7 @@
 #include "draw_vs.h"
 
 #include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_scan.h"
 
 
 struct exec_vertex_shader {
@@ -165,21 +166,23 @@ draw_create_vs_exec(struct draw_context *draw,
 		    const struct pipe_shader_state *state)
 {
    struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader );
-   uint nt = tgsi_num_tokens(state->tokens);
 
    if (vs == NULL) 
       return NULL;
 
    /* we make a private copy of the tokens */
-   vs->base.state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0]));
-   tgsi_scan_shader(state->tokens, &vs->base.info);
+   vs->base.state.tokens = tgsi_dup_tokens(state->tokens);
+   if (!vs->base.state.tokens) {
+      FREE(vs);
+      return NULL;
+   }
 
+   tgsi_scan_shader(state->tokens, &vs->base.info);
 
    vs->base.prepare = vs_exec_prepare;
    vs->base.run_linear = vs_exec_run_linear;
    vs->base.delete = vs_exec_delete;
    vs->machine = &draw->machine;
 
-
    return &vs->base;
 }
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index dcada665143..be6907ed042 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -138,14 +138,17 @@ draw_create_vs_llvm(struct draw_context *draw,
 		    const struct pipe_shader_state *templ)
 {
    struct draw_llvm_vertex_shader *vs;
-   uint nt = tgsi_num_tokens(templ->tokens);
 
    vs = CALLOC_STRUCT( draw_llvm_vertex_shader );
    if (vs == NULL) 
       return NULL;
 
    /* we make a private copy of the tokens */
-   vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0]));
+   vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
+   if (!vs->base.state.tokens) {
+      FREE(vs);
+      return NULL;
+   }
 
    tgsi_scan_shader(vs->base.state.tokens, &vs->base.info);
 
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index a57c938fbf1..5929ea76b23 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -188,7 +188,6 @@ draw_create_vs_sse(struct draw_context *draw,
                           const struct pipe_shader_state *templ)
 {
    struct draw_sse_vertex_shader *vs;
-   uint nt = tgsi_num_tokens(templ->tokens);
 
    if (!rtasm_cpu_has_sse2())
       return NULL;
@@ -198,7 +197,9 @@ draw_create_vs_sse(struct draw_context *draw,
       return NULL;
 
    /* we make a private copy of the tokens */
-   vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0]));
+   vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
+   if (!vs->base.state.tokens)
+      goto fail;
 
    tgsi_scan_shader(templ->tokens, &vs->base.info);
 
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
index 5bea7738401..5c0b0bfd61b 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
@@ -330,3 +330,18 @@ tgsi_num_tokens(const struct tgsi_token *tokens)
    }
    return 0;
 }
+
+
+/**
+ * Make a new copy of a token array.
+ */
+struct tgsi_token *
+tgsi_dup_tokens(const struct tgsi_token *tokens)
+{
+   unsigned n = tgsi_num_tokens(tokens);
+   unsigned bytes = n * sizeof(struct tgsi_token);
+   struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes);
+   if (new_tokens)
+      memcpy(new_tokens, tokens, bytes);
+   return new_tokens;
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
index 15e76feb7ca..41021010936 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -1,4 +1,31 @@
-#if !defined TGSI_PARSE_H
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_PARSE_H
 #define TGSI_PARSE_H
 
 #include "pipe/p_shader_tokens.h"
@@ -118,6 +145,8 @@ tgsi_parse_token(
 unsigned
 tgsi_num_tokens(const struct tgsi_token *tokens);
 
+struct tgsi_token *
+tgsi_dup_tokens(const struct tgsi_token *tokens);
 
 #if defined __cplusplus
 }
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
index fb2e940348a..c3a3fbd066d 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c
@@ -30,33 +30,50 @@
 #include "pipe/p_inlines.h"
 #include "pipe/p_winsys.h"
 #include "draw/draw_context.h"
-#if 0
-#include "pipe/p_shader_tokens.h"
-#include "gallivm/gallivm.h"
-#include "tgsi/util/tgsi_dump.h"
-#include "tgsi/exec/tgsi_sse2.h"
-#endif
+#include "tgsi/util/tgsi_parse.h"
 
 #include "cell_context.h"
 #include "cell_state.h"
 
 
+
+/** cast wrapper */
+static INLINE struct cell_fragment_shader_state *
+cell_fragment_shader_state(void *shader)
+{
+   return (struct pipe_shader_state *) shader;
+}
+
+
+/** cast wrapper */
+static INLINE struct cell_vertex_shader_state *
+cell_vertex_shader_state(void *shader)
+{
+   return (struct pipe_shader_state *) shader;
+}
+
+
+
 static void *
 cell_create_fs_state(struct pipe_context *pipe,
                      const struct pipe_shader_state *templ)
 {
    /*struct cell_context *cell = cell_context(pipe);*/
-   struct cell_fragment_shader_state *state;
+   struct cell_fragment_shader_state *cfs;
 
-   state = CALLOC_STRUCT(cell_fragment_shader_state);
-   if (!state)
+   cfs = CALLOC_STRUCT(cell_fragment_shader_state);
+   if (!cfs)
       return NULL;
 
-   state->shader = *templ;
+   cfs->shader.tokens = tgsi_dup_tokens(templ->tokens);
+   if (!cfs->shader.tokens) {
+      FREE(cfs);
+      return NULL;
+   }
 
-   tgsi_scan_shader(templ->tokens, &state->info);
+   tgsi_scan_shader(templ->tokens, &cfs->info);
 
-   return state;
+   return cfs;
 }
 
 
@@ -65,7 +82,7 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct cell_context *cell = cell_context(pipe);
 
-   cell->fs = (struct cell_fragment_shader_state *) fs;
+   cell->fs = cell_fragment_shader_state(fs);
 
    cell->dirty |= CELL_NEW_FS;
 }
@@ -74,10 +91,10 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs)
 static void
 cell_delete_fs_state(struct pipe_context *pipe, void *fs)
 {
-   struct cell_fragment_shader_state *state =
-      (struct cell_fragment_shader_state *) fs;
+   struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs);
 
-   FREE( state );
+   FREE((void *) cfs->shader.tokens);
+   FREE(cfs);
 }
 
 
@@ -86,22 +103,28 @@ cell_create_vs_state(struct pipe_context *pipe,
                      const struct pipe_shader_state *templ)
 {
    struct cell_context *cell = cell_context(pipe);
-   struct cell_vertex_shader_state *state;
+   struct cell_vertex_shader_state *cvs;
 
-   state = CALLOC_STRUCT(cell_vertex_shader_state);
-   if (!state)
+   cvs = CALLOC_STRUCT(cell_vertex_shader_state);
+   if (!cvs)
       return NULL;
 
-   state->shader = *templ;
-   tgsi_scan_shader(templ->tokens, &state->info);
+   cvs->shader.tokens = tgsi_dup_tokens(templ->tokens);
+   if (!cvs->shader.tokens) {
+      FREE(cvs);
+      return NULL;
+   }
 
-   state->draw_data = draw_create_vertex_shader(cell->draw, &state->shader);
-   if (state->draw_data == NULL) {
-      FREE( state );
+   tgsi_scan_shader(templ->tokens, &cvs->info);
+
+   cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader);
+   if (cvs->draw_data == NULL) {
+      FREE( (void *) cvs->shader.tokens );
+      FREE( cvs );
       return NULL;
    }
 
-   return state;
+   return cvs;
 }
 
 
@@ -110,7 +133,7 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs)
 {
    struct cell_context *cell = cell_context(pipe);
 
-   cell->vs = (const struct cell_vertex_shader_state *) vs;
+   cell->vs = cell_vertex_shader_state(vs);
 
    draw_bind_vertex_shader(cell->draw,
                            (cell->vs ? cell->vs->draw_data : NULL));
@@ -123,12 +146,11 @@ static void
 cell_delete_vs_state(struct pipe_context *pipe, void *vs)
 {
    struct cell_context *cell = cell_context(pipe);
+   struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs);
 
-   struct cell_vertex_shader_state *state =
-      (struct cell_vertex_shader_state *) vs;
-
-   draw_delete_vertex_shader(cell->draw, state->draw_data);
-   FREE( state );
+   draw_delete_vertex_shader(cell->draw, cvs->draw_data);
+   FREE( (void *) cvs->shader.tokens );
+   FREE( cvs );
 }
 
 
diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c
index 3d94b523660..4adeb37e860 100644
--- a/src/gallium/drivers/i915simple/i915_state.c
+++ b/src/gallium/drivers/i915simple/i915_state.c
@@ -33,6 +33,7 @@
 #include "pipe/p_winsys.h"
 #include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
+#include "tgsi/util/tgsi_parse.h"
 
 #include "i915_context.h"
 #include "i915_reg.h"
@@ -436,7 +437,7 @@ i915_create_fs_state(struct pipe_context *pipe,
    if (!ifs)
       return NULL;
 
-   ifs->state = *templ;
+   ifs->state.tokens = tgsi_dup_tokens(templ->tokens);
 
    tgsi_scan_shader(templ->tokens, &ifs->info);
 
@@ -465,6 +466,8 @@ void i915_delete_fs_state(struct pipe_context *pipe, void *shader)
       FREE(ifs->program);
    ifs->program_len = 0;
 
+   FREE(ifs->state.tokens);
+
    FREE(ifs);
 }
 
diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c
index 376f1487b29..ac243b7e4f9 100644
--- a/src/gallium/drivers/i965simple/brw_state.c
+++ b/src/gallium/drivers/i965simple/brw_state.c
@@ -35,6 +35,7 @@
 #include "pipe/p_inlines.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/util/tgsi_dump.h"
+#include "tgsi/util/tgsi_parse.h"
 
 #include "brw_context.h"
 #include "brw_defines.h"
@@ -182,9 +183,7 @@ static void * brw_create_fs_state(struct pipe_context *pipe,
 {
    struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program);
 
-   /* XXX: Do I have to duplicate the tokens as well??
-    */
-   brw_fp->program = *shader;
+   brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens);
    brw_fp->id = brw_context(pipe)->program_id++;
 
    tgsi_scan_shader(shader->tokens, &brw_fp->info);
@@ -210,7 +209,10 @@ static void brw_bind_fs_state(struct pipe_context *pipe, void *shader)
 
 static void brw_delete_fs_state(struct pipe_context *pipe, void *shader)
 {
-   FREE(shader);
+   struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader;
+
+   FREE((void *) brw_fp->program.tokens);
+   FREE(brw_fp);
 }
 
 
@@ -223,9 +225,7 @@ static void *brw_create_vs_state(struct pipe_context *pipe,
 {
    struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program);
 
-   /* XXX: Do I have to duplicate the tokens as well??
-    */
-   brw_vp->program = *shader;
+   brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens);
    brw_vp->id = brw_context(pipe)->program_id++;
 
    tgsi_scan_shader(shader->tokens, &brw_vp->info);
@@ -251,7 +251,10 @@ static void brw_bind_vs_state(struct pipe_context *pipe, void *vs)
 
 static void brw_delete_vs_state(struct pipe_context *pipe, void *shader)
 {
-   FREE(shader);
+   struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader;
+
+   FREE((void *) brw_vp->program.tokens);
+   FREE(brw_vp);
 }
 
 
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index d5bd7a702f1..0b199a2193e 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -37,6 +37,7 @@
 #include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "tgsi/exec/tgsi_exec.h"
+#include "tgsi/util/tgsi_parse.h"
 
 struct sp_exec_fragment_shader {
    struct sp_fragment_shader base;
@@ -116,6 +117,7 @@ exec_run( const struct sp_fragment_shader *base,
 static void 
 exec_delete( struct sp_fragment_shader *base )
 {
+   FREE((void *) base->shader.tokens);
    FREE(base);
 }
 
@@ -137,7 +139,8 @@ softpipe_create_fs_exec(struct softpipe_context *softpipe,
    if (!shader)
       return NULL;
 
-   shader->base.shader = *templ;
+   /* we need to keep a local copy of the tokens */
+   shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens);
    shader->base.prepare = exec_prepare;
    shader->base.run = exec_run;
    shader->base.delete = exec_delete;
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 45056502b8e..452e51fa791 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -54,9 +54,11 @@
 
 struct tgsi_sampler;
 struct tgsi_exec_machine;
+struct vertex_info;
 
 
-/** Subclass of pipe_shader_state (though it doesn't really need to be).
+/**
+ * Subclass of pipe_shader_state (though it doesn't really need to be).
  *
  * This is starting to look an awful lot like a quad pipeline stage...
  */
@@ -80,11 +82,10 @@ struct sp_fragment_shader {
    void (*delete)( struct sp_fragment_shader * );
 };
 
-struct vertex_info;
 
 /** Subclass of pipe_shader_state */
 struct sp_vertex_shader {
-   struct pipe_shader_state shader;
+   struct pipe_shader_state shader;  /* Note: this field not actually used */
    struct draw_vertex_shader *draw_data;
 };
 
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index 9e77b7e91bc..24b91fbc794 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -102,10 +102,7 @@ softpipe_create_vs_state(struct pipe_context *pipe,
       return NULL;
    }
 
-   state->shader = *templ;
-
-   state->draw_data = draw_create_vertex_shader(softpipe->draw,
-                                                &state->shader);
+   state->draw_data = draw_create_vertex_shader(softpipe->draw, templ);
    if (state->draw_data == NULL) {
       FREE( state );
       return NULL;
-- 
cgit v1.2.3


From adaaa29218f1babad874f50681ca971fdd3b8a40 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Tue, 27 May 2008 11:12:42 +0100
Subject: tgsi: export utils for dumping individual instructions

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 24 ++++++++++++------------
 src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+), 12 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 4c65ffd7807..648afa2a51b 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -539,9 +539,9 @@ static const char *TGSI_MODULATES[] =
    "MODULATE_EIGHTH"
 };
 
-static void
-dump_declaration_short(
-   struct tgsi_full_declaration  *decl )
+void
+tgsi_dump_declaration(
+   const struct tgsi_full_declaration  *decl )
 {
    TXT( "\nDCL " );
    ENM( decl->Declaration.File, TGSI_FILES_SHORT );
@@ -672,9 +672,9 @@ dump_declaration_verbose(
    }
 }
 
-static void
-dump_immediate_short(
-   struct tgsi_full_immediate *imm )
+void
+tgsi_dump_immediate(
+   const struct tgsi_full_immediate *imm )
 {
    unsigned i;
 
@@ -727,9 +727,9 @@ dump_immediate_verbose(
    }
 }
 
-static void
-dump_instruction_short(
-   struct tgsi_full_instruction  *inst,
+void
+tgsi_dump_instruction(
+   const struct tgsi_full_instruction  *inst,
    unsigned                      instno )
 {
    unsigned i;
@@ -1281,17 +1281,17 @@ tgsi_dump(
 
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_DECLARATION:
-         dump_declaration_short(
+         tgsi_dump_declaration(
             &parse.FullToken.FullDeclaration );
          break;
 
       case TGSI_TOKEN_TYPE_IMMEDIATE:
-         dump_immediate_short(
+         tgsi_dump_immediate(
             &parse.FullToken.FullImmediate );
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
-         dump_instruction_short(
+         tgsi_dump_instruction(
             &parse.FullToken.FullInstruction,
             instno );
          instno++;
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
index beb0155d56c..ca83bdef206 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
@@ -14,6 +14,24 @@ tgsi_dump(
    const struct tgsi_token *tokens,
    unsigned                flags );
 
+struct tgsi_full_immediate;
+struct tgsi_full_instruction;
+struct tgsi_full_declaration;
+
+void
+tgsi_dump_immediate(
+   const struct tgsi_full_immediate *imm );
+
+void
+tgsi_dump_instruction(
+   const struct tgsi_full_instruction  *inst,
+   unsigned                      instno );
+
+void
+tgsi_dump_declaration(
+   const struct tgsi_full_declaration  *decl );
+
+
 #if defined __cplusplus
 }
 #endif
-- 
cgit v1.2.3


From 55d29a8d48663982a1aeea414f69a5896b97d1ea Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Wed, 28 May 2008 16:12:14 +0900
Subject: gallium: Windows CE portability fixes.

---
 src/gallium/auxiliary/draw/draw_pt_elts.c          |   8 +-
 src/gallium/auxiliary/draw/draw_pt_varray.c        |   4 +-
 src/gallium/auxiliary/draw/draw_vs_sse.c           |   4 +-
 src/gallium/auxiliary/rtasm/rtasm_cpu.c            |   4 +-
 src/gallium/auxiliary/rtasm/rtasm_x86sse.c         |   4 +-
 src/gallium/auxiliary/rtasm/rtasm_x86sse.h         |   4 +-
 src/gallium/auxiliary/tgsi/util/tgsi_util.c        |   2 +-
 src/gallium/auxiliary/translate/translate.c        |   3 +-
 .../auxiliary/translate/translate_generic.c        | 236 ++++++++++-----------
 src/gallium/auxiliary/translate/translate_sse.c    |   5 +-
 src/gallium/auxiliary/util/u_time.h                |   2 +-
 src/gallium/include/pipe/p_compiler.h              |  54 +++--
 src/gallium/include/pipe/p_config.h                |   8 +-
 src/gallium/include/pipe/p_debug.h                 |  11 +-
 14 files changed, 192 insertions(+), 157 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
index 2094c081ed4..b7780fb5073 100644
--- a/src/gallium/auxiliary/draw/draw_pt_elts.c
+++ b/src/gallium/auxiliary/draw/draw_pt_elts.c
@@ -60,10 +60,10 @@ static unsigned elt_vert( const void *elts, unsigned idx )
 pt_elt_func draw_pt_elt_func( struct draw_context *draw )
 {
    switch (draw->pt.user.eltSize) {
-   case 0: return elt_vert;
-   case 1: return elt_ubyte;
-   case 2: return elt_ushort; 
-   case 4: return elt_uint;
+   case 0: return &elt_vert;
+   case 1: return &elt_ubyte;
+   case 2: return &elt_ushort; 
+   case 4: return &elt_uint;
    default: return NULL;
    }
 }     
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index 355093f945e..c7c66b34d49 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -147,8 +147,8 @@ static INLINE void varray_ef_quad( struct varray_frontend *varray,
                                    unsigned i2,
                                    unsigned i3 )
 {
-   const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2;
-   const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1;
+   const ushort omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2;
+   const ushort omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1;
 
    varray_triangle_flags( varray,
                           DRAW_PIPE_RESET_STIPPLE | omitEdge1,
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index e3f4e67472a..c88bc137eec 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -31,9 +31,11 @@
   *   Brian Paul
   */
 
+#include "pipe/p_config.h"
+
 #include "draw_vs.h"
 
-#if defined(__i386__) || defined(__386__)
+#if defined(PIPE_ARCH_X86)
 
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index f01e12faa07..5499018b219 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -47,7 +47,7 @@ static boolean rtasm_sse_enabled(void)
 int rtasm_cpu_has_sse(void)
 {
    /* FIXME: actually detect this at run-time */
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#if defined(PIPE_ARCH_X86)
    return rtasm_sse_enabled();
 #else
    return 0;
@@ -57,7 +57,7 @@ int rtasm_cpu_has_sse(void)
 int rtasm_cpu_has_sse2(void) 
 {
    /* FIXME: actually detect this at run-time */
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#if defined(PIPE_ARCH_X86)
    return rtasm_sse_enabled();
 #else
    return 0;
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 4e036d9032c..6cd88ebca3d 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -21,7 +21,9 @@
  *
  **************************************************************************/
 
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_X86)
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_debug.h"
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index eacaeeaf6fe..a5afa16395e 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -24,7 +24,9 @@
 #ifndef _RTASM_X86SSE_H_
 #define _RTASM_X86SSE_H_
 
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_X86)
 
 /* It is up to the caller to ensure that instructions issued are
  * suitable for the host cpu.  There are no checks made in this module
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c
index 4cdd89182ad..56a50d3b210 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.c
@@ -8,7 +8,7 @@
 union pointer_hack
 {
    void *pointer;
-   unsigned long long uint64;
+   uint64_t uint64;
 };
 
 void *
diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
index b04bc6eefd7..b93fbf9033e 100644
--- a/src/gallium/auxiliary/translate/translate.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -30,6 +30,7 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
+#include "pipe/p_config.h"
 #include "pipe/p_util.h"
 #include "pipe/p_state.h"
 #include "translate.h"
@@ -38,7 +39,7 @@ struct translate *translate_create( const struct translate_key *key )
 {
    struct translate *translate = NULL;
 
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#if defined(PIPE_ARCH_X86)
    translate = translate_sse2_create( key );
    if (translate)
       return translate;
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 402780ee539..8f3b470333f 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -255,140 +255,140 @@ static fetch_func get_fetch_func( enum pipe_format format )
 {
    switch (format) {
    case PIPE_FORMAT_R64_FLOAT:
-      return fetch_R64_FLOAT;
+      return &fetch_R64_FLOAT;
    case PIPE_FORMAT_R64G64_FLOAT:
-      return fetch_R64G64_FLOAT;
+      return &fetch_R64G64_FLOAT;
    case PIPE_FORMAT_R64G64B64_FLOAT:
-      return fetch_R64G64B64_FLOAT;
+      return &fetch_R64G64B64_FLOAT;
    case PIPE_FORMAT_R64G64B64A64_FLOAT:
-      return fetch_R64G64B64A64_FLOAT;
+      return &fetch_R64G64B64A64_FLOAT;
 
    case PIPE_FORMAT_R32_FLOAT:
-      return fetch_R32_FLOAT;
+      return &fetch_R32_FLOAT;
    case PIPE_FORMAT_R32G32_FLOAT:
-      return fetch_R32G32_FLOAT;
+      return &fetch_R32G32_FLOAT;
    case PIPE_FORMAT_R32G32B32_FLOAT:
-      return fetch_R32G32B32_FLOAT;
+      return &fetch_R32G32B32_FLOAT;
    case PIPE_FORMAT_R32G32B32A32_FLOAT:
-      return fetch_R32G32B32A32_FLOAT;
+      return &fetch_R32G32B32A32_FLOAT;
 
    case PIPE_FORMAT_R32_UNORM:
-      return fetch_R32_UNORM;
+      return &fetch_R32_UNORM;
    case PIPE_FORMAT_R32G32_UNORM:
-      return fetch_R32G32_UNORM;
+      return &fetch_R32G32_UNORM;
    case PIPE_FORMAT_R32G32B32_UNORM:
-      return fetch_R32G32B32_UNORM;
+      return &fetch_R32G32B32_UNORM;
    case PIPE_FORMAT_R32G32B32A32_UNORM:
-      return fetch_R32G32B32A32_UNORM;
+      return &fetch_R32G32B32A32_UNORM;
 
    case PIPE_FORMAT_R32_USCALED:
-      return fetch_R32_USCALED;
+      return &fetch_R32_USCALED;
    case PIPE_FORMAT_R32G32_USCALED:
-      return fetch_R32G32_USCALED;
+      return &fetch_R32G32_USCALED;
    case PIPE_FORMAT_R32G32B32_USCALED:
-      return fetch_R32G32B32_USCALED;
+      return &fetch_R32G32B32_USCALED;
    case PIPE_FORMAT_R32G32B32A32_USCALED:
-      return fetch_R32G32B32A32_USCALED;
+      return &fetch_R32G32B32A32_USCALED;
 
    case PIPE_FORMAT_R32_SNORM:
-      return fetch_R32_SNORM;
+      return &fetch_R32_SNORM;
    case PIPE_FORMAT_R32G32_SNORM:
-      return fetch_R32G32_SNORM;
+      return &fetch_R32G32_SNORM;
    case PIPE_FORMAT_R32G32B32_SNORM:
-      return fetch_R32G32B32_SNORM;
+      return &fetch_R32G32B32_SNORM;
    case PIPE_FORMAT_R32G32B32A32_SNORM:
-      return fetch_R32G32B32A32_SNORM;
+      return &fetch_R32G32B32A32_SNORM;
 
    case PIPE_FORMAT_R32_SSCALED:
-      return fetch_R32_SSCALED;
+      return &fetch_R32_SSCALED;
    case PIPE_FORMAT_R32G32_SSCALED:
-      return fetch_R32G32_SSCALED;
+      return &fetch_R32G32_SSCALED;
    case PIPE_FORMAT_R32G32B32_SSCALED:
-      return fetch_R32G32B32_SSCALED;
+      return &fetch_R32G32B32_SSCALED;
    case PIPE_FORMAT_R32G32B32A32_SSCALED:
-      return fetch_R32G32B32A32_SSCALED;
+      return &fetch_R32G32B32A32_SSCALED;
 
    case PIPE_FORMAT_R16_UNORM:
-      return fetch_R16_UNORM;
+      return &fetch_R16_UNORM;
    case PIPE_FORMAT_R16G16_UNORM:
-      return fetch_R16G16_UNORM;
+      return &fetch_R16G16_UNORM;
    case PIPE_FORMAT_R16G16B16_UNORM:
-      return fetch_R16G16B16_UNORM;
+      return &fetch_R16G16B16_UNORM;
    case PIPE_FORMAT_R16G16B16A16_UNORM:
-      return fetch_R16G16B16A16_UNORM;
+      return &fetch_R16G16B16A16_UNORM;
 
    case PIPE_FORMAT_R16_USCALED:
-      return fetch_R16_USCALED;
+      return &fetch_R16_USCALED;
    case PIPE_FORMAT_R16G16_USCALED:
-      return fetch_R16G16_USCALED;
+      return &fetch_R16G16_USCALED;
    case PIPE_FORMAT_R16G16B16_USCALED:
-      return fetch_R16G16B16_USCALED;
+      return &fetch_R16G16B16_USCALED;
    case PIPE_FORMAT_R16G16B16A16_USCALED:
-      return fetch_R16G16B16A16_USCALED;
+      return &fetch_R16G16B16A16_USCALED;
 
    case PIPE_FORMAT_R16_SNORM:
-      return fetch_R16_SNORM;
+      return &fetch_R16_SNORM;
    case PIPE_FORMAT_R16G16_SNORM:
-      return fetch_R16G16_SNORM;
+      return &fetch_R16G16_SNORM;
    case PIPE_FORMAT_R16G16B16_SNORM:
-      return fetch_R16G16B16_SNORM;
+      return &fetch_R16G16B16_SNORM;
    case PIPE_FORMAT_R16G16B16A16_SNORM:
-      return fetch_R16G16B16A16_SNORM;
+      return &fetch_R16G16B16A16_SNORM;
 
    case PIPE_FORMAT_R16_SSCALED:
-      return fetch_R16_SSCALED;
+      return &fetch_R16_SSCALED;
    case PIPE_FORMAT_R16G16_SSCALED:
-      return fetch_R16G16_SSCALED;
+      return &fetch_R16G16_SSCALED;
    case PIPE_FORMAT_R16G16B16_SSCALED:
-      return fetch_R16G16B16_SSCALED;
+      return &fetch_R16G16B16_SSCALED;
    case PIPE_FORMAT_R16G16B16A16_SSCALED:
-      return fetch_R16G16B16A16_SSCALED;
+      return &fetch_R16G16B16A16_SSCALED;
 
    case PIPE_FORMAT_R8_UNORM:
-      return fetch_R8_UNORM;
+      return &fetch_R8_UNORM;
    case PIPE_FORMAT_R8G8_UNORM:
-      return fetch_R8G8_UNORM;
+      return &fetch_R8G8_UNORM;
    case PIPE_FORMAT_R8G8B8_UNORM:
-      return fetch_R8G8B8_UNORM;
+      return &fetch_R8G8B8_UNORM;
    case PIPE_FORMAT_R8G8B8A8_UNORM:
-      return fetch_R8G8B8A8_UNORM;
+      return &fetch_R8G8B8A8_UNORM;
 
    case PIPE_FORMAT_R8_USCALED:
-      return fetch_R8_USCALED;
+      return &fetch_R8_USCALED;
    case PIPE_FORMAT_R8G8_USCALED:
-      return fetch_R8G8_USCALED;
+      return &fetch_R8G8_USCALED;
    case PIPE_FORMAT_R8G8B8_USCALED:
-      return fetch_R8G8B8_USCALED;
+      return &fetch_R8G8B8_USCALED;
    case PIPE_FORMAT_R8G8B8A8_USCALED:
-      return fetch_R8G8B8A8_USCALED;
+      return &fetch_R8G8B8A8_USCALED;
 
    case PIPE_FORMAT_R8_SNORM:
-      return fetch_R8_SNORM;
+      return &fetch_R8_SNORM;
    case PIPE_FORMAT_R8G8_SNORM:
-      return fetch_R8G8_SNORM;
+      return &fetch_R8G8_SNORM;
    case PIPE_FORMAT_R8G8B8_SNORM:
-      return fetch_R8G8B8_SNORM;
+      return &fetch_R8G8B8_SNORM;
    case PIPE_FORMAT_R8G8B8A8_SNORM:
-      return fetch_R8G8B8A8_SNORM;
+      return &fetch_R8G8B8A8_SNORM;
 
    case PIPE_FORMAT_R8_SSCALED:
-      return fetch_R8_SSCALED;
+      return &fetch_R8_SSCALED;
    case PIPE_FORMAT_R8G8_SSCALED:
-      return fetch_R8G8_SSCALED;
+      return &fetch_R8G8_SSCALED;
    case PIPE_FORMAT_R8G8B8_SSCALED:
-      return fetch_R8G8B8_SSCALED;
+      return &fetch_R8G8B8_SSCALED;
    case PIPE_FORMAT_R8G8B8A8_SSCALED:
-      return fetch_R8G8B8A8_SSCALED;
+      return &fetch_R8G8B8A8_SSCALED;
 
    case PIPE_FORMAT_A8R8G8B8_UNORM:
-      return fetch_A8R8G8B8_UNORM;
+      return &fetch_A8R8G8B8_UNORM;
 
    case PIPE_FORMAT_B8G8R8A8_UNORM:
-      return fetch_B8G8R8A8_UNORM;
+      return &fetch_B8G8R8A8_UNORM;
 
    default:
       assert(0); 
-      return fetch_NULL;
+      return &fetch_NULL;
    }
 }
 
@@ -399,140 +399,140 @@ static emit_func get_emit_func( enum pipe_format format )
 {
    switch (format) {
    case PIPE_FORMAT_R64_FLOAT:
-      return emit_R64_FLOAT;
+      return &emit_R64_FLOAT;
    case PIPE_FORMAT_R64G64_FLOAT:
-      return emit_R64G64_FLOAT;
+      return &emit_R64G64_FLOAT;
    case PIPE_FORMAT_R64G64B64_FLOAT:
-      return emit_R64G64B64_FLOAT;
+      return &emit_R64G64B64_FLOAT;
    case PIPE_FORMAT_R64G64B64A64_FLOAT:
-      return emit_R64G64B64A64_FLOAT;
+      return &emit_R64G64B64A64_FLOAT;
 
    case PIPE_FORMAT_R32_FLOAT:
-      return emit_R32_FLOAT;
+      return &emit_R32_FLOAT;
    case PIPE_FORMAT_R32G32_FLOAT:
-      return emit_R32G32_FLOAT;
+      return &emit_R32G32_FLOAT;
    case PIPE_FORMAT_R32G32B32_FLOAT:
-      return emit_R32G32B32_FLOAT;
+      return &emit_R32G32B32_FLOAT;
    case PIPE_FORMAT_R32G32B32A32_FLOAT:
-      return emit_R32G32B32A32_FLOAT;
+      return &emit_R32G32B32A32_FLOAT;
 
    case PIPE_FORMAT_R32_UNORM:
-      return emit_R32_UNORM;
+      return &emit_R32_UNORM;
    case PIPE_FORMAT_R32G32_UNORM:
-      return emit_R32G32_UNORM;
+      return &emit_R32G32_UNORM;
    case PIPE_FORMAT_R32G32B32_UNORM:
-      return emit_R32G32B32_UNORM;
+      return &emit_R32G32B32_UNORM;
    case PIPE_FORMAT_R32G32B32A32_UNORM:
-      return emit_R32G32B32A32_UNORM;
+      return &emit_R32G32B32A32_UNORM;
 
    case PIPE_FORMAT_R32_USCALED:
-      return emit_R32_USCALED;
+      return &emit_R32_USCALED;
    case PIPE_FORMAT_R32G32_USCALED:
-      return emit_R32G32_USCALED;
+      return &emit_R32G32_USCALED;
    case PIPE_FORMAT_R32G32B32_USCALED:
-      return emit_R32G32B32_USCALED;
+      return &emit_R32G32B32_USCALED;
    case PIPE_FORMAT_R32G32B32A32_USCALED:
-      return emit_R32G32B32A32_USCALED;
+      return &emit_R32G32B32A32_USCALED;
 
    case PIPE_FORMAT_R32_SNORM:
-      return emit_R32_SNORM;
+      return &emit_R32_SNORM;
    case PIPE_FORMAT_R32G32_SNORM:
-      return emit_R32G32_SNORM;
+      return &emit_R32G32_SNORM;
    case PIPE_FORMAT_R32G32B32_SNORM:
-      return emit_R32G32B32_SNORM;
+      return &emit_R32G32B32_SNORM;
    case PIPE_FORMAT_R32G32B32A32_SNORM:
-      return emit_R32G32B32A32_SNORM;
+      return &emit_R32G32B32A32_SNORM;
 
    case PIPE_FORMAT_R32_SSCALED:
-      return emit_R32_SSCALED;
+      return &emit_R32_SSCALED;
    case PIPE_FORMAT_R32G32_SSCALED:
-      return emit_R32G32_SSCALED;
+      return &emit_R32G32_SSCALED;
    case PIPE_FORMAT_R32G32B32_SSCALED:
-      return emit_R32G32B32_SSCALED;
+      return &emit_R32G32B32_SSCALED;
    case PIPE_FORMAT_R32G32B32A32_SSCALED:
-      return emit_R32G32B32A32_SSCALED;
+      return &emit_R32G32B32A32_SSCALED;
 
    case PIPE_FORMAT_R16_UNORM:
-      return emit_R16_UNORM;
+      return &emit_R16_UNORM;
    case PIPE_FORMAT_R16G16_UNORM:
-      return emit_R16G16_UNORM;
+      return &emit_R16G16_UNORM;
    case PIPE_FORMAT_R16G16B16_UNORM:
-      return emit_R16G16B16_UNORM;
+      return &emit_R16G16B16_UNORM;
    case PIPE_FORMAT_R16G16B16A16_UNORM:
-      return emit_R16G16B16A16_UNORM;
+      return &emit_R16G16B16A16_UNORM;
 
    case PIPE_FORMAT_R16_USCALED:
-      return emit_R16_USCALED;
+      return &emit_R16_USCALED;
    case PIPE_FORMAT_R16G16_USCALED:
-      return emit_R16G16_USCALED;
+      return &emit_R16G16_USCALED;
    case PIPE_FORMAT_R16G16B16_USCALED:
-      return emit_R16G16B16_USCALED;
+      return &emit_R16G16B16_USCALED;
    case PIPE_FORMAT_R16G16B16A16_USCALED:
-      return emit_R16G16B16A16_USCALED;
+      return &emit_R16G16B16A16_USCALED;
 
    case PIPE_FORMAT_R16_SNORM:
-      return emit_R16_SNORM;
+      return &emit_R16_SNORM;
    case PIPE_FORMAT_R16G16_SNORM:
-      return emit_R16G16_SNORM;
+      return &emit_R16G16_SNORM;
    case PIPE_FORMAT_R16G16B16_SNORM:
-      return emit_R16G16B16_SNORM;
+      return &emit_R16G16B16_SNORM;
    case PIPE_FORMAT_R16G16B16A16_SNORM:
-      return emit_R16G16B16A16_SNORM;
+      return &emit_R16G16B16A16_SNORM;
 
    case PIPE_FORMAT_R16_SSCALED:
-      return emit_R16_SSCALED;
+      return &emit_R16_SSCALED;
    case PIPE_FORMAT_R16G16_SSCALED:
-      return emit_R16G16_SSCALED;
+      return &emit_R16G16_SSCALED;
    case PIPE_FORMAT_R16G16B16_SSCALED:
-      return emit_R16G16B16_SSCALED;
+      return &emit_R16G16B16_SSCALED;
    case PIPE_FORMAT_R16G16B16A16_SSCALED:
-      return emit_R16G16B16A16_SSCALED;
+      return &emit_R16G16B16A16_SSCALED;
 
    case PIPE_FORMAT_R8_UNORM:
-      return emit_R8_UNORM;
+      return &emit_R8_UNORM;
    case PIPE_FORMAT_R8G8_UNORM:
-      return emit_R8G8_UNORM;
+      return &emit_R8G8_UNORM;
    case PIPE_FORMAT_R8G8B8_UNORM:
-      return emit_R8G8B8_UNORM;
+      return &emit_R8G8B8_UNORM;
    case PIPE_FORMAT_R8G8B8A8_UNORM:
-      return emit_R8G8B8A8_UNORM;
+      return &emit_R8G8B8A8_UNORM;
 
    case PIPE_FORMAT_R8_USCALED:
-      return emit_R8_USCALED;
+      return &emit_R8_USCALED;
    case PIPE_FORMAT_R8G8_USCALED:
-      return emit_R8G8_USCALED;
+      return &emit_R8G8_USCALED;
    case PIPE_FORMAT_R8G8B8_USCALED:
-      return emit_R8G8B8_USCALED;
+      return &emit_R8G8B8_USCALED;
    case PIPE_FORMAT_R8G8B8A8_USCALED:
-      return emit_R8G8B8A8_USCALED;
+      return &emit_R8G8B8A8_USCALED;
 
    case PIPE_FORMAT_R8_SNORM:
-      return emit_R8_SNORM;
+      return &emit_R8_SNORM;
    case PIPE_FORMAT_R8G8_SNORM:
-      return emit_R8G8_SNORM;
+      return &emit_R8G8_SNORM;
    case PIPE_FORMAT_R8G8B8_SNORM:
-      return emit_R8G8B8_SNORM;
+      return &emit_R8G8B8_SNORM;
    case PIPE_FORMAT_R8G8B8A8_SNORM:
-      return emit_R8G8B8A8_SNORM;
+      return &emit_R8G8B8A8_SNORM;
 
    case PIPE_FORMAT_R8_SSCALED:
-      return emit_R8_SSCALED;
+      return &emit_R8_SSCALED;
    case PIPE_FORMAT_R8G8_SSCALED:
-      return emit_R8G8_SSCALED;
+      return &emit_R8G8_SSCALED;
    case PIPE_FORMAT_R8G8B8_SSCALED:
-      return emit_R8G8B8_SSCALED;
+      return &emit_R8G8B8_SSCALED;
    case PIPE_FORMAT_R8G8B8A8_SSCALED:
-      return emit_R8G8B8A8_SSCALED;
+      return &emit_R8G8B8A8_SSCALED;
 
    case PIPE_FORMAT_A8R8G8B8_UNORM:
-      return emit_A8R8G8B8_UNORM;
+      return &emit_A8R8G8B8_UNORM;
 
    case PIPE_FORMAT_B8G8R8A8_UNORM:
-      return emit_B8G8R8A8_UNORM;
+      return &emit_B8G8R8A8_UNORM;
 
    default:
       assert(0); 
-      return emit_NULL;
+      return &emit_NULL;
    }
 }
 
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index a54ac5a82ff..634b05b8a92 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -26,6 +26,7 @@
  */
 
 
+#include "pipe/p_config.h"
 #include "pipe/p_compiler.h"
 #include "pipe/p_util.h"
 #include "util/u_simple_list.h"
@@ -33,7 +34,7 @@
 #include "translate.h"
 
 
-#if defined(__i386__) || defined(__386__) || defined(i386)
+#if defined(PIPE_ARCH_X86)
 
 #include "rtasm/rtasm_cpu.h"
 #include "rtasm/rtasm_x86sse.h"
@@ -617,7 +618,7 @@ struct translate *translate_sse2_create( const struct translate_key *key )
 
 #else
 
-void translate_create_sse( const struct translate_key *key )
+struct translate *translate_sse2_create( const struct translate_key *key )
 {
    return NULL;
 }
diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h
index 48ec7a4a963..f9963ce0e27 100644
--- a/src/gallium/auxiliary/util/u_time.h
+++ b/src/gallium/auxiliary/util/u_time.h
@@ -61,7 +61,7 @@ struct util_time
 #if defined(PIPE_OS_LINUX)
    struct timeval tv;
 #else
-   long long counter;
+   int64_t counter;
 #endif
 };
    
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index a4b772bc4fd..96b21d998d1 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -52,39 +52,55 @@
 #endif /* __MSC__ */
 
 
-typedef unsigned int       uint;
-typedef unsigned char      ubyte;
-typedef unsigned char      boolean;
-typedef unsigned short     ushort;
-typedef unsigned long long uint64;
-
-
 #if defined(__MSC__)
 
-typedef char               int8_t;
-typedef unsigned char      uint8_t;
-typedef short              int16_t;
-typedef unsigned short     uint16_t;
-typedef long               int32_t;
-typedef unsigned long      uint32_t;
-typedef long long          int64_t;
-typedef unsigned long long uint64_t;
+typedef __int8             int8_t;
+typedef unsigned __int8    uint8_t;
+typedef __int16            int16_t;
+typedef unsigned __int16   uint16_t;
+typedef __int32            int32_t;
+typedef unsigned __int32   uint32_t;
+typedef __int64            int64_t;
+typedef unsigned __int64   uint64_t;
 
 #if defined(_WIN64)
 typedef __int64            intptr_t;
 typedef unsigned __int64   uintptr_t;
 #else
-typedef int                intptr_t;
-typedef unsigned int       uintptr_t;
+typedef __int32            intptr_t;
+typedef unsigned __int32   uintptr_t;
 #endif
 
+#ifndef __cplusplus
+#define false   0
+#define true    1
+#define bool    _Bool
+typedef int     _Bool;
+#define __bool_true_false_are_defined   1
+#endif /* !__cplusplus */
+
 #else
 #include <stdint.h>
+#include <stdbool.h>
 #endif
 
 
-#define TRUE  1
-#define FALSE 0
+typedef unsigned int       uint;
+typedef unsigned char      ubyte;
+typedef unsigned short     ushort;
+typedef uint64_t           uint64;
+
+#if 0
+#define boolean bool
+#else
+typedef unsigned char boolean;
+#endif
+#ifndef TRUE
+#define TRUE  true
+#endif
+#ifndef FALSE
+#define FALSE false
+#endif
 
 
 /* Function inlining */
diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h
index 6ba211a1fcc..d2d2ae16172 100644
--- a/src/gallium/include/pipe/p_config.h
+++ b/src/gallium/include/pipe/p_config.h
@@ -35,6 +35,10 @@
  * this file is auto-generated by an autoconf-like tool at some point, as some 
  * things cannot be determined by existing defines alone. 
  * 
+ * See also:
+ * - http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html
+ * - echo | gcc -dM -E - | sort
+ * - http://msdn.microsoft.com/en-us/library/b0084kay.aspx
  * @author José Fonseca <jrfonseca@tungstengraphics.com>
  */
 
@@ -63,11 +67,11 @@
  * Processor architecture
  */
 
-#if defined(_X86_) || defined(__i386__) || defined(__386__) || defined(i386)
+#if defined(__i386__) /* gcc */ || defined(_M_IX86) /* msvc */ || defined(_X86_) || defined(__386__) || defined(i386)
 #define PIPE_ARCH_X86
 #endif
 
-#if 0 /* FIXME */
+#if defined(__x86_64__) /* gcc */ || defined(_M_X64) /* msvc */ || defined(_M_AMD64) /* msvc */
 #define PIPE_ARCH_X86_64
 #endif
 
diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h
index 0af635be573..05eca752012 100644
--- a/src/gallium/include/pipe/p_debug.h
+++ b/src/gallium/include/pipe/p_debug.h
@@ -59,6 +59,13 @@ extern "C" {
 #endif
 #endif
 
+   
+/* MSVC bebore VC7 does not have the __FUNCTION__ macro */
+#if defined(_MSC_VER) && _MSC_VER < 1300
+#define __FUNCTION__ "???"
+#endif
+
+
 void _debug_vprintf(const char *format, va_list ap);
    
 
@@ -127,8 +134,8 @@ void _debug_break(void);
 #ifdef DEBUG
 #if (defined(__i386__) || defined(__386__)) && defined(__GNUC__)
 #define debug_break() __asm("int3")
-#elif (defined(__i386__) || defined(__386__)) && defined(__MSC__)
-#define debug_break()  _asm {int 3}
+#elif defined(_M_IX86) && defined(_MSC_VER)
+#define debug_break()  do { _asm {int 3} } while(0)
 #else
 #define debug_break() _debug_break()
 #endif
-- 
cgit v1.2.3


From 7a986792dabe6556c63b2f2a997c7c6217604e2d Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Wed, 28 May 2008 21:48:30 +0900
Subject: tgsi: Observe constness.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 648afa2a51b..b018ea9fa1c 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -754,7 +754,7 @@ tgsi_dump_instruction(
    }
 
    for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
-      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+      const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
 
       if( !first_reg ) {
          CHR( ',' );
@@ -812,7 +812,7 @@ tgsi_dump_instruction(
    }
 
    for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
-      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+      const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
 
       if( !first_reg ) {
          CHR( ',' );
-- 
cgit v1.2.3


From 8808d62f608d1397ee75d0087301d0b0a0278244 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Thu, 29 May 2008 22:26:56 +0900
Subject: gallium: MSVC warning fixes.

Conflicts:

	src/gallium/auxiliary/draw/draw_pt_varray.c
	src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
	src/gallium/auxiliary/draw/draw_pt_vcache.c
---
 src/gallium/auxiliary/draw/draw_pipe_vbuf.c     | 2 +-
 src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 9 +++++----
 src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 9 +++++----
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c     | 2 +-
 src/gallium/auxiliary/translate/translate_sse.c | 2 +-
 5 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 67b9a9503d2..d514e28b77f 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -145,7 +145,7 @@ emit_vertex( struct vbuf_stage *vbuf,
       vertex->vertex_id = vbuf->nr_vertices++;
    }
 
-   return vertex->vertex_id;
+   return (ushort)vertex->vertex_id;
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index 6979f6b544f..7c722457c3c 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -10,7 +10,8 @@ static void FUNC(struct draw_pt_front_end *frontend,
 
    boolean flatfirst = (draw->rasterizer->flatshade &&
                         draw->rasterizer->flatshade_first);
-   unsigned i, j, flags;
+   unsigned i, j;
+   ushort flags;
    unsigned first, incr;
 
    varray->fetch_start = start;
@@ -200,9 +201,9 @@ static void FUNC(struct draw_pt_front_end *frontend,
       /* These bitflags look a little odd because we submit the
        * vertices as (1,2,0) to satisfy flatshade requirements.
        */
-      const unsigned edge_first  = DRAW_PIPE_EDGE_FLAG_2;
-      const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
-      const unsigned edge_last   = DRAW_PIPE_EDGE_FLAG_1;
+      const ushort edge_first  = DRAW_PIPE_EDGE_FLAG_2;
+      const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+      const ushort edge_last   = DRAW_PIPE_EDGE_FLAG_1;
 
       flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
       for (j = 0; j + first <= count; j += i) {
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
index cf9f394aa3b..5c99a47e49a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
@@ -10,7 +10,8 @@ static void FUNC( struct draw_pt_front_end *frontend,
 
    boolean flatfirst = (draw->rasterizer->flatshade && 
                         draw->rasterizer->flatshade_first);
-   unsigned i, flags;
+   unsigned i;
+   ushort flags;
 
 
    switch (vcache->input_prim) {
@@ -138,9 +139,9 @@ static void FUNC( struct draw_pt_front_end *frontend,
          /* These bitflags look a little odd because we submit the
           * vertices as (1,2,0) to satisfy flatshade requirements.  
           */
-         const unsigned edge_first  = DRAW_PIPE_EDGE_FLAG_2;
-         const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
-         const unsigned edge_last   = DRAW_PIPE_EDGE_FLAG_1;
+         const ushort edge_first  = DRAW_PIPE_EDGE_FLAG_2;
+         const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+         const ushort edge_last   = DRAW_PIPE_EDGE_FLAG_1;
 
          flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
 
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 8018bd7fa45..189dc6024d1 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -198,7 +198,7 @@ get_coef(
 static void
 emit_retw(
    struct x86_function  *func,
-   unsigned             size )
+   unsigned short        size )
 {
    x86_retw( func, size );
 }
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 69dd4d3dffe..be48a147378 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -309,7 +309,7 @@ static void get_src_ptr( struct translate_sse *p,
 static void emit_swizzle( struct translate_sse *p,
 			  struct x86_reg dest,
 			  struct x86_reg src,
-			  unsigned shuffle )
+			  unsigned char shuffle )
 {
    sse_shufps(p->func, dest, src, shuffle);
 }
-- 
cgit v1.2.3


From a49381587f73c67469ec7546419cfc41387f938c Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sat, 31 May 2008 19:48:13 +0200
Subject: tgsi: Fix build after TGSI declaration interface changes.

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c  |   8 +-
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c  |   8 +-
 src/gallium/auxiliary/tgsi/util/tgsi_build.c | 126 ++++++---------------------
 src/gallium/auxiliary/tgsi/util/tgsi_build.h |  19 +---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c  |  58 +++---------
 src/gallium/auxiliary/tgsi/util/tgsi_parse.c |  17 +---
 src/gallium/auxiliary/tgsi/util/tgsi_parse.h |   9 +-
 src/gallium/auxiliary/tgsi/util/tgsi_scan.c  |   4 +-
 8 files changed, 52 insertions(+), 197 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 826b432f09e..6ba09491835 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -1411,13 +1411,11 @@ exec_declaration(
          unsigned first, last, mask;
          eval_coef_func eval;
 
-         assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
-
-         first = decl->u.DeclarationRange.First;
-         last = decl->u.DeclarationRange.Last;
+         first = decl->DeclarationRange.First;
+         last = decl->DeclarationRange.Last;
          mask = decl->Declaration.UsageMask;
 
-         switch( decl->Interpolation.Interpolate ) {
+         switch( decl->Declaration.Interpolate ) {
          case TGSI_INTERPOLATE_CONSTANT:
             eval = eval_constant_coef;
             break;
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 189dc6024d1..a59e22b6ea8 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -1923,16 +1923,14 @@ emit_declaration(
       unsigned first, last, mask;
       unsigned i, j;
 
-      assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
-
-      first = decl->u.DeclarationRange.First;
-      last = decl->u.DeclarationRange.Last;
+      first = decl->DeclarationRange.First;
+      last = decl->DeclarationRange.Last;
       mask = decl->Declaration.UsageMask;
 
       for( i = first; i <= last; i++ ) {
          for( j = 0; j < NUM_CHANNELS; j++ ) {
             if( mask & (1 << j) ) {
-               switch( decl->Interpolation.Interpolate ) {
+               switch( decl->Declaration.Interpolate ) {
                case TGSI_INTERPOLATE_CONSTANT:
                   emit_coef_a0( func, 0, i, j );
                   emit_inputs( func, 0, i, j );
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
index 9c883ab7043..63cc27becc0 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
@@ -90,9 +90,8 @@ tgsi_default_declaration( void )
    declaration.Type = TGSI_TOKEN_TYPE_DECLARATION;
    declaration.Size = 1;
    declaration.File = TGSI_FILE_NULL;
-   declaration.Declare = TGSI_DECLARE_RANGE;
    declaration.UsageMask = TGSI_WRITEMASK_XYZW;
-   declaration.Interpolate = 0;
+   declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT;
    declaration.Semantic = 0;
    declaration.Padding = 0;
    declaration.Extended = 0;
@@ -103,7 +102,6 @@ tgsi_default_declaration( void )
 struct tgsi_declaration
 tgsi_build_declaration(
    unsigned file,
-   unsigned declare,
    unsigned usage_mask,
    unsigned interpolate,
    unsigned semantic,
@@ -112,11 +110,10 @@ tgsi_build_declaration(
    struct tgsi_declaration declaration;
 
    assert( file <= TGSI_FILE_IMMEDIATE );
-   assert( declare <= TGSI_DECLARE_MASK );
+   assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE );
 
    declaration = tgsi_default_declaration();
    declaration.File = file;
-   declaration.Declare = declare;
    declaration.UsageMask = usage_mask;
    declaration.Interpolate = interpolate;
    declaration.Semantic = semantic;
@@ -144,7 +141,7 @@ tgsi_default_full_declaration( void )
    struct tgsi_full_declaration  full_declaration;
 
    full_declaration.Declaration  = tgsi_default_declaration();
-   full_declaration.Interpolation = tgsi_default_declaration_interpolation();
+   full_declaration.DeclarationRange = tgsi_default_declaration_range();
    full_declaration.Semantic = tgsi_default_declaration_semantic();
 
    return full_declaration;
@@ -159,6 +156,7 @@ tgsi_build_full_declaration(
 {
    unsigned size = 0;
    struct tgsi_declaration *declaration;
+   struct tgsi_declaration_range *dr;
 
    if( maxsize <= size )
      return 0;
@@ -167,63 +165,21 @@ tgsi_build_full_declaration(
 
    *declaration = tgsi_build_declaration(
       full_decl->Declaration.File,
-      full_decl->Declaration.Declare,
       full_decl->Declaration.UsageMask,
       full_decl->Declaration.Interpolate,
       full_decl->Declaration.Semantic,
       header );
 
-   switch( full_decl->Declaration.Declare )  {
-   case TGSI_DECLARE_RANGE:
-   {
-      struct tgsi_declaration_range *dr;
-
-      if( maxsize <= size )
-         return 0;
-      dr = (struct tgsi_declaration_range *) &tokens[size];
-      size++;
-
-      *dr = tgsi_build_declaration_range(
-         full_decl->u.DeclarationRange.First,
-         full_decl->u.DeclarationRange.Last,
-         declaration,
-         header );
-      break;
-    }
-
-   case TGSI_DECLARE_MASK:
-   {
-      struct tgsi_declaration_mask *dm;
-
-      if( maxsize <= size )
-         return 0;
-      dm = (struct tgsi_declaration_mask  *) &tokens[size];
-      size++;
-
-      *dm = tgsi_build_declaration_mask(
-         full_decl->u.DeclarationMask.Mask,
-         declaration,
-         header );
-      break;
-   }
-
-   default:
-      assert( 0 );
-   }
-
-   if( full_decl->Declaration.Interpolate ) {
-      struct tgsi_declaration_interpolation *di;
-
-      if( maxsize <= size )
-         return  0;
-      di = (struct tgsi_declaration_interpolation *) &tokens[size];
-      size++;
+   if (maxsize <= size)
+      return 0;
+   dr = (struct tgsi_declaration_range *) &tokens[size];
+   size++;
 
-      *di = tgsi_build_declaration_interpolation(
-         full_decl->Interpolation.Interpolate,
-         declaration,
-         header );
-   }
+   *dr = tgsi_build_declaration_range(
+      full_decl->DeclarationRange.First,
+      full_decl->DeclarationRange.Last,
+      declaration,
+      header );
 
    if( full_decl->Declaration.Semantic ) {
       struct tgsi_declaration_semantic *ds;
@@ -243,6 +199,17 @@ tgsi_build_full_declaration(
    return size;
 }
 
+struct tgsi_declaration_range
+tgsi_default_declaration_range( void )
+{
+   struct tgsi_declaration_range dr;
+
+   dr.First = 0;
+   dr.Last = 0;
+
+   return dr;
+}
+
 struct tgsi_declaration_range
 tgsi_build_declaration_range(
    unsigned first,
@@ -255,6 +222,7 @@ tgsi_build_declaration_range(
    assert( last >= first );
    assert( last <= 0xFFFF );
 
+   declaration_range = tgsi_default_declaration_range();
    declaration_range.First = first;
    declaration_range.Last = last;
 
@@ -263,50 +231,6 @@ tgsi_build_declaration_range(
    return declaration_range;
 }
 
-struct tgsi_declaration_mask
-tgsi_build_declaration_mask(
-   unsigned mask,
-   struct tgsi_declaration *declaration,
-   struct tgsi_header *header )
-{
-   struct tgsi_declaration_mask  declaration_mask;
-
-   declaration_mask.Mask = mask;
-
-   declaration_grow( declaration, header );
-
-   return declaration_mask;
-}
-
-struct tgsi_declaration_interpolation
-tgsi_default_declaration_interpolation( void )
-{
-   struct tgsi_declaration_interpolation di;
-
-   di.Interpolate = TGSI_INTERPOLATE_CONSTANT;
-   di.Padding = 0;
-
-   return di;
-}
-
-struct tgsi_declaration_interpolation
-tgsi_build_declaration_interpolation(
-   unsigned interpolate,
-   struct tgsi_declaration *declaration,
-   struct tgsi_header *header )
-{
-   struct tgsi_declaration_interpolation di;
-
-   assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE );
-
-   di = tgsi_default_declaration_interpolation();
-   di.Interpolate = interpolate;
-
-   declaration_grow( declaration, header );
-
-   return di;
-}
-
 struct tgsi_declaration_semantic
 tgsi_default_declaration_semantic( void )
 {
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
index 80bffc4ae71..423cf141f56 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
@@ -37,7 +37,6 @@ tgsi_default_declaration( void );
 struct tgsi_declaration
 tgsi_build_declaration(
    unsigned file,
-   unsigned declare,
    unsigned usage_mask,
    unsigned interpolate,
    unsigned semantic,
@@ -53,6 +52,9 @@ tgsi_build_full_declaration(
    struct tgsi_header *header,
    unsigned maxsize );
 
+struct tgsi_declaration_range
+tgsi_default_declaration_range( void );
+
 struct tgsi_declaration_range
 tgsi_build_declaration_range(
    unsigned first,
@@ -60,21 +62,6 @@ tgsi_build_declaration_range(
    struct tgsi_declaration *declaration,
    struct tgsi_header *header );
 
-struct tgsi_declaration_mask
-tgsi_build_declaration_mask(
-   unsigned mask,
-   struct tgsi_declaration *declaration,
-   struct tgsi_header *header );
-
-struct tgsi_declaration_interpolation
-tgsi_default_declaration_interpolation( void );
-
-struct tgsi_declaration_interpolation
-tgsi_build_declaration_interpolation(
-   unsigned interpolate,
-   struct tgsi_declaration *declaration,
-   struct tgsi_header *header );
-
 struct tgsi_declaration_semantic
 tgsi_default_declaration_semantic( void );
 
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index b018ea9fa1c..d1a3dfd9c73 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -546,19 +546,13 @@ tgsi_dump_declaration(
    TXT( "\nDCL " );
    ENM( decl->Declaration.File, TGSI_FILES_SHORT );
 
-   switch( decl->Declaration.Declare ) {
-   case TGSI_DECLARE_RANGE:
-      CHR( '[' );
-      UID( decl->u.DeclarationRange.First );
-      if( decl->u.DeclarationRange.First != decl->u.DeclarationRange.Last ) {
-         TXT( ".." );
-         UID( decl->u.DeclarationRange.Last );
-      }
-      CHR( ']' );
-      break;
-   default:
-      assert( 0 );
+   CHR( '[' );
+   UID( decl->DeclarationRange.First );
+   if (decl->DeclarationRange.First != decl->DeclarationRange.Last) {
+      TXT( ".." );
+      UID( decl->DeclarationRange.Last );
    }
+   CHR( ']' );
 
    if( decl->Declaration.UsageMask != TGSI_WRITEMASK_XYZW ) {
       CHR( '.' );
@@ -586,10 +580,8 @@ tgsi_dump_declaration(
       }
    }
 
-   if (decl->Declaration.Interpolate) {
-      TXT( ", " );
-      ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT );
-   }
+   TXT( ", " );
+   ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES_SHORT );
 }
 
 static void
@@ -601,8 +593,6 @@ dump_declaration_verbose(
 {
    TXT( "\nFile       : " );
    ENM( decl->Declaration.File, TGSI_FILES );
-   TXT( "\nDeclare    : " );
-   ENM( decl->Declaration.Declare, TGSI_DECLARES );
    if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) {
       TXT( "\nUsageMask  : " );
       if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
@@ -620,7 +610,7 @@ dump_declaration_verbose(
    }
    if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) {
       TXT( "\nInterpolate: " );
-      UID( decl->Declaration.Interpolate );
+      ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES );
    }
    if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) {
       TXT( "\nSemantic   : " );
@@ -632,32 +622,10 @@ dump_declaration_verbose(
    }
 
    EOL();
-   switch( decl->Declaration.Declare ) {
-   case TGSI_DECLARE_RANGE:
-      TXT( "\nFirst: " );
-      UID( decl->u.DeclarationRange.First );
-      TXT( "\nLast : " );
-      UID( decl->u.DeclarationRange.Last );
-      break;
-
-   case TGSI_DECLARE_MASK:
-      TXT( "\nMask: " );
-      UIX( decl->u.DeclarationMask.Mask );
-      break;
-
-   default:
-      assert( 0 );
-   }
-
-   if( decl->Declaration.Interpolate ) {
-      EOL();
-      TXT( "\nInterpolate: " );
-      ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES );
-      if( ignored ) {
-         TXT( "\nPadding    : " );
-         UIX( decl->Interpolation.Padding );
-      }
-   }
+   TXT( "\nFirst: " );
+   UID( decl->DeclarationRange.First );
+   TXT( "\nLast : " );
+   UID( decl->DeclarationRange.Last );
 
    if( decl->Declaration.Semantic ) {
       EOL();
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
index 5c0b0bfd61b..d16f0cdcad4 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
@@ -118,22 +118,7 @@ tgsi_parse_token(
       *decl = tgsi_default_full_declaration();
       decl->Declaration = *(struct tgsi_declaration *) &token;
 
-      switch( decl->Declaration.Type ) {
-      case TGSI_DECLARE_RANGE:
-         next_token( ctx, &decl->u.DeclarationRange );
-         break;
-
-      case TGSI_DECLARE_MASK:
-         next_token( ctx, &decl->u.DeclarationMask );
-         break;
-
-      default:
-         assert (0);
-      }
-
-      if( decl->Declaration.Interpolate ) {
-         next_token( ctx, &decl->Interpolation );
-      }
+      next_token( ctx, &decl->DeclarationRange );
 
       if( decl->Declaration.Semantic ) {
          next_token( ctx, &decl->Semantic );
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
index 41021010936..054350712d8 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
@@ -65,13 +65,8 @@ struct tgsi_full_src_register
 struct tgsi_full_declaration
 {
    struct tgsi_declaration Declaration;
-   union
-   {
-      struct tgsi_declaration_range DeclarationRange;
-      struct tgsi_declaration_mask  DeclarationMask;
-   } u;
-   struct tgsi_declaration_interpolation  Interpolation;
-   struct tgsi_declaration_semantic       Semantic;
+   struct tgsi_declaration_range DeclarationRange;
+   struct tgsi_declaration_semantic Semantic;
 };
 
 struct tgsi_full_immediate
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index 65650ed22a4..bda7bc2e2ed 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -93,8 +93,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                = &parse.FullToken.FullDeclaration;
             uint file = fulldecl->Declaration.File;
             uint i;
-            for (i = fulldecl->u.DeclarationRange.First;
-                 i <= fulldecl->u.DeclarationRange.Last;
+            for (i = fulldecl->DeclarationRange.First;
+                 i <= fulldecl->DeclarationRange.Last;
                  i++) {
 
                /* only first 32 regs will appear in this bitfield */
-- 
cgit v1.2.3


From c6ae627fdca417318d27a8c26e6d9bc23577aabe Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 2 Jun 2008 11:39:59 +0200
Subject: tgsi: SWZ no longer aliases to MOV.

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 2 +-
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 6ba09491835..6689c3f1fb2 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -1477,7 +1477,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_MOV:
-   /* TGSI_OPCODE_SWZ */
+   case TGSI_OPCODE_SWZ:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          STORE( &r[0], 0, chan_index );
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index a59e22b6ea8..cdce5ea9c52 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -1190,7 +1190,7 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_MOV:
-   /* TGSI_OPCODE_SWZ */
+   case TGSI_OPCODE_SWZ:
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 0, 0, chan_index );
          STORE( func, *inst, 0, 0, chan_index );
-- 
cgit v1.2.3


From 2c7ae3371b6058988f7f10bf031d630b649f3831 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 2 Jun 2008 11:59:04 +0200
Subject: tgsi: Add assertions to the new rule that when an extended swizzle is
 used, the simple swizzle must be set to identity.

---
 src/gallium/auxiliary/tgsi/util/tgsi_build.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
index 63cc27becc0..18e44b38c24 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
@@ -628,6 +628,14 @@ tgsi_build_full_instruction(
             tgsi_default_src_register_ext_swz() ) ) {
          struct tgsi_src_register_ext_swz *src_register_ext_swz;
 
+         /* Use of the extended swizzle requires the simple swizzle to be identity.
+          */
+         assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X );
+         assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y );
+         assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z );
+         assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W );
+         assert( reg->SrcRegister.Negate == FALSE );
+
          if( maxsize <= size )
             return 0;
          src_register_ext_swz =
-- 
cgit v1.2.3


From e0860518dfb5a5c6ba6584e3c1b5d7b203277dac Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Mon, 2 Jun 2008 22:31:02 +0900
Subject: gallium: Replace XSTDCALL by PIPE_CDECL.

---
 src/gallium/auxiliary/draw/draw_vs_sse.c    |  2 +-
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 38 +++++++----------------------
 src/gallium/drivers/softpipe/sp_fs_sse.c    |  2 +-
 src/gallium/include/pipe/p_compiler.h       | 16 +-----------
 4 files changed, 12 insertions(+), 46 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index f638208bf58..0aa0c9a76b8 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -50,7 +50,7 @@
 
 #define SSE_MAX_VERTICES 4
 
-typedef void (XSTDCALL *codegen_function) (
+typedef void (PIPE_CDECL *codegen_function) (
    const struct tgsi_exec_vector *input, /* 1 */
    struct tgsi_exec_vector *output, /* 2 */
    float (*constant)[4],        /* 3 */
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index cdce5ea9c52..cdbdf5c8827 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -194,22 +194,12 @@ get_coef(
 }
 
 
-#ifdef WIN32
-static void
-emit_retw(
-   struct x86_function  *func,
-   unsigned short        size )
-{
-   x86_retw( func, size );
-}
-#else
 static void
 emit_ret(
    struct x86_function  *func )
 {
    x86_ret( func );
 }
-#endif
 
 
 /**
@@ -475,7 +465,7 @@ static void
 emit_func_call_dst(
    struct x86_function *func,
    unsigned xmm_dst,
-   void (*code)() )
+   void (PIPE_CDECL *code)() )
 {
    sse_movaps(
       func,
@@ -496,9 +486,7 @@ emit_func_call_dst(
       x86_push( func, ecx );
       x86_mov_reg_imm( func, ecx, (unsigned long) code );
       x86_call( func, ecx );
-#ifndef WIN32
       x86_pop(func, ecx ); 
-#endif
    }
 
 
@@ -516,7 +504,7 @@ emit_func_call_dst_src(
    struct x86_function *func,
    unsigned xmm_dst,
    unsigned xmm_src,
-   void (*code)() )
+   void (PIPE_CDECL *code)() )
 {
    sse_movaps(
       func,
@@ -558,7 +546,7 @@ emit_add(
       make_xmm( xmm_src ) );
 }
 
-static void XSTDCALL
+static void PIPE_CDECL
 cos4f(
    float *store )
 {
@@ -581,7 +569,7 @@ emit_cos(
       cos4f );
 }
 
-static void XSTDCALL
+static void PIPE_CDECL
 ex24f(
    float *store )
 {
@@ -615,7 +603,7 @@ emit_f2it(
       make_xmm( xmm ) );
 }
 
-static void XSTDCALL
+static void PIPE_CDECL
 flr4f(
    float *store )
 {
@@ -638,7 +626,7 @@ emit_flr(
       flr4f );
 }
 
-static void XSTDCALL
+static void PIPE_CDECL
 frc4f(
    float *store )
 {
@@ -661,7 +649,7 @@ emit_frc(
       frc4f );
 }
 
-static void XSTDCALL
+static void PIPE_CDECL
 lg24f(
    float *store )
 {
@@ -720,7 +708,7 @@ emit_neg(
          TGSI_EXEC_TEMP_80000000_C ) );
 }
 
-static void XSTDCALL
+static void PIPE_CDECL
 pow4f(
    float *store )
 {
@@ -820,7 +808,7 @@ emit_setsign(
          TGSI_EXEC_TEMP_80000000_C ) );
 }
 
-static void XSTDCALL
+static void PIPE_CDECL
 sin4f(
    float *store )
 {
@@ -1736,11 +1724,7 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_RET:
-#ifdef WIN32
-      emit_retw( func, 16 );
-#else
       emit_ret( func );
-#endif
       break;
 
    case TGSI_OPCODE_END:
@@ -2281,11 +2265,7 @@ tgsi_emit_sse2(
       func,
       get_immediate_base() );
 
-#ifdef WIN32
-   emit_retw( func, 16 );
-#else
    emit_ret( func );
-#endif
 
    tgsi_parse_free( &parse );
 
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 55741cc1df8..69f7f960aa1 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -46,7 +46,7 @@
 
 /* Surely this should be defined somewhere in a tgsi header:
  */
-typedef void (XSTDCALL *codegen_function)(
+typedef void (PIPE_CDECL *codegen_function)(
    const struct tgsi_exec_vector *input,
    struct tgsi_exec_vector *output,
    const float (*constant)[4],
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index f6707385cd8..b14260dd901 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -128,7 +128,7 @@ typedef unsigned char boolean;
 /* This should match linux gcc cdecl semantics everywhere, so that we
  * just codegen one calling convention on all platforms.
  */
-#ifdef WIN32
+#ifdef _MSC_VER
 #define PIPE_CDECL __cdecl
 #else
 #define PIPE_CDECL
@@ -148,18 +148,4 @@ typedef unsigned char boolean;
 
 
 
-/** 
- * For calling code-gen'd functions, phase out in favor of
- * PIPE_CDECL, above, which really means cdecl on all platforms, not
- * like the below...
- */
-#if !defined(XSTDCALL) 
-#if defined(WIN32)
-#define XSTDCALL __stdcall      /* phase this out */
-#else
-#define XSTDCALL                /* XXX: NOTE! not STDCALL! */
-#endif
-#endif
-
-
 #endif /* P_COMPILER_H */
-- 
cgit v1.2.3


From 8223add3304451d5e75737a6d1be1739e4517943 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 4 Jun 2008 08:56:06 -0600
Subject: gallium: added tgsi_is_passthrough_shader() function

Checks if all instructions are of the form MOV OUT[n], IN[n]
Untested at this time.
---
 src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 83 +++++++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/util/tgsi_scan.h |  4 ++
 2 files changed, 87 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
index bda7bc2e2ed..240aaaf362c 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -141,3 +141,86 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
 
    tgsi_parse_free (&parse);
 }
+
+
+
+/**
+ * Check if the given shader is a "passthrough" shader consisting of only
+ * MOV instructions of the form:  MOV OUT[n], IN[n]
+ *  
+ */
+boolean
+tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
+{
+   struct tgsi_parse_context parse;
+
+   /**
+    ** Setup to begin parsing input shader
+    **/
+   if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
+      debug_printf("tgsi_parse_init() failed in tgsi_is_passthrough_shader()!\n");
+      return FALSE;
+   }
+
+   /**
+    ** Loop over incoming program tokens/instructions
+    */
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         {
+            struct tgsi_full_instruction *fullinst =
+               &parse.FullToken.FullInstruction;
+            const struct tgsi_full_src_register *src =
+               &fullinst->FullSrcRegisters[0];
+            const struct tgsi_full_dst_register *dst =
+               &fullinst->FullDstRegisters[0];
+
+            /* Do a whole bunch of checks for a simple move */
+            if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV ||
+                src->SrcRegister.File != TGSI_FILE_INPUT ||
+                dst->DstRegister.File != TGSI_FILE_OUTPUT ||
+                src->SrcRegister.Index != dst->DstRegister.Index ||
+
+                src->SrcRegister.Negate ||
+                src->SrcRegisterExtMod.Negate ||
+                src->SrcRegisterExtMod.Absolute ||
+                src->SrcRegisterExtMod.Scale2X ||
+                src->SrcRegisterExtMod.Bias ||
+                src->SrcRegisterExtMod.Complement ||
+
+                src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
+                src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
+                src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
+                src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ||
+
+                src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
+                src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
+                src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
+                src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W ||
+
+                dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW)
+            {
+               tgsi_parse_free(&parse);
+               return FALSE;
+            }
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         /* fall-through */
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* fall-through */
+      default:
+         ; /* no-op */
+      }
+   }
+
+   tgsi_parse_free(&parse);
+
+   /* if we get here, it's a pass-through shader */
+   return TRUE;
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
index 0530bc6b510..5cb6efb3439 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
@@ -67,4 +67,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                  struct tgsi_shader_info *info);
 
 
+extern boolean
+tgsi_is_passthrough_shader(const struct tgsi_token *tokens);
+
+
 #endif /* TGSI_SCAN_H */
-- 
cgit v1.2.3


From 68ef8e89a5f25cd9f80e2b9088604631a28edc3c Mon Sep 17 00:00:00 2001
From: Zack Rusin <zack@tungstengraphics.com>
Date: Tue, 10 Jun 2008 16:59:44 -0400
Subject: glsl: implement variable array indexes

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c |  9 ++++++++-
 src/mesa/shader/arbprogparse.c              |  2 +-
 src/mesa/shader/prog_print.c                | 17 +++++++++++------
 src/mesa/shader/slang/slang_emit.c          | 16 +++++++++++-----
 src/mesa/shader/slang/slang_ir.h            |  1 +
 src/mesa/state_tracker/st_mesa_to_tgsi.c    |  4 ++--
 6 files changed, 34 insertions(+), 15 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index d1a3dfd9c73..92aff889259 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -803,7 +803,14 @@ tgsi_dump_instruction(
       ENM( src->SrcRegister.File, TGSI_FILES_SHORT );
 
       CHR( '[' );
-      SID( src->SrcRegister.Index );
+      if (src->SrcRegister.Indirect) {
+         TXT( "addr" );
+         if (src->SrcRegister.Index > 0)
+            CHR( '+' );
+         SID( src->SrcRegister.Index );
+      }
+      else
+         SID( src->SrcRegister.Index );
       CHR( ']' );
 
       if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c
index b60b9656c62..a6bbdc64f16 100644
--- a/src/mesa/shader/arbprogparse.c
+++ b/src/mesa/shader/arbprogparse.c
@@ -3880,7 +3880,7 @@ _mesa_parse_arb_fragment_program(GLcontext* ctx, GLenum target,
    }
 
 #if DEBUG_FP
-   _mesa_printf("____________Fragment program %u ________\n", program->Base.ID);
+   _mesa_printf("____________Fragment program %u ________\n", program->Base.Id);
    _mesa_print_program(&program->Base);
 #endif
 }
diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c
index 1c35ce3fecf..09bf15f0045 100644
--- a/src/mesa/shader/prog_print.c
+++ b/src/mesa/shader/prog_print.c
@@ -206,7 +206,7 @@ arb_output_attrib_string(GLint index, GLenum progType)
  */
 static const char *
 reg_string(enum register_file f, GLint index, gl_prog_print_mode mode,
-           const struct gl_program *prog)
+           GLint relAddr, const struct gl_program *prog)
 {
    static char str[100];
 
@@ -214,7 +214,10 @@ reg_string(enum register_file f, GLint index, gl_prog_print_mode mode,
 
    switch (mode) {
    case PROG_PRINT_DEBUG:
-      sprintf(str, "%s[%d]", file_string(f, mode), index);
+      if (relAddr)
+         sprintf(str, "%s[ADDR%s%d]", file_string(f, mode), (index > 0) ? "+" : "", index);
+      else
+         sprintf(str, "%s[%d]", file_string(f, mode), index);
       break;
 
    case PROG_PRINT_ARB:
@@ -401,7 +404,7 @@ print_dst_reg(const struct prog_dst_register *dstReg, gl_prog_print_mode mode,
 {
    _mesa_printf("%s%s",
                 reg_string((enum register_file) dstReg->File,
-                           dstReg->Index, mode, prog),
+                           dstReg->Index, mode, GL_FALSE, prog),
                 writemask_string(dstReg->WriteMask));
 
    if (dstReg->CondMask != COND_TR) {
@@ -424,9 +427,9 @@ print_src_reg(const struct prog_src_register *srcReg, gl_prog_print_mode mode,
 {
    _mesa_printf("%s%s",
                 reg_string((enum register_file) srcReg->File,
-                           srcReg->Index, mode, prog),
+                           srcReg->Index, mode, srcReg->RelAddr, prog),
                 _mesa_swizzle_string(srcReg->Swizzle,
-                               srcReg->NegateBase, GL_FALSE));
+                                     srcReg->NegateBase, GL_FALSE));
 #if 0
    _mesa_printf("%s[%d]%s",
                 file_string((enum register_file) srcReg->File, mode),
@@ -590,7 +593,9 @@ _mesa_print_instruction_opt(const struct prog_instruction *inst, GLint indent,
       break;
 
    case OPCODE_ARL:
-      _mesa_printf("ARL addr.x, ");
+      _mesa_printf("ARL ");
+      print_dst_reg(&inst->DstReg, mode, prog);
+      _mesa_printf(", ");
       print_src_reg(&inst->SrcReg[0], mode, prog);
       print_comment(inst);
       break;
diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c
index ff63e05dd20..93256f8647c 100644
--- a/src/mesa/shader/slang/slang_emit.c
+++ b/src/mesa/shader/slang/slang_emit.c
@@ -223,6 +223,7 @@ storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st)
    assert(st->Size <= 4);
    src->File = st->File;
    src->Index = st->Index;
+   src->RelAddr = st->RelAddr;
    if (st->Swizzle != SWIZZLE_NOOP)
       src->Swizzle = st->Swizzle;
    else
@@ -1488,11 +1489,16 @@ emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n)
       n->Store->Index = arrayAddr + index;
    }
    else {
-      /* Variable index - PROBLEM */
-      const GLint arrayAddr = n->Children[0]->Store->Index;
-      const GLint index = 0;
-      _mesa_problem(NULL, "variable array indexes not supported yet!");
-      n->Store->Index = arrayAddr + index;
+      /* Variable index*/
+      struct prog_instruction *inst;
+      inst = new_instruction(emitInfo, OPCODE_ARL);
+      storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
+      storage_to_src_reg(&inst->SrcReg[0], n->Children[1]->Store);
+      inst->DstReg.File = PROGRAM_ADDRESS;
+      inst->Comment = _mesa_strdup("ARL ADDR");
+      n->Store->RelAddr = GL_TRUE;
+      n->Store->Index = inst->DstReg.Index;/*index of the array*/
+      inst->DstReg.Index = 0; /*addr index is always 0*/
    }
    return NULL; /* no instruction */
 }
diff --git a/src/mesa/shader/slang/slang_ir.h b/src/mesa/shader/slang/slang_ir.h
index c7c0ddbf9a6..ba0735d64dd 100644
--- a/src/mesa/shader/slang/slang_ir.h
+++ b/src/mesa/shader/slang/slang_ir.h
@@ -146,6 +146,7 @@ struct _slang_ir_storage
    GLint Size;  /**< number of floats */
    GLuint Swizzle;
    GLint RefCount; /**< Used during IR tree delete */
+   GLboolean RelAddr;
 };
 
 typedef struct _slang_ir_storage slang_ir_storage;
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 12979de5233..a8b6faad1c2 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -68,8 +68,8 @@ map_register_file(
    case PROGRAM_STATE_VAR:
    case PROGRAM_NAMED_PARAM:
    case PROGRAM_UNIFORM:
-      if (immediateMapping[index] != ~0) 
-	 return TGSI_FILE_IMMEDIATE;
+      if (immediateMapping && immediateMapping[index] != ~0)
+         return TGSI_FILE_IMMEDIATE;
       else
 	 return TGSI_FILE_CONSTANT;
    case PROGRAM_CONSTANT:
-- 
cgit v1.2.3


From 6fbfcf922210ddf29b73290557f9d40171b09d2e Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Thu, 19 Jun 2008 22:57:33 +0900
Subject: gallium: Handle malloc failure.

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 6689c3f1fb2..13b8c2e5bf3 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -165,9 +165,17 @@ tgsi_exec_machine_bind_shader(
    declarations = (struct tgsi_full_declaration *)
       MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
 
+   if (!declarations) {
+      return;
+   }
+
    instructions = (struct tgsi_full_instruction *)
       MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
 
+   if (!instructions) {
+      FREE( declarations );
+      return;
+   }
 
    while( !tgsi_parse_end_of_tokens( &parse ) ) {
       uint pointer = parse.Position;
-- 
cgit v1.2.3


From 019ad5e284db08b46b484b409c1a54c302afd50b Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 2 Jul 2008 12:41:18 -0600
Subject: gallium: replace 128 with MAX_LABELS

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 2 +-
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 13b8c2e5bf3..9649396c9b1 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -214,7 +214,7 @@ tgsi_exec_machine_bind_shader(
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
-         assert( labels->count < 128 );
+         assert( labels->count < MAX_LABELS );
 
          labels->labels[labels->count][0] = instno;
          labels->labels[labels->count][1] = pointer;
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
index 19bd78df3d3..ea182bc4540 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
@@ -34,6 +34,8 @@
 extern "C" {
 #endif
 
+#define MAX_LABELS 1024
+
 #define NUM_CHANNELS 4  /* R,G,B,A */
 #define QUAD_SIZE    4  /* 4 pixel/quad */
 
@@ -93,7 +95,7 @@ struct tgsi_sampler
  */
 struct tgsi_exec_labels
 {
-   unsigned labels[128][2];
+   unsigned labels[MAX_LABELS][2];
    unsigned count;
 };
 
-- 
cgit v1.2.3


From 3c128748579c637ba7c777ba91ff4287a03190f6 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Thu, 3 Jul 2008 10:42:31 -0600
Subject: gallium: increase TGSI_EXEC_MAX_COND_NESTING, etc

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
index ea182bc4540..c89dfcb167b 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
@@ -147,9 +147,9 @@ struct tgsi_exec_labels
 #define TGSI_EXEC_NUM_ADDRS   1
 #define TGSI_EXEC_NUM_IMMEDIATES  256
 
-#define TGSI_EXEC_MAX_COND_NESTING  10
-#define TGSI_EXEC_MAX_LOOP_NESTING  10
-#define TGSI_EXEC_MAX_CALL_NESTING  10
+#define TGSI_EXEC_MAX_COND_NESTING  20
+#define TGSI_EXEC_MAX_LOOP_NESTING  20
+#define TGSI_EXEC_MAX_CALL_NESTING  20
 
 /**
  * Run-time virtual machine state for executing TGSI shader.
-- 
cgit v1.2.3


From f042d662e2cec4315ddaae1ee536f593139f703d Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Thu, 3 Jul 2008 12:56:33 -0600
Subject: gallium: increase TGSI interpreter's number of temp registers to 64

Also, clean up the definitions of the misc/extra temp regs.
A few new assertions too.
---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c |  4 ++-
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 48 +++++++++++++++--------------
 2 files changed, 28 insertions(+), 24 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 9649396c9b1..46949661aff 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -262,7 +262,7 @@ tgsi_exec_machine_init(
    uint i;
 
    mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
-   mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
+   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
 
    /* Setup constants. */
    for( i = 0; i < 4; i++ ) {
@@ -941,6 +941,7 @@ fetch_src_file_channel(
          break;
 
       case TGSI_FILE_TEMPORARY:
+         assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
          chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
          chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
          chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
@@ -1127,6 +1128,7 @@ store_dest(
       break;
 
    case TGSI_FILE_TEMPORARY:
+      assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS);
       dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
index c89dfcb167b..18abdd9ac00 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
@@ -99,53 +99,57 @@ struct tgsi_exec_labels
    unsigned count;
 };
 
+
+#define TGSI_EXEC_NUM_TEMPS        64
+#define TGSI_EXEC_NUM_TEMP_EXTRAS   6
+#define TGSI_EXEC_NUM_IMMEDIATES  256
+
 /*
  * Locations of various utility registers (_I = Index, _C = Channel)
  */
-#define TGSI_EXEC_TEMP_00000000_I   32
+#define TGSI_EXEC_TEMP_00000000_I   (TGSI_EXEC_NUM_TEMPS + 0)
 #define TGSI_EXEC_TEMP_00000000_C   0
 
-#define TGSI_EXEC_TEMP_7FFFFFFF_I   32
+#define TGSI_EXEC_TEMP_7FFFFFFF_I   (TGSI_EXEC_NUM_TEMPS + 0)
 #define TGSI_EXEC_TEMP_7FFFFFFF_C   1
 
-#define TGSI_EXEC_TEMP_80000000_I   32
+#define TGSI_EXEC_TEMP_80000000_I   (TGSI_EXEC_NUM_TEMPS + 0)
 #define TGSI_EXEC_TEMP_80000000_C   2
 
-#define TGSI_EXEC_TEMP_FFFFFFFF_I   32
+#define TGSI_EXEC_TEMP_FFFFFFFF_I   (TGSI_EXEC_NUM_TEMPS + 0)
 #define TGSI_EXEC_TEMP_FFFFFFFF_C   3
 
-#define TGSI_EXEC_TEMP_ONE_I        33
+#define TGSI_EXEC_TEMP_ONE_I        (TGSI_EXEC_NUM_TEMPS + 1)
 #define TGSI_EXEC_TEMP_ONE_C        0
 
-#define TGSI_EXEC_TEMP_TWO_I        33
+#define TGSI_EXEC_TEMP_TWO_I        (TGSI_EXEC_NUM_TEMPS + 1)
 #define TGSI_EXEC_TEMP_TWO_C        1
 
-#define TGSI_EXEC_TEMP_128_I        33
+#define TGSI_EXEC_TEMP_128_I        (TGSI_EXEC_NUM_TEMPS + 1)
 #define TGSI_EXEC_TEMP_128_C        2
 
-#define TGSI_EXEC_TEMP_MINUS_128_I  33
+#define TGSI_EXEC_TEMP_MINUS_128_I  (TGSI_EXEC_NUM_TEMPS + 1)
 #define TGSI_EXEC_TEMP_MINUS_128_C  3
 
-#define TGSI_EXEC_TEMP_KILMASK_I    34
+#define TGSI_EXEC_TEMP_KILMASK_I    (TGSI_EXEC_NUM_TEMPS + 2)
 #define TGSI_EXEC_TEMP_KILMASK_C    0
 
-#define TGSI_EXEC_TEMP_OUTPUT_I     34
+#define TGSI_EXEC_TEMP_OUTPUT_I     (TGSI_EXEC_NUM_TEMPS + 2)
 #define TGSI_EXEC_TEMP_OUTPUT_C     1
 
-#define TGSI_EXEC_TEMP_PRIMITIVE_I  34
+#define TGSI_EXEC_TEMP_PRIMITIVE_I  (TGSI_EXEC_NUM_TEMPS + 2)
 #define TGSI_EXEC_TEMP_PRIMITIVE_C  2
 
-#define TGSI_EXEC_TEMP_THREE_I      34
+#define TGSI_EXEC_TEMP_THREE_I      (TGSI_EXEC_NUM_TEMPS + 2)
 #define TGSI_EXEC_TEMP_THREE_C      3
 
-#define TGSI_EXEC_TEMP_HALF_I       35
+#define TGSI_EXEC_TEMP_HALF_I       (TGSI_EXEC_NUM_TEMPS + 3)
 #define TGSI_EXEC_TEMP_HALF_C       0
 
-#define TGSI_EXEC_TEMP_R0           36
+#define TGSI_EXEC_TEMP_R0           (TGSI_EXEC_NUM_TEMPS + 4)
+
+#define TGSI_EXEC_TEMP_ADDR         (TGSI_EXEC_NUM_TEMPS + 5)
 
-#define TGSI_EXEC_NUM_TEMPS   (32 + 5)
-#define TGSI_EXEC_NUM_ADDRS   1
-#define TGSI_EXEC_NUM_IMMEDIATES  256
 
 #define TGSI_EXEC_MAX_COND_NESTING  20
 #define TGSI_EXEC_MAX_LOOP_NESTING  20
@@ -156,13 +160,11 @@ struct tgsi_exec_labels
  */
 struct tgsi_exec_machine
 {
-   /*
-    * 32 program temporaries
-    * 4  internal temporaries
-    * 1  address
-    * 1  temporary of padding to align to 16 bytes
+   /* Total = program temporaries + internal temporaries
+    *         + 1 padding to align to 16 bytes
     */
-   struct tgsi_exec_vector       _Temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_ADDRS + 1];
+   struct tgsi_exec_vector       _Temps[TGSI_EXEC_NUM_TEMPS +
+                                        TGSI_EXEC_NUM_TEMP_EXTRAS + 1];
 
    /*
     * This will point to _Temps after aligning to 16B boundary.
-- 
cgit v1.2.3


From 7cbc244c52610eb59b0fe1fc7275f307b560c281 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Tue, 8 Jul 2008 15:00:11 -0600
Subject: gallium: tweak printing of generic declarations

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 92aff889259..a395c630cc7 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -573,7 +573,8 @@ tgsi_dump_declaration(
    if (decl->Declaration.Semantic) {
       TXT( ", " );
       ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT );
-      if (decl->Semantic.SemanticIndex != 0) {
+      if (decl->Semantic.SemanticIndex != 0 ||
+          decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) {
          CHR( '[' );
          UID( decl->Semantic.SemanticIndex );
          CHR( ']' );
-- 
cgit v1.2.3


From 7279d663e984ae8a243f56c010f175fee9ffccb3 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sat, 12 Jul 2008 11:16:01 +0200
Subject: tgsi: Add missing copyright headers.

---
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h  | 30 +++++++++++++++++++++++++--
 src/gallium/auxiliary/tgsi/util/tgsi_build.c | 28 ++++++++++++++++++++++++-
 src/gallium/auxiliary/tgsi/util/tgsi_build.h | 30 +++++++++++++++++++++++++--
 src/gallium/auxiliary/tgsi/util/tgsi_dump.h  | 31 +++++++++++++++++++++++++---
 src/gallium/auxiliary/tgsi/util/tgsi_util.c  | 28 ++++++++++++++++++++++++-
 src/gallium/auxiliary/tgsi/util/tgsi_util.h  | 30 +++++++++++++++++++++++++--
 6 files changed, 166 insertions(+), 11 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
index e66d1152836..af838b2a25b 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
@@ -1,4 +1,31 @@
-#if !defined TGSI_SSE2_H
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_SSE2_H
 #define TGSI_SSE2_H
 
 #if defined __cplusplus
@@ -20,4 +47,3 @@ tgsi_emit_sse2(
 #endif
 
 #endif /* TGSI_SSE2_H */
-
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
index 18e44b38c24..742ef14c352 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
@@ -1,3 +1,30 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
 #include "pipe/p_debug.h"
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
@@ -1295,4 +1322,3 @@ tgsi_build_dst_register_ext_modulate(
 
    return dst_register_ext_modulate;
 }
-
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
index 423cf141f56..ed258302487 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
@@ -1,4 +1,31 @@
-#if !defined TGSI_BUILD_H
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_BUILD_H
 #define TGSI_BUILD_H
 
 #if defined __cplusplus
@@ -303,4 +330,3 @@ tgsi_build_dst_register_ext_modulate(
 #endif
 
 #endif /* TGSI_BUILD_H */
-
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
index ca83bdef206..ba7692b5112 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
@@ -1,4 +1,31 @@
-#if !defined TGSI_DUMP_H
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_DUMP_H
 #define TGSI_DUMP_H
 
 #if defined __cplusplus
@@ -31,10 +58,8 @@ void
 tgsi_dump_declaration(
    const struct tgsi_full_declaration  *decl );
 
-
 #if defined __cplusplus
 }
 #endif
 
 #endif /* TGSI_DUMP_H */
-
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c
index 56a50d3b210..10762b6c1a0 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.c
@@ -1,3 +1,30 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
 #include "pipe/p_debug.h"
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
@@ -271,4 +298,3 @@ tgsi_util_set_full_src_register_sign_mode(
       assert( 0 );
    }
 }
-
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.h b/src/gallium/auxiliary/tgsi/util/tgsi_util.h
index 45f5f0be0e4..7877f345587 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.h
@@ -1,4 +1,31 @@
-#if !defined TGSI_UTIL_H
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_UTIL_H
 #define TGSI_UTIL_H
 
 #if defined __cplusplus
@@ -67,4 +94,3 @@ tgsi_util_set_full_src_register_sign_mode(
 #endif
 
 #endif /* TGSI_UTIL_H */
-
-- 
cgit v1.2.3


From 9ea485f8865404e3e2e10cdb3b1627e7194c27fe Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sat, 12 Jul 2008 17:03:30 +0200
Subject: tgsi: Fix dumping of indirect addressing.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index a395c630cc7..6356b6de06c 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -805,10 +805,12 @@ tgsi_dump_instruction(
 
       CHR( '[' );
       if (src->SrcRegister.Indirect) {
-         TXT( "addr" );
-         if (src->SrcRegister.Index > 0)
-            CHR( '+' );
-         SID( src->SrcRegister.Index );
+         TXT( "ADDR[0]" );
+         if (src->SrcRegister.Index != 0) {
+            if (src->SrcRegister.Index > 0)
+               CHR( '+' );
+            SID( src->SrcRegister.Index );
+         }
       }
       else
          SID( src->SrcRegister.Index );
-- 
cgit v1.2.3


From d0386d55ff257ab09475178d058ddcd9f1e37c2d Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sat, 12 Jul 2008 17:06:37 +0200
Subject: tgsi: Add tgsi_text utility module.

Translates textual shader into a binary token stream.
The syntax matches the tgsi_dump module, so it's possible to
simply copy-paste the shader dump and transform it back
to a binary form.
---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 580 ++++++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/util/tgsi_text.h |  47 +++
 2 files changed, 627 insertions(+)
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_text.c
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_text.h

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
new file mode 100644
index 00000000000..bb365d1efe9
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -0,0 +1,580 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "tgsi_text.h"
+#include "tgsi_build.h"
+#include "tgsi_parse.h"
+#include "tgsi_util.h"
+
+static boolean is_alpha_underscore( const char *cur )
+{
+   return
+      (*cur >= 'a' && *cur <= 'z') ||
+      (*cur >= 'A' && *cur <= 'Z') ||
+      *cur == '_';
+}
+
+static boolean is_digit( const char *cur )
+{
+   return *cur >= '0' && *cur <= '9';
+}
+
+static boolean is_digit_alpha_underscore( const char *cur )
+{
+   return is_digit( cur ) || is_alpha_underscore( cur );
+}
+
+/* Eat zero or more whitespaces.
+ */
+static void eat_opt_white( const char **pcur )
+{
+   while (**pcur == ' ' || **pcur == '\t' || **pcur == '\n')
+      (*pcur)++;
+}
+
+/* Eat one or more whitespaces.
+ * Return TRUE if at least one whitespace eaten.
+ */
+static boolean eat_white( const char **pcur )
+{
+   const char *cur = *pcur;
+
+   eat_opt_white( pcur );
+   return *pcur > cur;
+}
+
+/* Parse unsigned integer.
+ * No checks for overflow.
+ */
+static boolean parse_uint( const char **pcur, uint *val )
+{
+   const char *cur = *pcur;
+
+   if (is_digit( cur )) {
+      *val = *cur++ - '0';
+      while (is_digit( cur ))
+         *val = *val * 10 + *cur++ - '0';
+      *pcur = cur;
+      return TRUE;
+   }
+   return FALSE;
+}
+
+struct translate_ctx
+{
+   const char *text;
+   const char *cur;
+   struct tgsi_token *tokens;
+   struct tgsi_token *tokens_cur;
+   struct tgsi_token *tokens_end;
+   struct tgsi_header *header;
+};
+
+static void report_error( struct translate_ctx *ctx, const char *msg )
+{
+   debug_printf( "\nError: %s", msg );
+}
+
+/* Parse shader header.
+ * Return TRUE for one of the following headers.
+ *    FRAG1.1
+ *    GEOM1.1
+ *    VERT1.1
+ */
+static boolean parse_header( struct translate_ctx *ctx )
+{
+   uint processor;
+
+   if (ctx->cur[0] == 'F' && ctx->cur[1] == 'R' && ctx->cur[2] == 'A' && ctx->cur[3] == 'G') {
+      ctx->cur += 4;
+      processor = TGSI_PROCESSOR_FRAGMENT;
+   }
+   else if (ctx->cur[0] == 'V' && ctx->cur[1] == 'E' && ctx->cur[2] == 'R' && ctx->cur[3] == 'T') {
+      ctx->cur += 4;
+      processor = TGSI_PROCESSOR_VERTEX;
+   }
+   else if (ctx->cur[0] == 'G' && ctx->cur[1] == 'E' && ctx->cur[2] == 'O' && ctx->cur[3] == 'M') {
+      ctx->cur += 4;
+      processor = TGSI_PROCESSOR_GEOMETRY;
+   }
+   else {
+      report_error( ctx, "Unknown processor type" );
+      return FALSE;
+   }
+
+   if (ctx->cur[0] == '1' && ctx->cur[1] == '.' && ctx->cur[2] == '1') {
+      ctx->cur += 3;
+   }
+   else {
+      report_error( ctx, "Unknown version" );
+      return FALSE;
+   }
+
+   if (ctx->tokens_cur >= ctx->tokens_end)
+      return FALSE;
+   *(struct tgsi_version *) ctx->tokens_cur++ = tgsi_build_version();
+
+   if (ctx->tokens_cur >= ctx->tokens_end)
+      return FALSE;
+   ctx->header = (struct tgsi_header *) ctx->tokens_cur++;
+   *ctx->header = tgsi_build_header();
+
+   if (ctx->tokens_cur >= ctx->tokens_end)
+      return FALSE;
+   *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header );
+
+   return TRUE;
+}
+
+static boolean parse_label( struct translate_ctx *ctx, uint *val )
+{
+   const char *cur = ctx->cur;
+
+   if (parse_uint( &cur, val )) {
+      eat_opt_white( &cur );
+      if (*cur == ':') {
+         cur++;
+         ctx->cur = cur;
+         return TRUE;
+      }
+   }
+   return FALSE;
+}
+
+static const char *file_names[TGSI_FILE_COUNT] =
+{
+   "NULL",
+   "CONST",
+   "IN",
+   "OUT",
+   "TEMP",
+   "SAMP",
+   "ADDR",
+   "IMM"
+};
+
+static boolean
+parse_file(
+   struct translate_ctx *ctx,
+   uint *file )
+{
+   uint i;
+
+   for (i = 0; i < TGSI_FILE_COUNT; i++) {
+      const char *cur = ctx->cur;
+      const char *name = file_names[i];
+
+      while (*name != '\0' && *name == toupper( *cur )) {
+         name++;
+         cur++;
+      }
+      if (*name == '\0' && !is_digit_alpha_underscore( cur )) {
+         ctx->cur = cur;
+         *file = i;
+         return TRUE;
+      }
+   }
+   report_error( ctx, "Unknown register file" );
+   return FALSE;
+}
+
+static boolean
+parse_register(
+   struct translate_ctx *ctx,
+   uint *file,
+   uint *index )
+{
+   if (!parse_file( ctx, file ))
+      return FALSE;
+   eat_opt_white( &ctx->cur );
+   if (*ctx->cur != '[') {
+      report_error( ctx, "Expected `['" );
+      return FALSE;
+   }
+   ctx->cur++;
+   eat_opt_white( &ctx->cur );
+   if (!parse_uint( &ctx->cur, index )) {
+      report_error( ctx, "Expected literal integer" );
+      return FALSE;
+   }
+   eat_opt_white( &ctx->cur );
+   if (*ctx->cur != ']') {
+      report_error( ctx, "Expected `]'" );
+      return FALSE;
+   }
+   ctx->cur++;
+   /* TODO: Modulate suffix */
+   return TRUE;
+}
+
+static boolean
+parse_dst_operand(
+   struct translate_ctx *ctx,
+   struct tgsi_full_dst_register *dst )
+{
+   const char *cur;
+   uint file;
+   uint index;
+
+   if (!parse_register( ctx, &file, &index ))
+      return FALSE;
+   dst->DstRegister.File = file;
+   dst->DstRegister.Index = index;
+
+   /* Parse optional write mask.
+    */
+   cur = ctx->cur;
+   eat_opt_white( &cur );
+   if (*cur == '.') {
+      uint writemask = TGSI_WRITEMASK_NONE;
+
+      cur++;
+      eat_opt_white( &cur );
+      if (toupper( *cur ) == 'X') {
+         cur++;
+         writemask |= TGSI_WRITEMASK_X;
+      }
+      if (toupper( *cur ) == 'Y') {
+         cur++;
+         writemask |= TGSI_WRITEMASK_Y;
+      }
+      if (toupper( *cur ) == 'Z') {
+         cur++;
+         writemask |= TGSI_WRITEMASK_Z;
+      }
+      if (toupper( *cur ) == 'W') {
+         cur++;
+         writemask |= TGSI_WRITEMASK_W;
+      }
+
+      if (writemask == TGSI_WRITEMASK_NONE) {
+         report_error( ctx, "Writemask expected" );
+         return FALSE;
+      }
+
+      dst->DstRegister.WriteMask = writemask;
+      ctx->cur = cur;
+   }
+   return TRUE;
+}
+
+static boolean
+parse_src_operand(
+   struct translate_ctx *ctx,
+   struct tgsi_full_src_register *src )
+{
+   const char *cur;
+   uint file;
+   uint index;
+
+   /* TODO: Extended register modifiers */
+   if (*ctx->cur == '-') {
+      ctx->cur++;
+      src->SrcRegister.Negate = 1;
+      eat_opt_white( &ctx->cur );
+   }
+
+   if (!parse_register( ctx, &file, &index ))
+      return FALSE;
+   src->SrcRegister.File = file;
+   src->SrcRegister.Index = index;
+
+   /* Parse optional swizzle
+    */
+   cur = ctx->cur;
+   eat_opt_white( &cur );
+   if (*cur == '.') {
+      uint i;
+
+      cur++;
+      eat_opt_white( &cur );
+      for (i = 0; i < 4; i++) {
+         uint swizzle;
+
+         if (toupper( *cur ) == 'X')
+            swizzle = TGSI_SWIZZLE_X;
+         else if (toupper( *cur ) == 'Y')
+            swizzle = TGSI_SWIZZLE_Y;
+         else if (toupper( *cur ) == 'Z')
+            swizzle = TGSI_SWIZZLE_Z;
+         else if (toupper( *cur ) == 'W')
+            swizzle = TGSI_SWIZZLE_W;
+         else {
+            report_error( ctx, "Expected register swizzle component either `x', `y', `z' or `w'" );
+            return FALSE;
+         }
+         cur++;
+         tgsi_util_set_src_register_swizzle( &src->SrcRegister, swizzle, i );
+      }
+
+      ctx->cur = cur;
+   }
+   return TRUE;
+}
+
+struct opcode_info
+{
+   uint num_dst;
+   uint num_src;
+   const char *mnemonic;
+};
+
+static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] =
+{
+{ 1, 1, "ARL" },
+   { 1, 1, "MOV" },
+{ 1, 1, "LIT" },
+   { 1, 1, "RCP" },
+   { 1, 1, "RSQ" },
+   { 1, 1, "EXP" },
+   { 1, 1, "LOG" },
+   { 1, 2, "MUL" },
+   { 1, 2, "ADD" },
+   { 1, 2, "DP3" },
+   { 1, 2, "DP4" },
+{ 1, 2, "DST" },
+   { 1, 2, "MIN" },
+   { 1, 2, "MAX" },
+{ 1, 2, "SLT" },
+{ 1, 2, "SGE" },
+   { 1, 3, "MAD" },
+   { 1, 2, "SUB" },
+{ 1, 3, "LERP" },
+{ 1, 2, "CND" },
+{ 1, 2, "CND0" },
+{ 1, 2, "DOT2ADD" },
+{ 1, 2, "INDEX" },
+{ 1, 2, "NEGATE" },
+{ 1, 2, "FRAC" },
+{ 1, 2, "CLAMP" },
+{ 1, 2, "FLOOR" },
+{ 1, 2, "ROUND" },
+{ 1, 2, "EXPBASE2" },
+{ 1, 2, "LOGBASE2" },
+{ 1, 2, "POWER" },
+{ 1, 2, "CROSSPRODUCT" },
+{ 1, 2, "MULTIPLYMATRIX" },
+   { 1, 2, "ABS" },
+{ 1, 2, "RCC" },
+{ 1, 2, "DPH" },
+{ 1, 2, "COS" },
+{ 1, 2, "DDX" },
+{ 1, 2, "DDY" },
+{ 1, 2, "KILP" },
+{ 1, 2, "PK2H" },
+{ 1, 2, "PK2US" },
+{ 1, 2, "PK4B" },
+{ 1, 2, "PK4UB" },
+{ 1, 2, "RFL" },
+{ 1, 2, "SEQ" },
+{ 1, 2, "SFL" },
+{ 1, 2, "SGT" },
+{ 1, 2, "SIN" },
+{ 1, 2, "SLE" },
+{ 1, 2, "SNE" },
+{ 1, 2, "STR" },
+{ 1, 2, "TEX" },
+{ 1, 2, "TXD" },
+{ 1, 2, "TXP" },
+{ 1, 2, "UP2H" },
+{ 1, 2, "UP2US" },
+{ 1, 2, "UP4B" },
+{ 1, 2, "UP4UB" },
+{ 1, 2, "X2D" },
+{ 1, 2, "ARA" },
+{ 1, 2, "ARR" },
+{ 1, 2, "BRA" },
+{ 1, 2, "CAL" },
+{ 1, 2, "RET" },
+{ 1, 2, "SSG" },
+{ 1, 2, "CMP" },
+{ 1, 2, "SCS" },
+{ 1, 2, "TXB" },
+{ 1, 2, "NRM" },
+{ 1, 2, "DIV" },
+{ 1, 2, "DP2" },
+{ 1, 2, "TXL" },
+{ 1, 2, "BRK" },
+{ 1, 2, "IF" },
+{ 1, 2, "LOOP" },
+{ 1, 2, "REP" },
+{ 1, 2, "ELSE" },
+{ 1, 2, "ENDIF" },
+{ 1, 2, "ENDLOOP" },
+{ 1, 2, "ENDREP" },
+{ 1, 2, "PUSHA" },
+{ 1, 2, "POPA" },
+{ 1, 2, "CEIL" },
+{ 1, 2, "I2F" },
+{ 1, 2, "NOT" },
+{ 1, 2, "TRUNC" },
+{ 1, 2, "SHL" },
+{ 1, 2, "SHR" },
+{ 1, 2, "AND" },
+{ 1, 2, "OR" },
+{ 1, 2, "MOD" },
+{ 1, 2, "XOR" },
+{ 1, 2, "SAD" },
+{ 1, 2, "TXF" },
+{ 1, 2, "TXQ" },
+{ 1, 2, "CONT" },
+{ 1, 2, "EMIT" },
+{ 1, 2, "ENDPRIM" },
+{ 1, 2, "BGNLOOP2" },
+{ 1, 2, "BGNSUB" },
+{ 1, 2, "ENDLOOP2" },
+{ 1, 2, "ENDSUB" },
+{ 1, 2, "NOISE1" },
+{ 1, 2, "NOISE2" },
+{ 1, 2, "NOISE3" },
+{ 1, 2, "NOISE4" },
+{ 1, 2, "NOP" },
+{ 1, 2, "M4X3" },
+{ 1, 2, "M3X4" },
+{ 1, 2, "M3X3" },
+{ 1, 2, "M3X2" },
+{ 1, 2, "NRM4" },
+{ 1, 2, "CALLNZ" },
+{ 1, 2, "IFC" },
+{ 1, 2, "BREAKC" },
+{ 1, 2, "KIL" },
+   { 0, 0, "END" }
+};
+
+static boolean parse_instruction( struct translate_ctx *ctx )
+{
+   uint i;
+   const struct opcode_info *info;
+   struct tgsi_full_instruction inst;
+   uint advance;
+
+   /* Parse instruction name.
+    */
+   eat_opt_white( &ctx->cur );
+   for (i = 0; i < TGSI_OPCODE_LAST; i++) {
+      const char *cur = ctx->cur;
+      const char *op = opcode_info[i].mnemonic;
+
+      while (*op != '\0' && *op == toupper( *cur )) {
+         op++;
+         cur++;
+      }
+      if (*op == '\0') {
+         /* TODO: _SAT suffix */
+         if (*cur == '\0' || eat_white( &cur )) {
+            ctx->cur = cur;
+            break;
+         }
+      }
+   }
+   if (i == TGSI_OPCODE_LAST) {
+      report_error( ctx, "Unknown opcode" );
+      return FALSE;
+   }
+   info = &opcode_info[i];
+
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = i;
+   inst.Instruction.NumDstRegs = info->num_dst;
+   inst.Instruction.NumSrcRegs = info->num_src;
+
+   /* Parse instruction operands.
+    */
+   for (i = 0; i < info->num_dst + info->num_src; i++) {
+      if (i > 0) {
+         eat_opt_white( &ctx->cur );
+         if (*ctx->cur != ',') {
+            report_error( ctx, "Expected `,'" );
+            return FALSE;
+         }
+         ctx->cur++;
+         eat_opt_white( &ctx->cur );
+      }
+
+      if (i < info->num_dst) {
+         if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] ))
+            return FALSE;
+      }
+      else {
+         if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] ))
+            return FALSE;
+      }
+   }
+   eat_opt_white( &ctx->cur );
+
+   advance = tgsi_build_full_instruction(
+      &inst,
+      ctx->tokens_cur,
+      ctx->header,
+      (uint) (ctx->tokens_end - ctx->tokens_cur) );
+   if (advance == 0)
+      return FALSE;
+   ctx->tokens_cur += advance;
+
+   return TRUE;
+}
+
+static boolean translate( struct translate_ctx *ctx )
+{
+   eat_opt_white( &ctx->cur );
+   if (!parse_header( ctx ))
+      return FALSE;
+
+   eat_white( &ctx->cur );
+   while (*ctx->cur != '\0') {
+      uint label_val = 0;
+
+      if (parse_label( ctx, &label_val )) {
+         if (!parse_instruction( ctx ))
+            return FALSE;
+      }
+      else {
+         report_error( ctx, "Instruction expected" );
+         return FALSE;
+      }
+   }
+
+   return TRUE;
+}
+
+boolean
+tgsi_text_translate(
+   const char *text,
+   struct tgsi_token *tokens,
+   uint num_tokens )
+{
+   struct translate_ctx ctx;
+
+   ctx.text = text;
+   ctx.cur = text;
+   ctx.tokens = tokens;
+   ctx.tokens_cur = tokens;
+   ctx.tokens_end = tokens + num_tokens;
+
+   return translate( &ctx );
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.h b/src/gallium/auxiliary/tgsi/util/tgsi_text.h
new file mode 100644
index 00000000000..8eeeeef1402
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.h
@@ -0,0 +1,47 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_TEXT_H
+#define TGSI_TEXT_H
+
+#include "pipe/p_shader_tokens.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+boolean
+tgsi_text_translate(
+   const char *text,
+   struct tgsi_token *tokens,
+   uint num_tokens );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_TEXT_H */
-- 
cgit v1.2.3


From c415de5e251eb4004b1ef5bc57299032d95c4842 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sat, 12 Jul 2008 17:10:21 +0200
Subject: scons: List `util/tgsi_text.c'.

---
 src/gallium/auxiliary/tgsi/SConscript | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 4632dcc0728..b62c8efe027 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -9,6 +9,7 @@ tgsi = env.ConvenienceLibrary(
 		'util/tgsi_dump.c',
 		'util/tgsi_parse.c',
 		'util/tgsi_scan.c',
+		'util/tgsi_text.c',
 		'util/tgsi_transform.c',
 		'util/tgsi_util.c',
 	])
-- 
cgit v1.2.3


From bd3b47590e2a8e91a8f5545d7c8872b26879c228 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 13 Jul 2008 11:30:02 +0200
Subject: tgsi: Remove depricated ATTRIB interpolate mode.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 3 +--
 src/gallium/include/pipe/p_shader_tokens.h  | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 6356b6de06c..59e7aaeceb4 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -119,8 +119,7 @@ static const char *TGSI_INTERPOLATES_SHORT[] =
 {
    "CONSTANT",
    "LINEAR",
-   "PERSPECTIVE",
-   "ATTRIB"
+   "PERSPECTIVE"
 };
 
 static const char *TGSI_SEMANTICS[] =
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 84e5096418b..33268553ff7 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -75,6 +75,7 @@ enum tgsi_file_type {
 #define TGSI_INTERPOLATE_CONSTANT      0
 #define TGSI_INTERPOLATE_LINEAR        1
 #define TGSI_INTERPOLATE_PERSPECTIVE   2
+#define TGSI_INTERPOLATE_COUNT         3
 
 struct tgsi_declaration
 {
-- 
cgit v1.2.3


From ee647b9020b5e16b9b6d399edfa4a2c99f491863 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 13 Jul 2008 11:37:36 +0200
Subject: tgsi: Parse DCL statements.

---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 330 +++++++++++++++++++++-------
 1 file changed, 256 insertions(+), 74 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index bb365d1efe9..03c92172436 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -49,6 +49,21 @@ static boolean is_digit_alpha_underscore( const char *cur )
    return is_digit( cur ) || is_alpha_underscore( cur );
 }
 
+static boolean str_match_no_case( const char **pcur, const char *str )
+{
+   const char *cur = *pcur;
+
+   while (*str != '\0' && *str == toupper( *cur )) {
+      str++;
+      cur++;
+   }
+   if (*str == '\0') {
+      *pcur = cur;
+      return TRUE;
+   }
+   return FALSE;
+}
+
 /* Eat zero or more whitespaces.
  */
 static void eat_opt_white( const char **pcur )
@@ -110,28 +125,14 @@ static boolean parse_header( struct translate_ctx *ctx )
 {
    uint processor;
 
-   if (ctx->cur[0] == 'F' && ctx->cur[1] == 'R' && ctx->cur[2] == 'A' && ctx->cur[3] == 'G') {
-      ctx->cur += 4;
+   if (str_match_no_case( &ctx->cur, "FRAG1.1" ))
       processor = TGSI_PROCESSOR_FRAGMENT;
-   }
-   else if (ctx->cur[0] == 'V' && ctx->cur[1] == 'E' && ctx->cur[2] == 'R' && ctx->cur[3] == 'T') {
-      ctx->cur += 4;
+   else if (str_match_no_case( &ctx->cur, "VERT1.1" ))
       processor = TGSI_PROCESSOR_VERTEX;
-   }
-   else if (ctx->cur[0] == 'G' && ctx->cur[1] == 'E' && ctx->cur[2] == 'O' && ctx->cur[3] == 'M') {
-      ctx->cur += 4;
+   else if (str_match_no_case( &ctx->cur, "GEOM1.1" ))
       processor = TGSI_PROCESSOR_GEOMETRY;
-   }
-   else {
-      report_error( ctx, "Unknown processor type" );
-      return FALSE;
-   }
-
-   if (ctx->cur[0] == '1' && ctx->cur[1] == '.' && ctx->cur[2] == '1') {
-      ctx->cur += 3;
-   }
    else {
-      report_error( ctx, "Unknown version" );
+      report_error( ctx, "Unknown header" );
       return FALSE;
    }
 
@@ -187,16 +188,13 @@ parse_file(
 
    for (i = 0; i < TGSI_FILE_COUNT; i++) {
       const char *cur = ctx->cur;
-      const char *name = file_names[i];
 
-      while (*name != '\0' && *name == toupper( *cur )) {
-         name++;
-         cur++;
-      }
-      if (*name == '\0' && !is_digit_alpha_underscore( cur )) {
-         ctx->cur = cur;
-         *file = i;
-         return TRUE;
+      if (str_match_no_case( &cur, file_names[i] )) {
+         if (!is_digit_alpha_underscore( cur )) {
+            ctx->cur = cur;
+            *file = i;
+            return TRUE;
+         }
       }
    }
    report_error( ctx, "Unknown register file" );
@@ -204,7 +202,53 @@ parse_file(
 }
 
 static boolean
-parse_register(
+parse_opt_writemask(
+   struct translate_ctx *ctx,
+   uint *writemask )
+{
+   const char *cur;
+
+   cur = ctx->cur;
+   eat_opt_white( &cur );
+   if (*cur == '.') {
+      cur++;
+      *writemask = TGSI_WRITEMASK_NONE;
+      eat_opt_white( &cur );
+      if (toupper( *cur ) == 'X') {
+         cur++;
+         *writemask |= TGSI_WRITEMASK_X;
+      }
+      if (toupper( *cur ) == 'Y') {
+         cur++;
+         *writemask |= TGSI_WRITEMASK_Y;
+      }
+      if (toupper( *cur ) == 'Z') {
+         cur++;
+         *writemask |= TGSI_WRITEMASK_Z;
+      }
+      if (toupper( *cur ) == 'W') {
+         cur++;
+         *writemask |= TGSI_WRITEMASK_W;
+      }
+
+      if (*writemask == TGSI_WRITEMASK_NONE) {
+         report_error( ctx, "Writemask expected" );
+         return FALSE;
+      }
+
+      ctx->cur = cur;
+   }
+   else {
+      *writemask = TGSI_WRITEMASK_XYZW;
+   }
+   return TRUE;
+}
+
+/* Parse register part common for decls and operands.
+ *    <register_prefix> ::= <file> `[' <index>
+ */
+static boolean
+parse_register_prefix(
    struct translate_ctx *ctx,
    uint *file,
    uint *index )
@@ -222,6 +266,20 @@ parse_register(
       report_error( ctx, "Expected literal integer" );
       return FALSE;
    }
+   return TRUE;
+}
+
+/* Parse register operand.
+ *    <register> ::= <register_prefix> `]'
+ */
+static boolean
+parse_register(
+   struct translate_ctx *ctx,
+   uint *file,
+   uint *index )
+{
+   if (!parse_register_prefix( ctx, file, index ))
+      return FALSE;
    eat_opt_white( &ctx->cur );
    if (*ctx->cur != ']') {
       report_error( ctx, "Expected `]'" );
@@ -232,54 +290,55 @@ parse_register(
    return TRUE;
 }
 
+/* Parse register declaration.
+ *    <register> ::= <register_prefix> `]' | <register_prefix> `..' <index> `]'
+ */
+static boolean
+parse_register_dcl(
+   struct translate_ctx *ctx,
+   uint *file,
+   uint *first,
+   uint *last )
+{
+   if (!parse_register_prefix( ctx, file, first ))
+      return FALSE;
+   eat_opt_white( &ctx->cur );
+   if (ctx->cur[0] == '.' && ctx->cur[1] == '.') {
+      ctx->cur += 2;
+      eat_opt_white( &ctx->cur );
+      if (!parse_uint( &ctx->cur, last )) {
+         report_error( ctx, "Expected literal integer" );
+         return FALSE;
+      }
+      eat_opt_white( &ctx->cur );
+   }
+   else {
+      *last = *first;
+   }
+   if (*ctx->cur != ']') {
+      report_error( ctx, "Expected `]' or `..'" );
+      return FALSE;
+   }
+   ctx->cur++;
+   return TRUE;
+}
+
 static boolean
 parse_dst_operand(
    struct translate_ctx *ctx,
    struct tgsi_full_dst_register *dst )
 {
-   const char *cur;
    uint file;
    uint index;
+   uint writemask;
 
    if (!parse_register( ctx, &file, &index ))
       return FALSE;
+   if (!parse_opt_writemask( ctx, &writemask ))
+      return FALSE;
    dst->DstRegister.File = file;
    dst->DstRegister.Index = index;
-
-   /* Parse optional write mask.
-    */
-   cur = ctx->cur;
-   eat_opt_white( &cur );
-   if (*cur == '.') {
-      uint writemask = TGSI_WRITEMASK_NONE;
-
-      cur++;
-      eat_opt_white( &cur );
-      if (toupper( *cur ) == 'X') {
-         cur++;
-         writemask |= TGSI_WRITEMASK_X;
-      }
-      if (toupper( *cur ) == 'Y') {
-         cur++;
-         writemask |= TGSI_WRITEMASK_Y;
-      }
-      if (toupper( *cur ) == 'Z') {
-         cur++;
-         writemask |= TGSI_WRITEMASK_Z;
-      }
-      if (toupper( *cur ) == 'W') {
-         cur++;
-         writemask |= TGSI_WRITEMASK_W;
-      }
-
-      if (writemask == TGSI_WRITEMASK_NONE) {
-         report_error( ctx, "Writemask expected" );
-         return FALSE;
-      }
-
-      dst->DstRegister.WriteMask = writemask;
-      ctx->cur = cur;
-   }
+   dst->DstRegister.WriteMask = writemask;
    return TRUE;
 }
 
@@ -478,15 +537,10 @@ static boolean parse_instruction( struct translate_ctx *ctx )
    eat_opt_white( &ctx->cur );
    for (i = 0; i < TGSI_OPCODE_LAST; i++) {
       const char *cur = ctx->cur;
-      const char *op = opcode_info[i].mnemonic;
 
-      while (*op != '\0' && *op == toupper( *cur )) {
-         op++;
-         cur++;
-      }
-      if (*op == '\0') {
+      if (str_match_no_case( &cur, opcode_info[i].mnemonic )) {
          /* TODO: _SAT suffix */
-         if (*cur == '\0' || eat_white( &cur )) {
+         if (eat_white( &cur )) {
             ctx->cur = cur;
             break;
          }
@@ -525,7 +579,6 @@ static boolean parse_instruction( struct translate_ctx *ctx )
             return FALSE;
       }
    }
-   eat_opt_white( &ctx->cur );
 
    advance = tgsi_build_full_instruction(
       &inst,
@@ -539,22 +592,151 @@ static boolean parse_instruction( struct translate_ctx *ctx )
    return TRUE;
 }
 
+static const char *semantic_names[TGSI_SEMANTIC_COUNT] =
+{
+   "POSITION",
+   "COLOR",
+   "BCOLOR",
+   "FOG",
+   "PSIZE",
+   "GENERIC",
+   "NORMAL"
+};
+
+static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
+{
+   "CONSTANT",
+   "LINEAR",
+   "PERSPECTIVE"
+};
+
+static boolean parse_declaration( struct translate_ctx *ctx )
+{
+   struct tgsi_full_declaration decl;
+   uint file;
+   uint first;
+   uint last;
+   uint writemask;
+   const char *cur;
+   uint advance;
+
+   if (!eat_white( &ctx->cur )) {
+      report_error( ctx, "Syntax error" );
+      return FALSE;
+   }
+   if (!parse_register_dcl( ctx, &file, &first, &last ))
+      return FALSE;
+   if (!parse_opt_writemask( ctx, &writemask ))
+      return FALSE;
+
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = file;
+   decl.Declaration.UsageMask = writemask;
+   decl.DeclarationRange.First = first;
+   decl.DeclarationRange.Last = last;
+
+   cur = ctx->cur;
+   eat_opt_white( &cur );
+   if (*cur == ',') {
+      uint i;
+
+      cur++;
+      eat_opt_white( &cur );
+      for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) {
+         if (str_match_no_case( &cur, semantic_names[i] )) {
+            const char *cur2 = cur;
+            uint index;
+
+            if (is_digit_alpha_underscore( cur ))
+               continue;
+            eat_opt_white( &cur2 );
+            if (*cur2 == '[') {
+               cur2++;
+               eat_opt_white( &cur2 );
+               if (!parse_uint( &cur2, &index )) {
+                  report_error( ctx, "Expected literal integer" );
+                  return FALSE;
+               }
+               eat_opt_white( &cur2 );
+               if (*cur2 != ']') {
+                  report_error( ctx, "Expected `]'" );
+                  return FALSE;
+               }
+               cur2++;
+
+               decl.Semantic.SemanticIndex = index;
+
+               cur = cur2;
+            }
+
+            decl.Declaration.Semantic = 1;
+            decl.Semantic.SemanticName = i;
+
+            ctx->cur = cur;
+            break;
+         }
+      }
+   }
+
+   cur = ctx->cur;
+   eat_opt_white( &cur );
+   if (*cur == ',') {
+      uint i;
+
+      cur++;
+      eat_opt_white( &cur );
+      for (i = 0; i < TGSI_INTERPOLATE_COUNT; i++) {
+         if (str_match_no_case( &cur, interpolate_names[i] )) {
+            if (is_digit_alpha_underscore( cur ))
+               continue;
+            decl.Declaration.Interpolate = i;
+
+            ctx->cur = cur;
+            break;
+         }
+      }
+      if (i == TGSI_INTERPOLATE_COUNT) {
+         report_error( ctx, "Expected semantic or interpolate attribute" );
+         return FALSE;
+      }
+   }
+
+   advance = tgsi_build_full_declaration(
+      &decl,
+      ctx->tokens_cur,
+      ctx->header,
+      (uint) (ctx->tokens_end - ctx->tokens_cur) );
+   if (advance == 0)
+      return FALSE;
+   ctx->tokens_cur += advance;
+
+   return TRUE;
+}
+
 static boolean translate( struct translate_ctx *ctx )
 {
    eat_opt_white( &ctx->cur );
    if (!parse_header( ctx ))
       return FALSE;
 
-   eat_white( &ctx->cur );
    while (*ctx->cur != '\0') {
       uint label_val = 0;
 
+      if (!eat_white( &ctx->cur )) {
+         report_error( ctx, "Syntax error" );
+         return FALSE;
+      }
+
       if (parse_label( ctx, &label_val )) {
          if (!parse_instruction( ctx ))
             return FALSE;
       }
+      else if (str_match_no_case( &ctx->cur, "DCL" )) {
+         if (!parse_declaration( ctx ))
+            return FALSE;
+      }
       else {
-         report_error( ctx, "Instruction expected" );
+         report_error( ctx, "Expected `DCL' or a label" );
          return FALSE;
       }
    }
-- 
cgit v1.2.3


From 625034104aacaca793ff373414eff4ee53afc1fe Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 13 Jul 2008 11:42:33 +0200
Subject: tgsi: Add missing SWZ opcode.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 3 ++-
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 59e7aaeceb4..cae74a92672 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -395,7 +395,8 @@ static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] =
    "IFC",
    "BREAKC",
    "KIL",
-   "END"
+   "END",
+   "SWZ"
 };
 
 static const char *TGSI_SATS[] =
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index 03c92172436..3415b38f9c6 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -522,7 +522,8 @@ static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] =
 { 1, 2, "IFC" },
 { 1, 2, "BREAKC" },
 { 1, 2, "KIL" },
-   { 0, 0, "END" }
+   { 0, 0, "END" },
+   { 1, 1, "SWZ" }
 };
 
 static boolean parse_instruction( struct translate_ctx *ctx )
-- 
cgit v1.2.3


From cfd2bf9fa127a7f0b1a89650fde34a23f318f90c Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 13 Jul 2008 11:43:30 +0200
Subject: tgsi: Fix instruction opcode parsing.

---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index 3415b38f9c6..7848f3d55f2 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -541,7 +541,7 @@ static boolean parse_instruction( struct translate_ctx *ctx )
 
       if (str_match_no_case( &cur, opcode_info[i].mnemonic )) {
          /* TODO: _SAT suffix */
-         if (eat_white( &cur )) {
+         if (*cur == '\0' || eat_white( &cur )) {
             ctx->cur = cur;
             break;
          }
-- 
cgit v1.2.3


From 46a7843099f02b6dcff56a52c9247c11d5c4aa8b Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 13 Jul 2008 12:16:16 +0200
Subject: tgsi: Fix instruction operand counts.

---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 208 ++++++++++++++--------------
 1 file changed, 104 insertions(+), 104 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index 7848f3d55f2..3c7a4688c99 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -405,9 +405,9 @@ struct opcode_info
 
 static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] =
 {
-{ 1, 1, "ARL" },
+   { 1, 1, "ARL" },
    { 1, 1, "MOV" },
-{ 1, 1, "LIT" },
+   { 1, 1, "LIT" },
    { 1, 1, "RCP" },
    { 1, 1, "RSQ" },
    { 1, 1, "EXP" },
@@ -416,112 +416,112 @@ static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 1, 2, "ADD" },
    { 1, 2, "DP3" },
    { 1, 2, "DP4" },
-{ 1, 2, "DST" },
+   { 1, 2, "DST" },
    { 1, 2, "MIN" },
    { 1, 2, "MAX" },
-{ 1, 2, "SLT" },
-{ 1, 2, "SGE" },
+   { 1, 2, "SLT" },
+   { 1, 2, "SGE" },
    { 1, 3, "MAD" },
    { 1, 2, "SUB" },
-{ 1, 3, "LERP" },
-{ 1, 2, "CND" },
-{ 1, 2, "CND0" },
-{ 1, 2, "DOT2ADD" },
-{ 1, 2, "INDEX" },
-{ 1, 2, "NEGATE" },
-{ 1, 2, "FRAC" },
-{ 1, 2, "CLAMP" },
-{ 1, 2, "FLOOR" },
-{ 1, 2, "ROUND" },
-{ 1, 2, "EXPBASE2" },
-{ 1, 2, "LOGBASE2" },
-{ 1, 2, "POWER" },
-{ 1, 2, "CROSSPRODUCT" },
-{ 1, 2, "MULTIPLYMATRIX" },
-   { 1, 2, "ABS" },
-{ 1, 2, "RCC" },
-{ 1, 2, "DPH" },
-{ 1, 2, "COS" },
-{ 1, 2, "DDX" },
-{ 1, 2, "DDY" },
-{ 1, 2, "KILP" },
-{ 1, 2, "PK2H" },
-{ 1, 2, "PK2US" },
-{ 1, 2, "PK4B" },
-{ 1, 2, "PK4UB" },
-{ 1, 2, "RFL" },
-{ 1, 2, "SEQ" },
-{ 1, 2, "SFL" },
-{ 1, 2, "SGT" },
-{ 1, 2, "SIN" },
-{ 1, 2, "SLE" },
-{ 1, 2, "SNE" },
-{ 1, 2, "STR" },
-{ 1, 2, "TEX" },
-{ 1, 2, "TXD" },
-{ 1, 2, "TXP" },
-{ 1, 2, "UP2H" },
-{ 1, 2, "UP2US" },
-{ 1, 2, "UP4B" },
-{ 1, 2, "UP4UB" },
-{ 1, 2, "X2D" },
-{ 1, 2, "ARA" },
-{ 1, 2, "ARR" },
-{ 1, 2, "BRA" },
-{ 1, 2, "CAL" },
-{ 1, 2, "RET" },
-{ 1, 2, "SSG" },
-{ 1, 2, "CMP" },
-{ 1, 2, "SCS" },
-{ 1, 2, "TXB" },
-{ 1, 2, "NRM" },
-{ 1, 2, "DIV" },
-{ 1, 2, "DP2" },
-{ 1, 2, "TXL" },
-{ 1, 2, "BRK" },
-{ 1, 2, "IF" },
-{ 1, 2, "LOOP" },
-{ 1, 2, "REP" },
-{ 1, 2, "ELSE" },
-{ 1, 2, "ENDIF" },
-{ 1, 2, "ENDLOOP" },
-{ 1, 2, "ENDREP" },
-{ 1, 2, "PUSHA" },
-{ 1, 2, "POPA" },
-{ 1, 2, "CEIL" },
-{ 1, 2, "I2F" },
-{ 1, 2, "NOT" },
-{ 1, 2, "TRUNC" },
-{ 1, 2, "SHL" },
-{ 1, 2, "SHR" },
-{ 1, 2, "AND" },
-{ 1, 2, "OR" },
-{ 1, 2, "MOD" },
-{ 1, 2, "XOR" },
-{ 1, 2, "SAD" },
-{ 1, 2, "TXF" },
-{ 1, 2, "TXQ" },
-{ 1, 2, "CONT" },
-{ 1, 2, "EMIT" },
-{ 1, 2, "ENDPRIM" },
-{ 1, 2, "BGNLOOP2" },
-{ 1, 2, "BGNSUB" },
-{ 1, 2, "ENDLOOP2" },
-{ 1, 2, "ENDSUB" },
-{ 1, 2, "NOISE1" },
-{ 1, 2, "NOISE2" },
-{ 1, 2, "NOISE3" },
-{ 1, 2, "NOISE4" },
-{ 1, 2, "NOP" },
-{ 1, 2, "M4X3" },
-{ 1, 2, "M3X4" },
-{ 1, 2, "M3X3" },
-{ 1, 2, "M3X2" },
-{ 1, 2, "NRM4" },
-{ 1, 2, "CALLNZ" },
-{ 1, 2, "IFC" },
-{ 1, 2, "BREAKC" },
-{ 1, 2, "KIL" },
+   { 1, 3, "LERP" },
+   { 1, 3, "CND" },
+   { 1, 3, "CND0" },
+   { 1, 3, "DOT2ADD" },
+   { 1, 2, "INDEX" },
+   { 1, 1, "NEGATE" },
+   { 1, 1, "FRAC" },
+   { 1, 3, "CLAMP" },
+   { 1, 1, "FLOOR" },
+   { 1, 1, "ROUND" },
+   { 1, 1, "EXPBASE2" },
+   { 1, 1, "LOGBASE2" },
+   { 1, 2, "POWER" },
+   { 1, 2, "CROSSPRODUCT" },
+   { 1, 2, "MULTIPLYMATRIX" },
+   { 1, 1, "ABS" },
+   { 1, 1, "RCC" },
+   { 1, 2, "DPH" },
+   { 1, 1, "COS" },
+   { 1, 1, "DDX" },
+   { 1, 1, "DDY" },
+   { 0, 1, "KILP" },
+   { 1, 1, "PK2H" },
+   { 1, 1, "PK2US" },
+   { 1, 1, "PK4B" },
+   { 1, 1, "PK4UB" },
+   { 1, 2, "RFL" },
+   { 1, 2, "SEQ" },
+   { 1, 2, "SFL" },
+   { 1, 2, "SGT" },
+   { 1, 1, "SIN" },
+   { 1, 2, "SLE" },
+   { 1, 2, "SNE" },
+   { 1, 2, "STR" },
+   { 1, 1, "TEX" },
+   { 1, 3, "TXD" },
+   { 1, 1, "TXP" },
+   { 1, 1, "UP2H" },
+   { 1, 1, "UP2US" },
+   { 1, 1, "UP4B" },
+   { 1, 1, "UP4UB" },
+   { 1, 3, "X2D" },
+   { 1, 1, "ARA" },
+   { 1, 1, "ARR" },
+   { 0, 1, "BRA" },
+   { 0, 0, "CAL" },
+   { 0, 0, "RET" },
+   { 1, 1, "SSG" },
+   { 1, 3, "CMP" },
+   { 1, 1, "SCS" },
+   { 1, 1, "TXB" },
+   { 1, 1, "NRM" },
+   { 1, 2, "DIV" },
+   { 1, 2, "DP2" },
+   { 1, 1, "TXL" },
+   { 0, 0, "BRK" },
+   { 0, 1, "IF" },
+   { 0, 0, "LOOP" },
+   { 0, 1, "REP" },
+   { 0, 0, "ELSE" },
+   { 0, 0, "ENDIF" },
+   { 0, 0, "ENDLOOP" },
+   { 0, 0, "ENDREP" },
+   { 0, 1, "PUSHA" },
+   { 1, 0, "POPA" },
+   { 1, 1, "CEIL" },
+   { 1, 1, "I2F" },
+   { 1, 1, "NOT" },
+   { 1, 1, "TRUNC" },
+   { 1, 2, "SHL" },
+   { 1, 2, "SHR" },
+   { 1, 2, "AND" },
+   { 1, 2, "OR" },
+   { 1, 2, "MOD" },
+   { 1, 2, "XOR" },
+   { 1, 3, "SAD" },
+   { 1, 1, "TXF" },
+   { 1, 1, "TXQ" },
+   { 0, 0, "CONT" },
+   { 0, 0, "EMIT" },
+   { 0, 0, "ENDPRIM" },
+   { 0, 0, "BGNLOOP2" },
+   { 0, 0, "BGNSUB" },
+   { 0, 0, "ENDLOOP2" },
+   { 0, 0, "ENDSUB" },
+   { 1, 1, "NOISE1" },
+   { 1, 1, "NOISE2" },
+   { 1, 1, "NOISE3" },
+   { 1, 1, "NOISE4" },
+   { 0, 0, "NOP" },
+   { 1, 2, "M4X3" },
+   { 1, 2, "M3X4" },
+   { 1, 2, "M3X3" },
+   { 1, 2, "M3X2" },
+   { 1, 1, "NRM4" },
+   { 0, 1, "CALLNZ" },
+   { 0, 1, "IFC" },
+   { 0, 1, "BREAKC" },
+   { 0, 0, "KIL" },
    { 0, 0, "END" },
    { 1, 1, "SWZ" }
 };
-- 
cgit v1.2.3


From 3d5dcc2203f5018863ad51958871542696e1ed1b Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 13 Jul 2008 13:13:39 +0200
Subject: tgsi: Parse texture instructions correctly.

---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 272 +++++++++++++++-------------
 src/gallium/include/pipe/p_shader_tokens.h  |   1 +
 2 files changed, 152 insertions(+), 121 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index 3c7a4688c99..74850028627 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -400,130 +400,144 @@ struct opcode_info
 {
    uint num_dst;
    uint num_src;
+   uint is_tex;
    const char *mnemonic;
 };
 
 static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] =
 {
-   { 1, 1, "ARL" },
-   { 1, 1, "MOV" },
-   { 1, 1, "LIT" },
-   { 1, 1, "RCP" },
-   { 1, 1, "RSQ" },
-   { 1, 1, "EXP" },
-   { 1, 1, "LOG" },
-   { 1, 2, "MUL" },
-   { 1, 2, "ADD" },
-   { 1, 2, "DP3" },
-   { 1, 2, "DP4" },
-   { 1, 2, "DST" },
-   { 1, 2, "MIN" },
-   { 1, 2, "MAX" },
-   { 1, 2, "SLT" },
-   { 1, 2, "SGE" },
-   { 1, 3, "MAD" },
-   { 1, 2, "SUB" },
-   { 1, 3, "LERP" },
-   { 1, 3, "CND" },
-   { 1, 3, "CND0" },
-   { 1, 3, "DOT2ADD" },
-   { 1, 2, "INDEX" },
-   { 1, 1, "NEGATE" },
-   { 1, 1, "FRAC" },
-   { 1, 3, "CLAMP" },
-   { 1, 1, "FLOOR" },
-   { 1, 1, "ROUND" },
-   { 1, 1, "EXPBASE2" },
-   { 1, 1, "LOGBASE2" },
-   { 1, 2, "POWER" },
-   { 1, 2, "CROSSPRODUCT" },
-   { 1, 2, "MULTIPLYMATRIX" },
-   { 1, 1, "ABS" },
-   { 1, 1, "RCC" },
-   { 1, 2, "DPH" },
-   { 1, 1, "COS" },
-   { 1, 1, "DDX" },
-   { 1, 1, "DDY" },
-   { 0, 1, "KILP" },
-   { 1, 1, "PK2H" },
-   { 1, 1, "PK2US" },
-   { 1, 1, "PK4B" },
-   { 1, 1, "PK4UB" },
-   { 1, 2, "RFL" },
-   { 1, 2, "SEQ" },
-   { 1, 2, "SFL" },
-   { 1, 2, "SGT" },
-   { 1, 1, "SIN" },
-   { 1, 2, "SLE" },
-   { 1, 2, "SNE" },
-   { 1, 2, "STR" },
-   { 1, 1, "TEX" },
-   { 1, 3, "TXD" },
-   { 1, 1, "TXP" },
-   { 1, 1, "UP2H" },
-   { 1, 1, "UP2US" },
-   { 1, 1, "UP4B" },
-   { 1, 1, "UP4UB" },
-   { 1, 3, "X2D" },
-   { 1, 1, "ARA" },
-   { 1, 1, "ARR" },
-   { 0, 1, "BRA" },
-   { 0, 0, "CAL" },
-   { 0, 0, "RET" },
-   { 1, 1, "SSG" },
-   { 1, 3, "CMP" },
-   { 1, 1, "SCS" },
-   { 1, 1, "TXB" },
-   { 1, 1, "NRM" },
-   { 1, 2, "DIV" },
-   { 1, 2, "DP2" },
-   { 1, 1, "TXL" },
-   { 0, 0, "BRK" },
-   { 0, 1, "IF" },
-   { 0, 0, "LOOP" },
-   { 0, 1, "REP" },
-   { 0, 0, "ELSE" },
-   { 0, 0, "ENDIF" },
-   { 0, 0, "ENDLOOP" },
-   { 0, 0, "ENDREP" },
-   { 0, 1, "PUSHA" },
-   { 1, 0, "POPA" },
-   { 1, 1, "CEIL" },
-   { 1, 1, "I2F" },
-   { 1, 1, "NOT" },
-   { 1, 1, "TRUNC" },
-   { 1, 2, "SHL" },
-   { 1, 2, "SHR" },
-   { 1, 2, "AND" },
-   { 1, 2, "OR" },
-   { 1, 2, "MOD" },
-   { 1, 2, "XOR" },
-   { 1, 3, "SAD" },
-   { 1, 1, "TXF" },
-   { 1, 1, "TXQ" },
-   { 0, 0, "CONT" },
-   { 0, 0, "EMIT" },
-   { 0, 0, "ENDPRIM" },
-   { 0, 0, "BGNLOOP2" },
-   { 0, 0, "BGNSUB" },
-   { 0, 0, "ENDLOOP2" },
-   { 0, 0, "ENDSUB" },
-   { 1, 1, "NOISE1" },
-   { 1, 1, "NOISE2" },
-   { 1, 1, "NOISE3" },
-   { 1, 1, "NOISE4" },
-   { 0, 0, "NOP" },
-   { 1, 2, "M4X3" },
-   { 1, 2, "M3X4" },
-   { 1, 2, "M3X3" },
-   { 1, 2, "M3X2" },
-   { 1, 1, "NRM4" },
-   { 0, 1, "CALLNZ" },
-   { 0, 1, "IFC" },
-   { 0, 1, "BREAKC" },
-   { 0, 0, "KIL" },
-   { 0, 0, "END" },
-   { 1, 1, "SWZ" }
+   { 1, 1, 0, "ARL" },
+   { 1, 1, 0, "MOV" },
+   { 1, 1, 0, "LIT" },
+   { 1, 1, 0, "RCP" },
+   { 1, 1, 0, "RSQ" },
+   { 1, 1, 0, "EXP" },
+   { 1, 1, 0, "LOG" },
+   { 1, 2, 0, "MUL" },
+   { 1, 2, 0, "ADD" },
+   { 1, 2, 0, "DP3" },
+   { 1, 2, 0, "DP4" },
+   { 1, 2, 0, "DST" },
+   { 1, 2, 0, "MIN" },
+   { 1, 2, 0, "MAX" },
+   { 1, 2, 0, "SLT" },
+   { 1, 2, 0, "SGE" },
+   { 1, 3, 0, "MAD" },
+   { 1, 2, 0, "SUB" },
+   { 1, 3, 0, "LERP" },
+   { 1, 3, 0, "CND" },
+   { 1, 3, 0, "CND0" },
+   { 1, 3, 0, "DOT2ADD" },
+   { 1, 2, 0, "INDEX" },
+   { 1, 1, 0, "NEGATE" },
+   { 1, 1, 0, "FRAC" },
+   { 1, 3, 0, "CLAMP" },
+   { 1, 1, 0, "FLOOR" },
+   { 1, 1, 0, "ROUND" },
+   { 1, 1, 0, "EXPBASE2" },
+   { 1, 1, 0, "LOGBASE2" },
+   { 1, 2, 0, "POWER" },
+   { 1, 2, 0, "CROSSPRODUCT" },
+   { 1, 2, 0, "MULTIPLYMATRIX" },
+   { 1, 1, 0, "ABS" },
+   { 1, 1, 0, "RCC" },
+   { 1, 2, 0, "DPH" },
+   { 1, 1, 0, "COS" },
+   { 1, 1, 0, "DDX" },
+   { 1, 1, 0, "DDY" },
+   { 0, 1, 0, "KILP" },
+   { 1, 1, 0, "PK2H" },
+   { 1, 1, 0, "PK2US" },
+   { 1, 1, 0, "PK4B" },
+   { 1, 1, 0, "PK4UB" },
+   { 1, 2, 0, "RFL" },
+   { 1, 2, 0, "SEQ" },
+   { 1, 2, 0, "SFL" },
+   { 1, 2, 0, "SGT" },
+   { 1, 1, 0, "SIN" },
+   { 1, 2, 0, "SLE" },
+   { 1, 2, 0, "SNE" },
+   { 1, 2, 0, "STR" },
+   { 1, 2, 1, "TEX" },
+   { 1, 4, 1, "TXD" },
+   { 1, 2, 1, "TXP" },
+   { 1, 1, 0, "UP2H" },
+   { 1, 1, 0, "UP2US" },
+   { 1, 1, 0, "UP4B" },
+   { 1, 1, 0, "UP4UB" },
+   { 1, 3, 0, "X2D" },
+   { 1, 1, 0, "ARA" },
+   { 1, 1, 0, "ARR" },
+   { 0, 1, 0, "BRA" },
+   { 0, 0, 0, "CAL" },
+   { 0, 0, 0, "RET" },
+   { 1, 1, 0, "SSG" },
+   { 1, 3, 0, "CMP" },
+   { 1, 1, 0, "SCS" },
+   { 1, 2, 1, "TXB" },
+   { 1, 1, 0, "NRM" },
+   { 1, 2, 0, "DIV" },
+   { 1, 2, 0, "DP2" },
+   { 1, 2, 1, "TXL" },
+   { 0, 0, 0, "BRK" },
+   { 0, 1, 0, "IF" },
+   { 0, 0, 0, "LOOP" },
+   { 0, 1, 0, "REP" },
+   { 0, 0, 0, "ELSE" },
+   { 0, 0, 0, "ENDIF" },
+   { 0, 0, 0, "ENDLOOP" },
+   { 0, 0, 0, "ENDREP" },
+   { 0, 1, 0, "PUSHA" },
+   { 1, 0, 0, "POPA" },
+   { 1, 1, 0, "CEIL" },
+   { 1, 1, 0, "I2F" },
+   { 1, 1, 0, "NOT" },
+   { 1, 1, 0, "TRUNC" },
+   { 1, 2, 0, "SHL" },
+   { 1, 2, 0, "SHR" },
+   { 1, 2, 0, "AND" },
+   { 1, 2, 0, "OR" },
+   { 1, 2, 0, "MOD" },
+   { 1, 2, 0, "XOR" },
+   { 1, 3, 0, "SAD" },
+   { 1, 2, 1, "TXF" },
+   { 1, 2, 1, "TXQ" },
+   { 0, 0, 0, "CONT" },
+   { 0, 0, 0, "EMIT" },
+   { 0, 0, 0, "ENDPRIM" },
+   { 0, 0, 0, "BGNLOOP2" },
+   { 0, 0, 0, "BGNSUB" },
+   { 0, 0, 0, "ENDLOOP2" },
+   { 0, 0, 0, "ENDSUB" },
+   { 1, 1, 0, "NOISE1" },
+   { 1, 1, 0, "NOISE2" },
+   { 1, 1, 0, "NOISE3" },
+   { 1, 1, 0, "NOISE4" },
+   { 0, 0, 0, "NOP" },
+   { 1, 2, 0, "M4X3" },
+   { 1, 2, 0, "M3X4" },
+   { 1, 2, 0, "M3X3" },
+   { 1, 2, 0, "M3X2" },
+   { 1, 1, 0, "NRM4" },
+   { 0, 1, 0, "CALLNZ" },
+   { 0, 1, 0, "IFC" },
+   { 0, 1, 0, "BREAKC" },
+   { 0, 0, 0, "KIL" },
+   { 0, 0, 0, "END" },
+   { 1, 1, 0, "SWZ" }
+};
+
+static const char *texture_names[TGSI_TEXTURE_COUNT] =
+{
+   "UNKNOWN",
+   "1D",
+   "2D",
+   "3D",
+   "CUBE",
+   "RECT",
+   "SHADOW1D",
+   "SHADOW2D",
+   "SHADOWRECT"
 };
 
 static boolean parse_instruction( struct translate_ctx *ctx )
@@ -560,7 +574,7 @@ static boolean parse_instruction( struct translate_ctx *ctx )
 
    /* Parse instruction operands.
     */
-   for (i = 0; i < info->num_dst + info->num_src; i++) {
+   for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) {
       if (i > 0) {
          eat_opt_white( &ctx->cur );
          if (*ctx->cur != ',') {
@@ -575,10 +589,26 @@ static boolean parse_instruction( struct translate_ctx *ctx )
          if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] ))
             return FALSE;
       }
-      else {
+      else if (i < info->num_dst + info->num_src) {
          if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] ))
             return FALSE;
       }
+      else {
+         uint j;
+
+         for (j = 0; j < TGSI_TEXTURE_COUNT; j++) {
+            if (str_match_no_case( &ctx->cur, texture_names[j] )) {
+               if (!is_digit_alpha_underscore( ctx->cur )) {
+                  inst.InstructionExtTexture.Texture = j;
+                  break;
+               }
+            }
+         }
+         if (j == TGSI_TEXTURE_COUNT) {
+            report_error( ctx, "Expected texture target" );
+            return FALSE;
+         }
+      }
    }
 
    advance = tgsi_build_full_instruction(
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 33268553ff7..c8e2830516a 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -529,6 +529,7 @@ struct tgsi_instruction_ext_label
 #define TGSI_TEXTURE_SHADOW1D       6
 #define TGSI_TEXTURE_SHADOW2D       7
 #define TGSI_TEXTURE_SHADOWRECT     8
+#define TGSI_TEXTURE_COUNT          9
 
 struct tgsi_instruction_ext_texture
 {
-- 
cgit v1.2.3


From a7d8eed61c1fd0bd3d99ebcd10c24bc70f8e3293 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 13 Jul 2008 14:03:21 +0200
Subject: tgsi: Parse IMM statements.

---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 111 +++++++++++++++++++++++++++-
 1 file changed, 110 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index 74850028627..d5a9480f4ae 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -100,6 +100,51 @@ static boolean parse_uint( const char **pcur, uint *val )
    return FALSE;
 }
 
+/* Parse floating point.
+ */
+static boolean parse_float( const char **pcur, float *val )
+{
+   const char *cur = *pcur;
+   boolean integral_part = FALSE;
+   boolean fractional_part = FALSE;
+
+   *val = (float) atof( cur );
+
+   if (*cur == '-' || *cur == '+')
+      cur++;
+   if (is_digit( cur )) {
+      cur++;
+      integral_part = TRUE;
+      while (is_digit( cur ))
+         cur++;
+   }
+   if (*cur == '.') {
+      cur++;
+      if (is_digit( cur )) {
+         cur++;
+         fractional_part = TRUE;
+         while (is_digit( cur ))
+            cur++;
+      }
+   }
+   if (!integral_part && !fractional_part)
+      return FALSE;
+   if (toupper( *cur ) == 'E') {
+      cur++;
+      if (*cur == '-' || *cur == '+')
+         cur++;
+      if (is_digit( cur )) {
+         cur++;
+         while (is_digit( cur ))
+            cur++;
+      }
+      else
+         return FALSE;
+   }
+   *pcur = cur;
+   return TRUE;
+}
+
 struct translate_ctx
 {
    const char *text;
@@ -744,6 +789,66 @@ static boolean parse_declaration( struct translate_ctx *ctx )
    return TRUE;
 }
 
+static boolean parse_immediate( struct translate_ctx *ctx )
+{
+   struct tgsi_full_immediate imm;
+   uint i;
+   float values[4];
+   uint advance;
+
+   if (!eat_white( &ctx->cur )) {
+      report_error( ctx, "Syntax error" );
+      return FALSE;
+   }
+   if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) {
+      report_error( ctx, "Expected `FLT32'" );
+      return FALSE;
+   }
+   eat_opt_white( &ctx->cur );
+   if (*ctx->cur != '{') {
+      report_error( ctx, "Expected `{'" );
+      return FALSE;
+   }
+   ctx->cur++;
+   for (i = 0; i < 4; i++) {
+      eat_opt_white( &ctx->cur );
+      if (i > 0) {
+         if (*ctx->cur != ',') {
+            report_error( ctx, "Expected `,'" );
+            return FALSE;
+         }
+         ctx->cur++;
+         eat_opt_white( &ctx->cur );
+      }
+      if (!parse_float( &ctx->cur, &values[i] )) {
+         report_error( ctx, "Expected literal floating point" );
+         return FALSE;
+      }
+   }
+   eat_opt_white( &ctx->cur );
+   if (*ctx->cur != '}') {
+      report_error( ctx, "Expected `}'" );
+      return FALSE;
+   }
+   ctx->cur++;
+
+   imm = tgsi_default_full_immediate();
+   imm.Immediate.Size += 4;
+   imm.Immediate.DataType = TGSI_IMM_FLOAT32;
+   imm.u.Pointer = values;
+
+   advance = tgsi_build_full_immediate(
+      &imm,
+      ctx->tokens_cur,
+      ctx->header,
+      (uint) (ctx->tokens_end - ctx->tokens_cur) );
+   if (advance == 0)
+      return FALSE;
+   ctx->tokens_cur += advance;
+
+   return TRUE;
+}
+
 static boolean translate( struct translate_ctx *ctx )
 {
    eat_opt_white( &ctx->cur );
@@ -766,8 +871,12 @@ static boolean translate( struct translate_ctx *ctx )
          if (!parse_declaration( ctx ))
             return FALSE;
       }
+      else if (str_match_no_case( &ctx->cur, "IMM" )) {
+         if (!parse_immediate( ctx ))
+            return FALSE;
+      }
       else {
-         report_error( ctx, "Expected `DCL' or a label" );
+         report_error( ctx, "Expected `DCL', `IMM' or a label" );
          return FALSE;
       }
    }
-- 
cgit v1.2.3


From 47a45aaa0fd1dbc0de45de2ed2995f81a0154baf Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 13 Jul 2008 14:50:12 +0200
Subject: tgsi: Parse _SAT and _SAT opcode suffix.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 2 +-
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index cae74a92672..8220e091993 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -716,7 +716,7 @@ tgsi_dump_instruction(
       TXT( "_SAT" );
       break;
    case TGSI_SAT_MINUS_PLUS_ONE:
-      TXT( "_SAT[-1,1]" );
+      TXT( "_SATNV" );
       break;
    default:
       assert( 0 );
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index d5a9480f4ae..f3fc6758078 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -588,6 +588,7 @@ static const char *texture_names[TGSI_TEXTURE_COUNT] =
 static boolean parse_instruction( struct translate_ctx *ctx )
 {
    uint i;
+   uint saturate = TGSI_SAT_NONE;
    const struct opcode_info *info;
    struct tgsi_full_instruction inst;
    uint advance;
@@ -599,7 +600,11 @@ static boolean parse_instruction( struct translate_ctx *ctx )
       const char *cur = ctx->cur;
 
       if (str_match_no_case( &cur, opcode_info[i].mnemonic )) {
-         /* TODO: _SAT suffix */
+         if (str_match_no_case( &cur, "_SATNV" ))
+            saturate = TGSI_SAT_MINUS_PLUS_ONE;
+         else if (str_match_no_case( &cur, "_SAT" ))
+            saturate = TGSI_SAT_ZERO_ONE;
+
          if (*cur == '\0' || eat_white( &cur )) {
             ctx->cur = cur;
             break;
@@ -614,6 +619,7 @@ static boolean parse_instruction( struct translate_ctx *ctx )
 
    inst = tgsi_default_full_instruction();
    inst.Instruction.Opcode = i;
+   inst.Instruction.Saturate = saturate;
    inst.Instruction.NumDstRegs = info->num_dst;
    inst.Instruction.NumSrcRegs = info->num_src;
 
-- 
cgit v1.2.3


From 94013b66b91112f31802c3d935e8d918ba12be62 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 13 Jul 2008 15:14:31 +0200
Subject: tgsi: Parse extended source register modifiers.

---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 127 +++++++++++++++++++++++++++-
 1 file changed, 126 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index f3fc6758078..c618a50fb9a 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -393,14 +393,78 @@ parse_src_operand(
    struct tgsi_full_src_register *src )
 {
    const char *cur;
+   float value;
    uint file;
    uint index;
 
-   /* TODO: Extended register modifiers */
+   if (*ctx->cur == '-') {
+      cur = ctx->cur;
+      cur++;
+      eat_opt_white( &cur );
+      if (*cur == '(') {
+         cur++;
+         src->SrcRegisterExtMod.Negate = 1;
+         eat_opt_white( &cur );
+         ctx->cur = cur;
+      }
+   }
+
+   if (*ctx->cur == '|') {
+      ctx->cur++;
+      eat_opt_white( &ctx->cur );
+      src->SrcRegisterExtMod.Absolute = 1;
+   }
+
    if (*ctx->cur == '-') {
       ctx->cur++;
+      eat_opt_white( &ctx->cur );
       src->SrcRegister.Negate = 1;
+   }
+
+   cur = ctx->cur;
+   if (parse_float( &cur, &value )) {
+      if (value == 2.0f) {
+         eat_opt_white( &cur );
+         if (*cur != '*') {
+            report_error( ctx, "Expected `*'" );
+            return FALSE;
+         }
+         cur++;
+         if (*cur != '(') {
+            report_error( ctx, "Expected `('" );
+            return FALSE;
+         }
+         cur++;
+         src->SrcRegisterExtMod.Scale2X = 1;
+         eat_opt_white( &cur );
+         ctx->cur = cur;
+      }
+   }
+
+   if (*ctx->cur == '(') {
+      ctx->cur++;
       eat_opt_white( &ctx->cur );
+      src->SrcRegisterExtMod.Bias = 1;
+   }
+
+   cur = ctx->cur;
+   if (parse_float( &cur, &value )) {
+      if (value == 1.0f) {
+         eat_opt_white( &cur );
+         if (*cur != '-') {
+            report_error( ctx, "Expected `-'" );
+            return FALSE;
+         }
+         cur++;
+         if (*cur != '(') {
+            report_error( ctx, "Expected `('" );
+            return FALSE;
+         }
+         cur++;
+         src->SrcRegisterExtMod.Complement = 1;
+         eat_opt_white( &cur );
+         ctx->cur = cur;
+      }
    }
 
    if (!parse_register( ctx, &file, &index ))
@@ -438,6 +502,67 @@ parse_src_operand(
 
       ctx->cur = cur;
    }
+
+   if (src->SrcRegisterExtMod.Complement) {
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != ')') {
+         report_error( ctx, "Expected `)'" );
+         return FALSE;
+      }
+      ctx->cur++;
+   }
+
+   if (src->SrcRegisterExtMod.Bias) {
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != ')') {
+         report_error( ctx, "Expected `)'" );
+         return FALSE;
+      }
+      ctx->cur++;
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != '-') {
+         report_error( ctx, "Expected `-'" );
+         return FALSE;
+      }
+      ctx->cur++;
+      eat_opt_white( &ctx->cur );
+      if (!parse_float( &ctx->cur, &value )) {
+         report_error( ctx, "Expected literal floating point" );
+         return FALSE;
+      }
+      if (value != 0.5f) {
+         report_error( ctx, "Expected 0.5" );
+         return FALSE;
+      }
+   }
+
+   if (src->SrcRegisterExtMod.Scale2X) {
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != ')') {
+         report_error( ctx, "Expected `)'" );
+         return FALSE;
+      }
+      ctx->cur++;
+   }
+
+   if (src->SrcRegisterExtMod.Absolute) {
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != '|') {
+         report_error( ctx, "Expected `|'" );
+         return FALSE;
+      }
+      ctx->cur++;
+   }
+
+   if (src->SrcRegisterExtMod.Negate) {
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != ')') {
+         report_error( ctx, "Expected `)'" );
+         return FALSE;
+      }
+      ctx->cur++;
+   }
+
    return TRUE;
 }
 
-- 
cgit v1.2.3


From f5c51ebd2afdfc87de40dca115526a5e0f6ab115 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 13 Jul 2008 15:23:14 +0200
Subject: tgsi: Parse destination operand modulate modifier.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 36 ++++++++++-------------------
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 30 ++++++++++++++++++++++++
 src/gallium/include/pipe/p_shader_tokens.h  |  1 +
 3 files changed, 43 insertions(+), 24 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 8220e091993..0cf4454f254 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -539,6 +539,17 @@ static const char *TGSI_MODULATES[] =
    "MODULATE_EIGHTH"
 };
 
+static const char *TGSI_MODULATES_SHORT[TGSI_MODULATE_COUNT] =
+{
+   "",
+   "_2X",
+   "_4X",
+   "_8X",
+   "_D2",
+   "_D4",
+   "_D8"
+};
+
 void
 tgsi_dump_declaration(
    const struct tgsi_full_declaration  *decl )
@@ -736,30 +747,7 @@ tgsi_dump_instruction(
       SID( dst->DstRegister.Index );
       CHR( ']' );
 
-      switch (dst->DstRegisterExtModulate.Modulate) {
-      case TGSI_MODULATE_1X:
-         break;
-      case TGSI_MODULATE_2X:
-         TXT( "_2X" );
-         break;
-      case TGSI_MODULATE_4X:
-         TXT( "_4X" );
-         break;
-      case TGSI_MODULATE_8X:
-         TXT( "_8X" );
-         break;
-      case TGSI_MODULATE_HALF:
-         TXT( "_D2" );
-         break;
-      case TGSI_MODULATE_QUARTER:
-         TXT( "_D4" );
-         break;
-      case TGSI_MODULATE_EIGHTH:
-         TXT( "_D8" );
-         break;
-      default:
-         assert( 0 );
-      }
+      ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES_SHORT );
 
       if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) {
          CHR( '.' );
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index c618a50fb9a..1b283feb4c8 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -368,6 +368,17 @@ parse_register_dcl(
    return TRUE;
 }
 
+static const char *modulate_names[TGSI_MODULATE_COUNT] =
+{
+   "_1X",
+   "_2X",
+   "_4X",
+   "_8X",
+   "_D2",
+   "_D4",
+   "_D8"
+};
+
 static boolean
 parse_dst_operand(
    struct translate_ctx *ctx,
@@ -376,11 +387,30 @@ parse_dst_operand(
    uint file;
    uint index;
    uint writemask;
+   const char *cur;
 
    if (!parse_register( ctx, &file, &index ))
       return FALSE;
+
+   cur = ctx->cur;
+   eat_opt_white( &cur );
+   if (*cur == '_') {
+      uint i;
+
+      for (i = 0; i < TGSI_MODULATE_COUNT; i++) {
+         if (str_match_no_case( &cur, modulate_names[i] )) {
+            if (!is_digit_alpha_underscore( cur )) {
+               dst->DstRegisterExtModulate.Modulate = i;
+               ctx->cur = cur;
+               break;
+            }
+         }
+      }
+   }
+
    if (!parse_opt_writemask( ctx, &writemask ))
       return FALSE;
+
    dst->DstRegister.File = file;
    dst->DstRegister.Index = index;
    dst->DstRegister.WriteMask = writemask;
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index c8e2830516a..ed931d24b99 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -743,6 +743,7 @@ struct tgsi_dst_register_ext_concode
 #define TGSI_MODULATE_HALF      4
 #define TGSI_MODULATE_QUARTER   5
 #define TGSI_MODULATE_EIGHTH    6
+#define TGSI_MODULATE_COUNT     7
 
 struct tgsi_dst_register_ext_modulate
 {
-- 
cgit v1.2.3


From 6eb7f763fbbbb7a32640760cd5d122020866fea1 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Mon, 14 Jul 2008 18:08:52 -0600
Subject: tgsi: fix bug in execution of loops inside of conditionals.

Fixes infinite loop bug.
---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 46949661aff..001a4c4b152 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -2400,7 +2400,8 @@ exec_instruction(
       /* Restore ContMask, but don't pop */
       assert(mach->ContStackTop > 0);
       mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
-      if (mach->LoopMask) {
+      UPDATE_EXEC_MASK(mach);
+      if (mach->ExecMask) {
          /* repeat loop: jump to instruction just past BGNLOOP */
          *pc = inst->InstructionExtLabel.Label + 1;
       }
-- 
cgit v1.2.3


From c60e009a9138a573d2219ee2ad85e3203b09a7ae Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Tue, 15 Jul 2008 10:56:30 +0200
Subject: tgsi: Numerical label before an instruction is optional.

---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index 1b283feb4c8..6cab475b889 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -740,7 +740,10 @@ static const char *texture_names[TGSI_TEXTURE_COUNT] =
    "SHADOWRECT"
 };
 
-static boolean parse_instruction( struct translate_ctx *ctx )
+static boolean
+parse_instruction(
+   struct translate_ctx *ctx,
+   boolean has_label )
 {
    uint i;
    uint saturate = TGSI_SAT_NONE;
@@ -767,7 +770,10 @@ static boolean parse_instruction( struct translate_ctx *ctx )
       }
    }
    if (i == TGSI_OPCODE_LAST) {
-      report_error( ctx, "Unknown opcode" );
+      if (has_label)
+         report_error( ctx, "Unknown opcode" );
+      else
+         report_error( ctx, "Expected `DCL', `IMM' or a label" );
       return FALSE;
    }
    info = &opcode_info[i];
@@ -1025,7 +1031,7 @@ static boolean translate( struct translate_ctx *ctx )
       }
 
       if (parse_label( ctx, &label_val )) {
-         if (!parse_instruction( ctx ))
+         if (!parse_instruction( ctx, TRUE ))
             return FALSE;
       }
       else if (str_match_no_case( &ctx->cur, "DCL" )) {
@@ -1036,8 +1042,7 @@ static boolean translate( struct translate_ctx *ctx )
          if (!parse_immediate( ctx ))
             return FALSE;
       }
-      else {
-         report_error( ctx, "Expected `DCL', `IMM' or a label" );
+      else if (!parse_instruction( ctx, FALSE )) {
          return FALSE;
       }
    }
-- 
cgit v1.2.3


From 2727702b1731a478de8806481416080d02af5862 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Thu, 17 Jul 2008 20:19:40 +0200
Subject: tgsi: New file.

---
 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 258 ++++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/util/tgsi_sanity.h |  49 +++++
 2 files changed, 307 insertions(+)
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_sanity.h

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
new file mode 100644
index 00000000000..377309c3fd0
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
@@ -0,0 +1,258 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "tgsi_sanity.h"
+#include "tgsi_transform.h"
+
+#define MAX_REGISTERS 256
+
+typedef uint reg_flag;
+
+#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8)
+
+struct sanity_check_ctx
+{
+   struct tgsi_transform_context xform;
+
+   reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8];
+   reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8];
+   uint num_imms;
+   uint num_instructions;
+   uint index_of_END;
+
+   uint errors;
+   uint warnings;
+};
+
+static void
+report_error(
+   struct sanity_check_ctx *ctx,
+   const char *msg )
+{
+   debug_printf( "\nError: %s", msg );
+   ctx->errors++;
+}
+
+static void
+report_warning(
+   struct sanity_check_ctx *ctx,
+   const char *msg )
+{
+   debug_printf( "\nWarning: %s", msg );
+   ctx->warnings++;
+}
+
+static boolean
+check_file_name(
+   struct sanity_check_ctx *ctx,
+   uint file )
+{
+   if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) {
+      report_error( ctx, "Invalid file name" );
+      return FALSE;
+   }
+   return TRUE;
+}
+
+static boolean
+is_register_declared(
+   struct sanity_check_ctx *ctx,
+   uint file,
+   uint index )
+{
+   assert( index < MAX_REGISTERS );
+
+   return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
+}
+
+static boolean
+is_register_used(
+   struct sanity_check_ctx *ctx,
+   uint file,
+   uint index )
+{
+   assert( index < MAX_REGISTERS );
+
+   return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
+}
+
+static void xform_instruction(
+   struct tgsi_transform_context *xform,
+   struct tgsi_full_instruction *inst )
+{
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform;
+   uint i;
+
+   /* There must be no other instructions after END.
+    */
+   if (ctx->index_of_END != ~0) {
+      report_error( ctx, "Unexpected instruction after END" );
+   }
+   else if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
+      ctx->index_of_END = ctx->num_instructions;
+   }
+
+   /* Check destination and source registers' validity.
+    * Mark the registers as used.
+    */
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+      uint file;
+      uint index;
+
+      file = inst->FullDstRegisters[i].DstRegister.File;
+      if (!check_file_name( ctx, file ))
+         return;
+      index = inst->FullDstRegisters[i].DstRegister.Index;
+      if (!is_register_declared( ctx, file, index ))
+         report_error( ctx, "Undeclared destination register" );
+      ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG));
+   }
+   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+      uint file;
+      uint index;
+
+      file = inst->FullSrcRegisters[i].SrcRegister.File;
+      if (!check_file_name( ctx, file ))
+         return;
+      index = inst->FullSrcRegisters[i].SrcRegister.Index;
+      if (!is_register_declared( ctx, file, index ))
+         report_error( ctx, "Undeclared source register" );
+      ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG));
+   }
+
+   ctx->num_instructions++;
+}
+
+static void xform_declaration(
+   struct tgsi_transform_context *xform,
+   struct tgsi_full_declaration *decl )
+{
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform;
+   uint file;
+   uint i;
+
+   /* No declarations allowed after the first instruction.
+    */
+   if (ctx->num_instructions > 0)
+      report_error( ctx, "Instruction expected but declaration found" );
+
+   /* Check registers' validity.
+    * Mark the registers as declared.
+    */
+   file = decl->Declaration.File;
+   if (!check_file_name( ctx, file ))
+      return;
+   for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) {
+      if (is_register_declared( ctx, file, i ))
+         report_error( ctx, "The same register declared twice" );
+      ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG));
+   }
+}
+
+static void xform_immediate(
+   struct tgsi_transform_context *xform,
+   struct tgsi_full_immediate *imm )
+{
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform;
+
+   assert( ctx->num_imms < MAX_REGISTERS );
+
+   /* No immediates allowed after the first instruction.
+    */
+   if (ctx->num_instructions > 0)
+      report_error( ctx, "Instruction expected but immediate found" );
+
+   /* Mark the register as declared.
+    */
+   ctx->num_imms++;
+   ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG));
+
+   /* Check data type validity.
+    */
+   if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) {
+      report_error( ctx, "Invalid immediate data type" );
+      return;
+   }
+}
+
+static void epilog(
+   struct tgsi_transform_context *xform )
+{
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform;
+   uint file;
+
+   /* There must be an END instruction at the end.
+    */
+   if (ctx->index_of_END == ~0 || ctx->index_of_END != ctx->num_instructions - 1) {
+      report_error( ctx, "Expected END at end of instruction sequence" );
+   }
+
+   /* Check if all declared registers were used.
+    */
+   for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) {
+      uint i;
+
+      for (i = 0; i < MAX_REGISTERS; i++) {
+         if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i )) {
+            report_warning( ctx, "Register never used" );
+         }
+      }
+   }
+
+   /* Print totals, if any.
+    */
+   if (ctx->errors || ctx->warnings)
+      debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings );
+}
+
+boolean
+tgsi_sanity_check(
+   struct tgsi_token *tokens )
+{
+   struct sanity_check_ctx ctx;
+   struct tgsi_token dummy_tokens[16];
+
+   ctx.xform.transform_instruction = xform_instruction;
+   ctx.xform.transform_declaration = xform_declaration;
+   ctx.xform.transform_immediate = xform_immediate;
+   ctx.xform.epilog = epilog;
+
+   memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) );
+   memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) );
+   ctx.num_imms = 0;
+   ctx.num_instructions = 0;
+   ctx.index_of_END = ~0;
+
+   ctx.errors = 0;
+   ctx.warnings = 0;
+
+   if (tgsi_transform_shader( tokens, dummy_tokens, 16, &ctx.xform ) == -1)
+      return FALSE;
+
+   return ctx.errors > 0;
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h
new file mode 100644
index 00000000000..ca45e94c7ad
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h
@@ -0,0 +1,49 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_SANITY_H
+#define TGSI_SANITY_H
+
+#include "pipe/p_shader_tokens.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+/* Check the given token stream for errors and common mistakes.
+ * Diagnostic messages are printed out to the debug output.
+ * Returns TRUE if there are no errors, even though there could be some warnings.
+ */
+boolean
+tgsi_sanity_check(
+   struct tgsi_token *tokens );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_SANITY_H */
-- 
cgit v1.2.3


From 3c5ec98e45880d6a896f3c094f8004000f50a149 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Thu, 17 Jul 2008 20:22:47 +0200
Subject: scons: List util/tgsi_sanity.c.

---
 src/gallium/auxiliary/tgsi/SConscript | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index b62c8efe027..67230f6ce03 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -8,6 +8,7 @@ tgsi = env.ConvenienceLibrary(
 		'util/tgsi_build.c',
 		'util/tgsi_dump.c',
 		'util/tgsi_parse.c',
+		'util/tgsi_sanity.c',
 		'util/tgsi_scan.c',
 		'util/tgsi_text.c',
 		'util/tgsi_transform.c',
-- 
cgit v1.2.3


From 10d1dc68a413eaf642bf1bda2e5452835f7b7050 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Thu, 17 Jul 2008 20:23:26 +0200
Subject: tgsi: Perform a sanity check after reading a shader from text.

---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index 6cab475b889..803f7a23b8e 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -29,6 +29,7 @@
 #include "tgsi_text.h"
 #include "tgsi_build.h"
 #include "tgsi_parse.h"
+#include "tgsi_sanity.h"
 #include "tgsi_util.h"
 
 static boolean is_alpha_underscore( const char *cur )
@@ -1064,5 +1065,8 @@ tgsi_text_translate(
    ctx.tokens_cur = tokens;
    ctx.tokens_end = tokens + num_tokens;
 
-   return translate( &ctx );
+   if (!translate( &ctx ))
+      return FALSE;
+
+   return tgsi_sanity_check( tokens );
 }
-- 
cgit v1.2.3


From f2053cfa1ac4e4e2e0083670aac5df766adad5f9 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Thu, 17 Jul 2008 20:40:13 +0200
Subject: tgsi: Fix parsing an instruction with no operands.

---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index 803f7a23b8e..9ed0f52fc7c 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -758,13 +758,20 @@ parse_instruction(
    for (i = 0; i < TGSI_OPCODE_LAST; i++) {
       const char *cur = ctx->cur;
 
-      if (str_match_no_case( &cur, opcode_info[i].mnemonic )) {
+      info = &opcode_info[i];
+      if (str_match_no_case( &cur, info->mnemonic )) {
          if (str_match_no_case( &cur, "_SATNV" ))
             saturate = TGSI_SAT_MINUS_PLUS_ONE;
          else if (str_match_no_case( &cur, "_SAT" ))
             saturate = TGSI_SAT_ZERO_ONE;
 
-         if (*cur == '\0' || eat_white( &cur )) {
+         if (info->num_dst + info->num_src + info->is_tex == 0) {
+            if (!is_digit_alpha_underscore( cur )) {
+               ctx->cur = cur;
+               break;
+            }
+         }
+         else if (*cur == '\0' || eat_white( &cur )) {
             ctx->cur = cur;
             break;
          }
@@ -777,7 +784,6 @@ parse_instruction(
          report_error( ctx, "Expected `DCL', `IMM' or a label" );
       return FALSE;
    }
-   info = &opcode_info[i];
 
    inst = tgsi_default_full_instruction();
    inst.Instruction.Opcode = i;
@@ -1031,6 +1037,9 @@ static boolean translate( struct translate_ctx *ctx )
          return FALSE;
       }
 
+      if (*ctx->cur == '\0')
+         break;
+
       if (parse_label( ctx, &label_val )) {
          if (!parse_instruction( ctx, TRUE ))
             return FALSE;
-- 
cgit v1.2.3


From 638ecbda3eed8319ce82be9c119ca1f96f21d976 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Fri, 18 Jul 2008 00:39:23 +0200
Subject: tgsi: Add tgsi_iterate_shader utility.

Walks the token stream and invokes callbacks.
---
 src/gallium/auxiliary/tgsi/util/tgsi_iterate.c | 82 ++++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/util/tgsi_iterate.h | 73 +++++++++++++++++++++++
 2 files changed, 155 insertions(+)
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_iterate.c
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_iterate.h

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c
new file mode 100644
index 00000000000..43d7a6b1d59
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c
@@ -0,0 +1,82 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "tgsi_iterate.h"
+
+boolean
+tgsi_iterate_shader(
+   const struct tgsi_token *tokens,
+   struct tgsi_iterate_context *ctx )
+{
+   struct tgsi_parse_context parse;
+
+   if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK)
+      return FALSE;
+
+   if (ctx->prolog)
+      if (!ctx->prolog( ctx ))
+         goto fail;
+
+   while (!tgsi_parse_end_of_tokens( &parse )) {
+      tgsi_parse_token( &parse );
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (ctx->iterate_instruction)
+            if (!ctx->iterate_instruction( ctx, &parse.FullToken.FullInstruction ))
+               goto fail;
+         break;
+
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         if (ctx->iterate_declaration)
+            if (!ctx->iterate_declaration( ctx, &parse.FullToken.FullDeclaration ))
+               goto fail;
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         if (ctx->iterate_immediate)
+            if (!ctx->iterate_immediate( ctx, &parse.FullToken.FullImmediate ))
+               goto fail;
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+
+   if (ctx->epilog)
+      if (!ctx->epilog( ctx ))
+         goto fail;
+
+   tgsi_parse_free( &parse );
+   return TRUE;
+
+fail:
+   tgsi_parse_free( &parse );
+   return FALSE;
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h
new file mode 100644
index 00000000000..18729c2d1a8
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h
@@ -0,0 +1,73 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_ITERATE_H
+#define TGSI_ITERATE_H
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+struct tgsi_iterate_context
+{
+   boolean
+   (* prolog)(
+      struct tgsi_iterate_context *ctx );
+
+   boolean
+   (* iterate_instruction)(
+      struct tgsi_iterate_context *ctx,
+      struct tgsi_full_instruction *inst );
+
+   boolean
+   (* iterate_declaration)(
+      struct tgsi_iterate_context *ctx,
+      struct tgsi_full_declaration *decl );
+
+   boolean
+   (* iterate_immediate)(
+      struct tgsi_iterate_context *ctx,
+      struct tgsi_full_immediate *imm );
+
+   boolean
+   (* epilog)(
+      struct tgsi_iterate_context *ctx );
+};
+
+boolean
+tgsi_iterate_shader(
+   const struct tgsi_token *tokens,
+   struct tgsi_iterate_context *ctx );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_ITERATE_H */
-- 
cgit v1.2.3


From e1fd3ea415db1bc80d0bc6d94645eaac60bb4121 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Fri, 18 Jul 2008 00:40:28 +0200
Subject: scons: List util/tgsi_iterate.c.

---
 src/gallium/auxiliary/tgsi/SConscript | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 67230f6ce03..54f5c75db47 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -7,6 +7,7 @@ tgsi = env.ConvenienceLibrary(
 		'exec/tgsi_sse2.c',
 		'util/tgsi_build.c',
 		'util/tgsi_dump.c',
+		'util/tgsi_iterate.c',
 		'util/tgsi_parse.c',
 		'util/tgsi_sanity.c',
 		'util/tgsi_scan.c',
-- 
cgit v1.2.3


From 1e5419fa3061386413a98b75d0908cb3e3c6894e Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Fri, 18 Jul 2008 00:41:45 +0200
Subject: tgsi: Express tgsi_sanity in terms of tgsi_iterate.

---
 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 60 ++++++++++++++++-----------
 1 file changed, 36 insertions(+), 24 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
index 377309c3fd0..933127e661b 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
@@ -27,7 +27,7 @@
 
 #include "pipe/p_debug.h"
 #include "tgsi_sanity.h"
-#include "tgsi_transform.h"
+#include "tgsi_iterate.h"
 
 #define MAX_REGISTERS 256
 
@@ -37,7 +37,7 @@ typedef uint reg_flag;
 
 struct sanity_check_ctx
 {
-   struct tgsi_transform_context xform;
+   struct tgsi_iterate_context iter;
 
    reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8];
    reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8];
@@ -101,11 +101,12 @@ is_register_used(
    return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
 }
 
-static void xform_instruction(
-   struct tgsi_transform_context *xform,
+static boolean
+iter_instruction(
+   struct tgsi_iterate_context *iter,
    struct tgsi_full_instruction *inst )
 {
-   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform;
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
    uint i;
 
    /* There must be no other instructions after END.
@@ -126,7 +127,7 @@ static void xform_instruction(
 
       file = inst->FullDstRegisters[i].DstRegister.File;
       if (!check_file_name( ctx, file ))
-         return;
+         return TRUE;
       index = inst->FullDstRegisters[i].DstRegister.Index;
       if (!is_register_declared( ctx, file, index ))
          report_error( ctx, "Undeclared destination register" );
@@ -138,7 +139,7 @@ static void xform_instruction(
 
       file = inst->FullSrcRegisters[i].SrcRegister.File;
       if (!check_file_name( ctx, file ))
-         return;
+         return TRUE;
       index = inst->FullSrcRegisters[i].SrcRegister.Index;
       if (!is_register_declared( ctx, file, index ))
          report_error( ctx, "Undeclared source register" );
@@ -146,13 +147,16 @@ static void xform_instruction(
    }
 
    ctx->num_instructions++;
+
+   return TRUE;
 }
 
-static void xform_declaration(
-   struct tgsi_transform_context *xform,
+static boolean
+iter_declaration(
+   struct tgsi_iterate_context *iter,
    struct tgsi_full_declaration *decl )
 {
-   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform;
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
    uint file;
    uint i;
 
@@ -166,19 +170,22 @@ static void xform_declaration(
     */
    file = decl->Declaration.File;
    if (!check_file_name( ctx, file ))
-      return;
+      return TRUE;
    for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) {
       if (is_register_declared( ctx, file, i ))
          report_error( ctx, "The same register declared twice" );
       ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG));
    }
+
+   return TRUE;
 }
 
-static void xform_immediate(
-   struct tgsi_transform_context *xform,
+static boolean
+iter_immediate(
+   struct tgsi_iterate_context *iter,
    struct tgsi_full_immediate *imm )
 {
-   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform;
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
 
    assert( ctx->num_imms < MAX_REGISTERS );
 
@@ -196,14 +203,17 @@ static void xform_immediate(
     */
    if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) {
       report_error( ctx, "Invalid immediate data type" );
-      return;
+      return TRUE;
    }
+
+   return TRUE;
 }
 
-static void epilog(
-   struct tgsi_transform_context *xform )
+static boolean
+epilog(
+   struct tgsi_iterate_context *iter )
 {
-   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform;
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
    uint file;
 
    /* There must be an END instruction at the end.
@@ -228,6 +238,8 @@ static void epilog(
     */
    if (ctx->errors || ctx->warnings)
       debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings );
+
+   return TRUE;
 }
 
 boolean
@@ -235,12 +247,12 @@ tgsi_sanity_check(
    struct tgsi_token *tokens )
 {
    struct sanity_check_ctx ctx;
-   struct tgsi_token dummy_tokens[16];
 
-   ctx.xform.transform_instruction = xform_instruction;
-   ctx.xform.transform_declaration = xform_declaration;
-   ctx.xform.transform_immediate = xform_immediate;
-   ctx.xform.epilog = epilog;
+   ctx.iter.prolog = NULL;
+   ctx.iter.iterate_instruction = iter_instruction;
+   ctx.iter.iterate_declaration = iter_declaration;
+   ctx.iter.iterate_immediate = iter_immediate;
+   ctx.iter.epilog = epilog;
 
    memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) );
    memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) );
@@ -251,7 +263,7 @@ tgsi_sanity_check(
    ctx.errors = 0;
    ctx.warnings = 0;
 
-   if (tgsi_transform_shader( tokens, dummy_tokens, 16, &ctx.xform ) == -1)
+   if (tgsi_iterate_shader( tokens, &ctx.iter ) == -1)
       return FALSE;
 
    return ctx.errors > 0;
-- 
cgit v1.2.3


From ff26c50153b3a348b35843262ceb27062ab37214 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Sat, 19 Jul 2008 11:52:41 +0900
Subject: tgsi: Make tgsi_sanity return TRUE on success as documented.

---
 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
index 933127e661b..7fc4c29c685 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
@@ -266,5 +266,5 @@ tgsi_sanity_check(
    if (tgsi_iterate_shader( tokens, &ctx.iter ) == -1)
       return FALSE;
 
-   return ctx.errors > 0;
+   return ctx.errors == 0;
 }
-- 
cgit v1.2.3


From 5ded4ffc506eb051b151d3e8b1e71b13576e951a Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 16:01:50 +0200
Subject: tgsi: Split tgsi_dump into two modules.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c   | 791 +-----------------------
 src/gallium/auxiliary/tgsi/util/tgsi_dump.h   |  17 +-
 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c | 853 ++++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h |  49 ++
 4 files changed, 916 insertions(+), 794 deletions(-)
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
 create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 0cf4454f254..5a84b99166b 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -27,7 +27,6 @@
 
 #include "pipe/p_debug.h"
 #include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
 #include "util/u_string.h"
 #include "tgsi_dump.h"
 #include "tgsi_parse.h"
@@ -56,13 +55,6 @@ dump_enum(
 #define FLT(F)          debug_printf( "%10.4f", F )
 #define ENM(E,ENUMS)    dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
 
-static const char *TGSI_PROCESSOR_TYPES[] =
-{
-   "PROCESSOR_FRAGMENT",
-   "PROCESSOR_VERTEX",
-   "PROCESSOR_GEOMETRY"
-};
-
 static const char *TGSI_PROCESSOR_TYPES_SHORT[] =
 {
    "FRAG",
@@ -70,25 +62,6 @@ static const char *TGSI_PROCESSOR_TYPES_SHORT[] =
    "GEOM"
 };
 
-static const char *TGSI_TOKEN_TYPES[] =
-{
-   "TOKEN_TYPE_DECLARATION",
-   "TOKEN_TYPE_IMMEDIATE",
-   "TOKEN_TYPE_INSTRUCTION"
-};
-
-static const char *TGSI_FILES[] =
-{
-   "FILE_NULL",
-   "FILE_CONSTANT",
-   "FILE_INPUT",
-   "FILE_OUTPUT",
-   "FILE_TEMPORARY",
-   "FILE_SAMPLER",
-   "FILE_ADDRESS",
-   "FILE_IMMEDIATE"
-};
-
 static const char *TGSI_FILES_SHORT[] =
 {
    "NULL",
@@ -101,20 +74,6 @@ static const char *TGSI_FILES_SHORT[] =
    "IMM"
 };
 
-static const char *TGSI_DECLARES[] =
-{
-   "DECLARE_RANGE",
-   "DECLARE_MASK"
-};
-
-static const char *TGSI_INTERPOLATES[] =
-{
-   "INTERPOLATE_CONSTANT",
-   "INTERPOLATE_LINEAR",
-   "INTERPOLATE_PERSPECTIVE",
-   "INTERPOLATE_ATTRIB"
-};
-
 static const char *TGSI_INTERPOLATES_SHORT[] =
 {
    "CONSTANT",
@@ -122,17 +81,6 @@ static const char *TGSI_INTERPOLATES_SHORT[] =
    "PERSPECTIVE"
 };
 
-static const char *TGSI_SEMANTICS[] =
-{
-   "SEMANTIC_POSITION",
-   "SEMANTIC_COLOR",
-   "SEMANTIC_BCOLOR",
-   "SEMANTIC_FOG",
-   "SEMANTIC_PSIZE",
-   "SEMANTIC_GENERIC",
-   "SEMANTIC_NORMAL"
-};
-
 static const char *TGSI_SEMANTICS_SHORT[] =
 {
    "POSITION",
@@ -144,138 +92,11 @@ static const char *TGSI_SEMANTICS_SHORT[] =
    "NORMAL"
 };
 
-static const char *TGSI_IMMS[] =
-{
-   "IMM_FLOAT32"
-};
-
 static const char *TGSI_IMMS_SHORT[] =
 {
    "FLT32"
 };
 
-static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] =
-{
-   "OPCODE_ARL",
-   "OPCODE_MOV",
-   "OPCODE_LIT",
-   "OPCODE_RCP",
-   "OPCODE_RSQ",
-   "OPCODE_EXP",
-   "OPCODE_LOG",
-   "OPCODE_MUL",
-   "OPCODE_ADD",
-   "OPCODE_DP3",
-   "OPCODE_DP4",
-   "OPCODE_DST",
-   "OPCODE_MIN",
-   "OPCODE_MAX",
-   "OPCODE_SLT",
-   "OPCODE_SGE",
-   "OPCODE_MAD",
-   "OPCODE_SUB",
-   "OPCODE_LERP",
-   "OPCODE_CND",
-   "OPCODE_CND0",
-   "OPCODE_DOT2ADD",
-   "OPCODE_INDEX",
-   "OPCODE_NEGATE",
-   "OPCODE_FRAC",
-   "OPCODE_CLAMP",
-   "OPCODE_FLOOR",
-   "OPCODE_ROUND",
-   "OPCODE_EXPBASE2",
-   "OPCODE_LOGBASE2",
-   "OPCODE_POWER",
-   "OPCODE_CROSSPRODUCT",
-   "OPCODE_MULTIPLYMATRIX",
-   "OPCODE_ABS",
-   "OPCODE_RCC",
-   "OPCODE_DPH",
-   "OPCODE_COS",
-   "OPCODE_DDX",
-   "OPCODE_DDY",
-   "OPCODE_KILP",
-   "OPCODE_PK2H",
-   "OPCODE_PK2US",
-   "OPCODE_PK4B",
-   "OPCODE_PK4UB",
-   "OPCODE_RFL",
-   "OPCODE_SEQ",
-   "OPCODE_SFL",
-   "OPCODE_SGT",
-   "OPCODE_SIN",
-   "OPCODE_SLE",
-   "OPCODE_SNE",
-   "OPCODE_STR",
-   "OPCODE_TEX",
-   "OPCODE_TXD",
-   "OPCODE_TXP",
-   "OPCODE_UP2H",
-   "OPCODE_UP2US",
-   "OPCODE_UP4B",
-   "OPCODE_UP4UB",
-   "OPCODE_X2D",
-   "OPCODE_ARA",
-   "OPCODE_ARR",
-   "OPCODE_BRA",
-   "OPCODE_CAL",
-   "OPCODE_RET",
-   "OPCODE_SSG",
-   "OPCODE_CMP",
-   "OPCODE_SCS",
-   "OPCODE_TXB",
-   "OPCODE_NRM",
-   "OPCODE_DIV",
-   "OPCODE_DP2",
-   "OPCODE_TXL",
-   "OPCODE_BRK",
-   "OPCODE_IF",
-   "OPCODE_LOOP",
-   "OPCODE_REP",
-   "OPCODE_ELSE",
-   "OPCODE_ENDIF",
-   "OPCODE_ENDLOOP",
-   "OPCODE_ENDREP",
-   "OPCODE_PUSHA",
-   "OPCODE_POPA",
-   "OPCODE_CEIL",
-   "OPCODE_I2F",
-   "OPCODE_NOT",
-   "OPCODE_TRUNC",
-   "OPCODE_SHL",
-   "OPCODE_SHR",
-   "OPCODE_AND",
-   "OPCODE_OR",
-   "OPCODE_MOD",
-   "OPCODE_XOR",
-   "OPCODE_SAD",
-   "OPCODE_TXF",
-   "OPCODE_TXQ",
-   "OPCODE_CONT",
-   "OPCODE_EMIT",
-   "OPCODE_ENDPRIM",
-   "OPCODE_BGNLOOP2",
-   "OPCODE_BGNSUB",
-   "OPCODE_ENDLOOP2",
-   "OPCODE_ENDSUB",
-   "OPCODE_NOISE1",
-   "OPCODE_NOISE2",
-   "OPCODE_NOISE3",
-   "OPCODE_NOISE4",
-   "OPCODE_NOP",
-   "OPCODE_M4X3",
-   "OPCODE_M3X4",
-   "OPCODE_M3X3",
-   "OPCODE_M3X2",
-   "OPCODE_NRM4",
-   "OPCODE_CALLNZ",
-   "OPCODE_IFC",
-   "OPCODE_BREAKC",
-   "OPCODE_KIL",
-   "OPCODE_END"
-};
-
 static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] =
 {
    "ARL",
@@ -399,49 +220,6 @@ static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] =
    "SWZ"
 };
 
-static const char *TGSI_SATS[] =
-{
-   "SAT_NONE",
-   "SAT_ZERO_ONE",
-   "SAT_MINUS_PLUS_ONE"
-};
-
-static const char *TGSI_INSTRUCTION_EXTS[] =
-{
-   "INSTRUCTION_EXT_TYPE_NV",
-   "INSTRUCTION_EXT_TYPE_LABEL",
-   "INSTRUCTION_EXT_TYPE_TEXTURE"
-};
-
-static const char *TGSI_PRECISIONS[] =
-{
-   "PRECISION_DEFAULT",
-   "TGSI_PRECISION_FLOAT32",
-   "TGSI_PRECISION_FLOAT16",
-   "TGSI_PRECISION_FIXED12"
-};
-
-static const char *TGSI_CCS[] =
-{
-   "CC_GT",
-   "CC_EQ",
-   "CC_LT",
-   "CC_UN",
-   "CC_GE",
-   "CC_LE",
-   "CC_NE",
-   "CC_TR",
-   "CC_FL"
-};
-
-static const char *TGSI_SWIZZLES[] =
-{
-   "SWIZZLE_X",
-   "SWIZZLE_Y",
-   "SWIZZLE_Z",
-   "SWIZZLE_W"
-};
-
 static const char *TGSI_SWIZZLES_SHORT[] =
 {
    "x",
@@ -450,19 +228,6 @@ static const char *TGSI_SWIZZLES_SHORT[] =
    "w"
 };
 
-static const char *TGSI_TEXTURES[] =
-{
-   "TEXTURE_UNKNOWN",
-   "TEXTURE_1D",
-   "TEXTURE_2D",
-   "TEXTURE_3D",
-   "TEXTURE_CUBE",
-   "TEXTURE_RECT",
-   "TEXTURE_SHADOW1D",
-   "TEXTURE_SHADOW2D",
-   "TEXTURE_SHADOWRECT"
-};
-
 static const char *TGSI_TEXTURES_SHORT[] =
 {
    "UNKNOWN",
@@ -476,22 +241,6 @@ static const char *TGSI_TEXTURES_SHORT[] =
    "SHADOWRECT"
 };
 
-static const char *TGSI_SRC_REGISTER_EXTS[] =
-{
-   "SRC_REGISTER_EXT_TYPE_SWZ",
-   "SRC_REGISTER_EXT_TYPE_MOD"
-};
-
-static const char *TGSI_EXTSWIZZLES[] =
-{
-   "EXTSWIZZLE_X",
-   "EXTSWIZZLE_Y",
-   "EXTSWIZZLE_Z",
-   "EXTSWIZZLE_W",
-   "EXTSWIZZLE_ZERO",
-   "EXTSWIZZLE_ONE"
-};
-
 static const char *TGSI_EXTSWIZZLES_SHORT[] =
 {
    "x",
@@ -502,43 +251,6 @@ static const char *TGSI_EXTSWIZZLES_SHORT[] =
    "1"
 };
 
-static const char *TGSI_WRITEMASKS[] =
-{
-   "0",
-   "WRITEMASK_X",
-   "WRITEMASK_Y",
-   "WRITEMASK_XY",
-   "WRITEMASK_Z",
-   "WRITEMASK_XZ",
-   "WRITEMASK_YZ",
-   "WRITEMASK_XYZ",
-   "WRITEMASK_W",
-   "WRITEMASK_XW",
-   "WRITEMASK_YW",
-   "WRITEMASK_XYW",
-   "WRITEMASK_ZW",
-   "WRITEMASK_XZW",
-   "WRITEMASK_YZW",
-   "WRITEMASK_XYZW"
-};
-
-static const char *TGSI_DST_REGISTER_EXTS[] =
-{
-   "DST_REGISTER_EXT_TYPE_CONDCODE",
-   "DST_REGISTER_EXT_TYPE_MODULATE"
-};
-
-static const char *TGSI_MODULATES[] =
-{
-   "MODULATE_1X",
-   "MODULATE_2X",
-   "MODULATE_4X",
-   "MODULATE_8X",
-   "MODULATE_HALF",
-   "MODULATE_QUARTER",
-   "MODULATE_EIGHTH"
-};
-
 static const char *TGSI_MODULATES_SHORT[TGSI_MODULATE_COUNT] =
 {
    "",
@@ -552,7 +264,7 @@ static const char *TGSI_MODULATES_SHORT[TGSI_MODULATE_COUNT] =
 
 void
 tgsi_dump_declaration(
-   const struct tgsi_full_declaration  *decl )
+   const struct tgsi_full_declaration *decl )
 {
    TXT( "\nDCL " );
    ENM( decl->Declaration.File, TGSI_FILES_SHORT );
@@ -596,62 +308,6 @@ tgsi_dump_declaration(
    ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES_SHORT );
 }
 
-static void
-dump_declaration_verbose(
-   struct tgsi_full_declaration  *decl,
-   unsigned                      ignored,
-   unsigned                      deflt,
-   struct tgsi_full_declaration  *fd )
-{
-   TXT( "\nFile       : " );
-   ENM( decl->Declaration.File, TGSI_FILES );
-   if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) {
-      TXT( "\nUsageMask  : " );
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
-         CHR( 'X' );
-      }
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) {
-         CHR( 'Y' );
-      }
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) {
-         CHR( 'Z' );
-      }
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) {
-         CHR( 'W' );
-      }
-   }
-   if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) {
-      TXT( "\nInterpolate: " );
-      ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES );
-   }
-   if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) {
-      TXT( "\nSemantic   : " );
-      UID( decl->Declaration.Semantic );
-   }
-   if( ignored ) {
-      TXT( "\nPadding    : " );
-      UIX( decl->Declaration.Padding );
-   }
-
-   EOL();
-   TXT( "\nFirst: " );
-   UID( decl->DeclarationRange.First );
-   TXT( "\nLast : " );
-   UID( decl->DeclarationRange.Last );
-
-   if( decl->Declaration.Semantic ) {
-      EOL();
-      TXT( "\nSemanticName : " );
-      ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS );
-      TXT( "\nSemanticIndex: " );
-      UID( decl->Semantic.SemanticIndex );
-      if( ignored ) {
-         TXT( "\nPadding      : " );
-         UIX( decl->Semantic.Padding );
-      }
-   }
-}
-
 void
 tgsi_dump_immediate(
    const struct tgsi_full_immediate *imm )
@@ -679,38 +335,10 @@ tgsi_dump_immediate(
    TXT( " }" );
 }
 
-static void
-dump_immediate_verbose(
-   struct tgsi_full_immediate *imm,
-   unsigned                   ignored )
-{
-   unsigned i;
-
-   TXT( "\nDataType   : " );
-   ENM( imm->Immediate.DataType, TGSI_IMMS );
-   if( ignored ) {
-      TXT( "\nPadding    : " );
-      UIX( imm->Immediate.Padding );
-   }
-
-   for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
-      EOL();
-      switch( imm->Immediate.DataType ) {
-      case TGSI_IMM_FLOAT32:
-         TXT( "\nFloat: " );
-         FLT( imm->u.ImmediateFloat32[i].Float );
-         break;
-
-      default:
-         assert( 0 );
-      }
-   }
-}
-
 void
 tgsi_dump_instruction(
-   const struct tgsi_full_instruction  *inst,
-   unsigned                      instno )
+   const struct tgsi_full_instruction *inst,
+   uint instno )
 {
    unsigned i;
    boolean  first_reg = TRUE;
@@ -856,389 +484,28 @@ tgsi_dump_instruction(
    }
 }
 
-static void
-dump_instruction_verbose(
-   struct tgsi_full_instruction  *inst,
-   unsigned                      ignored,
-   unsigned                      deflt,
-   struct tgsi_full_instruction  *fi )
-{
-   unsigned i;
-
-   TXT( "\nOpcode     : " );
-   ENM( inst->Instruction.Opcode, TGSI_OPCODES );
-   if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) {
-      TXT( "\nSaturate   : " );
-      ENM( inst->Instruction.Saturate, TGSI_SATS );
-   }
-   if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) {
-      TXT( "\nNumDstRegs : " );
-      UID( inst->Instruction.NumDstRegs );
-   }
-   if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) {
-      TXT( "\nNumSrcRegs : " );
-      UID( inst->Instruction.NumSrcRegs );
-   }
-   if( ignored ) {
-      TXT( "\nPadding    : " );
-      UIX( inst->Instruction.Padding );
-   }
-
-   if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) {
-      EOL();
-      TXT( "\nType          : " );
-      ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS );
-      if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) {
-         TXT( "\nPrecision     : " );
-         ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS );
-      }
-      if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) {
-         TXT( "\nCondDstIndex  : " );
-         UID( inst->InstructionExtNv.CondDstIndex );
-      }
-      if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) {
-         TXT( "\nCondFlowIndex : " );
-         UID( inst->InstructionExtNv.CondFlowIndex );
-      }
-      if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) {
-         TXT( "\nCondMask      : " );
-         ENM( inst->InstructionExtNv.CondMask, TGSI_CCS );
-      }
-      if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) {
-         TXT( "\nCondSwizzleX  : " );
-         ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES );
-      }
-      if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) {
-         TXT( "\nCondSwizzleY  : " );
-         ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES );
-      }
-      if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) {
-         TXT( "\nCondSwizzleZ  : " );
-         ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES );
-      }
-      if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) {
-         TXT( "\nCondSwizzleW  : " );
-         ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES );
-      }
-      if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) {
-         TXT( "\nCondDstUpdate : " );
-         UID( inst->InstructionExtNv.CondDstUpdate );
-      }
-      if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) {
-         TXT( "\nCondFlowEnable: " );
-         UID( inst->InstructionExtNv.CondFlowEnable );
-      }
-      if( ignored ) {
-         TXT( "\nPadding       : " );
-         UIX( inst->InstructionExtNv.Padding );
-         if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) {
-            TXT( "\nExtended      : " );
-            UID( inst->InstructionExtNv.Extended );
-         }
-      }
-   }
-
-   if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) {
-      EOL();
-      TXT( "\nType    : " );
-      ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS );
-      if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) {
-         TXT( "\nLabel   : " );
-         UID( inst->InstructionExtLabel.Label );
-      }
-      if( ignored ) {
-         TXT( "\nPadding : " );
-         UIX( inst->InstructionExtLabel.Padding );
-         if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) {
-            TXT( "\nExtended: " );
-            UID( inst->InstructionExtLabel.Extended );
-         }
-      }
-   }
-
-   if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) {
-      EOL();
-      TXT( "\nType    : " );
-      ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS );
-      if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) {
-         TXT( "\nTexture : " );
-         ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES );
-      }
-      if( ignored ) {
-         TXT( "\nPadding : " );
-         UIX( inst->InstructionExtTexture.Padding );
-         if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) {
-            TXT( "\nExtended: " );
-            UID( inst->InstructionExtTexture.Extended );
-         }
-      }
-   }
-
-   for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
-      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
-      struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i];
-
-      EOL();
-      TXT( "\nFile     : " );
-      ENM( dst->DstRegister.File, TGSI_FILES );
-      if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) {
-         TXT( "\nWriteMask: " );
-         ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS );
-      }
-      if( ignored ) {
-         if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) {
-            TXT( "\nIndirect : " );
-            UID( dst->DstRegister.Indirect );
-         }
-         if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) {
-            TXT( "\nDimension: " );
-            UID( dst->DstRegister.Dimension );
-         }
-      }
-      if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) {
-         TXT( "\nIndex    : " );
-         SID( dst->DstRegister.Index );
-      }
-      if( ignored ) {
-         TXT( "\nPadding  : " );
-         UIX( dst->DstRegister.Padding );
-         if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) {
-            TXT( "\nExtended : " );
-            UID( dst->DstRegister.Extended );
-         }
-      }
-
-      if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) {
-         EOL();
-         TXT( "\nType        : " );
-         ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS );
-         if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) {
-            TXT( "\nCondMask    : " );
-            ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS );
-         }
-         if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) {
-            TXT( "\nCondSwizzleX: " );
-            ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES );
-         }
-         if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) {
-            TXT( "\nCondSwizzleY: " );
-            ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES );
-         }
-         if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) {
-            TXT( "\nCondSwizzleZ: " );
-            ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES );
-         }
-         if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) {
-            TXT( "\nCondSwizzleW: " );
-            ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES );
-         }
-         if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) {
-            TXT( "\nCondSrcIndex: " );
-            UID( dst->DstRegisterExtConcode.CondSrcIndex );
-         }
-         if( ignored ) {
-            TXT( "\nPadding     : " );
-            UIX( dst->DstRegisterExtConcode.Padding );
-            if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) {
-               TXT( "\nExtended    : " );
-               UID( dst->DstRegisterExtConcode.Extended );
-            }
-         }
-      }
-
-      if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) {
-         EOL();
-         TXT( "\nType    : " );
-         ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
-         if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) {
-            TXT( "\nModulate: " );
-            ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES );
-         }
-         if( ignored ) {
-            TXT( "\nPadding : " );
-            UIX( dst->DstRegisterExtModulate.Padding );
-            if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) {
-               TXT( "\nExtended: " );
-               UID( dst->DstRegisterExtModulate.Extended );
-            }
-         }
-      }
-   }
-
-   for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
-      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
-      struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i];
-
-      EOL();
-      TXT( "\nFile     : ");
-      ENM( src->SrcRegister.File, TGSI_FILES );
-      if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) {
-         TXT( "\nSwizzleX : " );
-         ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES );
-      }
-      if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) {
-         TXT( "\nSwizzleY : " );
-         ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES );
-      }
-      if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) {
-         TXT( "\nSwizzleZ : " );
-         ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES );
-      }
-      if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) {
-         TXT( "\nSwizzleW : " );
-         ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES );
-      }
-      if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) {
-         TXT( "\nNegate   : " );
-         UID( src->SrcRegister.Negate );
-      }
-      if( ignored ) {
-         if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) {
-            TXT( "\nIndirect : " );
-            UID( src->SrcRegister.Indirect );
-         }
-         if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) {
-            TXT( "\nDimension: " );
-            UID( src->SrcRegister.Dimension );
-         }
-      }
-      if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) {
-         TXT( "\nIndex    : " );
-         SID( src->SrcRegister.Index );
-      }
-      if( ignored ) {
-         if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) {
-            TXT( "\nExtended : " );
-            UID( src->SrcRegister.Extended );
-         }
-      }
-
-      if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) {
-         EOL();
-         TXT( "\nType       : " );
-         ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) {
-            TXT( "\nExtSwizzleX: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) {
-            TXT( "\nExtSwizzleY: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) {
-            TXT( "\nExtSwizzleZ: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) {
-            TXT( "\nExtSwizzleW: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) {
-            TXT( "\nNegateX   : " );
-            UID( src->SrcRegisterExtSwz.NegateX );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) {
-            TXT( "\nNegateY   : " );
-            UID( src->SrcRegisterExtSwz.NegateY );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) {
-            TXT( "\nNegateZ   : " );
-            UID( src->SrcRegisterExtSwz.NegateZ );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) {
-            TXT( "\nNegateW   : " );
-            UID( src->SrcRegisterExtSwz.NegateW );
-         }
-         if( ignored ) {
-            TXT( "\nPadding   : " );
-            UIX( src->SrcRegisterExtSwz.Padding );
-            if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) {
-               TXT( "\nExtended   : " );
-               UID( src->SrcRegisterExtSwz.Extended );
-            }
-         }
-      }
-
-      if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
-         EOL();
-         TXT( "\nType     : " );
-         ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
-         if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) {
-            TXT( "\nComplement: " );
-            UID( src->SrcRegisterExtMod.Complement );
-         }
-         if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) {
-            TXT( "\nBias     : " );
-            UID( src->SrcRegisterExtMod.Bias );
-         }
-         if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) {
-            TXT( "\nScale2X   : " );
-            UID( src->SrcRegisterExtMod.Scale2X );
-         }
-         if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) {
-            TXT( "\nAbsolute  : " );
-            UID( src->SrcRegisterExtMod.Absolute );
-         }
-         if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) {
-            TXT( "\nNegate   : " );
-            UID( src->SrcRegisterExtMod.Negate );
-         }
-         if( ignored ) {
-            TXT( "\nPadding   : " );
-            UIX( src->SrcRegisterExtMod.Padding );
-            if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) {
-               TXT( "\nExtended  : " );
-               UID( src->SrcRegisterExtMod.Extended );
-            }
-         }
-      }
-   }
-}
-
 void
 tgsi_dump(
-   const struct tgsi_token *tokens,
-   unsigned                flags )
+   const struct tgsi_token *tokens )
 {
    struct tgsi_parse_context parse;
    struct tgsi_full_instruction fi;
    struct tgsi_full_declaration fd;
-   unsigned verbose = flags & TGSI_DUMP_VERBOSE;
-   unsigned ignored = !(flags & TGSI_DUMP_NO_IGNORED);
-   unsigned deflt = !(flags & TGSI_DUMP_NO_DEFAULT);
    unsigned instno = 0;
 
    /* sanity checks */
-   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
-   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
+   assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
+   assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "OPCODE_END") == 0);
    assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0);
 
    tgsi_parse_init( &parse, tokens );
 
-   TXT( "tgsi-dump begin -----------------" );
-
    EOL();
    ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT );
    UID( parse.FullVersion.Version.MajorVersion );
    CHR( '.' );
    UID( parse.FullVersion.Version.MinorVersion );
 
-   if( verbose ) {
-      TXT( "\nMajorVersion: " );
-      UID( parse.FullVersion.Version.MajorVersion );
-      TXT( "\nMinorVersion: " );
-      UID( parse.FullVersion.Version.MinorVersion );
-      EOL();
-
-      TXT( "\nHeaderSize: " );
-      UID( parse.FullHeader.Header.HeaderSize );
-      TXT( "\nBodySize  : " );
-      UID( parse.FullHeader.Header.BodySize );
-      TXT( "\nProcessor : " );
-      ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES );
-      EOL();
-   }
-
    fi = tgsi_default_full_instruction();
    fd = tgsi_default_full_declaration();
 
@@ -1266,51 +533,7 @@ tgsi_dump(
       default:
          assert( 0 );
       }
-
-      if( verbose ) {
-         TXT( "\nType       : " );
-         ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES );
-         if( ignored ) {
-            TXT( "\nSize       : " );
-            UID( parse.FullToken.Token.Size );
-            if( deflt || parse.FullToken.Token.Extended ) {
-               TXT( "\nExtended   : " );
-               UID( parse.FullToken.Token.Extended );
-            }
-         }
-
-         switch( parse.FullToken.Token.Type ) {
-         case TGSI_TOKEN_TYPE_DECLARATION:
-            dump_declaration_verbose(
-               &parse.FullToken.FullDeclaration,
-               ignored,
-               deflt,
-               &fd );
-            break;
-
-         case TGSI_TOKEN_TYPE_IMMEDIATE:
-            dump_immediate_verbose(
-               &parse.FullToken.FullImmediate,
-               ignored );
-            break;
-
-         case TGSI_TOKEN_TYPE_INSTRUCTION:
-            dump_instruction_verbose(
-               &parse.FullToken.FullInstruction,
-               ignored,
-               deflt,
-               &fi );
-            break;
-
-         default:
-            assert( 0 );
-         }
-
-         EOL();
-      }
    }
 
-   TXT( "\ntgsi-dump end -------------------\n" );
-
    tgsi_parse_free( &parse );
 }
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
index ba7692b5112..c130b9f1b7a 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
@@ -1,6 +1,6 @@
 /**************************************************************************
  * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -28,18 +28,15 @@
 #ifndef TGSI_DUMP_H
 #define TGSI_DUMP_H
 
+#include "pipe/p_shader_tokens.h"
+
 #if defined __cplusplus
 extern "C" {
 #endif
 
-#define TGSI_DUMP_VERBOSE       1
-#define TGSI_DUMP_NO_IGNORED    2
-#define TGSI_DUMP_NO_DEFAULT    4
-
 void
 tgsi_dump(
-   const struct tgsi_token *tokens,
-   unsigned                flags );
+   const struct tgsi_token *tokens );
 
 struct tgsi_full_immediate;
 struct tgsi_full_instruction;
@@ -51,12 +48,12 @@ tgsi_dump_immediate(
 
 void
 tgsi_dump_instruction(
-   const struct tgsi_full_instruction  *inst,
-   unsigned                      instno );
+   const struct tgsi_full_instruction *inst,
+   uint instno );
 
 void
 tgsi_dump_declaration(
-   const struct tgsi_full_declaration  *decl );
+   const struct tgsi_full_declaration *decl );
 
 #if defined __cplusplus
 }
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
new file mode 100644
index 00000000000..27b085dc077
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
@@ -0,0 +1,853 @@
+/**************************************************************************
+ * 
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "util/u_string.h"
+#include "tgsi_dump_c.h"
+#include "tgsi_parse.h"
+#include "tgsi_build.h"
+
+static void
+dump_enum(
+   const unsigned    e,
+   const char        **enums,
+   const unsigned    enums_count )
+{
+   if (e >= enums_count) {
+      debug_printf( "%u", e );
+   }
+   else {
+      debug_printf( "%s", enums[e] );
+   }
+}
+
+#define EOL()           debug_printf( "\n" )
+#define TXT(S)          debug_printf( "%s", S )
+#define CHR(C)          debug_printf( "%c", C )
+#define UIX(I)          debug_printf( "0x%x", I )
+#define UID(I)          debug_printf( "%u", I )
+#define SID(I)          debug_printf( "%d", I )
+#define FLT(F)          debug_printf( "%10.4f", F )
+#define ENM(E,ENUMS)    dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
+
+static const char *TGSI_PROCESSOR_TYPES[] =
+{
+   "PROCESSOR_FRAGMENT",
+   "PROCESSOR_VERTEX",
+   "PROCESSOR_GEOMETRY"
+};
+
+static const char *TGSI_TOKEN_TYPES[] =
+{
+   "TOKEN_TYPE_DECLARATION",
+   "TOKEN_TYPE_IMMEDIATE",
+   "TOKEN_TYPE_INSTRUCTION"
+};
+
+static const char *TGSI_FILES[] =
+{
+   "FILE_NULL",
+   "FILE_CONSTANT",
+   "FILE_INPUT",
+   "FILE_OUTPUT",
+   "FILE_TEMPORARY",
+   "FILE_SAMPLER",
+   "FILE_ADDRESS",
+   "FILE_IMMEDIATE"
+};
+
+static const char *TGSI_DECLARES[] =
+{
+   "DECLARE_RANGE",
+   "DECLARE_MASK"
+};
+
+static const char *TGSI_INTERPOLATES[] =
+{
+   "INTERPOLATE_CONSTANT",
+   "INTERPOLATE_LINEAR",
+   "INTERPOLATE_PERSPECTIVE",
+   "INTERPOLATE_ATTRIB"
+};
+
+static const char *TGSI_SEMANTICS[] =
+{
+   "SEMANTIC_POSITION",
+   "SEMANTIC_COLOR",
+   "SEMANTIC_BCOLOR",
+   "SEMANTIC_FOG",
+   "SEMANTIC_PSIZE",
+   "SEMANTIC_GENERIC",
+   "SEMANTIC_NORMAL"
+};
+
+static const char *TGSI_IMMS[] =
+{
+   "IMM_FLOAT32"
+};
+
+static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] =
+{
+   "OPCODE_ARL",
+   "OPCODE_MOV",
+   "OPCODE_LIT",
+   "OPCODE_RCP",
+   "OPCODE_RSQ",
+   "OPCODE_EXP",
+   "OPCODE_LOG",
+   "OPCODE_MUL",
+   "OPCODE_ADD",
+   "OPCODE_DP3",
+   "OPCODE_DP4",
+   "OPCODE_DST",
+   "OPCODE_MIN",
+   "OPCODE_MAX",
+   "OPCODE_SLT",
+   "OPCODE_SGE",
+   "OPCODE_MAD",
+   "OPCODE_SUB",
+   "OPCODE_LERP",
+   "OPCODE_CND",
+   "OPCODE_CND0",
+   "OPCODE_DOT2ADD",
+   "OPCODE_INDEX",
+   "OPCODE_NEGATE",
+   "OPCODE_FRAC",
+   "OPCODE_CLAMP",
+   "OPCODE_FLOOR",
+   "OPCODE_ROUND",
+   "OPCODE_EXPBASE2",
+   "OPCODE_LOGBASE2",
+   "OPCODE_POWER",
+   "OPCODE_CROSSPRODUCT",
+   "OPCODE_MULTIPLYMATRIX",
+   "OPCODE_ABS",
+   "OPCODE_RCC",
+   "OPCODE_DPH",
+   "OPCODE_COS",
+   "OPCODE_DDX",
+   "OPCODE_DDY",
+   "OPCODE_KILP",
+   "OPCODE_PK2H",
+   "OPCODE_PK2US",
+   "OPCODE_PK4B",
+   "OPCODE_PK4UB",
+   "OPCODE_RFL",
+   "OPCODE_SEQ",
+   "OPCODE_SFL",
+   "OPCODE_SGT",
+   "OPCODE_SIN",
+   "OPCODE_SLE",
+   "OPCODE_SNE",
+   "OPCODE_STR",
+   "OPCODE_TEX",
+   "OPCODE_TXD",
+   "OPCODE_TXP",
+   "OPCODE_UP2H",
+   "OPCODE_UP2US",
+   "OPCODE_UP4B",
+   "OPCODE_UP4UB",
+   "OPCODE_X2D",
+   "OPCODE_ARA",
+   "OPCODE_ARR",
+   "OPCODE_BRA",
+   "OPCODE_CAL",
+   "OPCODE_RET",
+   "OPCODE_SSG",
+   "OPCODE_CMP",
+   "OPCODE_SCS",
+   "OPCODE_TXB",
+   "OPCODE_NRM",
+   "OPCODE_DIV",
+   "OPCODE_DP2",
+   "OPCODE_TXL",
+   "OPCODE_BRK",
+   "OPCODE_IF",
+   "OPCODE_LOOP",
+   "OPCODE_REP",
+   "OPCODE_ELSE",
+   "OPCODE_ENDIF",
+   "OPCODE_ENDLOOP",
+   "OPCODE_ENDREP",
+   "OPCODE_PUSHA",
+   "OPCODE_POPA",
+   "OPCODE_CEIL",
+   "OPCODE_I2F",
+   "OPCODE_NOT",
+   "OPCODE_TRUNC",
+   "OPCODE_SHL",
+   "OPCODE_SHR",
+   "OPCODE_AND",
+   "OPCODE_OR",
+   "OPCODE_MOD",
+   "OPCODE_XOR",
+   "OPCODE_SAD",
+   "OPCODE_TXF",
+   "OPCODE_TXQ",
+   "OPCODE_CONT",
+   "OPCODE_EMIT",
+   "OPCODE_ENDPRIM",
+   "OPCODE_BGNLOOP2",
+   "OPCODE_BGNSUB",
+   "OPCODE_ENDLOOP2",
+   "OPCODE_ENDSUB",
+   "OPCODE_NOISE1",
+   "OPCODE_NOISE2",
+   "OPCODE_NOISE3",
+   "OPCODE_NOISE4",
+   "OPCODE_NOP",
+   "OPCODE_M4X3",
+   "OPCODE_M3X4",
+   "OPCODE_M3X3",
+   "OPCODE_M3X2",
+   "OPCODE_NRM4",
+   "OPCODE_CALLNZ",
+   "OPCODE_IFC",
+   "OPCODE_BREAKC",
+   "OPCODE_KIL",
+   "OPCODE_END"
+};
+
+static const char *TGSI_SATS[] =
+{
+   "SAT_NONE",
+   "SAT_ZERO_ONE",
+   "SAT_MINUS_PLUS_ONE"
+};
+
+static const char *TGSI_INSTRUCTION_EXTS[] =
+{
+   "INSTRUCTION_EXT_TYPE_NV",
+   "INSTRUCTION_EXT_TYPE_LABEL",
+   "INSTRUCTION_EXT_TYPE_TEXTURE"
+};
+
+static const char *TGSI_PRECISIONS[] =
+{
+   "PRECISION_DEFAULT",
+   "TGSI_PRECISION_FLOAT32",
+   "TGSI_PRECISION_FLOAT16",
+   "TGSI_PRECISION_FIXED12"
+};
+
+static const char *TGSI_CCS[] =
+{
+   "CC_GT",
+   "CC_EQ",
+   "CC_LT",
+   "CC_UN",
+   "CC_GE",
+   "CC_LE",
+   "CC_NE",
+   "CC_TR",
+   "CC_FL"
+};
+
+static const char *TGSI_SWIZZLES[] =
+{
+   "SWIZZLE_X",
+   "SWIZZLE_Y",
+   "SWIZZLE_Z",
+   "SWIZZLE_W"
+};
+
+static const char *TGSI_TEXTURES[] =
+{
+   "TEXTURE_UNKNOWN",
+   "TEXTURE_1D",
+   "TEXTURE_2D",
+   "TEXTURE_3D",
+   "TEXTURE_CUBE",
+   "TEXTURE_RECT",
+   "TEXTURE_SHADOW1D",
+   "TEXTURE_SHADOW2D",
+   "TEXTURE_SHADOWRECT"
+};
+
+static const char *TGSI_SRC_REGISTER_EXTS[] =
+{
+   "SRC_REGISTER_EXT_TYPE_SWZ",
+   "SRC_REGISTER_EXT_TYPE_MOD"
+};
+
+static const char *TGSI_EXTSWIZZLES[] =
+{
+   "EXTSWIZZLE_X",
+   "EXTSWIZZLE_Y",
+   "EXTSWIZZLE_Z",
+   "EXTSWIZZLE_W",
+   "EXTSWIZZLE_ZERO",
+   "EXTSWIZZLE_ONE"
+};
+
+static const char *TGSI_WRITEMASKS[] =
+{
+   "0",
+   "WRITEMASK_X",
+   "WRITEMASK_Y",
+   "WRITEMASK_XY",
+   "WRITEMASK_Z",
+   "WRITEMASK_XZ",
+   "WRITEMASK_YZ",
+   "WRITEMASK_XYZ",
+   "WRITEMASK_W",
+   "WRITEMASK_XW",
+   "WRITEMASK_YW",
+   "WRITEMASK_XYW",
+   "WRITEMASK_ZW",
+   "WRITEMASK_XZW",
+   "WRITEMASK_YZW",
+   "WRITEMASK_XYZW"
+};
+
+static const char *TGSI_DST_REGISTER_EXTS[] =
+{
+   "DST_REGISTER_EXT_TYPE_CONDCODE",
+   "DST_REGISTER_EXT_TYPE_MODULATE"
+};
+
+static const char *TGSI_MODULATES[] =
+{
+   "MODULATE_1X",
+   "MODULATE_2X",
+   "MODULATE_4X",
+   "MODULATE_8X",
+   "MODULATE_HALF",
+   "MODULATE_QUARTER",
+   "MODULATE_EIGHTH"
+};
+
+static void
+dump_declaration_verbose(
+   struct tgsi_full_declaration  *decl,
+   unsigned                      ignored,
+   unsigned                      deflt,
+   struct tgsi_full_declaration  *fd )
+{
+   TXT( "\nFile       : " );
+   ENM( decl->Declaration.File, TGSI_FILES );
+   if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) {
+      TXT( "\nUsageMask  : " );
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
+         CHR( 'X' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) {
+         CHR( 'Y' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) {
+         CHR( 'Z' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) {
+         CHR( 'W' );
+      }
+   }
+   if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) {
+      TXT( "\nInterpolate: " );
+      ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES );
+   }
+   if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) {
+      TXT( "\nSemantic   : " );
+      UID( decl->Declaration.Semantic );
+   }
+   if( ignored ) {
+      TXT( "\nPadding    : " );
+      UIX( decl->Declaration.Padding );
+   }
+
+   EOL();
+   TXT( "\nFirst: " );
+   UID( decl->DeclarationRange.First );
+   TXT( "\nLast : " );
+   UID( decl->DeclarationRange.Last );
+
+   if( decl->Declaration.Semantic ) {
+      EOL();
+      TXT( "\nSemanticName : " );
+      ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS );
+      TXT( "\nSemanticIndex: " );
+      UID( decl->Semantic.SemanticIndex );
+      if( ignored ) {
+         TXT( "\nPadding      : " );
+         UIX( decl->Semantic.Padding );
+      }
+   }
+}
+
+static void
+dump_immediate_verbose(
+   struct tgsi_full_immediate *imm,
+   unsigned                   ignored )
+{
+   unsigned i;
+
+   TXT( "\nDataType   : " );
+   ENM( imm->Immediate.DataType, TGSI_IMMS );
+   if( ignored ) {
+      TXT( "\nPadding    : " );
+      UIX( imm->Immediate.Padding );
+   }
+
+   for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+      EOL();
+      switch( imm->Immediate.DataType ) {
+      case TGSI_IMM_FLOAT32:
+         TXT( "\nFloat: " );
+         FLT( imm->u.ImmediateFloat32[i].Float );
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+}
+
+static void
+dump_instruction_verbose(
+   struct tgsi_full_instruction  *inst,
+   unsigned                      ignored,
+   unsigned                      deflt,
+   struct tgsi_full_instruction  *fi )
+{
+   unsigned i;
+
+   TXT( "\nOpcode     : " );
+   ENM( inst->Instruction.Opcode, TGSI_OPCODES );
+   if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) {
+      TXT( "\nSaturate   : " );
+      ENM( inst->Instruction.Saturate, TGSI_SATS );
+   }
+   if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) {
+      TXT( "\nNumDstRegs : " );
+      UID( inst->Instruction.NumDstRegs );
+   }
+   if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) {
+      TXT( "\nNumSrcRegs : " );
+      UID( inst->Instruction.NumSrcRegs );
+   }
+   if( ignored ) {
+      TXT( "\nPadding    : " );
+      UIX( inst->Instruction.Padding );
+   }
+
+   if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) {
+      EOL();
+      TXT( "\nType          : " );
+      ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS );
+      if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) {
+         TXT( "\nPrecision     : " );
+         ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS );
+      }
+      if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) {
+         TXT( "\nCondDstIndex  : " );
+         UID( inst->InstructionExtNv.CondDstIndex );
+      }
+      if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) {
+         TXT( "\nCondFlowIndex : " );
+         UID( inst->InstructionExtNv.CondFlowIndex );
+      }
+      if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) {
+         TXT( "\nCondMask      : " );
+         ENM( inst->InstructionExtNv.CondMask, TGSI_CCS );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) {
+         TXT( "\nCondSwizzleX  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) {
+         TXT( "\nCondSwizzleY  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) {
+         TXT( "\nCondSwizzleZ  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) {
+         TXT( "\nCondSwizzleW  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) {
+         TXT( "\nCondDstUpdate : " );
+         UID( inst->InstructionExtNv.CondDstUpdate );
+      }
+      if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) {
+         TXT( "\nCondFlowEnable: " );
+         UID( inst->InstructionExtNv.CondFlowEnable );
+      }
+      if( ignored ) {
+         TXT( "\nPadding       : " );
+         UIX( inst->InstructionExtNv.Padding );
+         if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) {
+            TXT( "\nExtended      : " );
+            UID( inst->InstructionExtNv.Extended );
+         }
+      }
+   }
+
+   if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) {
+      EOL();
+      TXT( "\nType    : " );
+      ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS );
+      if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) {
+         TXT( "\nLabel   : " );
+         UID( inst->InstructionExtLabel.Label );
+      }
+      if( ignored ) {
+         TXT( "\nPadding : " );
+         UIX( inst->InstructionExtLabel.Padding );
+         if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) {
+            TXT( "\nExtended: " );
+            UID( inst->InstructionExtLabel.Extended );
+         }
+      }
+   }
+
+   if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) {
+      EOL();
+      TXT( "\nType    : " );
+      ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS );
+      if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) {
+         TXT( "\nTexture : " );
+         ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES );
+      }
+      if( ignored ) {
+         TXT( "\nPadding : " );
+         UIX( inst->InstructionExtTexture.Padding );
+         if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) {
+            TXT( "\nExtended: " );
+            UID( inst->InstructionExtTexture.Extended );
+         }
+      }
+   }
+
+   for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+      struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i];
+
+      EOL();
+      TXT( "\nFile     : " );
+      ENM( dst->DstRegister.File, TGSI_FILES );
+      if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) {
+         TXT( "\nWriteMask: " );
+         ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS );
+      }
+      if( ignored ) {
+         if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) {
+            TXT( "\nIndirect : " );
+            UID( dst->DstRegister.Indirect );
+         }
+         if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) {
+            TXT( "\nDimension: " );
+            UID( dst->DstRegister.Dimension );
+         }
+      }
+      if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) {
+         TXT( "\nIndex    : " );
+         SID( dst->DstRegister.Index );
+      }
+      if( ignored ) {
+         TXT( "\nPadding  : " );
+         UIX( dst->DstRegister.Padding );
+         if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) {
+            TXT( "\nExtended : " );
+            UID( dst->DstRegister.Extended );
+         }
+      }
+
+      if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) {
+         EOL();
+         TXT( "\nType        : " );
+         ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS );
+         if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) {
+            TXT( "\nCondMask    : " );
+            ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) {
+            TXT( "\nCondSwizzleX: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) {
+            TXT( "\nCondSwizzleY: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) {
+            TXT( "\nCondSwizzleZ: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) {
+            TXT( "\nCondSwizzleW: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) {
+            TXT( "\nCondSrcIndex: " );
+            UID( dst->DstRegisterExtConcode.CondSrcIndex );
+         }
+         if( ignored ) {
+            TXT( "\nPadding     : " );
+            UIX( dst->DstRegisterExtConcode.Padding );
+            if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) {
+               TXT( "\nExtended    : " );
+               UID( dst->DstRegisterExtConcode.Extended );
+            }
+         }
+      }
+
+      if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) {
+         EOL();
+         TXT( "\nType    : " );
+         ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
+         if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) {
+            TXT( "\nModulate: " );
+            ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES );
+         }
+         if( ignored ) {
+            TXT( "\nPadding : " );
+            UIX( dst->DstRegisterExtModulate.Padding );
+            if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) {
+               TXT( "\nExtended: " );
+               UID( dst->DstRegisterExtModulate.Extended );
+            }
+         }
+      }
+   }
+
+   for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+      struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i];
+
+      EOL();
+      TXT( "\nFile     : ");
+      ENM( src->SrcRegister.File, TGSI_FILES );
+      if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) {
+         TXT( "\nSwizzleX : " );
+         ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) {
+         TXT( "\nSwizzleY : " );
+         ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) {
+         TXT( "\nSwizzleZ : " );
+         ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) {
+         TXT( "\nSwizzleW : " );
+         ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) {
+         TXT( "\nNegate   : " );
+         UID( src->SrcRegister.Negate );
+      }
+      if( ignored ) {
+         if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) {
+            TXT( "\nIndirect : " );
+            UID( src->SrcRegister.Indirect );
+         }
+         if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) {
+            TXT( "\nDimension: " );
+            UID( src->SrcRegister.Dimension );
+         }
+      }
+      if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) {
+         TXT( "\nIndex    : " );
+         SID( src->SrcRegister.Index );
+      }
+      if( ignored ) {
+         if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) {
+            TXT( "\nExtended : " );
+            UID( src->SrcRegister.Extended );
+         }
+      }
+
+      if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) {
+         EOL();
+         TXT( "\nType       : " );
+         ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) {
+            TXT( "\nExtSwizzleX: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) {
+            TXT( "\nExtSwizzleY: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) {
+            TXT( "\nExtSwizzleZ: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) {
+            TXT( "\nExtSwizzleW: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) {
+            TXT( "\nNegateX   : " );
+            UID( src->SrcRegisterExtSwz.NegateX );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) {
+            TXT( "\nNegateY   : " );
+            UID( src->SrcRegisterExtSwz.NegateY );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) {
+            TXT( "\nNegateZ   : " );
+            UID( src->SrcRegisterExtSwz.NegateZ );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) {
+            TXT( "\nNegateW   : " );
+            UID( src->SrcRegisterExtSwz.NegateW );
+         }
+         if( ignored ) {
+            TXT( "\nPadding   : " );
+            UIX( src->SrcRegisterExtSwz.Padding );
+            if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) {
+               TXT( "\nExtended   : " );
+               UID( src->SrcRegisterExtSwz.Extended );
+            }
+         }
+      }
+
+      if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
+         EOL();
+         TXT( "\nType     : " );
+         ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
+         if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) {
+            TXT( "\nComplement: " );
+            UID( src->SrcRegisterExtMod.Complement );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) {
+            TXT( "\nBias     : " );
+            UID( src->SrcRegisterExtMod.Bias );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) {
+            TXT( "\nScale2X   : " );
+            UID( src->SrcRegisterExtMod.Scale2X );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) {
+            TXT( "\nAbsolute  : " );
+            UID( src->SrcRegisterExtMod.Absolute );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) {
+            TXT( "\nNegate   : " );
+            UID( src->SrcRegisterExtMod.Negate );
+         }
+         if( ignored ) {
+            TXT( "\nPadding   : " );
+            UIX( src->SrcRegisterExtMod.Padding );
+            if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) {
+               TXT( "\nExtended  : " );
+               UID( src->SrcRegisterExtMod.Extended );
+            }
+         }
+      }
+   }
+}
+
+void
+tgsi_dump_c(
+   const struct tgsi_token *tokens,
+   uint flags )
+{
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction fi;
+   struct tgsi_full_declaration fd;
+   uint ignored = flags & TGSI_DUMP_C_IGNORED;
+   uint deflt = flags & TGSI_DUMP_C_DEFAULT;
+   uint instno = 0;
+
+   /* sanity checks */
+   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
+   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
+   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "END") == 0);
+
+   tgsi_parse_init( &parse, tokens );
+
+   TXT( "tgsi-dump begin -----------------" );
+
+   TXT( "\nMajorVersion: " );
+   UID( parse.FullVersion.Version.MajorVersion );
+   TXT( "\nMinorVersion: " );
+   UID( parse.FullVersion.Version.MinorVersion );
+   EOL();
+
+   TXT( "\nHeaderSize: " );
+   UID( parse.FullHeader.Header.HeaderSize );
+   TXT( "\nBodySize  : " );
+   UID( parse.FullHeader.Header.BodySize );
+   TXT( "\nProcessor : " );
+   ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES );
+   EOL();
+
+   fi = tgsi_default_full_instruction();
+   fd = tgsi_default_full_declaration();
+
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      TXT( "\nType       : " );
+      ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES );
+      if( ignored ) {
+         TXT( "\nSize       : " );
+         UID( parse.FullToken.Token.Size );
+         if( deflt || parse.FullToken.Token.Extended ) {
+            TXT( "\nExtended   : " );
+            UID( parse.FullToken.Token.Extended );
+         }
+      }
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         dump_declaration_verbose(
+            &parse.FullToken.FullDeclaration,
+            ignored,
+            deflt,
+            &fd );
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         dump_immediate_verbose(
+            &parse.FullToken.FullImmediate,
+            ignored );
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         dump_instruction_verbose(
+            &parse.FullToken.FullInstruction,
+            ignored,
+            deflt,
+            &fi );
+         break;
+
+      default:
+         assert( 0 );
+      }
+
+      EOL();
+   }
+
+   TXT( "\ntgsi-dump end -------------------\n" );
+
+   tgsi_parse_free( &parse );
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h
new file mode 100644
index 00000000000..d91cd35b3b7
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h
@@ -0,0 +1,49 @@
+/**************************************************************************
+ * 
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_DUMP_C_H
+#define TGSI_DUMP_C_H
+
+#include "pipe/p_shader_tokens.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+#define TGSI_DUMP_C_IGNORED 1
+#define TGSI_DUMP_C_DEFAULT 2
+
+void
+tgsi_dump_c(
+   const struct tgsi_token *tokens,
+   uint flags );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_DUMP_C_H */
-- 
cgit v1.2.3


From 5e9ea9d938e9734524707e46be29e243e96f32a9 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 16:07:06 +0200
Subject: scons: List util/tgsi_dump_c.c.

---
 src/gallium/auxiliary/tgsi/SConscript | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 54f5c75db47..3bbfa1be547 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -7,6 +7,7 @@ tgsi = env.ConvenienceLibrary(
 		'exec/tgsi_sse2.c',
 		'util/tgsi_build.c',
 		'util/tgsi_dump.c',
+		'util/tgsi_dump_c.c',
 		'util/tgsi_iterate.c',
 		'util/tgsi_parse.c',
 		'util/tgsi_sanity.c',
-- 
cgit v1.2.3


From 15c902455fe1b4572e614bf30912d92fe9c7bb28 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 16:08:36 +0200
Subject: tgsi: Preserve flags parameter for tgsi_dump().

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 3 ++-
 src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 5a84b99166b..1056951bc84 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -486,7 +486,8 @@ tgsi_dump_instruction(
 
 void
 tgsi_dump(
-   const struct tgsi_token *tokens )
+   const struct tgsi_token *tokens,
+   uint flags )
 {
    struct tgsi_parse_context parse;
    struct tgsi_full_instruction fi;
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
index c130b9f1b7a..51c230b5db4 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
@@ -36,7 +36,8 @@ extern "C" {
 
 void
 tgsi_dump(
-   const struct tgsi_token *tokens );
+   const struct tgsi_token *tokens,
+   uint flags );
 
 struct tgsi_full_immediate;
 struct tgsi_full_instruction;
-- 
cgit v1.2.3


From 307a252d1529e8e9ffa10c88cc8f5d5412f587f5 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 16:12:44 +0200
Subject: tgsi: Fix dump enums.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
index 27b085dc077..acb1246b7a1 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
@@ -81,18 +81,11 @@ static const char *TGSI_FILES[] =
    "FILE_IMMEDIATE"
 };
 
-static const char *TGSI_DECLARES[] =
-{
-   "DECLARE_RANGE",
-   "DECLARE_MASK"
-};
-
 static const char *TGSI_INTERPOLATES[] =
 {
    "INTERPOLATE_CONSTANT",
    "INTERPOLATE_LINEAR",
-   "INTERPOLATE_PERSPECTIVE",
-   "INTERPOLATE_ATTRIB"
+   "INTERPOLATE_PERSPECTIVE"
 };
 
 static const char *TGSI_SEMANTICS[] =
@@ -250,9 +243,9 @@ static const char *TGSI_INSTRUCTION_EXTS[] =
 static const char *TGSI_PRECISIONS[] =
 {
    "PRECISION_DEFAULT",
-   "TGSI_PRECISION_FLOAT32",
-   "TGSI_PRECISION_FLOAT16",
-   "TGSI_PRECISION_FIXED12"
+   "PRECISION_FLOAT32",
+   "PRECISION_FLOAT16",
+   "PRECISION_FIXED12"
 };
 
 static const char *TGSI_CCS[] =
-- 
cgit v1.2.3


From 82f11f7e7c0bbe0452da65f08195c2a346f820e9 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 16:28:59 +0200
Subject: tgsi: Tidy up and fix tgsi_dump.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c   | 215 ++++++++++++--------------
 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c |   2 +-
 2 files changed, 101 insertions(+), 116 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 1056951bc84..fe8742dc02e 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -34,16 +34,14 @@
 
 static void
 dump_enum(
-   const unsigned    e,
-   const char        **enums,
-   const unsigned    enums_count )
+   uint e,
+   const char **enums,
+   uint enum_count )
 {
-   if (e >= enums_count) {
+   if (e >= enum_count)
       debug_printf( "%u", e );
-   }
-   else {
+   else
       debug_printf( "%s", enums[e] );
-   }
 }
 
 #define EOL()           debug_printf( "\n" )
@@ -55,14 +53,14 @@ dump_enum(
 #define FLT(F)          debug_printf( "%10.4f", F )
 #define ENM(E,ENUMS)    dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
 
-static const char *TGSI_PROCESSOR_TYPES_SHORT[] =
+static const char *processor_type_names[] =
 {
    "FRAG",
    "VERT",
    "GEOM"
 };
 
-static const char *TGSI_FILES_SHORT[] =
+static const char *file_names[] =
 {
    "NULL",
    "CONST",
@@ -74,14 +72,14 @@ static const char *TGSI_FILES_SHORT[] =
    "IMM"
 };
 
-static const char *TGSI_INTERPOLATES_SHORT[] =
+static const char *interpolate_names[] =
 {
    "CONSTANT",
    "LINEAR",
    "PERSPECTIVE"
 };
 
-static const char *TGSI_SEMANTICS_SHORT[] =
+static const char *semantic_names[] =
 {
    "POSITION",
    "COLOR",
@@ -92,12 +90,12 @@ static const char *TGSI_SEMANTICS_SHORT[] =
    "NORMAL"
 };
 
-static const char *TGSI_IMMS_SHORT[] =
+static const char *immediate_type_names[] =
 {
    "FLT32"
 };
 
-static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] =
+static const char *opcode_names[TGSI_OPCODE_LAST] =
 {
    "ARL",
    "MOV",
@@ -220,7 +218,7 @@ static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] =
    "SWZ"
 };
 
-static const char *TGSI_SWIZZLES_SHORT[] =
+static const char *swizzle_names[] =
 {
    "x",
    "y",
@@ -228,7 +226,7 @@ static const char *TGSI_SWIZZLES_SHORT[] =
    "w"
 };
 
-static const char *TGSI_TEXTURES_SHORT[] =
+static const char *texture_names[] =
 {
    "UNKNOWN",
    "1D",
@@ -241,7 +239,7 @@ static const char *TGSI_TEXTURES_SHORT[] =
    "SHADOWRECT"
 };
 
-static const char *TGSI_EXTSWIZZLES_SHORT[] =
+static const char *extswizzle_names[] =
 {
    "x",
    "y",
@@ -251,7 +249,7 @@ static const char *TGSI_EXTSWIZZLES_SHORT[] =
    "1"
 };
 
-static const char *TGSI_MODULATES_SHORT[TGSI_MODULATE_COUNT] =
+static const char *modulate_names[TGSI_MODULATE_COUNT] =
 {
    "",
    "_2X",
@@ -262,40 +260,55 @@ static const char *TGSI_MODULATES_SHORT[TGSI_MODULATE_COUNT] =
    "_D8"
 };
 
-void
-tgsi_dump_declaration(
-   const struct tgsi_full_declaration *decl )
+static void
+_dump_register(
+   uint file,
+   uint first,
+   uint last )
 {
-   TXT( "\nDCL " );
-   ENM( decl->Declaration.File, TGSI_FILES_SHORT );
-
+   ENM( file, file_names );
    CHR( '[' );
-   UID( decl->DeclarationRange.First );
-   if (decl->DeclarationRange.First != decl->DeclarationRange.Last) {
+   UID( first );
+   if (first != last) {
       TXT( ".." );
-      UID( decl->DeclarationRange.Last );
+      UID( last );
    }
    CHR( ']' );
+}
 
-   if( decl->Declaration.UsageMask != TGSI_WRITEMASK_XYZW ) {
+static void
+_dump_writemask(
+   uint writemask )
+{
+   if (writemask != TGSI_WRITEMASK_XYZW) {
       CHR( '.' );
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
+      if (writemask & TGSI_WRITEMASK_X)
          CHR( 'x' );
-      }
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) {
+      if (writemask & TGSI_WRITEMASK_Y)
          CHR( 'y' );
-      }
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) {
+      if (writemask & TGSI_WRITEMASK_Z)
          CHR( 'z' );
-      }
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) {
+      if (writemask & TGSI_WRITEMASK_W)
          CHR( 'w' );
-      }
    }
+}
+
+void
+tgsi_dump_declaration(
+   const struct tgsi_full_declaration *decl )
+{
+   TXT( "\nDCL " );
+
+   _dump_register(
+      decl->Declaration.File,
+      decl->DeclarationRange.First,
+      decl->DeclarationRange.Last );
+   _dump_writemask(
+      decl->Declaration.UsageMask );
 
    if (decl->Declaration.Semantic) {
       TXT( ", " );
-      ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT );
+      ENM( decl->Semantic.SemanticName, semantic_names );
       if (decl->Semantic.SemanticIndex != 0 ||
           decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) {
          CHR( '[' );
@@ -305,32 +318,30 @@ tgsi_dump_declaration(
    }
 
    TXT( ", " );
-   ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES_SHORT );
+   ENM( decl->Declaration.Interpolate, interpolate_names );
 }
 
 void
 tgsi_dump_immediate(
    const struct tgsi_full_immediate *imm )
 {
-   unsigned i;
+   uint i;
 
    TXT( "\nIMM " );
-   ENM( imm->Immediate.DataType, TGSI_IMMS_SHORT );
+   ENM( imm->Immediate.DataType, immediate_type_names );
 
    TXT( " { " );
-   for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
-      switch( imm->Immediate.DataType ) {
+   for (i = 0; i < imm->Immediate.Size - 1; i++) {
+      switch (imm->Immediate.DataType) {
       case TGSI_IMM_FLOAT32:
          FLT( imm->u.ImmediateFloat32[i].Float );
          break;
-
       default:
          assert( 0 );
       }
 
-      if( i < imm->Immediate.Size - 2 ) {
+      if (i < imm->Immediate.Size - 2)
          TXT( ", " );
-      }
    }
    TXT( " }" );
 }
@@ -340,15 +351,15 @@ tgsi_dump_instruction(
    const struct tgsi_full_instruction *inst,
    uint instno )
 {
-   unsigned i;
-   boolean  first_reg = TRUE;
+   uint i;
+   boolean first_reg = TRUE;
 
    EOL();
    UID( instno );
    CHR( ':' );
-   ENM( inst->Instruction.Opcode, TGSI_OPCODES_SHORT );
+   ENM( inst->Instruction.Opcode, opcode_names );
 
-   switch( inst->Instruction.Saturate ) {
+   switch (inst->Instruction.Saturate) {
    case TGSI_SAT_NONE:
       break;
    case TGSI_SAT_ZERO_ONE:
@@ -361,47 +372,28 @@ tgsi_dump_instruction(
       assert( 0 );
    }
 
-   for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
       const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
 
-      if( !first_reg ) {
+      if (!first_reg)
          CHR( ',' );
-      }
       CHR( ' ' );
 
-      ENM( dst->DstRegister.File, TGSI_FILES_SHORT );
-
-      CHR( '[' );
-      SID( dst->DstRegister.Index );
-      CHR( ']' );
-
-      ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES_SHORT );
-
-      if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) {
-         CHR( '.' );
-         if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_X ) {
-            CHR( 'x' );
-         }
-         if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Y ) {
-            CHR( 'y' );
-         }
-         if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Z ) {
-            CHR( 'z' );
-         }
-         if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_W ) {
-            CHR( 'w' );
-         }
-      }
+      _dump_register(
+         dst->DstRegister.File,
+         dst->DstRegister.Index,
+         dst->DstRegister.Index );
+      ENM( dst->DstRegisterExtModulate.Modulate, modulate_names );
+      _dump_writemask( dst->DstRegister.WriteMask );
 
       first_reg = FALSE;
    }
 
-   for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
       const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
 
-      if( !first_reg ) {
+      if (!first_reg)
          CHR( ',' );
-      }
       CHR( ' ' );
 
       if (src->SrcRegisterExtMod.Negate)
@@ -417,40 +409,45 @@ tgsi_dump_instruction(
       if (src->SrcRegister.Negate)
          CHR( '-' );
 
-      ENM( src->SrcRegister.File, TGSI_FILES_SHORT );
-
-      CHR( '[' );
       if (src->SrcRegister.Indirect) {
+         /* TODO: Tidy up
+          */
+         ENM( src->SrcRegister.File, file_names );
+         CHR( '[' );
          TXT( "ADDR[0]" );
          if (src->SrcRegister.Index != 0) {
             if (src->SrcRegister.Index > 0)
                CHR( '+' );
             SID( src->SrcRegister.Index );
          }
+         CHR( ']' );
+      }
+      else {
+         _dump_register(
+            src->SrcRegister.File,
+            src->SrcRegister.Index,
+            src->SrcRegister.Index );
       }
-      else
-         SID( src->SrcRegister.Index );
-      CHR( ']' );
 
       if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
           src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
           src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
           src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) {
          CHR( '.' );
-         ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES_SHORT );
-         ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES_SHORT );
-         ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES_SHORT );
-         ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES_SHORT );
+         ENM( src->SrcRegister.SwizzleX, swizzle_names );
+         ENM( src->SrcRegister.SwizzleY, swizzle_names );
+         ENM( src->SrcRegister.SwizzleZ, swizzle_names );
+         ENM( src->SrcRegister.SwizzleW, swizzle_names );
       }
       if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
           src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
           src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
           src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
          CHR( '.' );
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT );
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT );
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT );
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT );
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names );
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names );
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names );
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names );
       }
 
       if (src->SrcRegisterExtMod.Complement)
@@ -469,10 +466,10 @@ tgsi_dump_instruction(
 
    if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) {
       TXT( ", " );
-      ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES_SHORT );
+      ENM( inst->InstructionExtTexture.Texture, texture_names );
    }
 
-   switch( inst->Instruction.Opcode ) {
+   switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_IF:
    case TGSI_OPCODE_ELSE:
    case TGSI_OPCODE_BGNLOOP2:
@@ -490,47 +487,35 @@ tgsi_dump(
    uint flags )
 {
    struct tgsi_parse_context parse;
-   struct tgsi_full_instruction fi;
-   struct tgsi_full_declaration fd;
-   unsigned instno = 0;
+   uint instno = 0;
 
    /* sanity checks */
-   assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
-   assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "OPCODE_END") == 0);
-   assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0);
+   assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 );
+   assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 );
+   assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 );
 
    tgsi_parse_init( &parse, tokens );
 
    EOL();
-   ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT );
+   ENM( parse.FullHeader.Processor.Processor, processor_type_names );
    UID( parse.FullVersion.Version.MajorVersion );
    CHR( '.' );
    UID( parse.FullVersion.Version.MinorVersion );
 
-   fi = tgsi_default_full_instruction();
-   fd = tgsi_default_full_declaration();
-
-   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+   while (!tgsi_parse_end_of_tokens( &parse )) {
       tgsi_parse_token( &parse );
 
-      switch( parse.FullToken.Token.Type ) {
+      switch (parse.FullToken.Token.Type) {
       case TGSI_TOKEN_TYPE_DECLARATION:
-         tgsi_dump_declaration(
-            &parse.FullToken.FullDeclaration );
+         tgsi_dump_declaration( &parse.FullToken.FullDeclaration );
          break;
-
       case TGSI_TOKEN_TYPE_IMMEDIATE:
-         tgsi_dump_immediate(
-            &parse.FullToken.FullImmediate );
+         tgsi_dump_immediate( &parse.FullToken.FullImmediate );
          break;
-
       case TGSI_TOKEN_TYPE_INSTRUCTION:
-         tgsi_dump_instruction(
-            &parse.FullToken.FullInstruction,
-            instno );
+         tgsi_dump_instruction( &parse.FullToken.FullInstruction, instno );
          instno++;
          break;
-
       default:
          assert( 0 );
       }
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
index acb1246b7a1..e7d1079588a 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
@@ -773,7 +773,7 @@ tgsi_dump_c(
    /* sanity checks */
    assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
    assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
-   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "END") == 0);
+   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
 
    tgsi_parse_init( &parse, tokens );
 
-- 
cgit v1.2.3


From fef7f2b070ab797f78ebc210bb40a24685c6d4b7 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 16:42:43 +0200
Subject: tgsi: Fix error handling.

---
 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
index 7fc4c29c685..a4edbb6d08f 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
@@ -263,7 +263,7 @@ tgsi_sanity_check(
    ctx.errors = 0;
    ctx.warnings = 0;
 
-   if (tgsi_iterate_shader( tokens, &ctx.iter ) == -1)
+   if (!tgsi_iterate_shader( tokens, &ctx.iter ))
       return FALSE;
 
    return ctx.errors == 0;
-- 
cgit v1.2.3


From 57482e1549e131fa1aebd442a677a95909b22508 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 16:42:57 +0200
Subject: tgsi: Keep version and processor info in iterate_ctx.

---
 src/gallium/auxiliary/tgsi/util/tgsi_iterate.c | 3 +++
 src/gallium/auxiliary/tgsi/util/tgsi_iterate.h | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c
index 43d7a6b1d59..5371a88b964 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c
@@ -38,6 +38,9 @@ tgsi_iterate_shader(
    if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK)
       return FALSE;
 
+   ctx->processor = parse.FullHeader.Processor;
+   ctx->version = parse.FullVersion.Version;
+
    if (ctx->prolog)
       if (!ctx->prolog( ctx ))
          goto fail;
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h
index 18729c2d1a8..f5bebf89b82 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h
@@ -59,6 +59,9 @@ struct tgsi_iterate_context
    boolean
    (* epilog)(
       struct tgsi_iterate_context *ctx );
+
+   struct tgsi_processor processor;
+   struct tgsi_version version;
 };
 
 boolean
-- 
cgit v1.2.3


From 83f245bd242cd2c5f59f072095dcc47aa6153b21 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 16:43:26 +0200
Subject: tgsi: Use tgsi_iterate in tgsi_dump.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 89 ++++++++++++++++++-----------
 1 file changed, 57 insertions(+), 32 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index fe8742dc02e..08d0d5200c8 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -26,11 +26,15 @@
  **************************************************************************/
 
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-#include "util/u_string.h"
 #include "tgsi_dump.h"
-#include "tgsi_parse.h"
-#include "tgsi_build.h"
+#include "tgsi_iterate.h"
+
+struct dump_ctx
+{
+   struct tgsi_iterate_context iter;
+
+   uint instno;
+};
 
 static void
 dump_enum(
@@ -321,6 +325,15 @@ tgsi_dump_declaration(
    ENM( decl->Declaration.Interpolate, interpolate_names );
 }
 
+static boolean
+iter_declaration(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_declaration *decl )
+{
+   tgsi_dump_declaration( decl );
+   return TRUE;
+}
+
 void
 tgsi_dump_immediate(
    const struct tgsi_full_immediate *imm )
@@ -346,6 +359,15 @@ tgsi_dump_immediate(
    TXT( " }" );
 }
 
+static boolean
+iter_immediate(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_immediate *imm )
+{
+   tgsi_dump_immediate( imm );
+   return TRUE;
+}
+
 void
 tgsi_dump_instruction(
    const struct tgsi_full_instruction *inst,
@@ -481,45 +503,48 @@ tgsi_dump_instruction(
    }
 }
 
+static boolean
+iter_instruction(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_instruction *inst )
+{
+   struct dump_ctx *ctx = (struct dump_ctx *) iter;
+
+   tgsi_dump_instruction( inst, ctx->instno++ );
+   return TRUE;
+}
+
+static boolean
+prolog(
+   struct tgsi_iterate_context *ctx )
+{
+   EOL();
+   ENM( ctx->processor.Processor, processor_type_names );
+   UID( ctx->version.MajorVersion );
+   CHR( '.' );
+   UID( ctx->version.MinorVersion );
+   return TRUE;
+}
+
 void
 tgsi_dump(
    const struct tgsi_token *tokens,
    uint flags )
 {
-   struct tgsi_parse_context parse;
-   uint instno = 0;
+   struct dump_ctx ctx;
 
    /* sanity checks */
    assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 );
    assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 );
    assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 );
 
-   tgsi_parse_init( &parse, tokens );
-
-   EOL();
-   ENM( parse.FullHeader.Processor.Processor, processor_type_names );
-   UID( parse.FullVersion.Version.MajorVersion );
-   CHR( '.' );
-   UID( parse.FullVersion.Version.MinorVersion );
-
-   while (!tgsi_parse_end_of_tokens( &parse )) {
-      tgsi_parse_token( &parse );
+   ctx.iter.prolog = prolog;
+   ctx.iter.iterate_instruction = iter_instruction;
+   ctx.iter.iterate_declaration = iter_declaration;
+   ctx.iter.iterate_immediate = iter_immediate;
+   ctx.iter.epilog = NULL;
 
-      switch (parse.FullToken.Token.Type) {
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         tgsi_dump_declaration( &parse.FullToken.FullDeclaration );
-         break;
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         tgsi_dump_immediate( &parse.FullToken.FullImmediate );
-         break;
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         tgsi_dump_instruction( &parse.FullToken.FullInstruction, instno );
-         instno++;
-         break;
-      default:
-         assert( 0 );
-      }
-   }
+   ctx.instno = 0;
 
-   tgsi_parse_free( &parse );
+   tgsi_iterate_shader( tokens, &ctx.iter );
 }
-- 
cgit v1.2.3


From 613f0df64dd2c2db71dd73b595225016ae596576 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 19:29:52 +0200
Subject: tgsi: Remove redundant code.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c   | 1 -
 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c | 1 -
 2 files changed, 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 08d0d5200c8..a9d500c8cf7 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -536,7 +536,6 @@ tgsi_dump(
    /* sanity checks */
    assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 );
    assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 );
-   assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 );
 
    ctx.iter.prolog = prolog;
    ctx.iter.iterate_instruction = iter_instruction;
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
index e7d1079588a..eabd74bd6d9 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
@@ -773,7 +773,6 @@ tgsi_dump_c(
    /* sanity checks */
    assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
    assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
-   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
 
    tgsi_parse_init( &parse, tokens );
 
-- 
cgit v1.2.3


From 73e1d0be756537376495547bc1e798805884b8ef Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 21:23:04 +0200
Subject: tgsi: Add support for indirect addressing in dump, sanity and text
 modules.

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c   |  53 +++++++---
 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c |  86 ++++++++++-----
 src/gallium/auxiliary/tgsi/util/tgsi_text.c   | 147 ++++++++++++++++++++------
 3 files changed, 215 insertions(+), 71 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index a9d500c8cf7..94180f7e507 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -265,17 +265,48 @@ static const char *modulate_names[TGSI_MODULATE_COUNT] =
 };
 
 static void
-_dump_register(
+_dump_register_prefix(
    uint file,
    uint first,
    uint last )
 {
+   
+   
+}
+
+static void
+_dump_register(
+   uint file,
+   int first,
+   int last )
+{
    ENM( file, file_names );
    CHR( '[' );
-   UID( first );
+   SID( first );
    if (first != last) {
       TXT( ".." );
-      UID( last );
+      SID( last );
+   }
+   CHR( ']' );
+}
+
+static void
+_dump_register_ind(
+   uint file,
+   int index,
+   uint ind_file,
+   int ind_index )
+{
+   ENM( file, file_names );
+   CHR( '[' );
+   ENM( ind_file, file_names );
+   CHR( '[' );
+   SID( ind_index );
+   CHR( ']' );
+   if (index != 0) {
+      if (index > 0)
+         CHR( '+' );
+      SID( index );
    }
    CHR( ']' );
 }
@@ -432,17 +463,11 @@ tgsi_dump_instruction(
          CHR( '-' );
 
       if (src->SrcRegister.Indirect) {
-         /* TODO: Tidy up
-          */
-         ENM( src->SrcRegister.File, file_names );
-         CHR( '[' );
-         TXT( "ADDR[0]" );
-         if (src->SrcRegister.Index != 0) {
-            if (src->SrcRegister.Index > 0)
-               CHR( '+' );
-            SID( src->SrcRegister.Index );
-         }
-         CHR( ']' );
+         _dump_register_ind(
+            src->SrcRegister.File,
+            src->SrcRegister.Index,
+            src->SrcRegisterInd.File,
+            src->SrcRegisterInd.Index );
       }
       else {
          _dump_register(
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
index a4edbb6d08f..f11de815b06 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
@@ -39,8 +39,9 @@ struct sanity_check_ctx
 {
    struct tgsi_iterate_context iter;
 
-   reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8];
-   reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8];
+   reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG];
+   reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG];
+   boolean regs_ind_used[TGSI_FILE_COUNT];
    uint num_imms;
    uint num_instructions;
    uint index_of_END;
@@ -83,24 +84,59 @@ static boolean
 is_register_declared(
    struct sanity_check_ctx *ctx,
    uint file,
-   uint index )
+   int index )
 {
-   assert( index < MAX_REGISTERS );
+   assert( index >= 0 && index < MAX_REGISTERS );
 
    return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
 }
 
+static boolean
+is_any_register_declared(
+   struct sanity_check_ctx *ctx,
+   uint file )
+{
+   uint i;
+
+   for (i = 0; i < MAX_REGISTERS / BITS_IN_REG_FLAG; i++)
+      if (ctx->regs_decl[file][i])
+         return TRUE;
+   return FALSE;
+}
+
 static boolean
 is_register_used(
    struct sanity_check_ctx *ctx,
    uint file,
-   uint index )
+   int index )
 {
    assert( index < MAX_REGISTERS );
 
    return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
 }
 
+static boolean
+check_register_usage(
+   struct sanity_check_ctx *ctx,
+   uint file,
+   int index,
+   boolean indirect )
+{
+   if (!check_file_name( ctx, file ))
+      return FALSE;
+   if (indirect) {
+      if (!is_any_register_declared( ctx, file ))
+         report_error( ctx, "Undeclared source register" );
+      ctx->regs_ind_used[file] = TRUE;
+   }
+   else {
+      if (!is_register_declared( ctx, file, index ))
+         report_error( ctx, "Undeclared destination register" );
+      ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG));
+   }
+   return TRUE;
+}
+
 static boolean
 iter_instruction(
    struct tgsi_iterate_context *iter,
@@ -122,28 +158,25 @@ iter_instruction(
     * Mark the registers as used.
     */
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
-      uint file;
-      uint index;
-
-      file = inst->FullDstRegisters[i].DstRegister.File;
-      if (!check_file_name( ctx, file ))
-         return TRUE;
-      index = inst->FullDstRegisters[i].DstRegister.Index;
-      if (!is_register_declared( ctx, file, index ))
-         report_error( ctx, "Undeclared destination register" );
-      ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG));
+      check_register_usage(
+         ctx,
+         inst->FullDstRegisters[i].DstRegister.File,
+         inst->FullDstRegisters[i].DstRegister.Index,
+         FALSE );
    }
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-      uint file;
-      uint index;
-
-      file = inst->FullSrcRegisters[i].SrcRegister.File;
-      if (!check_file_name( ctx, file ))
-         return TRUE;
-      index = inst->FullSrcRegisters[i].SrcRegister.Index;
-      if (!is_register_declared( ctx, file, index ))
-         report_error( ctx, "Undeclared source register" );
-      ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG));
+      check_register_usage(
+         ctx,
+         inst->FullSrcRegisters[i].SrcRegister.File,
+         inst->FullSrcRegisters[i].SrcRegister.Index,
+         inst->FullSrcRegisters[i].SrcRegister.Indirect );
+      if (inst->FullSrcRegisters[i].SrcRegister.Indirect) {
+         check_register_usage(
+            ctx,
+            inst->FullSrcRegisters[i].SrcRegisterInd.File,
+            inst->FullSrcRegisters[i].SrcRegisterInd.Index,
+            FALSE );
+      }
    }
 
    ctx->num_instructions++;
@@ -228,7 +261,7 @@ epilog(
       uint i;
 
       for (i = 0; i < MAX_REGISTERS; i++) {
-         if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i )) {
+         if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) {
             report_warning( ctx, "Register never used" );
          }
       }
@@ -256,6 +289,7 @@ tgsi_sanity_check(
 
    memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) );
    memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) );
+   memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) );
    ctx.num_imms = 0;
    ctx.num_instructions = 0;
    ctx.index_of_END = ~0;
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index 9ed0f52fc7c..2a503154630 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -226,24 +226,21 @@ static const char *file_names[TGSI_FILE_COUNT] =
 };
 
 static boolean
-parse_file(
-   struct translate_ctx *ctx,
-   uint *file )
+parse_file( const char **pcur, uint *file )
 {
    uint i;
 
    for (i = 0; i < TGSI_FILE_COUNT; i++) {
-      const char *cur = ctx->cur;
+      const char *cur = *pcur;
 
       if (str_match_no_case( &cur, file_names[i] )) {
          if (!is_digit_alpha_underscore( cur )) {
-            ctx->cur = cur;
+            *pcur = cur;
             *file = i;
             return TRUE;
          }
       }
    }
-   report_error( ctx, "Unknown register file" );
    return FALSE;
 }
 
@@ -290,41 +287,57 @@ parse_opt_writemask(
    return TRUE;
 }
 
-/* Parse register part common for decls and operands.
- *    <register_prefix> ::= <file> `[' <index>
+/* <register_file_bracket> ::= <file> `['
  */
 static boolean
-parse_register_prefix(
+parse_register_file_bracket(
    struct translate_ctx *ctx,
-   uint *file,
-   uint *index )
+   uint *file )
 {
-   if (!parse_file( ctx, file ))
+   if (!parse_file( &ctx->cur, file )) {
+      report_error( ctx, "Unknown register file" );
       return FALSE;
+   }
    eat_opt_white( &ctx->cur );
    if (*ctx->cur != '[') {
       report_error( ctx, "Expected `['" );
       return FALSE;
    }
    ctx->cur++;
+   return TRUE;
+}
+
+/* <register_file_bracket_index> ::= <register_file_bracket> <uint>
+ */
+static boolean
+parse_register_file_bracket_index(
+   struct translate_ctx *ctx,
+   uint *file,
+   int *index )
+{
+   uint uindex;
+
+   if (!parse_register_file_bracket( ctx, file ))
+      return FALSE;
    eat_opt_white( &ctx->cur );
-   if (!parse_uint( &ctx->cur, index )) {
-      report_error( ctx, "Expected literal integer" );
+   if (!parse_uint( &ctx->cur, &uindex )) {
+      report_error( ctx, "Expected literal unsigned integer" );
       return FALSE;
    }
+   *index = (int) uindex;
    return TRUE;
 }
 
-/* Parse register operand.
- *    <register> ::= <register_prefix> `]'
+/* Parse destination register operand.
+ *    <register_dst> ::= <register_file_bracket_index> `]'
  */
 static boolean
-parse_register(
+parse_register_dst(
    struct translate_ctx *ctx,
    uint *file,
-   uint *index )
+   int *index )
 {
-   if (!parse_register_prefix( ctx, file, index ))
+   if (!parse_register_file_bracket_index( ctx, file, index ))
       return FALSE;
    eat_opt_white( &ctx->cur );
    if (*ctx->cur != ']') {
@@ -332,30 +345,95 @@ parse_register(
       return FALSE;
    }
    ctx->cur++;
-   /* TODO: Modulate suffix */
+   return TRUE;
+}
+
+/* Parse source register operand.
+ *    <register_src> ::= <register_file_bracket_index> `]' |
+ *                       <register_file_bracket> <register_dst> `]' |
+ *                       <register_file_bracket> <register_dst> `+' <uint> `]' |
+ *                       <register_file_bracket> <register_dst> `-' <uint> `]'
+ */
+static boolean
+parse_register_src(
+   struct translate_ctx *ctx,
+   uint *file,
+   int *index,
+   uint *ind_file,
+   int *ind_index )
+{
+   const char *cur;
+   uint uindex;
+
+   if (!parse_register_file_bracket( ctx, file ))
+      return FALSE;
+   eat_opt_white( &ctx->cur );
+   cur = ctx->cur;
+   if (parse_file( &cur, ind_file )) {
+      if (!parse_register_dst( ctx, ind_file, ind_index ))
+         return FALSE;
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur == '+' || *ctx->cur == '-') {
+         boolean negate;
+
+         negate = *ctx->cur == '-';
+         ctx->cur++;
+         eat_opt_white( &ctx->cur );
+         if (!parse_uint( &ctx->cur, &uindex )) {
+            report_error( ctx, "Expected literal unsigned integer" );
+            return FALSE;
+         }
+         if (negate)
+            *index = -(int) uindex;
+         else
+            *index = (int) uindex;
+      }
+      else {
+         *index = 0;
+      }
+   }
+   else {
+      if (!parse_uint( &ctx->cur, &uindex )) {
+         report_error( ctx, "Expected literal unsigned integer" );
+         return FALSE;
+      }
+      *index = (int) uindex;
+      *ind_file = TGSI_FILE_NULL;
+      *ind_index = 0;
+   }
+   eat_opt_white( &ctx->cur );
+   if (*ctx->cur != ']') {
+      report_error( ctx, "Expected `]'" );
+      return FALSE;
+   }
+   ctx->cur++;
    return TRUE;
 }
 
 /* Parse register declaration.
- *    <register> ::= <register_prefix> `]' | <register_prefix> `..' <index> `]'
+ *    <register_dcl> ::= <register_file_bracket_index> `]' |
+ *                       <register_file_bracket_index> `..' <index> `]'
  */
 static boolean
 parse_register_dcl(
    struct translate_ctx *ctx,
    uint *file,
-   uint *first,
-   uint *last )
+   int *first,
+   int *last )
 {
-   if (!parse_register_prefix( ctx, file, first ))
+   if (!parse_register_file_bracket_index( ctx, file, first ))
       return FALSE;
    eat_opt_white( &ctx->cur );
    if (ctx->cur[0] == '.' && ctx->cur[1] == '.') {
+      uint uindex;
+
       ctx->cur += 2;
       eat_opt_white( &ctx->cur );
-      if (!parse_uint( &ctx->cur, last )) {
+      if (!parse_uint( &ctx->cur, &uindex )) {
          report_error( ctx, "Expected literal integer" );
          return FALSE;
       }
+      *last = (int) uindex;
       eat_opt_white( &ctx->cur );
    }
    else {
@@ -386,11 +464,11 @@ parse_dst_operand(
    struct tgsi_full_dst_register *dst )
 {
    uint file;
-   uint index;
+   int index;
    uint writemask;
    const char *cur;
 
-   if (!parse_register( ctx, &file, &index ))
+   if (!parse_register_dst( ctx, &file, &index ))
       return FALSE;
 
    cur = ctx->cur;
@@ -426,7 +504,9 @@ parse_src_operand(
    const char *cur;
    float value;
    uint file;
-   uint index;
+   int index;
+   uint ind_file;
+   int ind_index;
 
    if (*ctx->cur == '-') {
       cur = ctx->cur;
@@ -498,10 +578,15 @@ parse_src_operand(
       }
    }
 
-   if (!parse_register( ctx, &file, &index ))
+   if (!parse_register_src( ctx, &file, &index, &ind_file, &ind_index ))
       return FALSE;
    src->SrcRegister.File = file;
    src->SrcRegister.Index = index;
+   if (ind_file != TGSI_FILE_NULL) {
+      src->SrcRegister.Indirect = 1;
+      src->SrcRegisterInd.File = ind_file;
+      src->SrcRegisterInd.Index = ind_index;
+   }
 
    /* Parse optional swizzle
     */
@@ -864,8 +949,8 @@ static boolean parse_declaration( struct translate_ctx *ctx )
 {
    struct tgsi_full_declaration decl;
    uint file;
-   uint first;
-   uint last;
+   int first;
+   int last;
    uint writemask;
    const char *cur;
    uint advance;
-- 
cgit v1.2.3


From 25a7f422b4e307dce966220d47794fb056d04aac Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 21:28:28 +0200
Subject: tgsi: Warn if an indirect register not ADDR[0].

---
 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
index f11de815b06..08f3995b13e 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
@@ -171,11 +171,14 @@ iter_instruction(
          inst->FullSrcRegisters[i].SrcRegister.Index,
          inst->FullSrcRegisters[i].SrcRegister.Indirect );
       if (inst->FullSrcRegisters[i].SrcRegister.Indirect) {
-         check_register_usage(
-            ctx,
-            inst->FullSrcRegisters[i].SrcRegisterInd.File,
-            inst->FullSrcRegisters[i].SrcRegisterInd.Index,
-            FALSE );
+         uint file;
+         int index;
+
+         file = inst->FullSrcRegisters[i].SrcRegisterInd.File;
+         index = inst->FullSrcRegisters[i].SrcRegisterInd.Index;
+         check_register_usage( ctx, file, index, FALSE );
+         if (file != TGSI_FILE_ADDRESS || index != 0)
+            report_warning( ctx, "Indirect register not ADDR[0]" );
       }
    }
 
-- 
cgit v1.2.3


From 9d068d4b90813d5afa243d669e8437b8922ac656 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 21:38:20 +0200
Subject: tgsi: Add support for branch instructions with target labels.

---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 256 +++++++++++++++-------------
 1 file changed, 137 insertions(+), 119 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index 2a503154630..442016104b8 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -687,130 +687,131 @@ struct opcode_info
    uint num_dst;
    uint num_src;
    uint is_tex;
+   uint is_branch;
    const char *mnemonic;
 };
 
 static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] =
 {
-   { 1, 1, 0, "ARL" },
-   { 1, 1, 0, "MOV" },
-   { 1, 1, 0, "LIT" },
-   { 1, 1, 0, "RCP" },
-   { 1, 1, 0, "RSQ" },
-   { 1, 1, 0, "EXP" },
-   { 1, 1, 0, "LOG" },
-   { 1, 2, 0, "MUL" },
-   { 1, 2, 0, "ADD" },
-   { 1, 2, 0, "DP3" },
-   { 1, 2, 0, "DP4" },
-   { 1, 2, 0, "DST" },
-   { 1, 2, 0, "MIN" },
-   { 1, 2, 0, "MAX" },
-   { 1, 2, 0, "SLT" },
-   { 1, 2, 0, "SGE" },
-   { 1, 3, 0, "MAD" },
-   { 1, 2, 0, "SUB" },
-   { 1, 3, 0, "LERP" },
-   { 1, 3, 0, "CND" },
-   { 1, 3, 0, "CND0" },
-   { 1, 3, 0, "DOT2ADD" },
-   { 1, 2, 0, "INDEX" },
-   { 1, 1, 0, "NEGATE" },
-   { 1, 1, 0, "FRAC" },
-   { 1, 3, 0, "CLAMP" },
-   { 1, 1, 0, "FLOOR" },
-   { 1, 1, 0, "ROUND" },
-   { 1, 1, 0, "EXPBASE2" },
-   { 1, 1, 0, "LOGBASE2" },
-   { 1, 2, 0, "POWER" },
-   { 1, 2, 0, "CROSSPRODUCT" },
-   { 1, 2, 0, "MULTIPLYMATRIX" },
-   { 1, 1, 0, "ABS" },
-   { 1, 1, 0, "RCC" },
-   { 1, 2, 0, "DPH" },
-   { 1, 1, 0, "COS" },
-   { 1, 1, 0, "DDX" },
-   { 1, 1, 0, "DDY" },
-   { 0, 1, 0, "KILP" },
-   { 1, 1, 0, "PK2H" },
-   { 1, 1, 0, "PK2US" },
-   { 1, 1, 0, "PK4B" },
-   { 1, 1, 0, "PK4UB" },
-   { 1, 2, 0, "RFL" },
-   { 1, 2, 0, "SEQ" },
-   { 1, 2, 0, "SFL" },
-   { 1, 2, 0, "SGT" },
-   { 1, 1, 0, "SIN" },
-   { 1, 2, 0, "SLE" },
-   { 1, 2, 0, "SNE" },
-   { 1, 2, 0, "STR" },
-   { 1, 2, 1, "TEX" },
-   { 1, 4, 1, "TXD" },
-   { 1, 2, 1, "TXP" },
-   { 1, 1, 0, "UP2H" },
-   { 1, 1, 0, "UP2US" },
-   { 1, 1, 0, "UP4B" },
-   { 1, 1, 0, "UP4UB" },
-   { 1, 3, 0, "X2D" },
-   { 1, 1, 0, "ARA" },
-   { 1, 1, 0, "ARR" },
-   { 0, 1, 0, "BRA" },
-   { 0, 0, 0, "CAL" },
-   { 0, 0, 0, "RET" },
-   { 1, 1, 0, "SSG" },
-   { 1, 3, 0, "CMP" },
-   { 1, 1, 0, "SCS" },
-   { 1, 2, 1, "TXB" },
-   { 1, 1, 0, "NRM" },
-   { 1, 2, 0, "DIV" },
-   { 1, 2, 0, "DP2" },
-   { 1, 2, 1, "TXL" },
-   { 0, 0, 0, "BRK" },
-   { 0, 1, 0, "IF" },
-   { 0, 0, 0, "LOOP" },
-   { 0, 1, 0, "REP" },
-   { 0, 0, 0, "ELSE" },
-   { 0, 0, 0, "ENDIF" },
-   { 0, 0, 0, "ENDLOOP" },
-   { 0, 0, 0, "ENDREP" },
-   { 0, 1, 0, "PUSHA" },
-   { 1, 0, 0, "POPA" },
-   { 1, 1, 0, "CEIL" },
-   { 1, 1, 0, "I2F" },
-   { 1, 1, 0, "NOT" },
-   { 1, 1, 0, "TRUNC" },
-   { 1, 2, 0, "SHL" },
-   { 1, 2, 0, "SHR" },
-   { 1, 2, 0, "AND" },
-   { 1, 2, 0, "OR" },
-   { 1, 2, 0, "MOD" },
-   { 1, 2, 0, "XOR" },
-   { 1, 3, 0, "SAD" },
-   { 1, 2, 1, "TXF" },
-   { 1, 2, 1, "TXQ" },
-   { 0, 0, 0, "CONT" },
-   { 0, 0, 0, "EMIT" },
-   { 0, 0, 0, "ENDPRIM" },
-   { 0, 0, 0, "BGNLOOP2" },
-   { 0, 0, 0, "BGNSUB" },
-   { 0, 0, 0, "ENDLOOP2" },
-   { 0, 0, 0, "ENDSUB" },
-   { 1, 1, 0, "NOISE1" },
-   { 1, 1, 0, "NOISE2" },
-   { 1, 1, 0, "NOISE3" },
-   { 1, 1, 0, "NOISE4" },
-   { 0, 0, 0, "NOP" },
-   { 1, 2, 0, "M4X3" },
-   { 1, 2, 0, "M3X4" },
-   { 1, 2, 0, "M3X3" },
-   { 1, 2, 0, "M3X2" },
-   { 1, 1, 0, "NRM4" },
-   { 0, 1, 0, "CALLNZ" },
-   { 0, 1, 0, "IFC" },
-   { 0, 1, 0, "BREAKC" },
-   { 0, 0, 0, "KIL" },
-   { 0, 0, 0, "END" },
-   { 1, 1, 0, "SWZ" }
+   { 1, 1, 0, 0, "ARL" },
+   { 1, 1, 0, 0, "MOV" },
+   { 1, 1, 0, 0, "LIT" },
+   { 1, 1, 0, 0, "RCP" },
+   { 1, 1, 0, 0, "RSQ" },
+   { 1, 1, 0, 0, "EXP" },
+   { 1, 1, 0, 0, "LOG" },
+   { 1, 2, 0, 0, "MUL" },
+   { 1, 2, 0, 0, "ADD" },
+   { 1, 2, 0, 0, "DP3" },
+   { 1, 2, 0, 0, "DP4" },
+   { 1, 2, 0, 0, "DST" },
+   { 1, 2, 0, 0, "MIN" },
+   { 1, 2, 0, 0, "MAX" },
+   { 1, 2, 0, 0, "SLT" },
+   { 1, 2, 0, 0, "SGE" },
+   { 1, 3, 0, 0, "MAD" },
+   { 1, 2, 0, 0, "SUB" },
+   { 1, 3, 0, 0, "LERP" },
+   { 1, 3, 0, 0, "CND" },
+   { 1, 3, 0, 0, "CND0" },
+   { 1, 3, 0, 0, "DOT2ADD" },
+   { 1, 2, 0, 0, "INDEX" },
+   { 1, 1, 0, 0, "NEGATE" },
+   { 1, 1, 0, 0, "FRAC" },
+   { 1, 3, 0, 0, "CLAMP" },
+   { 1, 1, 0, 0, "FLOOR" },
+   { 1, 1, 0, 0, "ROUND" },
+   { 1, 1, 0, 0, "EXPBASE2" },
+   { 1, 1, 0, 0, "LOGBASE2" },
+   { 1, 2, 0, 0, "POWER" },
+   { 1, 2, 0, 0, "CROSSPRODUCT" },
+   { 1, 2, 0, 0, "MULTIPLYMATRIX" },
+   { 1, 1, 0, 0, "ABS" },
+   { 1, 1, 0, 0, "RCC" },
+   { 1, 2, 0, 0, "DPH" },
+   { 1, 1, 0, 0, "COS" },
+   { 1, 1, 0, 0, "DDX" },
+   { 1, 1, 0, 0, "DDY" },
+   { 0, 1, 0, 0, "KILP" },
+   { 1, 1, 0, 0, "PK2H" },
+   { 1, 1, 0, 0, "PK2US" },
+   { 1, 1, 0, 0, "PK4B" },
+   { 1, 1, 0, 0, "PK4UB" },
+   { 1, 2, 0, 0, "RFL" },
+   { 1, 2, 0, 0, "SEQ" },
+   { 1, 2, 0, 0, "SFL" },
+   { 1, 2, 0, 0, "SGT" },
+   { 1, 1, 0, 0, "SIN" },
+   { 1, 2, 0, 0, "SLE" },
+   { 1, 2, 0, 0, "SNE" },
+   { 1, 2, 0, 0, "STR" },
+   { 1, 2, 1, 0, "TEX" },
+   { 1, 4, 1, 0, "TXD" },
+   { 1, 2, 1, 0, "TXP" },
+   { 1, 1, 0, 0, "UP2H" },
+   { 1, 1, 0, 0, "UP2US" },
+   { 1, 1, 0, 0, "UP4B" },
+   { 1, 1, 0, 0, "UP4UB" },
+   { 1, 3, 0, 0, "X2D" },
+   { 1, 1, 0, 0, "ARA" },
+   { 1, 1, 0, 0, "ARR" },
+   { 0, 1, 0, 0, "BRA" },
+   { 0, 0, 0, 1, "CAL" },
+   { 0, 0, 0, 0, "RET" },
+   { 1, 1, 0, 0, "SSG" },
+   { 1, 3, 0, 0, "CMP" },
+   { 1, 1, 0, 0, "SCS" },
+   { 1, 2, 1, 0, "TXB" },
+   { 1, 1, 0, 0, "NRM" },
+   { 1, 2, 0, 0, "DIV" },
+   { 1, 2, 0, 0, "DP2" },
+   { 1, 2, 1, 0, "TXL" },
+   { 0, 0, 0, 0, "BRK" },
+   { 0, 1, 0, 1, "IF" },
+   { 0, 0, 0, 0, "LOOP" },
+   { 0, 1, 0, 0, "REP" },
+   { 0, 0, 0, 1, "ELSE" },
+   { 0, 0, 0, 0, "ENDIF" },
+   { 0, 0, 0, 0, "ENDLOOP" },
+   { 0, 0, 0, 0, "ENDREP" },
+   { 0, 1, 0, 0, "PUSHA" },
+   { 1, 0, 0, 0, "POPA" },
+   { 1, 1, 0, 0, "CEIL" },
+   { 1, 1, 0, 0, "I2F" },
+   { 1, 1, 0, 0, "NOT" },
+   { 1, 1, 0, 0, "TRUNC" },
+   { 1, 2, 0, 0, "SHL" },
+   { 1, 2, 0, 0, "SHR" },
+   { 1, 2, 0, 0, "AND" },
+   { 1, 2, 0, 0, "OR" },
+   { 1, 2, 0, 0, "MOD" },
+   { 1, 2, 0, 0, "XOR" },
+   { 1, 3, 0, 0, "SAD" },
+   { 1, 2, 1, 0, "TXF" },
+   { 1, 2, 1, 0, "TXQ" },
+   { 0, 0, 0, 0, "CONT" },
+   { 0, 0, 0, 0, "EMIT" },
+   { 0, 0, 0, 0, "ENDPRIM" },
+   { 0, 0, 0, 1, "BGNLOOP2" },
+   { 0, 0, 0, 0, "BGNSUB" },
+   { 0, 0, 0, 1, "ENDLOOP2" },
+   { 0, 0, 0, 0, "ENDSUB" },
+   { 1, 1, 0, 0, "NOISE1" },
+   { 1, 1, 0, 0, "NOISE2" },
+   { 1, 1, 0, 0, "NOISE3" },
+   { 1, 1, 0, 0, "NOISE4" },
+   { 0, 0, 0, 0, "NOP" },
+   { 1, 2, 0, 0, "M4X3" },
+   { 1, 2, 0, 0, "M3X4" },
+   { 1, 2, 0, 0, "M3X3" },
+   { 1, 2, 0, 0, "M3X2" },
+   { 1, 1, 0, 0, "NRM4" },
+   { 0, 1, 0, 0, "CALLNZ" },
+   { 0, 1, 0, 0, "IFC" },
+   { 0, 1, 0, 0, "BREAKC" },
+   { 0, 0, 0, 0, "KIL" },
+   { 0, 0, 0, 0, "END" },
+   { 1, 1, 0, 0, "SWZ" }
 };
 
 static const char *texture_names[TGSI_TEXTURE_COUNT] =
@@ -915,6 +916,23 @@ parse_instruction(
       }
    }
 
+   if (info->is_branch) {
+      uint target;
+
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != ':') {
+         report_error( ctx, "Expected `:'" );
+         return FALSE;
+      }
+      ctx->cur++;
+      eat_opt_white( &ctx->cur );
+      if (!parse_uint( &ctx->cur, &target )) {
+         report_error( ctx, "Expected a label" );
+         return FALSE;
+      }
+      inst.InstructionExtLabel.Label = target;
+   }
+
    advance = tgsi_build_full_instruction(
       &inst,
       ctx->tokens_cur,
-- 
cgit v1.2.3


From b3c8d0c348c5d894e4df0314f060361893bdd38e Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Sun, 20 Jul 2008 21:58:07 +0200
Subject: tgsi: Parse source register extended swizzle.

---
 src/gallium/auxiliary/tgsi/util/tgsi_text.c | 91 ++++++++++++++++++++---------
 1 file changed, 64 insertions(+), 27 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
index 442016104b8..35cb3055bb2 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
@@ -496,6 +496,52 @@ parse_dst_operand(
    return TRUE;
 }
 
+static boolean
+parse_optional_swizzle(
+   struct translate_ctx *ctx,
+   uint swizzle[4],
+   boolean *parsed_swizzle,
+   boolean *parsed_extswizzle )
+{
+   const char *cur = ctx->cur;
+
+   *parsed_swizzle = FALSE;
+   *parsed_extswizzle = FALSE;
+
+   eat_opt_white( &cur );
+   if (*cur == '.') {
+      uint i;
+
+      cur++;
+      eat_opt_white( &cur );
+      for (i = 0; i < 4; i++) {
+         if (toupper( *cur ) == 'X')
+            swizzle[i] = TGSI_SWIZZLE_X;
+         else if (toupper( *cur ) == 'Y')
+            swizzle[i] = TGSI_SWIZZLE_Y;
+         else if (toupper( *cur ) == 'Z')
+            swizzle[i] = TGSI_SWIZZLE_Z;
+         else if (toupper( *cur ) == 'W')
+            swizzle[i] = TGSI_SWIZZLE_W;
+         else {
+            if (*cur == '0')
+               swizzle[i] = TGSI_EXTSWIZZLE_ZERO;
+            else if (*cur == '1')
+               swizzle[i] = TGSI_EXTSWIZZLE_ONE;
+            else {
+               report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" );
+               return FALSE;
+            }
+            *parsed_extswizzle = TRUE;
+         }
+         cur++;
+      }
+      *parsed_swizzle = TRUE;
+      ctx->cur = cur;
+   }
+   return TRUE;
+}
+
 static boolean
 parse_src_operand(
    struct translate_ctx *ctx,
@@ -507,6 +553,9 @@ parse_src_operand(
    int index;
    uint ind_file;
    int ind_index;
+   uint swizzle[4];
+   boolean parsed_swizzle;
+   boolean parsed_extswizzle;
 
    if (*ctx->cur == '-') {
       cur = ctx->cur;
@@ -588,35 +637,23 @@ parse_src_operand(
       src->SrcRegisterInd.Index = ind_index;
    }
 
-   /* Parse optional swizzle
+   /* Parse optional swizzle.
     */
-   cur = ctx->cur;
-   eat_opt_white( &cur );
-   if (*cur == '.') {
-      uint i;
-
-      cur++;
-      eat_opt_white( &cur );
-      for (i = 0; i < 4; i++) {
-         uint swizzle;
-
-         if (toupper( *cur ) == 'X')
-            swizzle = TGSI_SWIZZLE_X;
-         else if (toupper( *cur ) == 'Y')
-            swizzle = TGSI_SWIZZLE_Y;
-         else if (toupper( *cur ) == 'Z')
-            swizzle = TGSI_SWIZZLE_Z;
-         else if (toupper( *cur ) == 'W')
-            swizzle = TGSI_SWIZZLE_W;
-         else {
-            report_error( ctx, "Expected register swizzle component either `x', `y', `z' or `w'" );
-            return FALSE;
-         }
-         cur++;
-         tgsi_util_set_src_register_swizzle( &src->SrcRegister, swizzle, i );
+   if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) {
+      if (parsed_extswizzle) {
+         assert( parsed_swizzle );
+
+         src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0];
+         src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1];
+         src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2];
+         src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3];
+      }
+      else if (parsed_swizzle) {
+         src->SrcRegister.SwizzleX = swizzle[0];
+         src->SrcRegister.SwizzleY = swizzle[1];
+         src->SrcRegister.SwizzleZ = swizzle[2];
+         src->SrcRegister.SwizzleW = swizzle[3];
       }
-
-      ctx->cur = cur;
    }
 
    if (src->SrcRegisterExtMod.Complement) {
-- 
cgit v1.2.3


From 1753ff9872562bc05aff2472fc1256539085bbfc Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@tungstengraphics.com>
Date: Mon, 21 Jul 2008 14:38:57 +0200
Subject: tgsi: Update Makefile to include tgsi_iterate.c

---
 src/gallium/auxiliary/tgsi/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index 5555639b705..9c4b9676511 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -6,6 +6,7 @@ LIBNAME = tgsi
 C_SOURCES = \
 	exec/tgsi_exec.c \
 	exec/tgsi_sse2.c \
+	util/tgsi_iterate.c \
 	util/tgsi_build.c \
 	util/tgsi_dump.c \
 	util/tgsi_parse.c \
-- 
cgit v1.2.3


From 51d219dbfe6852d348755574184639940af444e3 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Tue, 22 Jul 2008 17:14:54 +0200
Subject: tgsi: Fix immediate usage checks. Provide more info for register
 usage errors/warnings.

---
 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 56 +++++++++++++++++++++------
 1 file changed, 45 insertions(+), 11 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
index 08f3995b13e..9673f061ce3 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
@@ -53,18 +53,32 @@ struct sanity_check_ctx
 static void
 report_error(
    struct sanity_check_ctx *ctx,
-   const char *msg )
+   const char *format,
+   ... )
 {
-   debug_printf( "\nError: %s", msg );
+   va_list args;
+
+   debug_printf( "Error  : " );
+   va_start( args, format );
+   _debug_vprintf( format, args );
+   va_end( args );
+   debug_printf( "\n" );
    ctx->errors++;
 }
 
 static void
 report_warning(
    struct sanity_check_ctx *ctx,
-   const char *msg )
+   const char *format,
+   ... )
 {
-   debug_printf( "\nWarning: %s", msg );
+   va_list args;
+
+   debug_printf( "Warning: " );
+   va_start( args, format );
+   _debug_vprintf( format, args );
+   va_end( args );
+   debug_printf( "\n" );
    ctx->warnings++;
 }
 
@@ -74,7 +88,7 @@ check_file_name(
    uint file )
 {
    if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) {
-      report_error( ctx, "Invalid file name" );
+      report_error( ctx, "Invalid register file name" );
       return FALSE;
    }
    return TRUE;
@@ -115,23 +129,36 @@ is_register_used(
    return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
 }
 
+static const char *file_names[] =
+{
+   "NULL",
+   "CONST",
+   "IN",
+   "OUT",
+   "TEMP",
+   "SAMP",
+   "ADDR",
+   "IMM"
+};
+
 static boolean
 check_register_usage(
    struct sanity_check_ctx *ctx,
    uint file,
    int index,
-   boolean indirect )
+   const char *name,
+   boolean indirect_access )
 {
    if (!check_file_name( ctx, file ))
       return FALSE;
-   if (indirect) {
+   if (indirect_access) {
       if (!is_any_register_declared( ctx, file ))
-         report_error( ctx, "Undeclared source register" );
+         report_error( ctx, "%s: Undeclared %s register", file_names[file], name );
       ctx->regs_ind_used[file] = TRUE;
    }
    else {
       if (!is_register_declared( ctx, file, index ))
-         report_error( ctx, "Undeclared destination register" );
+         report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name );
       ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG));
    }
    return TRUE;
@@ -162,6 +189,7 @@ iter_instruction(
          ctx,
          inst->FullDstRegisters[i].DstRegister.File,
          inst->FullDstRegisters[i].DstRegister.Index,
+         "destination",
          FALSE );
    }
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
@@ -169,6 +197,7 @@ iter_instruction(
          ctx,
          inst->FullSrcRegisters[i].SrcRegister.File,
          inst->FullSrcRegisters[i].SrcRegister.Index,
+         "source",
          inst->FullSrcRegisters[i].SrcRegister.Indirect );
       if (inst->FullSrcRegisters[i].SrcRegister.Indirect) {
          uint file;
@@ -176,7 +205,12 @@ iter_instruction(
 
          file = inst->FullSrcRegisters[i].SrcRegisterInd.File;
          index = inst->FullSrcRegisters[i].SrcRegisterInd.Index;
-         check_register_usage( ctx, file, index, FALSE );
+         check_register_usage(
+            ctx,
+            file,
+            index,
+            "indirect",
+            FALSE );
          if (file != TGSI_FILE_ADDRESS || index != 0)
             report_warning( ctx, "Indirect register not ADDR[0]" );
       }
@@ -232,8 +266,8 @@ iter_immediate(
 
    /* Mark the register as declared.
     */
-   ctx->num_imms++;
    ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG));
+   ctx->num_imms++;
 
    /* Check data type validity.
     */
-- 
cgit v1.2.3


From 0aa0141e0cd7b5231140805b1cda821bb3d32a86 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 23 Jul 2008 17:52:56 +0200
Subject: tgsi: Fix tgsi_util_get_full_src_register_extswizzle().

---
 src/gallium/auxiliary/tgsi/util/tgsi_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c
index 10762b6c1a0..09486e649e1 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.c
@@ -119,7 +119,7 @@ tgsi_util_get_full_src_register_extswizzle(
    if( swizzle <= TGSI_SWIZZLE_W ) {
       swizzle = tgsi_util_get_src_register_swizzle(
          &reg->SrcRegister,
-         component );
+         swizzle );
    }
 
    return swizzle;
-- 
cgit v1.2.3


From 5f2a5f6164ace6e12c1a3ba95f1103dc8fafa68f Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 23 Jul 2008 09:56:44 -0600
Subject: gallium: print extended swizzle negation flags

---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 94180f7e507..d2e6375212f 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -491,9 +491,17 @@ tgsi_dump_instruction(
           src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
           src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
          CHR( '.' );
+         if (src->SrcRegisterExtSwz.NegateX)
+            TXT("-");
          ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names );
+         if (src->SrcRegisterExtSwz.NegateY)
+            TXT("-");
          ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names );
+         if (src->SrcRegisterExtSwz.NegateZ)
+            TXT("-");
          ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names );
+         if (src->SrcRegisterExtSwz.NegateW)
+            TXT("-");
          ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names );
       }
 
-- 
cgit v1.2.3


From f7be39ea105aa951d0f6e1d8ffbea63412e30801 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 23 Jul 2008 16:28:15 -0600
Subject: gallium: bump TGSI_EXEC_NUM_TEMPS to 128

---
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
index 18abdd9ac00..4f30650b07b 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
@@ -100,7 +100,7 @@ struct tgsi_exec_labels
 };
 
 
-#define TGSI_EXEC_NUM_TEMPS        64
+#define TGSI_EXEC_NUM_TEMPS       128
 #define TGSI_EXEC_NUM_TEMP_EXTRAS   6
 #define TGSI_EXEC_NUM_IMMEDIATES  256
 
-- 
cgit v1.2.3


From 83869ceab5b3faed8569a5ca752d4dc426db1aec Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Tue, 22 Jul 2008 22:07:22 +0900
Subject: tgsi: Silent msvc warning.

Rather stupid warning: msvc is warning that converting from a 1bit
structure bitfield to a unsigned char looses precision... /WX makes this
an error.
---
 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
index 9673f061ce3..2e3ec96b5b5 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
@@ -198,7 +198,7 @@ iter_instruction(
          inst->FullSrcRegisters[i].SrcRegister.File,
          inst->FullSrcRegisters[i].SrcRegister.Index,
          "source",
-         inst->FullSrcRegisters[i].SrcRegister.Indirect );
+         (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect );
       if (inst->FullSrcRegisters[i].SrcRegister.Indirect) {
          uint file;
          int index;
-- 
cgit v1.2.3


From c208a2c791fa24c7c5887fc496738cbddbfafc72 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Mon, 28 Jul 2008 12:42:13 +0900
Subject: Merge tgsi/exec and tgsi/util directories.

---
 src/gallium/auxiliary/cso_cache/cso_context.c      |    2 +-
 src/gallium/auxiliary/draw/draw_pipe_aaline.c      |    4 +-
 src/gallium/auxiliary/draw/draw_pipe_aapoint.c     |    4 +-
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c    |    4 +-
 src/gallium/auxiliary/draw/draw_private.h          |    4 +-
 src/gallium/auxiliary/draw/draw_vs_aos.c           |    8 +-
 src/gallium/auxiliary/draw/draw_vs_aos_io.c        |    6 +-
 src/gallium/auxiliary/draw/draw_vs_aos_machine.c   |    6 +-
 src/gallium/auxiliary/draw/draw_vs_exec.c          |    4 +-
 src/gallium/auxiliary/draw/draw_vs_llvm.c          |    2 +-
 src/gallium/auxiliary/draw/draw_vs_sse.c           |    4 +-
 src/gallium/auxiliary/gallivm/gallivm.cpp          |    4 +-
 src/gallium/auxiliary/gallivm/gallivm_cpu.cpp      |    4 +-
 src/gallium/auxiliary/gallivm/tgsitollvm.cpp       |   10 +-
 src/gallium/auxiliary/tgsi/Makefile                |   18 +-
 src/gallium/auxiliary/tgsi/SConscript              |   24 +-
 src/gallium/auxiliary/tgsi/exec/Makefile           |    2 -
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c        | 2522 --------------------
 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h        |  253 --
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c        | 2275 ------------------
 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h        |   49 -
 src/gallium/auxiliary/tgsi/tgsi_build.c            | 1324 ++++++++++
 src/gallium/auxiliary/tgsi/tgsi_build.h            |  332 +++
 src/gallium/auxiliary/tgsi/tgsi_dump.c             |  582 +++++
 src/gallium/auxiliary/tgsi/tgsi_dump.h             |   63 +
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c           |  845 +++++++
 src/gallium/auxiliary/tgsi/tgsi_dump_c.h           |   49 +
 src/gallium/auxiliary/tgsi/tgsi_exec.c             | 2522 ++++++++++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_exec.h             |  253 ++
 src/gallium/auxiliary/tgsi/tgsi_iterate.c          |   85 +
 src/gallium/auxiliary/tgsi/tgsi_iterate.h          |   76 +
 src/gallium/auxiliary/tgsi/tgsi_parse.c            |  332 +++
 src/gallium/auxiliary/tgsi/tgsi_parse.h            |  151 ++
 src/gallium/auxiliary/tgsi/tgsi_sanity.c           |  341 +++
 src/gallium/auxiliary/tgsi/tgsi_sanity.h           |   49 +
 src/gallium/auxiliary/tgsi/tgsi_scan.c             |  226 ++
 src/gallium/auxiliary/tgsi/tgsi_scan.h             |   74 +
 src/gallium/auxiliary/tgsi/tgsi_sse2.c             | 2275 ++++++++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_sse2.h             |   49 +
 src/gallium/auxiliary/tgsi/tgsi_text.c             | 1221 ++++++++++
 src/gallium/auxiliary/tgsi/tgsi_text.h             |   47 +
 src/gallium/auxiliary/tgsi/tgsi_transform.c        |  199 ++
 src/gallium/auxiliary/tgsi/tgsi_transform.h        |   93 +
 src/gallium/auxiliary/tgsi/tgsi_util.c             |  300 +++
 src/gallium/auxiliary/tgsi/tgsi_util.h             |   96 +
 src/gallium/auxiliary/tgsi/util/tgsi_build.c       | 1324 ----------
 src/gallium/auxiliary/tgsi/util/tgsi_build.h       |  332 ---
 src/gallium/auxiliary/tgsi/util/tgsi_dump.c        |  582 -----
 src/gallium/auxiliary/tgsi/util/tgsi_dump.h        |   63 -
 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c      |  845 -------
 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h      |   49 -
 src/gallium/auxiliary/tgsi/util/tgsi_iterate.c     |   85 -
 src/gallium/auxiliary/tgsi/util/tgsi_iterate.h     |   76 -
 src/gallium/auxiliary/tgsi/util/tgsi_parse.c       |  332 ---
 src/gallium/auxiliary/tgsi/util/tgsi_parse.h       |  151 --
 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c      |  341 ---
 src/gallium/auxiliary/tgsi/util/tgsi_sanity.h      |   49 -
 src/gallium/auxiliary/tgsi/util/tgsi_scan.c        |  226 --
 src/gallium/auxiliary/tgsi/util/tgsi_scan.h        |   74 -
 src/gallium/auxiliary/tgsi/util/tgsi_text.c        | 1221 ----------
 src/gallium/auxiliary/tgsi/util/tgsi_text.h        |   47 -
 src/gallium/auxiliary/tgsi/util/tgsi_transform.c   |  199 --
 src/gallium/auxiliary/tgsi/util/tgsi_transform.h   |   93 -
 src/gallium/auxiliary/tgsi/util/tgsi_util.c        |  300 ---
 src/gallium/auxiliary/tgsi/util/tgsi_util.h        |   96 -
 src/gallium/auxiliary/util/u_gen_mipmap.c          |    6 +-
 src/gallium/auxiliary/util/u_simple_shaders.c      |    6 +-
 src/gallium/drivers/cell/ppu/cell_context.h        |    2 +-
 src/gallium/drivers/cell/ppu/cell_state_shader.c   |    2 +-
 src/gallium/drivers/cell/spu/spu_exec.c            |    4 +-
 src/gallium/drivers/cell/spu/spu_exec.h            |    2 +-
 src/gallium/drivers/cell/spu/spu_util.c            |    4 +-
 src/gallium/drivers/i915simple/i915_context.h      |    2 +-
 .../drivers/i915simple/i915_fpc_translate.c        |    4 +-
 src/gallium/drivers/i915simple/i915_state.c        |    2 +-
 src/gallium/drivers/i965simple/brw_context.h       |    2 +-
 src/gallium/drivers/i965simple/brw_sf.c            |    2 +-
 src/gallium/drivers/i965simple/brw_shader_info.c   |    2 +-
 src/gallium/drivers/i965simple/brw_state.c         |    4 +-
 src/gallium/drivers/i965simple/brw_vs_emit.c       |    2 +-
 src/gallium/drivers/i965simple/brw_wm_decl.c       |    2 +-
 src/gallium/drivers/i965simple/brw_wm_glsl.c       |    2 +-
 src/gallium/drivers/softpipe/sp_fs_exec.c          |    4 +-
 src/gallium/drivers/softpipe/sp_fs_llvm.c          |    2 +-
 src/gallium/drivers/softpipe/sp_fs_sse.c           |    4 +-
 src/gallium/drivers/softpipe/sp_headers.h          |    2 +-
 src/gallium/drivers/softpipe/sp_state.h            |    2 +-
 src/gallium/drivers/softpipe/sp_state_fs.c         |    4 +-
 src/gallium/drivers/softpipe/sp_tex_sample.c       |    2 +-
 src/gallium/state_trackers/python/gallium.i        |    4 +-
 src/mesa/state_tracker/st_debug.c                  |    2 +-
 src/mesa/state_tracker/st_mesa_to_tgsi.c           |    6 +-
 src/mesa/state_tracker/st_program.c                |    2 +-
 93 files changed, 11680 insertions(+), 11682 deletions(-)
 delete mode 100644 src/gallium/auxiliary/tgsi/exec/Makefile
 delete mode 100644 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
 delete mode 100644 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
 delete mode 100755 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
 delete mode 100755 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_build.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_build.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump_c.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump_c.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_exec.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_exec.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_iterate.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_iterate.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_parse.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_parse.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sanity.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sanity.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_scan.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_scan.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sse2.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sse2.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_text.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_text.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_transform.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_transform.h
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_util.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_util.h
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_build.c
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_build.h
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump.c
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump.h
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_iterate.c
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_iterate.h
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_parse.c
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_parse.h
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_sanity.h
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_scan.c
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_scan.h
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_text.c
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_text.h
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_transform.c
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_transform.h
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_util.c
 delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_util.h

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index af4af8ac1d4..86e4d46a200 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -38,7 +38,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
 
 #include "cso_cache/cso_context.h"
 #include "cso_cache/cso_cache.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 3dd7ee19fd1..991304b2c84 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -38,8 +38,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 
-#include "tgsi/util/tgsi_transform.h"
-#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_dump.h"
 
 #include "draw_context.h"
 #include "draw_private.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 87fd3036493..13b44015212 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -44,8 +44,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 
-#include "tgsi/util/tgsi_transform.h"
-#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_dump.h"
 
 #include "draw_context.h"
 #include "draw_vs.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 1f63f943656..d3bd9baddd6 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -40,8 +40,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 
-#include "tgsi/util/tgsi_transform.h"
-#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_dump.h"
 
 #include "draw_context.h"
 #include "draw_pipe.h"
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 7bd1e670b4b..626a2e3e304 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -44,8 +44,8 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 
-#include "tgsi/exec/tgsi_exec.h"
-#include "tgsi/util/tgsi_scan.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_scan.h"
 
 
 struct pipe_context;
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 1f926b3e850..441877d46f0 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -31,10 +31,10 @@
 
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
-#include "tgsi/exec/tgsi_exec.h"
-#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
 
 #include "draw_vs.h"
 #include "draw_vs_aos.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index 8e834501a4c..eda677cc62c 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -28,9 +28,9 @@
 
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
-#include "tgsi/exec/tgsi_exec.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
 #include "draw_vs.h"
 #include "draw_vs_aos.h"
 #include "draw_vertex.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
index 6a54917ae38..e029b7b4bb5 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
@@ -31,9 +31,9 @@
 
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
-#include "tgsi/exec/tgsi_exec.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
 #include "draw_vs.h"
 #include "draw_vs_aos.h"
 #include "draw_vertex.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 4501877efcf..e26903d8cc5 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -38,8 +38,8 @@
 #include "draw_context.h"
 #include "draw_vs.h"
 
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_scan.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
 
 
 struct exec_vertex_shader {
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index c63bd51a106..fc03473b919 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -38,7 +38,7 @@
 #include "draw_context.h"
 #include "draw_vs.h"
 
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
 
 #ifdef MESA_LLVM
 
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index c3189c707db..61f0c084c38 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -45,8 +45,8 @@
 
 #include "rtasm/rtasm_cpu.h"
 #include "rtasm/rtasm_x86sse.h"
-#include "tgsi/exec/tgsi_sse2.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_sse2.h"
+#include "tgsi/tgsi_parse.h"
 
 #define SSE_MAX_VERTICES 4
 
diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp
index 77900e342b1..29adeea47de 100644
--- a/src/gallium/auxiliary/gallivm/gallivm.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm.cpp
@@ -42,8 +42,8 @@
 #include "pipe/p_context.h"
 #include "pipe/p_shader_tokens.h"
 
-#include "tgsi/exec/tgsi_exec.h"
-#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
 
 #include <llvm/Module.h>
 #include <llvm/CallingConv.h>
diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
index 857c190f7b6..cf5b9788375 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
@@ -43,8 +43,8 @@
 #include "pipe/p_shader_tokens.h"
 #include "pipe/p_util.h"
 
-#include "tgsi/exec/tgsi_exec.h"
-#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
 
 #include <llvm/Module.h>
 #include <llvm/CallingConv.h>
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 98014bdaa15..b14e2affd6f 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -10,11 +10,11 @@
 
 #include "pipe/p_shader_tokens.h"
 
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/exec/tgsi_exec.h"
-#include "tgsi/util/tgsi_util.h"
-#include "tgsi/util/tgsi_build.h"
-#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_dump.h"
 
 
 #include <llvm/Module.h>
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index 9c4b9676511..bbeff1304d1 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -4,15 +4,15 @@ include $(TOP)/configs/current
 LIBNAME = tgsi
 
 C_SOURCES = \
-	exec/tgsi_exec.c \
-	exec/tgsi_sse2.c \
-	util/tgsi_iterate.c \
-	util/tgsi_build.c \
-	util/tgsi_dump.c \
-	util/tgsi_parse.c \
-	util/tgsi_scan.c \
-	util/tgsi_transform.c \
-	util/tgsi_util.c
+	tgsi_build.c \
+	tgsi_dump.c \
+	tgsi_exec.c \
+	tgsi_iterate.c \
+	tgsi_parse.c \
+	tgsi_scan.c \
+	tgsi_sse2.c \
+	tgsi_transform.c \
+	tgsi_util.c
 
 include ../../Makefile.template
 
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 3bbfa1be547..03982e21943 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -3,18 +3,18 @@ Import('*')
 tgsi = env.ConvenienceLibrary(
 	target = 'tgsi',
 	source = [
-		'exec/tgsi_exec.c',
-		'exec/tgsi_sse2.c',
-		'util/tgsi_build.c',
-		'util/tgsi_dump.c',
-		'util/tgsi_dump_c.c',
-		'util/tgsi_iterate.c',
-		'util/tgsi_parse.c',
-		'util/tgsi_sanity.c',
-		'util/tgsi_scan.c',
-		'util/tgsi_text.c',
-		'util/tgsi_transform.c',
-		'util/tgsi_util.c',
+		'tgsi_build.c',
+		'tgsi_dump.c',
+		'tgsi_dump_c.c',
+		'tgsi_exec.c',
+		'tgsi_iterate.c',
+		'tgsi_parse.c',
+		'tgsi_sanity.c',
+		'tgsi_scan.c',
+		'tgsi_sse2.c',
+		'tgsi_text.c',
+		'tgsi_transform.c',
+		'tgsi_util.c',
 	])
 
 auxiliaries.insert(0, tgsi)
diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile
deleted file mode 100644
index 451911a3545..00000000000
--- a/src/gallium/auxiliary/tgsi/exec/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-default:
-	cd .. ; make
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
deleted file mode 100644
index 001a4c4b152..00000000000
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ /dev/null
@@ -1,2522 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * TGSI interpretor/executor.
- *
- * Flow control information:
- *
- * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
- * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
- * care since a condition may be true for some quad components but false
- * for other components.
- *
- * We basically execute all statements (even if they're in the part of
- * an IF/ELSE clause that's "not taken") and use a special mask to
- * control writing to destination registers.  This is the ExecMask.
- * See store_dest().
- *
- * The ExecMask is computed from three other masks (CondMask, LoopMask and
- * ContMask) which are controlled by the flow control instructions (namely:
- * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
- *
- *
- * Authors:
- *   Michal Krol
- *   Brian Paul
- */
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
-#include "tgsi_exec.h"
-
-#define TILE_TOP_LEFT     0
-#define TILE_TOP_RIGHT    1
-#define TILE_BOTTOM_LEFT  2
-#define TILE_BOTTOM_RIGHT 3
-
-/*
- * Shorthand locations of various utility registers (_I = Index, _C = Channel)
- */
-#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
-#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
-#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
-#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
-#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
-#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
-#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
-#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
-#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
-#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
-#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
-#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
-#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
-#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
-#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
-#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
-#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
-#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
-#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
-#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
-#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
-#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
-#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
-#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
-#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
-#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
-#define TEMP_R0            TGSI_EXEC_TEMP_R0
-
-#define FOR_EACH_CHANNEL(CHAN)\
-   for (CHAN = 0; CHAN < 4; CHAN++)
-
-#define IS_CHANNEL_ENABLED(INST, CHAN)\
-   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
-
-#define IS_CHANNEL_ENABLED2(INST, CHAN)\
-   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
-
-#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
-   FOR_EACH_CHANNEL( CHAN )\
-      if (IS_CHANNEL_ENABLED( INST, CHAN ))
-
-#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
-   FOR_EACH_CHANNEL( CHAN )\
-      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
-
-
-/** The execution mask depends on the conditional mask and the loop mask */
-#define UPDATE_EXEC_MASK(MACH) \
-      MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
-
-
-#define CHAN_X  0
-#define CHAN_Y  1
-#define CHAN_Z  2
-#define CHAN_W  3
-
-
-
-/**
- * Initialize machine state by expanding tokens to full instructions,
- * allocating temporary storage, setting up constants, etc.
- * After this, we can call tgsi_exec_machine_run() many times.
- */
-void 
-tgsi_exec_machine_bind_shader(
-   struct tgsi_exec_machine *mach,
-   const struct tgsi_token *tokens,
-   uint numSamplers,
-   struct tgsi_sampler *samplers)
-{
-   uint k;
-   struct tgsi_parse_context parse;
-   struct tgsi_exec_labels *labels = &mach->Labels;
-   struct tgsi_full_instruction *instructions;
-   struct tgsi_full_declaration *declarations;
-   uint maxInstructions = 10, numInstructions = 0;
-   uint maxDeclarations = 10, numDeclarations = 0;
-   uint instno = 0;
-
-#if 0
-   tgsi_dump(tokens, 0);
-#endif
-
-   mach->Tokens = tokens;
-   mach->Samplers = samplers;
-
-   k = tgsi_parse_init (&parse, mach->Tokens);
-   if (k != TGSI_PARSE_OK) {
-      debug_printf( "Problem parsing!\n" );
-      return;
-   }
-
-   mach->Processor = parse.FullHeader.Processor.Processor;
-   mach->ImmLimit = 0;
-   labels->count = 0;
-
-   declarations = (struct tgsi_full_declaration *)
-      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
-
-   if (!declarations) {
-      return;
-   }
-
-   instructions = (struct tgsi_full_instruction *)
-      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
-
-   if (!instructions) {
-      FREE( declarations );
-      return;
-   }
-
-   while( !tgsi_parse_end_of_tokens( &parse ) ) {
-      uint pointer = parse.Position;
-      uint i;
-
-      tgsi_parse_token( &parse );
-      switch( parse.FullToken.Token.Type ) {
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         /* save expanded declaration */
-         if (numDeclarations == maxDeclarations) {
-            declarations = REALLOC(declarations,
-                                   maxDeclarations
-                                   * sizeof(struct tgsi_full_declaration),
-                                   (maxDeclarations + 10)
-                                   * sizeof(struct tgsi_full_declaration));
-            maxDeclarations += 10;
-         }
-         memcpy(declarations + numDeclarations,
-                &parse.FullToken.FullDeclaration,
-                sizeof(declarations[0]));
-         numDeclarations++;
-         break;
-
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         {
-            uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
-            assert( size % 4 == 0 );
-            assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
-
-            for( i = 0; i < size; i++ ) {
-               mach->Imms[mach->ImmLimit + i / 4][i % 4] = 
-		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
-            }
-            mach->ImmLimit += size / 4;
-         }
-         break;
-
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         assert( labels->count < MAX_LABELS );
-
-         labels->labels[labels->count][0] = instno;
-         labels->labels[labels->count][1] = pointer;
-         labels->count++;
-
-         /* save expanded instruction */
-         if (numInstructions == maxInstructions) {
-            instructions = REALLOC(instructions,
-                                   maxInstructions
-                                   * sizeof(struct tgsi_full_instruction),
-                                   (maxInstructions + 10)
-                                   * sizeof(struct tgsi_full_instruction));
-            maxInstructions += 10;
-         }
-         memcpy(instructions + numInstructions,
-                &parse.FullToken.FullInstruction,
-                sizeof(instructions[0]));
-         numInstructions++;
-         break;
-
-      default:
-         assert( 0 );
-      }
-   }
-   tgsi_parse_free (&parse);
-
-   if (mach->Declarations) {
-      FREE( mach->Declarations );
-   }
-   mach->Declarations = declarations;
-   mach->NumDeclarations = numDeclarations;
-
-   if (mach->Instructions) {
-      FREE( mach->Instructions );
-   }
-   mach->Instructions = instructions;
-   mach->NumInstructions = numInstructions;
-}
-
-
-void
-tgsi_exec_machine_init(
-   struct tgsi_exec_machine *mach )
-{
-   uint i;
-
-   mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
-   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
-
-   /* Setup constants. */
-   for( i = 0; i < 4; i++ ) {
-      mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
-      mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
-      mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
-      mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
-      mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
-      mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
-      mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
-      mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
-      mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
-      mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
-   }
-}
-
-
-void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
-{
-   if (mach->Instructions) {
-      FREE(mach->Instructions);
-      mach->Instructions = NULL;
-      mach->NumInstructions = 0;
-   }
-   if (mach->Declarations) {
-      FREE(mach->Declarations);
-      mach->Declarations = NULL;
-      mach->NumDeclarations = 0;
-   }
-}
-
-
-static void
-micro_abs(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] = fabsf( src->f[0] );
-   dst->f[1] = fabsf( src->f[1] );
-   dst->f[2] = fabsf( src->f[2] );
-   dst->f[3] = fabsf( src->f[3] );
-}
-
-static void
-micro_add(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->f[0] = src0->f[0] + src1->f[0];
-   dst->f[1] = src0->f[1] + src1->f[1];
-   dst->f[2] = src0->f[2] + src1->f[2];
-   dst->f[3] = src0->f[3] + src1->f[3];
-}
-
-static void
-micro_iadd(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->i[0] = src0->i[0] + src1->i[0];
-   dst->i[1] = src0->i[1] + src1->i[1];
-   dst->i[2] = src0->i[2] + src1->i[2];
-   dst->i[3] = src0->i[3] + src1->i[3];
-}
-
-static void
-micro_and(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->u[0] = src0->u[0] & src1->u[0];
-   dst->u[1] = src0->u[1] & src1->u[1];
-   dst->u[2] = src0->u[2] & src1->u[2];
-   dst->u[3] = src0->u[3] & src1->u[3];
-}
-
-static void
-micro_ceil(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] = ceilf( src->f[0] );
-   dst->f[1] = ceilf( src->f[1] );
-   dst->f[2] = ceilf( src->f[2] );
-   dst->f[3] = ceilf( src->f[3] );
-}
-
-static void
-micro_cos(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] = cosf( src->f[0] );
-   dst->f[1] = cosf( src->f[1] );
-   dst->f[2] = cosf( src->f[2] );
-   dst->f[3] = cosf( src->f[3] );
-}
-
-static void
-micro_ddx(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] =
-   dst->f[1] =
-   dst->f[2] =
-   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
-}
-
-static void
-micro_ddy(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] =
-   dst->f[1] =
-   dst->f[2] =
-   dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
-}
-
-static void
-micro_div(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->f[0] = src0->f[0] / src1->f[0];
-   dst->f[1] = src0->f[1] / src1->f[1];
-   dst->f[2] = src0->f[2] / src1->f[2];
-   dst->f[3] = src0->f[3] / src1->f[3];
-}
-
-static void
-micro_udiv(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->u[0] = src0->u[0] / src1->u[0];
-   dst->u[1] = src0->u[1] / src1->u[1];
-   dst->u[2] = src0->u[2] / src1->u[2];
-   dst->u[3] = src0->u[3] / src1->u[3];
-}
-
-static void
-micro_eq(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1,
-   const union tgsi_exec_channel *src2,
-   const union tgsi_exec_channel *src3 )
-{
-   dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
-   dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
-   dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
-   dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
-micro_ieq(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1,
-   const union tgsi_exec_channel *src2,
-   const union tgsi_exec_channel *src3 )
-{
-   dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
-   dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
-   dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
-   dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
-}
-
-static void
-micro_exp2(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src)
-{
-   dst->f[0] = powf( 2.0f, src->f[0] );
-   dst->f[1] = powf( 2.0f, src->f[1] );
-   dst->f[2] = powf( 2.0f, src->f[2] );
-   dst->f[3] = powf( 2.0f, src->f[3] );
-}
-
-static void
-micro_f2it(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->i[0] = (int) src->f[0];
-   dst->i[1] = (int) src->f[1];
-   dst->i[2] = (int) src->f[2];
-   dst->i[3] = (int) src->f[3];
-}
-
-static void
-micro_f2ut(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->u[0] = (uint) src->f[0];
-   dst->u[1] = (uint) src->f[1];
-   dst->u[2] = (uint) src->f[2];
-   dst->u[3] = (uint) src->f[3];
-}
-
-static void
-micro_flr(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] = floorf( src->f[0] );
-   dst->f[1] = floorf( src->f[1] );
-   dst->f[2] = floorf( src->f[2] );
-   dst->f[3] = floorf( src->f[3] );
-}
-
-static void
-micro_frc(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] = src->f[0] - floorf( src->f[0] );
-   dst->f[1] = src->f[1] - floorf( src->f[1] );
-   dst->f[2] = src->f[2] - floorf( src->f[2] );
-   dst->f[3] = src->f[3] - floorf( src->f[3] );
-}
-
-static void
-micro_ge(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1,
-   const union tgsi_exec_channel *src2,
-   const union tgsi_exec_channel *src3 )
-{
-   dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
-   dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
-   dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
-   dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
-micro_i2f(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] = (float) src->i[0];
-   dst->f[1] = (float) src->i[1];
-   dst->f[2] = (float) src->i[2];
-   dst->f[3] = (float) src->i[3];
-}
-
-static void
-micro_lg2(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] = logf( src->f[0] ) * 1.442695f;
-   dst->f[1] = logf( src->f[1] ) * 1.442695f;
-   dst->f[2] = logf( src->f[2] ) * 1.442695f;
-   dst->f[3] = logf( src->f[3] ) * 1.442695f;
-}
-
-static void
-micro_le(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1,
-   const union tgsi_exec_channel *src2,
-   const union tgsi_exec_channel *src3 )
-{
-   dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
-   dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
-   dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
-   dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
-micro_lt(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1,
-   const union tgsi_exec_channel *src2,
-   const union tgsi_exec_channel *src3 )
-{
-   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
-   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
-   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
-   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
-micro_ilt(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1,
-   const union tgsi_exec_channel *src2,
-   const union tgsi_exec_channel *src3 )
-{
-   dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
-   dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
-   dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
-   dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
-}
-
-static void
-micro_ult(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1,
-   const union tgsi_exec_channel *src2,
-   const union tgsi_exec_channel *src3 )
-{
-   dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
-   dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
-   dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
-   dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
-}
-
-static void
-micro_max(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
-   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
-   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
-   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
-}
-
-static void
-micro_imax(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
-   dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
-   dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
-   dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
-}
-
-static void
-micro_umax(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
-   dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
-   dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
-   dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
-}
-
-static void
-micro_min(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
-   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
-   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
-   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
-}
-
-static void
-micro_imin(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
-   dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
-   dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
-   dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
-}
-
-static void
-micro_umin(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
-   dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
-   dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
-   dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
-}
-
-static void
-micro_umod(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->u[0] = src0->u[0] % src1->u[0];
-   dst->u[1] = src0->u[1] % src1->u[1];
-   dst->u[2] = src0->u[2] % src1->u[2];
-   dst->u[3] = src0->u[3] % src1->u[3];
-}
-
-static void
-micro_mul(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->f[0] = src0->f[0] * src1->f[0];
-   dst->f[1] = src0->f[1] * src1->f[1];
-   dst->f[2] = src0->f[2] * src1->f[2];
-   dst->f[3] = src0->f[3] * src1->f[3];
-}
-
-static void
-micro_imul(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->i[0] = src0->i[0] * src1->i[0];
-   dst->i[1] = src0->i[1] * src1->i[1];
-   dst->i[2] = src0->i[2] * src1->i[2];
-   dst->i[3] = src0->i[3] * src1->i[3];
-}
-
-static void
-micro_imul64(
-   union tgsi_exec_channel *dst0,
-   union tgsi_exec_channel *dst1,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst1->i[0] = src0->i[0] * src1->i[0];
-   dst1->i[1] = src0->i[1] * src1->i[1];
-   dst1->i[2] = src0->i[2] * src1->i[2];
-   dst1->i[3] = src0->i[3] * src1->i[3];
-   dst0->i[0] = 0;
-   dst0->i[1] = 0;
-   dst0->i[2] = 0;
-   dst0->i[3] = 0;
-}
-
-static void
-micro_umul64(
-   union tgsi_exec_channel *dst0,
-   union tgsi_exec_channel *dst1,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst1->u[0] = src0->u[0] * src1->u[0];
-   dst1->u[1] = src0->u[1] * src1->u[1];
-   dst1->u[2] = src0->u[2] * src1->u[2];
-   dst1->u[3] = src0->u[3] * src1->u[3];
-   dst0->u[0] = 0;
-   dst0->u[1] = 0;
-   dst0->u[2] = 0;
-   dst0->u[3] = 0;
-}
-
-static void
-micro_movc(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1,
-   const union tgsi_exec_channel *src2 )
-{
-   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
-   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
-   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
-   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
-}
-
-static void
-micro_neg(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] = -src->f[0];
-   dst->f[1] = -src->f[1];
-   dst->f[2] = -src->f[2];
-   dst->f[3] = -src->f[3];
-}
-
-static void
-micro_ineg(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->i[0] = -src->i[0];
-   dst->i[1] = -src->i[1];
-   dst->i[2] = -src->i[2];
-   dst->i[3] = -src->i[3];
-}
-
-static void
-micro_not(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->u[0] = ~src->u[0];
-   dst->u[1] = ~src->u[1];
-   dst->u[2] = ~src->u[2];
-   dst->u[3] = ~src->u[3];
-}
-
-static void
-micro_or(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->u[0] = src0->u[0] | src1->u[0];
-   dst->u[1] = src0->u[1] | src1->u[1];
-   dst->u[2] = src0->u[2] | src1->u[2];
-   dst->u[3] = src0->u[3] | src1->u[3];
-}
-
-static void
-micro_pow(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->f[0] = powf( src0->f[0], src1->f[0] );
-   dst->f[1] = powf( src0->f[1], src1->f[1] );
-   dst->f[2] = powf( src0->f[2], src1->f[2] );
-   dst->f[3] = powf( src0->f[3], src1->f[3] );
-}
-
-static void
-micro_rnd(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] = floorf( src->f[0] + 0.5f );
-   dst->f[1] = floorf( src->f[1] + 0.5f );
-   dst->f[2] = floorf( src->f[2] + 0.5f );
-   dst->f[3] = floorf( src->f[3] + 0.5f );
-}
-
-static void
-micro_shl(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->i[0] = src0->i[0] << src1->i[0];
-   dst->i[1] = src0->i[1] << src1->i[1];
-   dst->i[2] = src0->i[2] << src1->i[2];
-   dst->i[3] = src0->i[3] << src1->i[3];
-}
-
-static void
-micro_ishr(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->i[0] = src0->i[0] >> src1->i[0];
-   dst->i[1] = src0->i[1] >> src1->i[1];
-   dst->i[2] = src0->i[2] >> src1->i[2];
-   dst->i[3] = src0->i[3] >> src1->i[3];
-}
-
-static void
-micro_trunc(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0 )
-{
-   dst->f[0] = (float) (int) src0->f[0];
-   dst->f[1] = (float) (int) src0->f[1];
-   dst->f[2] = (float) (int) src0->f[2];
-   dst->f[3] = (float) (int) src0->f[3];
-}
-
-static void
-micro_ushr(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->u[0] = src0->u[0] >> src1->u[0];
-   dst->u[1] = src0->u[1] >> src1->u[1];
-   dst->u[2] = src0->u[2] >> src1->u[2];
-   dst->u[3] = src0->u[3] >> src1->u[3];
-}
-
-static void
-micro_sin(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] = sinf( src->f[0] );
-   dst->f[1] = sinf( src->f[1] );
-   dst->f[2] = sinf( src->f[2] );
-   dst->f[3] = sinf( src->f[3] );
-}
-
-static void
-micro_sqrt( union tgsi_exec_channel *dst,
-            const union tgsi_exec_channel *src )
-{
-   dst->f[0] = sqrtf( src->f[0] );
-   dst->f[1] = sqrtf( src->f[1] );
-   dst->f[2] = sqrtf( src->f[2] );
-   dst->f[3] = sqrtf( src->f[3] );
-}
-
-static void
-micro_sub(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->f[0] = src0->f[0] - src1->f[0];
-   dst->f[1] = src0->f[1] - src1->f[1];
-   dst->f[2] = src0->f[2] - src1->f[2];
-   dst->f[3] = src0->f[3] - src1->f[3];
-}
-
-static void
-micro_u2f(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->f[0] = (float) src->u[0];
-   dst->f[1] = (float) src->u[1];
-   dst->f[2] = (float) src->u[2];
-   dst->f[3] = (float) src->u[3];
-}
-
-static void
-micro_xor(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1 )
-{
-   dst->u[0] = src0->u[0] ^ src1->u[0];
-   dst->u[1] = src0->u[1] ^ src1->u[1];
-   dst->u[2] = src0->u[2] ^ src1->u[2];
-   dst->u[3] = src0->u[3] ^ src1->u[3];
-}
-
-static void
-fetch_src_file_channel(
-   const struct tgsi_exec_machine *mach,
-   const uint file,
-   const uint swizzle,
-   const union tgsi_exec_channel *index,
-   union tgsi_exec_channel *chan )
-{
-   switch( swizzle ) {
-   case TGSI_EXTSWIZZLE_X:
-   case TGSI_EXTSWIZZLE_Y:
-   case TGSI_EXTSWIZZLE_Z:
-   case TGSI_EXTSWIZZLE_W:
-      switch( file ) {
-      case TGSI_FILE_CONSTANT:
-         chan->f[0] = mach->Consts[index->i[0]][swizzle];
-         chan->f[1] = mach->Consts[index->i[1]][swizzle];
-         chan->f[2] = mach->Consts[index->i[2]][swizzle];
-         chan->f[3] = mach->Consts[index->i[3]][swizzle];
-         break;
-
-      case TGSI_FILE_INPUT:
-         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
-         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
-         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
-         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
-         break;
-
-      case TGSI_FILE_TEMPORARY:
-         assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
-         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
-         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
-         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
-         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
-         break;
-
-      case TGSI_FILE_IMMEDIATE:
-         assert( index->i[0] < (int) mach->ImmLimit );
-         chan->f[0] = mach->Imms[index->i[0]][swizzle];
-         assert( index->i[1] < (int) mach->ImmLimit );
-         chan->f[1] = mach->Imms[index->i[1]][swizzle];
-         assert( index->i[2] < (int) mach->ImmLimit );
-         chan->f[2] = mach->Imms[index->i[2]][swizzle];
-         assert( index->i[3] < (int) mach->ImmLimit );
-         chan->f[3] = mach->Imms[index->i[3]][swizzle];
-         break;
-
-      case TGSI_FILE_ADDRESS:
-         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
-         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
-         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
-         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
-         break;
-
-      case TGSI_FILE_OUTPUT:
-         /* vertex/fragment output vars can be read too */
-         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
-         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
-         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
-         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
-         break;
-
-      default:
-         assert( 0 );
-      }
-      break;
-
-   case TGSI_EXTSWIZZLE_ZERO:
-      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
-      break;
-
-   case TGSI_EXTSWIZZLE_ONE:
-      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
-      break;
-
-   default:
-      assert( 0 );
-   }
-}
-
-static void
-fetch_source(
-   const struct tgsi_exec_machine *mach,
-   union tgsi_exec_channel *chan,
-   const struct tgsi_full_src_register *reg,
-   const uint chan_index )
-{
-   union tgsi_exec_channel index;
-   uint swizzle;
-
-   index.i[0] =
-   index.i[1] =
-   index.i[2] =
-   index.i[3] = reg->SrcRegister.Index;
-
-   if (reg->SrcRegister.Indirect) {
-      union tgsi_exec_channel index2;
-      union tgsi_exec_channel indir_index;
-
-      index2.i[0] =
-      index2.i[1] =
-      index2.i[2] =
-      index2.i[3] = reg->SrcRegisterInd.Index;
-
-      swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
-      fetch_src_file_channel(
-         mach,
-         reg->SrcRegisterInd.File,
-         swizzle,
-         &index2,
-         &indir_index );
-
-      index.i[0] += indir_index.i[0];
-      index.i[1] += indir_index.i[1];
-      index.i[2] += indir_index.i[2];
-      index.i[3] += indir_index.i[3];
-   }
-
-   if( reg->SrcRegister.Dimension ) {
-      switch( reg->SrcRegister.File ) {
-      case TGSI_FILE_INPUT:
-         index.i[0] *= 17;
-         index.i[1] *= 17;
-         index.i[2] *= 17;
-         index.i[3] *= 17;
-         break;
-      case TGSI_FILE_CONSTANT:
-         index.i[0] *= 4096;
-         index.i[1] *= 4096;
-         index.i[2] *= 4096;
-         index.i[3] *= 4096;
-         break;
-      default:
-         assert( 0 );
-      }
-
-      index.i[0] += reg->SrcRegisterDim.Index;
-      index.i[1] += reg->SrcRegisterDim.Index;
-      index.i[2] += reg->SrcRegisterDim.Index;
-      index.i[3] += reg->SrcRegisterDim.Index;
-
-      if (reg->SrcRegisterDim.Indirect) {
-         union tgsi_exec_channel index2;
-         union tgsi_exec_channel indir_index;
-
-         index2.i[0] =
-         index2.i[1] =
-         index2.i[2] =
-         index2.i[3] = reg->SrcRegisterDimInd.Index;
-
-         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
-         fetch_src_file_channel(
-            mach,
-            reg->SrcRegisterDimInd.File,
-            swizzle,
-            &index2,
-            &indir_index );
-
-         index.i[0] += indir_index.i[0];
-         index.i[1] += indir_index.i[1];
-         index.i[2] += indir_index.i[2];
-         index.i[3] += indir_index.i[3];
-      }
-   }
-
-   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
-   fetch_src_file_channel(
-      mach,
-      reg->SrcRegister.File,
-      swizzle,
-      &index,
-      chan );
-
-   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
-   case TGSI_UTIL_SIGN_CLEAR:
-      micro_abs( chan, chan );
-      break;
-
-   case TGSI_UTIL_SIGN_SET:
-      micro_abs( chan, chan );
-      micro_neg( chan, chan );
-      break;
-
-   case TGSI_UTIL_SIGN_TOGGLE:
-      micro_neg( chan, chan );
-      break;
-
-   case TGSI_UTIL_SIGN_KEEP:
-      break;
-   }
-
-   if (reg->SrcRegisterExtMod.Complement) {
-      micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
-   }
-}
-
-static void
-store_dest(
-   struct tgsi_exec_machine *mach,
-   const union tgsi_exec_channel *chan,
-   const struct tgsi_full_dst_register *reg,
-   const struct tgsi_full_instruction *inst,
-   uint chan_index )
-{
-   union tgsi_exec_channel *dst;
-
-   switch( reg->DstRegister.File ) {
-   case TGSI_FILE_NULL:
-      return;
-
-   case TGSI_FILE_OUTPUT:
-      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
-                           + reg->DstRegister.Index].xyzw[chan_index];
-      break;
-
-   case TGSI_FILE_TEMPORARY:
-      assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS);
-      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
-      break;
-
-   case TGSI_FILE_ADDRESS:
-      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
-      break;
-
-   default:
-      assert( 0 );
-      return;
-   }
-
-   switch (inst->Instruction.Saturate)
-   {
-   case TGSI_SAT_NONE:
-      if (mach->ExecMask & 0x1)
-         dst->i[0] = chan->i[0];
-      if (mach->ExecMask & 0x2)
-         dst->i[1] = chan->i[1];
-      if (mach->ExecMask & 0x4)
-         dst->i[2] = chan->i[2];
-      if (mach->ExecMask & 0x8)
-         dst->i[3] = chan->i[3];
-      break;
-
-   case TGSI_SAT_ZERO_ONE:
-      /* XXX need to obey ExecMask here */
-      micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
-      micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
-      break;
-
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      assert( 0 );
-      break;
-
-   default:
-      assert( 0 );
-   }
-}
-
-#define FETCH(VAL,INDEX,CHAN)\
-    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
-
-#define STORE(VAL,INDEX,CHAN)\
-    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
-
-
-/**
- * Execute ARB-style KIL which is predicated by a src register.
- * Kill fragment if any of the four values is less than zero.
- */
-static void
-exec_kilp(struct tgsi_exec_machine *mach,
-          const struct tgsi_full_instruction *inst)
-{
-   uint uniquemask;
-   uint chan_index;
-   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
-   union tgsi_exec_channel r[1];
-
-   /* This mask stores component bits that were already tested. Note that
-    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
-    * tested. */
-   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
-
-   for (chan_index = 0; chan_index < 4; chan_index++)
-   {
-      uint swizzle;
-      uint i;
-
-      /* unswizzle channel */
-      swizzle = tgsi_util_get_full_src_register_extswizzle (
-                        &inst->FullSrcRegisters[0],
-                        chan_index);
-
-      /* check if the component has not been already tested */
-      if (uniquemask & (1 << swizzle))
-         continue;
-      uniquemask |= 1 << swizzle;
-
-      FETCH(&r[0], 0, chan_index);
-      for (i = 0; i < 4; i++)
-         if (r[0].f[i] < 0.0f)
-            kilmask |= 1 << i;
-   }
-
-   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
-}
-
-
-/*
- * Fetch a texel using STR texture coordinates.
- */
-static void
-fetch_texel( struct tgsi_sampler *sampler,
-             const union tgsi_exec_channel *s,
-             const union tgsi_exec_channel *t,
-             const union tgsi_exec_channel *p,
-             float lodbias,  /* XXX should be float[4] */
-             union tgsi_exec_channel *r,
-             union tgsi_exec_channel *g,
-             union tgsi_exec_channel *b,
-             union tgsi_exec_channel *a )
-{
-   uint j;
-   float rgba[NUM_CHANNELS][QUAD_SIZE];
-
-   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
-
-   for (j = 0; j < 4; j++) {
-      r->f[j] = rgba[0][j];
-      g->f[j] = rgba[1][j];
-      b->f[j] = rgba[2][j];
-      a->f[j] = rgba[3][j];
-   }
-}
-
-
-static void
-exec_tex(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst,
-         boolean biasLod,
-         boolean projected)
-{
-   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
-   union tgsi_exec_channel r[8];
-   uint chan_index;
-   float lodBias;
-
-   /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
-
-   switch (inst->InstructionExtTexture.Texture) {
-   case TGSI_TEXTURE_1D:
-
-      FETCH(&r[0], 0, CHAN_X);
-
-      if (projected) {
-         FETCH(&r[1], 0, CHAN_W);
-         micro_div( &r[0], &r[0], &r[1] );
-      }
-
-      if (biasLod) {
-         FETCH(&r[1], 0, CHAN_W);
-         lodBias = r[2].f[0];
-      }
-      else
-         lodBias = 0.0;
-
-      fetch_texel(&mach->Samplers[unit],
-                  &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
-                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
-      break;
-
-   case TGSI_TEXTURE_2D:
-   case TGSI_TEXTURE_RECT:
-
-      FETCH(&r[0], 0, CHAN_X);
-      FETCH(&r[1], 0, CHAN_Y);
-      FETCH(&r[2], 0, CHAN_Z);
-
-      if (projected) {
-         FETCH(&r[3], 0, CHAN_W);
-         micro_div( &r[0], &r[0], &r[3] );
-         micro_div( &r[1], &r[1], &r[3] );
-         micro_div( &r[2], &r[2], &r[3] );
-      }
-
-      if (biasLod) {
-         FETCH(&r[3], 0, CHAN_W);
-         lodBias = r[3].f[0];
-      }
-      else
-         lodBias = 0.0;
-
-      fetch_texel(&mach->Samplers[unit],
-                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
-                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
-      break;
-
-   case TGSI_TEXTURE_3D:
-   case TGSI_TEXTURE_CUBE:
-
-      FETCH(&r[0], 0, CHAN_X);
-      FETCH(&r[1], 0, CHAN_Y);
-      FETCH(&r[2], 0, CHAN_Z);
-
-      if (projected) {
-         FETCH(&r[3], 0, CHAN_W);
-         micro_div( &r[0], &r[0], &r[3] );
-         micro_div( &r[1], &r[1], &r[3] );
-         micro_div( &r[2], &r[2], &r[3] );
-      }
-
-      if (biasLod) {
-         FETCH(&r[3], 0, CHAN_W);
-         lodBias = r[3].f[0];
-      }
-      else
-         lodBias = 0.0;
-
-      fetch_texel(&mach->Samplers[unit],
-                  &r[0], &r[1], &r[2], lodBias,
-                  &r[0], &r[1], &r[2], &r[3]);
-      break;
-
-   default:
-      assert (0);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-      STORE( &r[chan_index], 0, chan_index );
-   }
-}
-
-
-/**
- * Evaluate a constant-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_constant_coef(
-   struct tgsi_exec_machine *mach,
-   unsigned attrib,
-   unsigned chan )
-{
-   unsigned i;
-
-   for( i = 0; i < QUAD_SIZE; i++ ) {
-      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
-   }
-}
-
-/**
- * Evaluate a linear-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_linear_coef(
-   struct tgsi_exec_machine *mach,
-   unsigned attrib,
-   unsigned chan )
-{
-   const float x = mach->QuadPos.xyzw[0].f[0];
-   const float y = mach->QuadPos.xyzw[1].f[0];
-   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
-   const float dady = mach->InterpCoefs[attrib].dady[chan];
-   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
-   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
-   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
-   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
-   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
-}
-
-/**
- * Evaluate a perspective-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_perspective_coef(
-   struct tgsi_exec_machine *mach,
-   unsigned attrib,
-   unsigned chan )
-{
-   const float x = mach->QuadPos.xyzw[0].f[0];
-   const float y = mach->QuadPos.xyzw[1].f[0];
-   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
-   const float dady = mach->InterpCoefs[attrib].dady[chan];
-   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
-   const float *w = mach->QuadPos.xyzw[3].f;
-   /* divide by W here */
-   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
-   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
-   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
-   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
-}
-
-
-typedef void (* eval_coef_func)(
-   struct tgsi_exec_machine *mach,
-   unsigned attrib,
-   unsigned chan );
-
-static void
-exec_declaration(
-   struct tgsi_exec_machine *mach,
-   const struct tgsi_full_declaration *decl )
-{
-   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
-      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
-         unsigned first, last, mask;
-         eval_coef_func eval;
-
-         first = decl->DeclarationRange.First;
-         last = decl->DeclarationRange.Last;
-         mask = decl->Declaration.UsageMask;
-
-         switch( decl->Declaration.Interpolate ) {
-         case TGSI_INTERPOLATE_CONSTANT:
-            eval = eval_constant_coef;
-            break;
-
-         case TGSI_INTERPOLATE_LINEAR:
-            eval = eval_linear_coef;
-            break;
-
-         case TGSI_INTERPOLATE_PERSPECTIVE:
-            eval = eval_perspective_coef;
-            break;
-
-         default:
-            assert( 0 );
-         }
-
-         if( mask == TGSI_WRITEMASK_XYZW ) {
-            unsigned i, j;
-
-            for( i = first; i <= last; i++ ) {
-               for( j = 0; j < NUM_CHANNELS; j++ ) {
-                  eval( mach, i, j );
-               }
-            }
-         }
-         else {
-            unsigned i, j;
-
-            for( j = 0; j < NUM_CHANNELS; j++ ) {
-               if( mask & (1 << j) ) {
-                  for( i = first; i <= last; i++ ) {
-                     eval( mach, i, j );
-                  }
-               }
-            }
-         }
-      }
-   }
-}
-
-static void
-exec_instruction(
-   struct tgsi_exec_machine *mach,
-   const struct tgsi_full_instruction *inst,
-   int *pc )
-{
-   uint chan_index;
-   union tgsi_exec_channel r[8];
-
-   (*pc)++;
-
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_ARL:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 FETCH( &r[0], 0, chan_index );
-	 micro_f2it( &r[0], &r[0] );
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_MOV:
-   case TGSI_OPCODE_SWZ:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_LIT:
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-	 FETCH( &r[0], 0, CHAN_X );
-	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-	    micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-	    STORE( &r[0], 0, CHAN_Y );
-	 }
-
-	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-	    FETCH( &r[1], 0, CHAN_Y );
-	    micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-
-	    FETCH( &r[2], 0, CHAN_W );
-	    micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
-	    micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
-	    micro_pow( &r[1], &r[1], &r[2] );
-	    micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-	    STORE( &r[0], 0, CHAN_Z );
-	 }
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
-	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_RCP:
-   /* TGSI_OPCODE_RECIP */
-      FETCH( &r[0], 0, CHAN_X );
-      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_RSQ:
-   /* TGSI_OPCODE_RECIPSQRT */
-      FETCH( &r[0], 0, CHAN_X );
-      micro_sqrt( &r[0], &r[0] );
-      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_EXP:
-      FETCH( &r[0], 0, CHAN_X );
-      micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-         micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
-	 STORE( &r[2], 0, CHAN_X );        /* store r2 */
-      }
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-         micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
-	 STORE( &r[2], 0, CHAN_Y );        /* store r2 */
-      }
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-         micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
-	 STORE( &r[2], 0, CHAN_Z );        /* store r2 */
-      }
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
-	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_LOG:
-      FETCH( &r[0], 0, CHAN_X );
-      micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
-      micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
-      micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-	 STORE( &r[0], 0, CHAN_X );
-      }
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-         micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
-         micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
-	 STORE( &r[0], 0, CHAN_Y );
-      }
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-	 STORE( &r[1], 0, CHAN_Z );
-      }
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
-	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_MUL:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
-      {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-
-         micro_mul( &r[0], &r[0], &r[1] );
-
-         STORE(&r[0], 0, chan_index);
-      }
-      break;
-
-   case TGSI_OPCODE_ADD:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_add( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DP3:
-   /* TGSI_OPCODE_DOT3 */
-      FETCH( &r[0], 0, CHAN_X );
-      FETCH( &r[1], 1, CHAN_X );
-      micro_mul( &r[0], &r[0], &r[1] );
-
-      FETCH( &r[1], 0, CHAN_Y );
-      FETCH( &r[2], 1, CHAN_Y );
-      micro_mul( &r[1], &r[1], &r[2] );
-      micro_add( &r[0], &r[0], &r[1] );
-
-      FETCH( &r[1], 0, CHAN_Z );
-      FETCH( &r[2], 1, CHAN_Z );
-      micro_mul( &r[1], &r[1], &r[2] );
-      micro_add( &r[0], &r[0], &r[1] );
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-    case TGSI_OPCODE_DP4:
-    /* TGSI_OPCODE_DOT4 */
-       FETCH(&r[0], 0, CHAN_X);
-       FETCH(&r[1], 1, CHAN_X);
-
-       micro_mul( &r[0], &r[0], &r[1] );
-
-       FETCH(&r[1], 0, CHAN_Y);
-       FETCH(&r[2], 1, CHAN_Y);
-
-       micro_mul( &r[1], &r[1], &r[2] );
-       micro_add( &r[0], &r[0], &r[1] );
-
-       FETCH(&r[1], 0, CHAN_Z);
-       FETCH(&r[2], 1, CHAN_Z);
-
-       micro_mul( &r[1], &r[1], &r[2] );
-       micro_add( &r[0], &r[0], &r[1] );
-
-       FETCH(&r[1], 0, CHAN_W);
-       FETCH(&r[2], 1, CHAN_W);
-
-       micro_mul( &r[1], &r[1], &r[2] );
-       micro_add( &r[0], &r[0], &r[1] );
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DST:
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-	 FETCH( &r[0], 0, CHAN_Y );
-	 FETCH( &r[1], 1, CHAN_Y);
-	 micro_mul( &r[0], &r[0], &r[1] );
-	 STORE( &r[0], 0, CHAN_Y );
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-	 FETCH( &r[0], 0, CHAN_Z );
-	 STORE( &r[0], 0, CHAN_Z );
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
-	 FETCH( &r[0], 1, CHAN_W );
-	 STORE( &r[0], 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_MIN:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-
-         /* XXX use micro_min()?? */
-         micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
-
-         STORE(&r[0], 0, chan_index);
-      }
-      break;
-
-   case TGSI_OPCODE_MAX:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-
-         /* XXX use micro_max()?? */
-         micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
-
-         STORE(&r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SLT:
-   /* TGSI_OPCODE_SETLT */
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SGE:
-   /* TGSI_OPCODE_SETGE */
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_MAD:
-   /* TGSI_OPCODE_MADD */
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_mul( &r[0], &r[0], &r[1] );
-         FETCH( &r[1], 2, chan_index );
-         micro_add( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SUB:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-
-         micro_sub( &r[0], &r[0], &r[1] );
-
-         STORE(&r[0], 0, chan_index);
-      }
-      break;
-
-   case TGSI_OPCODE_LERP:
-   /* TGSI_OPCODE_LRP */
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-         FETCH(&r[2], 2, chan_index);
-
-         micro_sub( &r[1], &r[1], &r[2] );
-         micro_mul( &r[0], &r[0], &r[1] );
-         micro_add( &r[0], &r[0], &r[2] );
-
-         STORE(&r[0], 0, chan_index);
-      }
-      break;
-
-   case TGSI_OPCODE_CND:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_CND0:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_DOT2ADD:
-      /* TGSI_OPCODE_DP2A */
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_INDEX:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_NEGATE:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_FRAC:
-   /* TGSI_OPCODE_FRC */
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         micro_frc( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_CLAMP:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_FLOOR:
-   /* TGSI_OPCODE_FLR */
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         micro_flr( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_ROUND:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         micro_rnd( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_EXPBASE2:
-    /* TGSI_OPCODE_EX2 */
-      FETCH(&r[0], 0, CHAN_X);
-
-      micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_LOGBASE2:
-   /* TGSI_OPCODE_LG2 */
-      FETCH( &r[0], 0, CHAN_X );
-      micro_lg2( &r[0], &r[0] );
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_POWER:
-      /* TGSI_OPCODE_POW */
-      FETCH(&r[0], 0, CHAN_X);
-      FETCH(&r[1], 1, CHAN_X);
-
-      micro_pow( &r[0], &r[0], &r[1] );
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_CROSSPRODUCT:
-      /* TGSI_OPCODE_XPD */
-      FETCH(&r[0], 0, CHAN_Y);
-      FETCH(&r[1], 1, CHAN_Z);
-
-      micro_mul( &r[2], &r[0], &r[1] );
-
-      FETCH(&r[3], 0, CHAN_Z);
-      FETCH(&r[4], 1, CHAN_Y);
-
-      micro_mul( &r[5], &r[3], &r[4] );
-      micro_sub( &r[2], &r[2], &r[5] );
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-         STORE( &r[2], 0, CHAN_X );
-      }
-
-      FETCH(&r[2], 1, CHAN_X);
-
-      micro_mul( &r[3], &r[3], &r[2] );
-
-      FETCH(&r[5], 0, CHAN_X);
-
-      micro_mul( &r[1], &r[1], &r[5] );
-      micro_sub( &r[3], &r[3], &r[1] );
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-         STORE( &r[3], 0, CHAN_Y );
-      }
-
-      micro_mul( &r[5], &r[5], &r[4] );
-      micro_mul( &r[0], &r[0], &r[2] );
-      micro_sub( &r[5], &r[5], &r[0] );
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-         STORE( &r[5], 0, CHAN_Z );
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
-         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
-      }
-      break;
-
-    case TGSI_OPCODE_MULTIPLYMATRIX:
-       assert (0);
-       break;
-
-    case TGSI_OPCODE_ABS:
-       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-          FETCH(&r[0], 0, chan_index);
-
-          micro_abs( &r[0], &r[0] );
-
-          STORE(&r[0], 0, chan_index);
-       }
-       break;
-
-   case TGSI_OPCODE_RCC:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_DPH:
-      FETCH(&r[0], 0, CHAN_X);
-      FETCH(&r[1], 1, CHAN_X);
-
-      micro_mul( &r[0], &r[0], &r[1] );
-
-      FETCH(&r[1], 0, CHAN_Y);
-      FETCH(&r[2], 1, CHAN_Y);
-
-      micro_mul( &r[1], &r[1], &r[2] );
-      micro_add( &r[0], &r[0], &r[1] );
-
-      FETCH(&r[1], 0, CHAN_Z);
-      FETCH(&r[2], 1, CHAN_Z);
-
-      micro_mul( &r[1], &r[1], &r[2] );
-      micro_add( &r[0], &r[0], &r[1] );
-
-      FETCH(&r[1], 1, CHAN_W);
-
-      micro_add( &r[0], &r[0], &r[1] );
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_COS:
-      FETCH(&r[0], 0, CHAN_X);
-
-      micro_cos( &r[0], &r[0] );
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DDX:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         micro_ddx( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DDY:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         micro_ddy( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_KILP:
-      exec_kilp (mach, inst);
-      break;
-
-   case TGSI_OPCODE_KIL:
-      /* for enabled ExecMask bits, set the killed bit */
-      mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
-      break;
-
-   case TGSI_OPCODE_PK2H:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_PK2US:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_PK4B:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_PK4UB:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_RFL:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_SEQ:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_eq( &r[0], &r[0], &r[1],
-                   &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
-                   &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SFL:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_SGT:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SIN:
-      FETCH( &r[0], 0, CHAN_X );
-      micro_sin( &r[0], &r[0] );
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SLE:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SNE:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_STR:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_TEX:
-      /* simple texture lookup */
-      /* src[0] = texcoord */
-      /* src[1] = sampler unit */
-      exec_tex(mach, inst, FALSE, FALSE);
-      break;
-
-   case TGSI_OPCODE_TXB:
-      /* Texture lookup with lod bias */
-      /* src[0] = texcoord (src[0].w = LOD bias) */
-      /* src[1] = sampler unit */
-      exec_tex(mach, inst, TRUE, FALSE);
-      break;
-
-   case TGSI_OPCODE_TXD:
-      /* Texture lookup with explict partial derivatives */
-      /* src[0] = texcoord */
-      /* src[1] = d[strq]/dx */
-      /* src[2] = d[strq]/dy */
-      /* src[3] = sampler unit */
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_TXL:
-      /* Texture lookup with explit LOD */
-      /* src[0] = texcoord (src[0].w = LOD) */
-      /* src[1] = sampler unit */
-      exec_tex(mach, inst, TRUE, FALSE);
-      break;
-
-   case TGSI_OPCODE_TXP:
-      /* Texture lookup with projection */
-      /* src[0] = texcoord (src[0].w = projection) */
-      /* src[1] = sampler unit */
-      exec_tex(mach, inst, FALSE, TRUE);
-      break;
-
-   case TGSI_OPCODE_UP2H:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_UP2US:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_UP4B:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_UP4UB:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_X2D:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_ARA:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_ARR:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_BRA:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_CAL:
-      /* skip the call if no execution channels are enabled */
-      if (mach->ExecMask) {
-         /* do the call */
-
-         /* push the Cond, Loop, Cont stacks */
-         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
-         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
-         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
-         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
-         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
-         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
-
-         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
-         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
-
-         /* note that PC was already incremented above */
-         mach->CallStack[mach->CallStackTop++] = *pc;
-         *pc = inst->InstructionExtLabel.Label;
-      }
-      break;
-
-   case TGSI_OPCODE_RET:
-      mach->FuncMask &= ~mach->ExecMask;
-      UPDATE_EXEC_MASK(mach);
-
-      if (mach->ExecMask == 0x0) {
-         /* really return now (otherwise, keep executing */
-
-         if (mach->CallStackTop == 0) {
-            /* returning from main() */
-            *pc = -1;
-            return;
-         }
-         *pc = mach->CallStack[--mach->CallStackTop];
-
-         /* pop the Cond, Loop, Cont stacks */
-         assert(mach->CondStackTop > 0);
-         mach->CondMask = mach->CondStack[--mach->CondStackTop];
-         assert(mach->LoopStackTop > 0);
-         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
-         assert(mach->ContStackTop > 0);
-         mach->ContMask = mach->ContStack[--mach->ContStackTop];
-         assert(mach->FuncStackTop > 0);
-         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
-
-         UPDATE_EXEC_MASK(mach);
-      }
-      break;
-
-   case TGSI_OPCODE_SSG:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_CMP:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-         FETCH(&r[2], 2, chan_index);
-
-         micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
-
-         STORE(&r[0], 0, chan_index);
-      }
-      break;
-
-   case TGSI_OPCODE_SCS:
-      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
-         FETCH( &r[0], 0, CHAN_X );
-      }
-      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
-         micro_cos( &r[1], &r[0] );
-         STORE( &r[1], 0, CHAN_X );
-      }
-      if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
-         micro_sin( &r[1], &r[0] );
-         STORE( &r[1], 0, CHAN_Y );
-      }
-      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
-         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
-      }
-      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
-         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_NRM:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_DIV:
-      assert( 0 );
-      break;
-
-   case TGSI_OPCODE_DP2:
-      FETCH( &r[0], 0, CHAN_X );
-      FETCH( &r[1], 1, CHAN_X );
-      micro_mul( &r[0], &r[0], &r[1] );
-
-      FETCH( &r[1], 0, CHAN_Y );
-      FETCH( &r[2], 1, CHAN_Y );
-      micro_mul( &r[1], &r[1], &r[2] );
-      micro_add( &r[0], &r[0], &r[1] );
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_IF:
-      /* push CondMask */
-      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
-      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
-      FETCH( &r[0], 0, CHAN_X );
-      /* update CondMask */
-      if( ! r[0].u[0] ) {
-         mach->CondMask &= ~0x1;
-      }
-      if( ! r[0].u[1] ) {
-         mach->CondMask &= ~0x2;
-      }
-      if( ! r[0].u[2] ) {
-         mach->CondMask &= ~0x4;
-      }
-      if( ! r[0].u[3] ) {
-         mach->CondMask &= ~0x8;
-      }
-      UPDATE_EXEC_MASK(mach);
-      /* Todo: If CondMask==0, jump to ELSE */
-      break;
-
-   case TGSI_OPCODE_ELSE:
-      /* invert CondMask wrt previous mask */
-      {
-         uint prevMask;
-         assert(mach->CondStackTop > 0);
-         prevMask = mach->CondStack[mach->CondStackTop - 1];
-         mach->CondMask = ~mach->CondMask & prevMask;
-         UPDATE_EXEC_MASK(mach);
-         /* Todo: If CondMask==0, jump to ENDIF */
-      }
-      break;
-
-   case TGSI_OPCODE_ENDIF:
-      /* pop CondMask */
-      assert(mach->CondStackTop > 0);
-      mach->CondMask = mach->CondStack[--mach->CondStackTop];
-      UPDATE_EXEC_MASK(mach);
-      break;
-
-   case TGSI_OPCODE_END:
-      /* halt execution */
-      *pc = -1;
-      break;
-
-   case TGSI_OPCODE_REP:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_ENDREP:
-       assert (0);
-       break;
-
-   case TGSI_OPCODE_PUSHA:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_POPA:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_CEIL:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         micro_ceil( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_I2F:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         micro_i2f( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_NOT:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         micro_not( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_TRUNC:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         micro_trunc( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SHL:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_shl( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SHR:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_ishr( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_AND:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_and( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_OR:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_or( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_MOD:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_XOR:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         micro_xor( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SAD:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_TXF:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_TXQ:
-      assert (0);
-      break;
-
-   case TGSI_OPCODE_EMIT:
-      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
-      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
-      break;
-
-   case TGSI_OPCODE_ENDPRIM:
-      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
-      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
-      break;
-
-   case TGSI_OPCODE_LOOP:
-      /* fall-through (for now) */
-   case TGSI_OPCODE_BGNLOOP2:
-      /* push LoopMask and ContMasks */
-      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
-      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
-      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
-      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
-      break;
-
-   case TGSI_OPCODE_ENDLOOP:
-      /* fall-through (for now at least) */
-   case TGSI_OPCODE_ENDLOOP2:
-      /* Restore ContMask, but don't pop */
-      assert(mach->ContStackTop > 0);
-      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
-      UPDATE_EXEC_MASK(mach);
-      if (mach->ExecMask) {
-         /* repeat loop: jump to instruction just past BGNLOOP */
-         *pc = inst->InstructionExtLabel.Label + 1;
-      }
-      else {
-         /* exit loop: pop LoopMask */
-         assert(mach->LoopStackTop > 0);
-         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
-         /* pop ContMask */
-         assert(mach->ContStackTop > 0);
-         mach->ContMask = mach->ContStack[--mach->ContStackTop];
-      }
-      UPDATE_EXEC_MASK(mach);
-      break;
-
-   case TGSI_OPCODE_BRK:
-      /* turn off loop channels for each enabled exec channel */
-      mach->LoopMask &= ~mach->ExecMask;
-      /* Todo: if mach->LoopMask == 0, jump to end of loop */
-      UPDATE_EXEC_MASK(mach);
-      break;
-
-   case TGSI_OPCODE_CONT:
-      /* turn off cont channels for each enabled exec channel */
-      mach->ContMask &= ~mach->ExecMask;
-      /* Todo: if mach->LoopMask == 0, jump to end of loop */
-      UPDATE_EXEC_MASK(mach);
-      break;
-
-   case TGSI_OPCODE_BGNSUB:
-      /* no-op */
-      break;
-
-   case TGSI_OPCODE_ENDSUB:
-      /* no-op */
-      break;
-
-   case TGSI_OPCODE_NOISE1:
-      assert( 0 );
-      break;
-
-   case TGSI_OPCODE_NOISE2:
-      assert( 0 );
-      break;
-
-   case TGSI_OPCODE_NOISE3:
-      assert( 0 );
-      break;
-
-   case TGSI_OPCODE_NOISE4:
-      assert( 0 );
-      break;
-
-   case TGSI_OPCODE_NOP:
-      break;
-
-   default:
-      assert( 0 );
-   }
-}
-
-
-/**
- * Run TGSI interpreter.
- * \return bitmask of "alive" quad components
- */
-uint
-tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
-{
-   uint i;
-   int pc = 0;
-
-   mach->CondMask = 0xf;
-   mach->LoopMask = 0xf;
-   mach->ContMask = 0xf;
-   mach->FuncMask = 0xf;
-   mach->ExecMask = 0xf;
-
-   mach->CondStackTop = 0; /* temporarily subvert this assertion */
-   assert(mach->CondStackTop == 0);
-   assert(mach->LoopStackTop == 0);
-   assert(mach->ContStackTop == 0);
-   assert(mach->CallStackTop == 0);
-
-   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
-   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
-
-   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
-      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
-      mach->Primitives[0] = 0;
-   }
-
-
-   /* execute declarations (interpolants) */
-   for (i = 0; i < mach->NumDeclarations; i++) {
-      exec_declaration( mach, mach->Declarations+i );
-   }
-
-   /* execute instructions, until pc is set to -1 */
-   while (pc != -1) {
-      assert(pc < (int) mach->NumInstructions);
-      exec_instruction( mach, mach->Instructions + pc, &pc );
-   }
-
-#if 0
-   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
-   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
-      /*
-       * Scale back depth component.
-       */
-      for (i = 0; i < 4; i++)
-         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
-   }
-#endif
-
-   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
-}
-
-
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
deleted file mode 100644
index 4f30650b07b..00000000000
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
+++ /dev/null
@@ -1,253 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#if !defined TGSI_EXEC_H
-#define TGSI_EXEC_H
-
-#include "pipe/p_compiler.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-#define MAX_LABELS 1024
-
-#define NUM_CHANNELS 4  /* R,G,B,A */
-#define QUAD_SIZE    4  /* 4 pixel/quad */
-
-/**
-  * Registers may be treated as float, signed int or unsigned int.
-  */
-union tgsi_exec_channel
-{
-   float    f[QUAD_SIZE];
-   int      i[QUAD_SIZE];
-   unsigned u[QUAD_SIZE];
-};
-
-/**
-  * A vector[RGBA] of channels[4 pixels]
-  */
-struct tgsi_exec_vector
-{
-   union tgsi_exec_channel xyzw[NUM_CHANNELS];
-};
-
-/**
- * For fragment programs, information for computing fragment input
- * values from plane equation of the triangle/line.
- */
-struct tgsi_interp_coef
-{
-   float a0[NUM_CHANNELS];	/* in an xyzw layout */
-   float dadx[NUM_CHANNELS];
-   float dady[NUM_CHANNELS];
-};
-
-
-struct softpipe_tile_cache;  /**< Opaque to TGSI */
-
-/**
- * Information for sampling textures, which must be implemented
- * by code outside the TGSI executor.
- */
-struct tgsi_sampler
-{
-   const struct pipe_sampler_state *state;
-   struct pipe_texture *texture;
-   /** Get samples for four fragments in a quad */
-   void (*get_samples)(struct tgsi_sampler *sampler,
-                       const float s[QUAD_SIZE],
-                       const float t[QUAD_SIZE],
-                       const float p[QUAD_SIZE],
-                       float lodbias,
-                       float rgba[NUM_CHANNELS][QUAD_SIZE]);
-   void *pipe; /*XXX temporary*/
-   struct softpipe_tile_cache *cache;
-};
-
-/**
- * For branching/calling subroutines.
- */
-struct tgsi_exec_labels
-{
-   unsigned labels[MAX_LABELS][2];
-   unsigned count;
-};
-
-
-#define TGSI_EXEC_NUM_TEMPS       128
-#define TGSI_EXEC_NUM_TEMP_EXTRAS   6
-#define TGSI_EXEC_NUM_IMMEDIATES  256
-
-/*
- * Locations of various utility registers (_I = Index, _C = Channel)
- */
-#define TGSI_EXEC_TEMP_00000000_I   (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_00000000_C   0
-
-#define TGSI_EXEC_TEMP_7FFFFFFF_I   (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_7FFFFFFF_C   1
-
-#define TGSI_EXEC_TEMP_80000000_I   (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_80000000_C   2
-
-#define TGSI_EXEC_TEMP_FFFFFFFF_I   (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_FFFFFFFF_C   3
-
-#define TGSI_EXEC_TEMP_ONE_I        (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_ONE_C        0
-
-#define TGSI_EXEC_TEMP_TWO_I        (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_TWO_C        1
-
-#define TGSI_EXEC_TEMP_128_I        (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_128_C        2
-
-#define TGSI_EXEC_TEMP_MINUS_128_I  (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_MINUS_128_C  3
-
-#define TGSI_EXEC_TEMP_KILMASK_I    (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_KILMASK_C    0
-
-#define TGSI_EXEC_TEMP_OUTPUT_I     (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_OUTPUT_C     1
-
-#define TGSI_EXEC_TEMP_PRIMITIVE_I  (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_PRIMITIVE_C  2
-
-#define TGSI_EXEC_TEMP_THREE_I      (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_THREE_C      3
-
-#define TGSI_EXEC_TEMP_HALF_I       (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_HALF_C       0
-
-#define TGSI_EXEC_TEMP_R0           (TGSI_EXEC_NUM_TEMPS + 4)
-
-#define TGSI_EXEC_TEMP_ADDR         (TGSI_EXEC_NUM_TEMPS + 5)
-
-
-#define TGSI_EXEC_MAX_COND_NESTING  20
-#define TGSI_EXEC_MAX_LOOP_NESTING  20
-#define TGSI_EXEC_MAX_CALL_NESTING  20
-
-/**
- * Run-time virtual machine state for executing TGSI shader.
- */
-struct tgsi_exec_machine
-{
-   /* Total = program temporaries + internal temporaries
-    *         + 1 padding to align to 16 bytes
-    */
-   struct tgsi_exec_vector       _Temps[TGSI_EXEC_NUM_TEMPS +
-                                        TGSI_EXEC_NUM_TEMP_EXTRAS + 1];
-
-   /*
-    * This will point to _Temps after aligning to 16B boundary.
-    */
-   struct tgsi_exec_vector       *Temps;
-   struct tgsi_exec_vector       *Addrs;
-
-   struct tgsi_sampler           *Samplers;
-
-   float                         Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
-   unsigned                      ImmLimit;
-   const float                   (*Consts)[4];
-   struct tgsi_exec_vector       *Inputs;
-   struct tgsi_exec_vector       *Outputs;
-   const struct tgsi_token       *Tokens;
-   unsigned                      Processor;
-
-   /* GEOMETRY processor only. */
-   unsigned                      *Primitives;
-
-   /* FRAGMENT processor only. */
-   const struct tgsi_interp_coef *InterpCoefs;
-   struct tgsi_exec_vector       QuadPos;
-
-   /* Conditional execution masks */
-   uint CondMask;  /**< For IF/ELSE/ENDIF */
-   uint LoopMask;  /**< For BGNLOOP/ENDLOOP */
-   uint ContMask;  /**< For loop CONT statements */
-   uint FuncMask;  /**< For function calls */
-   uint ExecMask;  /**< = CondMask & LoopMask */
-
-   /** Condition mask stack (for nested conditionals) */
-   uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
-   int CondStackTop;
-
-   /** Loop mask stack (for nested loops) */
-   uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
-   int LoopStackTop;
-
-   /** Loop continue mask stack (see comments in tgsi_exec.c) */
-   uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
-   int ContStackTop;
-
-   /** Function execution mask stack (for executing subroutine code) */
-   uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
-   int FuncStackTop;
-
-   /** Function call stack for saving/restoring the program counter */
-   uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
-   int CallStackTop;
-
-   struct tgsi_full_instruction *Instructions;
-   uint NumInstructions;
-
-   struct tgsi_full_declaration *Declarations;
-   uint NumDeclarations;
-
-   struct tgsi_exec_labels Labels;
-};
-
-void
-tgsi_exec_machine_init(
-   struct tgsi_exec_machine *mach );
-
-
-void 
-tgsi_exec_machine_bind_shader(
-   struct tgsi_exec_machine *mach,
-   const struct tgsi_token *tokens,
-   uint numSamplers,
-   struct tgsi_sampler *samplers);
-
-uint
-tgsi_exec_machine_run(
-   struct tgsi_exec_machine *mach );
-
-
-void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
-
-
-#if defined __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* TGSI_EXEC_H */
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
deleted file mode 100755
index cdbdf5c8827..00000000000
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ /dev/null
@@ -1,2275 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
-#include "tgsi_exec.h"
-#include "tgsi_sse2.h"
-
-#include "rtasm/rtasm_x86sse.h"
-
-#ifdef PIPE_ARCH_X86
-
-/* for 1/sqrt()
- *
- * This costs about 100fps (close to 10%) in gears:
- */
-#define HIGH_PRECISION 1
-
-
-#define FOR_EACH_CHANNEL( CHAN )\
-   for( CHAN = 0; CHAN < 4; CHAN++ )
-
-#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
-   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
-
-#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
-   if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
-
-#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
-   FOR_EACH_CHANNEL( CHAN )\
-      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
-
-#define CHAN_X 0
-#define CHAN_Y 1
-#define CHAN_Z 2
-#define CHAN_W 3
-
-#define TEMP_R0   TGSI_EXEC_TEMP_R0
-
-/**
- * X86 utility functions.
- */
-
-static struct x86_reg
-make_xmm(
-   unsigned xmm )
-{
-   return x86_make_reg(
-      file_XMM,
-      (enum x86_reg_name) xmm );
-}
-
-/**
- * X86 register mapping helpers.
- */
-
-static struct x86_reg
-get_const_base( void )
-{
-   return x86_make_reg(
-      file_REG32,
-      reg_CX );
-}
-
-static struct x86_reg
-get_input_base( void )
-{
-   return x86_make_reg(
-      file_REG32,
-      reg_AX );
-}
-
-static struct x86_reg
-get_output_base( void )
-{
-   return x86_make_reg(
-      file_REG32,
-      reg_DX );
-}
-
-static struct x86_reg
-get_temp_base( void )
-{
-   return x86_make_reg(
-      file_REG32,
-      reg_BX );
-}
-
-static struct x86_reg
-get_coef_base( void )
-{
-   return get_output_base();
-}
-
-static struct x86_reg
-get_immediate_base( void )
-{
-   return x86_make_reg(
-      file_REG32,
-      reg_DI );
-}
-
-
-/**
- * Data access helpers.
- */
-
-
-static struct x86_reg
-get_immediate(
-   unsigned vec,
-   unsigned chan )
-{
-   return x86_make_disp(
-      get_immediate_base(),
-      (vec * 4 + chan) * 4 );
-}
-
-static struct x86_reg
-get_const(
-   unsigned vec,
-   unsigned chan )
-{
-   return x86_make_disp(
-      get_const_base(),
-      (vec * 4 + chan) * 4 );
-}
-
-static struct x86_reg
-get_input(
-   unsigned vec,
-   unsigned chan )
-{
-   return x86_make_disp(
-      get_input_base(),
-      (vec * 4 + chan) * 16 );
-}
-
-static struct x86_reg
-get_output(
-   unsigned vec,
-   unsigned chan )
-{
-   return x86_make_disp(
-      get_output_base(),
-      (vec * 4 + chan) * 16 );
-}
-
-static struct x86_reg
-get_temp(
-   unsigned vec,
-   unsigned chan )
-{
-   return x86_make_disp(
-      get_temp_base(),
-      (vec * 4 + chan) * 16 );
-}
-
-static struct x86_reg
-get_coef(
-   unsigned vec,
-   unsigned chan,
-   unsigned member )
-{
-   return x86_make_disp(
-      get_coef_base(),
-      ((vec * 3 + member) * 4 + chan) * 4 );
-}
-
-
-static void
-emit_ret(
-   struct x86_function  *func )
-{
-   x86_ret( func );
-}
-
-
-/**
- * Data fetch helpers.
- */
-
-/**
- * Copy a shader constant to xmm register
- * \param xmm  the destination xmm register
- * \param vec  the src const buffer index
- * \param chan  src channel to fetch (X, Y, Z or W)
- */
-static void
-emit_const(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movss(
-      func,
-      make_xmm( xmm ),
-      get_const( vec, chan ) );
-   sse_shufps(
-      func,
-      make_xmm( xmm ),
-      make_xmm( xmm ),
-      SHUF( 0, 0, 0, 0 ) );
-}
-
-static void
-emit_immediate(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movss(
-      func,
-      make_xmm( xmm ),
-      get_immediate( vec, chan ) );
-   sse_shufps(
-      func,
-      make_xmm( xmm ),
-      make_xmm( xmm ),
-      SHUF( 0, 0, 0, 0 ) );
-}
-
-
-/**
- * Copy a shader input to xmm register
- * \param xmm  the destination xmm register
- * \param vec  the src input attrib
- * \param chan  src channel to fetch (X, Y, Z or W)
- */
-static void
-emit_inputf(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movups(
-      func,
-      make_xmm( xmm ),
-      get_input( vec, chan ) );
-}
-
-/**
- * Store an xmm register to a shader output
- * \param xmm  the source xmm register
- * \param vec  the dest output attrib
- * \param chan  src dest channel to store (X, Y, Z or W)
- */
-static void
-emit_output(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movups(
-      func,
-      get_output( vec, chan ),
-      make_xmm( xmm ) );
-}
-
-/**
- * Copy a shader temporary to xmm register
- * \param xmm  the destination xmm register
- * \param vec  the src temp register
- * \param chan  src channel to fetch (X, Y, Z or W)
- */
-static void
-emit_tempf(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movaps(
-      func,
-      make_xmm( xmm ),
-      get_temp( vec, chan ) );
-}
-
-/**
- * Load an xmm register with an input attrib coefficient (a0, dadx or dady)
- * \param xmm  the destination xmm register
- * \param vec  the src input/attribute coefficient index
- * \param chan  src channel to fetch (X, Y, Z or W)
- * \param member  0=a0, 1=dadx, 2=dady
- */
-static void
-emit_coef(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan,
-   unsigned member )
-{
-   sse_movss(
-      func,
-      make_xmm( xmm ),
-      get_coef( vec, chan, member ) );
-   sse_shufps(
-      func,
-      make_xmm( xmm ),
-      make_xmm( xmm ),
-      SHUF( 0, 0, 0, 0 ) );
-}
-
-/**
- * Data store helpers.
- */
-
-static void
-emit_inputs(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movups(
-      func,
-      get_input( vec, chan ),
-      make_xmm( xmm ) );
-}
-
-static void
-emit_temps(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movaps(
-      func,
-      get_temp( vec, chan ),
-      make_xmm( xmm ) );
-}
-
-static void
-emit_addrs(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   emit_temps(
-      func,
-      xmm,
-      vec + TGSI_EXEC_NUM_TEMPS,
-      chan );
-}
-
-/**
- * Coefficent fetch helpers.
- */
-
-static void
-emit_coef_a0(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   emit_coef(
-      func,
-      xmm,
-      vec,
-      chan,
-      0 );
-}
-
-static void
-emit_coef_dadx(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   emit_coef(
-      func,
-      xmm,
-      vec,
-      chan,
-      1 );
-}
-
-static void
-emit_coef_dady(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   emit_coef(
-      func,
-      xmm,
-      vec,
-      chan,
-      2 );
-}
-
-/**
- * Function call helpers.
- */
-
-static void
-emit_push_gp(
-   struct x86_function *func )
-{
-   x86_push(
-      func,
-      x86_make_reg( file_REG32, reg_AX) );
-   x86_push(
-      func,
-      x86_make_reg( file_REG32, reg_CX) );
-   x86_push(
-      func,
-      x86_make_reg( file_REG32, reg_DX) );
-}
-
-static void
-x86_pop_gp(
-   struct x86_function *func )
-{
-   /* Restore GP registers in a reverse order.
-    */
-   x86_pop(
-      func,
-      x86_make_reg( file_REG32, reg_DX) );
-   x86_pop(
-      func,
-      x86_make_reg( file_REG32, reg_CX) );
-   x86_pop(
-      func,
-      x86_make_reg( file_REG32, reg_AX) );
-}
-
-static void
-emit_func_call_dst(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   void (PIPE_CDECL *code)() )
-{
-   sse_movaps(
-      func,
-      get_temp( TEMP_R0, 0 ),
-      make_xmm( xmm_dst ) );
-
-   emit_push_gp(
-      func );
-
-   {
-      struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-
-      x86_lea(
-         func,
-         ecx,
-         get_temp( TEMP_R0, 0 ) );
-
-      x86_push( func, ecx );
-      x86_mov_reg_imm( func, ecx, (unsigned long) code );
-      x86_call( func, ecx );
-      x86_pop(func, ecx ); 
-   }
-
-
-   x86_pop_gp(
-      func );
-
-   sse_movaps(
-      func,
-      make_xmm( xmm_dst ),
-      get_temp( TEMP_R0, 0 ) );
-}
-
-static void
-emit_func_call_dst_src(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src,
-   void (PIPE_CDECL *code)() )
-{
-   sse_movaps(
-      func,
-      get_temp( TEMP_R0, 1 ),
-      make_xmm( xmm_src ) );
-
-   emit_func_call_dst(
-      func,
-      xmm_dst,
-      code );
-}
-
-/**
- * Low-level instruction translators.
- */
-
-static void
-emit_abs(
-   struct x86_function *func,
-   unsigned xmm )
-{
-   sse_andps(
-      func,
-      make_xmm( xmm ),
-      get_temp(
-         TGSI_EXEC_TEMP_7FFFFFFF_I,
-         TGSI_EXEC_TEMP_7FFFFFFF_C ) );
-}
-
-static void
-emit_add(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-   sse_addps(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-}
-
-static void PIPE_CDECL
-cos4f(
-   float *store )
-{
-   const unsigned X = 0;
-
-   store[X + 0] = cosf( store[X + 0] );
-   store[X + 1] = cosf( store[X + 1] );
-   store[X + 2] = cosf( store[X + 2] );
-   store[X + 3] = cosf( store[X + 3] );
-}
-
-static void
-emit_cos(
-   struct x86_function *func,
-   unsigned xmm_dst )
-{
-   emit_func_call_dst(
-      func,
-      xmm_dst,
-      cos4f );
-}
-
-static void PIPE_CDECL
-ex24f(
-   float *store )
-{
-   const unsigned X = 0;
-
-   store[X + 0] = powf( 2.0f, store[X + 0] );
-   store[X + 1] = powf( 2.0f, store[X + 1] );
-   store[X + 2] = powf( 2.0f, store[X + 2] );
-   store[X + 3] = powf( 2.0f, store[X + 3] );
-}
-
-static void
-emit_ex2(
-   struct x86_function *func,
-   unsigned xmm_dst )
-{
-   emit_func_call_dst(
-      func,
-      xmm_dst,
-      ex24f );
-}
-
-static void
-emit_f2it(
-   struct x86_function *func,
-   unsigned xmm )
-{
-   sse2_cvttps2dq(
-      func,
-      make_xmm( xmm ),
-      make_xmm( xmm ) );
-}
-
-static void PIPE_CDECL
-flr4f(
-   float *store )
-{
-   const unsigned X = 0;
-
-   store[X + 0] = floorf( store[X + 0] );
-   store[X + 1] = floorf( store[X + 1] );
-   store[X + 2] = floorf( store[X + 2] );
-   store[X + 3] = floorf( store[X + 3] );
-}
-
-static void
-emit_flr(
-   struct x86_function *func,
-   unsigned xmm_dst )
-{
-   emit_func_call_dst(
-      func,
-      xmm_dst,
-      flr4f );
-}
-
-static void PIPE_CDECL
-frc4f(
-   float *store )
-{
-   const unsigned X = 0;
-
-   store[X + 0] -= floorf( store[X + 0] );
-   store[X + 1] -= floorf( store[X + 1] );
-   store[X + 2] -= floorf( store[X + 2] );
-   store[X + 3] -= floorf( store[X + 3] );
-}
-
-static void
-emit_frc(
-   struct x86_function *func,
-   unsigned xmm_dst )
-{
-   emit_func_call_dst(
-      func,
-      xmm_dst,
-      frc4f );
-}
-
-static void PIPE_CDECL
-lg24f(
-   float *store )
-{
-   const unsigned X = 0;
-
-   store[X + 0] = LOG2( store[X + 0] );
-   store[X + 1] = LOG2( store[X + 1] );
-   store[X + 2] = LOG2( store[X + 2] );
-   store[X + 3] = LOG2( store[X + 3] );
-}
-
-static void
-emit_lg2(
-   struct x86_function *func,
-   unsigned xmm_dst )
-{
-   emit_func_call_dst(
-      func,
-      xmm_dst,
-      lg24f );
-}
-
-static void
-emit_MOV(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-   sse_movups(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-}
-
-static void
-emit_mul (struct x86_function *func,
-          unsigned xmm_dst,
-          unsigned xmm_src)
-{
-   sse_mulps(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-}
-
-static void
-emit_neg(
-   struct x86_function *func,
-   unsigned xmm )
-{
-   sse_xorps(
-      func,
-      make_xmm( xmm ),
-      get_temp(
-         TGSI_EXEC_TEMP_80000000_I,
-         TGSI_EXEC_TEMP_80000000_C ) );
-}
-
-static void PIPE_CDECL
-pow4f(
-   float *store )
-{
-   const unsigned X = 0;
-
-   store[X + 0] = powf( store[X + 0], store[X + 4] );
-   store[X + 1] = powf( store[X + 1], store[X + 5] );
-   store[X + 2] = powf( store[X + 2], store[X + 6] );
-   store[X + 3] = powf( store[X + 3], store[X + 7] );
-}
-
-static void
-emit_pow(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-   emit_func_call_dst_src(
-      func,
-      xmm_dst,
-      xmm_src,
-      pow4f );
-}
-
-static void
-emit_rcp (
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-   /* On Intel CPUs at least, this is only accurate to 12 bits -- not
-    * good enough.  Need to either emit a proper divide or use the
-    * iterative technique described below in emit_rsqrt().
-    */
-   sse2_rcpps(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-}
-
-static void
-emit_rsqrt(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-#if HIGH_PRECISION
-   /* Although rsqrtps() and rcpps() are low precision on some/all SSE
-    * implementations, it is possible to improve its precision at
-    * fairly low cost, using a newton/raphson step, as below:
-    * 
-    * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
-    * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
-    *
-    * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
-    */
-   {
-      struct x86_reg dst = make_xmm( xmm_dst );
-      struct x86_reg src = make_xmm( xmm_src );
-      struct x86_reg tmp0 = make_xmm( 2 );
-      struct x86_reg tmp1 = make_xmm( 3 );
-
-      assert( xmm_dst != xmm_src );
-      assert( xmm_dst != 2 && xmm_dst != 3 );
-      assert( xmm_src != 2 && xmm_src != 3 );
-
-      sse_movaps(  func, dst,  get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
-      sse_movaps(  func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
-      sse_rsqrtps( func, tmp1, src  );
-      sse_mulps(   func, src,  tmp1 );
-      sse_mulps(   func, dst,  tmp1 );
-      sse_mulps(   func, src,  tmp1 );
-      sse_subps(   func, tmp0, src  );
-      sse_mulps(   func, dst,  tmp0 );
-   }
-#else
-   /* On Intel CPUs at least, this is only accurate to 12 bits -- not
-    * good enough.
-    */
-   sse_rsqrtps(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-#endif
-}
-
-static void
-emit_setsign(
-   struct x86_function *func,
-   unsigned xmm )
-{
-   sse_orps(
-      func,
-      make_xmm( xmm ),
-      get_temp(
-         TGSI_EXEC_TEMP_80000000_I,
-         TGSI_EXEC_TEMP_80000000_C ) );
-}
-
-static void PIPE_CDECL
-sin4f(
-   float *store )
-{
-   const unsigned X = 0;
-
-   store[X + 0] = sinf( store[X + 0] );
-   store[X + 1] = sinf( store[X + 1] );
-   store[X + 2] = sinf( store[X + 2] );
-   store[X + 3] = sinf( store[X + 3] );
-}
-
-static void
-emit_sin (struct x86_function *func,
-          unsigned xmm_dst)
-{
-   emit_func_call_dst(
-      func,
-      xmm_dst,
-      sin4f );
-}
-
-static void
-emit_sub(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-   sse_subps(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-}
-
-/**
- * Register fetch.
- */
-
-static void
-emit_fetch(
-   struct x86_function *func,
-   unsigned xmm,
-   const struct tgsi_full_src_register *reg,
-   const unsigned chan_index )
-{
-   unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
-
-   switch( swizzle ) {
-   case TGSI_EXTSWIZZLE_X:
-   case TGSI_EXTSWIZZLE_Y:
-   case TGSI_EXTSWIZZLE_Z:
-   case TGSI_EXTSWIZZLE_W:
-      switch( reg->SrcRegister.File ) {
-      case TGSI_FILE_CONSTANT:
-         emit_const(
-            func,
-            xmm,
-            reg->SrcRegister.Index,
-            swizzle );
-         break;
-
-      case TGSI_FILE_IMMEDIATE:
-         emit_immediate(
-            func,
-            xmm,
-            reg->SrcRegister.Index,
-            swizzle );
-         break;
-
-      case TGSI_FILE_INPUT:
-         emit_inputf(
-            func,
-            xmm,
-            reg->SrcRegister.Index,
-            swizzle );
-         break;
-
-      case TGSI_FILE_TEMPORARY:
-         emit_tempf(
-            func,
-            xmm,
-            reg->SrcRegister.Index,
-            swizzle );
-         break;
-
-      default:
-         assert( 0 );
-      }
-      break;
-
-   case TGSI_EXTSWIZZLE_ZERO:
-      emit_tempf(
-         func,
-         xmm,
-         TGSI_EXEC_TEMP_00000000_I,
-         TGSI_EXEC_TEMP_00000000_C );
-      break;
-
-   case TGSI_EXTSWIZZLE_ONE:
-      emit_tempf(
-         func,
-         xmm,
-         TGSI_EXEC_TEMP_ONE_I,
-         TGSI_EXEC_TEMP_ONE_C );
-      break;
-
-   default:
-      assert( 0 );
-   }
-
-   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
-   case TGSI_UTIL_SIGN_CLEAR:
-      emit_abs( func, xmm );
-      break;
-
-   case TGSI_UTIL_SIGN_SET:
-      emit_setsign( func, xmm );
-      break;
-
-   case TGSI_UTIL_SIGN_TOGGLE:
-      emit_neg( func, xmm );
-      break;
-
-   case TGSI_UTIL_SIGN_KEEP:
-      break;
-   }
-}
-
-#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
-   emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
-
-/**
- * Register store.
- */
-
-static void
-emit_store(
-   struct x86_function *func,
-   unsigned xmm,
-   const struct tgsi_full_dst_register *reg,
-   const struct tgsi_full_instruction *inst,
-   unsigned chan_index )
-{
-   switch( reg->DstRegister.File ) {
-   case TGSI_FILE_OUTPUT:
-      emit_output(
-         func,
-         xmm,
-         reg->DstRegister.Index,
-         chan_index );
-      break;
-
-   case TGSI_FILE_TEMPORARY:
-      emit_temps(
-         func,
-         xmm,
-         reg->DstRegister.Index,
-         chan_index );
-      break;
-
-   case TGSI_FILE_ADDRESS:
-      emit_addrs(
-         func,
-         xmm,
-         reg->DstRegister.Index,
-         chan_index );
-      break;
-
-   default:
-      assert( 0 );
-   }
-
-   switch( inst->Instruction.Saturate ) {
-   case TGSI_SAT_NONE:
-      break;
-
-   case TGSI_SAT_ZERO_ONE:
-      /* assert( 0 ); */
-      break;
-
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      assert( 0 );
-      break;
-   }
-}
-
-#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
-   emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
-
-/**
- * High-level instruction translators.
- */
-
-static void
-emit_kil(
-   struct x86_function *func,
-   const struct tgsi_full_src_register *reg )
-{
-   unsigned uniquemask;
-   unsigned registers[4];
-   unsigned nextregister = 0;
-   unsigned firstchan = ~0;
-   unsigned chan_index;
-
-   /* This mask stores component bits that were already tested. Note that
-    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
-    * tested. */
-   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
-
-   FOR_EACH_CHANNEL( chan_index ) {
-      unsigned swizzle;
-
-      /* unswizzle channel */
-      swizzle = tgsi_util_get_full_src_register_extswizzle(
-         reg,
-         chan_index );
-
-      /* check if the component has not been already tested */
-      if( !(uniquemask & (1 << swizzle)) ) {
-         uniquemask |= 1 << swizzle;
-
-         /* allocate register */
-         registers[chan_index] = nextregister;
-         emit_fetch(
-            func,
-            nextregister,
-            reg,
-            chan_index );
-         nextregister++;
-
-         /* mark the first channel used */
-         if( firstchan == ~0 ) {
-            firstchan = chan_index;
-         }
-      }
-   }
-
-   x86_push(
-      func,
-      x86_make_reg( file_REG32, reg_AX ) );
-   x86_push(
-      func,
-      x86_make_reg( file_REG32, reg_DX ) );
-
-   FOR_EACH_CHANNEL( chan_index ) {
-      if( uniquemask & (1 << chan_index) ) {
-         sse_cmpps(
-            func,
-            make_xmm( registers[chan_index] ),
-            get_temp(
-               TGSI_EXEC_TEMP_00000000_I,
-               TGSI_EXEC_TEMP_00000000_C ),
-            cc_LessThan );
-
-         if( chan_index == firstchan ) {
-            sse_pmovmskb(
-               func,
-               x86_make_reg( file_REG32, reg_AX ),
-               make_xmm( registers[chan_index] ) );
-         }
-         else {
-            sse_pmovmskb(
-               func,
-               x86_make_reg( file_REG32, reg_DX ),
-               make_xmm( registers[chan_index] ) );
-            x86_or(
-               func,
-               x86_make_reg( file_REG32, reg_AX ),
-               x86_make_reg( file_REG32, reg_DX ) );
-         }
-      }
-   }
-
-   x86_or(
-      func,
-      get_temp(
-         TGSI_EXEC_TEMP_KILMASK_I,
-         TGSI_EXEC_TEMP_KILMASK_C ),
-      x86_make_reg( file_REG32, reg_AX ) );
-
-   x86_pop(
-      func,
-      x86_make_reg( file_REG32, reg_DX ) );
-   x86_pop(
-      func,
-      x86_make_reg( file_REG32, reg_AX ) );
-}
-
-static void
-emit_setcc(
-   struct x86_function *func,
-   struct tgsi_full_instruction *inst,
-   enum sse_cc cc )
-{
-   unsigned chan_index;
-
-   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-      FETCH( func, *inst, 0, 0, chan_index );
-      FETCH( func, *inst, 1, 1, chan_index );
-      sse_cmpps(
-         func,
-         make_xmm( 0 ),
-         make_xmm( 1 ),
-         cc );
-      sse_andps(
-         func,
-         make_xmm( 0 ),
-         get_temp(
-            TGSI_EXEC_TEMP_ONE_I,
-            TGSI_EXEC_TEMP_ONE_C ) );
-      STORE( func, *inst, 0, 0, chan_index );
-   }
-}
-
-static void
-emit_cmp(
-   struct x86_function *func,
-   struct tgsi_full_instruction *inst )
-{
-   unsigned chan_index;
-
-   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-      FETCH( func, *inst, 0, 0, chan_index );
-      FETCH( func, *inst, 1, 1, chan_index );
-      FETCH( func, *inst, 2, 2, chan_index );
-      sse_cmpps(
-         func,
-         make_xmm( 0 ),
-         get_temp(
-            TGSI_EXEC_TEMP_00000000_I,
-            TGSI_EXEC_TEMP_00000000_C ),
-         cc_LessThan );
-      sse_andps(
-         func,
-         make_xmm( 1 ),
-         make_xmm( 0 ) );
-      sse_andnps(
-         func,
-         make_xmm( 0 ),
-         make_xmm( 2 ) );
-      sse_orps(
-         func,
-         make_xmm( 0 ),
-         make_xmm( 1 ) );
-      STORE( func, *inst, 0, 0, chan_index );
-   }
-}
-
-static int
-emit_instruction(
-   struct x86_function *func,
-   struct tgsi_full_instruction *inst )
-{
-   unsigned chan_index;
-
-   switch( inst->Instruction.Opcode ) {
-   case TGSI_OPCODE_ARL:
-#if 0
-      /* XXX this isn't working properly (see glean vertProg1 test) */
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         emit_f2it( func, 0 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-#else
-      return 0;
-#endif
-      break;
-
-   case TGSI_OPCODE_MOV:
-   case TGSI_OPCODE_SWZ:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_LIT:
-      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
-         emit_tempf(
-            func,
-            0,
-            TGSI_EXEC_TEMP_ONE_I,
-            TGSI_EXEC_TEMP_ONE_C);
-         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
-            STORE( func, *inst, 0, 0, CHAN_X );
-         }
-         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
-            STORE( func, *inst, 0, 0, CHAN_W );
-         }
-      }
-      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
-         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
-            FETCH( func, *inst, 0, 0, CHAN_X );
-            sse_maxps(
-               func,
-               make_xmm( 0 ),
-               get_temp(
-                  TGSI_EXEC_TEMP_00000000_I,
-                  TGSI_EXEC_TEMP_00000000_C ) );
-            STORE( func, *inst, 0, 0, CHAN_Y );
-         }
-         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
-            /* XMM[1] = SrcReg[0].yyyy */
-            FETCH( func, *inst, 1, 0, CHAN_Y );
-            /* XMM[1] = max(XMM[1], 0) */
-            sse_maxps(
-               func,
-               make_xmm( 1 ),
-               get_temp(
-                  TGSI_EXEC_TEMP_00000000_I,
-                  TGSI_EXEC_TEMP_00000000_C ) );
-            /* XMM[2] = SrcReg[0].wwww */
-            FETCH( func, *inst, 2, 0, CHAN_W );
-            /* XMM[2] = min(XMM[2], 128.0) */
-            sse_minps(
-               func,
-               make_xmm( 2 ),
-               get_temp(
-                  TGSI_EXEC_TEMP_128_I,
-                  TGSI_EXEC_TEMP_128_C ) );
-            /* XMM[2] = max(XMM[2], -128.0) */
-            sse_maxps(
-               func,
-               make_xmm( 2 ),
-               get_temp(
-                  TGSI_EXEC_TEMP_MINUS_128_I,
-                  TGSI_EXEC_TEMP_MINUS_128_C ) );
-            emit_pow( func, 1, 2 );
-            FETCH( func, *inst, 0, 0, CHAN_X );
-            sse_xorps(
-               func,
-               make_xmm( 2 ),
-               make_xmm( 2 ) );
-            sse_cmpps(
-               func,
-               make_xmm( 2 ),
-               make_xmm( 0 ),
-               cc_LessThanEqual );
-            sse_andps(
-               func,
-               make_xmm( 2 ),
-               make_xmm( 1 ) );
-            STORE( func, *inst, 2, 0, CHAN_Z );
-         }
-      }
-      break;
-
-   case TGSI_OPCODE_RCP:
-   /* TGSI_OPCODE_RECIP */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_rcp( func, 0, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_RSQ:
-   /* TGSI_OPCODE_RECIPSQRT */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_rsqrt( func, 1, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 1, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_EXP:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_LOG:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_MUL:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         emit_mul( func, 0, 1 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_ADD:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         emit_add( func, 0, 1 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DP3:
-   /* TGSI_OPCODE_DOT3 */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      FETCH( func, *inst, 1, 1, CHAN_X );
-      emit_mul( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Y );
-      FETCH( func, *inst, 2, 1, CHAN_Y );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Z );
-      FETCH( func, *inst, 2, 1, CHAN_Z );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DP4:
-   /* TGSI_OPCODE_DOT4 */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      FETCH( func, *inst, 1, 1, CHAN_X );
-      emit_mul( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Y );
-      FETCH( func, *inst, 2, 1, CHAN_Y );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Z );
-      FETCH( func, *inst, 2, 1, CHAN_Z );
-      emit_mul(func, 1, 2 );
-      emit_add(func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_W );
-      FETCH( func, *inst, 2, 1, CHAN_W );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DST:
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
-         emit_tempf(
-            func,
-            0,
-            TGSI_EXEC_TEMP_ONE_I,
-            TGSI_EXEC_TEMP_ONE_C );
-         STORE( func, *inst, 0, 0, CHAN_X );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
-         FETCH( func, *inst, 0, 0, CHAN_Y );
-         FETCH( func, *inst, 1, 1, CHAN_Y );
-         emit_mul( func, 0, 1 );
-         STORE( func, *inst, 0, 0, CHAN_Y );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
-         FETCH( func, *inst, 0, 0, CHAN_Z );
-         STORE( func, *inst, 0, 0, CHAN_Z );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
-         FETCH( func, *inst, 0, 1, CHAN_W );
-         STORE( func, *inst, 0, 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_MIN:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         sse_minps(
-            func,
-            make_xmm( 0 ),
-            make_xmm( 1 ) );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_MAX:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         sse_maxps(
-            func,
-            make_xmm( 0 ),
-            make_xmm( 1 ) );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SLT:
-   /* TGSI_OPCODE_SETLT */
-      emit_setcc( func, inst, cc_LessThan );
-      break;
-
-   case TGSI_OPCODE_SGE:
-   /* TGSI_OPCODE_SETGE */
-      emit_setcc( func, inst, cc_NotLessThan );
-      break;
-
-   case TGSI_OPCODE_MAD:
-   /* TGSI_OPCODE_MADD */
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         FETCH( func, *inst, 2, 2, chan_index );
-         emit_mul( func, 0, 1 );
-         emit_add( func, 0, 2 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SUB:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         emit_sub( func, 0, 1 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_LERP:
-   /* TGSI_OPCODE_LRP */
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         FETCH( func, *inst, 2, 2, chan_index );
-         emit_sub( func, 1, 2 );
-         emit_mul( func, 0, 1 );
-         emit_add( func, 0, 2 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_CND:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_CND0:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_DOT2ADD:
-   /* TGSI_OPCODE_DP2A */
-      return 0;
-      break;
-
-   case TGSI_OPCODE_INDEX:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_NEGATE:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_FRAC:
-   /* TGSI_OPCODE_FRC */
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         emit_frc( func, 0 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_CLAMP:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_FLOOR:
-   /* TGSI_OPCODE_FLR */
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         emit_flr( func, 0 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_ROUND:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_EXPBASE2:
-   /* TGSI_OPCODE_EX2 */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_ex2( func, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_LOGBASE2:
-   /* TGSI_OPCODE_LG2 */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_lg2( func, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_POWER:
-   /* TGSI_OPCODE_POW */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      FETCH( func, *inst, 1, 1, CHAN_X );
-      emit_pow( func, 0, 1 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_CROSSPRODUCT:
-   /* TGSI_OPCODE_XPD */
-      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
-         FETCH( func, *inst, 1, 1, CHAN_Z );
-         FETCH( func, *inst, 3, 0, CHAN_Z );
-      }
-      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
-         FETCH( func, *inst, 0, 0, CHAN_Y );
-         FETCH( func, *inst, 4, 1, CHAN_Y );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
-         emit_MOV( func, 2, 0 );
-         emit_mul( func, 2, 1 );
-         emit_MOV( func, 5, 3 );
-         emit_mul( func, 5, 4 );
-         emit_sub( func, 2, 5 );
-         STORE( func, *inst, 2, 0, CHAN_X );
-      }
-      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
-         FETCH( func, *inst, 2, 1, CHAN_X );
-         FETCH( func, *inst, 5, 0, CHAN_X );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
-         emit_mul( func, 3, 2 );
-         emit_mul( func, 1, 5 );
-         emit_sub( func, 3, 1 );
-         STORE( func, *inst, 3, 0, CHAN_Y );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
-         emit_mul( func, 5, 4 );
-         emit_mul( func, 0, 2 );
-         emit_sub( func, 5, 0 );
-         STORE( func, *inst, 5, 0, CHAN_Z );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
-	 emit_tempf(
-	    func,
-	    0,
-	    TGSI_EXEC_TEMP_ONE_I,
-	    TGSI_EXEC_TEMP_ONE_C );
-         STORE( func, *inst, 0, 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_MULTIPLYMATRIX:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_ABS:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         emit_abs( func, 0) ;
-
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_RCC:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_DPH:
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      FETCH( func, *inst, 1, 1, CHAN_X );
-      emit_mul( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Y );
-      FETCH( func, *inst, 2, 1, CHAN_Y );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Z );
-      FETCH( func, *inst, 2, 1, CHAN_Z );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FETCH( func, *inst, 1, 1, CHAN_W );
-      emit_add( func, 0, 1 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_COS:
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_cos( func, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DDX:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_DDY:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_KIL:
-      emit_kil( func, &inst->FullSrcRegisters[0] );
-      break;
-
-   case TGSI_OPCODE_PK2H:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_PK2US:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_PK4B:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_PK4UB:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_RFL:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SEQ:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SFL:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SGT:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SIN:
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_sin( func, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SLE:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SNE:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_STR:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_TEX:
-      if (0) {
-	 /* Disable dummy texture code: 
-	  */
-	 emit_tempf(
-	    func,
-	    0,
-	    TGSI_EXEC_TEMP_ONE_I,
-	    TGSI_EXEC_TEMP_ONE_C );
-	 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-	    STORE( func, *inst, 0, 0, chan_index );
-	 }
-      }
-      else {
-	 return 0;
-      }
-      break;
-
-   case TGSI_OPCODE_TXD:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_UP2H:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_UP2US:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_UP4B:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_UP4UB:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_X2D:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_ARA:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_ARR:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_BRA:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_CAL:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_RET:
-      emit_ret( func );
-      break;
-
-   case TGSI_OPCODE_END:
-      break;
-
-   case TGSI_OPCODE_SSG:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_CMP:
-      emit_cmp (func, inst);
-      break;
-
-   case TGSI_OPCODE_SCS:
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
-         FETCH( func, *inst, 0, 0, CHAN_X );
-         emit_cos( func, 0 );
-         STORE( func, *inst, 0, 0, CHAN_X );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
-         FETCH( func, *inst, 0, 0, CHAN_X );
-         emit_sin( func, 0 );
-         STORE( func, *inst, 0, 0, CHAN_Y );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
-	 emit_tempf(
-	    func,
-	    0,
-	    TGSI_EXEC_TEMP_00000000_I,
-	    TGSI_EXEC_TEMP_00000000_C );
-         STORE( func, *inst, 0, 0, CHAN_Z );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
-	 emit_tempf(
-	    func,
-	    0,
-	    TGSI_EXEC_TEMP_ONE_I,
-	    TGSI_EXEC_TEMP_ONE_C );
-         STORE( func, *inst, 0, 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_TXB:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_NRM:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_DIV:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_DP2:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_TXL:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_BRK:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_IF:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_LOOP:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_REP:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_ELSE:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_ENDIF:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_ENDLOOP:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_ENDREP:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_PUSHA:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_POPA:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_CEIL:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_I2F:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_NOT:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_TRUNC:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SHL:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SHR:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_AND:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_OR:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_MOD:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_XOR:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SAD:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_TXF:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_TXQ:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_CONT:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_EMIT:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_ENDPRIM:
-      return 0;
-      break;
-
-   default:
-      return 0;
-   }
-   
-   return 1;
-}
-
-static void
-emit_declaration(
-   struct x86_function *func,
-   struct tgsi_full_declaration *decl )
-{
-   if( decl->Declaration.File == TGSI_FILE_INPUT ) {
-      unsigned first, last, mask;
-      unsigned i, j;
-
-      first = decl->DeclarationRange.First;
-      last = decl->DeclarationRange.Last;
-      mask = decl->Declaration.UsageMask;
-
-      for( i = first; i <= last; i++ ) {
-         for( j = 0; j < NUM_CHANNELS; j++ ) {
-            if( mask & (1 << j) ) {
-               switch( decl->Declaration.Interpolate ) {
-               case TGSI_INTERPOLATE_CONSTANT:
-                  emit_coef_a0( func, 0, i, j );
-                  emit_inputs( func, 0, i, j );
-                  break;
-
-               case TGSI_INTERPOLATE_LINEAR:
-                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
-                  emit_coef_dadx( func, 1, i, j );
-                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
-                  emit_coef_dady( func, 3, i, j );
-                  emit_mul( func, 0, 1 );    /* x * dadx */
-                  emit_coef_a0( func, 4, i, j );
-                  emit_mul( func, 2, 3 );    /* y * dady */
-                  emit_add( func, 0, 4 );    /* x * dadx + a0 */
-                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
-                  emit_inputs( func, 0, i, j );
-                  break;
-
-               case TGSI_INTERPOLATE_PERSPECTIVE:
-                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
-                  emit_coef_dadx( func, 1, i, j );
-                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
-                  emit_coef_dady( func, 3, i, j );
-                  emit_mul( func, 0, 1 );    /* x * dadx */
-                  emit_tempf( func, 4, 0, TGSI_SWIZZLE_W );
-                  emit_coef_a0( func, 5, i, j );
-                  emit_rcp( func, 4, 4 );    /* 1.0 / w */
-                  emit_mul( func, 2, 3 );    /* y * dady */
-                  emit_add( func, 0, 5 );    /* x * dadx + a0 */
-                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
-                  emit_mul( func, 0, 4 );    /* (x * dadx + y * dady + a0) / w */
-                  emit_inputs( func, 0, i, j );
-                  break;
-
-               default:
-                  assert( 0 );
-		  break;
-               }
-            }
-         }
-      }
-   }
-}
-
-static void aos_to_soa( struct x86_function *func, 
-                        uint arg_aos,
-                        uint arg_soa, 
-                        uint arg_num, 
-                        uint arg_stride )
-{
-   struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX );
-   struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX );
-   struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX );
-   struct x86_reg stride = x86_make_reg( file_REG32, reg_DX );
-   int inner_loop;
-
-
-   /* Save EBX */
-   x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
-
-   x86_mov( func, aos_input,  x86_fn_arg( func, arg_aos ) );
-   x86_mov( func, soa_input,  x86_fn_arg( func, arg_soa ) );
-   x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
-   x86_mov( func, stride,     x86_fn_arg( func, arg_stride ) );
-
-   /* do */
-   inner_loop = x86_get_label( func );
-   {
-      x86_push( func, aos_input );
-      sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
-      sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
-      x86_add( func, aos_input, stride );
-      sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
-      sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
-      x86_add( func, aos_input, stride );
-      sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
-      sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
-      x86_add( func, aos_input, stride );
-      sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
-      sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
-      x86_pop( func, aos_input );
-
-      sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
-      sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
-      sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 );
-      sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd );
-      sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 );
-      sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd );
-
-      sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) );
-      sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) );
-      sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) );
-      sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) );
-
-      /* Advance to next input */
-      x86_lea( func, aos_input, x86_make_disp(aos_input, 16) );
-      x86_lea( func, soa_input, x86_make_disp(soa_input, 64) );
-   }
-   /* while --num_inputs */
-   x86_dec( func, num_inputs );
-   x86_jcc( func, cc_NE, inner_loop );
-
-   /* Restore EBX */
-   x86_pop( func, aos_input );
-}
-
-static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
-{
-   struct x86_reg soa_output;
-   struct x86_reg aos_output;
-   struct x86_reg num_outputs;
-   struct x86_reg temp;
-   int inner_loop;
-
-   soa_output = x86_make_reg( file_REG32, reg_AX );
-   aos_output = x86_make_reg( file_REG32, reg_BX );
-   num_outputs = x86_make_reg( file_REG32, reg_CX );
-   temp = x86_make_reg( file_REG32, reg_DX );
-
-   /* Save EBX */
-   x86_push( func, aos_output );
-
-   x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
-   x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
-   x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
-
-   /* do */
-   inner_loop = x86_get_label( func );
-   {
-      sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) );
-      sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) );
-      sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) );
-      sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) );
-
-      sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
-      sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
-      sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) );
-      sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) );
-      sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
-      sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
-
-      x86_mov( func, temp, x86_fn_arg( func, stride ) );
-      x86_push( func, aos_output );
-      sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
-      sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
-      x86_add( func, aos_output, temp );
-      sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
-      sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
-      x86_add( func, aos_output, temp );
-      sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
-      sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
-      x86_add( func, aos_output, temp );
-      sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
-      sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
-      x86_pop( func, aos_output );
-
-      /* Advance to next output */
-      x86_lea( func, aos_output, x86_make_disp(aos_output, 16) );
-      x86_lea( func, soa_output, x86_make_disp(soa_output, 64) );
-   }
-   /* while --num_outputs */
-   x86_dec( func, num_outputs );
-   x86_jcc( func, cc_NE, inner_loop );
-
-   /* Restore EBX */
-   x86_pop( func, aos_output );
-}
-
-/**
- * Translate a TGSI vertex/fragment shader to SSE2 code.
- * Slightly different things are done for vertex vs. fragment shaders.
- *
- * Note that fragment shaders are responsible for interpolating shader
- * inputs. Because on x86 we have only 4 GP registers, and here we
- * have 5 shader arguments (input, output, const, temp and coef), the
- * code is split into two phases -- DECLARATION and INSTRUCTION phase.
- * GP register holding the output argument is aliased with the coeff
- * argument, as outputs are not needed in the DECLARATION phase.
- *
- * \param tokens  the TGSI input shader
- * \param func  the output SSE code/function
- * \param immediates  buffer to place immediates, later passed to SSE func
- * \param return  1 for success, 0 if translation failed
- */
-unsigned
-tgsi_emit_sse2(
-   const struct tgsi_token *tokens,
-   struct x86_function *func,
-   float (*immediates)[4],
-   boolean do_swizzles )
-{
-   struct tgsi_parse_context parse;
-   boolean instruction_phase = FALSE;
-   unsigned ok = 1;
-   uint num_immediates = 0;
-
-   func->csr = func->store;
-
-   tgsi_parse_init( &parse, tokens );
-
-   /* Can't just use EDI, EBX without save/restoring them:
-    */
-   x86_push(
-      func,
-      get_immediate_base() );
-
-   x86_push(
-      func,
-      get_temp_base() );
-
-
-   /*
-    * Different function args for vertex/fragment shaders:
-    */
-   if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
-      /* DECLARATION phase, do not load output argument. */
-      x86_mov(
-         func,
-         get_input_base(),
-         x86_fn_arg( func, 1 ) );
-      /* skipping outputs argument here */
-      x86_mov(
-         func,
-         get_const_base(),
-         x86_fn_arg( func, 3 ) );
-      x86_mov(
-         func,
-         get_temp_base(),
-         x86_fn_arg( func, 4 ) );
-      x86_mov(
-         func,
-         get_coef_base(),
-         x86_fn_arg( func, 5 ) );
-      x86_mov(
-         func,
-         get_immediate_base(),
-         x86_fn_arg( func, 6 ) );
-   }
-   else {
-      assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
-
-      if (do_swizzles)
-         aos_to_soa( func, 
-                     6,         /* aos_input */
-                     1,         /* machine->input */
-                     7,         /* num_inputs */
-                     8 );       /* input_stride */
-
-      x86_mov(
-         func,
-         get_input_base(),
-         x86_fn_arg( func, 1 ) );
-      x86_mov(
-         func,
-         get_output_base(),
-         x86_fn_arg( func, 2 ) );
-      x86_mov(
-         func,
-         get_const_base(),
-         x86_fn_arg( func, 3 ) );
-      x86_mov(
-         func,
-         get_temp_base(),
-         x86_fn_arg( func, 4 ) );
-      x86_mov(
-         func,
-         get_immediate_base(),
-         x86_fn_arg( func, 5 ) );
-   }
-
-   while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
-      tgsi_parse_token( &parse );
-
-      switch( parse.FullToken.Token.Type ) {
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
-            emit_declaration(
-               func,
-               &parse.FullToken.FullDeclaration );
-         }
-         break;
-
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
-            if( !instruction_phase ) {
-               /* INSTRUCTION phase, overwrite coeff with output. */
-               instruction_phase = TRUE;
-               x86_mov(
-                  func,
-                  get_output_base(),
-                  x86_fn_arg( func, 2 ) );
-            }
-         }
-
-         ok = emit_instruction(
-            func,
-            &parse.FullToken.FullInstruction );
-
-	 if (!ok) {
-	    debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", 
-			 parse.FullToken.FullInstruction.Instruction.Opcode,
-                         parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
-                         "vertex shader" : "fragment shader");
-	 }
-         break;
-
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         /* simply copy the immediate values into the next immediates[] slot */
-         {
-            const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
-            uint i;
-            assert(size <= 4);
-            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
-            for( i = 0; i < size; i++ ) {
-               immediates[num_immediates][i] =
-		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
-            }
-#if 0
-            debug_printf("SSE FS immediate[%d] = %f %f %f %f\n",
-                   num_immediates,
-                   immediates[num_immediates][0],
-                   immediates[num_immediates][1],
-                   immediates[num_immediates][2],
-                   immediates[num_immediates][3]);
-#endif
-            num_immediates++;
-         }
-         break;
-
-      default:
-	 ok = 0;
-         assert( 0 );
-      }
-   }
-
-   if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
-      if (do_swizzles)
-         soa_to_aos( func, 9, 2, 10, 11 );
-   }
-
-   /* Can't just use EBX, EDI without save/restoring them:
-    */
-   x86_pop(
-      func,
-      get_temp_base() );
-
-   x86_pop(
-      func,
-      get_immediate_base() );
-
-   emit_ret( func );
-
-   tgsi_parse_free( &parse );
-
-   return ok;
-}
-
-#endif /* PIPE_ARCH_X86 */
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
deleted file mode 100755
index af838b2a25b..00000000000
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef TGSI_SSE2_H
-#define TGSI_SSE2_H
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-struct tgsi_token;
-struct x86_function;
-
-unsigned
-tgsi_emit_sse2(
-   const struct tgsi_token *tokens,
-   struct x86_function *function,
-   float (*immediates)[4],
-   boolean do_swizzles );
-
-#if defined __cplusplus
-}
-#endif
-
-#endif /* TGSI_SSE2_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
new file mode 100644
index 00000000000..742ef14c352
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -0,0 +1,1324 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_build.h"
+#include "tgsi_parse.h"
+
+/*
+ * version
+ */
+
+struct tgsi_version
+tgsi_build_version( void )
+{
+   struct tgsi_version  version;
+
+   version.MajorVersion = 1;
+   version.MinorVersion = 1;
+   version.Padding = 0;
+
+   return version;
+}
+
+/*
+ * header
+ */
+
+struct tgsi_header
+tgsi_build_header( void )
+{
+   struct tgsi_header header;
+
+   header.HeaderSize = 1;
+   header.BodySize = 0;
+
+   return header;
+}
+
+static void
+header_headersize_grow( struct tgsi_header *header )
+{
+   assert( header->HeaderSize < 0xFF );
+   assert( header->BodySize == 0 );
+
+   header->HeaderSize++;
+}
+
+static void
+header_bodysize_grow( struct tgsi_header *header )
+{
+   assert( header->BodySize < 0xFFFFFF );
+
+   header->BodySize++;
+}
+
+struct tgsi_processor
+tgsi_default_processor( void )
+{
+   struct tgsi_processor processor;
+
+   processor.Processor = TGSI_PROCESSOR_FRAGMENT;
+   processor.Padding = 0;
+
+   return processor;
+}
+
+struct tgsi_processor
+tgsi_build_processor(
+   unsigned type,
+   struct tgsi_header *header )
+{
+   struct tgsi_processor processor;
+
+   processor = tgsi_default_processor();
+   processor.Processor = type;
+
+   header_headersize_grow( header );
+
+   return processor;
+}
+
+/*
+ * declaration
+ */
+
+struct tgsi_declaration
+tgsi_default_declaration( void )
+{
+   struct tgsi_declaration declaration;
+
+   declaration.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   declaration.Size = 1;
+   declaration.File = TGSI_FILE_NULL;
+   declaration.UsageMask = TGSI_WRITEMASK_XYZW;
+   declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT;
+   declaration.Semantic = 0;
+   declaration.Padding = 0;
+   declaration.Extended = 0;
+
+   return declaration;
+}
+
+struct tgsi_declaration
+tgsi_build_declaration(
+   unsigned file,
+   unsigned usage_mask,
+   unsigned interpolate,
+   unsigned semantic,
+   struct tgsi_header *header )
+{
+   struct tgsi_declaration declaration;
+
+   assert( file <= TGSI_FILE_IMMEDIATE );
+   assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE );
+
+   declaration = tgsi_default_declaration();
+   declaration.File = file;
+   declaration.UsageMask = usage_mask;
+   declaration.Interpolate = interpolate;
+   declaration.Semantic = semantic;
+
+   header_bodysize_grow( header );
+
+   return declaration;
+}
+
+static void
+declaration_grow(
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header )
+{
+   assert( declaration->Size < 0xFF );
+
+   declaration->Size++;
+
+   header_bodysize_grow( header );
+}
+
+struct tgsi_full_declaration
+tgsi_default_full_declaration( void )
+{
+   struct tgsi_full_declaration  full_declaration;
+
+   full_declaration.Declaration  = tgsi_default_declaration();
+   full_declaration.DeclarationRange = tgsi_default_declaration_range();
+   full_declaration.Semantic = tgsi_default_declaration_semantic();
+
+   return full_declaration;
+}
+
+unsigned
+tgsi_build_full_declaration(
+   const struct tgsi_full_declaration *full_decl,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize )
+{
+   unsigned size = 0;
+   struct tgsi_declaration *declaration;
+   struct tgsi_declaration_range *dr;
+
+   if( maxsize <= size )
+     return 0;
+   declaration = (struct tgsi_declaration *) &tokens[size];
+   size++;
+
+   *declaration = tgsi_build_declaration(
+      full_decl->Declaration.File,
+      full_decl->Declaration.UsageMask,
+      full_decl->Declaration.Interpolate,
+      full_decl->Declaration.Semantic,
+      header );
+
+   if (maxsize <= size)
+      return 0;
+   dr = (struct tgsi_declaration_range *) &tokens[size];
+   size++;
+
+   *dr = tgsi_build_declaration_range(
+      full_decl->DeclarationRange.First,
+      full_decl->DeclarationRange.Last,
+      declaration,
+      header );
+
+   if( full_decl->Declaration.Semantic ) {
+      struct tgsi_declaration_semantic *ds;
+
+      if( maxsize <= size )
+         return  0;
+      ds = (struct tgsi_declaration_semantic *) &tokens[size];
+      size++;
+
+      *ds = tgsi_build_declaration_semantic(
+         full_decl->Semantic.SemanticName,
+         full_decl->Semantic.SemanticIndex,
+         declaration,
+         header );
+   }
+
+   return size;
+}
+
+struct tgsi_declaration_range
+tgsi_default_declaration_range( void )
+{
+   struct tgsi_declaration_range dr;
+
+   dr.First = 0;
+   dr.Last = 0;
+
+   return dr;
+}
+
+struct tgsi_declaration_range
+tgsi_build_declaration_range(
+   unsigned first,
+   unsigned last,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header )
+{
+   struct tgsi_declaration_range declaration_range;
+
+   assert( last >= first );
+   assert( last <= 0xFFFF );
+
+   declaration_range = tgsi_default_declaration_range();
+   declaration_range.First = first;
+   declaration_range.Last = last;
+
+   declaration_grow( declaration, header );
+
+   return declaration_range;
+}
+
+struct tgsi_declaration_semantic
+tgsi_default_declaration_semantic( void )
+{
+   struct tgsi_declaration_semantic ds;
+
+   ds.SemanticName = TGSI_SEMANTIC_POSITION;
+   ds.SemanticIndex = 0;
+   ds.Padding = 0;
+
+   return ds;
+}
+
+struct tgsi_declaration_semantic
+tgsi_build_declaration_semantic(
+   unsigned semantic_name,
+   unsigned semantic_index,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header )
+{
+   struct tgsi_declaration_semantic ds;
+
+   assert( semantic_name <= TGSI_SEMANTIC_COUNT );
+   assert( semantic_index <= 0xFFFF );
+
+   ds = tgsi_default_declaration_semantic();
+   ds.SemanticName = semantic_name;
+   ds.SemanticIndex = semantic_index;
+
+   declaration_grow( declaration, header );
+
+   return ds;
+}
+
+/*
+ * immediate
+ */
+
+struct tgsi_immediate
+tgsi_default_immediate( void )
+{
+   struct tgsi_immediate immediate;
+
+   immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
+   immediate.Size = 1;
+   immediate.DataType = TGSI_IMM_FLOAT32;
+   immediate.Padding = 0;
+   immediate.Extended = 0;
+
+   return immediate;
+}
+
+struct tgsi_immediate
+tgsi_build_immediate(
+   struct tgsi_header *header )
+{
+   struct tgsi_immediate immediate;
+
+   immediate = tgsi_default_immediate();
+
+   header_bodysize_grow( header );
+
+   return immediate;
+}
+
+struct tgsi_full_immediate
+tgsi_default_full_immediate( void )
+{
+   struct tgsi_full_immediate fullimm;
+
+   fullimm.Immediate = tgsi_default_immediate();
+   fullimm.u.Pointer = (void *) 0;
+
+   return fullimm;
+}
+
+static void
+immediate_grow(
+   struct tgsi_immediate *immediate,
+   struct tgsi_header *header )
+{
+   assert( immediate->Size < 0xFF );
+
+   immediate->Size++;
+
+   header_bodysize_grow( header );
+}
+
+struct tgsi_immediate_float32
+tgsi_build_immediate_float32(
+   float value,
+   struct tgsi_immediate *immediate,
+   struct tgsi_header *header )
+{
+   struct tgsi_immediate_float32 immediate_float32;
+
+   immediate_float32.Float = value;
+
+   immediate_grow( immediate, header );
+
+   return immediate_float32;
+}
+
+unsigned
+tgsi_build_full_immediate(
+   const struct tgsi_full_immediate *full_imm,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize )
+{
+   unsigned size = 0, i;
+   struct tgsi_immediate *immediate;
+
+   if( maxsize <= size )
+      return 0;
+   immediate = (struct tgsi_immediate *) &tokens[size];
+   size++;
+
+   *immediate = tgsi_build_immediate( header );
+
+   for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) {
+      struct tgsi_immediate_float32 *if32;
+
+      if( maxsize <= size )
+         return  0;
+      if32 = (struct tgsi_immediate_float32 *) &tokens[size];
+      size++;
+
+      *if32 = tgsi_build_immediate_float32(
+         full_imm->u.ImmediateFloat32[i].Float,
+         immediate,
+         header );
+   }
+
+   return size;
+}
+
+/*
+ * instruction
+ */
+
+struct tgsi_instruction
+tgsi_default_instruction( void )
+{
+   struct tgsi_instruction instruction;
+
+   instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
+   instruction.Size = 1;
+   instruction.Opcode = TGSI_OPCODE_MOV;
+   instruction.Saturate = TGSI_SAT_NONE;
+   instruction.NumDstRegs = 1;
+   instruction.NumSrcRegs = 1;
+   instruction.Padding  = 0;
+   instruction.Extended = 0;
+
+   return instruction;
+}
+
+struct tgsi_instruction
+tgsi_build_instruction(
+   unsigned opcode,
+   unsigned saturate,
+   unsigned num_dst_regs,
+   unsigned num_src_regs,
+   struct tgsi_header *header )
+{
+   struct tgsi_instruction instruction;
+
+   assert (opcode <= TGSI_OPCODE_LAST);
+   assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE);
+   assert (num_dst_regs <= 3);
+   assert (num_src_regs <= 15);
+
+   instruction = tgsi_default_instruction();
+   instruction.Opcode = opcode;
+   instruction.Saturate = saturate;
+   instruction.NumDstRegs = num_dst_regs;
+   instruction.NumSrcRegs = num_src_regs;
+
+   header_bodysize_grow( header );
+
+   return instruction;
+}
+
+static void
+instruction_grow(
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   assert (instruction->Size <   0xFF);
+
+   instruction->Size++;
+
+   header_bodysize_grow( header );
+}
+
+struct tgsi_full_instruction
+tgsi_default_full_instruction( void )
+{
+   struct tgsi_full_instruction full_instruction;
+   unsigned i;
+
+   full_instruction.Instruction = tgsi_default_instruction();
+   full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv();
+   full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label();
+   full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture();
+   for( i = 0;  i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) {
+      full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register();
+   }
+   for( i = 0;  i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) {
+      full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register();
+   }
+
+   return full_instruction;
+}
+
+unsigned
+tgsi_build_full_instruction(
+   const struct tgsi_full_instruction *full_inst,
+   struct  tgsi_token *tokens,
+   struct  tgsi_header *header,
+   unsigned  maxsize )
+{
+   unsigned size = 0;
+   unsigned i;
+   struct tgsi_instruction *instruction;
+   struct tgsi_token *prev_token;
+
+   if( maxsize <= size )
+      return 0;
+   instruction = (struct tgsi_instruction *) &tokens[size];
+   size++;
+
+   *instruction = tgsi_build_instruction(
+      full_inst->Instruction.Opcode,
+      full_inst->Instruction.Saturate,
+      full_inst->Instruction.NumDstRegs,
+      full_inst->Instruction.NumSrcRegs,
+      header );
+   prev_token = (struct tgsi_token  *) instruction;
+
+   if( tgsi_compare_instruction_ext_nv(
+         full_inst->InstructionExtNv,
+         tgsi_default_instruction_ext_nv() ) ) {
+      struct tgsi_instruction_ext_nv *instruction_ext_nv;
+
+      if( maxsize <= size )
+         return 0;
+      instruction_ext_nv =
+         (struct  tgsi_instruction_ext_nv *) &tokens[size];
+      size++;
+
+      *instruction_ext_nv  = tgsi_build_instruction_ext_nv(
+         full_inst->InstructionExtNv.Precision,
+         full_inst->InstructionExtNv.CondDstIndex,
+         full_inst->InstructionExtNv.CondFlowIndex,
+         full_inst->InstructionExtNv.CondMask,
+         full_inst->InstructionExtNv.CondSwizzleX,
+         full_inst->InstructionExtNv.CondSwizzleY,
+         full_inst->InstructionExtNv.CondSwizzleZ,
+         full_inst->InstructionExtNv.CondSwizzleW,
+         full_inst->InstructionExtNv.CondDstUpdate,
+         full_inst->InstructionExtNv.CondFlowEnable,
+         prev_token,
+         instruction,
+         header );
+      prev_token = (struct tgsi_token  *) instruction_ext_nv;
+   }
+
+   if( tgsi_compare_instruction_ext_label(
+         full_inst->InstructionExtLabel,
+         tgsi_default_instruction_ext_label() ) ) {
+      struct tgsi_instruction_ext_label *instruction_ext_label;
+
+      if( maxsize <= size )
+         return 0;
+      instruction_ext_label =
+         (struct  tgsi_instruction_ext_label *) &tokens[size];
+      size++;
+
+      *instruction_ext_label = tgsi_build_instruction_ext_label(
+         full_inst->InstructionExtLabel.Label,
+         prev_token,
+         instruction,
+         header );
+      prev_token = (struct tgsi_token  *) instruction_ext_label;
+   }
+
+   if( tgsi_compare_instruction_ext_texture(
+         full_inst->InstructionExtTexture,
+         tgsi_default_instruction_ext_texture() ) ) {
+      struct tgsi_instruction_ext_texture *instruction_ext_texture;
+
+      if( maxsize <= size )
+         return 0;
+      instruction_ext_texture =
+         (struct  tgsi_instruction_ext_texture *) &tokens[size];
+      size++;
+
+      *instruction_ext_texture = tgsi_build_instruction_ext_texture(
+         full_inst->InstructionExtTexture.Texture,
+         prev_token,
+         instruction,
+         header   );
+      prev_token = (struct tgsi_token  *) instruction_ext_texture;
+   }
+
+   for( i = 0;  i <   full_inst->Instruction.NumDstRegs; i++ ) {
+      const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i];
+      struct tgsi_dst_register *dst_register;
+      struct tgsi_token *prev_token;
+
+      if( maxsize <= size )
+         return 0;
+      dst_register = (struct tgsi_dst_register *) &tokens[size];
+      size++;
+
+      *dst_register = tgsi_build_dst_register(
+         reg->DstRegister.File,
+         reg->DstRegister.WriteMask,
+         reg->DstRegister.Index,
+         instruction,
+         header );
+      prev_token = (struct tgsi_token  *) dst_register;
+
+      if( tgsi_compare_dst_register_ext_concode(
+            reg->DstRegisterExtConcode,
+            tgsi_default_dst_register_ext_concode() ) ) {
+         struct tgsi_dst_register_ext_concode *dst_register_ext_concode;
+
+         if( maxsize <= size )
+            return 0;
+         dst_register_ext_concode =
+            (struct  tgsi_dst_register_ext_concode *) &tokens[size];
+         size++;
+
+         *dst_register_ext_concode =   tgsi_build_dst_register_ext_concode(
+            reg->DstRegisterExtConcode.CondMask,
+            reg->DstRegisterExtConcode.CondSwizzleX,
+            reg->DstRegisterExtConcode.CondSwizzleY,
+            reg->DstRegisterExtConcode.CondSwizzleZ,
+            reg->DstRegisterExtConcode.CondSwizzleW,
+            reg->DstRegisterExtConcode.CondSrcIndex,
+            prev_token,
+            instruction,
+            header );
+         prev_token = (struct tgsi_token  *) dst_register_ext_concode;
+      }
+
+      if( tgsi_compare_dst_register_ext_modulate(
+            reg->DstRegisterExtModulate,
+            tgsi_default_dst_register_ext_modulate() ) ) {
+         struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate;
+
+         if( maxsize <= size )
+            return 0;
+         dst_register_ext_modulate =
+            (struct  tgsi_dst_register_ext_modulate *) &tokens[size];
+         size++;
+
+         *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate(
+            reg->DstRegisterExtModulate.Modulate,
+            prev_token,
+            instruction,
+            header );
+         prev_token = (struct tgsi_token  *) dst_register_ext_modulate;
+      }
+   }
+
+   for( i = 0;  i < full_inst->Instruction.NumSrcRegs; i++ ) {
+      const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i];
+      struct tgsi_src_register *src_register;
+      struct tgsi_token *prev_token;
+
+      if( maxsize <= size )
+         return 0;
+      src_register = (struct tgsi_src_register *)  &tokens[size];
+      size++;
+
+      *src_register = tgsi_build_src_register(
+         reg->SrcRegister.File,
+         reg->SrcRegister.SwizzleX,
+         reg->SrcRegister.SwizzleY,
+         reg->SrcRegister.SwizzleZ,
+         reg->SrcRegister.SwizzleW,
+         reg->SrcRegister.Negate,
+         reg->SrcRegister.Indirect,
+         reg->SrcRegister.Dimension,
+         reg->SrcRegister.Index,
+         instruction,
+         header );
+      prev_token = (struct tgsi_token  *) src_register;
+
+      if( tgsi_compare_src_register_ext_swz(
+            reg->SrcRegisterExtSwz,
+            tgsi_default_src_register_ext_swz() ) ) {
+         struct tgsi_src_register_ext_swz *src_register_ext_swz;
+
+         /* Use of the extended swizzle requires the simple swizzle to be identity.
+          */
+         assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X );
+         assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y );
+         assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z );
+         assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W );
+         assert( reg->SrcRegister.Negate == FALSE );
+
+         if( maxsize <= size )
+            return 0;
+         src_register_ext_swz =
+            (struct  tgsi_src_register_ext_swz *) &tokens[size];
+         size++;
+
+         *src_register_ext_swz = tgsi_build_src_register_ext_swz(
+            reg->SrcRegisterExtSwz.ExtSwizzleX,
+            reg->SrcRegisterExtSwz.ExtSwizzleY,
+            reg->SrcRegisterExtSwz.ExtSwizzleZ,
+            reg->SrcRegisterExtSwz.ExtSwizzleW,
+            reg->SrcRegisterExtSwz.NegateX,
+            reg->SrcRegisterExtSwz.NegateY,
+            reg->SrcRegisterExtSwz.NegateZ,
+            reg->SrcRegisterExtSwz.NegateW,
+            prev_token,
+            instruction,
+            header );
+         prev_token = (struct tgsi_token  *) src_register_ext_swz;
+      }
+
+      if( tgsi_compare_src_register_ext_mod(
+            reg->SrcRegisterExtMod,
+            tgsi_default_src_register_ext_mod() ) ) {
+         struct tgsi_src_register_ext_mod *src_register_ext_mod;
+
+         if( maxsize <= size )
+            return 0;
+         src_register_ext_mod =
+            (struct  tgsi_src_register_ext_mod *) &tokens[size];
+         size++;
+
+         *src_register_ext_mod = tgsi_build_src_register_ext_mod(
+            reg->SrcRegisterExtMod.Complement,
+            reg->SrcRegisterExtMod.Bias,
+            reg->SrcRegisterExtMod.Scale2X,
+            reg->SrcRegisterExtMod.Absolute,
+            reg->SrcRegisterExtMod.Negate,
+            prev_token,
+            instruction,
+            header );
+         prev_token = (struct tgsi_token  *) src_register_ext_mod;
+      }
+
+      if( reg->SrcRegister.Indirect ) {
+         struct  tgsi_src_register *ind;
+
+         if( maxsize <= size )
+            return 0;
+         ind = (struct tgsi_src_register *) &tokens[size];
+         size++;
+
+         *ind = tgsi_build_src_register(
+            reg->SrcRegisterInd.File,
+            reg->SrcRegisterInd.SwizzleX,
+            reg->SrcRegisterInd.SwizzleY,
+            reg->SrcRegisterInd.SwizzleZ,
+            reg->SrcRegisterInd.SwizzleW,
+            reg->SrcRegisterInd.Negate,
+            reg->SrcRegisterInd.Indirect,
+            reg->SrcRegisterInd.Dimension,
+            reg->SrcRegisterInd.Index,
+            instruction,
+            header );
+      }
+
+      if( reg->SrcRegister.Dimension ) {
+         struct  tgsi_dimension *dim;
+
+         assert( !reg->SrcRegisterDim.Dimension );
+
+         if( maxsize <= size )
+            return 0;
+         dim = (struct tgsi_dimension *) &tokens[size];
+         size++;
+
+         *dim = tgsi_build_dimension(
+            reg->SrcRegisterDim.Indirect,
+            reg->SrcRegisterDim.Index,
+            instruction,
+            header );
+
+         if( reg->SrcRegisterDim.Indirect ) {
+            struct tgsi_src_register *ind;
+
+            if( maxsize <= size )
+               return 0;
+            ind = (struct tgsi_src_register *) &tokens[size];
+            size++;
+
+            *ind = tgsi_build_src_register(
+               reg->SrcRegisterDimInd.File,
+               reg->SrcRegisterDimInd.SwizzleX,
+               reg->SrcRegisterDimInd.SwizzleY,
+               reg->SrcRegisterDimInd.SwizzleZ,
+               reg->SrcRegisterDimInd.SwizzleW,
+               reg->SrcRegisterDimInd.Negate,
+               reg->SrcRegisterDimInd.Indirect,
+               reg->SrcRegisterDimInd.Dimension,
+               reg->SrcRegisterDimInd.Index,
+               instruction,
+               header );
+         }
+      }
+   }
+
+   return size;
+}
+
+struct tgsi_instruction_ext_nv
+tgsi_default_instruction_ext_nv( void )
+{
+   struct tgsi_instruction_ext_nv instruction_ext_nv;
+
+   instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV;
+   instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT;
+   instruction_ext_nv.CondDstIndex = 0;
+   instruction_ext_nv.CondFlowIndex = 0;
+   instruction_ext_nv.CondMask = TGSI_CC_TR;
+   instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X;
+   instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y;
+   instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z;
+   instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W;
+   instruction_ext_nv.CondDstUpdate = 0;
+   instruction_ext_nv.CondFlowEnable = 0;
+   instruction_ext_nv.Padding = 0;
+   instruction_ext_nv.Extended = 0;
+
+   return instruction_ext_nv;
+}
+
+union token_u32
+{
+   unsigned u32;
+};
+
+unsigned
+tgsi_compare_instruction_ext_nv(
+   struct tgsi_instruction_ext_nv a,
+   struct tgsi_instruction_ext_nv b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_instruction_ext_nv
+tgsi_build_instruction_ext_nv(
+   unsigned precision,
+   unsigned cond_dst_index,
+   unsigned cond_flow_index,
+   unsigned cond_mask,
+   unsigned cond_swizzle_x,
+   unsigned cond_swizzle_y,
+   unsigned cond_swizzle_z,
+   unsigned cond_swizzle_w,
+   unsigned cond_dst_update,
+   unsigned cond_flow_update,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_instruction_ext_nv instruction_ext_nv;
+
+   instruction_ext_nv = tgsi_default_instruction_ext_nv();
+   instruction_ext_nv.Precision = precision;
+   instruction_ext_nv.CondDstIndex = cond_dst_index;
+   instruction_ext_nv.CondFlowIndex = cond_flow_index;
+   instruction_ext_nv.CondMask = cond_mask;
+   instruction_ext_nv.CondSwizzleX = cond_swizzle_x;
+   instruction_ext_nv.CondSwizzleY = cond_swizzle_y;
+   instruction_ext_nv.CondSwizzleZ = cond_swizzle_z;
+   instruction_ext_nv.CondSwizzleW = cond_swizzle_w;
+   instruction_ext_nv.CondDstUpdate = cond_dst_update;
+   instruction_ext_nv.CondFlowEnable = cond_flow_update;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return instruction_ext_nv;
+}
+
+struct tgsi_instruction_ext_label
+tgsi_default_instruction_ext_label( void )
+{
+   struct tgsi_instruction_ext_label instruction_ext_label;
+
+   instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL;
+   instruction_ext_label.Label = 0;
+   instruction_ext_label.Padding = 0;
+   instruction_ext_label.Extended = 0;
+
+   return instruction_ext_label;
+}
+
+unsigned
+tgsi_compare_instruction_ext_label(
+   struct tgsi_instruction_ext_label a,
+   struct tgsi_instruction_ext_label b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_instruction_ext_label
+tgsi_build_instruction_ext_label(
+   unsigned label,
+   struct tgsi_token  *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_instruction_ext_label instruction_ext_label;
+
+   instruction_ext_label = tgsi_default_instruction_ext_label();
+   instruction_ext_label.Label = label;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return instruction_ext_label;
+}
+
+struct tgsi_instruction_ext_texture
+tgsi_default_instruction_ext_texture( void )
+{
+   struct tgsi_instruction_ext_texture instruction_ext_texture;
+
+   instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE;
+   instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN;
+   instruction_ext_texture.Padding = 0;
+   instruction_ext_texture.Extended = 0;
+
+   return instruction_ext_texture;
+}
+
+unsigned
+tgsi_compare_instruction_ext_texture(
+   struct tgsi_instruction_ext_texture a,
+   struct tgsi_instruction_ext_texture b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_instruction_ext_texture
+tgsi_build_instruction_ext_texture(
+   unsigned texture,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_instruction_ext_texture instruction_ext_texture;
+
+   instruction_ext_texture = tgsi_default_instruction_ext_texture();
+   instruction_ext_texture.Texture = texture;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return instruction_ext_texture;
+}
+
+struct tgsi_src_register
+tgsi_default_src_register( void )
+{
+   struct tgsi_src_register src_register;
+
+   src_register.File = TGSI_FILE_NULL;
+   src_register.SwizzleX = TGSI_SWIZZLE_X;
+   src_register.SwizzleY = TGSI_SWIZZLE_Y;
+   src_register.SwizzleZ = TGSI_SWIZZLE_Z;
+   src_register.SwizzleW = TGSI_SWIZZLE_W;
+   src_register.Negate = 0;
+   src_register.Indirect = 0;
+   src_register.Dimension = 0;
+   src_register.Index = 0;
+   src_register.Extended = 0;
+
+   return src_register;
+}
+
+struct tgsi_src_register
+tgsi_build_src_register(
+   unsigned file,
+   unsigned swizzle_x,
+   unsigned swizzle_y,
+   unsigned swizzle_z,
+   unsigned swizzle_w,
+   unsigned negate,
+   unsigned indirect,
+   unsigned dimension,
+   int index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_src_register   src_register;
+
+   assert( file <= TGSI_FILE_IMMEDIATE );
+   assert( swizzle_x <= TGSI_SWIZZLE_W );
+   assert( swizzle_y <= TGSI_SWIZZLE_W );
+   assert( swizzle_z <= TGSI_SWIZZLE_W );
+   assert( swizzle_w <= TGSI_SWIZZLE_W );
+   assert( negate <= 1 );
+   assert( index >= -0x8000 && index <= 0x7FFF );
+
+   src_register = tgsi_default_src_register();
+   src_register.File = file;
+   src_register.SwizzleX = swizzle_x;
+   src_register.SwizzleY = swizzle_y;
+   src_register.SwizzleZ = swizzle_z;
+   src_register.SwizzleW = swizzle_w;
+   src_register.Negate = negate;
+   src_register.Indirect = indirect;
+   src_register.Dimension = dimension;
+   src_register.Index = index;
+
+   instruction_grow( instruction, header );
+
+   return src_register;
+}
+
+struct tgsi_full_src_register
+tgsi_default_full_src_register( void )
+{
+   struct tgsi_full_src_register full_src_register;
+
+   full_src_register.SrcRegister = tgsi_default_src_register();
+   full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz();
+   full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod();
+   full_src_register.SrcRegisterInd = tgsi_default_src_register();
+   full_src_register.SrcRegisterDim = tgsi_default_dimension();
+   full_src_register.SrcRegisterDimInd = tgsi_default_src_register();
+
+   return full_src_register;
+}
+
+struct tgsi_src_register_ext_swz
+tgsi_default_src_register_ext_swz( void )
+{
+   struct tgsi_src_register_ext_swz src_register_ext_swz;
+
+   src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ;
+   src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X;
+   src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y;
+   src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z;
+   src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W;
+   src_register_ext_swz.NegateX = 0;
+   src_register_ext_swz.NegateY = 0;
+   src_register_ext_swz.NegateZ = 0;
+   src_register_ext_swz.NegateW = 0;
+   src_register_ext_swz.Padding = 0;
+   src_register_ext_swz.Extended = 0;
+
+   return src_register_ext_swz;
+}
+
+unsigned
+tgsi_compare_src_register_ext_swz(
+   struct tgsi_src_register_ext_swz a,
+   struct tgsi_src_register_ext_swz b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_src_register_ext_swz
+tgsi_build_src_register_ext_swz(
+   unsigned ext_swizzle_x,
+   unsigned ext_swizzle_y,
+   unsigned ext_swizzle_z,
+   unsigned ext_swizzle_w,
+   unsigned negate_x,
+   unsigned negate_y,
+   unsigned negate_z,
+   unsigned negate_w,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_src_register_ext_swz src_register_ext_swz;
+
+   assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE );
+   assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE );
+   assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE );
+   assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE );
+   assert( negate_x <= 1 );
+   assert( negate_y <= 1 );
+   assert( negate_z <= 1 );
+   assert( negate_w <= 1 );
+
+   src_register_ext_swz = tgsi_default_src_register_ext_swz();
+   src_register_ext_swz.ExtSwizzleX = ext_swizzle_x;
+   src_register_ext_swz.ExtSwizzleY = ext_swizzle_y;
+   src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z;
+   src_register_ext_swz.ExtSwizzleW = ext_swizzle_w;
+   src_register_ext_swz.NegateX = negate_x;
+   src_register_ext_swz.NegateY = negate_y;
+   src_register_ext_swz.NegateZ = negate_z;
+   src_register_ext_swz.NegateW = negate_w;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return src_register_ext_swz;
+}
+
+struct tgsi_src_register_ext_mod
+tgsi_default_src_register_ext_mod( void )
+{
+   struct tgsi_src_register_ext_mod src_register_ext_mod;
+
+   src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD;
+   src_register_ext_mod.Complement = 0;
+   src_register_ext_mod.Bias = 0;
+   src_register_ext_mod.Scale2X = 0;
+   src_register_ext_mod.Absolute = 0;
+   src_register_ext_mod.Negate = 0;
+   src_register_ext_mod.Padding = 0;
+   src_register_ext_mod.Extended = 0;
+
+   return src_register_ext_mod;
+}
+
+unsigned
+tgsi_compare_src_register_ext_mod(
+   struct tgsi_src_register_ext_mod a,
+   struct tgsi_src_register_ext_mod b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_src_register_ext_mod
+tgsi_build_src_register_ext_mod(
+   unsigned complement,
+   unsigned bias,
+   unsigned scale_2x,
+   unsigned absolute,
+   unsigned negate,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_src_register_ext_mod src_register_ext_mod;
+
+   assert( complement <= 1 );
+   assert( bias <= 1 );
+   assert( scale_2x <= 1 );
+   assert( absolute <= 1 );
+   assert( negate <= 1 );
+
+   src_register_ext_mod = tgsi_default_src_register_ext_mod();
+   src_register_ext_mod.Complement = complement;
+   src_register_ext_mod.Bias = bias;
+   src_register_ext_mod.Scale2X = scale_2x;
+   src_register_ext_mod.Absolute = absolute;
+   src_register_ext_mod.Negate = negate;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return src_register_ext_mod;
+}
+
+struct tgsi_dimension
+tgsi_default_dimension( void )
+{
+   struct tgsi_dimension dimension;
+
+   dimension.Indirect = 0;
+   dimension.Dimension = 0;
+   dimension.Padding = 0;
+   dimension.Index = 0;
+   dimension.Extended = 0;
+
+   return dimension;
+}
+
+struct tgsi_dimension
+tgsi_build_dimension(
+   unsigned indirect,
+   unsigned index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_dimension dimension;
+
+   dimension = tgsi_default_dimension();
+   dimension.Indirect = indirect;
+   dimension.Index = index;
+
+   instruction_grow( instruction, header );
+
+   return dimension;
+}
+
+struct tgsi_dst_register
+tgsi_default_dst_register( void )
+{
+   struct tgsi_dst_register dst_register;
+
+   dst_register.File = TGSI_FILE_NULL;
+   dst_register.WriteMask = TGSI_WRITEMASK_XYZW;
+   dst_register.Indirect = 0;
+   dst_register.Dimension = 0;
+   dst_register.Index = 0;
+   dst_register.Padding = 0;
+   dst_register.Extended = 0;
+
+   return dst_register;
+}
+
+struct tgsi_dst_register
+tgsi_build_dst_register(
+   unsigned file,
+   unsigned mask,
+   int index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_dst_register dst_register;
+
+   assert( file <= TGSI_FILE_IMMEDIATE );
+   assert( mask <= TGSI_WRITEMASK_XYZW );
+   assert( index >= -32768 && index <= 32767 );
+
+   dst_register = tgsi_default_dst_register();
+   dst_register.File = file;
+   dst_register.WriteMask = mask;
+   dst_register.Index = index;
+
+   instruction_grow( instruction, header );
+
+   return dst_register;
+}
+
+struct tgsi_full_dst_register
+tgsi_default_full_dst_register( void )
+{
+   struct tgsi_full_dst_register full_dst_register;
+
+   full_dst_register.DstRegister = tgsi_default_dst_register();
+   full_dst_register.DstRegisterExtConcode =
+      tgsi_default_dst_register_ext_concode();
+   full_dst_register.DstRegisterExtModulate =
+      tgsi_default_dst_register_ext_modulate();
+
+   return full_dst_register;
+}
+
+struct tgsi_dst_register_ext_concode
+tgsi_default_dst_register_ext_concode( void )
+{
+   struct tgsi_dst_register_ext_concode dst_register_ext_concode;
+
+   dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE;
+   dst_register_ext_concode.CondMask = TGSI_CC_TR;
+   dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X;
+   dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y;
+   dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z;
+   dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W;
+   dst_register_ext_concode.CondSrcIndex = 0;
+   dst_register_ext_concode.Padding = 0;
+   dst_register_ext_concode.Extended = 0;
+
+   return dst_register_ext_concode;
+}
+
+unsigned
+tgsi_compare_dst_register_ext_concode(
+   struct tgsi_dst_register_ext_concode a,
+   struct tgsi_dst_register_ext_concode b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_dst_register_ext_concode
+tgsi_build_dst_register_ext_concode(
+   unsigned cc,
+   unsigned swizzle_x,
+   unsigned swizzle_y,
+   unsigned swizzle_z,
+   unsigned swizzle_w,
+   int index,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_dst_register_ext_concode dst_register_ext_concode;
+
+   assert( cc <= TGSI_CC_FL );
+   assert( swizzle_x <= TGSI_SWIZZLE_W );
+   assert( swizzle_y <= TGSI_SWIZZLE_W );
+   assert( swizzle_z <= TGSI_SWIZZLE_W );
+   assert( swizzle_w <= TGSI_SWIZZLE_W );
+   assert( index >= -32768 && index <= 32767 );
+
+   dst_register_ext_concode = tgsi_default_dst_register_ext_concode();
+   dst_register_ext_concode.CondMask = cc;
+   dst_register_ext_concode.CondSwizzleX = swizzle_x;
+   dst_register_ext_concode.CondSwizzleY = swizzle_y;
+   dst_register_ext_concode.CondSwizzleZ = swizzle_z;
+   dst_register_ext_concode.CondSwizzleW = swizzle_w;
+   dst_register_ext_concode.CondSrcIndex = index;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return dst_register_ext_concode;
+}
+
+struct tgsi_dst_register_ext_modulate
+tgsi_default_dst_register_ext_modulate( void )
+{
+   struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
+
+   dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE;
+   dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X;
+   dst_register_ext_modulate.Padding = 0;
+   dst_register_ext_modulate.Extended = 0;
+
+   return dst_register_ext_modulate;
+}
+
+unsigned
+tgsi_compare_dst_register_ext_modulate(
+   struct tgsi_dst_register_ext_modulate a,
+   struct tgsi_dst_register_ext_modulate b )
+{
+   a.Padding = b.Padding = 0;
+   a.Extended = b.Extended = 0;
+   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_dst_register_ext_modulate
+tgsi_build_dst_register_ext_modulate(
+   unsigned modulate,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header )
+{
+   struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
+
+   assert( modulate <= TGSI_MODULATE_EIGHTH );
+
+   dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate();
+   dst_register_ext_modulate.Modulate = modulate;
+
+   prev_token->Extended = 1;
+   instruction_grow( instruction, header );
+
+   return dst_register_ext_modulate;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
new file mode 100644
index 00000000000..ed258302487
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -0,0 +1,332 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_BUILD_H
+#define TGSI_BUILD_H
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+/*
+ * version
+ */
+
+struct tgsi_version
+tgsi_build_version( void );
+
+/*
+ * header
+ */
+
+struct tgsi_header
+tgsi_build_header( void );
+
+struct tgsi_processor
+tgsi_default_processor( void );
+
+struct tgsi_processor
+tgsi_build_processor(
+   unsigned processor,
+   struct tgsi_header *header );
+
+/*
+ * declaration
+ */
+
+struct tgsi_declaration
+tgsi_default_declaration( void );
+
+struct tgsi_declaration
+tgsi_build_declaration(
+   unsigned file,
+   unsigned usage_mask,
+   unsigned interpolate,
+   unsigned semantic,
+   struct tgsi_header *header );
+
+struct tgsi_full_declaration
+tgsi_default_full_declaration( void );
+
+unsigned
+tgsi_build_full_declaration(
+   const struct tgsi_full_declaration *full_decl,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize );
+
+struct tgsi_declaration_range
+tgsi_default_declaration_range( void );
+
+struct tgsi_declaration_range
+tgsi_build_declaration_range(
+   unsigned first,
+   unsigned last,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header );
+
+struct tgsi_declaration_semantic
+tgsi_default_declaration_semantic( void );
+
+struct tgsi_declaration_semantic
+tgsi_build_declaration_semantic(
+   unsigned semantic_name,
+   unsigned semantic_index,
+   struct tgsi_declaration *declaration,
+   struct tgsi_header *header );
+
+/*
+ * immediate
+ */
+
+struct tgsi_immediate
+tgsi_default_immediate( void );
+
+struct tgsi_immediate
+tgsi_build_immediate(
+   struct tgsi_header *header );
+
+struct tgsi_full_immediate
+tgsi_default_full_immediate( void );
+
+struct tgsi_immediate_float32
+tgsi_build_immediate_float32(
+   float value,
+   struct tgsi_immediate *immediate,
+   struct tgsi_header *header );
+
+unsigned
+tgsi_build_full_immediate(
+   const struct tgsi_full_immediate *full_imm,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize );
+
+/*
+ * instruction
+ */
+
+struct tgsi_instruction
+tgsi_default_instruction( void );
+
+struct tgsi_instruction
+tgsi_build_instruction(
+   unsigned opcode,
+   unsigned saturate,
+   unsigned num_dst_regs,
+   unsigned num_src_regs,
+   struct tgsi_header *header );
+
+struct tgsi_full_instruction
+tgsi_default_full_instruction( void );
+
+unsigned
+tgsi_build_full_instruction(
+   const struct tgsi_full_instruction *full_inst,
+   struct tgsi_token *tokens,
+   struct tgsi_header *header,
+   unsigned maxsize );
+
+struct tgsi_instruction_ext_nv
+tgsi_default_instruction_ext_nv( void );
+
+unsigned
+tgsi_compare_instruction_ext_nv(
+   struct tgsi_instruction_ext_nv a,
+   struct tgsi_instruction_ext_nv b );
+
+struct tgsi_instruction_ext_nv
+tgsi_build_instruction_ext_nv(
+   unsigned precision,
+   unsigned cond_dst_index,
+   unsigned cond_flow_index,
+   unsigned cond_mask,
+   unsigned cond_swizzle_x,
+   unsigned cond_swizzle_y,
+   unsigned cond_swizzle_z,
+   unsigned cond_swizzle_w,
+   unsigned cond_dst_update,
+   unsigned cond_flow_update,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_instruction_ext_label
+tgsi_default_instruction_ext_label( void );
+
+unsigned
+tgsi_compare_instruction_ext_label(
+   struct tgsi_instruction_ext_label a,
+   struct tgsi_instruction_ext_label b );
+
+struct tgsi_instruction_ext_label
+tgsi_build_instruction_ext_label(
+   unsigned label,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_instruction_ext_texture
+tgsi_default_instruction_ext_texture( void );
+
+unsigned
+tgsi_compare_instruction_ext_texture(
+   struct tgsi_instruction_ext_texture a,
+   struct tgsi_instruction_ext_texture b );
+
+struct tgsi_instruction_ext_texture
+tgsi_build_instruction_ext_texture(
+   unsigned texture,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_src_register
+tgsi_default_src_register( void );
+
+struct tgsi_src_register
+tgsi_build_src_register(
+   unsigned file,
+   unsigned swizzle_x,
+   unsigned swizzle_y,
+   unsigned swizzle_z,
+   unsigned swizzle_w,
+   unsigned negate,
+   unsigned indirect,
+   unsigned dimension,
+   int index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_full_src_register
+tgsi_default_full_src_register( void );
+
+struct tgsi_src_register_ext_swz
+tgsi_default_src_register_ext_swz( void );
+
+unsigned
+tgsi_compare_src_register_ext_swz(
+   struct tgsi_src_register_ext_swz a,
+   struct tgsi_src_register_ext_swz b );
+
+struct tgsi_src_register_ext_swz
+tgsi_build_src_register_ext_swz(
+   unsigned ext_swizzle_x,
+   unsigned ext_swizzle_y,
+   unsigned ext_swizzle_z,
+   unsigned ext_swizzle_w,
+   unsigned negate_x,
+   unsigned negate_y,
+   unsigned negate_z,
+   unsigned negate_w,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_src_register_ext_mod
+tgsi_default_src_register_ext_mod( void );
+
+unsigned
+tgsi_compare_src_register_ext_mod(
+   struct tgsi_src_register_ext_mod a,
+   struct tgsi_src_register_ext_mod b );
+
+struct tgsi_src_register_ext_mod
+tgsi_build_src_register_ext_mod(
+   unsigned complement,
+   unsigned bias,
+   unsigned scale_2x,
+   unsigned absolute,
+   unsigned negate,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_dimension
+tgsi_default_dimension( void );
+
+struct tgsi_dimension
+tgsi_build_dimension(
+   unsigned indirect,
+   unsigned index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_dst_register
+tgsi_default_dst_register( void );
+
+struct tgsi_dst_register
+tgsi_build_dst_register(
+   unsigned file,
+   unsigned mask,
+   int index,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_full_dst_register
+tgsi_default_full_dst_register( void );
+
+struct tgsi_dst_register_ext_concode
+tgsi_default_dst_register_ext_concode( void );
+
+unsigned
+tgsi_compare_dst_register_ext_concode(
+   struct tgsi_dst_register_ext_concode a,
+   struct tgsi_dst_register_ext_concode b );
+
+struct tgsi_dst_register_ext_concode
+tgsi_build_dst_register_ext_concode(
+   unsigned cc,
+   unsigned swizzle_x,
+   unsigned swizzle_y,
+   unsigned swizzle_z,
+   unsigned swizzle_w,
+   int index,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+struct tgsi_dst_register_ext_modulate
+tgsi_default_dst_register_ext_modulate( void );
+
+unsigned
+tgsi_compare_dst_register_ext_modulate(
+   struct tgsi_dst_register_ext_modulate a,
+   struct tgsi_dst_register_ext_modulate b );
+
+struct tgsi_dst_register_ext_modulate
+tgsi_build_dst_register_ext_modulate(
+   unsigned modulate,
+   struct tgsi_token *prev_token,
+   struct tgsi_instruction *instruction,
+   struct tgsi_header *header );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_BUILD_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
new file mode 100644
index 00000000000..d2e6375212f
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -0,0 +1,582 @@
+/**************************************************************************
+ * 
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "tgsi_dump.h"
+#include "tgsi_iterate.h"
+
+struct dump_ctx
+{
+   struct tgsi_iterate_context iter;
+
+   uint instno;
+};
+
+static void
+dump_enum(
+   uint e,
+   const char **enums,
+   uint enum_count )
+{
+   if (e >= enum_count)
+      debug_printf( "%u", e );
+   else
+      debug_printf( "%s", enums[e] );
+}
+
+#define EOL()           debug_printf( "\n" )
+#define TXT(S)          debug_printf( "%s", S )
+#define CHR(C)          debug_printf( "%c", C )
+#define UIX(I)          debug_printf( "0x%x", I )
+#define UID(I)          debug_printf( "%u", I )
+#define SID(I)          debug_printf( "%d", I )
+#define FLT(F)          debug_printf( "%10.4f", F )
+#define ENM(E,ENUMS)    dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
+
+static const char *processor_type_names[] =
+{
+   "FRAG",
+   "VERT",
+   "GEOM"
+};
+
+static const char *file_names[] =
+{
+   "NULL",
+   "CONST",
+   "IN",
+   "OUT",
+   "TEMP",
+   "SAMP",
+   "ADDR",
+   "IMM"
+};
+
+static const char *interpolate_names[] =
+{
+   "CONSTANT",
+   "LINEAR",
+   "PERSPECTIVE"
+};
+
+static const char *semantic_names[] =
+{
+   "POSITION",
+   "COLOR",
+   "BCOLOR",
+   "FOG",
+   "PSIZE",
+   "GENERIC",
+   "NORMAL"
+};
+
+static const char *immediate_type_names[] =
+{
+   "FLT32"
+};
+
+static const char *opcode_names[TGSI_OPCODE_LAST] =
+{
+   "ARL",
+   "MOV",
+   "LIT",
+   "RCP",
+   "RSQ",
+   "EXP",
+   "LOG",
+   "MUL",
+   "ADD",
+   "DP3",
+   "DP4",
+   "DST",
+   "MIN",
+   "MAX",
+   "SLT",
+   "SGE",
+   "MAD",
+   "SUB",
+   "LERP",
+   "CND",
+   "CND0",
+   "DOT2ADD",
+   "INDEX",
+   "NEGATE",
+   "FRAC",
+   "CLAMP",
+   "FLOOR",
+   "ROUND",
+   "EXPBASE2",
+   "LOGBASE2",
+   "POWER",
+   "CROSSPRODUCT",
+   "MULTIPLYMATRIX",
+   "ABS",
+   "RCC",
+   "DPH",
+   "COS",
+   "DDX",
+   "DDY",
+   "KILP",
+   "PK2H",
+   "PK2US",
+   "PK4B",
+   "PK4UB",
+   "RFL",
+   "SEQ",
+   "SFL",
+   "SGT",
+   "SIN",
+   "SLE",
+   "SNE",
+   "STR",
+   "TEX",
+   "TXD",
+   "TXP",
+   "UP2H",
+   "UP2US",
+   "UP4B",
+   "UP4UB",
+   "X2D",
+   "ARA",
+   "ARR",
+   "BRA",
+   "CAL",
+   "RET",
+   "SSG",
+   "CMP",
+   "SCS",
+   "TXB",
+   "NRM",
+   "DIV",
+   "DP2",
+   "TXL",
+   "BRK",
+   "IF",
+   "LOOP",
+   "REP",
+   "ELSE",
+   "ENDIF",
+   "ENDLOOP",
+   "ENDREP",
+   "PUSHA",
+   "POPA",
+   "CEIL",
+   "I2F",
+   "NOT",
+   "TRUNC",
+   "SHL",
+   "SHR",
+   "AND",
+   "OR",
+   "MOD",
+   "XOR",
+   "SAD",
+   "TXF",
+   "TXQ",
+   "CONT",
+   "EMIT",
+   "ENDPRIM",
+   "BGNLOOP2",
+   "BGNSUB",
+   "ENDLOOP2",
+   "ENDSUB",
+   "NOISE1",
+   "NOISE2",
+   "NOISE3",
+   "NOISE4",
+   "NOP",
+   "M4X3",
+   "M3X4",
+   "M3X3",
+   "M3X2",
+   "NRM4",
+   "CALLNZ",
+   "IFC",
+   "BREAKC",
+   "KIL",
+   "END",
+   "SWZ"
+};
+
+static const char *swizzle_names[] =
+{
+   "x",
+   "y",
+   "z",
+   "w"
+};
+
+static const char *texture_names[] =
+{
+   "UNKNOWN",
+   "1D",
+   "2D",
+   "3D",
+   "CUBE",
+   "RECT",
+   "SHADOW1D",
+   "SHADOW2D",
+   "SHADOWRECT"
+};
+
+static const char *extswizzle_names[] =
+{
+   "x",
+   "y",
+   "z",
+   "w",
+   "0",
+   "1"
+};
+
+static const char *modulate_names[TGSI_MODULATE_COUNT] =
+{
+   "",
+   "_2X",
+   "_4X",
+   "_8X",
+   "_D2",
+   "_D4",
+   "_D8"
+};
+
+static void
+_dump_register_prefix(
+   uint file,
+   uint first,
+   uint last )
+{
+   
+   
+}
+
+static void
+_dump_register(
+   uint file,
+   int first,
+   int last )
+{
+   ENM( file, file_names );
+   CHR( '[' );
+   SID( first );
+   if (first != last) {
+      TXT( ".." );
+      SID( last );
+   }
+   CHR( ']' );
+}
+
+static void
+_dump_register_ind(
+   uint file,
+   int index,
+   uint ind_file,
+   int ind_index )
+{
+   ENM( file, file_names );
+   CHR( '[' );
+   ENM( ind_file, file_names );
+   CHR( '[' );
+   SID( ind_index );
+   CHR( ']' );
+   if (index != 0) {
+      if (index > 0)
+         CHR( '+' );
+      SID( index );
+   }
+   CHR( ']' );
+}
+
+static void
+_dump_writemask(
+   uint writemask )
+{
+   if (writemask != TGSI_WRITEMASK_XYZW) {
+      CHR( '.' );
+      if (writemask & TGSI_WRITEMASK_X)
+         CHR( 'x' );
+      if (writemask & TGSI_WRITEMASK_Y)
+         CHR( 'y' );
+      if (writemask & TGSI_WRITEMASK_Z)
+         CHR( 'z' );
+      if (writemask & TGSI_WRITEMASK_W)
+         CHR( 'w' );
+   }
+}
+
+void
+tgsi_dump_declaration(
+   const struct tgsi_full_declaration *decl )
+{
+   TXT( "\nDCL " );
+
+   _dump_register(
+      decl->Declaration.File,
+      decl->DeclarationRange.First,
+      decl->DeclarationRange.Last );
+   _dump_writemask(
+      decl->Declaration.UsageMask );
+
+   if (decl->Declaration.Semantic) {
+      TXT( ", " );
+      ENM( decl->Semantic.SemanticName, semantic_names );
+      if (decl->Semantic.SemanticIndex != 0 ||
+          decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) {
+         CHR( '[' );
+         UID( decl->Semantic.SemanticIndex );
+         CHR( ']' );
+      }
+   }
+
+   TXT( ", " );
+   ENM( decl->Declaration.Interpolate, interpolate_names );
+}
+
+static boolean
+iter_declaration(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_declaration *decl )
+{
+   tgsi_dump_declaration( decl );
+   return TRUE;
+}
+
+void
+tgsi_dump_immediate(
+   const struct tgsi_full_immediate *imm )
+{
+   uint i;
+
+   TXT( "\nIMM " );
+   ENM( imm->Immediate.DataType, immediate_type_names );
+
+   TXT( " { " );
+   for (i = 0; i < imm->Immediate.Size - 1; i++) {
+      switch (imm->Immediate.DataType) {
+      case TGSI_IMM_FLOAT32:
+         FLT( imm->u.ImmediateFloat32[i].Float );
+         break;
+      default:
+         assert( 0 );
+      }
+
+      if (i < imm->Immediate.Size - 2)
+         TXT( ", " );
+   }
+   TXT( " }" );
+}
+
+static boolean
+iter_immediate(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_immediate *imm )
+{
+   tgsi_dump_immediate( imm );
+   return TRUE;
+}
+
+void
+tgsi_dump_instruction(
+   const struct tgsi_full_instruction *inst,
+   uint instno )
+{
+   uint i;
+   boolean first_reg = TRUE;
+
+   EOL();
+   UID( instno );
+   CHR( ':' );
+   ENM( inst->Instruction.Opcode, opcode_names );
+
+   switch (inst->Instruction.Saturate) {
+   case TGSI_SAT_NONE:
+      break;
+   case TGSI_SAT_ZERO_ONE:
+      TXT( "_SAT" );
+      break;
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      TXT( "_SATNV" );
+      break;
+   default:
+      assert( 0 );
+   }
+
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+      const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+
+      if (!first_reg)
+         CHR( ',' );
+      CHR( ' ' );
+
+      _dump_register(
+         dst->DstRegister.File,
+         dst->DstRegister.Index,
+         dst->DstRegister.Index );
+      ENM( dst->DstRegisterExtModulate.Modulate, modulate_names );
+      _dump_writemask( dst->DstRegister.WriteMask );
+
+      first_reg = FALSE;
+   }
+
+   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+      const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+
+      if (!first_reg)
+         CHR( ',' );
+      CHR( ' ' );
+
+      if (src->SrcRegisterExtMod.Negate)
+         TXT( "-(" );
+      if (src->SrcRegisterExtMod.Absolute)
+         CHR( '|' );
+      if (src->SrcRegisterExtMod.Scale2X)
+         TXT( "2*(" );
+      if (src->SrcRegisterExtMod.Bias)
+         CHR( '(' );
+      if (src->SrcRegisterExtMod.Complement)
+         TXT( "1-(" );
+      if (src->SrcRegister.Negate)
+         CHR( '-' );
+
+      if (src->SrcRegister.Indirect) {
+         _dump_register_ind(
+            src->SrcRegister.File,
+            src->SrcRegister.Index,
+            src->SrcRegisterInd.File,
+            src->SrcRegisterInd.Index );
+      }
+      else {
+         _dump_register(
+            src->SrcRegister.File,
+            src->SrcRegister.Index,
+            src->SrcRegister.Index );
+      }
+
+      if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
+          src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
+          src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
+          src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) {
+         CHR( '.' );
+         ENM( src->SrcRegister.SwizzleX, swizzle_names );
+         ENM( src->SrcRegister.SwizzleY, swizzle_names );
+         ENM( src->SrcRegister.SwizzleZ, swizzle_names );
+         ENM( src->SrcRegister.SwizzleW, swizzle_names );
+      }
+      if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
+          src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
+          src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
+          src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
+         CHR( '.' );
+         if (src->SrcRegisterExtSwz.NegateX)
+            TXT("-");
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names );
+         if (src->SrcRegisterExtSwz.NegateY)
+            TXT("-");
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names );
+         if (src->SrcRegisterExtSwz.NegateZ)
+            TXT("-");
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names );
+         if (src->SrcRegisterExtSwz.NegateW)
+            TXT("-");
+         ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names );
+      }
+
+      if (src->SrcRegisterExtMod.Complement)
+         CHR( ')' );
+      if (src->SrcRegisterExtMod.Bias)
+         TXT( ")-.5" );
+      if (src->SrcRegisterExtMod.Scale2X)
+         CHR( ')' );
+      if (src->SrcRegisterExtMod.Absolute)
+         CHR( '|' );
+      if (src->SrcRegisterExtMod.Negate)
+         CHR( ')' );
+
+      first_reg = FALSE;
+   }
+
+   if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) {
+      TXT( ", " );
+      ENM( inst->InstructionExtTexture.Texture, texture_names );
+   }
+
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_BGNLOOP2:
+   case TGSI_OPCODE_ENDLOOP2:
+   case TGSI_OPCODE_CAL:
+      TXT( " :" );
+      UID( inst->InstructionExtLabel.Label );
+      break;
+   }
+}
+
+static boolean
+iter_instruction(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_instruction *inst )
+{
+   struct dump_ctx *ctx = (struct dump_ctx *) iter;
+
+   tgsi_dump_instruction( inst, ctx->instno++ );
+   return TRUE;
+}
+
+static boolean
+prolog(
+   struct tgsi_iterate_context *ctx )
+{
+   EOL();
+   ENM( ctx->processor.Processor, processor_type_names );
+   UID( ctx->version.MajorVersion );
+   CHR( '.' );
+   UID( ctx->version.MinorVersion );
+   return TRUE;
+}
+
+void
+tgsi_dump(
+   const struct tgsi_token *tokens,
+   uint flags )
+{
+   struct dump_ctx ctx;
+
+   /* sanity checks */
+   assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 );
+   assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 );
+
+   ctx.iter.prolog = prolog;
+   ctx.iter.iterate_instruction = iter_instruction;
+   ctx.iter.iterate_declaration = iter_declaration;
+   ctx.iter.iterate_immediate = iter_immediate;
+   ctx.iter.epilog = NULL;
+
+   ctx.instno = 0;
+
+   tgsi_iterate_shader( tokens, &ctx.iter );
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
new file mode 100644
index 00000000000..51c230b5db4
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -0,0 +1,63 @@
+/**************************************************************************
+ * 
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_DUMP_H
+#define TGSI_DUMP_H
+
+#include "pipe/p_shader_tokens.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+void
+tgsi_dump(
+   const struct tgsi_token *tokens,
+   uint flags );
+
+struct tgsi_full_immediate;
+struct tgsi_full_instruction;
+struct tgsi_full_declaration;
+
+void
+tgsi_dump_immediate(
+   const struct tgsi_full_immediate *imm );
+
+void
+tgsi_dump_instruction(
+   const struct tgsi_full_instruction *inst,
+   uint instno );
+
+void
+tgsi_dump_declaration(
+   const struct tgsi_full_declaration *decl );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_DUMP_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
new file mode 100644
index 00000000000..eabd74bd6d9
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -0,0 +1,845 @@
+/**************************************************************************
+ * 
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "util/u_string.h"
+#include "tgsi_dump_c.h"
+#include "tgsi_parse.h"
+#include "tgsi_build.h"
+
+static void
+dump_enum(
+   const unsigned    e,
+   const char        **enums,
+   const unsigned    enums_count )
+{
+   if (e >= enums_count) {
+      debug_printf( "%u", e );
+   }
+   else {
+      debug_printf( "%s", enums[e] );
+   }
+}
+
+#define EOL()           debug_printf( "\n" )
+#define TXT(S)          debug_printf( "%s", S )
+#define CHR(C)          debug_printf( "%c", C )
+#define UIX(I)          debug_printf( "0x%x", I )
+#define UID(I)          debug_printf( "%u", I )
+#define SID(I)          debug_printf( "%d", I )
+#define FLT(F)          debug_printf( "%10.4f", F )
+#define ENM(E,ENUMS)    dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
+
+static const char *TGSI_PROCESSOR_TYPES[] =
+{
+   "PROCESSOR_FRAGMENT",
+   "PROCESSOR_VERTEX",
+   "PROCESSOR_GEOMETRY"
+};
+
+static const char *TGSI_TOKEN_TYPES[] =
+{
+   "TOKEN_TYPE_DECLARATION",
+   "TOKEN_TYPE_IMMEDIATE",
+   "TOKEN_TYPE_INSTRUCTION"
+};
+
+static const char *TGSI_FILES[] =
+{
+   "FILE_NULL",
+   "FILE_CONSTANT",
+   "FILE_INPUT",
+   "FILE_OUTPUT",
+   "FILE_TEMPORARY",
+   "FILE_SAMPLER",
+   "FILE_ADDRESS",
+   "FILE_IMMEDIATE"
+};
+
+static const char *TGSI_INTERPOLATES[] =
+{
+   "INTERPOLATE_CONSTANT",
+   "INTERPOLATE_LINEAR",
+   "INTERPOLATE_PERSPECTIVE"
+};
+
+static const char *TGSI_SEMANTICS[] =
+{
+   "SEMANTIC_POSITION",
+   "SEMANTIC_COLOR",
+   "SEMANTIC_BCOLOR",
+   "SEMANTIC_FOG",
+   "SEMANTIC_PSIZE",
+   "SEMANTIC_GENERIC",
+   "SEMANTIC_NORMAL"
+};
+
+static const char *TGSI_IMMS[] =
+{
+   "IMM_FLOAT32"
+};
+
+static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] =
+{
+   "OPCODE_ARL",
+   "OPCODE_MOV",
+   "OPCODE_LIT",
+   "OPCODE_RCP",
+   "OPCODE_RSQ",
+   "OPCODE_EXP",
+   "OPCODE_LOG",
+   "OPCODE_MUL",
+   "OPCODE_ADD",
+   "OPCODE_DP3",
+   "OPCODE_DP4",
+   "OPCODE_DST",
+   "OPCODE_MIN",
+   "OPCODE_MAX",
+   "OPCODE_SLT",
+   "OPCODE_SGE",
+   "OPCODE_MAD",
+   "OPCODE_SUB",
+   "OPCODE_LERP",
+   "OPCODE_CND",
+   "OPCODE_CND0",
+   "OPCODE_DOT2ADD",
+   "OPCODE_INDEX",
+   "OPCODE_NEGATE",
+   "OPCODE_FRAC",
+   "OPCODE_CLAMP",
+   "OPCODE_FLOOR",
+   "OPCODE_ROUND",
+   "OPCODE_EXPBASE2",
+   "OPCODE_LOGBASE2",
+   "OPCODE_POWER",
+   "OPCODE_CROSSPRODUCT",
+   "OPCODE_MULTIPLYMATRIX",
+   "OPCODE_ABS",
+   "OPCODE_RCC",
+   "OPCODE_DPH",
+   "OPCODE_COS",
+   "OPCODE_DDX",
+   "OPCODE_DDY",
+   "OPCODE_KILP",
+   "OPCODE_PK2H",
+   "OPCODE_PK2US",
+   "OPCODE_PK4B",
+   "OPCODE_PK4UB",
+   "OPCODE_RFL",
+   "OPCODE_SEQ",
+   "OPCODE_SFL",
+   "OPCODE_SGT",
+   "OPCODE_SIN",
+   "OPCODE_SLE",
+   "OPCODE_SNE",
+   "OPCODE_STR",
+   "OPCODE_TEX",
+   "OPCODE_TXD",
+   "OPCODE_TXP",
+   "OPCODE_UP2H",
+   "OPCODE_UP2US",
+   "OPCODE_UP4B",
+   "OPCODE_UP4UB",
+   "OPCODE_X2D",
+   "OPCODE_ARA",
+   "OPCODE_ARR",
+   "OPCODE_BRA",
+   "OPCODE_CAL",
+   "OPCODE_RET",
+   "OPCODE_SSG",
+   "OPCODE_CMP",
+   "OPCODE_SCS",
+   "OPCODE_TXB",
+   "OPCODE_NRM",
+   "OPCODE_DIV",
+   "OPCODE_DP2",
+   "OPCODE_TXL",
+   "OPCODE_BRK",
+   "OPCODE_IF",
+   "OPCODE_LOOP",
+   "OPCODE_REP",
+   "OPCODE_ELSE",
+   "OPCODE_ENDIF",
+   "OPCODE_ENDLOOP",
+   "OPCODE_ENDREP",
+   "OPCODE_PUSHA",
+   "OPCODE_POPA",
+   "OPCODE_CEIL",
+   "OPCODE_I2F",
+   "OPCODE_NOT",
+   "OPCODE_TRUNC",
+   "OPCODE_SHL",
+   "OPCODE_SHR",
+   "OPCODE_AND",
+   "OPCODE_OR",
+   "OPCODE_MOD",
+   "OPCODE_XOR",
+   "OPCODE_SAD",
+   "OPCODE_TXF",
+   "OPCODE_TXQ",
+   "OPCODE_CONT",
+   "OPCODE_EMIT",
+   "OPCODE_ENDPRIM",
+   "OPCODE_BGNLOOP2",
+   "OPCODE_BGNSUB",
+   "OPCODE_ENDLOOP2",
+   "OPCODE_ENDSUB",
+   "OPCODE_NOISE1",
+   "OPCODE_NOISE2",
+   "OPCODE_NOISE3",
+   "OPCODE_NOISE4",
+   "OPCODE_NOP",
+   "OPCODE_M4X3",
+   "OPCODE_M3X4",
+   "OPCODE_M3X3",
+   "OPCODE_M3X2",
+   "OPCODE_NRM4",
+   "OPCODE_CALLNZ",
+   "OPCODE_IFC",
+   "OPCODE_BREAKC",
+   "OPCODE_KIL",
+   "OPCODE_END"
+};
+
+static const char *TGSI_SATS[] =
+{
+   "SAT_NONE",
+   "SAT_ZERO_ONE",
+   "SAT_MINUS_PLUS_ONE"
+};
+
+static const char *TGSI_INSTRUCTION_EXTS[] =
+{
+   "INSTRUCTION_EXT_TYPE_NV",
+   "INSTRUCTION_EXT_TYPE_LABEL",
+   "INSTRUCTION_EXT_TYPE_TEXTURE"
+};
+
+static const char *TGSI_PRECISIONS[] =
+{
+   "PRECISION_DEFAULT",
+   "PRECISION_FLOAT32",
+   "PRECISION_FLOAT16",
+   "PRECISION_FIXED12"
+};
+
+static const char *TGSI_CCS[] =
+{
+   "CC_GT",
+   "CC_EQ",
+   "CC_LT",
+   "CC_UN",
+   "CC_GE",
+   "CC_LE",
+   "CC_NE",
+   "CC_TR",
+   "CC_FL"
+};
+
+static const char *TGSI_SWIZZLES[] =
+{
+   "SWIZZLE_X",
+   "SWIZZLE_Y",
+   "SWIZZLE_Z",
+   "SWIZZLE_W"
+};
+
+static const char *TGSI_TEXTURES[] =
+{
+   "TEXTURE_UNKNOWN",
+   "TEXTURE_1D",
+   "TEXTURE_2D",
+   "TEXTURE_3D",
+   "TEXTURE_CUBE",
+   "TEXTURE_RECT",
+   "TEXTURE_SHADOW1D",
+   "TEXTURE_SHADOW2D",
+   "TEXTURE_SHADOWRECT"
+};
+
+static const char *TGSI_SRC_REGISTER_EXTS[] =
+{
+   "SRC_REGISTER_EXT_TYPE_SWZ",
+   "SRC_REGISTER_EXT_TYPE_MOD"
+};
+
+static const char *TGSI_EXTSWIZZLES[] =
+{
+   "EXTSWIZZLE_X",
+   "EXTSWIZZLE_Y",
+   "EXTSWIZZLE_Z",
+   "EXTSWIZZLE_W",
+   "EXTSWIZZLE_ZERO",
+   "EXTSWIZZLE_ONE"
+};
+
+static const char *TGSI_WRITEMASKS[] =
+{
+   "0",
+   "WRITEMASK_X",
+   "WRITEMASK_Y",
+   "WRITEMASK_XY",
+   "WRITEMASK_Z",
+   "WRITEMASK_XZ",
+   "WRITEMASK_YZ",
+   "WRITEMASK_XYZ",
+   "WRITEMASK_W",
+   "WRITEMASK_XW",
+   "WRITEMASK_YW",
+   "WRITEMASK_XYW",
+   "WRITEMASK_ZW",
+   "WRITEMASK_XZW",
+   "WRITEMASK_YZW",
+   "WRITEMASK_XYZW"
+};
+
+static const char *TGSI_DST_REGISTER_EXTS[] =
+{
+   "DST_REGISTER_EXT_TYPE_CONDCODE",
+   "DST_REGISTER_EXT_TYPE_MODULATE"
+};
+
+static const char *TGSI_MODULATES[] =
+{
+   "MODULATE_1X",
+   "MODULATE_2X",
+   "MODULATE_4X",
+   "MODULATE_8X",
+   "MODULATE_HALF",
+   "MODULATE_QUARTER",
+   "MODULATE_EIGHTH"
+};
+
+static void
+dump_declaration_verbose(
+   struct tgsi_full_declaration  *decl,
+   unsigned                      ignored,
+   unsigned                      deflt,
+   struct tgsi_full_declaration  *fd )
+{
+   TXT( "\nFile       : " );
+   ENM( decl->Declaration.File, TGSI_FILES );
+   if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) {
+      TXT( "\nUsageMask  : " );
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
+         CHR( 'X' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) {
+         CHR( 'Y' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) {
+         CHR( 'Z' );
+      }
+      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) {
+         CHR( 'W' );
+      }
+   }
+   if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) {
+      TXT( "\nInterpolate: " );
+      ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES );
+   }
+   if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) {
+      TXT( "\nSemantic   : " );
+      UID( decl->Declaration.Semantic );
+   }
+   if( ignored ) {
+      TXT( "\nPadding    : " );
+      UIX( decl->Declaration.Padding );
+   }
+
+   EOL();
+   TXT( "\nFirst: " );
+   UID( decl->DeclarationRange.First );
+   TXT( "\nLast : " );
+   UID( decl->DeclarationRange.Last );
+
+   if( decl->Declaration.Semantic ) {
+      EOL();
+      TXT( "\nSemanticName : " );
+      ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS );
+      TXT( "\nSemanticIndex: " );
+      UID( decl->Semantic.SemanticIndex );
+      if( ignored ) {
+         TXT( "\nPadding      : " );
+         UIX( decl->Semantic.Padding );
+      }
+   }
+}
+
+static void
+dump_immediate_verbose(
+   struct tgsi_full_immediate *imm,
+   unsigned                   ignored )
+{
+   unsigned i;
+
+   TXT( "\nDataType   : " );
+   ENM( imm->Immediate.DataType, TGSI_IMMS );
+   if( ignored ) {
+      TXT( "\nPadding    : " );
+      UIX( imm->Immediate.Padding );
+   }
+
+   for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+      EOL();
+      switch( imm->Immediate.DataType ) {
+      case TGSI_IMM_FLOAT32:
+         TXT( "\nFloat: " );
+         FLT( imm->u.ImmediateFloat32[i].Float );
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+}
+
+static void
+dump_instruction_verbose(
+   struct tgsi_full_instruction  *inst,
+   unsigned                      ignored,
+   unsigned                      deflt,
+   struct tgsi_full_instruction  *fi )
+{
+   unsigned i;
+
+   TXT( "\nOpcode     : " );
+   ENM( inst->Instruction.Opcode, TGSI_OPCODES );
+   if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) {
+      TXT( "\nSaturate   : " );
+      ENM( inst->Instruction.Saturate, TGSI_SATS );
+   }
+   if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) {
+      TXT( "\nNumDstRegs : " );
+      UID( inst->Instruction.NumDstRegs );
+   }
+   if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) {
+      TXT( "\nNumSrcRegs : " );
+      UID( inst->Instruction.NumSrcRegs );
+   }
+   if( ignored ) {
+      TXT( "\nPadding    : " );
+      UIX( inst->Instruction.Padding );
+   }
+
+   if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) {
+      EOL();
+      TXT( "\nType          : " );
+      ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS );
+      if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) {
+         TXT( "\nPrecision     : " );
+         ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS );
+      }
+      if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) {
+         TXT( "\nCondDstIndex  : " );
+         UID( inst->InstructionExtNv.CondDstIndex );
+      }
+      if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) {
+         TXT( "\nCondFlowIndex : " );
+         UID( inst->InstructionExtNv.CondFlowIndex );
+      }
+      if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) {
+         TXT( "\nCondMask      : " );
+         ENM( inst->InstructionExtNv.CondMask, TGSI_CCS );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) {
+         TXT( "\nCondSwizzleX  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) {
+         TXT( "\nCondSwizzleY  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) {
+         TXT( "\nCondSwizzleZ  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) {
+         TXT( "\nCondSwizzleW  : " );
+         ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES );
+      }
+      if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) {
+         TXT( "\nCondDstUpdate : " );
+         UID( inst->InstructionExtNv.CondDstUpdate );
+      }
+      if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) {
+         TXT( "\nCondFlowEnable: " );
+         UID( inst->InstructionExtNv.CondFlowEnable );
+      }
+      if( ignored ) {
+         TXT( "\nPadding       : " );
+         UIX( inst->InstructionExtNv.Padding );
+         if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) {
+            TXT( "\nExtended      : " );
+            UID( inst->InstructionExtNv.Extended );
+         }
+      }
+   }
+
+   if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) {
+      EOL();
+      TXT( "\nType    : " );
+      ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS );
+      if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) {
+         TXT( "\nLabel   : " );
+         UID( inst->InstructionExtLabel.Label );
+      }
+      if( ignored ) {
+         TXT( "\nPadding : " );
+         UIX( inst->InstructionExtLabel.Padding );
+         if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) {
+            TXT( "\nExtended: " );
+            UID( inst->InstructionExtLabel.Extended );
+         }
+      }
+   }
+
+   if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) {
+      EOL();
+      TXT( "\nType    : " );
+      ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS );
+      if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) {
+         TXT( "\nTexture : " );
+         ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES );
+      }
+      if( ignored ) {
+         TXT( "\nPadding : " );
+         UIX( inst->InstructionExtTexture.Padding );
+         if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) {
+            TXT( "\nExtended: " );
+            UID( inst->InstructionExtTexture.Extended );
+         }
+      }
+   }
+
+   for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+      struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i];
+
+      EOL();
+      TXT( "\nFile     : " );
+      ENM( dst->DstRegister.File, TGSI_FILES );
+      if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) {
+         TXT( "\nWriteMask: " );
+         ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS );
+      }
+      if( ignored ) {
+         if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) {
+            TXT( "\nIndirect : " );
+            UID( dst->DstRegister.Indirect );
+         }
+         if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) {
+            TXT( "\nDimension: " );
+            UID( dst->DstRegister.Dimension );
+         }
+      }
+      if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) {
+         TXT( "\nIndex    : " );
+         SID( dst->DstRegister.Index );
+      }
+      if( ignored ) {
+         TXT( "\nPadding  : " );
+         UIX( dst->DstRegister.Padding );
+         if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) {
+            TXT( "\nExtended : " );
+            UID( dst->DstRegister.Extended );
+         }
+      }
+
+      if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) {
+         EOL();
+         TXT( "\nType        : " );
+         ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS );
+         if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) {
+            TXT( "\nCondMask    : " );
+            ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) {
+            TXT( "\nCondSwizzleX: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) {
+            TXT( "\nCondSwizzleY: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) {
+            TXT( "\nCondSwizzleZ: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) {
+            TXT( "\nCondSwizzleW: " );
+            ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES );
+         }
+         if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) {
+            TXT( "\nCondSrcIndex: " );
+            UID( dst->DstRegisterExtConcode.CondSrcIndex );
+         }
+         if( ignored ) {
+            TXT( "\nPadding     : " );
+            UIX( dst->DstRegisterExtConcode.Padding );
+            if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) {
+               TXT( "\nExtended    : " );
+               UID( dst->DstRegisterExtConcode.Extended );
+            }
+         }
+      }
+
+      if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) {
+         EOL();
+         TXT( "\nType    : " );
+         ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
+         if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) {
+            TXT( "\nModulate: " );
+            ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES );
+         }
+         if( ignored ) {
+            TXT( "\nPadding : " );
+            UIX( dst->DstRegisterExtModulate.Padding );
+            if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) {
+               TXT( "\nExtended: " );
+               UID( dst->DstRegisterExtModulate.Extended );
+            }
+         }
+      }
+   }
+
+   for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+      struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i];
+
+      EOL();
+      TXT( "\nFile     : ");
+      ENM( src->SrcRegister.File, TGSI_FILES );
+      if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) {
+         TXT( "\nSwizzleX : " );
+         ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) {
+         TXT( "\nSwizzleY : " );
+         ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) {
+         TXT( "\nSwizzleZ : " );
+         ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) {
+         TXT( "\nSwizzleW : " );
+         ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES );
+      }
+      if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) {
+         TXT( "\nNegate   : " );
+         UID( src->SrcRegister.Negate );
+      }
+      if( ignored ) {
+         if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) {
+            TXT( "\nIndirect : " );
+            UID( src->SrcRegister.Indirect );
+         }
+         if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) {
+            TXT( "\nDimension: " );
+            UID( src->SrcRegister.Dimension );
+         }
+      }
+      if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) {
+         TXT( "\nIndex    : " );
+         SID( src->SrcRegister.Index );
+      }
+      if( ignored ) {
+         if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) {
+            TXT( "\nExtended : " );
+            UID( src->SrcRegister.Extended );
+         }
+      }
+
+      if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) {
+         EOL();
+         TXT( "\nType       : " );
+         ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) {
+            TXT( "\nExtSwizzleX: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) {
+            TXT( "\nExtSwizzleY: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) {
+            TXT( "\nExtSwizzleZ: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) {
+            TXT( "\nExtSwizzleW: " );
+            ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) {
+            TXT( "\nNegateX   : " );
+            UID( src->SrcRegisterExtSwz.NegateX );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) {
+            TXT( "\nNegateY   : " );
+            UID( src->SrcRegisterExtSwz.NegateY );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) {
+            TXT( "\nNegateZ   : " );
+            UID( src->SrcRegisterExtSwz.NegateZ );
+         }
+         if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) {
+            TXT( "\nNegateW   : " );
+            UID( src->SrcRegisterExtSwz.NegateW );
+         }
+         if( ignored ) {
+            TXT( "\nPadding   : " );
+            UIX( src->SrcRegisterExtSwz.Padding );
+            if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) {
+               TXT( "\nExtended   : " );
+               UID( src->SrcRegisterExtSwz.Extended );
+            }
+         }
+      }
+
+      if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
+         EOL();
+         TXT( "\nType     : " );
+         ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
+         if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) {
+            TXT( "\nComplement: " );
+            UID( src->SrcRegisterExtMod.Complement );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) {
+            TXT( "\nBias     : " );
+            UID( src->SrcRegisterExtMod.Bias );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) {
+            TXT( "\nScale2X   : " );
+            UID( src->SrcRegisterExtMod.Scale2X );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) {
+            TXT( "\nAbsolute  : " );
+            UID( src->SrcRegisterExtMod.Absolute );
+         }
+         if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) {
+            TXT( "\nNegate   : " );
+            UID( src->SrcRegisterExtMod.Negate );
+         }
+         if( ignored ) {
+            TXT( "\nPadding   : " );
+            UIX( src->SrcRegisterExtMod.Padding );
+            if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) {
+               TXT( "\nExtended  : " );
+               UID( src->SrcRegisterExtMod.Extended );
+            }
+         }
+      }
+   }
+}
+
+void
+tgsi_dump_c(
+   const struct tgsi_token *tokens,
+   uint flags )
+{
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction fi;
+   struct tgsi_full_declaration fd;
+   uint ignored = flags & TGSI_DUMP_C_IGNORED;
+   uint deflt = flags & TGSI_DUMP_C_DEFAULT;
+   uint instno = 0;
+
+   /* sanity checks */
+   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
+   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
+
+   tgsi_parse_init( &parse, tokens );
+
+   TXT( "tgsi-dump begin -----------------" );
+
+   TXT( "\nMajorVersion: " );
+   UID( parse.FullVersion.Version.MajorVersion );
+   TXT( "\nMinorVersion: " );
+   UID( parse.FullVersion.Version.MinorVersion );
+   EOL();
+
+   TXT( "\nHeaderSize: " );
+   UID( parse.FullHeader.Header.HeaderSize );
+   TXT( "\nBodySize  : " );
+   UID( parse.FullHeader.Header.BodySize );
+   TXT( "\nProcessor : " );
+   ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES );
+   EOL();
+
+   fi = tgsi_default_full_instruction();
+   fd = tgsi_default_full_declaration();
+
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      TXT( "\nType       : " );
+      ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES );
+      if( ignored ) {
+         TXT( "\nSize       : " );
+         UID( parse.FullToken.Token.Size );
+         if( deflt || parse.FullToken.Token.Extended ) {
+            TXT( "\nExtended   : " );
+            UID( parse.FullToken.Token.Extended );
+         }
+      }
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         dump_declaration_verbose(
+            &parse.FullToken.FullDeclaration,
+            ignored,
+            deflt,
+            &fd );
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         dump_immediate_verbose(
+            &parse.FullToken.FullImmediate,
+            ignored );
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         dump_instruction_verbose(
+            &parse.FullToken.FullInstruction,
+            ignored,
+            deflt,
+            &fi );
+         break;
+
+      default:
+         assert( 0 );
+      }
+
+      EOL();
+   }
+
+   TXT( "\ntgsi-dump end -------------------\n" );
+
+   tgsi_parse_free( &parse );
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.h b/src/gallium/auxiliary/tgsi/tgsi_dump_c.h
new file mode 100644
index 00000000000..d91cd35b3b7
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.h
@@ -0,0 +1,49 @@
+/**************************************************************************
+ * 
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_DUMP_C_H
+#define TGSI_DUMP_C_H
+
+#include "pipe/p_shader_tokens.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+#define TGSI_DUMP_C_IGNORED 1
+#define TGSI_DUMP_C_DEFAULT 2
+
+void
+tgsi_dump_c(
+   const struct tgsi_token *tokens,
+   uint flags );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_DUMP_C_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
new file mode 100644
index 00000000000..8b430548bca
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -0,0 +1,2522 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * TGSI interpretor/executor.
+ *
+ * Flow control information:
+ *
+ * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
+ * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
+ * care since a condition may be true for some quad components but false
+ * for other components.
+ *
+ * We basically execute all statements (even if they're in the part of
+ * an IF/ELSE clause that's "not taken") and use a special mask to
+ * control writing to destination registers.  This is the ExecMask.
+ * See store_dest().
+ *
+ * The ExecMask is computed from three other masks (CondMask, LoopMask and
+ * ContMask) which are controlled by the flow control instructions (namely:
+ * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
+ *
+ *
+ * Authors:
+ *   Michal Krol
+ *   Brian Paul
+ */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi_exec.h"
+
+#define TILE_TOP_LEFT     0
+#define TILE_TOP_RIGHT    1
+#define TILE_BOTTOM_LEFT  2
+#define TILE_BOTTOM_RIGHT 3
+
+/*
+ * Shorthand locations of various utility registers (_I = Index, _C = Channel)
+ */
+#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
+#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
+#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
+#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
+#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
+#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
+#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
+#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
+#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
+#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
+#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
+#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
+#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
+#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
+#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
+#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
+#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
+#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
+#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
+#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
+#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
+#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
+#define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
+#define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
+#define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
+#define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
+#define TEMP_R0            TGSI_EXEC_TEMP_R0
+
+#define FOR_EACH_CHANNEL(CHAN)\
+   for (CHAN = 0; CHAN < 4; CHAN++)
+
+#define IS_CHANNEL_ENABLED(INST, CHAN)\
+   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define IS_CHANNEL_ENABLED2(INST, CHAN)\
+   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
+   FOR_EACH_CHANNEL( CHAN )\
+      if (IS_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
+   FOR_EACH_CHANNEL( CHAN )\
+      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
+
+
+/** The execution mask depends on the conditional mask and the loop mask */
+#define UPDATE_EXEC_MASK(MACH) \
+      MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
+
+
+#define CHAN_X  0
+#define CHAN_Y  1
+#define CHAN_Z  2
+#define CHAN_W  3
+
+
+
+/**
+ * Initialize machine state by expanding tokens to full instructions,
+ * allocating temporary storage, setting up constants, etc.
+ * After this, we can call tgsi_exec_machine_run() many times.
+ */
+void 
+tgsi_exec_machine_bind_shader(
+   struct tgsi_exec_machine *mach,
+   const struct tgsi_token *tokens,
+   uint numSamplers,
+   struct tgsi_sampler *samplers)
+{
+   uint k;
+   struct tgsi_parse_context parse;
+   struct tgsi_exec_labels *labels = &mach->Labels;
+   struct tgsi_full_instruction *instructions;
+   struct tgsi_full_declaration *declarations;
+   uint maxInstructions = 10, numInstructions = 0;
+   uint maxDeclarations = 10, numDeclarations = 0;
+   uint instno = 0;
+
+#if 0
+   tgsi_dump(tokens, 0);
+#endif
+
+   mach->Tokens = tokens;
+   mach->Samplers = samplers;
+
+   k = tgsi_parse_init (&parse, mach->Tokens);
+   if (k != TGSI_PARSE_OK) {
+      debug_printf( "Problem parsing!\n" );
+      return;
+   }
+
+   mach->Processor = parse.FullHeader.Processor.Processor;
+   mach->ImmLimit = 0;
+   labels->count = 0;
+
+   declarations = (struct tgsi_full_declaration *)
+      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
+
+   if (!declarations) {
+      return;
+   }
+
+   instructions = (struct tgsi_full_instruction *)
+      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
+
+   if (!instructions) {
+      FREE( declarations );
+      return;
+   }
+
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      uint pointer = parse.Position;
+      uint i;
+
+      tgsi_parse_token( &parse );
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         /* save expanded declaration */
+         if (numDeclarations == maxDeclarations) {
+            declarations = REALLOC(declarations,
+                                   maxDeclarations
+                                   * sizeof(struct tgsi_full_declaration),
+                                   (maxDeclarations + 10)
+                                   * sizeof(struct tgsi_full_declaration));
+            maxDeclarations += 10;
+         }
+         memcpy(declarations + numDeclarations,
+                &parse.FullToken.FullDeclaration,
+                sizeof(declarations[0]));
+         numDeclarations++;
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         {
+            uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+            assert( size % 4 == 0 );
+            assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
+
+            for( i = 0; i < size; i++ ) {
+               mach->Imms[mach->ImmLimit + i / 4][i % 4] = 
+		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+            }
+            mach->ImmLimit += size / 4;
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         assert( labels->count < MAX_LABELS );
+
+         labels->labels[labels->count][0] = instno;
+         labels->labels[labels->count][1] = pointer;
+         labels->count++;
+
+         /* save expanded instruction */
+         if (numInstructions == maxInstructions) {
+            instructions = REALLOC(instructions,
+                                   maxInstructions
+                                   * sizeof(struct tgsi_full_instruction),
+                                   (maxInstructions + 10)
+                                   * sizeof(struct tgsi_full_instruction));
+            maxInstructions += 10;
+         }
+         memcpy(instructions + numInstructions,
+                &parse.FullToken.FullInstruction,
+                sizeof(instructions[0]));
+         numInstructions++;
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+   tgsi_parse_free (&parse);
+
+   if (mach->Declarations) {
+      FREE( mach->Declarations );
+   }
+   mach->Declarations = declarations;
+   mach->NumDeclarations = numDeclarations;
+
+   if (mach->Instructions) {
+      FREE( mach->Instructions );
+   }
+   mach->Instructions = instructions;
+   mach->NumInstructions = numInstructions;
+}
+
+
+void
+tgsi_exec_machine_init(
+   struct tgsi_exec_machine *mach )
+{
+   uint i;
+
+   mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
+   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
+
+   /* Setup constants. */
+   for( i = 0; i < 4; i++ ) {
+      mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
+      mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
+      mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
+      mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
+      mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
+      mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
+      mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
+      mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
+      mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
+      mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
+   }
+}
+
+
+void
+tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
+{
+   if (mach->Instructions) {
+      FREE(mach->Instructions);
+      mach->Instructions = NULL;
+      mach->NumInstructions = 0;
+   }
+   if (mach->Declarations) {
+      FREE(mach->Declarations);
+      mach->Declarations = NULL;
+      mach->NumDeclarations = 0;
+   }
+}
+
+
+static void
+micro_abs(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = fabsf( src->f[0] );
+   dst->f[1] = fabsf( src->f[1] );
+   dst->f[2] = fabsf( src->f[2] );
+   dst->f[3] = fabsf( src->f[3] );
+}
+
+static void
+micro_add(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] + src1->f[0];
+   dst->f[1] = src0->f[1] + src1->f[1];
+   dst->f[2] = src0->f[2] + src1->f[2];
+   dst->f[3] = src0->f[3] + src1->f[3];
+}
+
+static void
+micro_iadd(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] + src1->i[0];
+   dst->i[1] = src0->i[1] + src1->i[1];
+   dst->i[2] = src0->i[2] + src1->i[2];
+   dst->i[3] = src0->i[3] + src1->i[3];
+}
+
+static void
+micro_and(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] & src1->u[0];
+   dst->u[1] = src0->u[1] & src1->u[1];
+   dst->u[2] = src0->u[2] & src1->u[2];
+   dst->u[3] = src0->u[3] & src1->u[3];
+}
+
+static void
+micro_ceil(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = ceilf( src->f[0] );
+   dst->f[1] = ceilf( src->f[1] );
+   dst->f[2] = ceilf( src->f[2] );
+   dst->f[3] = ceilf( src->f[3] );
+}
+
+static void
+micro_cos(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = cosf( src->f[0] );
+   dst->f[1] = cosf( src->f[1] );
+   dst->f[2] = cosf( src->f[2] );
+   dst->f[3] = cosf( src->f[3] );
+}
+
+static void
+micro_ddx(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] =
+   dst->f[1] =
+   dst->f[2] =
+   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
+}
+
+static void
+micro_ddy(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] =
+   dst->f[1] =
+   dst->f[2] =
+   dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
+}
+
+static void
+micro_div(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] / src1->f[0];
+   dst->f[1] = src0->f[1] / src1->f[1];
+   dst->f[2] = src0->f[2] / src1->f[2];
+   dst->f[3] = src0->f[3] / src1->f[3];
+}
+
+static void
+micro_udiv(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] / src1->u[0];
+   dst->u[1] = src0->u[1] / src1->u[1];
+   dst->u[2] = src0->u[2] / src1->u[2];
+   dst->u[3] = src0->u[3] / src1->u[3];
+}
+
+static void
+micro_eq(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
+   dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
+   dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
+   dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
+}
+
+static void
+micro_ieq(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
+   dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
+   dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
+   dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
+}
+
+static void
+micro_exp2(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src)
+{
+   dst->f[0] = powf( 2.0f, src->f[0] );
+   dst->f[1] = powf( 2.0f, src->f[1] );
+   dst->f[2] = powf( 2.0f, src->f[2] );
+   dst->f[3] = powf( 2.0f, src->f[3] );
+}
+
+static void
+micro_f2it(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->i[0] = (int) src->f[0];
+   dst->i[1] = (int) src->f[1];
+   dst->i[2] = (int) src->f[2];
+   dst->i[3] = (int) src->f[3];
+}
+
+static void
+micro_f2ut(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->u[0] = (uint) src->f[0];
+   dst->u[1] = (uint) src->f[1];
+   dst->u[2] = (uint) src->f[2];
+   dst->u[3] = (uint) src->f[3];
+}
+
+static void
+micro_flr(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = floorf( src->f[0] );
+   dst->f[1] = floorf( src->f[1] );
+   dst->f[2] = floorf( src->f[2] );
+   dst->f[3] = floorf( src->f[3] );
+}
+
+static void
+micro_frc(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = src->f[0] - floorf( src->f[0] );
+   dst->f[1] = src->f[1] - floorf( src->f[1] );
+   dst->f[2] = src->f[2] - floorf( src->f[2] );
+   dst->f[3] = src->f[3] - floorf( src->f[3] );
+}
+
+static void
+micro_ge(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
+   dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
+   dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
+   dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
+}
+
+static void
+micro_i2f(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) src->i[0];
+   dst->f[1] = (float) src->i[1];
+   dst->f[2] = (float) src->i[2];
+   dst->f[3] = (float) src->i[3];
+}
+
+static void
+micro_lg2(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = logf( src->f[0] ) * 1.442695f;
+   dst->f[1] = logf( src->f[1] ) * 1.442695f;
+   dst->f[2] = logf( src->f[2] ) * 1.442695f;
+   dst->f[3] = logf( src->f[3] ) * 1.442695f;
+}
+
+static void
+micro_le(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0];
+   dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1];
+   dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2];
+   dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3];
+}
+
+static void
+micro_lt(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
+   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
+   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
+   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
+}
+
+static void
+micro_ilt(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
+   dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
+   dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
+   dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
+}
+
+static void
+micro_ult(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2,
+   const union tgsi_exec_channel *src3 )
+{
+   dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
+   dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
+   dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
+   dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
+}
+
+static void
+micro_max(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
+   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
+   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
+   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
+}
+
+static void
+micro_imax(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
+   dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
+   dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
+   dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
+}
+
+static void
+micro_umax(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
+   dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
+   dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
+   dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
+}
+
+static void
+micro_min(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
+   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
+   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
+   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
+}
+
+static void
+micro_imin(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
+   dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
+   dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
+   dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
+}
+
+static void
+micro_umin(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
+   dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
+   dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
+   dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
+}
+
+static void
+micro_umod(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] % src1->u[0];
+   dst->u[1] = src0->u[1] % src1->u[1];
+   dst->u[2] = src0->u[2] % src1->u[2];
+   dst->u[3] = src0->u[3] % src1->u[3];
+}
+
+static void
+micro_mul(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] * src1->f[0];
+   dst->f[1] = src0->f[1] * src1->f[1];
+   dst->f[2] = src0->f[2] * src1->f[2];
+   dst->f[3] = src0->f[3] * src1->f[3];
+}
+
+static void
+micro_imul(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] * src1->i[0];
+   dst->i[1] = src0->i[1] * src1->i[1];
+   dst->i[2] = src0->i[2] * src1->i[2];
+   dst->i[3] = src0->i[3] * src1->i[3];
+}
+
+static void
+micro_imul64(
+   union tgsi_exec_channel *dst0,
+   union tgsi_exec_channel *dst1,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst1->i[0] = src0->i[0] * src1->i[0];
+   dst1->i[1] = src0->i[1] * src1->i[1];
+   dst1->i[2] = src0->i[2] * src1->i[2];
+   dst1->i[3] = src0->i[3] * src1->i[3];
+   dst0->i[0] = 0;
+   dst0->i[1] = 0;
+   dst0->i[2] = 0;
+   dst0->i[3] = 0;
+}
+
+static void
+micro_umul64(
+   union tgsi_exec_channel *dst0,
+   union tgsi_exec_channel *dst1,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst1->u[0] = src0->u[0] * src1->u[0];
+   dst1->u[1] = src0->u[1] * src1->u[1];
+   dst1->u[2] = src0->u[2] * src1->u[2];
+   dst1->u[3] = src0->u[3] * src1->u[3];
+   dst0->u[0] = 0;
+   dst0->u[1] = 0;
+   dst0->u[2] = 0;
+   dst0->u[3] = 0;
+}
+
+static void
+micro_movc(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2 )
+{
+   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
+   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
+   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
+   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
+}
+
+static void
+micro_neg(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = -src->f[0];
+   dst->f[1] = -src->f[1];
+   dst->f[2] = -src->f[2];
+   dst->f[3] = -src->f[3];
+}
+
+static void
+micro_ineg(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->i[0] = -src->i[0];
+   dst->i[1] = -src->i[1];
+   dst->i[2] = -src->i[2];
+   dst->i[3] = -src->i[3];
+}
+
+static void
+micro_not(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->u[0] = ~src->u[0];
+   dst->u[1] = ~src->u[1];
+   dst->u[2] = ~src->u[2];
+   dst->u[3] = ~src->u[3];
+}
+
+static void
+micro_or(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] | src1->u[0];
+   dst->u[1] = src0->u[1] | src1->u[1];
+   dst->u[2] = src0->u[2] | src1->u[2];
+   dst->u[3] = src0->u[3] | src1->u[3];
+}
+
+static void
+micro_pow(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = powf( src0->f[0], src1->f[0] );
+   dst->f[1] = powf( src0->f[1], src1->f[1] );
+   dst->f[2] = powf( src0->f[2], src1->f[2] );
+   dst->f[3] = powf( src0->f[3], src1->f[3] );
+}
+
+static void
+micro_rnd(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = floorf( src->f[0] + 0.5f );
+   dst->f[1] = floorf( src->f[1] + 0.5f );
+   dst->f[2] = floorf( src->f[2] + 0.5f );
+   dst->f[3] = floorf( src->f[3] + 0.5f );
+}
+
+static void
+micro_shl(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] << src1->i[0];
+   dst->i[1] = src0->i[1] << src1->i[1];
+   dst->i[2] = src0->i[2] << src1->i[2];
+   dst->i[3] = src0->i[3] << src1->i[3];
+}
+
+static void
+micro_ishr(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->i[0] = src0->i[0] >> src1->i[0];
+   dst->i[1] = src0->i[1] >> src1->i[1];
+   dst->i[2] = src0->i[2] >> src1->i[2];
+   dst->i[3] = src0->i[3] >> src1->i[3];
+}
+
+static void
+micro_trunc(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0 )
+{
+   dst->f[0] = (float) (int) src0->f[0];
+   dst->f[1] = (float) (int) src0->f[1];
+   dst->f[2] = (float) (int) src0->f[2];
+   dst->f[3] = (float) (int) src0->f[3];
+}
+
+static void
+micro_ushr(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] >> src1->u[0];
+   dst->u[1] = src0->u[1] >> src1->u[1];
+   dst->u[2] = src0->u[2] >> src1->u[2];
+   dst->u[3] = src0->u[3] >> src1->u[3];
+}
+
+static void
+micro_sin(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = sinf( src->f[0] );
+   dst->f[1] = sinf( src->f[1] );
+   dst->f[2] = sinf( src->f[2] );
+   dst->f[3] = sinf( src->f[3] );
+}
+
+static void
+micro_sqrt( union tgsi_exec_channel *dst,
+            const union tgsi_exec_channel *src )
+{
+   dst->f[0] = sqrtf( src->f[0] );
+   dst->f[1] = sqrtf( src->f[1] );
+   dst->f[2] = sqrtf( src->f[2] );
+   dst->f[3] = sqrtf( src->f[3] );
+}
+
+static void
+micro_sub(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->f[0] = src0->f[0] - src1->f[0];
+   dst->f[1] = src0->f[1] - src1->f[1];
+   dst->f[2] = src0->f[2] - src1->f[2];
+   dst->f[3] = src0->f[3] - src1->f[3];
+}
+
+static void
+micro_u2f(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = (float) src->u[0];
+   dst->f[1] = (float) src->u[1];
+   dst->f[2] = (float) src->u[2];
+   dst->f[3] = (float) src->u[3];
+}
+
+static void
+micro_xor(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1 )
+{
+   dst->u[0] = src0->u[0] ^ src1->u[0];
+   dst->u[1] = src0->u[1] ^ src1->u[1];
+   dst->u[2] = src0->u[2] ^ src1->u[2];
+   dst->u[3] = src0->u[3] ^ src1->u[3];
+}
+
+static void
+fetch_src_file_channel(
+   const struct tgsi_exec_machine *mach,
+   const uint file,
+   const uint swizzle,
+   const union tgsi_exec_channel *index,
+   union tgsi_exec_channel *chan )
+{
+   switch( swizzle ) {
+   case TGSI_EXTSWIZZLE_X:
+   case TGSI_EXTSWIZZLE_Y:
+   case TGSI_EXTSWIZZLE_Z:
+   case TGSI_EXTSWIZZLE_W:
+      switch( file ) {
+      case TGSI_FILE_CONSTANT:
+         chan->f[0] = mach->Consts[index->i[0]][swizzle];
+         chan->f[1] = mach->Consts[index->i[1]][swizzle];
+         chan->f[2] = mach->Consts[index->i[2]][swizzle];
+         chan->f[3] = mach->Consts[index->i[3]][swizzle];
+         break;
+
+      case TGSI_FILE_INPUT:
+         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
+         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
+         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
+         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
+         break;
+
+      case TGSI_FILE_TEMPORARY:
+         assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
+         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
+         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
+         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
+         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
+         break;
+
+      case TGSI_FILE_IMMEDIATE:
+         assert( index->i[0] < (int) mach->ImmLimit );
+         chan->f[0] = mach->Imms[index->i[0]][swizzle];
+         assert( index->i[1] < (int) mach->ImmLimit );
+         chan->f[1] = mach->Imms[index->i[1]][swizzle];
+         assert( index->i[2] < (int) mach->ImmLimit );
+         chan->f[2] = mach->Imms[index->i[2]][swizzle];
+         assert( index->i[3] < (int) mach->ImmLimit );
+         chan->f[3] = mach->Imms[index->i[3]][swizzle];
+         break;
+
+      case TGSI_FILE_ADDRESS:
+         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
+         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
+         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
+         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
+         break;
+
+      case TGSI_FILE_OUTPUT:
+         /* vertex/fragment output vars can be read too */
+         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
+         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
+         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
+         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
+         break;
+
+      default:
+         assert( 0 );
+      }
+      break;
+
+   case TGSI_EXTSWIZZLE_ZERO:
+      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
+      break;
+
+   case TGSI_EXTSWIZZLE_ONE:
+      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
+      break;
+
+   default:
+      assert( 0 );
+   }
+}
+
+static void
+fetch_source(
+   const struct tgsi_exec_machine *mach,
+   union tgsi_exec_channel *chan,
+   const struct tgsi_full_src_register *reg,
+   const uint chan_index )
+{
+   union tgsi_exec_channel index;
+   uint swizzle;
+
+   index.i[0] =
+   index.i[1] =
+   index.i[2] =
+   index.i[3] = reg->SrcRegister.Index;
+
+   if (reg->SrcRegister.Indirect) {
+      union tgsi_exec_channel index2;
+      union tgsi_exec_channel indir_index;
+
+      index2.i[0] =
+      index2.i[1] =
+      index2.i[2] =
+      index2.i[3] = reg->SrcRegisterInd.Index;
+
+      swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
+      fetch_src_file_channel(
+         mach,
+         reg->SrcRegisterInd.File,
+         swizzle,
+         &index2,
+         &indir_index );
+
+      index.i[0] += indir_index.i[0];
+      index.i[1] += indir_index.i[1];
+      index.i[2] += indir_index.i[2];
+      index.i[3] += indir_index.i[3];
+   }
+
+   if( reg->SrcRegister.Dimension ) {
+      switch( reg->SrcRegister.File ) {
+      case TGSI_FILE_INPUT:
+         index.i[0] *= 17;
+         index.i[1] *= 17;
+         index.i[2] *= 17;
+         index.i[3] *= 17;
+         break;
+      case TGSI_FILE_CONSTANT:
+         index.i[0] *= 4096;
+         index.i[1] *= 4096;
+         index.i[2] *= 4096;
+         index.i[3] *= 4096;
+         break;
+      default:
+         assert( 0 );
+      }
+
+      index.i[0] += reg->SrcRegisterDim.Index;
+      index.i[1] += reg->SrcRegisterDim.Index;
+      index.i[2] += reg->SrcRegisterDim.Index;
+      index.i[3] += reg->SrcRegisterDim.Index;
+
+      if (reg->SrcRegisterDim.Indirect) {
+         union tgsi_exec_channel index2;
+         union tgsi_exec_channel indir_index;
+
+         index2.i[0] =
+         index2.i[1] =
+         index2.i[2] =
+         index2.i[3] = reg->SrcRegisterDimInd.Index;
+
+         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
+         fetch_src_file_channel(
+            mach,
+            reg->SrcRegisterDimInd.File,
+            swizzle,
+            &index2,
+            &indir_index );
+
+         index.i[0] += indir_index.i[0];
+         index.i[1] += indir_index.i[1];
+         index.i[2] += indir_index.i[2];
+         index.i[3] += indir_index.i[3];
+      }
+   }
+
+   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+   fetch_src_file_channel(
+      mach,
+      reg->SrcRegister.File,
+      swizzle,
+      &index,
+      chan );
+
+   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
+   case TGSI_UTIL_SIGN_CLEAR:
+      micro_abs( chan, chan );
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      micro_abs( chan, chan );
+      micro_neg( chan, chan );
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      micro_neg( chan, chan );
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+      break;
+   }
+
+   if (reg->SrcRegisterExtMod.Complement) {
+      micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
+   }
+}
+
+static void
+store_dest(
+   struct tgsi_exec_machine *mach,
+   const union tgsi_exec_channel *chan,
+   const struct tgsi_full_dst_register *reg,
+   const struct tgsi_full_instruction *inst,
+   uint chan_index )
+{
+   union tgsi_exec_channel *dst;
+
+   switch( reg->DstRegister.File ) {
+   case TGSI_FILE_NULL:
+      return;
+
+   case TGSI_FILE_OUTPUT:
+      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
+                           + reg->DstRegister.Index].xyzw[chan_index];
+      break;
+
+   case TGSI_FILE_TEMPORARY:
+      assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS);
+      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
+      break;
+
+   case TGSI_FILE_ADDRESS:
+      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
+      break;
+
+   default:
+      assert( 0 );
+      return;
+   }
+
+   switch (inst->Instruction.Saturate)
+   {
+   case TGSI_SAT_NONE:
+      if (mach->ExecMask & 0x1)
+         dst->i[0] = chan->i[0];
+      if (mach->ExecMask & 0x2)
+         dst->i[1] = chan->i[1];
+      if (mach->ExecMask & 0x4)
+         dst->i[2] = chan->i[2];
+      if (mach->ExecMask & 0x8)
+         dst->i[3] = chan->i[3];
+      break;
+
+   case TGSI_SAT_ZERO_ONE:
+      /* XXX need to obey ExecMask here */
+      micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+      micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+      break;
+
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      assert( 0 );
+      break;
+
+   default:
+      assert( 0 );
+   }
+}
+
+#define FETCH(VAL,INDEX,CHAN)\
+    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
+
+#define STORE(VAL,INDEX,CHAN)\
+    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
+
+
+/**
+ * Execute ARB-style KIL which is predicated by a src register.
+ * Kill fragment if any of the four values is less than zero.
+ */
+static void
+exec_kilp(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   uint uniquemask;
+   uint chan_index;
+   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
+   union tgsi_exec_channel r[1];
+
+   /* This mask stores component bits that were already tested. Note that
+    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
+    * tested. */
+   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+
+   for (chan_index = 0; chan_index < 4; chan_index++)
+   {
+      uint swizzle;
+      uint i;
+
+      /* unswizzle channel */
+      swizzle = tgsi_util_get_full_src_register_extswizzle (
+                        &inst->FullSrcRegisters[0],
+                        chan_index);
+
+      /* check if the component has not been already tested */
+      if (uniquemask & (1 << swizzle))
+         continue;
+      uniquemask |= 1 << swizzle;
+
+      FETCH(&r[0], 0, chan_index);
+      for (i = 0; i < 4; i++)
+         if (r[0].f[i] < 0.0f)
+            kilmask |= 1 << i;
+   }
+
+   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
+}
+
+
+/*
+ * Fetch a texel using STR texture coordinates.
+ */
+static void
+fetch_texel( struct tgsi_sampler *sampler,
+             const union tgsi_exec_channel *s,
+             const union tgsi_exec_channel *t,
+             const union tgsi_exec_channel *p,
+             float lodbias,  /* XXX should be float[4] */
+             union tgsi_exec_channel *r,
+             union tgsi_exec_channel *g,
+             union tgsi_exec_channel *b,
+             union tgsi_exec_channel *a )
+{
+   uint j;
+   float rgba[NUM_CHANNELS][QUAD_SIZE];
+
+   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
+
+   for (j = 0; j < 4; j++) {
+      r->f[j] = rgba[0][j];
+      g->f[j] = rgba[1][j];
+      b->f[j] = rgba[2][j];
+      a->f[j] = rgba[3][j];
+   }
+}
+
+
+static void
+exec_tex(struct tgsi_exec_machine *mach,
+         const struct tgsi_full_instruction *inst,
+         boolean biasLod,
+         boolean projected)
+{
+   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   union tgsi_exec_channel r[8];
+   uint chan_index;
+   float lodBias;
+
+   /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
+
+   switch (inst->InstructionExtTexture.Texture) {
+   case TGSI_TEXTURE_1D:
+
+      FETCH(&r[0], 0, CHAN_X);
+
+      if (projected) {
+         FETCH(&r[1], 0, CHAN_W);
+         micro_div( &r[0], &r[0], &r[1] );
+      }
+
+      if (biasLod) {
+         FETCH(&r[1], 0, CHAN_W);
+         lodBias = r[2].f[0];
+      }
+      else
+         lodBias = 0.0;
+
+      fetch_texel(&mach->Samplers[unit],
+                  &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
+                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+      break;
+
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 0, CHAN_Z);
+
+      if (projected) {
+         FETCH(&r[3], 0, CHAN_W);
+         micro_div( &r[0], &r[0], &r[3] );
+         micro_div( &r[1], &r[1], &r[3] );
+         micro_div( &r[2], &r[2], &r[3] );
+      }
+
+      if (biasLod) {
+         FETCH(&r[3], 0, CHAN_W);
+         lodBias = r[3].f[0];
+      }
+      else
+         lodBias = 0.0;
+
+      fetch_texel(&mach->Samplers[unit],
+                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
+                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
+      break;
+
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 0, CHAN_Z);
+
+      if (projected) {
+         FETCH(&r[3], 0, CHAN_W);
+         micro_div( &r[0], &r[0], &r[3] );
+         micro_div( &r[1], &r[1], &r[3] );
+         micro_div( &r[2], &r[2], &r[3] );
+      }
+
+      if (biasLod) {
+         FETCH(&r[3], 0, CHAN_W);
+         lodBias = r[3].f[0];
+      }
+      else
+         lodBias = 0.0;
+
+      fetch_texel(&mach->Samplers[unit],
+                  &r[0], &r[1], &r[2], lodBias,
+                  &r[0], &r[1], &r[2], &r[3]);
+      break;
+
+   default:
+      assert (0);
+   }
+
+   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+      STORE( &r[chan_index], 0, chan_index );
+   }
+}
+
+
+/**
+ * Evaluate a constant-valued coefficient at the position of the
+ * current quad.
+ */
+static void
+eval_constant_coef(
+   struct tgsi_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan )
+{
+   unsigned i;
+
+   for( i = 0; i < QUAD_SIZE; i++ ) {
+      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
+   }
+}
+
+/**
+ * Evaluate a linear-valued coefficient at the position of the
+ * current quad.
+ */
+static void
+eval_linear_coef(
+   struct tgsi_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan )
+{
+   const float x = mach->QuadPos.xyzw[0].f[0];
+   const float y = mach->QuadPos.xyzw[1].f[0];
+   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
+   const float dady = mach->InterpCoefs[attrib].dady[chan];
+   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
+   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
+   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
+   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
+   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
+}
+
+/**
+ * Evaluate a perspective-valued coefficient at the position of the
+ * current quad.
+ */
+static void
+eval_perspective_coef(
+   struct tgsi_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan )
+{
+   const float x = mach->QuadPos.xyzw[0].f[0];
+   const float y = mach->QuadPos.xyzw[1].f[0];
+   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
+   const float dady = mach->InterpCoefs[attrib].dady[chan];
+   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
+   const float *w = mach->QuadPos.xyzw[3].f;
+   /* divide by W here */
+   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
+   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
+   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
+   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
+}
+
+
+typedef void (* eval_coef_func)(
+   struct tgsi_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan );
+
+static void
+exec_declaration(
+   struct tgsi_exec_machine *mach,
+   const struct tgsi_full_declaration *decl )
+{
+   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
+      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+         unsigned first, last, mask;
+         eval_coef_func eval;
+
+         first = decl->DeclarationRange.First;
+         last = decl->DeclarationRange.Last;
+         mask = decl->Declaration.UsageMask;
+
+         switch( decl->Declaration.Interpolate ) {
+         case TGSI_INTERPOLATE_CONSTANT:
+            eval = eval_constant_coef;
+            break;
+
+         case TGSI_INTERPOLATE_LINEAR:
+            eval = eval_linear_coef;
+            break;
+
+         case TGSI_INTERPOLATE_PERSPECTIVE:
+            eval = eval_perspective_coef;
+            break;
+
+         default:
+            assert( 0 );
+         }
+
+         if( mask == TGSI_WRITEMASK_XYZW ) {
+            unsigned i, j;
+
+            for( i = first; i <= last; i++ ) {
+               for( j = 0; j < NUM_CHANNELS; j++ ) {
+                  eval( mach, i, j );
+               }
+            }
+         }
+         else {
+            unsigned i, j;
+
+            for( j = 0; j < NUM_CHANNELS; j++ ) {
+               if( mask & (1 << j) ) {
+                  for( i = first; i <= last; i++ ) {
+                     eval( mach, i, j );
+                  }
+               }
+            }
+         }
+      }
+   }
+}
+
+static void
+exec_instruction(
+   struct tgsi_exec_machine *mach,
+   const struct tgsi_full_instruction *inst,
+   int *pc )
+{
+   uint chan_index;
+   union tgsi_exec_channel r[8];
+
+   (*pc)++;
+
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ARL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 FETCH( &r[0], 0, chan_index );
+	 micro_f2it( &r[0], &r[0] );
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_SWZ:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LIT:
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+	 FETCH( &r[0], 0, CHAN_X );
+	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+	    micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+	    STORE( &r[0], 0, CHAN_Y );
+	 }
+
+	 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+	    FETCH( &r[1], 0, CHAN_Y );
+	    micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+
+	    FETCH( &r[2], 0, CHAN_W );
+	    micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
+	    micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
+	    micro_pow( &r[1], &r[1], &r[2] );
+	    micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+	    STORE( &r[0], 0, CHAN_Z );
+	 }
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_RCP:
+   /* TGSI_OPCODE_RECIP */
+      FETCH( &r[0], 0, CHAN_X );
+      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_RSQ:
+   /* TGSI_OPCODE_RECIPSQRT */
+      FETCH( &r[0], 0, CHAN_X );
+      micro_sqrt( &r[0], &r[0] );
+      micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_EXP:
+      FETCH( &r[0], 0, CHAN_X );
+      micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+         micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
+	 STORE( &r[2], 0, CHAN_X );        /* store r2 */
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+         micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
+	 STORE( &r[2], 0, CHAN_Y );        /* store r2 */
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
+	 STORE( &r[2], 0, CHAN_Z );        /* store r2 */
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_LOG:
+      FETCH( &r[0], 0, CHAN_X );
+      micro_abs( &r[2], &r[0] );  /* r2 = abs(r0) */
+      micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
+      micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+	 STORE( &r[0], 0, CHAN_X );
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+         micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
+         micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
+	 STORE( &r[0], 0, CHAN_Y );
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+	 STORE( &r[1], 0, CHAN_Z );
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MUL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
+      {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         micro_mul( &r[0], &r[0], &r[1] );
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_ADD:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_add( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DP3:
+   /* TGSI_OPCODE_DOT3 */
+      FETCH( &r[0], 0, CHAN_X );
+      FETCH( &r[1], 1, CHAN_X );
+      micro_mul( &r[0], &r[0], &r[1] );
+
+      FETCH( &r[1], 0, CHAN_Y );
+      FETCH( &r[2], 1, CHAN_Y );
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FETCH( &r[1], 0, CHAN_Z );
+      FETCH( &r[2], 1, CHAN_Z );
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+    case TGSI_OPCODE_DP4:
+    /* TGSI_OPCODE_DOT4 */
+       FETCH(&r[0], 0, CHAN_X);
+       FETCH(&r[1], 1, CHAN_X);
+
+       micro_mul( &r[0], &r[0], &r[1] );
+
+       FETCH(&r[1], 0, CHAN_Y);
+       FETCH(&r[2], 1, CHAN_Y);
+
+       micro_mul( &r[1], &r[1], &r[2] );
+       micro_add( &r[0], &r[0], &r[1] );
+
+       FETCH(&r[1], 0, CHAN_Z);
+       FETCH(&r[2], 1, CHAN_Z);
+
+       micro_mul( &r[1], &r[1], &r[2] );
+       micro_add( &r[0], &r[0], &r[1] );
+
+       FETCH(&r[1], 0, CHAN_W);
+       FETCH(&r[2], 1, CHAN_W);
+
+       micro_mul( &r[1], &r[1], &r[2] );
+       micro_add( &r[0], &r[0], &r[1] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DST:
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+	 FETCH( &r[0], 0, CHAN_Y );
+	 FETCH( &r[1], 1, CHAN_Y);
+	 micro_mul( &r[0], &r[0], &r[1] );
+	 STORE( &r[0], 0, CHAN_Y );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+	 FETCH( &r[0], 0, CHAN_Z );
+	 STORE( &r[0], 0, CHAN_Z );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+	 FETCH( &r[0], 1, CHAN_W );
+	 STORE( &r[0], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MIN:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         /* XXX use micro_min()?? */
+         micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_MAX:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         /* XXX use micro_max()?? */
+         micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
+
+         STORE(&r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLT:
+   /* TGSI_OPCODE_SETLT */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SGE:
+   /* TGSI_OPCODE_SETGE */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MAD:
+   /* TGSI_OPCODE_MADD */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_mul( &r[0], &r[0], &r[1] );
+         FETCH( &r[1], 2, chan_index );
+         micro_add( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SUB:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         micro_sub( &r[0], &r[0], &r[1] );
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_LERP:
+   /* TGSI_OPCODE_LRP */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+         FETCH(&r[2], 2, chan_index);
+
+         micro_sub( &r[1], &r[1], &r[2] );
+         micro_mul( &r[0], &r[0], &r[1] );
+         micro_add( &r[0], &r[0], &r[2] );
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_CND:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CND0:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_DOT2ADD:
+      /* TGSI_OPCODE_DP2A */
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_INDEX:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_NEGATE:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_FRAC:
+   /* TGSI_OPCODE_FRC */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_frc( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CLAMP:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_FLOOR:
+   /* TGSI_OPCODE_FLR */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_flr( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_ROUND:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_rnd( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_EXPBASE2:
+    /* TGSI_OPCODE_EX2 */
+      FETCH(&r[0], 0, CHAN_X);
+
+      micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LOGBASE2:
+   /* TGSI_OPCODE_LG2 */
+      FETCH( &r[0], 0, CHAN_X );
+      micro_lg2( &r[0], &r[0] );
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_POWER:
+      /* TGSI_OPCODE_POW */
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 1, CHAN_X);
+
+      micro_pow( &r[0], &r[0], &r[1] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CROSSPRODUCT:
+      /* TGSI_OPCODE_XPD */
+      FETCH(&r[0], 0, CHAN_Y);
+      FETCH(&r[1], 1, CHAN_Z);
+
+      micro_mul( &r[2], &r[0], &r[1] );
+
+      FETCH(&r[3], 0, CHAN_Z);
+      FETCH(&r[4], 1, CHAN_Y);
+
+      micro_mul( &r[5], &r[3], &r[4] );
+      micro_sub( &r[2], &r[2], &r[5] );
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+         STORE( &r[2], 0, CHAN_X );
+      }
+
+      FETCH(&r[2], 1, CHAN_X);
+
+      micro_mul( &r[3], &r[3], &r[2] );
+
+      FETCH(&r[5], 0, CHAN_X);
+
+      micro_mul( &r[1], &r[1], &r[5] );
+      micro_sub( &r[3], &r[3], &r[1] );
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+         STORE( &r[3], 0, CHAN_Y );
+      }
+
+      micro_mul( &r[5], &r[5], &r[4] );
+      micro_mul( &r[0], &r[0], &r[2] );
+      micro_sub( &r[5], &r[5], &r[0] );
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         STORE( &r[5], 0, CHAN_Z );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+    case TGSI_OPCODE_MULTIPLYMATRIX:
+       assert (0);
+       break;
+
+    case TGSI_OPCODE_ABS:
+       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+          FETCH(&r[0], 0, chan_index);
+
+          micro_abs( &r[0], &r[0] );
+
+          STORE(&r[0], 0, chan_index);
+       }
+       break;
+
+   case TGSI_OPCODE_RCC:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_DPH:
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 1, CHAN_X);
+
+      micro_mul( &r[0], &r[0], &r[1] );
+
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 1, CHAN_Y);
+
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FETCH(&r[1], 0, CHAN_Z);
+      FETCH(&r[2], 1, CHAN_Z);
+
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FETCH(&r[1], 1, CHAN_W);
+
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_COS:
+      FETCH(&r[0], 0, CHAN_X);
+
+      micro_cos( &r[0], &r[0] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+	 STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DDX:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_ddx( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DDY:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_ddy( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_KILP:
+      exec_kilp (mach, inst);
+      break;
+
+   case TGSI_OPCODE_KIL:
+      /* for enabled ExecMask bits, set the killed bit */
+      mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
+      break;
+
+   case TGSI_OPCODE_PK2H:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_PK2US:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_PK4B:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_PK4UB:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_RFL:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_SEQ:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_eq( &r[0], &r[0], &r[1],
+                   &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
+                   &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SFL:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_SGT:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SIN:
+      FETCH( &r[0], 0, CHAN_X );
+      micro_sin( &r[0], &r[0] );
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLE:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SNE:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_STR:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TEX:
+      /* simple texture lookup */
+      /* src[0] = texcoord */
+      /* src[1] = sampler unit */
+      exec_tex(mach, inst, FALSE, FALSE);
+      break;
+
+   case TGSI_OPCODE_TXB:
+      /* Texture lookup with lod bias */
+      /* src[0] = texcoord (src[0].w = LOD bias) */
+      /* src[1] = sampler unit */
+      exec_tex(mach, inst, TRUE, FALSE);
+      break;
+
+   case TGSI_OPCODE_TXD:
+      /* Texture lookup with explict partial derivatives */
+      /* src[0] = texcoord */
+      /* src[1] = d[strq]/dx */
+      /* src[2] = d[strq]/dy */
+      /* src[3] = sampler unit */
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TXL:
+      /* Texture lookup with explit LOD */
+      /* src[0] = texcoord (src[0].w = LOD) */
+      /* src[1] = sampler unit */
+      exec_tex(mach, inst, TRUE, FALSE);
+      break;
+
+   case TGSI_OPCODE_TXP:
+      /* Texture lookup with projection */
+      /* src[0] = texcoord (src[0].w = projection) */
+      /* src[1] = sampler unit */
+      exec_tex(mach, inst, FALSE, TRUE);
+      break;
+
+   case TGSI_OPCODE_UP2H:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_UP2US:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_UP4B:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_UP4UB:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_X2D:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_ARA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_ARR:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_BRA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CAL:
+      /* skip the call if no execution channels are enabled */
+      if (mach->ExecMask) {
+         /* do the call */
+
+         /* push the Cond, Loop, Cont stacks */
+         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+
+         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
+         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
+
+         /* note that PC was already incremented above */
+         mach->CallStack[mach->CallStackTop++] = *pc;
+         *pc = inst->InstructionExtLabel.Label;
+      }
+      break;
+
+   case TGSI_OPCODE_RET:
+      mach->FuncMask &= ~mach->ExecMask;
+      UPDATE_EXEC_MASK(mach);
+
+      if (mach->ExecMask == 0x0) {
+         /* really return now (otherwise, keep executing */
+
+         if (mach->CallStackTop == 0) {
+            /* returning from main() */
+            *pc = -1;
+            return;
+         }
+         *pc = mach->CallStack[--mach->CallStackTop];
+
+         /* pop the Cond, Loop, Cont stacks */
+         assert(mach->CondStackTop > 0);
+         mach->CondMask = mach->CondStack[--mach->CondStackTop];
+         assert(mach->LoopStackTop > 0);
+         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+         assert(mach->ContStackTop > 0);
+         mach->ContMask = mach->ContStack[--mach->ContStackTop];
+         assert(mach->FuncStackTop > 0);
+         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
+
+         UPDATE_EXEC_MASK(mach);
+      }
+      break;
+
+   case TGSI_OPCODE_SSG:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CMP:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+         FETCH(&r[2], 2, chan_index);
+
+         micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_SCS:
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+         FETCH( &r[0], 0, CHAN_X );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
+         micro_cos( &r[1], &r[0] );
+         STORE( &r[1], 0, CHAN_X );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+         micro_sin( &r[1], &r[0] );
+         STORE( &r[1], 0, CHAN_Y );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_NRM:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_DIV:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_DP2:
+      FETCH( &r[0], 0, CHAN_X );
+      FETCH( &r[1], 1, CHAN_X );
+      micro_mul( &r[0], &r[0], &r[1] );
+
+      FETCH( &r[1], 0, CHAN_Y );
+      FETCH( &r[2], 1, CHAN_Y );
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_IF:
+      /* push CondMask */
+      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+      FETCH( &r[0], 0, CHAN_X );
+      /* update CondMask */
+      if( ! r[0].u[0] ) {
+         mach->CondMask &= ~0x1;
+      }
+      if( ! r[0].u[1] ) {
+         mach->CondMask &= ~0x2;
+      }
+      if( ! r[0].u[2] ) {
+         mach->CondMask &= ~0x4;
+      }
+      if( ! r[0].u[3] ) {
+         mach->CondMask &= ~0x8;
+      }
+      UPDATE_EXEC_MASK(mach);
+      /* Todo: If CondMask==0, jump to ELSE */
+      break;
+
+   case TGSI_OPCODE_ELSE:
+      /* invert CondMask wrt previous mask */
+      {
+         uint prevMask;
+         assert(mach->CondStackTop > 0);
+         prevMask = mach->CondStack[mach->CondStackTop - 1];
+         mach->CondMask = ~mach->CondMask & prevMask;
+         UPDATE_EXEC_MASK(mach);
+         /* Todo: If CondMask==0, jump to ENDIF */
+      }
+      break;
+
+   case TGSI_OPCODE_ENDIF:
+      /* pop CondMask */
+      assert(mach->CondStackTop > 0);
+      mach->CondMask = mach->CondStack[--mach->CondStackTop];
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_END:
+      /* halt execution */
+      *pc = -1;
+      break;
+
+   case TGSI_OPCODE_REP:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_ENDREP:
+       assert (0);
+       break;
+
+   case TGSI_OPCODE_PUSHA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_POPA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CEIL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_ceil( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_I2F:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_i2f( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_NOT:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_not( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_TRUNC:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_trunc( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SHL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_shl( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SHR:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_ishr( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_AND:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_and( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_OR:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_or( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MOD:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_XOR:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         micro_xor( &r[0], &r[0], &r[1] );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SAD:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TXF:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TXQ:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_EMIT:
+      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
+      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+      break;
+
+   case TGSI_OPCODE_ENDPRIM:
+      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
+      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
+      break;
+
+   case TGSI_OPCODE_LOOP:
+      /* fall-through (for now) */
+   case TGSI_OPCODE_BGNLOOP2:
+      /* push LoopMask and ContMasks */
+      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+      break;
+
+   case TGSI_OPCODE_ENDLOOP:
+      /* fall-through (for now at least) */
+   case TGSI_OPCODE_ENDLOOP2:
+      /* Restore ContMask, but don't pop */
+      assert(mach->ContStackTop > 0);
+      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
+      UPDATE_EXEC_MASK(mach);
+      if (mach->ExecMask) {
+         /* repeat loop: jump to instruction just past BGNLOOP */
+         *pc = inst->InstructionExtLabel.Label + 1;
+      }
+      else {
+         /* exit loop: pop LoopMask */
+         assert(mach->LoopStackTop > 0);
+         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+         /* pop ContMask */
+         assert(mach->ContStackTop > 0);
+         mach->ContMask = mach->ContStack[--mach->ContStackTop];
+      }
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_BRK:
+      /* turn off loop channels for each enabled exec channel */
+      mach->LoopMask &= ~mach->ExecMask;
+      /* Todo: if mach->LoopMask == 0, jump to end of loop */
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_CONT:
+      /* turn off cont channels for each enabled exec channel */
+      mach->ContMask &= ~mach->ExecMask;
+      /* Todo: if mach->LoopMask == 0, jump to end of loop */
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_BGNSUB:
+      /* no-op */
+      break;
+
+   case TGSI_OPCODE_ENDSUB:
+      /* no-op */
+      break;
+
+   case TGSI_OPCODE_NOISE1:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOISE2:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOISE3:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOISE4:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOP:
+      break;
+
+   default:
+      assert( 0 );
+   }
+}
+
+
+/**
+ * Run TGSI interpreter.
+ * \return bitmask of "alive" quad components
+ */
+uint
+tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
+{
+   uint i;
+   int pc = 0;
+
+   mach->CondMask = 0xf;
+   mach->LoopMask = 0xf;
+   mach->ContMask = 0xf;
+   mach->FuncMask = 0xf;
+   mach->ExecMask = 0xf;
+
+   mach->CondStackTop = 0; /* temporarily subvert this assertion */
+   assert(mach->CondStackTop == 0);
+   assert(mach->LoopStackTop == 0);
+   assert(mach->ContStackTop == 0);
+   assert(mach->CallStackTop == 0);
+
+   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
+   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
+
+   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
+      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
+      mach->Primitives[0] = 0;
+   }
+
+
+   /* execute declarations (interpolants) */
+   for (i = 0; i < mach->NumDeclarations; i++) {
+      exec_declaration( mach, mach->Declarations+i );
+   }
+
+   /* execute instructions, until pc is set to -1 */
+   while (pc != -1) {
+      assert(pc < (int) mach->NumInstructions);
+      exec_instruction( mach, mach->Instructions + pc, &pc );
+   }
+
+#if 0
+   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
+   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
+      /*
+       * Scale back depth component.
+       */
+      for (i = 0; i < 4; i++)
+         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
+   }
+#endif
+
+   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+}
+
+
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
new file mode 100644
index 00000000000..4f30650b07b
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -0,0 +1,253 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#if !defined TGSI_EXEC_H
+#define TGSI_EXEC_H
+
+#include "pipe/p_compiler.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+#define MAX_LABELS 1024
+
+#define NUM_CHANNELS 4  /* R,G,B,A */
+#define QUAD_SIZE    4  /* 4 pixel/quad */
+
+/**
+  * Registers may be treated as float, signed int or unsigned int.
+  */
+union tgsi_exec_channel
+{
+   float    f[QUAD_SIZE];
+   int      i[QUAD_SIZE];
+   unsigned u[QUAD_SIZE];
+};
+
+/**
+  * A vector[RGBA] of channels[4 pixels]
+  */
+struct tgsi_exec_vector
+{
+   union tgsi_exec_channel xyzw[NUM_CHANNELS];
+};
+
+/**
+ * For fragment programs, information for computing fragment input
+ * values from plane equation of the triangle/line.
+ */
+struct tgsi_interp_coef
+{
+   float a0[NUM_CHANNELS];	/* in an xyzw layout */
+   float dadx[NUM_CHANNELS];
+   float dady[NUM_CHANNELS];
+};
+
+
+struct softpipe_tile_cache;  /**< Opaque to TGSI */
+
+/**
+ * Information for sampling textures, which must be implemented
+ * by code outside the TGSI executor.
+ */
+struct tgsi_sampler
+{
+   const struct pipe_sampler_state *state;
+   struct pipe_texture *texture;
+   /** Get samples for four fragments in a quad */
+   void (*get_samples)(struct tgsi_sampler *sampler,
+                       const float s[QUAD_SIZE],
+                       const float t[QUAD_SIZE],
+                       const float p[QUAD_SIZE],
+                       float lodbias,
+                       float rgba[NUM_CHANNELS][QUAD_SIZE]);
+   void *pipe; /*XXX temporary*/
+   struct softpipe_tile_cache *cache;
+};
+
+/**
+ * For branching/calling subroutines.
+ */
+struct tgsi_exec_labels
+{
+   unsigned labels[MAX_LABELS][2];
+   unsigned count;
+};
+
+
+#define TGSI_EXEC_NUM_TEMPS       128
+#define TGSI_EXEC_NUM_TEMP_EXTRAS   6
+#define TGSI_EXEC_NUM_IMMEDIATES  256
+
+/*
+ * Locations of various utility registers (_I = Index, _C = Channel)
+ */
+#define TGSI_EXEC_TEMP_00000000_I   (TGSI_EXEC_NUM_TEMPS + 0)
+#define TGSI_EXEC_TEMP_00000000_C   0
+
+#define TGSI_EXEC_TEMP_7FFFFFFF_I   (TGSI_EXEC_NUM_TEMPS + 0)
+#define TGSI_EXEC_TEMP_7FFFFFFF_C   1
+
+#define TGSI_EXEC_TEMP_80000000_I   (TGSI_EXEC_NUM_TEMPS + 0)
+#define TGSI_EXEC_TEMP_80000000_C   2
+
+#define TGSI_EXEC_TEMP_FFFFFFFF_I   (TGSI_EXEC_NUM_TEMPS + 0)
+#define TGSI_EXEC_TEMP_FFFFFFFF_C   3
+
+#define TGSI_EXEC_TEMP_ONE_I        (TGSI_EXEC_NUM_TEMPS + 1)
+#define TGSI_EXEC_TEMP_ONE_C        0
+
+#define TGSI_EXEC_TEMP_TWO_I        (TGSI_EXEC_NUM_TEMPS + 1)
+#define TGSI_EXEC_TEMP_TWO_C        1
+
+#define TGSI_EXEC_TEMP_128_I        (TGSI_EXEC_NUM_TEMPS + 1)
+#define TGSI_EXEC_TEMP_128_C        2
+
+#define TGSI_EXEC_TEMP_MINUS_128_I  (TGSI_EXEC_NUM_TEMPS + 1)
+#define TGSI_EXEC_TEMP_MINUS_128_C  3
+
+#define TGSI_EXEC_TEMP_KILMASK_I    (TGSI_EXEC_NUM_TEMPS + 2)
+#define TGSI_EXEC_TEMP_KILMASK_C    0
+
+#define TGSI_EXEC_TEMP_OUTPUT_I     (TGSI_EXEC_NUM_TEMPS + 2)
+#define TGSI_EXEC_TEMP_OUTPUT_C     1
+
+#define TGSI_EXEC_TEMP_PRIMITIVE_I  (TGSI_EXEC_NUM_TEMPS + 2)
+#define TGSI_EXEC_TEMP_PRIMITIVE_C  2
+
+#define TGSI_EXEC_TEMP_THREE_I      (TGSI_EXEC_NUM_TEMPS + 2)
+#define TGSI_EXEC_TEMP_THREE_C      3
+
+#define TGSI_EXEC_TEMP_HALF_I       (TGSI_EXEC_NUM_TEMPS + 3)
+#define TGSI_EXEC_TEMP_HALF_C       0
+
+#define TGSI_EXEC_TEMP_R0           (TGSI_EXEC_NUM_TEMPS + 4)
+
+#define TGSI_EXEC_TEMP_ADDR         (TGSI_EXEC_NUM_TEMPS + 5)
+
+
+#define TGSI_EXEC_MAX_COND_NESTING  20
+#define TGSI_EXEC_MAX_LOOP_NESTING  20
+#define TGSI_EXEC_MAX_CALL_NESTING  20
+
+/**
+ * Run-time virtual machine state for executing TGSI shader.
+ */
+struct tgsi_exec_machine
+{
+   /* Total = program temporaries + internal temporaries
+    *         + 1 padding to align to 16 bytes
+    */
+   struct tgsi_exec_vector       _Temps[TGSI_EXEC_NUM_TEMPS +
+                                        TGSI_EXEC_NUM_TEMP_EXTRAS + 1];
+
+   /*
+    * This will point to _Temps after aligning to 16B boundary.
+    */
+   struct tgsi_exec_vector       *Temps;
+   struct tgsi_exec_vector       *Addrs;
+
+   struct tgsi_sampler           *Samplers;
+
+   float                         Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
+   unsigned                      ImmLimit;
+   const float                   (*Consts)[4];
+   struct tgsi_exec_vector       *Inputs;
+   struct tgsi_exec_vector       *Outputs;
+   const struct tgsi_token       *Tokens;
+   unsigned                      Processor;
+
+   /* GEOMETRY processor only. */
+   unsigned                      *Primitives;
+
+   /* FRAGMENT processor only. */
+   const struct tgsi_interp_coef *InterpCoefs;
+   struct tgsi_exec_vector       QuadPos;
+
+   /* Conditional execution masks */
+   uint CondMask;  /**< For IF/ELSE/ENDIF */
+   uint LoopMask;  /**< For BGNLOOP/ENDLOOP */
+   uint ContMask;  /**< For loop CONT statements */
+   uint FuncMask;  /**< For function calls */
+   uint ExecMask;  /**< = CondMask & LoopMask */
+
+   /** Condition mask stack (for nested conditionals) */
+   uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
+   int CondStackTop;
+
+   /** Loop mask stack (for nested loops) */
+   uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
+   int LoopStackTop;
+
+   /** Loop continue mask stack (see comments in tgsi_exec.c) */
+   uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
+   int ContStackTop;
+
+   /** Function execution mask stack (for executing subroutine code) */
+   uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
+   int FuncStackTop;
+
+   /** Function call stack for saving/restoring the program counter */
+   uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
+   int CallStackTop;
+
+   struct tgsi_full_instruction *Instructions;
+   uint NumInstructions;
+
+   struct tgsi_full_declaration *Declarations;
+   uint NumDeclarations;
+
+   struct tgsi_exec_labels Labels;
+};
+
+void
+tgsi_exec_machine_init(
+   struct tgsi_exec_machine *mach );
+
+
+void 
+tgsi_exec_machine_bind_shader(
+   struct tgsi_exec_machine *mach,
+   const struct tgsi_token *tokens,
+   uint numSamplers,
+   struct tgsi_sampler *samplers);
+
+uint
+tgsi_exec_machine_run(
+   struct tgsi_exec_machine *mach );
+
+
+void
+tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
+
+
+#if defined __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* TGSI_EXEC_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c
new file mode 100644
index 00000000000..5371a88b964
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c
@@ -0,0 +1,85 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "tgsi_iterate.h"
+
+boolean
+tgsi_iterate_shader(
+   const struct tgsi_token *tokens,
+   struct tgsi_iterate_context *ctx )
+{
+   struct tgsi_parse_context parse;
+
+   if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK)
+      return FALSE;
+
+   ctx->processor = parse.FullHeader.Processor;
+   ctx->version = parse.FullVersion.Version;
+
+   if (ctx->prolog)
+      if (!ctx->prolog( ctx ))
+         goto fail;
+
+   while (!tgsi_parse_end_of_tokens( &parse )) {
+      tgsi_parse_token( &parse );
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (ctx->iterate_instruction)
+            if (!ctx->iterate_instruction( ctx, &parse.FullToken.FullInstruction ))
+               goto fail;
+         break;
+
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         if (ctx->iterate_declaration)
+            if (!ctx->iterate_declaration( ctx, &parse.FullToken.FullDeclaration ))
+               goto fail;
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         if (ctx->iterate_immediate)
+            if (!ctx->iterate_immediate( ctx, &parse.FullToken.FullImmediate ))
+               goto fail;
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+
+   if (ctx->epilog)
+      if (!ctx->epilog( ctx ))
+         goto fail;
+
+   tgsi_parse_free( &parse );
+   return TRUE;
+
+fail:
+   tgsi_parse_free( &parse );
+   return FALSE;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/tgsi_iterate.h
new file mode 100644
index 00000000000..ec7b85bf63d
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.h
@@ -0,0 +1,76 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_ITERATE_H
+#define TGSI_ITERATE_H
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+struct tgsi_iterate_context
+{
+   boolean
+   (* prolog)(
+      struct tgsi_iterate_context *ctx );
+
+   boolean
+   (* iterate_instruction)(
+      struct tgsi_iterate_context *ctx,
+      struct tgsi_full_instruction *inst );
+
+   boolean
+   (* iterate_declaration)(
+      struct tgsi_iterate_context *ctx,
+      struct tgsi_full_declaration *decl );
+
+   boolean
+   (* iterate_immediate)(
+      struct tgsi_iterate_context *ctx,
+      struct tgsi_full_immediate *imm );
+
+   boolean
+   (* epilog)(
+      struct tgsi_iterate_context *ctx );
+
+   struct tgsi_processor processor;
+   struct tgsi_version version;
+};
+
+boolean
+tgsi_iterate_shader(
+   const struct tgsi_token *tokens,
+   struct tgsi_iterate_context *ctx );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_ITERATE_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
new file mode 100644
index 00000000000..d16f0cdcad4
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -0,0 +1,332 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_parse.h"
+#include "tgsi_build.h"
+
+void
+tgsi_full_token_init(
+   union tgsi_full_token *full_token )
+{
+   full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION;
+}
+
+void
+tgsi_full_token_free(
+   union tgsi_full_token *full_token )
+{
+   if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) {
+      FREE( (void *) full_token->FullImmediate.u.Pointer );
+   }
+}
+
+unsigned
+tgsi_parse_init(
+   struct tgsi_parse_context *ctx,
+   const struct tgsi_token *tokens )
+{
+   ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0];
+   if( ctx->FullVersion.Version.MajorVersion > 1 ) {
+      return TGSI_PARSE_ERROR;
+   }
+
+   ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1];
+   if( ctx->FullHeader.Header.HeaderSize >= 2 ) {
+      ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2];
+   }
+   else {
+      ctx->FullHeader.Processor = tgsi_default_processor();
+   }
+
+   ctx->Tokens = tokens;
+   ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize;
+
+   tgsi_full_token_init( &ctx->FullToken );
+
+   return TGSI_PARSE_OK;
+}
+
+void
+tgsi_parse_free(
+   struct tgsi_parse_context *ctx )
+{
+   tgsi_full_token_free( &ctx->FullToken );
+}
+
+boolean
+tgsi_parse_end_of_tokens(
+   struct tgsi_parse_context *ctx )
+{
+   return ctx->Position >=
+      1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize;
+}
+
+static void
+next_token(
+   struct tgsi_parse_context *ctx,
+   void *token )
+{
+   assert( !tgsi_parse_end_of_tokens( ctx ) );
+
+   *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++];
+}
+
+void
+tgsi_parse_token(
+   struct tgsi_parse_context *ctx )
+{
+   struct tgsi_token token;
+   unsigned i;
+
+   tgsi_full_token_free( &ctx->FullToken );
+   tgsi_full_token_init( &ctx->FullToken );
+
+   next_token( ctx, &token );
+
+   switch( token.Type ) {
+   case TGSI_TOKEN_TYPE_DECLARATION:
+   {
+      struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration;
+
+      *decl = tgsi_default_full_declaration();
+      decl->Declaration = *(struct tgsi_declaration *) &token;
+
+      next_token( ctx, &decl->DeclarationRange );
+
+      if( decl->Declaration.Semantic ) {
+         next_token( ctx, &decl->Semantic );
+      }
+
+      break;
+   }
+
+   case TGSI_TOKEN_TYPE_IMMEDIATE:
+   {
+      struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate;
+
+      *imm = tgsi_default_full_immediate();
+      imm->Immediate = *(struct tgsi_immediate *) &token;
+
+      assert( !imm->Immediate.Extended );
+
+      switch (imm->Immediate.DataType) {
+      case TGSI_IMM_FLOAT32:
+         imm->u.Pointer = MALLOC(
+            sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) );
+         for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+            next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] );
+         }
+         break;
+
+      default:
+         assert( 0 );
+      }
+
+      break;
+   }
+
+   case TGSI_TOKEN_TYPE_INSTRUCTION:
+   {
+      struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction;
+      unsigned extended;
+
+      *inst = tgsi_default_full_instruction();
+      inst->Instruction = *(struct tgsi_instruction *) &token;
+
+      extended = inst->Instruction.Extended;
+
+      while( extended ) {
+         struct tgsi_src_register_ext token;
+
+         next_token( ctx, &token );
+
+         switch( token.Type ) {
+         case TGSI_INSTRUCTION_EXT_TYPE_NV:
+            inst->InstructionExtNv =
+               *(struct tgsi_instruction_ext_nv *) &token;
+            break;
+
+         case TGSI_INSTRUCTION_EXT_TYPE_LABEL:
+            inst->InstructionExtLabel =
+               *(struct tgsi_instruction_ext_label *) &token;
+            break;
+
+         case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE:
+            inst->InstructionExtTexture =
+               *(struct tgsi_instruction_ext_texture *) &token;
+            break;
+
+         default:
+            assert( 0 );
+         }
+
+         extended = token.Extended;
+      }
+
+      assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
+
+      for(  i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+         unsigned extended;
+
+         next_token( ctx, &inst->FullDstRegisters[i].DstRegister );
+
+         /*
+          * No support for indirect or multi-dimensional addressing.
+          */
+         assert( !inst->FullDstRegisters[i].DstRegister.Indirect );
+         assert( !inst->FullDstRegisters[i].DstRegister.Dimension );
+
+         extended = inst->FullDstRegisters[i].DstRegister.Extended;
+
+         while( extended ) {
+            struct tgsi_src_register_ext token;
+
+            next_token( ctx, &token );
+
+            switch( token.Type ) {
+            case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE:
+               inst->FullDstRegisters[i].DstRegisterExtConcode =
+                  *(struct tgsi_dst_register_ext_concode *) &token;
+               break;
+
+            case TGSI_DST_REGISTER_EXT_TYPE_MODULATE:
+               inst->FullDstRegisters[i].DstRegisterExtModulate =
+                  *(struct tgsi_dst_register_ext_modulate *) &token;
+               break;
+
+            default:
+               assert( 0 );
+            }
+
+            extended = token.Extended;
+         }
+      }
+
+      assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS );
+
+      for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+         unsigned extended;
+
+         next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister );
+
+         extended = inst->FullSrcRegisters[i].SrcRegister.Extended;
+
+         while( extended ) {
+            struct tgsi_src_register_ext token;
+
+            next_token( ctx, &token );
+
+            switch( token.Type ) {
+            case TGSI_SRC_REGISTER_EXT_TYPE_SWZ:
+               inst->FullSrcRegisters[i].SrcRegisterExtSwz =
+                  *(struct tgsi_src_register_ext_swz *) &token;
+               break;
+
+            case TGSI_SRC_REGISTER_EXT_TYPE_MOD:
+               inst->FullSrcRegisters[i].SrcRegisterExtMod =
+                  *(struct tgsi_src_register_ext_mod *) &token;
+               break;
+
+            default:
+               assert( 0 );
+            }
+
+            extended = token.Extended;
+         }
+
+         if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) {
+            next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd );
+
+            /*
+             * No support for indirect or multi-dimensional addressing.
+             */
+            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
+            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
+            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
+         }
+
+         if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) {
+            next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim );
+
+            /*
+             * No support for multi-dimensional addressing.
+             */
+            assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension );
+            assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended );
+
+            if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) {
+               next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd );
+
+               /*
+               * No support for indirect or multi-dimensional addressing.
+               */
+               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
+               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
+               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
+            }
+         }
+      }
+
+      break;
+   }
+
+   default:
+      assert( 0 );
+   }
+}
+
+
+unsigned
+tgsi_num_tokens(const struct tgsi_token *tokens)
+{
+   struct tgsi_parse_context ctx;
+   if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) {
+      unsigned len = (ctx.FullHeader.Header.HeaderSize +
+                      ctx.FullHeader.Header.BodySize +
+                      1);
+      return len;
+   }
+   return 0;
+}
+
+
+/**
+ * Make a new copy of a token array.
+ */
+struct tgsi_token *
+tgsi_dup_tokens(const struct tgsi_token *tokens)
+{
+   unsigned n = tgsi_num_tokens(tokens);
+   unsigned bytes = n * sizeof(struct tgsi_token);
+   struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes);
+   if (new_tokens)
+      memcpy(new_tokens, tokens, bytes);
+   return new_tokens;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
new file mode 100644
index 00000000000..054350712d8
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -0,0 +1,151 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_PARSE_H
+#define TGSI_PARSE_H
+
+#include "pipe/p_shader_tokens.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+struct tgsi_full_version
+{
+   struct tgsi_version  Version;
+};
+
+struct tgsi_full_header
+{
+   struct tgsi_header      Header;
+   struct tgsi_processor   Processor;
+};
+
+struct tgsi_full_dst_register
+{
+   struct tgsi_dst_register               DstRegister;
+   struct tgsi_dst_register_ext_concode   DstRegisterExtConcode;
+   struct tgsi_dst_register_ext_modulate  DstRegisterExtModulate;
+};
+
+struct tgsi_full_src_register
+{
+   struct tgsi_src_register         SrcRegister;
+   struct tgsi_src_register_ext_swz SrcRegisterExtSwz;
+   struct tgsi_src_register_ext_mod SrcRegisterExtMod;
+   struct tgsi_src_register         SrcRegisterInd;
+   struct tgsi_dimension            SrcRegisterDim;
+   struct tgsi_src_register         SrcRegisterDimInd;
+};
+
+struct tgsi_full_declaration
+{
+   struct tgsi_declaration Declaration;
+   struct tgsi_declaration_range DeclarationRange;
+   struct tgsi_declaration_semantic Semantic;
+};
+
+struct tgsi_full_immediate
+{
+   struct tgsi_immediate   Immediate;
+   union
+   {
+      const void                          *Pointer;
+      const struct tgsi_immediate_float32 *ImmediateFloat32;
+   } u;
+};
+
+#define TGSI_FULL_MAX_DST_REGISTERS 2
+#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */
+
+struct tgsi_full_instruction
+{
+   struct tgsi_instruction             Instruction;
+   struct tgsi_instruction_ext_nv      InstructionExtNv;
+   struct tgsi_instruction_ext_label   InstructionExtLabel;
+   struct tgsi_instruction_ext_texture InstructionExtTexture;
+   struct tgsi_full_dst_register       FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
+   struct tgsi_full_src_register       FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
+};
+
+union tgsi_full_token
+{
+   struct tgsi_token             Token;
+   struct tgsi_full_declaration  FullDeclaration;
+   struct tgsi_full_immediate    FullImmediate;
+   struct tgsi_full_instruction  FullInstruction;
+};
+
+void
+tgsi_full_token_init(
+   union tgsi_full_token *full_token );
+
+void
+tgsi_full_token_free(
+   union tgsi_full_token *full_token );
+
+struct tgsi_parse_context
+{
+   const struct tgsi_token    *Tokens;
+   unsigned                   Position;
+   struct tgsi_full_version   FullVersion;
+   struct tgsi_full_header    FullHeader;
+   union tgsi_full_token      FullToken;
+};
+
+#define TGSI_PARSE_OK      0
+#define TGSI_PARSE_ERROR   1
+
+unsigned
+tgsi_parse_init(
+   struct tgsi_parse_context *ctx,
+   const struct tgsi_token *tokens );
+
+void
+tgsi_parse_free(
+   struct tgsi_parse_context *ctx );
+
+boolean
+tgsi_parse_end_of_tokens(
+   struct tgsi_parse_context *ctx );
+
+void
+tgsi_parse_token(
+   struct tgsi_parse_context *ctx );
+
+unsigned
+tgsi_num_tokens(const struct tgsi_token *tokens);
+
+struct tgsi_token *
+tgsi_dup_tokens(const struct tgsi_token *tokens);
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_PARSE_H */
+
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
new file mode 100644
index 00000000000..2e3ec96b5b5
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -0,0 +1,341 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "tgsi_sanity.h"
+#include "tgsi_iterate.h"
+
+#define MAX_REGISTERS 256
+
+typedef uint reg_flag;
+
+#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8)
+
+struct sanity_check_ctx
+{
+   struct tgsi_iterate_context iter;
+
+   reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG];
+   reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG];
+   boolean regs_ind_used[TGSI_FILE_COUNT];
+   uint num_imms;
+   uint num_instructions;
+   uint index_of_END;
+
+   uint errors;
+   uint warnings;
+};
+
+static void
+report_error(
+   struct sanity_check_ctx *ctx,
+   const char *format,
+   ... )
+{
+   va_list args;
+
+   debug_printf( "Error  : " );
+   va_start( args, format );
+   _debug_vprintf( format, args );
+   va_end( args );
+   debug_printf( "\n" );
+   ctx->errors++;
+}
+
+static void
+report_warning(
+   struct sanity_check_ctx *ctx,
+   const char *format,
+   ... )
+{
+   va_list args;
+
+   debug_printf( "Warning: " );
+   va_start( args, format );
+   _debug_vprintf( format, args );
+   va_end( args );
+   debug_printf( "\n" );
+   ctx->warnings++;
+}
+
+static boolean
+check_file_name(
+   struct sanity_check_ctx *ctx,
+   uint file )
+{
+   if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) {
+      report_error( ctx, "Invalid register file name" );
+      return FALSE;
+   }
+   return TRUE;
+}
+
+static boolean
+is_register_declared(
+   struct sanity_check_ctx *ctx,
+   uint file,
+   int index )
+{
+   assert( index >= 0 && index < MAX_REGISTERS );
+
+   return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
+}
+
+static boolean
+is_any_register_declared(
+   struct sanity_check_ctx *ctx,
+   uint file )
+{
+   uint i;
+
+   for (i = 0; i < MAX_REGISTERS / BITS_IN_REG_FLAG; i++)
+      if (ctx->regs_decl[file][i])
+         return TRUE;
+   return FALSE;
+}
+
+static boolean
+is_register_used(
+   struct sanity_check_ctx *ctx,
+   uint file,
+   int index )
+{
+   assert( index < MAX_REGISTERS );
+
+   return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
+}
+
+static const char *file_names[] =
+{
+   "NULL",
+   "CONST",
+   "IN",
+   "OUT",
+   "TEMP",
+   "SAMP",
+   "ADDR",
+   "IMM"
+};
+
+static boolean
+check_register_usage(
+   struct sanity_check_ctx *ctx,
+   uint file,
+   int index,
+   const char *name,
+   boolean indirect_access )
+{
+   if (!check_file_name( ctx, file ))
+      return FALSE;
+   if (indirect_access) {
+      if (!is_any_register_declared( ctx, file ))
+         report_error( ctx, "%s: Undeclared %s register", file_names[file], name );
+      ctx->regs_ind_used[file] = TRUE;
+   }
+   else {
+      if (!is_register_declared( ctx, file, index ))
+         report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name );
+      ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG));
+   }
+   return TRUE;
+}
+
+static boolean
+iter_instruction(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_instruction *inst )
+{
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
+   uint i;
+
+   /* There must be no other instructions after END.
+    */
+   if (ctx->index_of_END != ~0) {
+      report_error( ctx, "Unexpected instruction after END" );
+   }
+   else if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
+      ctx->index_of_END = ctx->num_instructions;
+   }
+
+   /* Check destination and source registers' validity.
+    * Mark the registers as used.
+    */
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+      check_register_usage(
+         ctx,
+         inst->FullDstRegisters[i].DstRegister.File,
+         inst->FullDstRegisters[i].DstRegister.Index,
+         "destination",
+         FALSE );
+   }
+   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+      check_register_usage(
+         ctx,
+         inst->FullSrcRegisters[i].SrcRegister.File,
+         inst->FullSrcRegisters[i].SrcRegister.Index,
+         "source",
+         (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect );
+      if (inst->FullSrcRegisters[i].SrcRegister.Indirect) {
+         uint file;
+         int index;
+
+         file = inst->FullSrcRegisters[i].SrcRegisterInd.File;
+         index = inst->FullSrcRegisters[i].SrcRegisterInd.Index;
+         check_register_usage(
+            ctx,
+            file,
+            index,
+            "indirect",
+            FALSE );
+         if (file != TGSI_FILE_ADDRESS || index != 0)
+            report_warning( ctx, "Indirect register not ADDR[0]" );
+      }
+   }
+
+   ctx->num_instructions++;
+
+   return TRUE;
+}
+
+static boolean
+iter_declaration(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_declaration *decl )
+{
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
+   uint file;
+   uint i;
+
+   /* No declarations allowed after the first instruction.
+    */
+   if (ctx->num_instructions > 0)
+      report_error( ctx, "Instruction expected but declaration found" );
+
+   /* Check registers' validity.
+    * Mark the registers as declared.
+    */
+   file = decl->Declaration.File;
+   if (!check_file_name( ctx, file ))
+      return TRUE;
+   for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) {
+      if (is_register_declared( ctx, file, i ))
+         report_error( ctx, "The same register declared twice" );
+      ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG));
+   }
+
+   return TRUE;
+}
+
+static boolean
+iter_immediate(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_immediate *imm )
+{
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
+
+   assert( ctx->num_imms < MAX_REGISTERS );
+
+   /* No immediates allowed after the first instruction.
+    */
+   if (ctx->num_instructions > 0)
+      report_error( ctx, "Instruction expected but immediate found" );
+
+   /* Mark the register as declared.
+    */
+   ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG));
+   ctx->num_imms++;
+
+   /* Check data type validity.
+    */
+   if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) {
+      report_error( ctx, "Invalid immediate data type" );
+      return TRUE;
+   }
+
+   return TRUE;
+}
+
+static boolean
+epilog(
+   struct tgsi_iterate_context *iter )
+{
+   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
+   uint file;
+
+   /* There must be an END instruction at the end.
+    */
+   if (ctx->index_of_END == ~0 || ctx->index_of_END != ctx->num_instructions - 1) {
+      report_error( ctx, "Expected END at end of instruction sequence" );
+   }
+
+   /* Check if all declared registers were used.
+    */
+   for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) {
+      uint i;
+
+      for (i = 0; i < MAX_REGISTERS; i++) {
+         if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) {
+            report_warning( ctx, "Register never used" );
+         }
+      }
+   }
+
+   /* Print totals, if any.
+    */
+   if (ctx->errors || ctx->warnings)
+      debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings );
+
+   return TRUE;
+}
+
+boolean
+tgsi_sanity_check(
+   struct tgsi_token *tokens )
+{
+   struct sanity_check_ctx ctx;
+
+   ctx.iter.prolog = NULL;
+   ctx.iter.iterate_instruction = iter_instruction;
+   ctx.iter.iterate_declaration = iter_declaration;
+   ctx.iter.iterate_immediate = iter_immediate;
+   ctx.iter.epilog = epilog;
+
+   memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) );
+   memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) );
+   memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) );
+   ctx.num_imms = 0;
+   ctx.num_instructions = 0;
+   ctx.index_of_END = ~0;
+
+   ctx.errors = 0;
+   ctx.warnings = 0;
+
+   if (!tgsi_iterate_shader( tokens, &ctx.iter ))
+      return FALSE;
+
+   return ctx.errors == 0;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
new file mode 100644
index 00000000000..ca45e94c7ad
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h
@@ -0,0 +1,49 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_SANITY_H
+#define TGSI_SANITY_H
+
+#include "pipe/p_shader_tokens.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+/* Check the given token stream for errors and common mistakes.
+ * Diagnostic messages are printed out to the debug output.
+ * Returns TRUE if there are no errors, even though there could be some warnings.
+ */
+boolean
+tgsi_sanity_check(
+   struct tgsi_token *tokens );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_SANITY_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
new file mode 100644
index 00000000000..59bcf10b530
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -0,0 +1,226 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * TGSI program scan utility.
+ * Used to determine which registers and instructions are used by a shader.
+ *
+ * Authors:  Brian Paul
+ */
+
+
+#include "tgsi_scan.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_build.h"
+
+#include "pipe/p_util.h"
+
+
+
+/**
+ */
+void
+tgsi_scan_shader(const struct tgsi_token *tokens,
+                 struct tgsi_shader_info *info)
+{
+   uint procType, i;
+   struct tgsi_parse_context parse;
+
+   memset(info, 0, sizeof(*info));
+   for (i = 0; i < TGSI_FILE_COUNT; i++)
+      info->file_max[i] = -1;
+
+   /**
+    ** Setup to begin parsing input shader
+    **/
+   if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
+      debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n");
+      return;
+   }
+   procType = parse.FullHeader.Processor.Processor;
+   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
+          procType == TGSI_PROCESSOR_VERTEX ||
+          procType == TGSI_PROCESSOR_GEOMETRY);
+
+
+   /**
+    ** Loop over incoming program tokens/instructions
+    */
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+
+      info->num_tokens++;
+
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         {
+            struct tgsi_full_instruction *fullinst
+               = &parse.FullToken.FullInstruction;
+
+            assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
+            info->opcode_count[fullinst->Instruction.Opcode]++;
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         {
+            struct tgsi_full_declaration *fulldecl
+               = &parse.FullToken.FullDeclaration;
+            uint file = fulldecl->Declaration.File;
+            uint i;
+            for (i = fulldecl->DeclarationRange.First;
+                 i <= fulldecl->DeclarationRange.Last;
+                 i++) {
+
+               /* only first 32 regs will appear in this bitfield */
+               info->file_mask[file] |= (1 << i);
+               info->file_count[file]++;
+               info->file_max[file] = MAX2(info->file_max[file], (int)i);
+
+               if (file == TGSI_FILE_INPUT) {
+                  info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
+                  info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
+                  info->num_inputs++;
+               }
+
+               if (file == TGSI_FILE_OUTPUT) {
+                  info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
+                  info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
+                  info->num_outputs++;
+               }
+
+               /* special case */
+               if (procType == TGSI_PROCESSOR_FRAGMENT &&
+                   file == TGSI_FILE_OUTPUT &&
+                   fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+                  info->writes_z = TRUE;
+               }
+            }
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         info->immediate_count++;
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+
+   assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs );
+   assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs );
+
+   info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] ||
+                      info->opcode_count[TGSI_OPCODE_KILP]);
+
+   tgsi_parse_free (&parse);
+}
+
+
+
+/**
+ * Check if the given shader is a "passthrough" shader consisting of only
+ * MOV instructions of the form:  MOV OUT[n], IN[n]
+ *  
+ */
+boolean
+tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
+{
+   struct tgsi_parse_context parse;
+
+   /**
+    ** Setup to begin parsing input shader
+    **/
+   if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
+      debug_printf("tgsi_parse_init() failed in tgsi_is_passthrough_shader()!\n");
+      return FALSE;
+   }
+
+   /**
+    ** Loop over incoming program tokens/instructions
+    */
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         {
+            struct tgsi_full_instruction *fullinst =
+               &parse.FullToken.FullInstruction;
+            const struct tgsi_full_src_register *src =
+               &fullinst->FullSrcRegisters[0];
+            const struct tgsi_full_dst_register *dst =
+               &fullinst->FullDstRegisters[0];
+
+            /* Do a whole bunch of checks for a simple move */
+            if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV ||
+                src->SrcRegister.File != TGSI_FILE_INPUT ||
+                dst->DstRegister.File != TGSI_FILE_OUTPUT ||
+                src->SrcRegister.Index != dst->DstRegister.Index ||
+
+                src->SrcRegister.Negate ||
+                src->SrcRegisterExtMod.Negate ||
+                src->SrcRegisterExtMod.Absolute ||
+                src->SrcRegisterExtMod.Scale2X ||
+                src->SrcRegisterExtMod.Bias ||
+                src->SrcRegisterExtMod.Complement ||
+
+                src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
+                src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
+                src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
+                src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ||
+
+                src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
+                src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
+                src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
+                src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W ||
+
+                dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW)
+            {
+               tgsi_parse_free(&parse);
+               return FALSE;
+            }
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         /* fall-through */
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* fall-through */
+      default:
+         ; /* no-op */
+      }
+   }
+
+   tgsi_parse_free(&parse);
+
+   /* if we get here, it's a pass-through shader */
+   return TRUE;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
new file mode 100644
index 00000000000..5cb6efb3439
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -0,0 +1,74 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_SCAN_H
+#define TGSI_SCAN_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+
+
+/**
+ * Shader summary info
+ */
+struct tgsi_shader_info
+{
+   uint num_tokens;
+
+   /* XXX eventually remove the corresponding fields from pipe_shader_state: */
+   ubyte num_inputs;
+   ubyte num_outputs;
+   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
+   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
+   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
+
+   uint file_mask[TGSI_FILE_COUNT];  /**< bitmask of declared registers */
+   uint file_count[TGSI_FILE_COUNT];  /**< number of declared registers */
+   int file_max[TGSI_FILE_COUNT];  /**< highest index of declared registers */
+
+   uint immediate_count; /**< number of immediates declared */
+
+   uint opcode_count[TGSI_OPCODE_LAST];  /**< opcode histogram */
+
+   boolean writes_z;  /**< does fragment shader write Z value? */
+   boolean uses_kill;  /**< KIL or KILP instruction used? */
+};
+
+
+extern void
+tgsi_scan_shader(const struct tgsi_token *tokens,
+                 struct tgsi_shader_info *info);
+
+
+extern boolean
+tgsi_is_passthrough_shader(const struct tgsi_token *tokens);
+
+
+#endif /* TGSI_SCAN_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
new file mode 100644
index 00000000000..0cb1f11ef20
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -0,0 +1,2275 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi_exec.h"
+#include "tgsi_sse2.h"
+
+#include "rtasm/rtasm_x86sse.h"
+
+#ifdef PIPE_ARCH_X86
+
+/* for 1/sqrt()
+ *
+ * This costs about 100fps (close to 10%) in gears:
+ */
+#define HIGH_PRECISION 1
+
+
+#define FOR_EACH_CHANNEL( CHAN )\
+   for( CHAN = 0; CHAN < 4; CHAN++ )
+
+#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
+   FOR_EACH_CHANNEL( CHAN )\
+      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
+
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+#define TEMP_R0   TGSI_EXEC_TEMP_R0
+
+/**
+ * X86 utility functions.
+ */
+
+static struct x86_reg
+make_xmm(
+   unsigned xmm )
+{
+   return x86_make_reg(
+      file_XMM,
+      (enum x86_reg_name) xmm );
+}
+
+/**
+ * X86 register mapping helpers.
+ */
+
+static struct x86_reg
+get_const_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_CX );
+}
+
+static struct x86_reg
+get_input_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_AX );
+}
+
+static struct x86_reg
+get_output_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_DX );
+}
+
+static struct x86_reg
+get_temp_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_BX );
+}
+
+static struct x86_reg
+get_coef_base( void )
+{
+   return get_output_base();
+}
+
+static struct x86_reg
+get_immediate_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_DI );
+}
+
+
+/**
+ * Data access helpers.
+ */
+
+
+static struct x86_reg
+get_immediate(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_immediate_base(),
+      (vec * 4 + chan) * 4 );
+}
+
+static struct x86_reg
+get_const(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_const_base(),
+      (vec * 4 + chan) * 4 );
+}
+
+static struct x86_reg
+get_input(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_input_base(),
+      (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_output(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_output_base(),
+      (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_temp(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_temp_base(),
+      (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_coef(
+   unsigned vec,
+   unsigned chan,
+   unsigned member )
+{
+   return x86_make_disp(
+      get_coef_base(),
+      ((vec * 3 + member) * 4 + chan) * 4 );
+}
+
+
+static void
+emit_ret(
+   struct x86_function  *func )
+{
+   x86_ret( func );
+}
+
+
+/**
+ * Data fetch helpers.
+ */
+
+/**
+ * Copy a shader constant to xmm register
+ * \param xmm  the destination xmm register
+ * \param vec  the src const buffer index
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ */
+static void
+emit_const(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movss(
+      func,
+      make_xmm( xmm ),
+      get_const( vec, chan ) );
+   sse_shufps(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ),
+      SHUF( 0, 0, 0, 0 ) );
+}
+
+static void
+emit_immediate(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movss(
+      func,
+      make_xmm( xmm ),
+      get_immediate( vec, chan ) );
+   sse_shufps(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ),
+      SHUF( 0, 0, 0, 0 ) );
+}
+
+
+/**
+ * Copy a shader input to xmm register
+ * \param xmm  the destination xmm register
+ * \param vec  the src input attrib
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ */
+static void
+emit_inputf(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movups(
+      func,
+      make_xmm( xmm ),
+      get_input( vec, chan ) );
+}
+
+/**
+ * Store an xmm register to a shader output
+ * \param xmm  the source xmm register
+ * \param vec  the dest output attrib
+ * \param chan  src dest channel to store (X, Y, Z or W)
+ */
+static void
+emit_output(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movups(
+      func,
+      get_output( vec, chan ),
+      make_xmm( xmm ) );
+}
+
+/**
+ * Copy a shader temporary to xmm register
+ * \param xmm  the destination xmm register
+ * \param vec  the src temp register
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ */
+static void
+emit_tempf(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movaps(
+      func,
+      make_xmm( xmm ),
+      get_temp( vec, chan ) );
+}
+
+/**
+ * Load an xmm register with an input attrib coefficient (a0, dadx or dady)
+ * \param xmm  the destination xmm register
+ * \param vec  the src input/attribute coefficient index
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ * \param member  0=a0, 1=dadx, 2=dady
+ */
+static void
+emit_coef(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan,
+   unsigned member )
+{
+   sse_movss(
+      func,
+      make_xmm( xmm ),
+      get_coef( vec, chan, member ) );
+   sse_shufps(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ),
+      SHUF( 0, 0, 0, 0 ) );
+}
+
+/**
+ * Data store helpers.
+ */
+
+static void
+emit_inputs(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movups(
+      func,
+      get_input( vec, chan ),
+      make_xmm( xmm ) );
+}
+
+static void
+emit_temps(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movaps(
+      func,
+      get_temp( vec, chan ),
+      make_xmm( xmm ) );
+}
+
+static void
+emit_addrs(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_temps(
+      func,
+      xmm,
+      vec + TGSI_EXEC_NUM_TEMPS,
+      chan );
+}
+
+/**
+ * Coefficent fetch helpers.
+ */
+
+static void
+emit_coef_a0(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_coef(
+      func,
+      xmm,
+      vec,
+      chan,
+      0 );
+}
+
+static void
+emit_coef_dadx(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_coef(
+      func,
+      xmm,
+      vec,
+      chan,
+      1 );
+}
+
+static void
+emit_coef_dady(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_coef(
+      func,
+      xmm,
+      vec,
+      chan,
+      2 );
+}
+
+/**
+ * Function call helpers.
+ */
+
+static void
+emit_push_gp(
+   struct x86_function *func )
+{
+   x86_push(
+      func,
+      x86_make_reg( file_REG32, reg_AX) );
+   x86_push(
+      func,
+      x86_make_reg( file_REG32, reg_CX) );
+   x86_push(
+      func,
+      x86_make_reg( file_REG32, reg_DX) );
+}
+
+static void
+x86_pop_gp(
+   struct x86_function *func )
+{
+   /* Restore GP registers in a reverse order.
+    */
+   x86_pop(
+      func,
+      x86_make_reg( file_REG32, reg_DX) );
+   x86_pop(
+      func,
+      x86_make_reg( file_REG32, reg_CX) );
+   x86_pop(
+      func,
+      x86_make_reg( file_REG32, reg_AX) );
+}
+
+static void
+emit_func_call_dst(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   void (PIPE_CDECL *code)() )
+{
+   sse_movaps(
+      func,
+      get_temp( TEMP_R0, 0 ),
+      make_xmm( xmm_dst ) );
+
+   emit_push_gp(
+      func );
+
+   {
+      struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+
+      x86_lea(
+         func,
+         ecx,
+         get_temp( TEMP_R0, 0 ) );
+
+      x86_push( func, ecx );
+      x86_mov_reg_imm( func, ecx, (unsigned long) code );
+      x86_call( func, ecx );
+      x86_pop(func, ecx ); 
+   }
+
+
+   x86_pop_gp(
+      func );
+
+   sse_movaps(
+      func,
+      make_xmm( xmm_dst ),
+      get_temp( TEMP_R0, 0 ) );
+}
+
+static void
+emit_func_call_dst_src(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src,
+   void (PIPE_CDECL *code)() )
+{
+   sse_movaps(
+      func,
+      get_temp( TEMP_R0, 1 ),
+      make_xmm( xmm_src ) );
+
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      code );
+}
+
+/**
+ * Low-level instruction translators.
+ */
+
+static void
+emit_abs(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse_andps(
+      func,
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_7FFFFFFF_I,
+         TGSI_EXEC_TEMP_7FFFFFFF_C ) );
+}
+
+static void
+emit_add(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   sse_addps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void PIPE_CDECL
+cos4f(
+   float *store )
+{
+   const unsigned X = 0;
+
+   store[X + 0] = cosf( store[X + 0] );
+   store[X + 1] = cosf( store[X + 1] );
+   store[X + 2] = cosf( store[X + 2] );
+   store[X + 3] = cosf( store[X + 3] );
+}
+
+static void
+emit_cos(
+   struct x86_function *func,
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      cos4f );
+}
+
+static void PIPE_CDECL
+ex24f(
+   float *store )
+{
+   const unsigned X = 0;
+
+   store[X + 0] = powf( 2.0f, store[X + 0] );
+   store[X + 1] = powf( 2.0f, store[X + 1] );
+   store[X + 2] = powf( 2.0f, store[X + 2] );
+   store[X + 3] = powf( 2.0f, store[X + 3] );
+}
+
+static void
+emit_ex2(
+   struct x86_function *func,
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      ex24f );
+}
+
+static void
+emit_f2it(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse2_cvttps2dq(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ) );
+}
+
+static void PIPE_CDECL
+flr4f(
+   float *store )
+{
+   const unsigned X = 0;
+
+   store[X + 0] = floorf( store[X + 0] );
+   store[X + 1] = floorf( store[X + 1] );
+   store[X + 2] = floorf( store[X + 2] );
+   store[X + 3] = floorf( store[X + 3] );
+}
+
+static void
+emit_flr(
+   struct x86_function *func,
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      flr4f );
+}
+
+static void PIPE_CDECL
+frc4f(
+   float *store )
+{
+   const unsigned X = 0;
+
+   store[X + 0] -= floorf( store[X + 0] );
+   store[X + 1] -= floorf( store[X + 1] );
+   store[X + 2] -= floorf( store[X + 2] );
+   store[X + 3] -= floorf( store[X + 3] );
+}
+
+static void
+emit_frc(
+   struct x86_function *func,
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      frc4f );
+}
+
+static void PIPE_CDECL
+lg24f(
+   float *store )
+{
+   const unsigned X = 0;
+
+   store[X + 0] = LOG2( store[X + 0] );
+   store[X + 1] = LOG2( store[X + 1] );
+   store[X + 2] = LOG2( store[X + 2] );
+   store[X + 3] = LOG2( store[X + 3] );
+}
+
+static void
+emit_lg2(
+   struct x86_function *func,
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      lg24f );
+}
+
+static void
+emit_MOV(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   sse_movups(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void
+emit_mul (struct x86_function *func,
+          unsigned xmm_dst,
+          unsigned xmm_src)
+{
+   sse_mulps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void
+emit_neg(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse_xorps(
+      func,
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_80000000_I,
+         TGSI_EXEC_TEMP_80000000_C ) );
+}
+
+static void PIPE_CDECL
+pow4f(
+   float *store )
+{
+   const unsigned X = 0;
+
+   store[X + 0] = powf( store[X + 0], store[X + 4] );
+   store[X + 1] = powf( store[X + 1], store[X + 5] );
+   store[X + 2] = powf( store[X + 2], store[X + 6] );
+   store[X + 3] = powf( store[X + 3], store[X + 7] );
+}
+
+static void
+emit_pow(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   emit_func_call_dst_src(
+      func,
+      xmm_dst,
+      xmm_src,
+      pow4f );
+}
+
+static void
+emit_rcp (
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   /* On Intel CPUs at least, this is only accurate to 12 bits -- not
+    * good enough.  Need to either emit a proper divide or use the
+    * iterative technique described below in emit_rsqrt().
+    */
+   sse2_rcpps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void
+emit_rsqrt(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+#if HIGH_PRECISION
+   /* Although rsqrtps() and rcpps() are low precision on some/all SSE
+    * implementations, it is possible to improve its precision at
+    * fairly low cost, using a newton/raphson step, as below:
+    * 
+    * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
+    * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
+    *
+    * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
+    */
+   {
+      struct x86_reg dst = make_xmm( xmm_dst );
+      struct x86_reg src = make_xmm( xmm_src );
+      struct x86_reg tmp0 = make_xmm( 2 );
+      struct x86_reg tmp1 = make_xmm( 3 );
+
+      assert( xmm_dst != xmm_src );
+      assert( xmm_dst != 2 && xmm_dst != 3 );
+      assert( xmm_src != 2 && xmm_src != 3 );
+
+      sse_movaps(  func, dst,  get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
+      sse_movaps(  func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
+      sse_rsqrtps( func, tmp1, src  );
+      sse_mulps(   func, src,  tmp1 );
+      sse_mulps(   func, dst,  tmp1 );
+      sse_mulps(   func, src,  tmp1 );
+      sse_subps(   func, tmp0, src  );
+      sse_mulps(   func, dst,  tmp0 );
+   }
+#else
+   /* On Intel CPUs at least, this is only accurate to 12 bits -- not
+    * good enough.
+    */
+   sse_rsqrtps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+#endif
+}
+
+static void
+emit_setsign(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse_orps(
+      func,
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_80000000_I,
+         TGSI_EXEC_TEMP_80000000_C ) );
+}
+
+static void PIPE_CDECL
+sin4f(
+   float *store )
+{
+   const unsigned X = 0;
+
+   store[X + 0] = sinf( store[X + 0] );
+   store[X + 1] = sinf( store[X + 1] );
+   store[X + 2] = sinf( store[X + 2] );
+   store[X + 3] = sinf( store[X + 3] );
+}
+
+static void
+emit_sin (struct x86_function *func,
+          unsigned xmm_dst)
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      sin4f );
+}
+
+static void
+emit_sub(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   sse_subps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+/**
+ * Register fetch.
+ */
+
+static void
+emit_fetch(
+   struct x86_function *func,
+   unsigned xmm,
+   const struct tgsi_full_src_register *reg,
+   const unsigned chan_index )
+{
+   unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+
+   switch( swizzle ) {
+   case TGSI_EXTSWIZZLE_X:
+   case TGSI_EXTSWIZZLE_Y:
+   case TGSI_EXTSWIZZLE_Z:
+   case TGSI_EXTSWIZZLE_W:
+      switch( reg->SrcRegister.File ) {
+      case TGSI_FILE_CONSTANT:
+         emit_const(
+            func,
+            xmm,
+            reg->SrcRegister.Index,
+            swizzle );
+         break;
+
+      case TGSI_FILE_IMMEDIATE:
+         emit_immediate(
+            func,
+            xmm,
+            reg->SrcRegister.Index,
+            swizzle );
+         break;
+
+      case TGSI_FILE_INPUT:
+         emit_inputf(
+            func,
+            xmm,
+            reg->SrcRegister.Index,
+            swizzle );
+         break;
+
+      case TGSI_FILE_TEMPORARY:
+         emit_tempf(
+            func,
+            xmm,
+            reg->SrcRegister.Index,
+            swizzle );
+         break;
+
+      default:
+         assert( 0 );
+      }
+      break;
+
+   case TGSI_EXTSWIZZLE_ZERO:
+      emit_tempf(
+         func,
+         xmm,
+         TGSI_EXEC_TEMP_00000000_I,
+         TGSI_EXEC_TEMP_00000000_C );
+      break;
+
+   case TGSI_EXTSWIZZLE_ONE:
+      emit_tempf(
+         func,
+         xmm,
+         TGSI_EXEC_TEMP_ONE_I,
+         TGSI_EXEC_TEMP_ONE_C );
+      break;
+
+   default:
+      assert( 0 );
+   }
+
+   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
+   case TGSI_UTIL_SIGN_CLEAR:
+      emit_abs( func, xmm );
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      emit_setsign( func, xmm );
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      emit_neg( func, xmm );
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+      break;
+   }
+}
+
+#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
+   emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
+
+/**
+ * Register store.
+ */
+
+static void
+emit_store(
+   struct x86_function *func,
+   unsigned xmm,
+   const struct tgsi_full_dst_register *reg,
+   const struct tgsi_full_instruction *inst,
+   unsigned chan_index )
+{
+   switch( reg->DstRegister.File ) {
+   case TGSI_FILE_OUTPUT:
+      emit_output(
+         func,
+         xmm,
+         reg->DstRegister.Index,
+         chan_index );
+      break;
+
+   case TGSI_FILE_TEMPORARY:
+      emit_temps(
+         func,
+         xmm,
+         reg->DstRegister.Index,
+         chan_index );
+      break;
+
+   case TGSI_FILE_ADDRESS:
+      emit_addrs(
+         func,
+         xmm,
+         reg->DstRegister.Index,
+         chan_index );
+      break;
+
+   default:
+      assert( 0 );
+   }
+
+   switch( inst->Instruction.Saturate ) {
+   case TGSI_SAT_NONE:
+      break;
+
+   case TGSI_SAT_ZERO_ONE:
+      /* assert( 0 ); */
+      break;
+
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      assert( 0 );
+      break;
+   }
+}
+
+#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
+   emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
+
+/**
+ * High-level instruction translators.
+ */
+
+static void
+emit_kil(
+   struct x86_function *func,
+   const struct tgsi_full_src_register *reg )
+{
+   unsigned uniquemask;
+   unsigned registers[4];
+   unsigned nextregister = 0;
+   unsigned firstchan = ~0;
+   unsigned chan_index;
+
+   /* This mask stores component bits that were already tested. Note that
+    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
+    * tested. */
+   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+
+   FOR_EACH_CHANNEL( chan_index ) {
+      unsigned swizzle;
+
+      /* unswizzle channel */
+      swizzle = tgsi_util_get_full_src_register_extswizzle(
+         reg,
+         chan_index );
+
+      /* check if the component has not been already tested */
+      if( !(uniquemask & (1 << swizzle)) ) {
+         uniquemask |= 1 << swizzle;
+
+         /* allocate register */
+         registers[chan_index] = nextregister;
+         emit_fetch(
+            func,
+            nextregister,
+            reg,
+            chan_index );
+         nextregister++;
+
+         /* mark the first channel used */
+         if( firstchan == ~0 ) {
+            firstchan = chan_index;
+         }
+      }
+   }
+
+   x86_push(
+      func,
+      x86_make_reg( file_REG32, reg_AX ) );
+   x86_push(
+      func,
+      x86_make_reg( file_REG32, reg_DX ) );
+
+   FOR_EACH_CHANNEL( chan_index ) {
+      if( uniquemask & (1 << chan_index) ) {
+         sse_cmpps(
+            func,
+            make_xmm( registers[chan_index] ),
+            get_temp(
+               TGSI_EXEC_TEMP_00000000_I,
+               TGSI_EXEC_TEMP_00000000_C ),
+            cc_LessThan );
+
+         if( chan_index == firstchan ) {
+            sse_pmovmskb(
+               func,
+               x86_make_reg( file_REG32, reg_AX ),
+               make_xmm( registers[chan_index] ) );
+         }
+         else {
+            sse_pmovmskb(
+               func,
+               x86_make_reg( file_REG32, reg_DX ),
+               make_xmm( registers[chan_index] ) );
+            x86_or(
+               func,
+               x86_make_reg( file_REG32, reg_AX ),
+               x86_make_reg( file_REG32, reg_DX ) );
+         }
+      }
+   }
+
+   x86_or(
+      func,
+      get_temp(
+         TGSI_EXEC_TEMP_KILMASK_I,
+         TGSI_EXEC_TEMP_KILMASK_C ),
+      x86_make_reg( file_REG32, reg_AX ) );
+
+   x86_pop(
+      func,
+      x86_make_reg( file_REG32, reg_DX ) );
+   x86_pop(
+      func,
+      x86_make_reg( file_REG32, reg_AX ) );
+}
+
+static void
+emit_setcc(
+   struct x86_function *func,
+   struct tgsi_full_instruction *inst,
+   enum sse_cc cc )
+{
+   unsigned chan_index;
+
+   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+      FETCH( func, *inst, 0, 0, chan_index );
+      FETCH( func, *inst, 1, 1, chan_index );
+      sse_cmpps(
+         func,
+         make_xmm( 0 ),
+         make_xmm( 1 ),
+         cc );
+      sse_andps(
+         func,
+         make_xmm( 0 ),
+         get_temp(
+            TGSI_EXEC_TEMP_ONE_I,
+            TGSI_EXEC_TEMP_ONE_C ) );
+      STORE( func, *inst, 0, 0, chan_index );
+   }
+}
+
+static void
+emit_cmp(
+   struct x86_function *func,
+   struct tgsi_full_instruction *inst )
+{
+   unsigned chan_index;
+
+   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+      FETCH( func, *inst, 0, 0, chan_index );
+      FETCH( func, *inst, 1, 1, chan_index );
+      FETCH( func, *inst, 2, 2, chan_index );
+      sse_cmpps(
+         func,
+         make_xmm( 0 ),
+         get_temp(
+            TGSI_EXEC_TEMP_00000000_I,
+            TGSI_EXEC_TEMP_00000000_C ),
+         cc_LessThan );
+      sse_andps(
+         func,
+         make_xmm( 1 ),
+         make_xmm( 0 ) );
+      sse_andnps(
+         func,
+         make_xmm( 0 ),
+         make_xmm( 2 ) );
+      sse_orps(
+         func,
+         make_xmm( 0 ),
+         make_xmm( 1 ) );
+      STORE( func, *inst, 0, 0, chan_index );
+   }
+}
+
+static int
+emit_instruction(
+   struct x86_function *func,
+   struct tgsi_full_instruction *inst )
+{
+   unsigned chan_index;
+
+   switch( inst->Instruction.Opcode ) {
+   case TGSI_OPCODE_ARL:
+#if 0
+      /* XXX this isn't working properly (see glean vertProg1 test) */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_f2it( func, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+#else
+      return 0;
+#endif
+      break;
+
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_SWZ:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LIT:
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+         emit_tempf(
+            func,
+            0,
+            TGSI_EXEC_TEMP_ONE_I,
+            TGSI_EXEC_TEMP_ONE_C);
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
+            STORE( func, *inst, 0, 0, CHAN_X );
+         }
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+            STORE( func, *inst, 0, 0, CHAN_W );
+         }
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+            FETCH( func, *inst, 0, 0, CHAN_X );
+            sse_maxps(
+               func,
+               make_xmm( 0 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_00000000_I,
+                  TGSI_EXEC_TEMP_00000000_C ) );
+            STORE( func, *inst, 0, 0, CHAN_Y );
+         }
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+            /* XMM[1] = SrcReg[0].yyyy */
+            FETCH( func, *inst, 1, 0, CHAN_Y );
+            /* XMM[1] = max(XMM[1], 0) */
+            sse_maxps(
+               func,
+               make_xmm( 1 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_00000000_I,
+                  TGSI_EXEC_TEMP_00000000_C ) );
+            /* XMM[2] = SrcReg[0].wwww */
+            FETCH( func, *inst, 2, 0, CHAN_W );
+            /* XMM[2] = min(XMM[2], 128.0) */
+            sse_minps(
+               func,
+               make_xmm( 2 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_128_I,
+                  TGSI_EXEC_TEMP_128_C ) );
+            /* XMM[2] = max(XMM[2], -128.0) */
+            sse_maxps(
+               func,
+               make_xmm( 2 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_MINUS_128_I,
+                  TGSI_EXEC_TEMP_MINUS_128_C ) );
+            emit_pow( func, 1, 2 );
+            FETCH( func, *inst, 0, 0, CHAN_X );
+            sse_xorps(
+               func,
+               make_xmm( 2 ),
+               make_xmm( 2 ) );
+            sse_cmpps(
+               func,
+               make_xmm( 2 ),
+               make_xmm( 0 ),
+               cc_LessThanEqual );
+            sse_andps(
+               func,
+               make_xmm( 2 ),
+               make_xmm( 1 ) );
+            STORE( func, *inst, 2, 0, CHAN_Z );
+         }
+      }
+      break;
+
+   case TGSI_OPCODE_RCP:
+   /* TGSI_OPCODE_RECIP */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_rcp( func, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_RSQ:
+   /* TGSI_OPCODE_RECIPSQRT */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_rsqrt( func, 1, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 1, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_EXP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_LOG:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_MUL:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         emit_mul( func, 0, 1 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_ADD:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         emit_add( func, 0, 1 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DP3:
+   /* TGSI_OPCODE_DOT3 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_mul( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Y );
+      FETCH( func, *inst, 2, 1, CHAN_Y );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Z );
+      FETCH( func, *inst, 2, 1, CHAN_Z );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DP4:
+   /* TGSI_OPCODE_DOT4 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_mul( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Y );
+      FETCH( func, *inst, 2, 1, CHAN_Y );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Z );
+      FETCH( func, *inst, 2, 1, CHAN_Z );
+      emit_mul(func, 1, 2 );
+      emit_add(func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_W );
+      FETCH( func, *inst, 2, 1, CHAN_W );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DST:
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         emit_tempf(
+            func,
+            0,
+            TGSI_EXEC_TEMP_ONE_I,
+            TGSI_EXEC_TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         FETCH( func, *inst, 0, 0, CHAN_Y );
+         FETCH( func, *inst, 1, 1, CHAN_Y );
+         emit_mul( func, 0, 1 );
+         STORE( func, *inst, 0, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+         FETCH( func, *inst, 0, 0, CHAN_Z );
+         STORE( func, *inst, 0, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+         FETCH( func, *inst, 0, 1, CHAN_W );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MIN:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         sse_minps(
+            func,
+            make_xmm( 0 ),
+            make_xmm( 1 ) );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MAX:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         sse_maxps(
+            func,
+            make_xmm( 0 ),
+            make_xmm( 1 ) );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLT:
+   /* TGSI_OPCODE_SETLT */
+      emit_setcc( func, inst, cc_LessThan );
+      break;
+
+   case TGSI_OPCODE_SGE:
+   /* TGSI_OPCODE_SETGE */
+      emit_setcc( func, inst, cc_NotLessThan );
+      break;
+
+   case TGSI_OPCODE_MAD:
+   /* TGSI_OPCODE_MADD */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         FETCH( func, *inst, 2, 2, chan_index );
+         emit_mul( func, 0, 1 );
+         emit_add( func, 0, 2 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SUB:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         emit_sub( func, 0, 1 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LERP:
+   /* TGSI_OPCODE_LRP */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         FETCH( func, *inst, 2, 2, chan_index );
+         emit_sub( func, 1, 2 );
+         emit_mul( func, 0, 1 );
+         emit_add( func, 0, 2 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CND:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CND0:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DOT2ADD:
+   /* TGSI_OPCODE_DP2A */
+      return 0;
+      break;
+
+   case TGSI_OPCODE_INDEX:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_NEGATE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_FRAC:
+   /* TGSI_OPCODE_FRC */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_frc( func, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CLAMP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_FLOOR:
+   /* TGSI_OPCODE_FLR */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_flr( func, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_ROUND:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_EXPBASE2:
+   /* TGSI_OPCODE_EX2 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_ex2( func, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LOGBASE2:
+   /* TGSI_OPCODE_LG2 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_lg2( func, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_POWER:
+   /* TGSI_OPCODE_POW */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_pow( func, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CROSSPRODUCT:
+   /* TGSI_OPCODE_XPD */
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+         FETCH( func, *inst, 1, 1, CHAN_Z );
+         FETCH( func, *inst, 3, 0, CHAN_Z );
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         FETCH( func, *inst, 0, 0, CHAN_Y );
+         FETCH( func, *inst, 4, 1, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         emit_MOV( func, 2, 0 );
+         emit_mul( func, 2, 1 );
+         emit_MOV( func, 5, 3 );
+         emit_mul( func, 5, 4 );
+         emit_sub( func, 2, 5 );
+         STORE( func, *inst, 2, 0, CHAN_X );
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         FETCH( func, *inst, 2, 1, CHAN_X );
+         FETCH( func, *inst, 5, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         emit_mul( func, 3, 2 );
+         emit_mul( func, 1, 5 );
+         emit_sub( func, 3, 1 );
+         STORE( func, *inst, 3, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+         emit_mul( func, 5, 4 );
+         emit_mul( func, 0, 2 );
+         emit_sub( func, 5, 0 );
+         STORE( func, *inst, 5, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+	 emit_tempf(
+	    func,
+	    0,
+	    TGSI_EXEC_TEMP_ONE_I,
+	    TGSI_EXEC_TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MULTIPLYMATRIX:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ABS:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_abs( func, 0) ;
+
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_RCC:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DPH:
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_mul( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Y );
+      FETCH( func, *inst, 2, 1, CHAN_Y );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Z );
+      FETCH( func, *inst, 2, 1, CHAN_Z );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 1, CHAN_W );
+      emit_add( func, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_COS:
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_cos( func, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DDX:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DDY:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_KIL:
+      emit_kil( func, &inst->FullSrcRegisters[0] );
+      break;
+
+   case TGSI_OPCODE_PK2H:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK2US:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK4B:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK4UB:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_RFL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SEQ:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SFL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SGT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SIN:
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_sin( func, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SNE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_STR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TEX:
+      if (0) {
+	 /* Disable dummy texture code: 
+	  */
+	 emit_tempf(
+	    func,
+	    0,
+	    TGSI_EXEC_TEMP_ONE_I,
+	    TGSI_EXEC_TEMP_ONE_C );
+	 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+	    STORE( func, *inst, 0, 0, chan_index );
+	 }
+      }
+      else {
+	 return 0;
+      }
+      break;
+
+   case TGSI_OPCODE_TXD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP2H:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP2US:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP4B:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP4UB:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_X2D:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ARA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ARR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_BRA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CAL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_RET:
+      emit_ret( func );
+      break;
+
+   case TGSI_OPCODE_END:
+      break;
+
+   case TGSI_OPCODE_SSG:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CMP:
+      emit_cmp (func, inst);
+      break;
+
+   case TGSI_OPCODE_SCS:
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         FETCH( func, *inst, 0, 0, CHAN_X );
+         emit_cos( func, 0 );
+         STORE( func, *inst, 0, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         FETCH( func, *inst, 0, 0, CHAN_X );
+         emit_sin( func, 0 );
+         STORE( func, *inst, 0, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+	 emit_tempf(
+	    func,
+	    0,
+	    TGSI_EXEC_TEMP_00000000_I,
+	    TGSI_EXEC_TEMP_00000000_C );
+         STORE( func, *inst, 0, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+	 emit_tempf(
+	    func,
+	    0,
+	    TGSI_EXEC_TEMP_ONE_I,
+	    TGSI_EXEC_TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_TXB:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_NRM:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DIV:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DP2:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TXL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_BRK:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_IF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_LOOP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_REP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ELSE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDIF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDLOOP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDREP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PUSHA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_POPA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CEIL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_I2F:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_NOT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TRUNC:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SHL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SHR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_AND:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_OR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_MOD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_XOR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SAD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TXF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TXQ:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CONT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_EMIT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDPRIM:
+      return 0;
+      break;
+
+   default:
+      return 0;
+   }
+   
+   return 1;
+}
+
+static void
+emit_declaration(
+   struct x86_function *func,
+   struct tgsi_full_declaration *decl )
+{
+   if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+      unsigned first, last, mask;
+      unsigned i, j;
+
+      first = decl->DeclarationRange.First;
+      last = decl->DeclarationRange.Last;
+      mask = decl->Declaration.UsageMask;
+
+      for( i = first; i <= last; i++ ) {
+         for( j = 0; j < NUM_CHANNELS; j++ ) {
+            if( mask & (1 << j) ) {
+               switch( decl->Declaration.Interpolate ) {
+               case TGSI_INTERPOLATE_CONSTANT:
+                  emit_coef_a0( func, 0, i, j );
+                  emit_inputs( func, 0, i, j );
+                  break;
+
+               case TGSI_INTERPOLATE_LINEAR:
+                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
+                  emit_coef_dadx( func, 1, i, j );
+                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
+                  emit_coef_dady( func, 3, i, j );
+                  emit_mul( func, 0, 1 );    /* x * dadx */
+                  emit_coef_a0( func, 4, i, j );
+                  emit_mul( func, 2, 3 );    /* y * dady */
+                  emit_add( func, 0, 4 );    /* x * dadx + a0 */
+                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
+                  emit_inputs( func, 0, i, j );
+                  break;
+
+               case TGSI_INTERPOLATE_PERSPECTIVE:
+                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
+                  emit_coef_dadx( func, 1, i, j );
+                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
+                  emit_coef_dady( func, 3, i, j );
+                  emit_mul( func, 0, 1 );    /* x * dadx */
+                  emit_tempf( func, 4, 0, TGSI_SWIZZLE_W );
+                  emit_coef_a0( func, 5, i, j );
+                  emit_rcp( func, 4, 4 );    /* 1.0 / w */
+                  emit_mul( func, 2, 3 );    /* y * dady */
+                  emit_add( func, 0, 5 );    /* x * dadx + a0 */
+                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
+                  emit_mul( func, 0, 4 );    /* (x * dadx + y * dady + a0) / w */
+                  emit_inputs( func, 0, i, j );
+                  break;
+
+               default:
+                  assert( 0 );
+		  break;
+               }
+            }
+         }
+      }
+   }
+}
+
+static void aos_to_soa( struct x86_function *func, 
+                        uint arg_aos,
+                        uint arg_soa, 
+                        uint arg_num, 
+                        uint arg_stride )
+{
+   struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX );
+   struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX );
+   struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX );
+   struct x86_reg stride = x86_make_reg( file_REG32, reg_DX );
+   int inner_loop;
+
+
+   /* Save EBX */
+   x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
+
+   x86_mov( func, aos_input,  x86_fn_arg( func, arg_aos ) );
+   x86_mov( func, soa_input,  x86_fn_arg( func, arg_soa ) );
+   x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
+   x86_mov( func, stride,     x86_fn_arg( func, arg_stride ) );
+
+   /* do */
+   inner_loop = x86_get_label( func );
+   {
+      x86_push( func, aos_input );
+      sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
+      sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
+      x86_add( func, aos_input, stride );
+      sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
+      sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
+      x86_add( func, aos_input, stride );
+      sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
+      sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
+      x86_add( func, aos_input, stride );
+      sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
+      sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
+      x86_pop( func, aos_input );
+
+      sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
+      sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
+      sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 );
+      sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd );
+      sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 );
+      sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd );
+
+      sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) );
+      sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) );
+      sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) );
+      sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) );
+
+      /* Advance to next input */
+      x86_lea( func, aos_input, x86_make_disp(aos_input, 16) );
+      x86_lea( func, soa_input, x86_make_disp(soa_input, 64) );
+   }
+   /* while --num_inputs */
+   x86_dec( func, num_inputs );
+   x86_jcc( func, cc_NE, inner_loop );
+
+   /* Restore EBX */
+   x86_pop( func, aos_input );
+}
+
+static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
+{
+   struct x86_reg soa_output;
+   struct x86_reg aos_output;
+   struct x86_reg num_outputs;
+   struct x86_reg temp;
+   int inner_loop;
+
+   soa_output = x86_make_reg( file_REG32, reg_AX );
+   aos_output = x86_make_reg( file_REG32, reg_BX );
+   num_outputs = x86_make_reg( file_REG32, reg_CX );
+   temp = x86_make_reg( file_REG32, reg_DX );
+
+   /* Save EBX */
+   x86_push( func, aos_output );
+
+   x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
+   x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
+   x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
+
+   /* do */
+   inner_loop = x86_get_label( func );
+   {
+      sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) );
+      sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) );
+      sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) );
+      sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) );
+
+      sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
+      sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
+      sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) );
+      sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) );
+      sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
+      sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
+
+      x86_mov( func, temp, x86_fn_arg( func, stride ) );
+      x86_push( func, aos_output );
+      sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
+      sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
+      x86_add( func, aos_output, temp );
+      sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
+      sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
+      x86_add( func, aos_output, temp );
+      sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
+      sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
+      x86_add( func, aos_output, temp );
+      sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
+      sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
+      x86_pop( func, aos_output );
+
+      /* Advance to next output */
+      x86_lea( func, aos_output, x86_make_disp(aos_output, 16) );
+      x86_lea( func, soa_output, x86_make_disp(soa_output, 64) );
+   }
+   /* while --num_outputs */
+   x86_dec( func, num_outputs );
+   x86_jcc( func, cc_NE, inner_loop );
+
+   /* Restore EBX */
+   x86_pop( func, aos_output );
+}
+
+/**
+ * Translate a TGSI vertex/fragment shader to SSE2 code.
+ * Slightly different things are done for vertex vs. fragment shaders.
+ *
+ * Note that fragment shaders are responsible for interpolating shader
+ * inputs. Because on x86 we have only 4 GP registers, and here we
+ * have 5 shader arguments (input, output, const, temp and coef), the
+ * code is split into two phases -- DECLARATION and INSTRUCTION phase.
+ * GP register holding the output argument is aliased with the coeff
+ * argument, as outputs are not needed in the DECLARATION phase.
+ *
+ * \param tokens  the TGSI input shader
+ * \param func  the output SSE code/function
+ * \param immediates  buffer to place immediates, later passed to SSE func
+ * \param return  1 for success, 0 if translation failed
+ */
+unsigned
+tgsi_emit_sse2(
+   const struct tgsi_token *tokens,
+   struct x86_function *func,
+   float (*immediates)[4],
+   boolean do_swizzles )
+{
+   struct tgsi_parse_context parse;
+   boolean instruction_phase = FALSE;
+   unsigned ok = 1;
+   uint num_immediates = 0;
+
+   func->csr = func->store;
+
+   tgsi_parse_init( &parse, tokens );
+
+   /* Can't just use EDI, EBX without save/restoring them:
+    */
+   x86_push(
+      func,
+      get_immediate_base() );
+
+   x86_push(
+      func,
+      get_temp_base() );
+
+
+   /*
+    * Different function args for vertex/fragment shaders:
+    */
+   if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+      /* DECLARATION phase, do not load output argument. */
+      x86_mov(
+         func,
+         get_input_base(),
+         x86_fn_arg( func, 1 ) );
+      /* skipping outputs argument here */
+      x86_mov(
+         func,
+         get_const_base(),
+         x86_fn_arg( func, 3 ) );
+      x86_mov(
+         func,
+         get_temp_base(),
+         x86_fn_arg( func, 4 ) );
+      x86_mov(
+         func,
+         get_coef_base(),
+         x86_fn_arg( func, 5 ) );
+      x86_mov(
+         func,
+         get_immediate_base(),
+         x86_fn_arg( func, 6 ) );
+   }
+   else {
+      assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
+
+      if (do_swizzles)
+         aos_to_soa( func, 
+                     6,         /* aos_input */
+                     1,         /* machine->input */
+                     7,         /* num_inputs */
+                     8 );       /* input_stride */
+
+      x86_mov(
+         func,
+         get_input_base(),
+         x86_fn_arg( func, 1 ) );
+      x86_mov(
+         func,
+         get_output_base(),
+         x86_fn_arg( func, 2 ) );
+      x86_mov(
+         func,
+         get_const_base(),
+         x86_fn_arg( func, 3 ) );
+      x86_mov(
+         func,
+         get_temp_base(),
+         x86_fn_arg( func, 4 ) );
+      x86_mov(
+         func,
+         get_immediate_base(),
+         x86_fn_arg( func, 5 ) );
+   }
+
+   while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+            emit_declaration(
+               func,
+               &parse.FullToken.FullDeclaration );
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+            if( !instruction_phase ) {
+               /* INSTRUCTION phase, overwrite coeff with output. */
+               instruction_phase = TRUE;
+               x86_mov(
+                  func,
+                  get_output_base(),
+                  x86_fn_arg( func, 2 ) );
+            }
+         }
+
+         ok = emit_instruction(
+            func,
+            &parse.FullToken.FullInstruction );
+
+	 if (!ok) {
+	    debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", 
+			 parse.FullToken.FullInstruction.Instruction.Opcode,
+                         parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
+                         "vertex shader" : "fragment shader");
+	 }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* simply copy the immediate values into the next immediates[] slot */
+         {
+            const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+            uint i;
+            assert(size <= 4);
+            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
+            for( i = 0; i < size; i++ ) {
+               immediates[num_immediates][i] =
+		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+            }
+#if 0
+            debug_printf("SSE FS immediate[%d] = %f %f %f %f\n",
+                   num_immediates,
+                   immediates[num_immediates][0],
+                   immediates[num_immediates][1],
+                   immediates[num_immediates][2],
+                   immediates[num_immediates][3]);
+#endif
+            num_immediates++;
+         }
+         break;
+
+      default:
+	 ok = 0;
+         assert( 0 );
+      }
+   }
+
+   if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
+      if (do_swizzles)
+         soa_to_aos( func, 9, 2, 10, 11 );
+   }
+
+   /* Can't just use EBX, EDI without save/restoring them:
+    */
+   x86_pop(
+      func,
+      get_temp_base() );
+
+   x86_pop(
+      func,
+      get_immediate_base() );
+
+   emit_ret( func );
+
+   tgsi_parse_free( &parse );
+
+   return ok;
+}
+
+#endif /* PIPE_ARCH_X86 */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
new file mode 100644
index 00000000000..af838b2a25b
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
@@ -0,0 +1,49 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_SSE2_H
+#define TGSI_SSE2_H
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+struct tgsi_token;
+struct x86_function;
+
+unsigned
+tgsi_emit_sse2(
+   const struct tgsi_token *tokens,
+   struct x86_function *function,
+   float (*immediates)[4],
+   boolean do_swizzles );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_SSE2_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
new file mode 100644
index 00000000000..35cb3055bb2
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -0,0 +1,1221 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "tgsi_text.h"
+#include "tgsi_build.h"
+#include "tgsi_parse.h"
+#include "tgsi_sanity.h"
+#include "tgsi_util.h"
+
+static boolean is_alpha_underscore( const char *cur )
+{
+   return
+      (*cur >= 'a' && *cur <= 'z') ||
+      (*cur >= 'A' && *cur <= 'Z') ||
+      *cur == '_';
+}
+
+static boolean is_digit( const char *cur )
+{
+   return *cur >= '0' && *cur <= '9';
+}
+
+static boolean is_digit_alpha_underscore( const char *cur )
+{
+   return is_digit( cur ) || is_alpha_underscore( cur );
+}
+
+static boolean str_match_no_case( const char **pcur, const char *str )
+{
+   const char *cur = *pcur;
+
+   while (*str != '\0' && *str == toupper( *cur )) {
+      str++;
+      cur++;
+   }
+   if (*str == '\0') {
+      *pcur = cur;
+      return TRUE;
+   }
+   return FALSE;
+}
+
+/* Eat zero or more whitespaces.
+ */
+static void eat_opt_white( const char **pcur )
+{
+   while (**pcur == ' ' || **pcur == '\t' || **pcur == '\n')
+      (*pcur)++;
+}
+
+/* Eat one or more whitespaces.
+ * Return TRUE if at least one whitespace eaten.
+ */
+static boolean eat_white( const char **pcur )
+{
+   const char *cur = *pcur;
+
+   eat_opt_white( pcur );
+   return *pcur > cur;
+}
+
+/* Parse unsigned integer.
+ * No checks for overflow.
+ */
+static boolean parse_uint( const char **pcur, uint *val )
+{
+   const char *cur = *pcur;
+
+   if (is_digit( cur )) {
+      *val = *cur++ - '0';
+      while (is_digit( cur ))
+         *val = *val * 10 + *cur++ - '0';
+      *pcur = cur;
+      return TRUE;
+   }
+   return FALSE;
+}
+
+/* Parse floating point.
+ */
+static boolean parse_float( const char **pcur, float *val )
+{
+   const char *cur = *pcur;
+   boolean integral_part = FALSE;
+   boolean fractional_part = FALSE;
+
+   *val = (float) atof( cur );
+
+   if (*cur == '-' || *cur == '+')
+      cur++;
+   if (is_digit( cur )) {
+      cur++;
+      integral_part = TRUE;
+      while (is_digit( cur ))
+         cur++;
+   }
+   if (*cur == '.') {
+      cur++;
+      if (is_digit( cur )) {
+         cur++;
+         fractional_part = TRUE;
+         while (is_digit( cur ))
+            cur++;
+      }
+   }
+   if (!integral_part && !fractional_part)
+      return FALSE;
+   if (toupper( *cur ) == 'E') {
+      cur++;
+      if (*cur == '-' || *cur == '+')
+         cur++;
+      if (is_digit( cur )) {
+         cur++;
+         while (is_digit( cur ))
+            cur++;
+      }
+      else
+         return FALSE;
+   }
+   *pcur = cur;
+   return TRUE;
+}
+
+struct translate_ctx
+{
+   const char *text;
+   const char *cur;
+   struct tgsi_token *tokens;
+   struct tgsi_token *tokens_cur;
+   struct tgsi_token *tokens_end;
+   struct tgsi_header *header;
+};
+
+static void report_error( struct translate_ctx *ctx, const char *msg )
+{
+   debug_printf( "\nError: %s", msg );
+}
+
+/* Parse shader header.
+ * Return TRUE for one of the following headers.
+ *    FRAG1.1
+ *    GEOM1.1
+ *    VERT1.1
+ */
+static boolean parse_header( struct translate_ctx *ctx )
+{
+   uint processor;
+
+   if (str_match_no_case( &ctx->cur, "FRAG1.1" ))
+      processor = TGSI_PROCESSOR_FRAGMENT;
+   else if (str_match_no_case( &ctx->cur, "VERT1.1" ))
+      processor = TGSI_PROCESSOR_VERTEX;
+   else if (str_match_no_case( &ctx->cur, "GEOM1.1" ))
+      processor = TGSI_PROCESSOR_GEOMETRY;
+   else {
+      report_error( ctx, "Unknown header" );
+      return FALSE;
+   }
+
+   if (ctx->tokens_cur >= ctx->tokens_end)
+      return FALSE;
+   *(struct tgsi_version *) ctx->tokens_cur++ = tgsi_build_version();
+
+   if (ctx->tokens_cur >= ctx->tokens_end)
+      return FALSE;
+   ctx->header = (struct tgsi_header *) ctx->tokens_cur++;
+   *ctx->header = tgsi_build_header();
+
+   if (ctx->tokens_cur >= ctx->tokens_end)
+      return FALSE;
+   *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header );
+
+   return TRUE;
+}
+
+static boolean parse_label( struct translate_ctx *ctx, uint *val )
+{
+   const char *cur = ctx->cur;
+
+   if (parse_uint( &cur, val )) {
+      eat_opt_white( &cur );
+      if (*cur == ':') {
+         cur++;
+         ctx->cur = cur;
+         return TRUE;
+      }
+   }
+   return FALSE;
+}
+
+static const char *file_names[TGSI_FILE_COUNT] =
+{
+   "NULL",
+   "CONST",
+   "IN",
+   "OUT",
+   "TEMP",
+   "SAMP",
+   "ADDR",
+   "IMM"
+};
+
+static boolean
+parse_file( const char **pcur, uint *file )
+{
+   uint i;
+
+   for (i = 0; i < TGSI_FILE_COUNT; i++) {
+      const char *cur = *pcur;
+
+      if (str_match_no_case( &cur, file_names[i] )) {
+         if (!is_digit_alpha_underscore( cur )) {
+            *pcur = cur;
+            *file = i;
+            return TRUE;
+         }
+      }
+   }
+   return FALSE;
+}
+
+static boolean
+parse_opt_writemask(
+   struct translate_ctx *ctx,
+   uint *writemask )
+{
+   const char *cur;
+
+   cur = ctx->cur;
+   eat_opt_white( &cur );
+   if (*cur == '.') {
+      cur++;
+      *writemask = TGSI_WRITEMASK_NONE;
+      eat_opt_white( &cur );
+      if (toupper( *cur ) == 'X') {
+         cur++;
+         *writemask |= TGSI_WRITEMASK_X;
+      }
+      if (toupper( *cur ) == 'Y') {
+         cur++;
+         *writemask |= TGSI_WRITEMASK_Y;
+      }
+      if (toupper( *cur ) == 'Z') {
+         cur++;
+         *writemask |= TGSI_WRITEMASK_Z;
+      }
+      if (toupper( *cur ) == 'W') {
+         cur++;
+         *writemask |= TGSI_WRITEMASK_W;
+      }
+
+      if (*writemask == TGSI_WRITEMASK_NONE) {
+         report_error( ctx, "Writemask expected" );
+         return FALSE;
+      }
+
+      ctx->cur = cur;
+   }
+   else {
+      *writemask = TGSI_WRITEMASK_XYZW;
+   }
+   return TRUE;
+}
+
+/* <register_file_bracket> ::= <file> `['
+ */
+static boolean
+parse_register_file_bracket(
+   struct translate_ctx *ctx,
+   uint *file )
+{
+   if (!parse_file( &ctx->cur, file )) {
+      report_error( ctx, "Unknown register file" );
+      return FALSE;
+   }
+   eat_opt_white( &ctx->cur );
+   if (*ctx->cur != '[') {
+      report_error( ctx, "Expected `['" );
+      return FALSE;
+   }
+   ctx->cur++;
+   return TRUE;
+}
+
+/* <register_file_bracket_index> ::= <register_file_bracket> <uint>
+ */
+static boolean
+parse_register_file_bracket_index(
+   struct translate_ctx *ctx,
+   uint *file,
+   int *index )
+{
+   uint uindex;
+
+   if (!parse_register_file_bracket( ctx, file ))
+      return FALSE;
+   eat_opt_white( &ctx->cur );
+   if (!parse_uint( &ctx->cur, &uindex )) {
+      report_error( ctx, "Expected literal unsigned integer" );
+      return FALSE;
+   }
+   *index = (int) uindex;
+   return TRUE;
+}
+
+/* Parse destination register operand.
+ *    <register_dst> ::= <register_file_bracket_index> `]'
+ */
+static boolean
+parse_register_dst(
+   struct translate_ctx *ctx,
+   uint *file,
+   int *index )
+{
+   if (!parse_register_file_bracket_index( ctx, file, index ))
+      return FALSE;
+   eat_opt_white( &ctx->cur );
+   if (*ctx->cur != ']') {
+      report_error( ctx, "Expected `]'" );
+      return FALSE;
+   }
+   ctx->cur++;
+   return TRUE;
+}
+
+/* Parse source register operand.
+ *    <register_src> ::= <register_file_bracket_index> `]' |
+ *                       <register_file_bracket> <register_dst> `]' |
+ *                       <register_file_bracket> <register_dst> `+' <uint> `]' |
+ *                       <register_file_bracket> <register_dst> `-' <uint> `]'
+ */
+static boolean
+parse_register_src(
+   struct translate_ctx *ctx,
+   uint *file,
+   int *index,
+   uint *ind_file,
+   int *ind_index )
+{
+   const char *cur;
+   uint uindex;
+
+   if (!parse_register_file_bracket( ctx, file ))
+      return FALSE;
+   eat_opt_white( &ctx->cur );
+   cur = ctx->cur;
+   if (parse_file( &cur, ind_file )) {
+      if (!parse_register_dst( ctx, ind_file, ind_index ))
+         return FALSE;
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur == '+' || *ctx->cur == '-') {
+         boolean negate;
+
+         negate = *ctx->cur == '-';
+         ctx->cur++;
+         eat_opt_white( &ctx->cur );
+         if (!parse_uint( &ctx->cur, &uindex )) {
+            report_error( ctx, "Expected literal unsigned integer" );
+            return FALSE;
+         }
+         if (negate)
+            *index = -(int) uindex;
+         else
+            *index = (int) uindex;
+      }
+      else {
+         *index = 0;
+      }
+   }
+   else {
+      if (!parse_uint( &ctx->cur, &uindex )) {
+         report_error( ctx, "Expected literal unsigned integer" );
+         return FALSE;
+      }
+      *index = (int) uindex;
+      *ind_file = TGSI_FILE_NULL;
+      *ind_index = 0;
+   }
+   eat_opt_white( &ctx->cur );
+   if (*ctx->cur != ']') {
+      report_error( ctx, "Expected `]'" );
+      return FALSE;
+   }
+   ctx->cur++;
+   return TRUE;
+}
+
+/* Parse register declaration.
+ *    <register_dcl> ::= <register_file_bracket_index> `]' |
+ *                       <register_file_bracket_index> `..' <index> `]'
+ */
+static boolean
+parse_register_dcl(
+   struct translate_ctx *ctx,
+   uint *file,
+   int *first,
+   int *last )
+{
+   if (!parse_register_file_bracket_index( ctx, file, first ))
+      return FALSE;
+   eat_opt_white( &ctx->cur );
+   if (ctx->cur[0] == '.' && ctx->cur[1] == '.') {
+      uint uindex;
+
+      ctx->cur += 2;
+      eat_opt_white( &ctx->cur );
+      if (!parse_uint( &ctx->cur, &uindex )) {
+         report_error( ctx, "Expected literal integer" );
+         return FALSE;
+      }
+      *last = (int) uindex;
+      eat_opt_white( &ctx->cur );
+   }
+   else {
+      *last = *first;
+   }
+   if (*ctx->cur != ']') {
+      report_error( ctx, "Expected `]' or `..'" );
+      return FALSE;
+   }
+   ctx->cur++;
+   return TRUE;
+}
+
+static const char *modulate_names[TGSI_MODULATE_COUNT] =
+{
+   "_1X",
+   "_2X",
+   "_4X",
+   "_8X",
+   "_D2",
+   "_D4",
+   "_D8"
+};
+
+static boolean
+parse_dst_operand(
+   struct translate_ctx *ctx,
+   struct tgsi_full_dst_register *dst )
+{
+   uint file;
+   int index;
+   uint writemask;
+   const char *cur;
+
+   if (!parse_register_dst( ctx, &file, &index ))
+      return FALSE;
+
+   cur = ctx->cur;
+   eat_opt_white( &cur );
+   if (*cur == '_') {
+      uint i;
+
+      for (i = 0; i < TGSI_MODULATE_COUNT; i++) {
+         if (str_match_no_case( &cur, modulate_names[i] )) {
+            if (!is_digit_alpha_underscore( cur )) {
+               dst->DstRegisterExtModulate.Modulate = i;
+               ctx->cur = cur;
+               break;
+            }
+         }
+      }
+   }
+
+   if (!parse_opt_writemask( ctx, &writemask ))
+      return FALSE;
+
+   dst->DstRegister.File = file;
+   dst->DstRegister.Index = index;
+   dst->DstRegister.WriteMask = writemask;
+   return TRUE;
+}
+
+static boolean
+parse_optional_swizzle(
+   struct translate_ctx *ctx,
+   uint swizzle[4],
+   boolean *parsed_swizzle,
+   boolean *parsed_extswizzle )
+{
+   const char *cur = ctx->cur;
+
+   *parsed_swizzle = FALSE;
+   *parsed_extswizzle = FALSE;
+
+   eat_opt_white( &cur );
+   if (*cur == '.') {
+      uint i;
+
+      cur++;
+      eat_opt_white( &cur );
+      for (i = 0; i < 4; i++) {
+         if (toupper( *cur ) == 'X')
+            swizzle[i] = TGSI_SWIZZLE_X;
+         else if (toupper( *cur ) == 'Y')
+            swizzle[i] = TGSI_SWIZZLE_Y;
+         else if (toupper( *cur ) == 'Z')
+            swizzle[i] = TGSI_SWIZZLE_Z;
+         else if (toupper( *cur ) == 'W')
+            swizzle[i] = TGSI_SWIZZLE_W;
+         else {
+            if (*cur == '0')
+               swizzle[i] = TGSI_EXTSWIZZLE_ZERO;
+            else if (*cur == '1')
+               swizzle[i] = TGSI_EXTSWIZZLE_ONE;
+            else {
+               report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" );
+               return FALSE;
+            }
+            *parsed_extswizzle = TRUE;
+         }
+         cur++;
+      }
+      *parsed_swizzle = TRUE;
+      ctx->cur = cur;
+   }
+   return TRUE;
+}
+
+static boolean
+parse_src_operand(
+   struct translate_ctx *ctx,
+   struct tgsi_full_src_register *src )
+{
+   const char *cur;
+   float value;
+   uint file;
+   int index;
+   uint ind_file;
+   int ind_index;
+   uint swizzle[4];
+   boolean parsed_swizzle;
+   boolean parsed_extswizzle;
+
+   if (*ctx->cur == '-') {
+      cur = ctx->cur;
+      cur++;
+      eat_opt_white( &cur );
+      if (*cur == '(') {
+         cur++;
+         src->SrcRegisterExtMod.Negate = 1;
+         eat_opt_white( &cur );
+         ctx->cur = cur;
+      }
+   }
+
+   if (*ctx->cur == '|') {
+      ctx->cur++;
+      eat_opt_white( &ctx->cur );
+      src->SrcRegisterExtMod.Absolute = 1;
+   }
+
+   if (*ctx->cur == '-') {
+      ctx->cur++;
+      eat_opt_white( &ctx->cur );
+      src->SrcRegister.Negate = 1;
+   }
+
+   cur = ctx->cur;
+   if (parse_float( &cur, &value )) {
+      if (value == 2.0f) {
+         eat_opt_white( &cur );
+         if (*cur != '*') {
+            report_error( ctx, "Expected `*'" );
+            return FALSE;
+         }
+         cur++;
+         if (*cur != '(') {
+            report_error( ctx, "Expected `('" );
+            return FALSE;
+         }
+         cur++;
+         src->SrcRegisterExtMod.Scale2X = 1;
+         eat_opt_white( &cur );
+         ctx->cur = cur;
+      }
+   }
+
+   if (*ctx->cur == '(') {
+      ctx->cur++;
+      eat_opt_white( &ctx->cur );
+      src->SrcRegisterExtMod.Bias = 1;
+   }
+
+   cur = ctx->cur;
+   if (parse_float( &cur, &value )) {
+      if (value == 1.0f) {
+         eat_opt_white( &cur );
+         if (*cur != '-') {
+            report_error( ctx, "Expected `-'" );
+            return FALSE;
+         }
+         cur++;
+         if (*cur != '(') {
+            report_error( ctx, "Expected `('" );
+            return FALSE;
+         }
+         cur++;
+         src->SrcRegisterExtMod.Complement = 1;
+         eat_opt_white( &cur );
+         ctx->cur = cur;
+      }
+   }
+
+   if (!parse_register_src( ctx, &file, &index, &ind_file, &ind_index ))
+      return FALSE;
+   src->SrcRegister.File = file;
+   src->SrcRegister.Index = index;
+   if (ind_file != TGSI_FILE_NULL) {
+      src->SrcRegister.Indirect = 1;
+      src->SrcRegisterInd.File = ind_file;
+      src->SrcRegisterInd.Index = ind_index;
+   }
+
+   /* Parse optional swizzle.
+    */
+   if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) {
+      if (parsed_extswizzle) {
+         assert( parsed_swizzle );
+
+         src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0];
+         src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1];
+         src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2];
+         src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3];
+      }
+      else if (parsed_swizzle) {
+         src->SrcRegister.SwizzleX = swizzle[0];
+         src->SrcRegister.SwizzleY = swizzle[1];
+         src->SrcRegister.SwizzleZ = swizzle[2];
+         src->SrcRegister.SwizzleW = swizzle[3];
+      }
+   }
+
+   if (src->SrcRegisterExtMod.Complement) {
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != ')') {
+         report_error( ctx, "Expected `)'" );
+         return FALSE;
+      }
+      ctx->cur++;
+   }
+
+   if (src->SrcRegisterExtMod.Bias) {
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != ')') {
+         report_error( ctx, "Expected `)'" );
+         return FALSE;
+      }
+      ctx->cur++;
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != '-') {
+         report_error( ctx, "Expected `-'" );
+         return FALSE;
+      }
+      ctx->cur++;
+      eat_opt_white( &ctx->cur );
+      if (!parse_float( &ctx->cur, &value )) {
+         report_error( ctx, "Expected literal floating point" );
+         return FALSE;
+      }
+      if (value != 0.5f) {
+         report_error( ctx, "Expected 0.5" );
+         return FALSE;
+      }
+   }
+
+   if (src->SrcRegisterExtMod.Scale2X) {
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != ')') {
+         report_error( ctx, "Expected `)'" );
+         return FALSE;
+      }
+      ctx->cur++;
+   }
+
+   if (src->SrcRegisterExtMod.Absolute) {
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != '|') {
+         report_error( ctx, "Expected `|'" );
+         return FALSE;
+      }
+      ctx->cur++;
+   }
+
+   if (src->SrcRegisterExtMod.Negate) {
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != ')') {
+         report_error( ctx, "Expected `)'" );
+         return FALSE;
+      }
+      ctx->cur++;
+   }
+
+   return TRUE;
+}
+
+struct opcode_info
+{
+   uint num_dst;
+   uint num_src;
+   uint is_tex;
+   uint is_branch;
+   const char *mnemonic;
+};
+
+static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] =
+{
+   { 1, 1, 0, 0, "ARL" },
+   { 1, 1, 0, 0, "MOV" },
+   { 1, 1, 0, 0, "LIT" },
+   { 1, 1, 0, 0, "RCP" },
+   { 1, 1, 0, 0, "RSQ" },
+   { 1, 1, 0, 0, "EXP" },
+   { 1, 1, 0, 0, "LOG" },
+   { 1, 2, 0, 0, "MUL" },
+   { 1, 2, 0, 0, "ADD" },
+   { 1, 2, 0, 0, "DP3" },
+   { 1, 2, 0, 0, "DP4" },
+   { 1, 2, 0, 0, "DST" },
+   { 1, 2, 0, 0, "MIN" },
+   { 1, 2, 0, 0, "MAX" },
+   { 1, 2, 0, 0, "SLT" },
+   { 1, 2, 0, 0, "SGE" },
+   { 1, 3, 0, 0, "MAD" },
+   { 1, 2, 0, 0, "SUB" },
+   { 1, 3, 0, 0, "LERP" },
+   { 1, 3, 0, 0, "CND" },
+   { 1, 3, 0, 0, "CND0" },
+   { 1, 3, 0, 0, "DOT2ADD" },
+   { 1, 2, 0, 0, "INDEX" },
+   { 1, 1, 0, 0, "NEGATE" },
+   { 1, 1, 0, 0, "FRAC" },
+   { 1, 3, 0, 0, "CLAMP" },
+   { 1, 1, 0, 0, "FLOOR" },
+   { 1, 1, 0, 0, "ROUND" },
+   { 1, 1, 0, 0, "EXPBASE2" },
+   { 1, 1, 0, 0, "LOGBASE2" },
+   { 1, 2, 0, 0, "POWER" },
+   { 1, 2, 0, 0, "CROSSPRODUCT" },
+   { 1, 2, 0, 0, "MULTIPLYMATRIX" },
+   { 1, 1, 0, 0, "ABS" },
+   { 1, 1, 0, 0, "RCC" },
+   { 1, 2, 0, 0, "DPH" },
+   { 1, 1, 0, 0, "COS" },
+   { 1, 1, 0, 0, "DDX" },
+   { 1, 1, 0, 0, "DDY" },
+   { 0, 1, 0, 0, "KILP" },
+   { 1, 1, 0, 0, "PK2H" },
+   { 1, 1, 0, 0, "PK2US" },
+   { 1, 1, 0, 0, "PK4B" },
+   { 1, 1, 0, 0, "PK4UB" },
+   { 1, 2, 0, 0, "RFL" },
+   { 1, 2, 0, 0, "SEQ" },
+   { 1, 2, 0, 0, "SFL" },
+   { 1, 2, 0, 0, "SGT" },
+   { 1, 1, 0, 0, "SIN" },
+   { 1, 2, 0, 0, "SLE" },
+   { 1, 2, 0, 0, "SNE" },
+   { 1, 2, 0, 0, "STR" },
+   { 1, 2, 1, 0, "TEX" },
+   { 1, 4, 1, 0, "TXD" },
+   { 1, 2, 1, 0, "TXP" },
+   { 1, 1, 0, 0, "UP2H" },
+   { 1, 1, 0, 0, "UP2US" },
+   { 1, 1, 0, 0, "UP4B" },
+   { 1, 1, 0, 0, "UP4UB" },
+   { 1, 3, 0, 0, "X2D" },
+   { 1, 1, 0, 0, "ARA" },
+   { 1, 1, 0, 0, "ARR" },
+   { 0, 1, 0, 0, "BRA" },
+   { 0, 0, 0, 1, "CAL" },
+   { 0, 0, 0, 0, "RET" },
+   { 1, 1, 0, 0, "SSG" },
+   { 1, 3, 0, 0, "CMP" },
+   { 1, 1, 0, 0, "SCS" },
+   { 1, 2, 1, 0, "TXB" },
+   { 1, 1, 0, 0, "NRM" },
+   { 1, 2, 0, 0, "DIV" },
+   { 1, 2, 0, 0, "DP2" },
+   { 1, 2, 1, 0, "TXL" },
+   { 0, 0, 0, 0, "BRK" },
+   { 0, 1, 0, 1, "IF" },
+   { 0, 0, 0, 0, "LOOP" },
+   { 0, 1, 0, 0, "REP" },
+   { 0, 0, 0, 1, "ELSE" },
+   { 0, 0, 0, 0, "ENDIF" },
+   { 0, 0, 0, 0, "ENDLOOP" },
+   { 0, 0, 0, 0, "ENDREP" },
+   { 0, 1, 0, 0, "PUSHA" },
+   { 1, 0, 0, 0, "POPA" },
+   { 1, 1, 0, 0, "CEIL" },
+   { 1, 1, 0, 0, "I2F" },
+   { 1, 1, 0, 0, "NOT" },
+   { 1, 1, 0, 0, "TRUNC" },
+   { 1, 2, 0, 0, "SHL" },
+   { 1, 2, 0, 0, "SHR" },
+   { 1, 2, 0, 0, "AND" },
+   { 1, 2, 0, 0, "OR" },
+   { 1, 2, 0, 0, "MOD" },
+   { 1, 2, 0, 0, "XOR" },
+   { 1, 3, 0, 0, "SAD" },
+   { 1, 2, 1, 0, "TXF" },
+   { 1, 2, 1, 0, "TXQ" },
+   { 0, 0, 0, 0, "CONT" },
+   { 0, 0, 0, 0, "EMIT" },
+   { 0, 0, 0, 0, "ENDPRIM" },
+   { 0, 0, 0, 1, "BGNLOOP2" },
+   { 0, 0, 0, 0, "BGNSUB" },
+   { 0, 0, 0, 1, "ENDLOOP2" },
+   { 0, 0, 0, 0, "ENDSUB" },
+   { 1, 1, 0, 0, "NOISE1" },
+   { 1, 1, 0, 0, "NOISE2" },
+   { 1, 1, 0, 0, "NOISE3" },
+   { 1, 1, 0, 0, "NOISE4" },
+   { 0, 0, 0, 0, "NOP" },
+   { 1, 2, 0, 0, "M4X3" },
+   { 1, 2, 0, 0, "M3X4" },
+   { 1, 2, 0, 0, "M3X3" },
+   { 1, 2, 0, 0, "M3X2" },
+   { 1, 1, 0, 0, "NRM4" },
+   { 0, 1, 0, 0, "CALLNZ" },
+   { 0, 1, 0, 0, "IFC" },
+   { 0, 1, 0, 0, "BREAKC" },
+   { 0, 0, 0, 0, "KIL" },
+   { 0, 0, 0, 0, "END" },
+   { 1, 1, 0, 0, "SWZ" }
+};
+
+static const char *texture_names[TGSI_TEXTURE_COUNT] =
+{
+   "UNKNOWN",
+   "1D",
+   "2D",
+   "3D",
+   "CUBE",
+   "RECT",
+   "SHADOW1D",
+   "SHADOW2D",
+   "SHADOWRECT"
+};
+
+static boolean
+parse_instruction(
+   struct translate_ctx *ctx,
+   boolean has_label )
+{
+   uint i;
+   uint saturate = TGSI_SAT_NONE;
+   const struct opcode_info *info;
+   struct tgsi_full_instruction inst;
+   uint advance;
+
+   /* Parse instruction name.
+    */
+   eat_opt_white( &ctx->cur );
+   for (i = 0; i < TGSI_OPCODE_LAST; i++) {
+      const char *cur = ctx->cur;
+
+      info = &opcode_info[i];
+      if (str_match_no_case( &cur, info->mnemonic )) {
+         if (str_match_no_case( &cur, "_SATNV" ))
+            saturate = TGSI_SAT_MINUS_PLUS_ONE;
+         else if (str_match_no_case( &cur, "_SAT" ))
+            saturate = TGSI_SAT_ZERO_ONE;
+
+         if (info->num_dst + info->num_src + info->is_tex == 0) {
+            if (!is_digit_alpha_underscore( cur )) {
+               ctx->cur = cur;
+               break;
+            }
+         }
+         else if (*cur == '\0' || eat_white( &cur )) {
+            ctx->cur = cur;
+            break;
+         }
+      }
+   }
+   if (i == TGSI_OPCODE_LAST) {
+      if (has_label)
+         report_error( ctx, "Unknown opcode" );
+      else
+         report_error( ctx, "Expected `DCL', `IMM' or a label" );
+      return FALSE;
+   }
+
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = i;
+   inst.Instruction.Saturate = saturate;
+   inst.Instruction.NumDstRegs = info->num_dst;
+   inst.Instruction.NumSrcRegs = info->num_src;
+
+   /* Parse instruction operands.
+    */
+   for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) {
+      if (i > 0) {
+         eat_opt_white( &ctx->cur );
+         if (*ctx->cur != ',') {
+            report_error( ctx, "Expected `,'" );
+            return FALSE;
+         }
+         ctx->cur++;
+         eat_opt_white( &ctx->cur );
+      }
+
+      if (i < info->num_dst) {
+         if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] ))
+            return FALSE;
+      }
+      else if (i < info->num_dst + info->num_src) {
+         if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] ))
+            return FALSE;
+      }
+      else {
+         uint j;
+
+         for (j = 0; j < TGSI_TEXTURE_COUNT; j++) {
+            if (str_match_no_case( &ctx->cur, texture_names[j] )) {
+               if (!is_digit_alpha_underscore( ctx->cur )) {
+                  inst.InstructionExtTexture.Texture = j;
+                  break;
+               }
+            }
+         }
+         if (j == TGSI_TEXTURE_COUNT) {
+            report_error( ctx, "Expected texture target" );
+            return FALSE;
+         }
+      }
+   }
+
+   if (info->is_branch) {
+      uint target;
+
+      eat_opt_white( &ctx->cur );
+      if (*ctx->cur != ':') {
+         report_error( ctx, "Expected `:'" );
+         return FALSE;
+      }
+      ctx->cur++;
+      eat_opt_white( &ctx->cur );
+      if (!parse_uint( &ctx->cur, &target )) {
+         report_error( ctx, "Expected a label" );
+         return FALSE;
+      }
+      inst.InstructionExtLabel.Label = target;
+   }
+
+   advance = tgsi_build_full_instruction(
+      &inst,
+      ctx->tokens_cur,
+      ctx->header,
+      (uint) (ctx->tokens_end - ctx->tokens_cur) );
+   if (advance == 0)
+      return FALSE;
+   ctx->tokens_cur += advance;
+
+   return TRUE;
+}
+
+static const char *semantic_names[TGSI_SEMANTIC_COUNT] =
+{
+   "POSITION",
+   "COLOR",
+   "BCOLOR",
+   "FOG",
+   "PSIZE",
+   "GENERIC",
+   "NORMAL"
+};
+
+static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
+{
+   "CONSTANT",
+   "LINEAR",
+   "PERSPECTIVE"
+};
+
+static boolean parse_declaration( struct translate_ctx *ctx )
+{
+   struct tgsi_full_declaration decl;
+   uint file;
+   int first;
+   int last;
+   uint writemask;
+   const char *cur;
+   uint advance;
+
+   if (!eat_white( &ctx->cur )) {
+      report_error( ctx, "Syntax error" );
+      return FALSE;
+   }
+   if (!parse_register_dcl( ctx, &file, &first, &last ))
+      return FALSE;
+   if (!parse_opt_writemask( ctx, &writemask ))
+      return FALSE;
+
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = file;
+   decl.Declaration.UsageMask = writemask;
+   decl.DeclarationRange.First = first;
+   decl.DeclarationRange.Last = last;
+
+   cur = ctx->cur;
+   eat_opt_white( &cur );
+   if (*cur == ',') {
+      uint i;
+
+      cur++;
+      eat_opt_white( &cur );
+      for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) {
+         if (str_match_no_case( &cur, semantic_names[i] )) {
+            const char *cur2 = cur;
+            uint index;
+
+            if (is_digit_alpha_underscore( cur ))
+               continue;
+            eat_opt_white( &cur2 );
+            if (*cur2 == '[') {
+               cur2++;
+               eat_opt_white( &cur2 );
+               if (!parse_uint( &cur2, &index )) {
+                  report_error( ctx, "Expected literal integer" );
+                  return FALSE;
+               }
+               eat_opt_white( &cur2 );
+               if (*cur2 != ']') {
+                  report_error( ctx, "Expected `]'" );
+                  return FALSE;
+               }
+               cur2++;
+
+               decl.Semantic.SemanticIndex = index;
+
+               cur = cur2;
+            }
+
+            decl.Declaration.Semantic = 1;
+            decl.Semantic.SemanticName = i;
+
+            ctx->cur = cur;
+            break;
+         }
+      }
+   }
+
+   cur = ctx->cur;
+   eat_opt_white( &cur );
+   if (*cur == ',') {
+      uint i;
+
+      cur++;
+      eat_opt_white( &cur );
+      for (i = 0; i < TGSI_INTERPOLATE_COUNT; i++) {
+         if (str_match_no_case( &cur, interpolate_names[i] )) {
+            if (is_digit_alpha_underscore( cur ))
+               continue;
+            decl.Declaration.Interpolate = i;
+
+            ctx->cur = cur;
+            break;
+         }
+      }
+      if (i == TGSI_INTERPOLATE_COUNT) {
+         report_error( ctx, "Expected semantic or interpolate attribute" );
+         return FALSE;
+      }
+   }
+
+   advance = tgsi_build_full_declaration(
+      &decl,
+      ctx->tokens_cur,
+      ctx->header,
+      (uint) (ctx->tokens_end - ctx->tokens_cur) );
+   if (advance == 0)
+      return FALSE;
+   ctx->tokens_cur += advance;
+
+   return TRUE;
+}
+
+static boolean parse_immediate( struct translate_ctx *ctx )
+{
+   struct tgsi_full_immediate imm;
+   uint i;
+   float values[4];
+   uint advance;
+
+   if (!eat_white( &ctx->cur )) {
+      report_error( ctx, "Syntax error" );
+      return FALSE;
+   }
+   if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) {
+      report_error( ctx, "Expected `FLT32'" );
+      return FALSE;
+   }
+   eat_opt_white( &ctx->cur );
+   if (*ctx->cur != '{') {
+      report_error( ctx, "Expected `{'" );
+      return FALSE;
+   }
+   ctx->cur++;
+   for (i = 0; i < 4; i++) {
+      eat_opt_white( &ctx->cur );
+      if (i > 0) {
+         if (*ctx->cur != ',') {
+            report_error( ctx, "Expected `,'" );
+            return FALSE;
+         }
+         ctx->cur++;
+         eat_opt_white( &ctx->cur );
+      }
+      if (!parse_float( &ctx->cur, &values[i] )) {
+         report_error( ctx, "Expected literal floating point" );
+         return FALSE;
+      }
+   }
+   eat_opt_white( &ctx->cur );
+   if (*ctx->cur != '}') {
+      report_error( ctx, "Expected `}'" );
+      return FALSE;
+   }
+   ctx->cur++;
+
+   imm = tgsi_default_full_immediate();
+   imm.Immediate.Size += 4;
+   imm.Immediate.DataType = TGSI_IMM_FLOAT32;
+   imm.u.Pointer = values;
+
+   advance = tgsi_build_full_immediate(
+      &imm,
+      ctx->tokens_cur,
+      ctx->header,
+      (uint) (ctx->tokens_end - ctx->tokens_cur) );
+   if (advance == 0)
+      return FALSE;
+   ctx->tokens_cur += advance;
+
+   return TRUE;
+}
+
+static boolean translate( struct translate_ctx *ctx )
+{
+   eat_opt_white( &ctx->cur );
+   if (!parse_header( ctx ))
+      return FALSE;
+
+   while (*ctx->cur != '\0') {
+      uint label_val = 0;
+
+      if (!eat_white( &ctx->cur )) {
+         report_error( ctx, "Syntax error" );
+         return FALSE;
+      }
+
+      if (*ctx->cur == '\0')
+         break;
+
+      if (parse_label( ctx, &label_val )) {
+         if (!parse_instruction( ctx, TRUE ))
+            return FALSE;
+      }
+      else if (str_match_no_case( &ctx->cur, "DCL" )) {
+         if (!parse_declaration( ctx ))
+            return FALSE;
+      }
+      else if (str_match_no_case( &ctx->cur, "IMM" )) {
+         if (!parse_immediate( ctx ))
+            return FALSE;
+      }
+      else if (!parse_instruction( ctx, FALSE )) {
+         return FALSE;
+      }
+   }
+
+   return TRUE;
+}
+
+boolean
+tgsi_text_translate(
+   const char *text,
+   struct tgsi_token *tokens,
+   uint num_tokens )
+{
+   struct translate_ctx ctx;
+
+   ctx.text = text;
+   ctx.cur = text;
+   ctx.tokens = tokens;
+   ctx.tokens_cur = tokens;
+   ctx.tokens_end = tokens + num_tokens;
+
+   if (!translate( &ctx ))
+      return FALSE;
+
+   return tgsi_sanity_check( tokens );
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.h b/src/gallium/auxiliary/tgsi/tgsi_text.h
new file mode 100644
index 00000000000..8eeeeef1402
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.h
@@ -0,0 +1,47 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_TEXT_H
+#define TGSI_TEXT_H
+
+#include "pipe/p_shader_tokens.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+boolean
+tgsi_text_translate(
+   const char *text,
+   struct tgsi_token *tokens,
+   uint num_tokens );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_TEXT_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c
new file mode 100644
index 00000000000..357f77b05a6
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c
@@ -0,0 +1,199 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * TGSI program transformation utility.
+ *
+ * Authors:  Brian Paul
+ */
+
+
+#include "tgsi_transform.h"
+
+
+
+static void
+emit_instruction(struct tgsi_transform_context *ctx,
+                 const struct tgsi_full_instruction *inst)
+{
+   uint ti = ctx->ti;
+
+   ti += tgsi_build_full_instruction(inst,
+                                     ctx->tokens_out + ti,
+                                     ctx->header,
+                                     ctx->max_tokens_out - ti);
+   ctx->ti = ti;
+}
+
+
+static void
+emit_declaration(struct tgsi_transform_context *ctx,
+                 const struct tgsi_full_declaration *decl)
+{
+   uint ti = ctx->ti;
+
+   ti += tgsi_build_full_declaration(decl,
+                                     ctx->tokens_out + ti,
+                                     ctx->header,
+                                     ctx->max_tokens_out - ti);
+   ctx->ti = ti;
+}
+
+
+static void
+emit_immediate(struct tgsi_transform_context *ctx,
+               const struct tgsi_full_immediate *imm)
+{
+   uint ti = ctx->ti;
+
+   ti += tgsi_build_full_immediate(imm,
+                                   ctx->tokens_out + ti,
+                                   ctx->header,
+                                   ctx->max_tokens_out - ti);
+   ctx->ti = ti;
+}
+
+
+
+/**
+ * Apply user-defined transformations to the input shader to produce
+ * the output shader.
+ * For example, a register search-and-replace operation could be applied
+ * by defining a transform_instruction() callback that examined and changed
+ * the instruction src/dest regs.
+ *
+ * \return number of tokens emitted
+ */
+int
+tgsi_transform_shader(const struct tgsi_token *tokens_in,
+                      struct tgsi_token *tokens_out,
+                      uint max_tokens_out,
+                      struct tgsi_transform_context *ctx)
+{
+   uint procType;
+
+   /* input shader */
+   struct tgsi_parse_context parse;
+
+   /* output shader */
+   struct tgsi_processor *processor;
+
+
+   /**
+    ** callback context init
+    **/
+   ctx->emit_instruction = emit_instruction;
+   ctx->emit_declaration = emit_declaration;
+   ctx->emit_immediate = emit_immediate;
+   ctx->tokens_out = tokens_out;
+   ctx->max_tokens_out = max_tokens_out;
+
+
+   /**
+    ** Setup to begin parsing input shader
+    **/
+   if (tgsi_parse_init( &parse, tokens_in ) != TGSI_PARSE_OK) {
+      debug_printf("tgsi_parse_init() failed in tgsi_transform_shader()!\n");
+      return -1;
+   }
+   procType = parse.FullHeader.Processor.Processor;
+   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
+          procType == TGSI_PROCESSOR_VERTEX ||
+          procType == TGSI_PROCESSOR_GEOMETRY);
+
+
+   /**
+    **  Setup output shader
+    **/
+   *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version();
+
+   ctx->header = (struct tgsi_header *) (tokens_out + 1);
+   *ctx->header = tgsi_build_header();
+
+   processor = (struct tgsi_processor *) (tokens_out + 2);
+   *processor = tgsi_build_processor( procType, ctx->header );
+
+   ctx->ti = 3;
+
+
+   /**
+    ** Loop over incoming program tokens/instructions
+    */
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         {
+            struct tgsi_full_instruction *fullinst
+               = &parse.FullToken.FullInstruction;
+
+            if (ctx->transform_instruction)
+               ctx->transform_instruction(ctx, fullinst);
+            else
+               ctx->emit_instruction(ctx, fullinst);
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         {
+            struct tgsi_full_declaration *fulldecl
+               = &parse.FullToken.FullDeclaration;
+
+            if (ctx->transform_declaration)
+               ctx->transform_declaration(ctx, fulldecl);
+            else
+               ctx->emit_declaration(ctx, fulldecl);
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         {
+            struct tgsi_full_immediate *fullimm
+               = &parse.FullToken.FullImmediate;
+
+            if (ctx->transform_immediate)
+               ctx->transform_immediate(ctx, fullimm);
+            else
+               ctx->emit_immediate(ctx, fullimm);
+         }
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+
+   if (ctx->epilog) {
+      ctx->epilog(ctx);
+   }
+
+   tgsi_parse_free (&parse);
+
+   return ctx->ti;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h
new file mode 100644
index 00000000000..3da0b382711
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -0,0 +1,93 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_TRANSFORM_H
+#define TGSI_TRANSFORM_H
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_build.h"
+
+
+
+/**
+ * Subclass this to add caller-specific data
+ */
+struct tgsi_transform_context
+{
+/**** PUBLIC ***/
+
+   /**
+    * User-defined callbacks invoked per instruction.
+    */
+   void (*transform_instruction)(struct tgsi_transform_context *ctx,
+                                 struct tgsi_full_instruction *inst);
+
+   void (*transform_declaration)(struct tgsi_transform_context *ctx,
+                                 struct tgsi_full_declaration *decl);
+
+   void (*transform_immediate)(struct tgsi_transform_context *ctx,
+                               struct tgsi_full_immediate *imm);
+
+   /**
+    * Called at end of input program to allow caller to append extra
+    * instructions.  Return number of tokens emitted.
+    */
+   void (*epilog)(struct tgsi_transform_context *ctx);
+
+
+/*** PRIVATE ***/
+
+   /**
+    * These are setup by tgsi_transform_shader() and cannot be overridden.
+    * Meant to be called from in the above user callback functions.
+    */
+   void (*emit_instruction)(struct tgsi_transform_context *ctx,
+                            const struct tgsi_full_instruction *inst);
+   void (*emit_declaration)(struct tgsi_transform_context *ctx,
+                            const struct tgsi_full_declaration *decl);
+   void (*emit_immediate)(struct tgsi_transform_context *ctx,
+                          const struct tgsi_full_immediate *imm);
+
+   struct tgsi_header *header;
+   uint max_tokens_out;
+   struct tgsi_token *tokens_out;
+   uint ti;
+};
+
+
+
+extern int
+tgsi_transform_shader(const struct tgsi_token *tokens_in,
+                      struct tgsi_token *tokens_out,
+                      uint max_tokens_out,
+                      struct tgsi_transform_context *ctx);
+
+
+#endif /* TGSI_TRANSFORM_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
new file mode 100644
index 00000000000..09486e649e1
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -0,0 +1,300 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_parse.h"
+#include "tgsi_build.h"
+#include "tgsi_util.h"
+
+union pointer_hack
+{
+   void *pointer;
+   uint64_t uint64;
+};
+
+void *
+tgsi_align_128bit(
+   void *unaligned )
+{
+   union pointer_hack ph;
+
+   ph.uint64 = 0;
+   ph.pointer = unaligned;
+   ph.uint64 = (ph.uint64 + 15) & ~15;
+   return ph.pointer;
+}
+
+unsigned
+tgsi_util_get_src_register_swizzle(
+   const struct tgsi_src_register *reg,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      return reg->SwizzleX;
+   case 1:
+      return reg->SwizzleY;
+   case 2:
+      return reg->SwizzleZ;
+   case 3:
+      return reg->SwizzleW;
+   default:
+      assert( 0 );
+   }
+   return 0;
+}
+
+unsigned
+tgsi_util_get_src_register_extswizzle(
+   const struct tgsi_src_register_ext_swz *reg,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      return reg->ExtSwizzleX;
+   case 1:
+      return reg->ExtSwizzleY;
+   case 2:
+      return reg->ExtSwizzleZ;
+   case 3:
+      return reg->ExtSwizzleW;
+   default:
+      assert( 0 );
+   }
+   return 0;
+}
+
+unsigned
+tgsi_util_get_full_src_register_extswizzle(
+   const struct tgsi_full_src_register  *reg,
+   unsigned component )
+{
+   unsigned swizzle;
+
+   /*
+    * First, calculate  the   extended swizzle for a given channel. This will give
+    * us either a channel index into the simple swizzle or  a constant 1 or   0.
+    */
+   swizzle = tgsi_util_get_src_register_extswizzle(
+      &reg->SrcRegisterExtSwz,
+      component );
+
+   assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
+   assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
+   assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
+   assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
+   assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
+   assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
+
+   /*
+    * Second, calculate the simple  swizzle  for   the   unswizzled channel index.
+    * Leave the constants intact, they are   not   affected by the   simple swizzle.
+    */
+   if( swizzle <= TGSI_SWIZZLE_W ) {
+      swizzle = tgsi_util_get_src_register_swizzle(
+         &reg->SrcRegister,
+         swizzle );
+   }
+
+   return swizzle;
+}
+
+void
+tgsi_util_set_src_register_swizzle(
+   struct tgsi_src_register *reg,
+   unsigned swizzle,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      reg->SwizzleX = swizzle;
+      break;
+   case 1:
+      reg->SwizzleY = swizzle;
+      break;
+   case 2:
+      reg->SwizzleZ = swizzle;
+      break;
+   case 3:
+      reg->SwizzleW = swizzle;
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+void
+tgsi_util_set_src_register_extswizzle(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned swizzle,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      reg->ExtSwizzleX = swizzle;
+      break;
+   case 1:
+      reg->ExtSwizzleY = swizzle;
+      break;
+   case 2:
+      reg->ExtSwizzleZ = swizzle;
+      break;
+   case 3:
+      reg->ExtSwizzleW = swizzle;
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+unsigned
+tgsi_util_get_src_register_extnegate(
+   const  struct tgsi_src_register_ext_swz *reg,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      return reg->NegateX;
+   case 1:
+      return reg->NegateY;
+   case 2:
+      return reg->NegateZ;
+   case 3:
+      return reg->NegateW;
+   default:
+      assert( 0 );
+   }
+   return 0;
+}
+
+void
+tgsi_util_set_src_register_extnegate(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned negate,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      reg->NegateX = negate;
+      break;
+   case 1:
+      reg->NegateY = negate;
+      break;
+   case 2:
+      reg->NegateZ = negate;
+      break;
+   case 3:
+      reg->NegateW = negate;
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+unsigned
+tgsi_util_get_full_src_register_sign_mode(
+   const struct  tgsi_full_src_register *reg,
+   unsigned component )
+{
+   unsigned sign_mode;
+
+   if( reg->SrcRegisterExtMod.Absolute ) {
+      /* Consider only the post-abs negation. */
+
+      if( reg->SrcRegisterExtMod.Negate ) {
+         sign_mode = TGSI_UTIL_SIGN_SET;
+      }
+      else {
+         sign_mode = TGSI_UTIL_SIGN_CLEAR;
+      }
+   }
+   else {
+      /* Accumulate the three negations. */
+
+      unsigned negate;
+
+      negate = reg->SrcRegister.Negate;
+      if( tgsi_util_get_src_register_extnegate( &reg->SrcRegisterExtSwz, component ) ) {
+         negate = !negate;
+      }
+      if( reg->SrcRegisterExtMod.Negate ) {
+         negate = !negate;
+      }
+
+      if( negate ) {
+         sign_mode = TGSI_UTIL_SIGN_TOGGLE;
+      }
+      else {
+         sign_mode = TGSI_UTIL_SIGN_KEEP;
+      }
+   }
+
+   return sign_mode;
+}
+
+void
+tgsi_util_set_full_src_register_sign_mode(
+   struct tgsi_full_src_register *reg,
+   unsigned sign_mode )
+{
+   reg->SrcRegisterExtSwz.NegateX = 0;
+   reg->SrcRegisterExtSwz.NegateY = 0;
+   reg->SrcRegisterExtSwz.NegateZ = 0;
+   reg->SrcRegisterExtSwz.NegateW = 0;
+
+   switch (sign_mode)
+   {
+   case TGSI_UTIL_SIGN_CLEAR:
+      reg->SrcRegister.Negate = 0;
+      reg->SrcRegisterExtMod.Absolute = 1;
+      reg->SrcRegisterExtMod.Negate = 0;
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      reg->SrcRegister.Negate = 0;
+      reg->SrcRegisterExtMod.Absolute = 1;
+      reg->SrcRegisterExtMod.Negate = 1;
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      reg->SrcRegister.Negate = 1;
+      reg->SrcRegisterExtMod.Absolute = 0;
+      reg->SrcRegisterExtMod.Negate = 0;
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+      reg->SrcRegister.Negate = 0;
+      reg->SrcRegisterExtMod.Absolute = 0;
+      reg->SrcRegisterExtMod.Negate = 0;
+      break;
+
+   default:
+      assert( 0 );
+   }
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h
new file mode 100644
index 00000000000..7877f345587
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -0,0 +1,96 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_UTIL_H
+#define TGSI_UTIL_H
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+void *
+tgsi_align_128bit(
+   void *unaligned );
+
+unsigned
+tgsi_util_get_src_register_swizzle(
+   const struct tgsi_src_register *reg,
+   unsigned component );
+
+unsigned
+tgsi_util_get_src_register_extswizzle(
+   const struct tgsi_src_register_ext_swz *reg,
+   unsigned component);
+
+unsigned
+tgsi_util_get_full_src_register_extswizzle(
+   const struct tgsi_full_src_register *reg,
+   unsigned component );
+
+void
+tgsi_util_set_src_register_swizzle(
+   struct tgsi_src_register *reg,
+   unsigned swizzle,
+   unsigned component );
+
+void
+tgsi_util_set_src_register_extswizzle(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned swizzle,
+   unsigned component );
+
+unsigned
+tgsi_util_get_src_register_extnegate(
+   const struct tgsi_src_register_ext_swz *reg,
+   unsigned component );
+
+void
+tgsi_util_set_src_register_extnegate(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned negate,
+   unsigned component );
+
+#define TGSI_UTIL_SIGN_CLEAR    0   /* Force positive */
+#define TGSI_UTIL_SIGN_SET      1   /* Force negative */
+#define TGSI_UTIL_SIGN_TOGGLE   2   /* Negate */
+#define TGSI_UTIL_SIGN_KEEP     3   /* No change */
+
+unsigned
+tgsi_util_get_full_src_register_sign_mode(
+   const struct tgsi_full_src_register *reg,
+   unsigned component );
+
+void
+tgsi_util_set_full_src_register_sign_mode(
+   struct tgsi_full_src_register *reg,
+   unsigned sign_mode );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_UTIL_H */
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
deleted file mode 100644
index 742ef14c352..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c
+++ /dev/null
@@ -1,1324 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi_build.h"
-#include "tgsi_parse.h"
-
-/*
- * version
- */
-
-struct tgsi_version
-tgsi_build_version( void )
-{
-   struct tgsi_version  version;
-
-   version.MajorVersion = 1;
-   version.MinorVersion = 1;
-   version.Padding = 0;
-
-   return version;
-}
-
-/*
- * header
- */
-
-struct tgsi_header
-tgsi_build_header( void )
-{
-   struct tgsi_header header;
-
-   header.HeaderSize = 1;
-   header.BodySize = 0;
-
-   return header;
-}
-
-static void
-header_headersize_grow( struct tgsi_header *header )
-{
-   assert( header->HeaderSize < 0xFF );
-   assert( header->BodySize == 0 );
-
-   header->HeaderSize++;
-}
-
-static void
-header_bodysize_grow( struct tgsi_header *header )
-{
-   assert( header->BodySize < 0xFFFFFF );
-
-   header->BodySize++;
-}
-
-struct tgsi_processor
-tgsi_default_processor( void )
-{
-   struct tgsi_processor processor;
-
-   processor.Processor = TGSI_PROCESSOR_FRAGMENT;
-   processor.Padding = 0;
-
-   return processor;
-}
-
-struct tgsi_processor
-tgsi_build_processor(
-   unsigned type,
-   struct tgsi_header *header )
-{
-   struct tgsi_processor processor;
-
-   processor = tgsi_default_processor();
-   processor.Processor = type;
-
-   header_headersize_grow( header );
-
-   return processor;
-}
-
-/*
- * declaration
- */
-
-struct tgsi_declaration
-tgsi_default_declaration( void )
-{
-   struct tgsi_declaration declaration;
-
-   declaration.Type = TGSI_TOKEN_TYPE_DECLARATION;
-   declaration.Size = 1;
-   declaration.File = TGSI_FILE_NULL;
-   declaration.UsageMask = TGSI_WRITEMASK_XYZW;
-   declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT;
-   declaration.Semantic = 0;
-   declaration.Padding = 0;
-   declaration.Extended = 0;
-
-   return declaration;
-}
-
-struct tgsi_declaration
-tgsi_build_declaration(
-   unsigned file,
-   unsigned usage_mask,
-   unsigned interpolate,
-   unsigned semantic,
-   struct tgsi_header *header )
-{
-   struct tgsi_declaration declaration;
-
-   assert( file <= TGSI_FILE_IMMEDIATE );
-   assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE );
-
-   declaration = tgsi_default_declaration();
-   declaration.File = file;
-   declaration.UsageMask = usage_mask;
-   declaration.Interpolate = interpolate;
-   declaration.Semantic = semantic;
-
-   header_bodysize_grow( header );
-
-   return declaration;
-}
-
-static void
-declaration_grow(
-   struct tgsi_declaration *declaration,
-   struct tgsi_header *header )
-{
-   assert( declaration->Size < 0xFF );
-
-   declaration->Size++;
-
-   header_bodysize_grow( header );
-}
-
-struct tgsi_full_declaration
-tgsi_default_full_declaration( void )
-{
-   struct tgsi_full_declaration  full_declaration;
-
-   full_declaration.Declaration  = tgsi_default_declaration();
-   full_declaration.DeclarationRange = tgsi_default_declaration_range();
-   full_declaration.Semantic = tgsi_default_declaration_semantic();
-
-   return full_declaration;
-}
-
-unsigned
-tgsi_build_full_declaration(
-   const struct tgsi_full_declaration *full_decl,
-   struct tgsi_token *tokens,
-   struct tgsi_header *header,
-   unsigned maxsize )
-{
-   unsigned size = 0;
-   struct tgsi_declaration *declaration;
-   struct tgsi_declaration_range *dr;
-
-   if( maxsize <= size )
-     return 0;
-   declaration = (struct tgsi_declaration *) &tokens[size];
-   size++;
-
-   *declaration = tgsi_build_declaration(
-      full_decl->Declaration.File,
-      full_decl->Declaration.UsageMask,
-      full_decl->Declaration.Interpolate,
-      full_decl->Declaration.Semantic,
-      header );
-
-   if (maxsize <= size)
-      return 0;
-   dr = (struct tgsi_declaration_range *) &tokens[size];
-   size++;
-
-   *dr = tgsi_build_declaration_range(
-      full_decl->DeclarationRange.First,
-      full_decl->DeclarationRange.Last,
-      declaration,
-      header );
-
-   if( full_decl->Declaration.Semantic ) {
-      struct tgsi_declaration_semantic *ds;
-
-      if( maxsize <= size )
-         return  0;
-      ds = (struct tgsi_declaration_semantic *) &tokens[size];
-      size++;
-
-      *ds = tgsi_build_declaration_semantic(
-         full_decl->Semantic.SemanticName,
-         full_decl->Semantic.SemanticIndex,
-         declaration,
-         header );
-   }
-
-   return size;
-}
-
-struct tgsi_declaration_range
-tgsi_default_declaration_range( void )
-{
-   struct tgsi_declaration_range dr;
-
-   dr.First = 0;
-   dr.Last = 0;
-
-   return dr;
-}
-
-struct tgsi_declaration_range
-tgsi_build_declaration_range(
-   unsigned first,
-   unsigned last,
-   struct tgsi_declaration *declaration,
-   struct tgsi_header *header )
-{
-   struct tgsi_declaration_range declaration_range;
-
-   assert( last >= first );
-   assert( last <= 0xFFFF );
-
-   declaration_range = tgsi_default_declaration_range();
-   declaration_range.First = first;
-   declaration_range.Last = last;
-
-   declaration_grow( declaration, header );
-
-   return declaration_range;
-}
-
-struct tgsi_declaration_semantic
-tgsi_default_declaration_semantic( void )
-{
-   struct tgsi_declaration_semantic ds;
-
-   ds.SemanticName = TGSI_SEMANTIC_POSITION;
-   ds.SemanticIndex = 0;
-   ds.Padding = 0;
-
-   return ds;
-}
-
-struct tgsi_declaration_semantic
-tgsi_build_declaration_semantic(
-   unsigned semantic_name,
-   unsigned semantic_index,
-   struct tgsi_declaration *declaration,
-   struct tgsi_header *header )
-{
-   struct tgsi_declaration_semantic ds;
-
-   assert( semantic_name <= TGSI_SEMANTIC_COUNT );
-   assert( semantic_index <= 0xFFFF );
-
-   ds = tgsi_default_declaration_semantic();
-   ds.SemanticName = semantic_name;
-   ds.SemanticIndex = semantic_index;
-
-   declaration_grow( declaration, header );
-
-   return ds;
-}
-
-/*
- * immediate
- */
-
-struct tgsi_immediate
-tgsi_default_immediate( void )
-{
-   struct tgsi_immediate immediate;
-
-   immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
-   immediate.Size = 1;
-   immediate.DataType = TGSI_IMM_FLOAT32;
-   immediate.Padding = 0;
-   immediate.Extended = 0;
-
-   return immediate;
-}
-
-struct tgsi_immediate
-tgsi_build_immediate(
-   struct tgsi_header *header )
-{
-   struct tgsi_immediate immediate;
-
-   immediate = tgsi_default_immediate();
-
-   header_bodysize_grow( header );
-
-   return immediate;
-}
-
-struct tgsi_full_immediate
-tgsi_default_full_immediate( void )
-{
-   struct tgsi_full_immediate fullimm;
-
-   fullimm.Immediate = tgsi_default_immediate();
-   fullimm.u.Pointer = (void *) 0;
-
-   return fullimm;
-}
-
-static void
-immediate_grow(
-   struct tgsi_immediate *immediate,
-   struct tgsi_header *header )
-{
-   assert( immediate->Size < 0xFF );
-
-   immediate->Size++;
-
-   header_bodysize_grow( header );
-}
-
-struct tgsi_immediate_float32
-tgsi_build_immediate_float32(
-   float value,
-   struct tgsi_immediate *immediate,
-   struct tgsi_header *header )
-{
-   struct tgsi_immediate_float32 immediate_float32;
-
-   immediate_float32.Float = value;
-
-   immediate_grow( immediate, header );
-
-   return immediate_float32;
-}
-
-unsigned
-tgsi_build_full_immediate(
-   const struct tgsi_full_immediate *full_imm,
-   struct tgsi_token *tokens,
-   struct tgsi_header *header,
-   unsigned maxsize )
-{
-   unsigned size = 0, i;
-   struct tgsi_immediate *immediate;
-
-   if( maxsize <= size )
-      return 0;
-   immediate = (struct tgsi_immediate *) &tokens[size];
-   size++;
-
-   *immediate = tgsi_build_immediate( header );
-
-   for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) {
-      struct tgsi_immediate_float32 *if32;
-
-      if( maxsize <= size )
-         return  0;
-      if32 = (struct tgsi_immediate_float32 *) &tokens[size];
-      size++;
-
-      *if32 = tgsi_build_immediate_float32(
-         full_imm->u.ImmediateFloat32[i].Float,
-         immediate,
-         header );
-   }
-
-   return size;
-}
-
-/*
- * instruction
- */
-
-struct tgsi_instruction
-tgsi_default_instruction( void )
-{
-   struct tgsi_instruction instruction;
-
-   instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
-   instruction.Size = 1;
-   instruction.Opcode = TGSI_OPCODE_MOV;
-   instruction.Saturate = TGSI_SAT_NONE;
-   instruction.NumDstRegs = 1;
-   instruction.NumSrcRegs = 1;
-   instruction.Padding  = 0;
-   instruction.Extended = 0;
-
-   return instruction;
-}
-
-struct tgsi_instruction
-tgsi_build_instruction(
-   unsigned opcode,
-   unsigned saturate,
-   unsigned num_dst_regs,
-   unsigned num_src_regs,
-   struct tgsi_header *header )
-{
-   struct tgsi_instruction instruction;
-
-   assert (opcode <= TGSI_OPCODE_LAST);
-   assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE);
-   assert (num_dst_regs <= 3);
-   assert (num_src_regs <= 15);
-
-   instruction = tgsi_default_instruction();
-   instruction.Opcode = opcode;
-   instruction.Saturate = saturate;
-   instruction.NumDstRegs = num_dst_regs;
-   instruction.NumSrcRegs = num_src_regs;
-
-   header_bodysize_grow( header );
-
-   return instruction;
-}
-
-static void
-instruction_grow(
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   assert (instruction->Size <   0xFF);
-
-   instruction->Size++;
-
-   header_bodysize_grow( header );
-}
-
-struct tgsi_full_instruction
-tgsi_default_full_instruction( void )
-{
-   struct tgsi_full_instruction full_instruction;
-   unsigned i;
-
-   full_instruction.Instruction = tgsi_default_instruction();
-   full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv();
-   full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label();
-   full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture();
-   for( i = 0;  i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) {
-      full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register();
-   }
-   for( i = 0;  i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) {
-      full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register();
-   }
-
-   return full_instruction;
-}
-
-unsigned
-tgsi_build_full_instruction(
-   const struct tgsi_full_instruction *full_inst,
-   struct  tgsi_token *tokens,
-   struct  tgsi_header *header,
-   unsigned  maxsize )
-{
-   unsigned size = 0;
-   unsigned i;
-   struct tgsi_instruction *instruction;
-   struct tgsi_token *prev_token;
-
-   if( maxsize <= size )
-      return 0;
-   instruction = (struct tgsi_instruction *) &tokens[size];
-   size++;
-
-   *instruction = tgsi_build_instruction(
-      full_inst->Instruction.Opcode,
-      full_inst->Instruction.Saturate,
-      full_inst->Instruction.NumDstRegs,
-      full_inst->Instruction.NumSrcRegs,
-      header );
-   prev_token = (struct tgsi_token  *) instruction;
-
-   if( tgsi_compare_instruction_ext_nv(
-         full_inst->InstructionExtNv,
-         tgsi_default_instruction_ext_nv() ) ) {
-      struct tgsi_instruction_ext_nv *instruction_ext_nv;
-
-      if( maxsize <= size )
-         return 0;
-      instruction_ext_nv =
-         (struct  tgsi_instruction_ext_nv *) &tokens[size];
-      size++;
-
-      *instruction_ext_nv  = tgsi_build_instruction_ext_nv(
-         full_inst->InstructionExtNv.Precision,
-         full_inst->InstructionExtNv.CondDstIndex,
-         full_inst->InstructionExtNv.CondFlowIndex,
-         full_inst->InstructionExtNv.CondMask,
-         full_inst->InstructionExtNv.CondSwizzleX,
-         full_inst->InstructionExtNv.CondSwizzleY,
-         full_inst->InstructionExtNv.CondSwizzleZ,
-         full_inst->InstructionExtNv.CondSwizzleW,
-         full_inst->InstructionExtNv.CondDstUpdate,
-         full_inst->InstructionExtNv.CondFlowEnable,
-         prev_token,
-         instruction,
-         header );
-      prev_token = (struct tgsi_token  *) instruction_ext_nv;
-   }
-
-   if( tgsi_compare_instruction_ext_label(
-         full_inst->InstructionExtLabel,
-         tgsi_default_instruction_ext_label() ) ) {
-      struct tgsi_instruction_ext_label *instruction_ext_label;
-
-      if( maxsize <= size )
-         return 0;
-      instruction_ext_label =
-         (struct  tgsi_instruction_ext_label *) &tokens[size];
-      size++;
-
-      *instruction_ext_label = tgsi_build_instruction_ext_label(
-         full_inst->InstructionExtLabel.Label,
-         prev_token,
-         instruction,
-         header );
-      prev_token = (struct tgsi_token  *) instruction_ext_label;
-   }
-
-   if( tgsi_compare_instruction_ext_texture(
-         full_inst->InstructionExtTexture,
-         tgsi_default_instruction_ext_texture() ) ) {
-      struct tgsi_instruction_ext_texture *instruction_ext_texture;
-
-      if( maxsize <= size )
-         return 0;
-      instruction_ext_texture =
-         (struct  tgsi_instruction_ext_texture *) &tokens[size];
-      size++;
-
-      *instruction_ext_texture = tgsi_build_instruction_ext_texture(
-         full_inst->InstructionExtTexture.Texture,
-         prev_token,
-         instruction,
-         header   );
-      prev_token = (struct tgsi_token  *) instruction_ext_texture;
-   }
-
-   for( i = 0;  i <   full_inst->Instruction.NumDstRegs; i++ ) {
-      const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i];
-      struct tgsi_dst_register *dst_register;
-      struct tgsi_token *prev_token;
-
-      if( maxsize <= size )
-         return 0;
-      dst_register = (struct tgsi_dst_register *) &tokens[size];
-      size++;
-
-      *dst_register = tgsi_build_dst_register(
-         reg->DstRegister.File,
-         reg->DstRegister.WriteMask,
-         reg->DstRegister.Index,
-         instruction,
-         header );
-      prev_token = (struct tgsi_token  *) dst_register;
-
-      if( tgsi_compare_dst_register_ext_concode(
-            reg->DstRegisterExtConcode,
-            tgsi_default_dst_register_ext_concode() ) ) {
-         struct tgsi_dst_register_ext_concode *dst_register_ext_concode;
-
-         if( maxsize <= size )
-            return 0;
-         dst_register_ext_concode =
-            (struct  tgsi_dst_register_ext_concode *) &tokens[size];
-         size++;
-
-         *dst_register_ext_concode =   tgsi_build_dst_register_ext_concode(
-            reg->DstRegisterExtConcode.CondMask,
-            reg->DstRegisterExtConcode.CondSwizzleX,
-            reg->DstRegisterExtConcode.CondSwizzleY,
-            reg->DstRegisterExtConcode.CondSwizzleZ,
-            reg->DstRegisterExtConcode.CondSwizzleW,
-            reg->DstRegisterExtConcode.CondSrcIndex,
-            prev_token,
-            instruction,
-            header );
-         prev_token = (struct tgsi_token  *) dst_register_ext_concode;
-      }
-
-      if( tgsi_compare_dst_register_ext_modulate(
-            reg->DstRegisterExtModulate,
-            tgsi_default_dst_register_ext_modulate() ) ) {
-         struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate;
-
-         if( maxsize <= size )
-            return 0;
-         dst_register_ext_modulate =
-            (struct  tgsi_dst_register_ext_modulate *) &tokens[size];
-         size++;
-
-         *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate(
-            reg->DstRegisterExtModulate.Modulate,
-            prev_token,
-            instruction,
-            header );
-         prev_token = (struct tgsi_token  *) dst_register_ext_modulate;
-      }
-   }
-
-   for( i = 0;  i < full_inst->Instruction.NumSrcRegs; i++ ) {
-      const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i];
-      struct tgsi_src_register *src_register;
-      struct tgsi_token *prev_token;
-
-      if( maxsize <= size )
-         return 0;
-      src_register = (struct tgsi_src_register *)  &tokens[size];
-      size++;
-
-      *src_register = tgsi_build_src_register(
-         reg->SrcRegister.File,
-         reg->SrcRegister.SwizzleX,
-         reg->SrcRegister.SwizzleY,
-         reg->SrcRegister.SwizzleZ,
-         reg->SrcRegister.SwizzleW,
-         reg->SrcRegister.Negate,
-         reg->SrcRegister.Indirect,
-         reg->SrcRegister.Dimension,
-         reg->SrcRegister.Index,
-         instruction,
-         header );
-      prev_token = (struct tgsi_token  *) src_register;
-
-      if( tgsi_compare_src_register_ext_swz(
-            reg->SrcRegisterExtSwz,
-            tgsi_default_src_register_ext_swz() ) ) {
-         struct tgsi_src_register_ext_swz *src_register_ext_swz;
-
-         /* Use of the extended swizzle requires the simple swizzle to be identity.
-          */
-         assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X );
-         assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y );
-         assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z );
-         assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W );
-         assert( reg->SrcRegister.Negate == FALSE );
-
-         if( maxsize <= size )
-            return 0;
-         src_register_ext_swz =
-            (struct  tgsi_src_register_ext_swz *) &tokens[size];
-         size++;
-
-         *src_register_ext_swz = tgsi_build_src_register_ext_swz(
-            reg->SrcRegisterExtSwz.ExtSwizzleX,
-            reg->SrcRegisterExtSwz.ExtSwizzleY,
-            reg->SrcRegisterExtSwz.ExtSwizzleZ,
-            reg->SrcRegisterExtSwz.ExtSwizzleW,
-            reg->SrcRegisterExtSwz.NegateX,
-            reg->SrcRegisterExtSwz.NegateY,
-            reg->SrcRegisterExtSwz.NegateZ,
-            reg->SrcRegisterExtSwz.NegateW,
-            prev_token,
-            instruction,
-            header );
-         prev_token = (struct tgsi_token  *) src_register_ext_swz;
-      }
-
-      if( tgsi_compare_src_register_ext_mod(
-            reg->SrcRegisterExtMod,
-            tgsi_default_src_register_ext_mod() ) ) {
-         struct tgsi_src_register_ext_mod *src_register_ext_mod;
-
-         if( maxsize <= size )
-            return 0;
-         src_register_ext_mod =
-            (struct  tgsi_src_register_ext_mod *) &tokens[size];
-         size++;
-
-         *src_register_ext_mod = tgsi_build_src_register_ext_mod(
-            reg->SrcRegisterExtMod.Complement,
-            reg->SrcRegisterExtMod.Bias,
-            reg->SrcRegisterExtMod.Scale2X,
-            reg->SrcRegisterExtMod.Absolute,
-            reg->SrcRegisterExtMod.Negate,
-            prev_token,
-            instruction,
-            header );
-         prev_token = (struct tgsi_token  *) src_register_ext_mod;
-      }
-
-      if( reg->SrcRegister.Indirect ) {
-         struct  tgsi_src_register *ind;
-
-         if( maxsize <= size )
-            return 0;
-         ind = (struct tgsi_src_register *) &tokens[size];
-         size++;
-
-         *ind = tgsi_build_src_register(
-            reg->SrcRegisterInd.File,
-            reg->SrcRegisterInd.SwizzleX,
-            reg->SrcRegisterInd.SwizzleY,
-            reg->SrcRegisterInd.SwizzleZ,
-            reg->SrcRegisterInd.SwizzleW,
-            reg->SrcRegisterInd.Negate,
-            reg->SrcRegisterInd.Indirect,
-            reg->SrcRegisterInd.Dimension,
-            reg->SrcRegisterInd.Index,
-            instruction,
-            header );
-      }
-
-      if( reg->SrcRegister.Dimension ) {
-         struct  tgsi_dimension *dim;
-
-         assert( !reg->SrcRegisterDim.Dimension );
-
-         if( maxsize <= size )
-            return 0;
-         dim = (struct tgsi_dimension *) &tokens[size];
-         size++;
-
-         *dim = tgsi_build_dimension(
-            reg->SrcRegisterDim.Indirect,
-            reg->SrcRegisterDim.Index,
-            instruction,
-            header );
-
-         if( reg->SrcRegisterDim.Indirect ) {
-            struct tgsi_src_register *ind;
-
-            if( maxsize <= size )
-               return 0;
-            ind = (struct tgsi_src_register *) &tokens[size];
-            size++;
-
-            *ind = tgsi_build_src_register(
-               reg->SrcRegisterDimInd.File,
-               reg->SrcRegisterDimInd.SwizzleX,
-               reg->SrcRegisterDimInd.SwizzleY,
-               reg->SrcRegisterDimInd.SwizzleZ,
-               reg->SrcRegisterDimInd.SwizzleW,
-               reg->SrcRegisterDimInd.Negate,
-               reg->SrcRegisterDimInd.Indirect,
-               reg->SrcRegisterDimInd.Dimension,
-               reg->SrcRegisterDimInd.Index,
-               instruction,
-               header );
-         }
-      }
-   }
-
-   return size;
-}
-
-struct tgsi_instruction_ext_nv
-tgsi_default_instruction_ext_nv( void )
-{
-   struct tgsi_instruction_ext_nv instruction_ext_nv;
-
-   instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV;
-   instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT;
-   instruction_ext_nv.CondDstIndex = 0;
-   instruction_ext_nv.CondFlowIndex = 0;
-   instruction_ext_nv.CondMask = TGSI_CC_TR;
-   instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X;
-   instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y;
-   instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z;
-   instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W;
-   instruction_ext_nv.CondDstUpdate = 0;
-   instruction_ext_nv.CondFlowEnable = 0;
-   instruction_ext_nv.Padding = 0;
-   instruction_ext_nv.Extended = 0;
-
-   return instruction_ext_nv;
-}
-
-union token_u32
-{
-   unsigned u32;
-};
-
-unsigned
-tgsi_compare_instruction_ext_nv(
-   struct tgsi_instruction_ext_nv a,
-   struct tgsi_instruction_ext_nv b )
-{
-   a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_instruction_ext_nv
-tgsi_build_instruction_ext_nv(
-   unsigned precision,
-   unsigned cond_dst_index,
-   unsigned cond_flow_index,
-   unsigned cond_mask,
-   unsigned cond_swizzle_x,
-   unsigned cond_swizzle_y,
-   unsigned cond_swizzle_z,
-   unsigned cond_swizzle_w,
-   unsigned cond_dst_update,
-   unsigned cond_flow_update,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_instruction_ext_nv instruction_ext_nv;
-
-   instruction_ext_nv = tgsi_default_instruction_ext_nv();
-   instruction_ext_nv.Precision = precision;
-   instruction_ext_nv.CondDstIndex = cond_dst_index;
-   instruction_ext_nv.CondFlowIndex = cond_flow_index;
-   instruction_ext_nv.CondMask = cond_mask;
-   instruction_ext_nv.CondSwizzleX = cond_swizzle_x;
-   instruction_ext_nv.CondSwizzleY = cond_swizzle_y;
-   instruction_ext_nv.CondSwizzleZ = cond_swizzle_z;
-   instruction_ext_nv.CondSwizzleW = cond_swizzle_w;
-   instruction_ext_nv.CondDstUpdate = cond_dst_update;
-   instruction_ext_nv.CondFlowEnable = cond_flow_update;
-
-   prev_token->Extended = 1;
-   instruction_grow( instruction, header );
-
-   return instruction_ext_nv;
-}
-
-struct tgsi_instruction_ext_label
-tgsi_default_instruction_ext_label( void )
-{
-   struct tgsi_instruction_ext_label instruction_ext_label;
-
-   instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL;
-   instruction_ext_label.Label = 0;
-   instruction_ext_label.Padding = 0;
-   instruction_ext_label.Extended = 0;
-
-   return instruction_ext_label;
-}
-
-unsigned
-tgsi_compare_instruction_ext_label(
-   struct tgsi_instruction_ext_label a,
-   struct tgsi_instruction_ext_label b )
-{
-   a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_instruction_ext_label
-tgsi_build_instruction_ext_label(
-   unsigned label,
-   struct tgsi_token  *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_instruction_ext_label instruction_ext_label;
-
-   instruction_ext_label = tgsi_default_instruction_ext_label();
-   instruction_ext_label.Label = label;
-
-   prev_token->Extended = 1;
-   instruction_grow( instruction, header );
-
-   return instruction_ext_label;
-}
-
-struct tgsi_instruction_ext_texture
-tgsi_default_instruction_ext_texture( void )
-{
-   struct tgsi_instruction_ext_texture instruction_ext_texture;
-
-   instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE;
-   instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN;
-   instruction_ext_texture.Padding = 0;
-   instruction_ext_texture.Extended = 0;
-
-   return instruction_ext_texture;
-}
-
-unsigned
-tgsi_compare_instruction_ext_texture(
-   struct tgsi_instruction_ext_texture a,
-   struct tgsi_instruction_ext_texture b )
-{
-   a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_instruction_ext_texture
-tgsi_build_instruction_ext_texture(
-   unsigned texture,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_instruction_ext_texture instruction_ext_texture;
-
-   instruction_ext_texture = tgsi_default_instruction_ext_texture();
-   instruction_ext_texture.Texture = texture;
-
-   prev_token->Extended = 1;
-   instruction_grow( instruction, header );
-
-   return instruction_ext_texture;
-}
-
-struct tgsi_src_register
-tgsi_default_src_register( void )
-{
-   struct tgsi_src_register src_register;
-
-   src_register.File = TGSI_FILE_NULL;
-   src_register.SwizzleX = TGSI_SWIZZLE_X;
-   src_register.SwizzleY = TGSI_SWIZZLE_Y;
-   src_register.SwizzleZ = TGSI_SWIZZLE_Z;
-   src_register.SwizzleW = TGSI_SWIZZLE_W;
-   src_register.Negate = 0;
-   src_register.Indirect = 0;
-   src_register.Dimension = 0;
-   src_register.Index = 0;
-   src_register.Extended = 0;
-
-   return src_register;
-}
-
-struct tgsi_src_register
-tgsi_build_src_register(
-   unsigned file,
-   unsigned swizzle_x,
-   unsigned swizzle_y,
-   unsigned swizzle_z,
-   unsigned swizzle_w,
-   unsigned negate,
-   unsigned indirect,
-   unsigned dimension,
-   int index,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_src_register   src_register;
-
-   assert( file <= TGSI_FILE_IMMEDIATE );
-   assert( swizzle_x <= TGSI_SWIZZLE_W );
-   assert( swizzle_y <= TGSI_SWIZZLE_W );
-   assert( swizzle_z <= TGSI_SWIZZLE_W );
-   assert( swizzle_w <= TGSI_SWIZZLE_W );
-   assert( negate <= 1 );
-   assert( index >= -0x8000 && index <= 0x7FFF );
-
-   src_register = tgsi_default_src_register();
-   src_register.File = file;
-   src_register.SwizzleX = swizzle_x;
-   src_register.SwizzleY = swizzle_y;
-   src_register.SwizzleZ = swizzle_z;
-   src_register.SwizzleW = swizzle_w;
-   src_register.Negate = negate;
-   src_register.Indirect = indirect;
-   src_register.Dimension = dimension;
-   src_register.Index = index;
-
-   instruction_grow( instruction, header );
-
-   return src_register;
-}
-
-struct tgsi_full_src_register
-tgsi_default_full_src_register( void )
-{
-   struct tgsi_full_src_register full_src_register;
-
-   full_src_register.SrcRegister = tgsi_default_src_register();
-   full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz();
-   full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod();
-   full_src_register.SrcRegisterInd = tgsi_default_src_register();
-   full_src_register.SrcRegisterDim = tgsi_default_dimension();
-   full_src_register.SrcRegisterDimInd = tgsi_default_src_register();
-
-   return full_src_register;
-}
-
-struct tgsi_src_register_ext_swz
-tgsi_default_src_register_ext_swz( void )
-{
-   struct tgsi_src_register_ext_swz src_register_ext_swz;
-
-   src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ;
-   src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X;
-   src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y;
-   src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z;
-   src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W;
-   src_register_ext_swz.NegateX = 0;
-   src_register_ext_swz.NegateY = 0;
-   src_register_ext_swz.NegateZ = 0;
-   src_register_ext_swz.NegateW = 0;
-   src_register_ext_swz.Padding = 0;
-   src_register_ext_swz.Extended = 0;
-
-   return src_register_ext_swz;
-}
-
-unsigned
-tgsi_compare_src_register_ext_swz(
-   struct tgsi_src_register_ext_swz a,
-   struct tgsi_src_register_ext_swz b )
-{
-   a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_src_register_ext_swz
-tgsi_build_src_register_ext_swz(
-   unsigned ext_swizzle_x,
-   unsigned ext_swizzle_y,
-   unsigned ext_swizzle_z,
-   unsigned ext_swizzle_w,
-   unsigned negate_x,
-   unsigned negate_y,
-   unsigned negate_z,
-   unsigned negate_w,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_src_register_ext_swz src_register_ext_swz;
-
-   assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE );
-   assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE );
-   assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE );
-   assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE );
-   assert( negate_x <= 1 );
-   assert( negate_y <= 1 );
-   assert( negate_z <= 1 );
-   assert( negate_w <= 1 );
-
-   src_register_ext_swz = tgsi_default_src_register_ext_swz();
-   src_register_ext_swz.ExtSwizzleX = ext_swizzle_x;
-   src_register_ext_swz.ExtSwizzleY = ext_swizzle_y;
-   src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z;
-   src_register_ext_swz.ExtSwizzleW = ext_swizzle_w;
-   src_register_ext_swz.NegateX = negate_x;
-   src_register_ext_swz.NegateY = negate_y;
-   src_register_ext_swz.NegateZ = negate_z;
-   src_register_ext_swz.NegateW = negate_w;
-
-   prev_token->Extended = 1;
-   instruction_grow( instruction, header );
-
-   return src_register_ext_swz;
-}
-
-struct tgsi_src_register_ext_mod
-tgsi_default_src_register_ext_mod( void )
-{
-   struct tgsi_src_register_ext_mod src_register_ext_mod;
-
-   src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD;
-   src_register_ext_mod.Complement = 0;
-   src_register_ext_mod.Bias = 0;
-   src_register_ext_mod.Scale2X = 0;
-   src_register_ext_mod.Absolute = 0;
-   src_register_ext_mod.Negate = 0;
-   src_register_ext_mod.Padding = 0;
-   src_register_ext_mod.Extended = 0;
-
-   return src_register_ext_mod;
-}
-
-unsigned
-tgsi_compare_src_register_ext_mod(
-   struct tgsi_src_register_ext_mod a,
-   struct tgsi_src_register_ext_mod b )
-{
-   a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_src_register_ext_mod
-tgsi_build_src_register_ext_mod(
-   unsigned complement,
-   unsigned bias,
-   unsigned scale_2x,
-   unsigned absolute,
-   unsigned negate,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_src_register_ext_mod src_register_ext_mod;
-
-   assert( complement <= 1 );
-   assert( bias <= 1 );
-   assert( scale_2x <= 1 );
-   assert( absolute <= 1 );
-   assert( negate <= 1 );
-
-   src_register_ext_mod = tgsi_default_src_register_ext_mod();
-   src_register_ext_mod.Complement = complement;
-   src_register_ext_mod.Bias = bias;
-   src_register_ext_mod.Scale2X = scale_2x;
-   src_register_ext_mod.Absolute = absolute;
-   src_register_ext_mod.Negate = negate;
-
-   prev_token->Extended = 1;
-   instruction_grow( instruction, header );
-
-   return src_register_ext_mod;
-}
-
-struct tgsi_dimension
-tgsi_default_dimension( void )
-{
-   struct tgsi_dimension dimension;
-
-   dimension.Indirect = 0;
-   dimension.Dimension = 0;
-   dimension.Padding = 0;
-   dimension.Index = 0;
-   dimension.Extended = 0;
-
-   return dimension;
-}
-
-struct tgsi_dimension
-tgsi_build_dimension(
-   unsigned indirect,
-   unsigned index,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_dimension dimension;
-
-   dimension = tgsi_default_dimension();
-   dimension.Indirect = indirect;
-   dimension.Index = index;
-
-   instruction_grow( instruction, header );
-
-   return dimension;
-}
-
-struct tgsi_dst_register
-tgsi_default_dst_register( void )
-{
-   struct tgsi_dst_register dst_register;
-
-   dst_register.File = TGSI_FILE_NULL;
-   dst_register.WriteMask = TGSI_WRITEMASK_XYZW;
-   dst_register.Indirect = 0;
-   dst_register.Dimension = 0;
-   dst_register.Index = 0;
-   dst_register.Padding = 0;
-   dst_register.Extended = 0;
-
-   return dst_register;
-}
-
-struct tgsi_dst_register
-tgsi_build_dst_register(
-   unsigned file,
-   unsigned mask,
-   int index,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_dst_register dst_register;
-
-   assert( file <= TGSI_FILE_IMMEDIATE );
-   assert( mask <= TGSI_WRITEMASK_XYZW );
-   assert( index >= -32768 && index <= 32767 );
-
-   dst_register = tgsi_default_dst_register();
-   dst_register.File = file;
-   dst_register.WriteMask = mask;
-   dst_register.Index = index;
-
-   instruction_grow( instruction, header );
-
-   return dst_register;
-}
-
-struct tgsi_full_dst_register
-tgsi_default_full_dst_register( void )
-{
-   struct tgsi_full_dst_register full_dst_register;
-
-   full_dst_register.DstRegister = tgsi_default_dst_register();
-   full_dst_register.DstRegisterExtConcode =
-      tgsi_default_dst_register_ext_concode();
-   full_dst_register.DstRegisterExtModulate =
-      tgsi_default_dst_register_ext_modulate();
-
-   return full_dst_register;
-}
-
-struct tgsi_dst_register_ext_concode
-tgsi_default_dst_register_ext_concode( void )
-{
-   struct tgsi_dst_register_ext_concode dst_register_ext_concode;
-
-   dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE;
-   dst_register_ext_concode.CondMask = TGSI_CC_TR;
-   dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X;
-   dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y;
-   dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z;
-   dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W;
-   dst_register_ext_concode.CondSrcIndex = 0;
-   dst_register_ext_concode.Padding = 0;
-   dst_register_ext_concode.Extended = 0;
-
-   return dst_register_ext_concode;
-}
-
-unsigned
-tgsi_compare_dst_register_ext_concode(
-   struct tgsi_dst_register_ext_concode a,
-   struct tgsi_dst_register_ext_concode b )
-{
-   a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_dst_register_ext_concode
-tgsi_build_dst_register_ext_concode(
-   unsigned cc,
-   unsigned swizzle_x,
-   unsigned swizzle_y,
-   unsigned swizzle_z,
-   unsigned swizzle_w,
-   int index,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_dst_register_ext_concode dst_register_ext_concode;
-
-   assert( cc <= TGSI_CC_FL );
-   assert( swizzle_x <= TGSI_SWIZZLE_W );
-   assert( swizzle_y <= TGSI_SWIZZLE_W );
-   assert( swizzle_z <= TGSI_SWIZZLE_W );
-   assert( swizzle_w <= TGSI_SWIZZLE_W );
-   assert( index >= -32768 && index <= 32767 );
-
-   dst_register_ext_concode = tgsi_default_dst_register_ext_concode();
-   dst_register_ext_concode.CondMask = cc;
-   dst_register_ext_concode.CondSwizzleX = swizzle_x;
-   dst_register_ext_concode.CondSwizzleY = swizzle_y;
-   dst_register_ext_concode.CondSwizzleZ = swizzle_z;
-   dst_register_ext_concode.CondSwizzleW = swizzle_w;
-   dst_register_ext_concode.CondSrcIndex = index;
-
-   prev_token->Extended = 1;
-   instruction_grow( instruction, header );
-
-   return dst_register_ext_concode;
-}
-
-struct tgsi_dst_register_ext_modulate
-tgsi_default_dst_register_ext_modulate( void )
-{
-   struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
-
-   dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE;
-   dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X;
-   dst_register_ext_modulate.Padding = 0;
-   dst_register_ext_modulate.Extended = 0;
-
-   return dst_register_ext_modulate;
-}
-
-unsigned
-tgsi_compare_dst_register_ext_modulate(
-   struct tgsi_dst_register_ext_modulate a,
-   struct tgsi_dst_register_ext_modulate b )
-{
-   a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_dst_register_ext_modulate
-tgsi_build_dst_register_ext_modulate(
-   unsigned modulate,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
-
-   assert( modulate <= TGSI_MODULATE_EIGHTH );
-
-   dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate();
-   dst_register_ext_modulate.Modulate = modulate;
-
-   prev_token->Extended = 1;
-   instruction_grow( instruction, header );
-
-   return dst_register_ext_modulate;
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
deleted file mode 100644
index ed258302487..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h
+++ /dev/null
@@ -1,332 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef TGSI_BUILD_H
-#define TGSI_BUILD_H
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-/*
- * version
- */
-
-struct tgsi_version
-tgsi_build_version( void );
-
-/*
- * header
- */
-
-struct tgsi_header
-tgsi_build_header( void );
-
-struct tgsi_processor
-tgsi_default_processor( void );
-
-struct tgsi_processor
-tgsi_build_processor(
-   unsigned processor,
-   struct tgsi_header *header );
-
-/*
- * declaration
- */
-
-struct tgsi_declaration
-tgsi_default_declaration( void );
-
-struct tgsi_declaration
-tgsi_build_declaration(
-   unsigned file,
-   unsigned usage_mask,
-   unsigned interpolate,
-   unsigned semantic,
-   struct tgsi_header *header );
-
-struct tgsi_full_declaration
-tgsi_default_full_declaration( void );
-
-unsigned
-tgsi_build_full_declaration(
-   const struct tgsi_full_declaration *full_decl,
-   struct tgsi_token *tokens,
-   struct tgsi_header *header,
-   unsigned maxsize );
-
-struct tgsi_declaration_range
-tgsi_default_declaration_range( void );
-
-struct tgsi_declaration_range
-tgsi_build_declaration_range(
-   unsigned first,
-   unsigned last,
-   struct tgsi_declaration *declaration,
-   struct tgsi_header *header );
-
-struct tgsi_declaration_semantic
-tgsi_default_declaration_semantic( void );
-
-struct tgsi_declaration_semantic
-tgsi_build_declaration_semantic(
-   unsigned semantic_name,
-   unsigned semantic_index,
-   struct tgsi_declaration *declaration,
-   struct tgsi_header *header );
-
-/*
- * immediate
- */
-
-struct tgsi_immediate
-tgsi_default_immediate( void );
-
-struct tgsi_immediate
-tgsi_build_immediate(
-   struct tgsi_header *header );
-
-struct tgsi_full_immediate
-tgsi_default_full_immediate( void );
-
-struct tgsi_immediate_float32
-tgsi_build_immediate_float32(
-   float value,
-   struct tgsi_immediate *immediate,
-   struct tgsi_header *header );
-
-unsigned
-tgsi_build_full_immediate(
-   const struct tgsi_full_immediate *full_imm,
-   struct tgsi_token *tokens,
-   struct tgsi_header *header,
-   unsigned maxsize );
-
-/*
- * instruction
- */
-
-struct tgsi_instruction
-tgsi_default_instruction( void );
-
-struct tgsi_instruction
-tgsi_build_instruction(
-   unsigned opcode,
-   unsigned saturate,
-   unsigned num_dst_regs,
-   unsigned num_src_regs,
-   struct tgsi_header *header );
-
-struct tgsi_full_instruction
-tgsi_default_full_instruction( void );
-
-unsigned
-tgsi_build_full_instruction(
-   const struct tgsi_full_instruction *full_inst,
-   struct tgsi_token *tokens,
-   struct tgsi_header *header,
-   unsigned maxsize );
-
-struct tgsi_instruction_ext_nv
-tgsi_default_instruction_ext_nv( void );
-
-unsigned
-tgsi_compare_instruction_ext_nv(
-   struct tgsi_instruction_ext_nv a,
-   struct tgsi_instruction_ext_nv b );
-
-struct tgsi_instruction_ext_nv
-tgsi_build_instruction_ext_nv(
-   unsigned precision,
-   unsigned cond_dst_index,
-   unsigned cond_flow_index,
-   unsigned cond_mask,
-   unsigned cond_swizzle_x,
-   unsigned cond_swizzle_y,
-   unsigned cond_swizzle_z,
-   unsigned cond_swizzle_w,
-   unsigned cond_dst_update,
-   unsigned cond_flow_update,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
-
-struct tgsi_instruction_ext_label
-tgsi_default_instruction_ext_label( void );
-
-unsigned
-tgsi_compare_instruction_ext_label(
-   struct tgsi_instruction_ext_label a,
-   struct tgsi_instruction_ext_label b );
-
-struct tgsi_instruction_ext_label
-tgsi_build_instruction_ext_label(
-   unsigned label,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
-
-struct tgsi_instruction_ext_texture
-tgsi_default_instruction_ext_texture( void );
-
-unsigned
-tgsi_compare_instruction_ext_texture(
-   struct tgsi_instruction_ext_texture a,
-   struct tgsi_instruction_ext_texture b );
-
-struct tgsi_instruction_ext_texture
-tgsi_build_instruction_ext_texture(
-   unsigned texture,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
-
-struct tgsi_src_register
-tgsi_default_src_register( void );
-
-struct tgsi_src_register
-tgsi_build_src_register(
-   unsigned file,
-   unsigned swizzle_x,
-   unsigned swizzle_y,
-   unsigned swizzle_z,
-   unsigned swizzle_w,
-   unsigned negate,
-   unsigned indirect,
-   unsigned dimension,
-   int index,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
-
-struct tgsi_full_src_register
-tgsi_default_full_src_register( void );
-
-struct tgsi_src_register_ext_swz
-tgsi_default_src_register_ext_swz( void );
-
-unsigned
-tgsi_compare_src_register_ext_swz(
-   struct tgsi_src_register_ext_swz a,
-   struct tgsi_src_register_ext_swz b );
-
-struct tgsi_src_register_ext_swz
-tgsi_build_src_register_ext_swz(
-   unsigned ext_swizzle_x,
-   unsigned ext_swizzle_y,
-   unsigned ext_swizzle_z,
-   unsigned ext_swizzle_w,
-   unsigned negate_x,
-   unsigned negate_y,
-   unsigned negate_z,
-   unsigned negate_w,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
-
-struct tgsi_src_register_ext_mod
-tgsi_default_src_register_ext_mod( void );
-
-unsigned
-tgsi_compare_src_register_ext_mod(
-   struct tgsi_src_register_ext_mod a,
-   struct tgsi_src_register_ext_mod b );
-
-struct tgsi_src_register_ext_mod
-tgsi_build_src_register_ext_mod(
-   unsigned complement,
-   unsigned bias,
-   unsigned scale_2x,
-   unsigned absolute,
-   unsigned negate,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
-
-struct tgsi_dimension
-tgsi_default_dimension( void );
-
-struct tgsi_dimension
-tgsi_build_dimension(
-   unsigned indirect,
-   unsigned index,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
-
-struct tgsi_dst_register
-tgsi_default_dst_register( void );
-
-struct tgsi_dst_register
-tgsi_build_dst_register(
-   unsigned file,
-   unsigned mask,
-   int index,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
-
-struct tgsi_full_dst_register
-tgsi_default_full_dst_register( void );
-
-struct tgsi_dst_register_ext_concode
-tgsi_default_dst_register_ext_concode( void );
-
-unsigned
-tgsi_compare_dst_register_ext_concode(
-   struct tgsi_dst_register_ext_concode a,
-   struct tgsi_dst_register_ext_concode b );
-
-struct tgsi_dst_register_ext_concode
-tgsi_build_dst_register_ext_concode(
-   unsigned cc,
-   unsigned swizzle_x,
-   unsigned swizzle_y,
-   unsigned swizzle_z,
-   unsigned swizzle_w,
-   int index,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
-
-struct tgsi_dst_register_ext_modulate
-tgsi_default_dst_register_ext_modulate( void );
-
-unsigned
-tgsi_compare_dst_register_ext_modulate(
-   struct tgsi_dst_register_ext_modulate a,
-   struct tgsi_dst_register_ext_modulate b );
-
-struct tgsi_dst_register_ext_modulate
-tgsi_build_dst_register_ext_modulate(
-   unsigned modulate,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
-
-#if defined __cplusplus
-}
-#endif
-
-#endif /* TGSI_BUILD_H */
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
deleted file mode 100644
index d2e6375212f..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ /dev/null
@@ -1,582 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "pipe/p_debug.h"
-#include "tgsi_dump.h"
-#include "tgsi_iterate.h"
-
-struct dump_ctx
-{
-   struct tgsi_iterate_context iter;
-
-   uint instno;
-};
-
-static void
-dump_enum(
-   uint e,
-   const char **enums,
-   uint enum_count )
-{
-   if (e >= enum_count)
-      debug_printf( "%u", e );
-   else
-      debug_printf( "%s", enums[e] );
-}
-
-#define EOL()           debug_printf( "\n" )
-#define TXT(S)          debug_printf( "%s", S )
-#define CHR(C)          debug_printf( "%c", C )
-#define UIX(I)          debug_printf( "0x%x", I )
-#define UID(I)          debug_printf( "%u", I )
-#define SID(I)          debug_printf( "%d", I )
-#define FLT(F)          debug_printf( "%10.4f", F )
-#define ENM(E,ENUMS)    dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
-
-static const char *processor_type_names[] =
-{
-   "FRAG",
-   "VERT",
-   "GEOM"
-};
-
-static const char *file_names[] =
-{
-   "NULL",
-   "CONST",
-   "IN",
-   "OUT",
-   "TEMP",
-   "SAMP",
-   "ADDR",
-   "IMM"
-};
-
-static const char *interpolate_names[] =
-{
-   "CONSTANT",
-   "LINEAR",
-   "PERSPECTIVE"
-};
-
-static const char *semantic_names[] =
-{
-   "POSITION",
-   "COLOR",
-   "BCOLOR",
-   "FOG",
-   "PSIZE",
-   "GENERIC",
-   "NORMAL"
-};
-
-static const char *immediate_type_names[] =
-{
-   "FLT32"
-};
-
-static const char *opcode_names[TGSI_OPCODE_LAST] =
-{
-   "ARL",
-   "MOV",
-   "LIT",
-   "RCP",
-   "RSQ",
-   "EXP",
-   "LOG",
-   "MUL",
-   "ADD",
-   "DP3",
-   "DP4",
-   "DST",
-   "MIN",
-   "MAX",
-   "SLT",
-   "SGE",
-   "MAD",
-   "SUB",
-   "LERP",
-   "CND",
-   "CND0",
-   "DOT2ADD",
-   "INDEX",
-   "NEGATE",
-   "FRAC",
-   "CLAMP",
-   "FLOOR",
-   "ROUND",
-   "EXPBASE2",
-   "LOGBASE2",
-   "POWER",
-   "CROSSPRODUCT",
-   "MULTIPLYMATRIX",
-   "ABS",
-   "RCC",
-   "DPH",
-   "COS",
-   "DDX",
-   "DDY",
-   "KILP",
-   "PK2H",
-   "PK2US",
-   "PK4B",
-   "PK4UB",
-   "RFL",
-   "SEQ",
-   "SFL",
-   "SGT",
-   "SIN",
-   "SLE",
-   "SNE",
-   "STR",
-   "TEX",
-   "TXD",
-   "TXP",
-   "UP2H",
-   "UP2US",
-   "UP4B",
-   "UP4UB",
-   "X2D",
-   "ARA",
-   "ARR",
-   "BRA",
-   "CAL",
-   "RET",
-   "SSG",
-   "CMP",
-   "SCS",
-   "TXB",
-   "NRM",
-   "DIV",
-   "DP2",
-   "TXL",
-   "BRK",
-   "IF",
-   "LOOP",
-   "REP",
-   "ELSE",
-   "ENDIF",
-   "ENDLOOP",
-   "ENDREP",
-   "PUSHA",
-   "POPA",
-   "CEIL",
-   "I2F",
-   "NOT",
-   "TRUNC",
-   "SHL",
-   "SHR",
-   "AND",
-   "OR",
-   "MOD",
-   "XOR",
-   "SAD",
-   "TXF",
-   "TXQ",
-   "CONT",
-   "EMIT",
-   "ENDPRIM",
-   "BGNLOOP2",
-   "BGNSUB",
-   "ENDLOOP2",
-   "ENDSUB",
-   "NOISE1",
-   "NOISE2",
-   "NOISE3",
-   "NOISE4",
-   "NOP",
-   "M4X3",
-   "M3X4",
-   "M3X3",
-   "M3X2",
-   "NRM4",
-   "CALLNZ",
-   "IFC",
-   "BREAKC",
-   "KIL",
-   "END",
-   "SWZ"
-};
-
-static const char *swizzle_names[] =
-{
-   "x",
-   "y",
-   "z",
-   "w"
-};
-
-static const char *texture_names[] =
-{
-   "UNKNOWN",
-   "1D",
-   "2D",
-   "3D",
-   "CUBE",
-   "RECT",
-   "SHADOW1D",
-   "SHADOW2D",
-   "SHADOWRECT"
-};
-
-static const char *extswizzle_names[] =
-{
-   "x",
-   "y",
-   "z",
-   "w",
-   "0",
-   "1"
-};
-
-static const char *modulate_names[TGSI_MODULATE_COUNT] =
-{
-   "",
-   "_2X",
-   "_4X",
-   "_8X",
-   "_D2",
-   "_D4",
-   "_D8"
-};
-
-static void
-_dump_register_prefix(
-   uint file,
-   uint first,
-   uint last )
-{
-   
-   
-}
-
-static void
-_dump_register(
-   uint file,
-   int first,
-   int last )
-{
-   ENM( file, file_names );
-   CHR( '[' );
-   SID( first );
-   if (first != last) {
-      TXT( ".." );
-      SID( last );
-   }
-   CHR( ']' );
-}
-
-static void
-_dump_register_ind(
-   uint file,
-   int index,
-   uint ind_file,
-   int ind_index )
-{
-   ENM( file, file_names );
-   CHR( '[' );
-   ENM( ind_file, file_names );
-   CHR( '[' );
-   SID( ind_index );
-   CHR( ']' );
-   if (index != 0) {
-      if (index > 0)
-         CHR( '+' );
-      SID( index );
-   }
-   CHR( ']' );
-}
-
-static void
-_dump_writemask(
-   uint writemask )
-{
-   if (writemask != TGSI_WRITEMASK_XYZW) {
-      CHR( '.' );
-      if (writemask & TGSI_WRITEMASK_X)
-         CHR( 'x' );
-      if (writemask & TGSI_WRITEMASK_Y)
-         CHR( 'y' );
-      if (writemask & TGSI_WRITEMASK_Z)
-         CHR( 'z' );
-      if (writemask & TGSI_WRITEMASK_W)
-         CHR( 'w' );
-   }
-}
-
-void
-tgsi_dump_declaration(
-   const struct tgsi_full_declaration *decl )
-{
-   TXT( "\nDCL " );
-
-   _dump_register(
-      decl->Declaration.File,
-      decl->DeclarationRange.First,
-      decl->DeclarationRange.Last );
-   _dump_writemask(
-      decl->Declaration.UsageMask );
-
-   if (decl->Declaration.Semantic) {
-      TXT( ", " );
-      ENM( decl->Semantic.SemanticName, semantic_names );
-      if (decl->Semantic.SemanticIndex != 0 ||
-          decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) {
-         CHR( '[' );
-         UID( decl->Semantic.SemanticIndex );
-         CHR( ']' );
-      }
-   }
-
-   TXT( ", " );
-   ENM( decl->Declaration.Interpolate, interpolate_names );
-}
-
-static boolean
-iter_declaration(
-   struct tgsi_iterate_context *iter,
-   struct tgsi_full_declaration *decl )
-{
-   tgsi_dump_declaration( decl );
-   return TRUE;
-}
-
-void
-tgsi_dump_immediate(
-   const struct tgsi_full_immediate *imm )
-{
-   uint i;
-
-   TXT( "\nIMM " );
-   ENM( imm->Immediate.DataType, immediate_type_names );
-
-   TXT( " { " );
-   for (i = 0; i < imm->Immediate.Size - 1; i++) {
-      switch (imm->Immediate.DataType) {
-      case TGSI_IMM_FLOAT32:
-         FLT( imm->u.ImmediateFloat32[i].Float );
-         break;
-      default:
-         assert( 0 );
-      }
-
-      if (i < imm->Immediate.Size - 2)
-         TXT( ", " );
-   }
-   TXT( " }" );
-}
-
-static boolean
-iter_immediate(
-   struct tgsi_iterate_context *iter,
-   struct tgsi_full_immediate *imm )
-{
-   tgsi_dump_immediate( imm );
-   return TRUE;
-}
-
-void
-tgsi_dump_instruction(
-   const struct tgsi_full_instruction *inst,
-   uint instno )
-{
-   uint i;
-   boolean first_reg = TRUE;
-
-   EOL();
-   UID( instno );
-   CHR( ':' );
-   ENM( inst->Instruction.Opcode, opcode_names );
-
-   switch (inst->Instruction.Saturate) {
-   case TGSI_SAT_NONE:
-      break;
-   case TGSI_SAT_ZERO_ONE:
-      TXT( "_SAT" );
-      break;
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      TXT( "_SATNV" );
-      break;
-   default:
-      assert( 0 );
-   }
-
-   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
-      const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
-
-      if (!first_reg)
-         CHR( ',' );
-      CHR( ' ' );
-
-      _dump_register(
-         dst->DstRegister.File,
-         dst->DstRegister.Index,
-         dst->DstRegister.Index );
-      ENM( dst->DstRegisterExtModulate.Modulate, modulate_names );
-      _dump_writemask( dst->DstRegister.WriteMask );
-
-      first_reg = FALSE;
-   }
-
-   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-      const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
-
-      if (!first_reg)
-         CHR( ',' );
-      CHR( ' ' );
-
-      if (src->SrcRegisterExtMod.Negate)
-         TXT( "-(" );
-      if (src->SrcRegisterExtMod.Absolute)
-         CHR( '|' );
-      if (src->SrcRegisterExtMod.Scale2X)
-         TXT( "2*(" );
-      if (src->SrcRegisterExtMod.Bias)
-         CHR( '(' );
-      if (src->SrcRegisterExtMod.Complement)
-         TXT( "1-(" );
-      if (src->SrcRegister.Negate)
-         CHR( '-' );
-
-      if (src->SrcRegister.Indirect) {
-         _dump_register_ind(
-            src->SrcRegister.File,
-            src->SrcRegister.Index,
-            src->SrcRegisterInd.File,
-            src->SrcRegisterInd.Index );
-      }
-      else {
-         _dump_register(
-            src->SrcRegister.File,
-            src->SrcRegister.Index,
-            src->SrcRegister.Index );
-      }
-
-      if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
-          src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
-          src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
-          src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) {
-         CHR( '.' );
-         ENM( src->SrcRegister.SwizzleX, swizzle_names );
-         ENM( src->SrcRegister.SwizzleY, swizzle_names );
-         ENM( src->SrcRegister.SwizzleZ, swizzle_names );
-         ENM( src->SrcRegister.SwizzleW, swizzle_names );
-      }
-      if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
-          src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
-          src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
-          src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
-         CHR( '.' );
-         if (src->SrcRegisterExtSwz.NegateX)
-            TXT("-");
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names );
-         if (src->SrcRegisterExtSwz.NegateY)
-            TXT("-");
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names );
-         if (src->SrcRegisterExtSwz.NegateZ)
-            TXT("-");
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names );
-         if (src->SrcRegisterExtSwz.NegateW)
-            TXT("-");
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names );
-      }
-
-      if (src->SrcRegisterExtMod.Complement)
-         CHR( ')' );
-      if (src->SrcRegisterExtMod.Bias)
-         TXT( ")-.5" );
-      if (src->SrcRegisterExtMod.Scale2X)
-         CHR( ')' );
-      if (src->SrcRegisterExtMod.Absolute)
-         CHR( '|' );
-      if (src->SrcRegisterExtMod.Negate)
-         CHR( ')' );
-
-      first_reg = FALSE;
-   }
-
-   if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) {
-      TXT( ", " );
-      ENM( inst->InstructionExtTexture.Texture, texture_names );
-   }
-
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_IF:
-   case TGSI_OPCODE_ELSE:
-   case TGSI_OPCODE_BGNLOOP2:
-   case TGSI_OPCODE_ENDLOOP2:
-   case TGSI_OPCODE_CAL:
-      TXT( " :" );
-      UID( inst->InstructionExtLabel.Label );
-      break;
-   }
-}
-
-static boolean
-iter_instruction(
-   struct tgsi_iterate_context *iter,
-   struct tgsi_full_instruction *inst )
-{
-   struct dump_ctx *ctx = (struct dump_ctx *) iter;
-
-   tgsi_dump_instruction( inst, ctx->instno++ );
-   return TRUE;
-}
-
-static boolean
-prolog(
-   struct tgsi_iterate_context *ctx )
-{
-   EOL();
-   ENM( ctx->processor.Processor, processor_type_names );
-   UID( ctx->version.MajorVersion );
-   CHR( '.' );
-   UID( ctx->version.MinorVersion );
-   return TRUE;
-}
-
-void
-tgsi_dump(
-   const struct tgsi_token *tokens,
-   uint flags )
-{
-   struct dump_ctx ctx;
-
-   /* sanity checks */
-   assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 );
-   assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 );
-
-   ctx.iter.prolog = prolog;
-   ctx.iter.iterate_instruction = iter_instruction;
-   ctx.iter.iterate_declaration = iter_declaration;
-   ctx.iter.iterate_immediate = iter_immediate;
-   ctx.iter.epilog = NULL;
-
-   ctx.instno = 0;
-
-   tgsi_iterate_shader( tokens, &ctx.iter );
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
deleted file mode 100644
index 51c230b5db4..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef TGSI_DUMP_H
-#define TGSI_DUMP_H
-
-#include "pipe/p_shader_tokens.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-void
-tgsi_dump(
-   const struct tgsi_token *tokens,
-   uint flags );
-
-struct tgsi_full_immediate;
-struct tgsi_full_instruction;
-struct tgsi_full_declaration;
-
-void
-tgsi_dump_immediate(
-   const struct tgsi_full_immediate *imm );
-
-void
-tgsi_dump_instruction(
-   const struct tgsi_full_instruction *inst,
-   uint instno );
-
-void
-tgsi_dump_declaration(
-   const struct tgsi_full_declaration *decl );
-
-#if defined __cplusplus
-}
-#endif
-
-#endif /* TGSI_DUMP_H */
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
deleted file mode 100644
index eabd74bd6d9..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c
+++ /dev/null
@@ -1,845 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-#include "util/u_string.h"
-#include "tgsi_dump_c.h"
-#include "tgsi_parse.h"
-#include "tgsi_build.h"
-
-static void
-dump_enum(
-   const unsigned    e,
-   const char        **enums,
-   const unsigned    enums_count )
-{
-   if (e >= enums_count) {
-      debug_printf( "%u", e );
-   }
-   else {
-      debug_printf( "%s", enums[e] );
-   }
-}
-
-#define EOL()           debug_printf( "\n" )
-#define TXT(S)          debug_printf( "%s", S )
-#define CHR(C)          debug_printf( "%c", C )
-#define UIX(I)          debug_printf( "0x%x", I )
-#define UID(I)          debug_printf( "%u", I )
-#define SID(I)          debug_printf( "%d", I )
-#define FLT(F)          debug_printf( "%10.4f", F )
-#define ENM(E,ENUMS)    dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
-
-static const char *TGSI_PROCESSOR_TYPES[] =
-{
-   "PROCESSOR_FRAGMENT",
-   "PROCESSOR_VERTEX",
-   "PROCESSOR_GEOMETRY"
-};
-
-static const char *TGSI_TOKEN_TYPES[] =
-{
-   "TOKEN_TYPE_DECLARATION",
-   "TOKEN_TYPE_IMMEDIATE",
-   "TOKEN_TYPE_INSTRUCTION"
-};
-
-static const char *TGSI_FILES[] =
-{
-   "FILE_NULL",
-   "FILE_CONSTANT",
-   "FILE_INPUT",
-   "FILE_OUTPUT",
-   "FILE_TEMPORARY",
-   "FILE_SAMPLER",
-   "FILE_ADDRESS",
-   "FILE_IMMEDIATE"
-};
-
-static const char *TGSI_INTERPOLATES[] =
-{
-   "INTERPOLATE_CONSTANT",
-   "INTERPOLATE_LINEAR",
-   "INTERPOLATE_PERSPECTIVE"
-};
-
-static const char *TGSI_SEMANTICS[] =
-{
-   "SEMANTIC_POSITION",
-   "SEMANTIC_COLOR",
-   "SEMANTIC_BCOLOR",
-   "SEMANTIC_FOG",
-   "SEMANTIC_PSIZE",
-   "SEMANTIC_GENERIC",
-   "SEMANTIC_NORMAL"
-};
-
-static const char *TGSI_IMMS[] =
-{
-   "IMM_FLOAT32"
-};
-
-static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] =
-{
-   "OPCODE_ARL",
-   "OPCODE_MOV",
-   "OPCODE_LIT",
-   "OPCODE_RCP",
-   "OPCODE_RSQ",
-   "OPCODE_EXP",
-   "OPCODE_LOG",
-   "OPCODE_MUL",
-   "OPCODE_ADD",
-   "OPCODE_DP3",
-   "OPCODE_DP4",
-   "OPCODE_DST",
-   "OPCODE_MIN",
-   "OPCODE_MAX",
-   "OPCODE_SLT",
-   "OPCODE_SGE",
-   "OPCODE_MAD",
-   "OPCODE_SUB",
-   "OPCODE_LERP",
-   "OPCODE_CND",
-   "OPCODE_CND0",
-   "OPCODE_DOT2ADD",
-   "OPCODE_INDEX",
-   "OPCODE_NEGATE",
-   "OPCODE_FRAC",
-   "OPCODE_CLAMP",
-   "OPCODE_FLOOR",
-   "OPCODE_ROUND",
-   "OPCODE_EXPBASE2",
-   "OPCODE_LOGBASE2",
-   "OPCODE_POWER",
-   "OPCODE_CROSSPRODUCT",
-   "OPCODE_MULTIPLYMATRIX",
-   "OPCODE_ABS",
-   "OPCODE_RCC",
-   "OPCODE_DPH",
-   "OPCODE_COS",
-   "OPCODE_DDX",
-   "OPCODE_DDY",
-   "OPCODE_KILP",
-   "OPCODE_PK2H",
-   "OPCODE_PK2US",
-   "OPCODE_PK4B",
-   "OPCODE_PK4UB",
-   "OPCODE_RFL",
-   "OPCODE_SEQ",
-   "OPCODE_SFL",
-   "OPCODE_SGT",
-   "OPCODE_SIN",
-   "OPCODE_SLE",
-   "OPCODE_SNE",
-   "OPCODE_STR",
-   "OPCODE_TEX",
-   "OPCODE_TXD",
-   "OPCODE_TXP",
-   "OPCODE_UP2H",
-   "OPCODE_UP2US",
-   "OPCODE_UP4B",
-   "OPCODE_UP4UB",
-   "OPCODE_X2D",
-   "OPCODE_ARA",
-   "OPCODE_ARR",
-   "OPCODE_BRA",
-   "OPCODE_CAL",
-   "OPCODE_RET",
-   "OPCODE_SSG",
-   "OPCODE_CMP",
-   "OPCODE_SCS",
-   "OPCODE_TXB",
-   "OPCODE_NRM",
-   "OPCODE_DIV",
-   "OPCODE_DP2",
-   "OPCODE_TXL",
-   "OPCODE_BRK",
-   "OPCODE_IF",
-   "OPCODE_LOOP",
-   "OPCODE_REP",
-   "OPCODE_ELSE",
-   "OPCODE_ENDIF",
-   "OPCODE_ENDLOOP",
-   "OPCODE_ENDREP",
-   "OPCODE_PUSHA",
-   "OPCODE_POPA",
-   "OPCODE_CEIL",
-   "OPCODE_I2F",
-   "OPCODE_NOT",
-   "OPCODE_TRUNC",
-   "OPCODE_SHL",
-   "OPCODE_SHR",
-   "OPCODE_AND",
-   "OPCODE_OR",
-   "OPCODE_MOD",
-   "OPCODE_XOR",
-   "OPCODE_SAD",
-   "OPCODE_TXF",
-   "OPCODE_TXQ",
-   "OPCODE_CONT",
-   "OPCODE_EMIT",
-   "OPCODE_ENDPRIM",
-   "OPCODE_BGNLOOP2",
-   "OPCODE_BGNSUB",
-   "OPCODE_ENDLOOP2",
-   "OPCODE_ENDSUB",
-   "OPCODE_NOISE1",
-   "OPCODE_NOISE2",
-   "OPCODE_NOISE3",
-   "OPCODE_NOISE4",
-   "OPCODE_NOP",
-   "OPCODE_M4X3",
-   "OPCODE_M3X4",
-   "OPCODE_M3X3",
-   "OPCODE_M3X2",
-   "OPCODE_NRM4",
-   "OPCODE_CALLNZ",
-   "OPCODE_IFC",
-   "OPCODE_BREAKC",
-   "OPCODE_KIL",
-   "OPCODE_END"
-};
-
-static const char *TGSI_SATS[] =
-{
-   "SAT_NONE",
-   "SAT_ZERO_ONE",
-   "SAT_MINUS_PLUS_ONE"
-};
-
-static const char *TGSI_INSTRUCTION_EXTS[] =
-{
-   "INSTRUCTION_EXT_TYPE_NV",
-   "INSTRUCTION_EXT_TYPE_LABEL",
-   "INSTRUCTION_EXT_TYPE_TEXTURE"
-};
-
-static const char *TGSI_PRECISIONS[] =
-{
-   "PRECISION_DEFAULT",
-   "PRECISION_FLOAT32",
-   "PRECISION_FLOAT16",
-   "PRECISION_FIXED12"
-};
-
-static const char *TGSI_CCS[] =
-{
-   "CC_GT",
-   "CC_EQ",
-   "CC_LT",
-   "CC_UN",
-   "CC_GE",
-   "CC_LE",
-   "CC_NE",
-   "CC_TR",
-   "CC_FL"
-};
-
-static const char *TGSI_SWIZZLES[] =
-{
-   "SWIZZLE_X",
-   "SWIZZLE_Y",
-   "SWIZZLE_Z",
-   "SWIZZLE_W"
-};
-
-static const char *TGSI_TEXTURES[] =
-{
-   "TEXTURE_UNKNOWN",
-   "TEXTURE_1D",
-   "TEXTURE_2D",
-   "TEXTURE_3D",
-   "TEXTURE_CUBE",
-   "TEXTURE_RECT",
-   "TEXTURE_SHADOW1D",
-   "TEXTURE_SHADOW2D",
-   "TEXTURE_SHADOWRECT"
-};
-
-static const char *TGSI_SRC_REGISTER_EXTS[] =
-{
-   "SRC_REGISTER_EXT_TYPE_SWZ",
-   "SRC_REGISTER_EXT_TYPE_MOD"
-};
-
-static const char *TGSI_EXTSWIZZLES[] =
-{
-   "EXTSWIZZLE_X",
-   "EXTSWIZZLE_Y",
-   "EXTSWIZZLE_Z",
-   "EXTSWIZZLE_W",
-   "EXTSWIZZLE_ZERO",
-   "EXTSWIZZLE_ONE"
-};
-
-static const char *TGSI_WRITEMASKS[] =
-{
-   "0",
-   "WRITEMASK_X",
-   "WRITEMASK_Y",
-   "WRITEMASK_XY",
-   "WRITEMASK_Z",
-   "WRITEMASK_XZ",
-   "WRITEMASK_YZ",
-   "WRITEMASK_XYZ",
-   "WRITEMASK_W",
-   "WRITEMASK_XW",
-   "WRITEMASK_YW",
-   "WRITEMASK_XYW",
-   "WRITEMASK_ZW",
-   "WRITEMASK_XZW",
-   "WRITEMASK_YZW",
-   "WRITEMASK_XYZW"
-};
-
-static const char *TGSI_DST_REGISTER_EXTS[] =
-{
-   "DST_REGISTER_EXT_TYPE_CONDCODE",
-   "DST_REGISTER_EXT_TYPE_MODULATE"
-};
-
-static const char *TGSI_MODULATES[] =
-{
-   "MODULATE_1X",
-   "MODULATE_2X",
-   "MODULATE_4X",
-   "MODULATE_8X",
-   "MODULATE_HALF",
-   "MODULATE_QUARTER",
-   "MODULATE_EIGHTH"
-};
-
-static void
-dump_declaration_verbose(
-   struct tgsi_full_declaration  *decl,
-   unsigned                      ignored,
-   unsigned                      deflt,
-   struct tgsi_full_declaration  *fd )
-{
-   TXT( "\nFile       : " );
-   ENM( decl->Declaration.File, TGSI_FILES );
-   if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) {
-      TXT( "\nUsageMask  : " );
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
-         CHR( 'X' );
-      }
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) {
-         CHR( 'Y' );
-      }
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) {
-         CHR( 'Z' );
-      }
-      if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) {
-         CHR( 'W' );
-      }
-   }
-   if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) {
-      TXT( "\nInterpolate: " );
-      ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES );
-   }
-   if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) {
-      TXT( "\nSemantic   : " );
-      UID( decl->Declaration.Semantic );
-   }
-   if( ignored ) {
-      TXT( "\nPadding    : " );
-      UIX( decl->Declaration.Padding );
-   }
-
-   EOL();
-   TXT( "\nFirst: " );
-   UID( decl->DeclarationRange.First );
-   TXT( "\nLast : " );
-   UID( decl->DeclarationRange.Last );
-
-   if( decl->Declaration.Semantic ) {
-      EOL();
-      TXT( "\nSemanticName : " );
-      ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS );
-      TXT( "\nSemanticIndex: " );
-      UID( decl->Semantic.SemanticIndex );
-      if( ignored ) {
-         TXT( "\nPadding      : " );
-         UIX( decl->Semantic.Padding );
-      }
-   }
-}
-
-static void
-dump_immediate_verbose(
-   struct tgsi_full_immediate *imm,
-   unsigned                   ignored )
-{
-   unsigned i;
-
-   TXT( "\nDataType   : " );
-   ENM( imm->Immediate.DataType, TGSI_IMMS );
-   if( ignored ) {
-      TXT( "\nPadding    : " );
-      UIX( imm->Immediate.Padding );
-   }
-
-   for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
-      EOL();
-      switch( imm->Immediate.DataType ) {
-      case TGSI_IMM_FLOAT32:
-         TXT( "\nFloat: " );
-         FLT( imm->u.ImmediateFloat32[i].Float );
-         break;
-
-      default:
-         assert( 0 );
-      }
-   }
-}
-
-static void
-dump_instruction_verbose(
-   struct tgsi_full_instruction  *inst,
-   unsigned                      ignored,
-   unsigned                      deflt,
-   struct tgsi_full_instruction  *fi )
-{
-   unsigned i;
-
-   TXT( "\nOpcode     : " );
-   ENM( inst->Instruction.Opcode, TGSI_OPCODES );
-   if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) {
-      TXT( "\nSaturate   : " );
-      ENM( inst->Instruction.Saturate, TGSI_SATS );
-   }
-   if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) {
-      TXT( "\nNumDstRegs : " );
-      UID( inst->Instruction.NumDstRegs );
-   }
-   if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) {
-      TXT( "\nNumSrcRegs : " );
-      UID( inst->Instruction.NumSrcRegs );
-   }
-   if( ignored ) {
-      TXT( "\nPadding    : " );
-      UIX( inst->Instruction.Padding );
-   }
-
-   if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) {
-      EOL();
-      TXT( "\nType          : " );
-      ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS );
-      if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) {
-         TXT( "\nPrecision     : " );
-         ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS );
-      }
-      if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) {
-         TXT( "\nCondDstIndex  : " );
-         UID( inst->InstructionExtNv.CondDstIndex );
-      }
-      if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) {
-         TXT( "\nCondFlowIndex : " );
-         UID( inst->InstructionExtNv.CondFlowIndex );
-      }
-      if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) {
-         TXT( "\nCondMask      : " );
-         ENM( inst->InstructionExtNv.CondMask, TGSI_CCS );
-      }
-      if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) {
-         TXT( "\nCondSwizzleX  : " );
-         ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES );
-      }
-      if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) {
-         TXT( "\nCondSwizzleY  : " );
-         ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES );
-      }
-      if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) {
-         TXT( "\nCondSwizzleZ  : " );
-         ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES );
-      }
-      if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) {
-         TXT( "\nCondSwizzleW  : " );
-         ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES );
-      }
-      if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) {
-         TXT( "\nCondDstUpdate : " );
-         UID( inst->InstructionExtNv.CondDstUpdate );
-      }
-      if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) {
-         TXT( "\nCondFlowEnable: " );
-         UID( inst->InstructionExtNv.CondFlowEnable );
-      }
-      if( ignored ) {
-         TXT( "\nPadding       : " );
-         UIX( inst->InstructionExtNv.Padding );
-         if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) {
-            TXT( "\nExtended      : " );
-            UID( inst->InstructionExtNv.Extended );
-         }
-      }
-   }
-
-   if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) {
-      EOL();
-      TXT( "\nType    : " );
-      ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS );
-      if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) {
-         TXT( "\nLabel   : " );
-         UID( inst->InstructionExtLabel.Label );
-      }
-      if( ignored ) {
-         TXT( "\nPadding : " );
-         UIX( inst->InstructionExtLabel.Padding );
-         if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) {
-            TXT( "\nExtended: " );
-            UID( inst->InstructionExtLabel.Extended );
-         }
-      }
-   }
-
-   if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) {
-      EOL();
-      TXT( "\nType    : " );
-      ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS );
-      if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) {
-         TXT( "\nTexture : " );
-         ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES );
-      }
-      if( ignored ) {
-         TXT( "\nPadding : " );
-         UIX( inst->InstructionExtTexture.Padding );
-         if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) {
-            TXT( "\nExtended: " );
-            UID( inst->InstructionExtTexture.Extended );
-         }
-      }
-   }
-
-   for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
-      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
-      struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i];
-
-      EOL();
-      TXT( "\nFile     : " );
-      ENM( dst->DstRegister.File, TGSI_FILES );
-      if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) {
-         TXT( "\nWriteMask: " );
-         ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS );
-      }
-      if( ignored ) {
-         if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) {
-            TXT( "\nIndirect : " );
-            UID( dst->DstRegister.Indirect );
-         }
-         if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) {
-            TXT( "\nDimension: " );
-            UID( dst->DstRegister.Dimension );
-         }
-      }
-      if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) {
-         TXT( "\nIndex    : " );
-         SID( dst->DstRegister.Index );
-      }
-      if( ignored ) {
-         TXT( "\nPadding  : " );
-         UIX( dst->DstRegister.Padding );
-         if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) {
-            TXT( "\nExtended : " );
-            UID( dst->DstRegister.Extended );
-         }
-      }
-
-      if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) {
-         EOL();
-         TXT( "\nType        : " );
-         ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS );
-         if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) {
-            TXT( "\nCondMask    : " );
-            ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS );
-         }
-         if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) {
-            TXT( "\nCondSwizzleX: " );
-            ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES );
-         }
-         if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) {
-            TXT( "\nCondSwizzleY: " );
-            ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES );
-         }
-         if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) {
-            TXT( "\nCondSwizzleZ: " );
-            ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES );
-         }
-         if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) {
-            TXT( "\nCondSwizzleW: " );
-            ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES );
-         }
-         if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) {
-            TXT( "\nCondSrcIndex: " );
-            UID( dst->DstRegisterExtConcode.CondSrcIndex );
-         }
-         if( ignored ) {
-            TXT( "\nPadding     : " );
-            UIX( dst->DstRegisterExtConcode.Padding );
-            if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) {
-               TXT( "\nExtended    : " );
-               UID( dst->DstRegisterExtConcode.Extended );
-            }
-         }
-      }
-
-      if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) {
-         EOL();
-         TXT( "\nType    : " );
-         ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
-         if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) {
-            TXT( "\nModulate: " );
-            ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES );
-         }
-         if( ignored ) {
-            TXT( "\nPadding : " );
-            UIX( dst->DstRegisterExtModulate.Padding );
-            if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) {
-               TXT( "\nExtended: " );
-               UID( dst->DstRegisterExtModulate.Extended );
-            }
-         }
-      }
-   }
-
-   for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
-      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
-      struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i];
-
-      EOL();
-      TXT( "\nFile     : ");
-      ENM( src->SrcRegister.File, TGSI_FILES );
-      if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) {
-         TXT( "\nSwizzleX : " );
-         ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES );
-      }
-      if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) {
-         TXT( "\nSwizzleY : " );
-         ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES );
-      }
-      if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) {
-         TXT( "\nSwizzleZ : " );
-         ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES );
-      }
-      if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) {
-         TXT( "\nSwizzleW : " );
-         ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES );
-      }
-      if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) {
-         TXT( "\nNegate   : " );
-         UID( src->SrcRegister.Negate );
-      }
-      if( ignored ) {
-         if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) {
-            TXT( "\nIndirect : " );
-            UID( src->SrcRegister.Indirect );
-         }
-         if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) {
-            TXT( "\nDimension: " );
-            UID( src->SrcRegister.Dimension );
-         }
-      }
-      if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) {
-         TXT( "\nIndex    : " );
-         SID( src->SrcRegister.Index );
-      }
-      if( ignored ) {
-         if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) {
-            TXT( "\nExtended : " );
-            UID( src->SrcRegister.Extended );
-         }
-      }
-
-      if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) {
-         EOL();
-         TXT( "\nType       : " );
-         ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) {
-            TXT( "\nExtSwizzleX: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) {
-            TXT( "\nExtSwizzleY: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) {
-            TXT( "\nExtSwizzleZ: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) {
-            TXT( "\nExtSwizzleW: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) {
-            TXT( "\nNegateX   : " );
-            UID( src->SrcRegisterExtSwz.NegateX );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) {
-            TXT( "\nNegateY   : " );
-            UID( src->SrcRegisterExtSwz.NegateY );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) {
-            TXT( "\nNegateZ   : " );
-            UID( src->SrcRegisterExtSwz.NegateZ );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) {
-            TXT( "\nNegateW   : " );
-            UID( src->SrcRegisterExtSwz.NegateW );
-         }
-         if( ignored ) {
-            TXT( "\nPadding   : " );
-            UIX( src->SrcRegisterExtSwz.Padding );
-            if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) {
-               TXT( "\nExtended   : " );
-               UID( src->SrcRegisterExtSwz.Extended );
-            }
-         }
-      }
-
-      if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
-         EOL();
-         TXT( "\nType     : " );
-         ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
-         if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) {
-            TXT( "\nComplement: " );
-            UID( src->SrcRegisterExtMod.Complement );
-         }
-         if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) {
-            TXT( "\nBias     : " );
-            UID( src->SrcRegisterExtMod.Bias );
-         }
-         if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) {
-            TXT( "\nScale2X   : " );
-            UID( src->SrcRegisterExtMod.Scale2X );
-         }
-         if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) {
-            TXT( "\nAbsolute  : " );
-            UID( src->SrcRegisterExtMod.Absolute );
-         }
-         if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) {
-            TXT( "\nNegate   : " );
-            UID( src->SrcRegisterExtMod.Negate );
-         }
-         if( ignored ) {
-            TXT( "\nPadding   : " );
-            UIX( src->SrcRegisterExtMod.Padding );
-            if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) {
-               TXT( "\nExtended  : " );
-               UID( src->SrcRegisterExtMod.Extended );
-            }
-         }
-      }
-   }
-}
-
-void
-tgsi_dump_c(
-   const struct tgsi_token *tokens,
-   uint flags )
-{
-   struct tgsi_parse_context parse;
-   struct tgsi_full_instruction fi;
-   struct tgsi_full_declaration fd;
-   uint ignored = flags & TGSI_DUMP_C_IGNORED;
-   uint deflt = flags & TGSI_DUMP_C_DEFAULT;
-   uint instno = 0;
-
-   /* sanity checks */
-   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
-   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
-
-   tgsi_parse_init( &parse, tokens );
-
-   TXT( "tgsi-dump begin -----------------" );
-
-   TXT( "\nMajorVersion: " );
-   UID( parse.FullVersion.Version.MajorVersion );
-   TXT( "\nMinorVersion: " );
-   UID( parse.FullVersion.Version.MinorVersion );
-   EOL();
-
-   TXT( "\nHeaderSize: " );
-   UID( parse.FullHeader.Header.HeaderSize );
-   TXT( "\nBodySize  : " );
-   UID( parse.FullHeader.Header.BodySize );
-   TXT( "\nProcessor : " );
-   ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES );
-   EOL();
-
-   fi = tgsi_default_full_instruction();
-   fd = tgsi_default_full_declaration();
-
-   while( !tgsi_parse_end_of_tokens( &parse ) ) {
-      tgsi_parse_token( &parse );
-
-      TXT( "\nType       : " );
-      ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES );
-      if( ignored ) {
-         TXT( "\nSize       : " );
-         UID( parse.FullToken.Token.Size );
-         if( deflt || parse.FullToken.Token.Extended ) {
-            TXT( "\nExtended   : " );
-            UID( parse.FullToken.Token.Extended );
-         }
-      }
-
-      switch( parse.FullToken.Token.Type ) {
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         dump_declaration_verbose(
-            &parse.FullToken.FullDeclaration,
-            ignored,
-            deflt,
-            &fd );
-         break;
-
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         dump_immediate_verbose(
-            &parse.FullToken.FullImmediate,
-            ignored );
-         break;
-
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         dump_instruction_verbose(
-            &parse.FullToken.FullInstruction,
-            ignored,
-            deflt,
-            &fi );
-         break;
-
-      default:
-         assert( 0 );
-      }
-
-      EOL();
-   }
-
-   TXT( "\ntgsi-dump end -------------------\n" );
-
-   tgsi_parse_free( &parse );
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h
deleted file mode 100644
index d91cd35b3b7..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef TGSI_DUMP_C_H
-#define TGSI_DUMP_C_H
-
-#include "pipe/p_shader_tokens.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-#define TGSI_DUMP_C_IGNORED 1
-#define TGSI_DUMP_C_DEFAULT 2
-
-void
-tgsi_dump_c(
-   const struct tgsi_token *tokens,
-   uint flags );
-
-#if defined __cplusplus
-}
-#endif
-
-#endif /* TGSI_DUMP_C_H */
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c
deleted file mode 100644
index 5371a88b964..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "pipe/p_debug.h"
-#include "tgsi_iterate.h"
-
-boolean
-tgsi_iterate_shader(
-   const struct tgsi_token *tokens,
-   struct tgsi_iterate_context *ctx )
-{
-   struct tgsi_parse_context parse;
-
-   if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK)
-      return FALSE;
-
-   ctx->processor = parse.FullHeader.Processor;
-   ctx->version = parse.FullVersion.Version;
-
-   if (ctx->prolog)
-      if (!ctx->prolog( ctx ))
-         goto fail;
-
-   while (!tgsi_parse_end_of_tokens( &parse )) {
-      tgsi_parse_token( &parse );
-
-      switch (parse.FullToken.Token.Type) {
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         if (ctx->iterate_instruction)
-            if (!ctx->iterate_instruction( ctx, &parse.FullToken.FullInstruction ))
-               goto fail;
-         break;
-
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         if (ctx->iterate_declaration)
-            if (!ctx->iterate_declaration( ctx, &parse.FullToken.FullDeclaration ))
-               goto fail;
-         break;
-
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         if (ctx->iterate_immediate)
-            if (!ctx->iterate_immediate( ctx, &parse.FullToken.FullImmediate ))
-               goto fail;
-         break;
-
-      default:
-         assert( 0 );
-      }
-   }
-
-   if (ctx->epilog)
-      if (!ctx->epilog( ctx ))
-         goto fail;
-
-   tgsi_parse_free( &parse );
-   return TRUE;
-
-fail:
-   tgsi_parse_free( &parse );
-   return FALSE;
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h
deleted file mode 100644
index f5bebf89b82..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef TGSI_ITERATE_H
-#define TGSI_ITERATE_H
-
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-struct tgsi_iterate_context
-{
-   boolean
-   (* prolog)(
-      struct tgsi_iterate_context *ctx );
-
-   boolean
-   (* iterate_instruction)(
-      struct tgsi_iterate_context *ctx,
-      struct tgsi_full_instruction *inst );
-
-   boolean
-   (* iterate_declaration)(
-      struct tgsi_iterate_context *ctx,
-      struct tgsi_full_declaration *decl );
-
-   boolean
-   (* iterate_immediate)(
-      struct tgsi_iterate_context *ctx,
-      struct tgsi_full_immediate *imm );
-
-   boolean
-   (* epilog)(
-      struct tgsi_iterate_context *ctx );
-
-   struct tgsi_processor processor;
-   struct tgsi_version version;
-};
-
-boolean
-tgsi_iterate_shader(
-   const struct tgsi_token *tokens,
-   struct tgsi_iterate_context *ctx );
-
-#if defined __cplusplus
-}
-#endif
-
-#endif /* TGSI_ITERATE_H */
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
deleted file mode 100644
index d16f0cdcad4..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c
+++ /dev/null
@@ -1,332 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi_parse.h"
-#include "tgsi_build.h"
-
-void
-tgsi_full_token_init(
-   union tgsi_full_token *full_token )
-{
-   full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION;
-}
-
-void
-tgsi_full_token_free(
-   union tgsi_full_token *full_token )
-{
-   if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) {
-      FREE( (void *) full_token->FullImmediate.u.Pointer );
-   }
-}
-
-unsigned
-tgsi_parse_init(
-   struct tgsi_parse_context *ctx,
-   const struct tgsi_token *tokens )
-{
-   ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0];
-   if( ctx->FullVersion.Version.MajorVersion > 1 ) {
-      return TGSI_PARSE_ERROR;
-   }
-
-   ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1];
-   if( ctx->FullHeader.Header.HeaderSize >= 2 ) {
-      ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2];
-   }
-   else {
-      ctx->FullHeader.Processor = tgsi_default_processor();
-   }
-
-   ctx->Tokens = tokens;
-   ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize;
-
-   tgsi_full_token_init( &ctx->FullToken );
-
-   return TGSI_PARSE_OK;
-}
-
-void
-tgsi_parse_free(
-   struct tgsi_parse_context *ctx )
-{
-   tgsi_full_token_free( &ctx->FullToken );
-}
-
-boolean
-tgsi_parse_end_of_tokens(
-   struct tgsi_parse_context *ctx )
-{
-   return ctx->Position >=
-      1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize;
-}
-
-static void
-next_token(
-   struct tgsi_parse_context *ctx,
-   void *token )
-{
-   assert( !tgsi_parse_end_of_tokens( ctx ) );
-
-   *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++];
-}
-
-void
-tgsi_parse_token(
-   struct tgsi_parse_context *ctx )
-{
-   struct tgsi_token token;
-   unsigned i;
-
-   tgsi_full_token_free( &ctx->FullToken );
-   tgsi_full_token_init( &ctx->FullToken );
-
-   next_token( ctx, &token );
-
-   switch( token.Type ) {
-   case TGSI_TOKEN_TYPE_DECLARATION:
-   {
-      struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration;
-
-      *decl = tgsi_default_full_declaration();
-      decl->Declaration = *(struct tgsi_declaration *) &token;
-
-      next_token( ctx, &decl->DeclarationRange );
-
-      if( decl->Declaration.Semantic ) {
-         next_token( ctx, &decl->Semantic );
-      }
-
-      break;
-   }
-
-   case TGSI_TOKEN_TYPE_IMMEDIATE:
-   {
-      struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate;
-
-      *imm = tgsi_default_full_immediate();
-      imm->Immediate = *(struct tgsi_immediate *) &token;
-
-      assert( !imm->Immediate.Extended );
-
-      switch (imm->Immediate.DataType) {
-      case TGSI_IMM_FLOAT32:
-         imm->u.Pointer = MALLOC(
-            sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) );
-         for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
-            next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] );
-         }
-         break;
-
-      default:
-         assert( 0 );
-      }
-
-      break;
-   }
-
-   case TGSI_TOKEN_TYPE_INSTRUCTION:
-   {
-      struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction;
-      unsigned extended;
-
-      *inst = tgsi_default_full_instruction();
-      inst->Instruction = *(struct tgsi_instruction *) &token;
-
-      extended = inst->Instruction.Extended;
-
-      while( extended ) {
-         struct tgsi_src_register_ext token;
-
-         next_token( ctx, &token );
-
-         switch( token.Type ) {
-         case TGSI_INSTRUCTION_EXT_TYPE_NV:
-            inst->InstructionExtNv =
-               *(struct tgsi_instruction_ext_nv *) &token;
-            break;
-
-         case TGSI_INSTRUCTION_EXT_TYPE_LABEL:
-            inst->InstructionExtLabel =
-               *(struct tgsi_instruction_ext_label *) &token;
-            break;
-
-         case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE:
-            inst->InstructionExtTexture =
-               *(struct tgsi_instruction_ext_texture *) &token;
-            break;
-
-         default:
-            assert( 0 );
-         }
-
-         extended = token.Extended;
-      }
-
-      assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
-
-      for(  i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
-         unsigned extended;
-
-         next_token( ctx, &inst->FullDstRegisters[i].DstRegister );
-
-         /*
-          * No support for indirect or multi-dimensional addressing.
-          */
-         assert( !inst->FullDstRegisters[i].DstRegister.Indirect );
-         assert( !inst->FullDstRegisters[i].DstRegister.Dimension );
-
-         extended = inst->FullDstRegisters[i].DstRegister.Extended;
-
-         while( extended ) {
-            struct tgsi_src_register_ext token;
-
-            next_token( ctx, &token );
-
-            switch( token.Type ) {
-            case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE:
-               inst->FullDstRegisters[i].DstRegisterExtConcode =
-                  *(struct tgsi_dst_register_ext_concode *) &token;
-               break;
-
-            case TGSI_DST_REGISTER_EXT_TYPE_MODULATE:
-               inst->FullDstRegisters[i].DstRegisterExtModulate =
-                  *(struct tgsi_dst_register_ext_modulate *) &token;
-               break;
-
-            default:
-               assert( 0 );
-            }
-
-            extended = token.Extended;
-         }
-      }
-
-      assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS );
-
-      for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
-         unsigned extended;
-
-         next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister );
-
-         extended = inst->FullSrcRegisters[i].SrcRegister.Extended;
-
-         while( extended ) {
-            struct tgsi_src_register_ext token;
-
-            next_token( ctx, &token );
-
-            switch( token.Type ) {
-            case TGSI_SRC_REGISTER_EXT_TYPE_SWZ:
-               inst->FullSrcRegisters[i].SrcRegisterExtSwz =
-                  *(struct tgsi_src_register_ext_swz *) &token;
-               break;
-
-            case TGSI_SRC_REGISTER_EXT_TYPE_MOD:
-               inst->FullSrcRegisters[i].SrcRegisterExtMod =
-                  *(struct tgsi_src_register_ext_mod *) &token;
-               break;
-
-            default:
-               assert( 0 );
-            }
-
-            extended = token.Extended;
-         }
-
-         if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) {
-            next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd );
-
-            /*
-             * No support for indirect or multi-dimensional addressing.
-             */
-            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
-            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
-            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
-         }
-
-         if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) {
-            next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim );
-
-            /*
-             * No support for multi-dimensional addressing.
-             */
-            assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension );
-            assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended );
-
-            if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) {
-               next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd );
-
-               /*
-               * No support for indirect or multi-dimensional addressing.
-               */
-               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
-               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
-               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
-            }
-         }
-      }
-
-      break;
-   }
-
-   default:
-      assert( 0 );
-   }
-}
-
-
-unsigned
-tgsi_num_tokens(const struct tgsi_token *tokens)
-{
-   struct tgsi_parse_context ctx;
-   if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) {
-      unsigned len = (ctx.FullHeader.Header.HeaderSize +
-                      ctx.FullHeader.Header.BodySize +
-                      1);
-      return len;
-   }
-   return 0;
-}
-
-
-/**
- * Make a new copy of a token array.
- */
-struct tgsi_token *
-tgsi_dup_tokens(const struct tgsi_token *tokens)
-{
-   unsigned n = tgsi_num_tokens(tokens);
-   unsigned bytes = n * sizeof(struct tgsi_token);
-   struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes);
-   if (new_tokens)
-      memcpy(new_tokens, tokens, bytes);
-   return new_tokens;
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
deleted file mode 100644
index 054350712d8..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef TGSI_PARSE_H
-#define TGSI_PARSE_H
-
-#include "pipe/p_shader_tokens.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-struct tgsi_full_version
-{
-   struct tgsi_version  Version;
-};
-
-struct tgsi_full_header
-{
-   struct tgsi_header      Header;
-   struct tgsi_processor   Processor;
-};
-
-struct tgsi_full_dst_register
-{
-   struct tgsi_dst_register               DstRegister;
-   struct tgsi_dst_register_ext_concode   DstRegisterExtConcode;
-   struct tgsi_dst_register_ext_modulate  DstRegisterExtModulate;
-};
-
-struct tgsi_full_src_register
-{
-   struct tgsi_src_register         SrcRegister;
-   struct tgsi_src_register_ext_swz SrcRegisterExtSwz;
-   struct tgsi_src_register_ext_mod SrcRegisterExtMod;
-   struct tgsi_src_register         SrcRegisterInd;
-   struct tgsi_dimension            SrcRegisterDim;
-   struct tgsi_src_register         SrcRegisterDimInd;
-};
-
-struct tgsi_full_declaration
-{
-   struct tgsi_declaration Declaration;
-   struct tgsi_declaration_range DeclarationRange;
-   struct tgsi_declaration_semantic Semantic;
-};
-
-struct tgsi_full_immediate
-{
-   struct tgsi_immediate   Immediate;
-   union
-   {
-      const void                          *Pointer;
-      const struct tgsi_immediate_float32 *ImmediateFloat32;
-   } u;
-};
-
-#define TGSI_FULL_MAX_DST_REGISTERS 2
-#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */
-
-struct tgsi_full_instruction
-{
-   struct tgsi_instruction             Instruction;
-   struct tgsi_instruction_ext_nv      InstructionExtNv;
-   struct tgsi_instruction_ext_label   InstructionExtLabel;
-   struct tgsi_instruction_ext_texture InstructionExtTexture;
-   struct tgsi_full_dst_register       FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
-   struct tgsi_full_src_register       FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
-};
-
-union tgsi_full_token
-{
-   struct tgsi_token             Token;
-   struct tgsi_full_declaration  FullDeclaration;
-   struct tgsi_full_immediate    FullImmediate;
-   struct tgsi_full_instruction  FullInstruction;
-};
-
-void
-tgsi_full_token_init(
-   union tgsi_full_token *full_token );
-
-void
-tgsi_full_token_free(
-   union tgsi_full_token *full_token );
-
-struct tgsi_parse_context
-{
-   const struct tgsi_token    *Tokens;
-   unsigned                   Position;
-   struct tgsi_full_version   FullVersion;
-   struct tgsi_full_header    FullHeader;
-   union tgsi_full_token      FullToken;
-};
-
-#define TGSI_PARSE_OK      0
-#define TGSI_PARSE_ERROR   1
-
-unsigned
-tgsi_parse_init(
-   struct tgsi_parse_context *ctx,
-   const struct tgsi_token *tokens );
-
-void
-tgsi_parse_free(
-   struct tgsi_parse_context *ctx );
-
-boolean
-tgsi_parse_end_of_tokens(
-   struct tgsi_parse_context *ctx );
-
-void
-tgsi_parse_token(
-   struct tgsi_parse_context *ctx );
-
-unsigned
-tgsi_num_tokens(const struct tgsi_token *tokens);
-
-struct tgsi_token *
-tgsi_dup_tokens(const struct tgsi_token *tokens);
-
-#if defined __cplusplus
-}
-#endif
-
-#endif /* TGSI_PARSE_H */
-
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
deleted file mode 100644
index 2e3ec96b5b5..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "pipe/p_debug.h"
-#include "tgsi_sanity.h"
-#include "tgsi_iterate.h"
-
-#define MAX_REGISTERS 256
-
-typedef uint reg_flag;
-
-#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8)
-
-struct sanity_check_ctx
-{
-   struct tgsi_iterate_context iter;
-
-   reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG];
-   reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG];
-   boolean regs_ind_used[TGSI_FILE_COUNT];
-   uint num_imms;
-   uint num_instructions;
-   uint index_of_END;
-
-   uint errors;
-   uint warnings;
-};
-
-static void
-report_error(
-   struct sanity_check_ctx *ctx,
-   const char *format,
-   ... )
-{
-   va_list args;
-
-   debug_printf( "Error  : " );
-   va_start( args, format );
-   _debug_vprintf( format, args );
-   va_end( args );
-   debug_printf( "\n" );
-   ctx->errors++;
-}
-
-static void
-report_warning(
-   struct sanity_check_ctx *ctx,
-   const char *format,
-   ... )
-{
-   va_list args;
-
-   debug_printf( "Warning: " );
-   va_start( args, format );
-   _debug_vprintf( format, args );
-   va_end( args );
-   debug_printf( "\n" );
-   ctx->warnings++;
-}
-
-static boolean
-check_file_name(
-   struct sanity_check_ctx *ctx,
-   uint file )
-{
-   if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) {
-      report_error( ctx, "Invalid register file name" );
-      return FALSE;
-   }
-   return TRUE;
-}
-
-static boolean
-is_register_declared(
-   struct sanity_check_ctx *ctx,
-   uint file,
-   int index )
-{
-   assert( index >= 0 && index < MAX_REGISTERS );
-
-   return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
-}
-
-static boolean
-is_any_register_declared(
-   struct sanity_check_ctx *ctx,
-   uint file )
-{
-   uint i;
-
-   for (i = 0; i < MAX_REGISTERS / BITS_IN_REG_FLAG; i++)
-      if (ctx->regs_decl[file][i])
-         return TRUE;
-   return FALSE;
-}
-
-static boolean
-is_register_used(
-   struct sanity_check_ctx *ctx,
-   uint file,
-   int index )
-{
-   assert( index < MAX_REGISTERS );
-
-   return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
-}
-
-static const char *file_names[] =
-{
-   "NULL",
-   "CONST",
-   "IN",
-   "OUT",
-   "TEMP",
-   "SAMP",
-   "ADDR",
-   "IMM"
-};
-
-static boolean
-check_register_usage(
-   struct sanity_check_ctx *ctx,
-   uint file,
-   int index,
-   const char *name,
-   boolean indirect_access )
-{
-   if (!check_file_name( ctx, file ))
-      return FALSE;
-   if (indirect_access) {
-      if (!is_any_register_declared( ctx, file ))
-         report_error( ctx, "%s: Undeclared %s register", file_names[file], name );
-      ctx->regs_ind_used[file] = TRUE;
-   }
-   else {
-      if (!is_register_declared( ctx, file, index ))
-         report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name );
-      ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG));
-   }
-   return TRUE;
-}
-
-static boolean
-iter_instruction(
-   struct tgsi_iterate_context *iter,
-   struct tgsi_full_instruction *inst )
-{
-   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
-   uint i;
-
-   /* There must be no other instructions after END.
-    */
-   if (ctx->index_of_END != ~0) {
-      report_error( ctx, "Unexpected instruction after END" );
-   }
-   else if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
-      ctx->index_of_END = ctx->num_instructions;
-   }
-
-   /* Check destination and source registers' validity.
-    * Mark the registers as used.
-    */
-   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
-      check_register_usage(
-         ctx,
-         inst->FullDstRegisters[i].DstRegister.File,
-         inst->FullDstRegisters[i].DstRegister.Index,
-         "destination",
-         FALSE );
-   }
-   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-      check_register_usage(
-         ctx,
-         inst->FullSrcRegisters[i].SrcRegister.File,
-         inst->FullSrcRegisters[i].SrcRegister.Index,
-         "source",
-         (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect );
-      if (inst->FullSrcRegisters[i].SrcRegister.Indirect) {
-         uint file;
-         int index;
-
-         file = inst->FullSrcRegisters[i].SrcRegisterInd.File;
-         index = inst->FullSrcRegisters[i].SrcRegisterInd.Index;
-         check_register_usage(
-            ctx,
-            file,
-            index,
-            "indirect",
-            FALSE );
-         if (file != TGSI_FILE_ADDRESS || index != 0)
-            report_warning( ctx, "Indirect register not ADDR[0]" );
-      }
-   }
-
-   ctx->num_instructions++;
-
-   return TRUE;
-}
-
-static boolean
-iter_declaration(
-   struct tgsi_iterate_context *iter,
-   struct tgsi_full_declaration *decl )
-{
-   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
-   uint file;
-   uint i;
-
-   /* No declarations allowed after the first instruction.
-    */
-   if (ctx->num_instructions > 0)
-      report_error( ctx, "Instruction expected but declaration found" );
-
-   /* Check registers' validity.
-    * Mark the registers as declared.
-    */
-   file = decl->Declaration.File;
-   if (!check_file_name( ctx, file ))
-      return TRUE;
-   for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) {
-      if (is_register_declared( ctx, file, i ))
-         report_error( ctx, "The same register declared twice" );
-      ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG));
-   }
-
-   return TRUE;
-}
-
-static boolean
-iter_immediate(
-   struct tgsi_iterate_context *iter,
-   struct tgsi_full_immediate *imm )
-{
-   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
-
-   assert( ctx->num_imms < MAX_REGISTERS );
-
-   /* No immediates allowed after the first instruction.
-    */
-   if (ctx->num_instructions > 0)
-      report_error( ctx, "Instruction expected but immediate found" );
-
-   /* Mark the register as declared.
-    */
-   ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG));
-   ctx->num_imms++;
-
-   /* Check data type validity.
-    */
-   if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) {
-      report_error( ctx, "Invalid immediate data type" );
-      return TRUE;
-   }
-
-   return TRUE;
-}
-
-static boolean
-epilog(
-   struct tgsi_iterate_context *iter )
-{
-   struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
-   uint file;
-
-   /* There must be an END instruction at the end.
-    */
-   if (ctx->index_of_END == ~0 || ctx->index_of_END != ctx->num_instructions - 1) {
-      report_error( ctx, "Expected END at end of instruction sequence" );
-   }
-
-   /* Check if all declared registers were used.
-    */
-   for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) {
-      uint i;
-
-      for (i = 0; i < MAX_REGISTERS; i++) {
-         if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) {
-            report_warning( ctx, "Register never used" );
-         }
-      }
-   }
-
-   /* Print totals, if any.
-    */
-   if (ctx->errors || ctx->warnings)
-      debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings );
-
-   return TRUE;
-}
-
-boolean
-tgsi_sanity_check(
-   struct tgsi_token *tokens )
-{
-   struct sanity_check_ctx ctx;
-
-   ctx.iter.prolog = NULL;
-   ctx.iter.iterate_instruction = iter_instruction;
-   ctx.iter.iterate_declaration = iter_declaration;
-   ctx.iter.iterate_immediate = iter_immediate;
-   ctx.iter.epilog = epilog;
-
-   memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) );
-   memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) );
-   memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) );
-   ctx.num_imms = 0;
-   ctx.num_instructions = 0;
-   ctx.index_of_END = ~0;
-
-   ctx.errors = 0;
-   ctx.warnings = 0;
-
-   if (!tgsi_iterate_shader( tokens, &ctx.iter ))
-      return FALSE;
-
-   return ctx.errors == 0;
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h
deleted file mode 100644
index ca45e94c7ad..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef TGSI_SANITY_H
-#define TGSI_SANITY_H
-
-#include "pipe/p_shader_tokens.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-/* Check the given token stream for errors and common mistakes.
- * Diagnostic messages are printed out to the debug output.
- * Returns TRUE if there are no errors, even though there could be some warnings.
- */
-boolean
-tgsi_sanity_check(
-   struct tgsi_token *tokens );
-
-#if defined __cplusplus
-}
-#endif
-
-#endif /* TGSI_SANITY_H */
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
deleted file mode 100644
index 240aaaf362c..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * TGSI program scan utility.
- * Used to determine which registers and instructions are used by a shader.
- *
- * Authors:  Brian Paul
- */
-
-
-#include "tgsi_scan.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_build.h"
-
-#include "pipe/p_util.h"
-
-
-
-/**
- */
-void
-tgsi_scan_shader(const struct tgsi_token *tokens,
-                 struct tgsi_shader_info *info)
-{
-   uint procType, i;
-   struct tgsi_parse_context parse;
-
-   memset(info, 0, sizeof(*info));
-   for (i = 0; i < TGSI_FILE_COUNT; i++)
-      info->file_max[i] = -1;
-
-   /**
-    ** Setup to begin parsing input shader
-    **/
-   if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
-      debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n");
-      return;
-   }
-   procType = parse.FullHeader.Processor.Processor;
-   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
-          procType == TGSI_PROCESSOR_VERTEX ||
-          procType == TGSI_PROCESSOR_GEOMETRY);
-
-
-   /**
-    ** Loop over incoming program tokens/instructions
-    */
-   while( !tgsi_parse_end_of_tokens( &parse ) ) {
-
-      info->num_tokens++;
-
-      tgsi_parse_token( &parse );
-
-      switch( parse.FullToken.Token.Type ) {
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         {
-            struct tgsi_full_instruction *fullinst
-               = &parse.FullToken.FullInstruction;
-
-            assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
-            info->opcode_count[fullinst->Instruction.Opcode]++;
-         }
-         break;
-
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         {
-            struct tgsi_full_declaration *fulldecl
-               = &parse.FullToken.FullDeclaration;
-            uint file = fulldecl->Declaration.File;
-            uint i;
-            for (i = fulldecl->DeclarationRange.First;
-                 i <= fulldecl->DeclarationRange.Last;
-                 i++) {
-
-               /* only first 32 regs will appear in this bitfield */
-               info->file_mask[file] |= (1 << i);
-               info->file_count[file]++;
-               info->file_max[file] = MAX2(info->file_max[file], (int)i);
-
-               if (file == TGSI_FILE_INPUT) {
-                  info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
-                  info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
-                  info->num_inputs++;
-               }
-
-               if (file == TGSI_FILE_OUTPUT) {
-                  info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
-                  info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
-                  info->num_outputs++;
-               }
-
-               /* special case */
-               if (procType == TGSI_PROCESSOR_FRAGMENT &&
-                   file == TGSI_FILE_OUTPUT &&
-                   fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
-                  info->writes_z = TRUE;
-               }
-            }
-         }
-         break;
-
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         info->immediate_count++;
-         break;
-
-      default:
-         assert( 0 );
-      }
-   }
-
-   assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs );
-   assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs );
-
-   info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] ||
-                      info->opcode_count[TGSI_OPCODE_KILP]);
-
-   tgsi_parse_free (&parse);
-}
-
-
-
-/**
- * Check if the given shader is a "passthrough" shader consisting of only
- * MOV instructions of the form:  MOV OUT[n], IN[n]
- *  
- */
-boolean
-tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
-{
-   struct tgsi_parse_context parse;
-
-   /**
-    ** Setup to begin parsing input shader
-    **/
-   if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
-      debug_printf("tgsi_parse_init() failed in tgsi_is_passthrough_shader()!\n");
-      return FALSE;
-   }
-
-   /**
-    ** Loop over incoming program tokens/instructions
-    */
-   while (!tgsi_parse_end_of_tokens(&parse)) {
-
-      tgsi_parse_token(&parse);
-
-      switch (parse.FullToken.Token.Type) {
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         {
-            struct tgsi_full_instruction *fullinst =
-               &parse.FullToken.FullInstruction;
-            const struct tgsi_full_src_register *src =
-               &fullinst->FullSrcRegisters[0];
-            const struct tgsi_full_dst_register *dst =
-               &fullinst->FullDstRegisters[0];
-
-            /* Do a whole bunch of checks for a simple move */
-            if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV ||
-                src->SrcRegister.File != TGSI_FILE_INPUT ||
-                dst->DstRegister.File != TGSI_FILE_OUTPUT ||
-                src->SrcRegister.Index != dst->DstRegister.Index ||
-
-                src->SrcRegister.Negate ||
-                src->SrcRegisterExtMod.Negate ||
-                src->SrcRegisterExtMod.Absolute ||
-                src->SrcRegisterExtMod.Scale2X ||
-                src->SrcRegisterExtMod.Bias ||
-                src->SrcRegisterExtMod.Complement ||
-
-                src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
-                src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
-                src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
-                src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ||
-
-                src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
-                src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
-                src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
-                src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W ||
-
-                dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW)
-            {
-               tgsi_parse_free(&parse);
-               return FALSE;
-            }
-         }
-         break;
-
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         /* fall-through */
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         /* fall-through */
-      default:
-         ; /* no-op */
-      }
-   }
-
-   tgsi_parse_free(&parse);
-
-   /* if we get here, it's a pass-through shader */
-   return TRUE;
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
deleted file mode 100644
index 5cb6efb3439..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef TGSI_SCAN_H
-#define TGSI_SCAN_H
-
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-#include "pipe/p_shader_tokens.h"
-
-
-/**
- * Shader summary info
- */
-struct tgsi_shader_info
-{
-   uint num_tokens;
-
-   /* XXX eventually remove the corresponding fields from pipe_shader_state: */
-   ubyte num_inputs;
-   ubyte num_outputs;
-   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
-   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
-   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
-   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
-
-   uint file_mask[TGSI_FILE_COUNT];  /**< bitmask of declared registers */
-   uint file_count[TGSI_FILE_COUNT];  /**< number of declared registers */
-   int file_max[TGSI_FILE_COUNT];  /**< highest index of declared registers */
-
-   uint immediate_count; /**< number of immediates declared */
-
-   uint opcode_count[TGSI_OPCODE_LAST];  /**< opcode histogram */
-
-   boolean writes_z;  /**< does fragment shader write Z value? */
-   boolean uses_kill;  /**< KIL or KILP instruction used? */
-};
-
-
-extern void
-tgsi_scan_shader(const struct tgsi_token *tokens,
-                 struct tgsi_shader_info *info);
-
-
-extern boolean
-tgsi_is_passthrough_shader(const struct tgsi_token *tokens);
-
-
-#endif /* TGSI_SCAN_H */
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c
deleted file mode 100644
index 35cb3055bb2..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c
+++ /dev/null
@@ -1,1221 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "pipe/p_debug.h"
-#include "tgsi_text.h"
-#include "tgsi_build.h"
-#include "tgsi_parse.h"
-#include "tgsi_sanity.h"
-#include "tgsi_util.h"
-
-static boolean is_alpha_underscore( const char *cur )
-{
-   return
-      (*cur >= 'a' && *cur <= 'z') ||
-      (*cur >= 'A' && *cur <= 'Z') ||
-      *cur == '_';
-}
-
-static boolean is_digit( const char *cur )
-{
-   return *cur >= '0' && *cur <= '9';
-}
-
-static boolean is_digit_alpha_underscore( const char *cur )
-{
-   return is_digit( cur ) || is_alpha_underscore( cur );
-}
-
-static boolean str_match_no_case( const char **pcur, const char *str )
-{
-   const char *cur = *pcur;
-
-   while (*str != '\0' && *str == toupper( *cur )) {
-      str++;
-      cur++;
-   }
-   if (*str == '\0') {
-      *pcur = cur;
-      return TRUE;
-   }
-   return FALSE;
-}
-
-/* Eat zero or more whitespaces.
- */
-static void eat_opt_white( const char **pcur )
-{
-   while (**pcur == ' ' || **pcur == '\t' || **pcur == '\n')
-      (*pcur)++;
-}
-
-/* Eat one or more whitespaces.
- * Return TRUE if at least one whitespace eaten.
- */
-static boolean eat_white( const char **pcur )
-{
-   const char *cur = *pcur;
-
-   eat_opt_white( pcur );
-   return *pcur > cur;
-}
-
-/* Parse unsigned integer.
- * No checks for overflow.
- */
-static boolean parse_uint( const char **pcur, uint *val )
-{
-   const char *cur = *pcur;
-
-   if (is_digit( cur )) {
-      *val = *cur++ - '0';
-      while (is_digit( cur ))
-         *val = *val * 10 + *cur++ - '0';
-      *pcur = cur;
-      return TRUE;
-   }
-   return FALSE;
-}
-
-/* Parse floating point.
- */
-static boolean parse_float( const char **pcur, float *val )
-{
-   const char *cur = *pcur;
-   boolean integral_part = FALSE;
-   boolean fractional_part = FALSE;
-
-   *val = (float) atof( cur );
-
-   if (*cur == '-' || *cur == '+')
-      cur++;
-   if (is_digit( cur )) {
-      cur++;
-      integral_part = TRUE;
-      while (is_digit( cur ))
-         cur++;
-   }
-   if (*cur == '.') {
-      cur++;
-      if (is_digit( cur )) {
-         cur++;
-         fractional_part = TRUE;
-         while (is_digit( cur ))
-            cur++;
-      }
-   }
-   if (!integral_part && !fractional_part)
-      return FALSE;
-   if (toupper( *cur ) == 'E') {
-      cur++;
-      if (*cur == '-' || *cur == '+')
-         cur++;
-      if (is_digit( cur )) {
-         cur++;
-         while (is_digit( cur ))
-            cur++;
-      }
-      else
-         return FALSE;
-   }
-   *pcur = cur;
-   return TRUE;
-}
-
-struct translate_ctx
-{
-   const char *text;
-   const char *cur;
-   struct tgsi_token *tokens;
-   struct tgsi_token *tokens_cur;
-   struct tgsi_token *tokens_end;
-   struct tgsi_header *header;
-};
-
-static void report_error( struct translate_ctx *ctx, const char *msg )
-{
-   debug_printf( "\nError: %s", msg );
-}
-
-/* Parse shader header.
- * Return TRUE for one of the following headers.
- *    FRAG1.1
- *    GEOM1.1
- *    VERT1.1
- */
-static boolean parse_header( struct translate_ctx *ctx )
-{
-   uint processor;
-
-   if (str_match_no_case( &ctx->cur, "FRAG1.1" ))
-      processor = TGSI_PROCESSOR_FRAGMENT;
-   else if (str_match_no_case( &ctx->cur, "VERT1.1" ))
-      processor = TGSI_PROCESSOR_VERTEX;
-   else if (str_match_no_case( &ctx->cur, "GEOM1.1" ))
-      processor = TGSI_PROCESSOR_GEOMETRY;
-   else {
-      report_error( ctx, "Unknown header" );
-      return FALSE;
-   }
-
-   if (ctx->tokens_cur >= ctx->tokens_end)
-      return FALSE;
-   *(struct tgsi_version *) ctx->tokens_cur++ = tgsi_build_version();
-
-   if (ctx->tokens_cur >= ctx->tokens_end)
-      return FALSE;
-   ctx->header = (struct tgsi_header *) ctx->tokens_cur++;
-   *ctx->header = tgsi_build_header();
-
-   if (ctx->tokens_cur >= ctx->tokens_end)
-      return FALSE;
-   *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header );
-
-   return TRUE;
-}
-
-static boolean parse_label( struct translate_ctx *ctx, uint *val )
-{
-   const char *cur = ctx->cur;
-
-   if (parse_uint( &cur, val )) {
-      eat_opt_white( &cur );
-      if (*cur == ':') {
-         cur++;
-         ctx->cur = cur;
-         return TRUE;
-      }
-   }
-   return FALSE;
-}
-
-static const char *file_names[TGSI_FILE_COUNT] =
-{
-   "NULL",
-   "CONST",
-   "IN",
-   "OUT",
-   "TEMP",
-   "SAMP",
-   "ADDR",
-   "IMM"
-};
-
-static boolean
-parse_file( const char **pcur, uint *file )
-{
-   uint i;
-
-   for (i = 0; i < TGSI_FILE_COUNT; i++) {
-      const char *cur = *pcur;
-
-      if (str_match_no_case( &cur, file_names[i] )) {
-         if (!is_digit_alpha_underscore( cur )) {
-            *pcur = cur;
-            *file = i;
-            return TRUE;
-         }
-      }
-   }
-   return FALSE;
-}
-
-static boolean
-parse_opt_writemask(
-   struct translate_ctx *ctx,
-   uint *writemask )
-{
-   const char *cur;
-
-   cur = ctx->cur;
-   eat_opt_white( &cur );
-   if (*cur == '.') {
-      cur++;
-      *writemask = TGSI_WRITEMASK_NONE;
-      eat_opt_white( &cur );
-      if (toupper( *cur ) == 'X') {
-         cur++;
-         *writemask |= TGSI_WRITEMASK_X;
-      }
-      if (toupper( *cur ) == 'Y') {
-         cur++;
-         *writemask |= TGSI_WRITEMASK_Y;
-      }
-      if (toupper( *cur ) == 'Z') {
-         cur++;
-         *writemask |= TGSI_WRITEMASK_Z;
-      }
-      if (toupper( *cur ) == 'W') {
-         cur++;
-         *writemask |= TGSI_WRITEMASK_W;
-      }
-
-      if (*writemask == TGSI_WRITEMASK_NONE) {
-         report_error( ctx, "Writemask expected" );
-         return FALSE;
-      }
-
-      ctx->cur = cur;
-   }
-   else {
-      *writemask = TGSI_WRITEMASK_XYZW;
-   }
-   return TRUE;
-}
-
-/* <register_file_bracket> ::= <file> `['
- */
-static boolean
-parse_register_file_bracket(
-   struct translate_ctx *ctx,
-   uint *file )
-{
-   if (!parse_file( &ctx->cur, file )) {
-      report_error( ctx, "Unknown register file" );
-      return FALSE;
-   }
-   eat_opt_white( &ctx->cur );
-   if (*ctx->cur != '[') {
-      report_error( ctx, "Expected `['" );
-      return FALSE;
-   }
-   ctx->cur++;
-   return TRUE;
-}
-
-/* <register_file_bracket_index> ::= <register_file_bracket> <uint>
- */
-static boolean
-parse_register_file_bracket_index(
-   struct translate_ctx *ctx,
-   uint *file,
-   int *index )
-{
-   uint uindex;
-
-   if (!parse_register_file_bracket( ctx, file ))
-      return FALSE;
-   eat_opt_white( &ctx->cur );
-   if (!parse_uint( &ctx->cur, &uindex )) {
-      report_error( ctx, "Expected literal unsigned integer" );
-      return FALSE;
-   }
-   *index = (int) uindex;
-   return TRUE;
-}
-
-/* Parse destination register operand.
- *    <register_dst> ::= <register_file_bracket_index> `]'
- */
-static boolean
-parse_register_dst(
-   struct translate_ctx *ctx,
-   uint *file,
-   int *index )
-{
-   if (!parse_register_file_bracket_index( ctx, file, index ))
-      return FALSE;
-   eat_opt_white( &ctx->cur );
-   if (*ctx->cur != ']') {
-      report_error( ctx, "Expected `]'" );
-      return FALSE;
-   }
-   ctx->cur++;
-   return TRUE;
-}
-
-/* Parse source register operand.
- *    <register_src> ::= <register_file_bracket_index> `]' |
- *                       <register_file_bracket> <register_dst> `]' |
- *                       <register_file_bracket> <register_dst> `+' <uint> `]' |
- *                       <register_file_bracket> <register_dst> `-' <uint> `]'
- */
-static boolean
-parse_register_src(
-   struct translate_ctx *ctx,
-   uint *file,
-   int *index,
-   uint *ind_file,
-   int *ind_index )
-{
-   const char *cur;
-   uint uindex;
-
-   if (!parse_register_file_bracket( ctx, file ))
-      return FALSE;
-   eat_opt_white( &ctx->cur );
-   cur = ctx->cur;
-   if (parse_file( &cur, ind_file )) {
-      if (!parse_register_dst( ctx, ind_file, ind_index ))
-         return FALSE;
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur == '+' || *ctx->cur == '-') {
-         boolean negate;
-
-         negate = *ctx->cur == '-';
-         ctx->cur++;
-         eat_opt_white( &ctx->cur );
-         if (!parse_uint( &ctx->cur, &uindex )) {
-            report_error( ctx, "Expected literal unsigned integer" );
-            return FALSE;
-         }
-         if (negate)
-            *index = -(int) uindex;
-         else
-            *index = (int) uindex;
-      }
-      else {
-         *index = 0;
-      }
-   }
-   else {
-      if (!parse_uint( &ctx->cur, &uindex )) {
-         report_error( ctx, "Expected literal unsigned integer" );
-         return FALSE;
-      }
-      *index = (int) uindex;
-      *ind_file = TGSI_FILE_NULL;
-      *ind_index = 0;
-   }
-   eat_opt_white( &ctx->cur );
-   if (*ctx->cur != ']') {
-      report_error( ctx, "Expected `]'" );
-      return FALSE;
-   }
-   ctx->cur++;
-   return TRUE;
-}
-
-/* Parse register declaration.
- *    <register_dcl> ::= <register_file_bracket_index> `]' |
- *                       <register_file_bracket_index> `..' <index> `]'
- */
-static boolean
-parse_register_dcl(
-   struct translate_ctx *ctx,
-   uint *file,
-   int *first,
-   int *last )
-{
-   if (!parse_register_file_bracket_index( ctx, file, first ))
-      return FALSE;
-   eat_opt_white( &ctx->cur );
-   if (ctx->cur[0] == '.' && ctx->cur[1] == '.') {
-      uint uindex;
-
-      ctx->cur += 2;
-      eat_opt_white( &ctx->cur );
-      if (!parse_uint( &ctx->cur, &uindex )) {
-         report_error( ctx, "Expected literal integer" );
-         return FALSE;
-      }
-      *last = (int) uindex;
-      eat_opt_white( &ctx->cur );
-   }
-   else {
-      *last = *first;
-   }
-   if (*ctx->cur != ']') {
-      report_error( ctx, "Expected `]' or `..'" );
-      return FALSE;
-   }
-   ctx->cur++;
-   return TRUE;
-}
-
-static const char *modulate_names[TGSI_MODULATE_COUNT] =
-{
-   "_1X",
-   "_2X",
-   "_4X",
-   "_8X",
-   "_D2",
-   "_D4",
-   "_D8"
-};
-
-static boolean
-parse_dst_operand(
-   struct translate_ctx *ctx,
-   struct tgsi_full_dst_register *dst )
-{
-   uint file;
-   int index;
-   uint writemask;
-   const char *cur;
-
-   if (!parse_register_dst( ctx, &file, &index ))
-      return FALSE;
-
-   cur = ctx->cur;
-   eat_opt_white( &cur );
-   if (*cur == '_') {
-      uint i;
-
-      for (i = 0; i < TGSI_MODULATE_COUNT; i++) {
-         if (str_match_no_case( &cur, modulate_names[i] )) {
-            if (!is_digit_alpha_underscore( cur )) {
-               dst->DstRegisterExtModulate.Modulate = i;
-               ctx->cur = cur;
-               break;
-            }
-         }
-      }
-   }
-
-   if (!parse_opt_writemask( ctx, &writemask ))
-      return FALSE;
-
-   dst->DstRegister.File = file;
-   dst->DstRegister.Index = index;
-   dst->DstRegister.WriteMask = writemask;
-   return TRUE;
-}
-
-static boolean
-parse_optional_swizzle(
-   struct translate_ctx *ctx,
-   uint swizzle[4],
-   boolean *parsed_swizzle,
-   boolean *parsed_extswizzle )
-{
-   const char *cur = ctx->cur;
-
-   *parsed_swizzle = FALSE;
-   *parsed_extswizzle = FALSE;
-
-   eat_opt_white( &cur );
-   if (*cur == '.') {
-      uint i;
-
-      cur++;
-      eat_opt_white( &cur );
-      for (i = 0; i < 4; i++) {
-         if (toupper( *cur ) == 'X')
-            swizzle[i] = TGSI_SWIZZLE_X;
-         else if (toupper( *cur ) == 'Y')
-            swizzle[i] = TGSI_SWIZZLE_Y;
-         else if (toupper( *cur ) == 'Z')
-            swizzle[i] = TGSI_SWIZZLE_Z;
-         else if (toupper( *cur ) == 'W')
-            swizzle[i] = TGSI_SWIZZLE_W;
-         else {
-            if (*cur == '0')
-               swizzle[i] = TGSI_EXTSWIZZLE_ZERO;
-            else if (*cur == '1')
-               swizzle[i] = TGSI_EXTSWIZZLE_ONE;
-            else {
-               report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" );
-               return FALSE;
-            }
-            *parsed_extswizzle = TRUE;
-         }
-         cur++;
-      }
-      *parsed_swizzle = TRUE;
-      ctx->cur = cur;
-   }
-   return TRUE;
-}
-
-static boolean
-parse_src_operand(
-   struct translate_ctx *ctx,
-   struct tgsi_full_src_register *src )
-{
-   const char *cur;
-   float value;
-   uint file;
-   int index;
-   uint ind_file;
-   int ind_index;
-   uint swizzle[4];
-   boolean parsed_swizzle;
-   boolean parsed_extswizzle;
-
-   if (*ctx->cur == '-') {
-      cur = ctx->cur;
-      cur++;
-      eat_opt_white( &cur );
-      if (*cur == '(') {
-         cur++;
-         src->SrcRegisterExtMod.Negate = 1;
-         eat_opt_white( &cur );
-         ctx->cur = cur;
-      }
-   }
-
-   if (*ctx->cur == '|') {
-      ctx->cur++;
-      eat_opt_white( &ctx->cur );
-      src->SrcRegisterExtMod.Absolute = 1;
-   }
-
-   if (*ctx->cur == '-') {
-      ctx->cur++;
-      eat_opt_white( &ctx->cur );
-      src->SrcRegister.Negate = 1;
-   }
-
-   cur = ctx->cur;
-   if (parse_float( &cur, &value )) {
-      if (value == 2.0f) {
-         eat_opt_white( &cur );
-         if (*cur != '*') {
-            report_error( ctx, "Expected `*'" );
-            return FALSE;
-         }
-         cur++;
-         if (*cur != '(') {
-            report_error( ctx, "Expected `('" );
-            return FALSE;
-         }
-         cur++;
-         src->SrcRegisterExtMod.Scale2X = 1;
-         eat_opt_white( &cur );
-         ctx->cur = cur;
-      }
-   }
-
-   if (*ctx->cur == '(') {
-      ctx->cur++;
-      eat_opt_white( &ctx->cur );
-      src->SrcRegisterExtMod.Bias = 1;
-   }
-
-   cur = ctx->cur;
-   if (parse_float( &cur, &value )) {
-      if (value == 1.0f) {
-         eat_opt_white( &cur );
-         if (*cur != '-') {
-            report_error( ctx, "Expected `-'" );
-            return FALSE;
-         }
-         cur++;
-         if (*cur != '(') {
-            report_error( ctx, "Expected `('" );
-            return FALSE;
-         }
-         cur++;
-         src->SrcRegisterExtMod.Complement = 1;
-         eat_opt_white( &cur );
-         ctx->cur = cur;
-      }
-   }
-
-   if (!parse_register_src( ctx, &file, &index, &ind_file, &ind_index ))
-      return FALSE;
-   src->SrcRegister.File = file;
-   src->SrcRegister.Index = index;
-   if (ind_file != TGSI_FILE_NULL) {
-      src->SrcRegister.Indirect = 1;
-      src->SrcRegisterInd.File = ind_file;
-      src->SrcRegisterInd.Index = ind_index;
-   }
-
-   /* Parse optional swizzle.
-    */
-   if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) {
-      if (parsed_extswizzle) {
-         assert( parsed_swizzle );
-
-         src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0];
-         src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1];
-         src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2];
-         src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3];
-      }
-      else if (parsed_swizzle) {
-         src->SrcRegister.SwizzleX = swizzle[0];
-         src->SrcRegister.SwizzleY = swizzle[1];
-         src->SrcRegister.SwizzleZ = swizzle[2];
-         src->SrcRegister.SwizzleW = swizzle[3];
-      }
-   }
-
-   if (src->SrcRegisterExtMod.Complement) {
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != ')') {
-         report_error( ctx, "Expected `)'" );
-         return FALSE;
-      }
-      ctx->cur++;
-   }
-
-   if (src->SrcRegisterExtMod.Bias) {
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != ')') {
-         report_error( ctx, "Expected `)'" );
-         return FALSE;
-      }
-      ctx->cur++;
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != '-') {
-         report_error( ctx, "Expected `-'" );
-         return FALSE;
-      }
-      ctx->cur++;
-      eat_opt_white( &ctx->cur );
-      if (!parse_float( &ctx->cur, &value )) {
-         report_error( ctx, "Expected literal floating point" );
-         return FALSE;
-      }
-      if (value != 0.5f) {
-         report_error( ctx, "Expected 0.5" );
-         return FALSE;
-      }
-   }
-
-   if (src->SrcRegisterExtMod.Scale2X) {
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != ')') {
-         report_error( ctx, "Expected `)'" );
-         return FALSE;
-      }
-      ctx->cur++;
-   }
-
-   if (src->SrcRegisterExtMod.Absolute) {
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != '|') {
-         report_error( ctx, "Expected `|'" );
-         return FALSE;
-      }
-      ctx->cur++;
-   }
-
-   if (src->SrcRegisterExtMod.Negate) {
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != ')') {
-         report_error( ctx, "Expected `)'" );
-         return FALSE;
-      }
-      ctx->cur++;
-   }
-
-   return TRUE;
-}
-
-struct opcode_info
-{
-   uint num_dst;
-   uint num_src;
-   uint is_tex;
-   uint is_branch;
-   const char *mnemonic;
-};
-
-static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] =
-{
-   { 1, 1, 0, 0, "ARL" },
-   { 1, 1, 0, 0, "MOV" },
-   { 1, 1, 0, 0, "LIT" },
-   { 1, 1, 0, 0, "RCP" },
-   { 1, 1, 0, 0, "RSQ" },
-   { 1, 1, 0, 0, "EXP" },
-   { 1, 1, 0, 0, "LOG" },
-   { 1, 2, 0, 0, "MUL" },
-   { 1, 2, 0, 0, "ADD" },
-   { 1, 2, 0, 0, "DP3" },
-   { 1, 2, 0, 0, "DP4" },
-   { 1, 2, 0, 0, "DST" },
-   { 1, 2, 0, 0, "MIN" },
-   { 1, 2, 0, 0, "MAX" },
-   { 1, 2, 0, 0, "SLT" },
-   { 1, 2, 0, 0, "SGE" },
-   { 1, 3, 0, 0, "MAD" },
-   { 1, 2, 0, 0, "SUB" },
-   { 1, 3, 0, 0, "LERP" },
-   { 1, 3, 0, 0, "CND" },
-   { 1, 3, 0, 0, "CND0" },
-   { 1, 3, 0, 0, "DOT2ADD" },
-   { 1, 2, 0, 0, "INDEX" },
-   { 1, 1, 0, 0, "NEGATE" },
-   { 1, 1, 0, 0, "FRAC" },
-   { 1, 3, 0, 0, "CLAMP" },
-   { 1, 1, 0, 0, "FLOOR" },
-   { 1, 1, 0, 0, "ROUND" },
-   { 1, 1, 0, 0, "EXPBASE2" },
-   { 1, 1, 0, 0, "LOGBASE2" },
-   { 1, 2, 0, 0, "POWER" },
-   { 1, 2, 0, 0, "CROSSPRODUCT" },
-   { 1, 2, 0, 0, "MULTIPLYMATRIX" },
-   { 1, 1, 0, 0, "ABS" },
-   { 1, 1, 0, 0, "RCC" },
-   { 1, 2, 0, 0, "DPH" },
-   { 1, 1, 0, 0, "COS" },
-   { 1, 1, 0, 0, "DDX" },
-   { 1, 1, 0, 0, "DDY" },
-   { 0, 1, 0, 0, "KILP" },
-   { 1, 1, 0, 0, "PK2H" },
-   { 1, 1, 0, 0, "PK2US" },
-   { 1, 1, 0, 0, "PK4B" },
-   { 1, 1, 0, 0, "PK4UB" },
-   { 1, 2, 0, 0, "RFL" },
-   { 1, 2, 0, 0, "SEQ" },
-   { 1, 2, 0, 0, "SFL" },
-   { 1, 2, 0, 0, "SGT" },
-   { 1, 1, 0, 0, "SIN" },
-   { 1, 2, 0, 0, "SLE" },
-   { 1, 2, 0, 0, "SNE" },
-   { 1, 2, 0, 0, "STR" },
-   { 1, 2, 1, 0, "TEX" },
-   { 1, 4, 1, 0, "TXD" },
-   { 1, 2, 1, 0, "TXP" },
-   { 1, 1, 0, 0, "UP2H" },
-   { 1, 1, 0, 0, "UP2US" },
-   { 1, 1, 0, 0, "UP4B" },
-   { 1, 1, 0, 0, "UP4UB" },
-   { 1, 3, 0, 0, "X2D" },
-   { 1, 1, 0, 0, "ARA" },
-   { 1, 1, 0, 0, "ARR" },
-   { 0, 1, 0, 0, "BRA" },
-   { 0, 0, 0, 1, "CAL" },
-   { 0, 0, 0, 0, "RET" },
-   { 1, 1, 0, 0, "SSG" },
-   { 1, 3, 0, 0, "CMP" },
-   { 1, 1, 0, 0, "SCS" },
-   { 1, 2, 1, 0, "TXB" },
-   { 1, 1, 0, 0, "NRM" },
-   { 1, 2, 0, 0, "DIV" },
-   { 1, 2, 0, 0, "DP2" },
-   { 1, 2, 1, 0, "TXL" },
-   { 0, 0, 0, 0, "BRK" },
-   { 0, 1, 0, 1, "IF" },
-   { 0, 0, 0, 0, "LOOP" },
-   { 0, 1, 0, 0, "REP" },
-   { 0, 0, 0, 1, "ELSE" },
-   { 0, 0, 0, 0, "ENDIF" },
-   { 0, 0, 0, 0, "ENDLOOP" },
-   { 0, 0, 0, 0, "ENDREP" },
-   { 0, 1, 0, 0, "PUSHA" },
-   { 1, 0, 0, 0, "POPA" },
-   { 1, 1, 0, 0, "CEIL" },
-   { 1, 1, 0, 0, "I2F" },
-   { 1, 1, 0, 0, "NOT" },
-   { 1, 1, 0, 0, "TRUNC" },
-   { 1, 2, 0, 0, "SHL" },
-   { 1, 2, 0, 0, "SHR" },
-   { 1, 2, 0, 0, "AND" },
-   { 1, 2, 0, 0, "OR" },
-   { 1, 2, 0, 0, "MOD" },
-   { 1, 2, 0, 0, "XOR" },
-   { 1, 3, 0, 0, "SAD" },
-   { 1, 2, 1, 0, "TXF" },
-   { 1, 2, 1, 0, "TXQ" },
-   { 0, 0, 0, 0, "CONT" },
-   { 0, 0, 0, 0, "EMIT" },
-   { 0, 0, 0, 0, "ENDPRIM" },
-   { 0, 0, 0, 1, "BGNLOOP2" },
-   { 0, 0, 0, 0, "BGNSUB" },
-   { 0, 0, 0, 1, "ENDLOOP2" },
-   { 0, 0, 0, 0, "ENDSUB" },
-   { 1, 1, 0, 0, "NOISE1" },
-   { 1, 1, 0, 0, "NOISE2" },
-   { 1, 1, 0, 0, "NOISE3" },
-   { 1, 1, 0, 0, "NOISE4" },
-   { 0, 0, 0, 0, "NOP" },
-   { 1, 2, 0, 0, "M4X3" },
-   { 1, 2, 0, 0, "M3X4" },
-   { 1, 2, 0, 0, "M3X3" },
-   { 1, 2, 0, 0, "M3X2" },
-   { 1, 1, 0, 0, "NRM4" },
-   { 0, 1, 0, 0, "CALLNZ" },
-   { 0, 1, 0, 0, "IFC" },
-   { 0, 1, 0, 0, "BREAKC" },
-   { 0, 0, 0, 0, "KIL" },
-   { 0, 0, 0, 0, "END" },
-   { 1, 1, 0, 0, "SWZ" }
-};
-
-static const char *texture_names[TGSI_TEXTURE_COUNT] =
-{
-   "UNKNOWN",
-   "1D",
-   "2D",
-   "3D",
-   "CUBE",
-   "RECT",
-   "SHADOW1D",
-   "SHADOW2D",
-   "SHADOWRECT"
-};
-
-static boolean
-parse_instruction(
-   struct translate_ctx *ctx,
-   boolean has_label )
-{
-   uint i;
-   uint saturate = TGSI_SAT_NONE;
-   const struct opcode_info *info;
-   struct tgsi_full_instruction inst;
-   uint advance;
-
-   /* Parse instruction name.
-    */
-   eat_opt_white( &ctx->cur );
-   for (i = 0; i < TGSI_OPCODE_LAST; i++) {
-      const char *cur = ctx->cur;
-
-      info = &opcode_info[i];
-      if (str_match_no_case( &cur, info->mnemonic )) {
-         if (str_match_no_case( &cur, "_SATNV" ))
-            saturate = TGSI_SAT_MINUS_PLUS_ONE;
-         else if (str_match_no_case( &cur, "_SAT" ))
-            saturate = TGSI_SAT_ZERO_ONE;
-
-         if (info->num_dst + info->num_src + info->is_tex == 0) {
-            if (!is_digit_alpha_underscore( cur )) {
-               ctx->cur = cur;
-               break;
-            }
-         }
-         else if (*cur == '\0' || eat_white( &cur )) {
-            ctx->cur = cur;
-            break;
-         }
-      }
-   }
-   if (i == TGSI_OPCODE_LAST) {
-      if (has_label)
-         report_error( ctx, "Unknown opcode" );
-      else
-         report_error( ctx, "Expected `DCL', `IMM' or a label" );
-      return FALSE;
-   }
-
-   inst = tgsi_default_full_instruction();
-   inst.Instruction.Opcode = i;
-   inst.Instruction.Saturate = saturate;
-   inst.Instruction.NumDstRegs = info->num_dst;
-   inst.Instruction.NumSrcRegs = info->num_src;
-
-   /* Parse instruction operands.
-    */
-   for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) {
-      if (i > 0) {
-         eat_opt_white( &ctx->cur );
-         if (*ctx->cur != ',') {
-            report_error( ctx, "Expected `,'" );
-            return FALSE;
-         }
-         ctx->cur++;
-         eat_opt_white( &ctx->cur );
-      }
-
-      if (i < info->num_dst) {
-         if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] ))
-            return FALSE;
-      }
-      else if (i < info->num_dst + info->num_src) {
-         if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] ))
-            return FALSE;
-      }
-      else {
-         uint j;
-
-         for (j = 0; j < TGSI_TEXTURE_COUNT; j++) {
-            if (str_match_no_case( &ctx->cur, texture_names[j] )) {
-               if (!is_digit_alpha_underscore( ctx->cur )) {
-                  inst.InstructionExtTexture.Texture = j;
-                  break;
-               }
-            }
-         }
-         if (j == TGSI_TEXTURE_COUNT) {
-            report_error( ctx, "Expected texture target" );
-            return FALSE;
-         }
-      }
-   }
-
-   if (info->is_branch) {
-      uint target;
-
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != ':') {
-         report_error( ctx, "Expected `:'" );
-         return FALSE;
-      }
-      ctx->cur++;
-      eat_opt_white( &ctx->cur );
-      if (!parse_uint( &ctx->cur, &target )) {
-         report_error( ctx, "Expected a label" );
-         return FALSE;
-      }
-      inst.InstructionExtLabel.Label = target;
-   }
-
-   advance = tgsi_build_full_instruction(
-      &inst,
-      ctx->tokens_cur,
-      ctx->header,
-      (uint) (ctx->tokens_end - ctx->tokens_cur) );
-   if (advance == 0)
-      return FALSE;
-   ctx->tokens_cur += advance;
-
-   return TRUE;
-}
-
-static const char *semantic_names[TGSI_SEMANTIC_COUNT] =
-{
-   "POSITION",
-   "COLOR",
-   "BCOLOR",
-   "FOG",
-   "PSIZE",
-   "GENERIC",
-   "NORMAL"
-};
-
-static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
-{
-   "CONSTANT",
-   "LINEAR",
-   "PERSPECTIVE"
-};
-
-static boolean parse_declaration( struct translate_ctx *ctx )
-{
-   struct tgsi_full_declaration decl;
-   uint file;
-   int first;
-   int last;
-   uint writemask;
-   const char *cur;
-   uint advance;
-
-   if (!eat_white( &ctx->cur )) {
-      report_error( ctx, "Syntax error" );
-      return FALSE;
-   }
-   if (!parse_register_dcl( ctx, &file, &first, &last ))
-      return FALSE;
-   if (!parse_opt_writemask( ctx, &writemask ))
-      return FALSE;
-
-   decl = tgsi_default_full_declaration();
-   decl.Declaration.File = file;
-   decl.Declaration.UsageMask = writemask;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
-
-   cur = ctx->cur;
-   eat_opt_white( &cur );
-   if (*cur == ',') {
-      uint i;
-
-      cur++;
-      eat_opt_white( &cur );
-      for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) {
-         if (str_match_no_case( &cur, semantic_names[i] )) {
-            const char *cur2 = cur;
-            uint index;
-
-            if (is_digit_alpha_underscore( cur ))
-               continue;
-            eat_opt_white( &cur2 );
-            if (*cur2 == '[') {
-               cur2++;
-               eat_opt_white( &cur2 );
-               if (!parse_uint( &cur2, &index )) {
-                  report_error( ctx, "Expected literal integer" );
-                  return FALSE;
-               }
-               eat_opt_white( &cur2 );
-               if (*cur2 != ']') {
-                  report_error( ctx, "Expected `]'" );
-                  return FALSE;
-               }
-               cur2++;
-
-               decl.Semantic.SemanticIndex = index;
-
-               cur = cur2;
-            }
-
-            decl.Declaration.Semantic = 1;
-            decl.Semantic.SemanticName = i;
-
-            ctx->cur = cur;
-            break;
-         }
-      }
-   }
-
-   cur = ctx->cur;
-   eat_opt_white( &cur );
-   if (*cur == ',') {
-      uint i;
-
-      cur++;
-      eat_opt_white( &cur );
-      for (i = 0; i < TGSI_INTERPOLATE_COUNT; i++) {
-         if (str_match_no_case( &cur, interpolate_names[i] )) {
-            if (is_digit_alpha_underscore( cur ))
-               continue;
-            decl.Declaration.Interpolate = i;
-
-            ctx->cur = cur;
-            break;
-         }
-      }
-      if (i == TGSI_INTERPOLATE_COUNT) {
-         report_error( ctx, "Expected semantic or interpolate attribute" );
-         return FALSE;
-      }
-   }
-
-   advance = tgsi_build_full_declaration(
-      &decl,
-      ctx->tokens_cur,
-      ctx->header,
-      (uint) (ctx->tokens_end - ctx->tokens_cur) );
-   if (advance == 0)
-      return FALSE;
-   ctx->tokens_cur += advance;
-
-   return TRUE;
-}
-
-static boolean parse_immediate( struct translate_ctx *ctx )
-{
-   struct tgsi_full_immediate imm;
-   uint i;
-   float values[4];
-   uint advance;
-
-   if (!eat_white( &ctx->cur )) {
-      report_error( ctx, "Syntax error" );
-      return FALSE;
-   }
-   if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) {
-      report_error( ctx, "Expected `FLT32'" );
-      return FALSE;
-   }
-   eat_opt_white( &ctx->cur );
-   if (*ctx->cur != '{') {
-      report_error( ctx, "Expected `{'" );
-      return FALSE;
-   }
-   ctx->cur++;
-   for (i = 0; i < 4; i++) {
-      eat_opt_white( &ctx->cur );
-      if (i > 0) {
-         if (*ctx->cur != ',') {
-            report_error( ctx, "Expected `,'" );
-            return FALSE;
-         }
-         ctx->cur++;
-         eat_opt_white( &ctx->cur );
-      }
-      if (!parse_float( &ctx->cur, &values[i] )) {
-         report_error( ctx, "Expected literal floating point" );
-         return FALSE;
-      }
-   }
-   eat_opt_white( &ctx->cur );
-   if (*ctx->cur != '}') {
-      report_error( ctx, "Expected `}'" );
-      return FALSE;
-   }
-   ctx->cur++;
-
-   imm = tgsi_default_full_immediate();
-   imm.Immediate.Size += 4;
-   imm.Immediate.DataType = TGSI_IMM_FLOAT32;
-   imm.u.Pointer = values;
-
-   advance = tgsi_build_full_immediate(
-      &imm,
-      ctx->tokens_cur,
-      ctx->header,
-      (uint) (ctx->tokens_end - ctx->tokens_cur) );
-   if (advance == 0)
-      return FALSE;
-   ctx->tokens_cur += advance;
-
-   return TRUE;
-}
-
-static boolean translate( struct translate_ctx *ctx )
-{
-   eat_opt_white( &ctx->cur );
-   if (!parse_header( ctx ))
-      return FALSE;
-
-   while (*ctx->cur != '\0') {
-      uint label_val = 0;
-
-      if (!eat_white( &ctx->cur )) {
-         report_error( ctx, "Syntax error" );
-         return FALSE;
-      }
-
-      if (*ctx->cur == '\0')
-         break;
-
-      if (parse_label( ctx, &label_val )) {
-         if (!parse_instruction( ctx, TRUE ))
-            return FALSE;
-      }
-      else if (str_match_no_case( &ctx->cur, "DCL" )) {
-         if (!parse_declaration( ctx ))
-            return FALSE;
-      }
-      else if (str_match_no_case( &ctx->cur, "IMM" )) {
-         if (!parse_immediate( ctx ))
-            return FALSE;
-      }
-      else if (!parse_instruction( ctx, FALSE )) {
-         return FALSE;
-      }
-   }
-
-   return TRUE;
-}
-
-boolean
-tgsi_text_translate(
-   const char *text,
-   struct tgsi_token *tokens,
-   uint num_tokens )
-{
-   struct translate_ctx ctx;
-
-   ctx.text = text;
-   ctx.cur = text;
-   ctx.tokens = tokens;
-   ctx.tokens_cur = tokens;
-   ctx.tokens_end = tokens + num_tokens;
-
-   if (!translate( &ctx ))
-      return FALSE;
-
-   return tgsi_sanity_check( tokens );
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.h b/src/gallium/auxiliary/tgsi/util/tgsi_text.h
deleted file mode 100644
index 8eeeeef1402..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_text.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef TGSI_TEXT_H
-#define TGSI_TEXT_H
-
-#include "pipe/p_shader_tokens.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-boolean
-tgsi_text_translate(
-   const char *text,
-   struct tgsi_token *tokens,
-   uint num_tokens );
-
-#if defined __cplusplus
-}
-#endif
-
-#endif /* TGSI_TEXT_H */
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_transform.c b/src/gallium/auxiliary/tgsi/util/tgsi_transform.c
deleted file mode 100644
index 357f77b05a6..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_transform.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * TGSI program transformation utility.
- *
- * Authors:  Brian Paul
- */
-
-
-#include "tgsi_transform.h"
-
-
-
-static void
-emit_instruction(struct tgsi_transform_context *ctx,
-                 const struct tgsi_full_instruction *inst)
-{
-   uint ti = ctx->ti;
-
-   ti += tgsi_build_full_instruction(inst,
-                                     ctx->tokens_out + ti,
-                                     ctx->header,
-                                     ctx->max_tokens_out - ti);
-   ctx->ti = ti;
-}
-
-
-static void
-emit_declaration(struct tgsi_transform_context *ctx,
-                 const struct tgsi_full_declaration *decl)
-{
-   uint ti = ctx->ti;
-
-   ti += tgsi_build_full_declaration(decl,
-                                     ctx->tokens_out + ti,
-                                     ctx->header,
-                                     ctx->max_tokens_out - ti);
-   ctx->ti = ti;
-}
-
-
-static void
-emit_immediate(struct tgsi_transform_context *ctx,
-               const struct tgsi_full_immediate *imm)
-{
-   uint ti = ctx->ti;
-
-   ti += tgsi_build_full_immediate(imm,
-                                   ctx->tokens_out + ti,
-                                   ctx->header,
-                                   ctx->max_tokens_out - ti);
-   ctx->ti = ti;
-}
-
-
-
-/**
- * Apply user-defined transformations to the input shader to produce
- * the output shader.
- * For example, a register search-and-replace operation could be applied
- * by defining a transform_instruction() callback that examined and changed
- * the instruction src/dest regs.
- *
- * \return number of tokens emitted
- */
-int
-tgsi_transform_shader(const struct tgsi_token *tokens_in,
-                      struct tgsi_token *tokens_out,
-                      uint max_tokens_out,
-                      struct tgsi_transform_context *ctx)
-{
-   uint procType;
-
-   /* input shader */
-   struct tgsi_parse_context parse;
-
-   /* output shader */
-   struct tgsi_processor *processor;
-
-
-   /**
-    ** callback context init
-    **/
-   ctx->emit_instruction = emit_instruction;
-   ctx->emit_declaration = emit_declaration;
-   ctx->emit_immediate = emit_immediate;
-   ctx->tokens_out = tokens_out;
-   ctx->max_tokens_out = max_tokens_out;
-
-
-   /**
-    ** Setup to begin parsing input shader
-    **/
-   if (tgsi_parse_init( &parse, tokens_in ) != TGSI_PARSE_OK) {
-      debug_printf("tgsi_parse_init() failed in tgsi_transform_shader()!\n");
-      return -1;
-   }
-   procType = parse.FullHeader.Processor.Processor;
-   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
-          procType == TGSI_PROCESSOR_VERTEX ||
-          procType == TGSI_PROCESSOR_GEOMETRY);
-
-
-   /**
-    **  Setup output shader
-    **/
-   *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version();
-
-   ctx->header = (struct tgsi_header *) (tokens_out + 1);
-   *ctx->header = tgsi_build_header();
-
-   processor = (struct tgsi_processor *) (tokens_out + 2);
-   *processor = tgsi_build_processor( procType, ctx->header );
-
-   ctx->ti = 3;
-
-
-   /**
-    ** Loop over incoming program tokens/instructions
-    */
-   while( !tgsi_parse_end_of_tokens( &parse ) ) {
-
-      tgsi_parse_token( &parse );
-
-      switch( parse.FullToken.Token.Type ) {
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         {
-            struct tgsi_full_instruction *fullinst
-               = &parse.FullToken.FullInstruction;
-
-            if (ctx->transform_instruction)
-               ctx->transform_instruction(ctx, fullinst);
-            else
-               ctx->emit_instruction(ctx, fullinst);
-         }
-         break;
-
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         {
-            struct tgsi_full_declaration *fulldecl
-               = &parse.FullToken.FullDeclaration;
-
-            if (ctx->transform_declaration)
-               ctx->transform_declaration(ctx, fulldecl);
-            else
-               ctx->emit_declaration(ctx, fulldecl);
-         }
-         break;
-
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         {
-            struct tgsi_full_immediate *fullimm
-               = &parse.FullToken.FullImmediate;
-
-            if (ctx->transform_immediate)
-               ctx->transform_immediate(ctx, fullimm);
-            else
-               ctx->emit_immediate(ctx, fullimm);
-         }
-         break;
-
-      default:
-         assert( 0 );
-      }
-   }
-
-   if (ctx->epilog) {
-      ctx->epilog(ctx);
-   }
-
-   tgsi_parse_free (&parse);
-
-   return ctx->ti;
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_transform.h b/src/gallium/auxiliary/tgsi/util/tgsi_transform.h
deleted file mode 100644
index fcf85d603be..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_transform.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef TGSI_TRANSFORM_H
-#define TGSI_TRANSFORM_H
-
-
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_build.h"
-
-
-
-/**
- * Subclass this to add caller-specific data
- */
-struct tgsi_transform_context
-{
-/**** PUBLIC ***/
-
-   /**
-    * User-defined callbacks invoked per instruction.
-    */
-   void (*transform_instruction)(struct tgsi_transform_context *ctx,
-                                 struct tgsi_full_instruction *inst);
-
-   void (*transform_declaration)(struct tgsi_transform_context *ctx,
-                                 struct tgsi_full_declaration *decl);
-
-   void (*transform_immediate)(struct tgsi_transform_context *ctx,
-                               struct tgsi_full_immediate *imm);
-
-   /**
-    * Called at end of input program to allow caller to append extra
-    * instructions.  Return number of tokens emitted.
-    */
-   void (*epilog)(struct tgsi_transform_context *ctx);
-
-
-/*** PRIVATE ***/
-
-   /**
-    * These are setup by tgsi_transform_shader() and cannot be overridden.
-    * Meant to be called from in the above user callback functions.
-    */
-   void (*emit_instruction)(struct tgsi_transform_context *ctx,
-                            const struct tgsi_full_instruction *inst);
-   void (*emit_declaration)(struct tgsi_transform_context *ctx,
-                            const struct tgsi_full_declaration *decl);
-   void (*emit_immediate)(struct tgsi_transform_context *ctx,
-                          const struct tgsi_full_immediate *imm);
-
-   struct tgsi_header *header;
-   uint max_tokens_out;
-   struct tgsi_token *tokens_out;
-   uint ti;
-};
-
-
-
-extern int
-tgsi_transform_shader(const struct tgsi_token *tokens_in,
-                      struct tgsi_token *tokens_out,
-                      uint max_tokens_out,
-                      struct tgsi_transform_context *ctx);
-
-
-#endif /* TGSI_TRANSFORM_H */
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c
deleted file mode 100644
index 09486e649e1..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_util.c
+++ /dev/null
@@ -1,300 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi_parse.h"
-#include "tgsi_build.h"
-#include "tgsi_util.h"
-
-union pointer_hack
-{
-   void *pointer;
-   uint64_t uint64;
-};
-
-void *
-tgsi_align_128bit(
-   void *unaligned )
-{
-   union pointer_hack ph;
-
-   ph.uint64 = 0;
-   ph.pointer = unaligned;
-   ph.uint64 = (ph.uint64 + 15) & ~15;
-   return ph.pointer;
-}
-
-unsigned
-tgsi_util_get_src_register_swizzle(
-   const struct tgsi_src_register *reg,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      return reg->SwizzleX;
-   case 1:
-      return reg->SwizzleY;
-   case 2:
-      return reg->SwizzleZ;
-   case 3:
-      return reg->SwizzleW;
-   default:
-      assert( 0 );
-   }
-   return 0;
-}
-
-unsigned
-tgsi_util_get_src_register_extswizzle(
-   const struct tgsi_src_register_ext_swz *reg,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      return reg->ExtSwizzleX;
-   case 1:
-      return reg->ExtSwizzleY;
-   case 2:
-      return reg->ExtSwizzleZ;
-   case 3:
-      return reg->ExtSwizzleW;
-   default:
-      assert( 0 );
-   }
-   return 0;
-}
-
-unsigned
-tgsi_util_get_full_src_register_extswizzle(
-   const struct tgsi_full_src_register  *reg,
-   unsigned component )
-{
-   unsigned swizzle;
-
-   /*
-    * First, calculate  the   extended swizzle for a given channel. This will give
-    * us either a channel index into the simple swizzle or  a constant 1 or   0.
-    */
-   swizzle = tgsi_util_get_src_register_extswizzle(
-      &reg->SrcRegisterExtSwz,
-      component );
-
-   assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
-   assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
-   assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
-   assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
-   assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
-   assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
-
-   /*
-    * Second, calculate the simple  swizzle  for   the   unswizzled channel index.
-    * Leave the constants intact, they are   not   affected by the   simple swizzle.
-    */
-   if( swizzle <= TGSI_SWIZZLE_W ) {
-      swizzle = tgsi_util_get_src_register_swizzle(
-         &reg->SrcRegister,
-         swizzle );
-   }
-
-   return swizzle;
-}
-
-void
-tgsi_util_set_src_register_swizzle(
-   struct tgsi_src_register *reg,
-   unsigned swizzle,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      reg->SwizzleX = swizzle;
-      break;
-   case 1:
-      reg->SwizzleY = swizzle;
-      break;
-   case 2:
-      reg->SwizzleZ = swizzle;
-      break;
-   case 3:
-      reg->SwizzleW = swizzle;
-      break;
-   default:
-      assert( 0 );
-   }
-}
-
-void
-tgsi_util_set_src_register_extswizzle(
-   struct tgsi_src_register_ext_swz *reg,
-   unsigned swizzle,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      reg->ExtSwizzleX = swizzle;
-      break;
-   case 1:
-      reg->ExtSwizzleY = swizzle;
-      break;
-   case 2:
-      reg->ExtSwizzleZ = swizzle;
-      break;
-   case 3:
-      reg->ExtSwizzleW = swizzle;
-      break;
-   default:
-      assert( 0 );
-   }
-}
-
-unsigned
-tgsi_util_get_src_register_extnegate(
-   const  struct tgsi_src_register_ext_swz *reg,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      return reg->NegateX;
-   case 1:
-      return reg->NegateY;
-   case 2:
-      return reg->NegateZ;
-   case 3:
-      return reg->NegateW;
-   default:
-      assert( 0 );
-   }
-   return 0;
-}
-
-void
-tgsi_util_set_src_register_extnegate(
-   struct tgsi_src_register_ext_swz *reg,
-   unsigned negate,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      reg->NegateX = negate;
-      break;
-   case 1:
-      reg->NegateY = negate;
-      break;
-   case 2:
-      reg->NegateZ = negate;
-      break;
-   case 3:
-      reg->NegateW = negate;
-      break;
-   default:
-      assert( 0 );
-   }
-}
-
-unsigned
-tgsi_util_get_full_src_register_sign_mode(
-   const struct  tgsi_full_src_register *reg,
-   unsigned component )
-{
-   unsigned sign_mode;
-
-   if( reg->SrcRegisterExtMod.Absolute ) {
-      /* Consider only the post-abs negation. */
-
-      if( reg->SrcRegisterExtMod.Negate ) {
-         sign_mode = TGSI_UTIL_SIGN_SET;
-      }
-      else {
-         sign_mode = TGSI_UTIL_SIGN_CLEAR;
-      }
-   }
-   else {
-      /* Accumulate the three negations. */
-
-      unsigned negate;
-
-      negate = reg->SrcRegister.Negate;
-      if( tgsi_util_get_src_register_extnegate( &reg->SrcRegisterExtSwz, component ) ) {
-         negate = !negate;
-      }
-      if( reg->SrcRegisterExtMod.Negate ) {
-         negate = !negate;
-      }
-
-      if( negate ) {
-         sign_mode = TGSI_UTIL_SIGN_TOGGLE;
-      }
-      else {
-         sign_mode = TGSI_UTIL_SIGN_KEEP;
-      }
-   }
-
-   return sign_mode;
-}
-
-void
-tgsi_util_set_full_src_register_sign_mode(
-   struct tgsi_full_src_register *reg,
-   unsigned sign_mode )
-{
-   reg->SrcRegisterExtSwz.NegateX = 0;
-   reg->SrcRegisterExtSwz.NegateY = 0;
-   reg->SrcRegisterExtSwz.NegateZ = 0;
-   reg->SrcRegisterExtSwz.NegateW = 0;
-
-   switch (sign_mode)
-   {
-   case TGSI_UTIL_SIGN_CLEAR:
-      reg->SrcRegister.Negate = 0;
-      reg->SrcRegisterExtMod.Absolute = 1;
-      reg->SrcRegisterExtMod.Negate = 0;
-      break;
-
-   case TGSI_UTIL_SIGN_SET:
-      reg->SrcRegister.Negate = 0;
-      reg->SrcRegisterExtMod.Absolute = 1;
-      reg->SrcRegisterExtMod.Negate = 1;
-      break;
-
-   case TGSI_UTIL_SIGN_TOGGLE:
-      reg->SrcRegister.Negate = 1;
-      reg->SrcRegisterExtMod.Absolute = 0;
-      reg->SrcRegisterExtMod.Negate = 0;
-      break;
-
-   case TGSI_UTIL_SIGN_KEEP:
-      reg->SrcRegister.Negate = 0;
-      reg->SrcRegisterExtMod.Absolute = 0;
-      reg->SrcRegisterExtMod.Negate = 0;
-      break;
-
-   default:
-      assert( 0 );
-   }
-}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.h b/src/gallium/auxiliary/tgsi/util/tgsi_util.h
deleted file mode 100644
index 7877f345587..00000000000
--- a/src/gallium/auxiliary/tgsi/util/tgsi_util.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef TGSI_UTIL_H
-#define TGSI_UTIL_H
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-void *
-tgsi_align_128bit(
-   void *unaligned );
-
-unsigned
-tgsi_util_get_src_register_swizzle(
-   const struct tgsi_src_register *reg,
-   unsigned component );
-
-unsigned
-tgsi_util_get_src_register_extswizzle(
-   const struct tgsi_src_register_ext_swz *reg,
-   unsigned component);
-
-unsigned
-tgsi_util_get_full_src_register_extswizzle(
-   const struct tgsi_full_src_register *reg,
-   unsigned component );
-
-void
-tgsi_util_set_src_register_swizzle(
-   struct tgsi_src_register *reg,
-   unsigned swizzle,
-   unsigned component );
-
-void
-tgsi_util_set_src_register_extswizzle(
-   struct tgsi_src_register_ext_swz *reg,
-   unsigned swizzle,
-   unsigned component );
-
-unsigned
-tgsi_util_get_src_register_extnegate(
-   const struct tgsi_src_register_ext_swz *reg,
-   unsigned component );
-
-void
-tgsi_util_set_src_register_extnegate(
-   struct tgsi_src_register_ext_swz *reg,
-   unsigned negate,
-   unsigned component );
-
-#define TGSI_UTIL_SIGN_CLEAR    0   /* Force positive */
-#define TGSI_UTIL_SIGN_SET      1   /* Force negative */
-#define TGSI_UTIL_SIGN_TOGGLE   2   /* Negate */
-#define TGSI_UTIL_SIGN_KEEP     3   /* No change */
-
-unsigned
-tgsi_util_get_full_src_register_sign_mode(
-   const struct tgsi_full_src_register *reg,
-   unsigned component );
-
-void
-tgsi_util_set_full_src_register_sign_mode(
-   struct tgsi_full_src_register *reg,
-   unsigned sign_mode );
-
-#if defined __cplusplus
-}
-#endif
-
-#endif /* TGSI_UTIL_H */
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 4999822068a..8713ff5d584 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -45,9 +45,9 @@
 #include "util/u_gen_mipmap.h"
 #include "util/u_simple_shaders.h"
 
-#include "tgsi/util/tgsi_build.h"
-#include "tgsi/util/tgsi_dump.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
 
 #include "cso_cache/cso_context.h"
 
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index 505d93d7277..c34fb6ee334 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -43,9 +43,9 @@
 
 #include "util/u_simple_shaders.h"
 
-#include "tgsi/util/tgsi_build.h"
-#include "tgsi/util/tgsi_dump.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
 
 
 
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index 8df41c1d4ca..f1d1ca89a97 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -37,7 +37,7 @@
 #include "cell_winsys.h"
 #include "cell/common.h"
 #include "rtasm/rtasm_ppc_spe.h"
-#include "tgsi/util/tgsi_scan.h"
+#include "tgsi/tgsi_scan.h"
 
 struct cell_vbuf_render;
 
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
index c3a3fbd066d..f5707f2bb8b 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c
@@ -30,7 +30,7 @@
 #include "pipe/p_inlines.h"
 #include "pipe/p_winsys.h"
 #include "draw/draw_context.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
 
 #include "cell_context.h"
 #include "cell_state.h"
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 3a80df427da..96393732ed8 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -65,8 +65,8 @@
 #include "pipe/p_state.h"
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
 #include "spu_exec.h"
 #include "spu_main.h"
 #include "spu_vertex_shader.h"
diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h
index 3e17c490d20..c68f78f59b6 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.h
+++ b/src/gallium/drivers/cell/spu/spu_exec.h
@@ -29,7 +29,7 @@
 #define SPU_EXEC_H
 
 #include "pipe/p_compiler.h"
-#include "tgsi/exec/tgsi_exec.h"
+#include "tgsi/tgsi_exec.h"
 
 #if defined __cplusplus
 extern "C" {
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
index ea4274a0a7b..74ab2bbd1f0 100644
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ b/src/gallium/drivers/cell/spu/spu_util.c
@@ -1,8 +1,8 @@
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
 //#include "tgsi_build.h"
-#include "tgsi/util/tgsi_util.h"
+#include "tgsi/tgsi_util.h"
 
 unsigned
 tgsi_util_get_src_register_swizzle(
diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h
index c8db4f608c5..3cdabe45f9d 100644
--- a/src/gallium/drivers/i915simple/i915_context.h
+++ b/src/gallium/drivers/i915simple/i915_context.h
@@ -35,7 +35,7 @@
 
 #include "draw/draw_vertex.h"
 
-#include "tgsi/util/tgsi_scan.h"
+#include "tgsi/tgsi_scan.h"
 
 
 #define I915_TEX_UNITS 8
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
index 23cd909337e..04507ab8adc 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -34,8 +34,8 @@
 
 #include "pipe/p_shader_tokens.h"
 #include "util/u_string.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
 
 #include "draw/draw_vertex.h"
 
diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c
index dbb33f26955..e8521b385ef 100644
--- a/src/gallium/drivers/i915simple/i915_state.c
+++ b/src/gallium/drivers/i915simple/i915_state.c
@@ -33,7 +33,7 @@
 #include "pipe/p_winsys.h"
 #include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
 
 #include "i915_context.h"
 #include "i915_reg.h"
diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h
index 2cae7665f78..f00eb34f92b 100644
--- a/src/gallium/drivers/i965simple/brw_context.h
+++ b/src/gallium/drivers/i965simple/brw_context.h
@@ -38,7 +38,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
 
-#include "tgsi/util/tgsi_scan.h"
+#include "tgsi/tgsi_scan.h"
 
 #include "brw_structs.h"
 #include "brw_winsys.h"
diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c
index 96f8fb87a3d..b82a2e143ba 100644
--- a/src/gallium/drivers/i965simple/brw_sf.c
+++ b/src/gallium/drivers/i965simple/brw_sf.c
@@ -36,7 +36,7 @@
 #include "brw_util.h"
 #include "brw_sf.h"
 #include "brw_state.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
 
 
 static void compile_sf_prog( struct brw_context *brw,
diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c
index fb3da92421e..30f37a99d46 100644
--- a/src/gallium/drivers/i965simple/brw_shader_info.c
+++ b/src/gallium/drivers/i965simple/brw_shader_info.c
@@ -3,7 +3,7 @@
 #include "brw_state.h"
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
 
 
 /**
diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c
index caeeba46300..27ca32843da 100644
--- a/src/gallium/drivers/i965simple/brw_state.c
+++ b/src/gallium/drivers/i965simple/brw_state.c
@@ -34,8 +34,8 @@
 #include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_dump.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
 
 #include "brw_context.h"
 #include "brw_defines.h"
diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c
index 81423e2d7d5..34dbc0624d5 100644
--- a/src/gallium/drivers/i965simple/brw_vs_emit.c
+++ b/src/gallium/drivers/i965simple/brw_vs_emit.c
@@ -33,7 +33,7 @@
 #include "brw_vs.h"
 
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
 
 struct brw_prog_info {
    unsigned num_temps;
diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c
index bf1b4d961a7..e6f1a44817d 100644
--- a/src/gallium/drivers/i965simple/brw_wm_decl.c
+++ b/src/gallium/drivers/i965simple/brw_wm_decl.c
@@ -4,7 +4,7 @@
 #include "brw_wm.h"
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
 
 static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
 {
diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c
index 5c905838243..6a4a5aef092 100644
--- a/src/gallium/drivers/i965simple/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c
@@ -4,7 +4,7 @@
 #include "brw_wm.h"
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
 
 
 
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 0b199a2193e..cc171bbc396 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -36,8 +36,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
-#include "tgsi/exec/tgsi_exec.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_parse.h"
 
 struct sp_exec_fragment_shader {
    struct sp_fragment_shader base;
diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c
index 6e1d9280bb4..20226da78c3 100644
--- a/src/gallium/drivers/softpipe/sp_fs_llvm.c
+++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c
@@ -38,7 +38,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
-#include "tgsi/exec/tgsi_sse2.h"
+#include "tgsi/tgsi_sse2.h"
 
 #if 0
 
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 69f7f960aa1..8b7da7c7473 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -36,8 +36,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
-#include "tgsi/exec/tgsi_exec.h"
-#include "tgsi/exec/tgsi_sse2.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_sse2.h"
 
 
 #ifdef PIPE_ARCH_X86
diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h
index 3d9ede69bbd..ae2ee210fc9 100644
--- a/src/gallium/drivers/softpipe/sp_headers.h
+++ b/src/gallium/drivers/softpipe/sp_headers.h
@@ -32,7 +32,7 @@
 #define SP_HEADERS_H
 
 #include "pipe/p_state.h"
-#include "tgsi/exec/tgsi_exec.h"
+#include "tgsi/tgsi_exec.h"
 
 #define PRIM_POINT 1
 #define PRIM_LINE  2
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 701e02b295e..476ef3dc8fb 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -32,7 +32,7 @@
 #define SP_STATE_H
 
 #include "pipe/p_state.h"
-#include "tgsi/util/tgsi_scan.h"
+#include "tgsi/tgsi_scan.h"
 
 
 #define SP_NEW_VIEWPORT      0x1
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index 901c8f83e75..76fe6bfef9f 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -35,8 +35,8 @@
 #include "pipe/p_winsys.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw/draw_context.h"
-#include "tgsi/util/tgsi_dump.h"
-#include "tgsi/util/tgsi_scan.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
 
 
 void *
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index be0b57d9fa6..63b3b91110e 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -40,7 +40,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_util.h"
-#include "tgsi/exec/tgsi_exec.h"
+#include "tgsi/tgsi_exec.h"
 
 
 /*
diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i
index 8d8b762ea57..284ecb827df 100644
--- a/src/gallium/state_trackers/python/gallium.i
+++ b/src/gallium/state_trackers/python/gallium.i
@@ -47,8 +47,8 @@
 #include "cso_cache/cso_context.h"
 #include "util/u_draw_quad.h" 
 #include "util/p_tile.h" 
-#include "tgsi/util/tgsi_text.h" 
-#include "tgsi/util/tgsi_dump.h" 
+#include "tgsi/tgsi_text.h" 
+#include "tgsi/tgsi_dump.h" 
 
 #include "st_device.h"
 #include "st_sample.h"
diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c
index 23ecfff0aa4..c7d26ce33cf 100644
--- a/src/mesa/state_tracker/st_debug.c
+++ b/src/mesa/state_tracker/st_debug.c
@@ -31,7 +31,7 @@
 
 #include "pipe/p_state.h"
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/tgsi_dump.h"
 
 #include "cso_cache/cso_cache.h"
 
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 9029f12056f..6565107b107 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -32,9 +32,9 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_build.h"
-#include "tgsi/util/tgsi_util.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_util.h"
 #include "st_mesa_to_tgsi.h"
 #include "shader/prog_instruction.h"
 #include "shader/prog_parameter.h"
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 5966bbadae8..c25c668329a 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -40,7 +40,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw/draw_context.h"
-#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/tgsi_dump.h"
 
 #include "st_context.h"
 #include "st_atom.h"
-- 
cgit v1.2.3


From 76164bf7a20ef6dabc3204a766f604becfba9997 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Wed, 30 Jul 2008 00:44:56 +0900
Subject: tgsi: Insert newlines after the statements, instead of before.

Prevents shader dumps from getting concatenated with the next debug message.
---
 src/gallium/auxiliary/tgsi/tgsi_dump.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index d2e6375212f..290d78bb14b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -332,7 +332,7 @@ void
 tgsi_dump_declaration(
    const struct tgsi_full_declaration *decl )
 {
-   TXT( "\nDCL " );
+   TXT( "DCL " );
 
    _dump_register(
       decl->Declaration.File,
@@ -354,6 +354,8 @@ tgsi_dump_declaration(
 
    TXT( ", " );
    ENM( decl->Declaration.Interpolate, interpolate_names );
+   
+   EOL();
 }
 
 static boolean
@@ -407,7 +409,6 @@ tgsi_dump_instruction(
    uint i;
    boolean first_reg = TRUE;
 
-   EOL();
    UID( instno );
    CHR( ':' );
    ENM( inst->Instruction.Opcode, opcode_names );
@@ -534,6 +535,8 @@ tgsi_dump_instruction(
       UID( inst->InstructionExtLabel.Label );
       break;
    }
+   
+   EOL();
 }
 
 static boolean
@@ -551,11 +554,11 @@ static boolean
 prolog(
    struct tgsi_iterate_context *ctx )
 {
-   EOL();
    ENM( ctx->processor.Processor, processor_type_names );
    UID( ctx->version.MajorVersion );
    CHR( '.' );
    UID( ctx->version.MinorVersion );
+   EOL();
    return TRUE;
 }
 
-- 
cgit v1.2.3


From 2783f3bfabd6e316f7e221e950499c3631c041ce Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 4 Aug 2008 12:18:09 +0200
Subject: tgsi: Put a newline after IMM.

---
 src/gallium/auxiliary/tgsi/tgsi_dump.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 290d78bb14b..0eedb1c91e3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -264,16 +264,6 @@ static const char *modulate_names[TGSI_MODULATE_COUNT] =
    "_D8"
 };
 
-static void
-_dump_register_prefix(
-   uint file,
-   uint first,
-   uint last )
-{
-   
-   
-}
-
 static void
 _dump_register(
    uint file,
@@ -354,7 +344,7 @@ tgsi_dump_declaration(
 
    TXT( ", " );
    ENM( decl->Declaration.Interpolate, interpolate_names );
-   
+
    EOL();
 }
 
@@ -373,7 +363,7 @@ tgsi_dump_immediate(
 {
    uint i;
 
-   TXT( "\nIMM " );
+   TXT( "IMM " );
    ENM( imm->Immediate.DataType, immediate_type_names );
 
    TXT( " { " );
@@ -390,6 +380,8 @@ tgsi_dump_immediate(
          TXT( ", " );
    }
    TXT( " }" );
+
+   EOL();
 }
 
 static boolean
@@ -535,7 +527,7 @@ tgsi_dump_instruction(
       UID( inst->InstructionExtLabel.Label );
       break;
    }
-   
+
    EOL();
 }
 
-- 
cgit v1.2.3


From 673489fa5cde4ce8d49918f20f007201a17bc45e Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Sat, 9 Aug 2008 11:52:27 +0100
Subject: tgsi: Dump shaders to a string too.

Again.
---
 src/gallium/auxiliary/tgsi/tgsi_dump.c | 166 ++++++++++++++++++++++++---------
 src/gallium/auxiliary/tgsi/tgsi_dump.h |   7 ++
 2 files changed, 127 insertions(+), 46 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 0eedb1c91e3..29bb530b4dd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -26,6 +26,7 @@
  **************************************************************************/
 
 #include "pipe/p_debug.h"
+#include "util/u_string.h"
 #include "tgsi_dump.h"
 #include "tgsi_iterate.h"
 
@@ -34,28 +35,31 @@ struct dump_ctx
    struct tgsi_iterate_context iter;
 
    uint instno;
+   
+   struct util_strbuf *sbuf;
 };
 
 static void
 dump_enum(
+   struct util_strbuf *sbuf,
    uint e,
    const char **enums,
    uint enum_count )
 {
    if (e >= enum_count)
-      debug_printf( "%u", e );
+      util_strbuf_printf( sbuf, "%u", e );
    else
-      debug_printf( "%s", enums[e] );
+      util_strbuf_printf( sbuf, "%s", enums[e] );
 }
 
-#define EOL()           debug_printf( "\n" )
-#define TXT(S)          debug_printf( "%s", S )
-#define CHR(C)          debug_printf( "%c", C )
-#define UIX(I)          debug_printf( "0x%x", I )
-#define UID(I)          debug_printf( "%u", I )
-#define SID(I)          debug_printf( "%d", I )
-#define FLT(F)          debug_printf( "%10.4f", F )
-#define ENM(E,ENUMS)    dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
+#define EOL()           util_strbuf_printf( sbuf, "\n" )
+#define TXT(S)          util_strbuf_printf( sbuf, "%s", S )
+#define CHR(C)          util_strbuf_printf( sbuf, "%c", C )
+#define UIX(I)          util_strbuf_printf( sbuf, "0x%x", I )
+#define UID(I)          util_strbuf_printf( sbuf, "%u", I )
+#define SID(I)          util_strbuf_printf( sbuf, "%d", I )
+#define FLT(F)          util_strbuf_printf( sbuf, "%10.4f", F )
+#define ENM(E,ENUMS)    dump_enum( sbuf, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
 
 static const char *processor_type_names[] =
 {
@@ -266,6 +270,7 @@ static const char *modulate_names[TGSI_MODULATE_COUNT] =
 
 static void
 _dump_register(
+   struct util_strbuf *sbuf,
    uint file,
    int first,
    int last )
@@ -282,6 +287,7 @@ _dump_register(
 
 static void
 _dump_register_ind(
+   struct util_strbuf *sbuf,
    uint file,
    int index,
    uint ind_file,
@@ -303,6 +309,7 @@ _dump_register_ind(
 
 static void
 _dump_writemask(
+   struct util_strbuf *sbuf,
    uint writemask )
 {
    if (writemask != TGSI_WRITEMASK_XYZW) {
@@ -318,17 +325,23 @@ _dump_writemask(
    }
 }
 
-void
-tgsi_dump_declaration(
-   const struct tgsi_full_declaration *decl )
+static boolean
+iter_declaration(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_declaration *decl )
 {
+   struct dump_ctx *ctx = (struct dump_ctx *) iter;
+   struct util_strbuf *sbuf = ctx->sbuf;
+
    TXT( "DCL " );
 
    _dump_register(
+      sbuf,
       decl->Declaration.File,
       decl->DeclarationRange.First,
       decl->DeclarationRange.Last );
    _dump_writemask(
+      sbuf,
       decl->Declaration.UsageMask );
 
    if (decl->Declaration.Semantic) {
@@ -346,21 +359,35 @@ tgsi_dump_declaration(
    ENM( decl->Declaration.Interpolate, interpolate_names );
 
    EOL();
-}
 
-static boolean
-iter_declaration(
-   struct tgsi_iterate_context *iter,
-   struct tgsi_full_declaration *decl )
-{
-   tgsi_dump_declaration( decl );
    return TRUE;
 }
 
 void
-tgsi_dump_immediate(
-   const struct tgsi_full_immediate *imm )
+tgsi_dump_declaration(
+   const struct tgsi_full_declaration *decl )
 {
+   static char str[1024];
+   struct util_strbuf sbuf;
+   struct dump_ctx ctx;
+
+   util_strbuf_init(&sbuf, str, sizeof(str));
+   
+   ctx.sbuf = &sbuf;
+
+   iter_declaration( &ctx.iter, (struct tgsi_full_declaration *)decl );
+   
+   debug_printf("%s", str);
+}
+
+static boolean
+iter_immediate(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_immediate *imm )
+{
+   struct dump_ctx *ctx = (struct dump_ctx *) iter;
+   struct util_strbuf *sbuf = ctx->sbuf;
+
    uint i;
 
    TXT( "IMM " );
@@ -382,22 +409,36 @@ tgsi_dump_immediate(
    TXT( " }" );
 
    EOL();
-}
 
-static boolean
-iter_immediate(
-   struct tgsi_iterate_context *iter,
-   struct tgsi_full_immediate *imm )
-{
-   tgsi_dump_immediate( imm );
    return TRUE;
 }
 
 void
-tgsi_dump_instruction(
-   const struct tgsi_full_instruction *inst,
-   uint instno )
+tgsi_dump_immediate(
+   const struct tgsi_full_immediate *imm )
 {
+   static char str[1024];
+   struct util_strbuf sbuf;
+   struct dump_ctx ctx;
+
+   util_strbuf_init(&sbuf, str, sizeof(str));
+   
+   ctx.sbuf = &sbuf;
+
+   iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm );
+   
+   debug_printf("%s", str);
+}
+
+static boolean
+iter_instruction(
+   struct tgsi_iterate_context *iter,
+   struct tgsi_full_instruction *inst )
+{
+   struct dump_ctx *ctx = (struct dump_ctx *) iter;
+   struct util_strbuf *sbuf = ctx->sbuf;
+   uint instno = ctx->instno++;
+   
    uint i;
    boolean first_reg = TRUE;
 
@@ -426,11 +467,12 @@ tgsi_dump_instruction(
       CHR( ' ' );
 
       _dump_register(
+         sbuf,
          dst->DstRegister.File,
          dst->DstRegister.Index,
          dst->DstRegister.Index );
       ENM( dst->DstRegisterExtModulate.Modulate, modulate_names );
-      _dump_writemask( dst->DstRegister.WriteMask );
+      _dump_writemask( sbuf, dst->DstRegister.WriteMask );
 
       first_reg = FALSE;
    }
@@ -457,6 +499,7 @@ tgsi_dump_instruction(
 
       if (src->SrcRegister.Indirect) {
          _dump_register_ind(
+            sbuf,
             src->SrcRegister.File,
             src->SrcRegister.Index,
             src->SrcRegisterInd.File,
@@ -464,6 +507,7 @@ tgsi_dump_instruction(
       }
       else {
          _dump_register(
+            sbuf,
             src->SrcRegister.File,
             src->SrcRegister.Index,
             src->SrcRegister.Index );
@@ -529,38 +573,55 @@ tgsi_dump_instruction(
    }
 
    EOL();
+
+   return TRUE;
 }
 
-static boolean
-iter_instruction(
-   struct tgsi_iterate_context *iter,
-   struct tgsi_full_instruction *inst )
+void
+tgsi_dump_instruction(
+   const struct tgsi_full_instruction *inst,
+   uint instno )
 {
-   struct dump_ctx *ctx = (struct dump_ctx *) iter;
+   static char str[1024];
+   struct util_strbuf sbuf;
+   struct dump_ctx ctx;
 
-   tgsi_dump_instruction( inst, ctx->instno++ );
-   return TRUE;
+   util_strbuf_init(&sbuf, str, sizeof(str));
+   
+   ctx.instno = instno;
+   ctx.sbuf = &sbuf;
+
+   iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst );
+   
+   debug_printf("%s", str);
 }
 
 static boolean
 prolog(
-   struct tgsi_iterate_context *ctx )
+   struct tgsi_iterate_context *iter )
 {
-   ENM( ctx->processor.Processor, processor_type_names );
-   UID( ctx->version.MajorVersion );
+   struct dump_ctx *ctx = (struct dump_ctx *) iter;
+   struct util_strbuf *sbuf = ctx->sbuf;
+   ENM( iter->processor.Processor, processor_type_names );
+   UID( iter->version.MajorVersion );
    CHR( '.' );
-   UID( ctx->version.MinorVersion );
+   UID( iter->version.MinorVersion );
    EOL();
    return TRUE;
 }
 
 void
-tgsi_dump(
+tgsi_dump_str(
    const struct tgsi_token *tokens,
-   uint flags )
+   uint flags,
+   char *str,
+   size_t size)
 {
+   struct util_strbuf sbuf;
    struct dump_ctx ctx;
 
+   util_strbuf_init(&sbuf, str, size);
+   
    /* sanity checks */
    assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 );
    assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 );
@@ -572,6 +633,19 @@ tgsi_dump(
    ctx.iter.epilog = NULL;
 
    ctx.instno = 0;
+   ctx.sbuf = &sbuf;
 
    tgsi_iterate_shader( tokens, &ctx.iter );
 }
+
+void
+tgsi_dump(
+   const struct tgsi_token *tokens,
+   uint flags )
+{
+   static char str[4096];
+   
+   tgsi_dump_str(tokens, flags, str, sizeof(str));
+   
+   debug_printf("%s", str);
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index 51c230b5db4..ad1e647ec90 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -34,6 +34,13 @@
 extern "C" {
 #endif
 
+void
+tgsi_dump_str(
+   const struct tgsi_token *tokens,
+   uint flags,
+   char *str,
+   size_t size);
+
 void
 tgsi_dump(
    const struct tgsi_token *tokens,
-- 
cgit v1.2.3


From 80d3a653f0172f01be694a29456c70f1f4da1812 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Thu, 7 Aug 2008 09:13:11 +0100
Subject: tgsi: Prevent division by zero.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 8b430548bca..b93f3d5222a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -393,10 +393,18 @@ micro_div(
    const union tgsi_exec_channel *src0,
    const union tgsi_exec_channel *src1 )
 {
-   dst->f[0] = src0->f[0] / src1->f[0];
-   dst->f[1] = src0->f[1] / src1->f[1];
-   dst->f[2] = src0->f[2] / src1->f[2];
-   dst->f[3] = src0->f[3] / src1->f[3];
+   if (src1->f[0] != 0) {
+      dst->f[0] = src0->f[0] / src1->f[0];
+   }
+   if (src1->f[1] != 0) {
+      dst->f[1] = src0->f[1] / src1->f[1];
+   }
+   if (src1->f[2] != 0) {
+      dst->f[2] = src0->f[2] / src1->f[2];
+   }
+   if (src1->f[3] != 0) {
+      dst->f[3] = src0->f[3] / src1->f[3];
+   }
 }
 
 static void
-- 
cgit v1.2.3


From faad6655946968dd16ab30cc8d5fbd5a09321976 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Tue, 12 Aug 2008 12:00:57 -0600
Subject: gallium: distinguish between KIL and KILP

Note: KIL (unconditional) not done yet.
---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 0cb1f11ef20..f3a202ae89e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1002,7 +1002,7 @@ emit_store(
  */
 
 static void
-emit_kil(
+emit_kilp(
    struct x86_function *func,
    const struct tgsi_full_src_register *reg )
 {
@@ -1096,6 +1096,15 @@ emit_kil(
       x86_make_reg( file_REG32, reg_AX ) );
 }
 
+
+static void
+emit_kil(
+   struct x86_function *func )
+{
+   /* XXX todo / fix me */
+}
+
+
 static void
 emit_setcc(
    struct x86_function *func,
@@ -1609,8 +1618,15 @@ emit_instruction(
       return 0;
       break;
 
+   case TGSI_OPCODE_KILP:
+      /* predicated kill */
+      emit_kilp( func, &inst->FullSrcRegisters[0] );
+      break;
+
    case TGSI_OPCODE_KIL:
-      emit_kil( func, &inst->FullSrcRegisters[0] );
+      /* unconditional kill */
+      emit_kil( func );
+      return 0; /* XXX fix me */
       break;
 
    case TGSI_OPCODE_PK2H:
-- 
cgit v1.2.3


From f0874d1a6b832e1bb29256661cb8beab3ddeb528 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 13 Aug 2008 11:09:20 +0200
Subject: tgsi: Swap meanings of KIL and KILP opcodes.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 22 ++++++++++++++++++----
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 12 ++++++------
 2 files changed, 24 insertions(+), 10 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index b93f3d5222a..c4ba667d32a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1189,8 +1189,8 @@ store_dest(
  * Kill fragment if any of the four values is less than zero.
  */
 static void
-exec_kilp(struct tgsi_exec_machine *mach,
-          const struct tgsi_full_instruction *inst)
+exec_kil(struct tgsi_exec_machine *mach,
+         const struct tgsi_full_instruction *inst)
 {
    uint uniquemask;
    uint chan_index;
@@ -1226,6 +1226,21 @@ exec_kilp(struct tgsi_exec_machine *mach,
    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
 }
 
+/**
+ * Execute NVIDIA-style KIL which is predicated by a condition code.
+ * Kill fragment if the condition code is TRUE.
+ */
+static void
+exec_kilp(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
+
+   /* TODO: build kilmask from CC mask */
+
+   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
+}
+
 
 /*
  * Fetch a texel using STR texture coordinates.
@@ -1971,8 +1986,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_KIL:
-      /* for enabled ExecMask bits, set the killed bit */
-      mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
+      exec_kil (mach, inst);
       break;
 
    case TGSI_OPCODE_PK2H:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index f3a202ae89e..47dc06faf6d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1002,7 +1002,7 @@ emit_store(
  */
 
 static void
-emit_kilp(
+emit_kil(
    struct x86_function *func,
    const struct tgsi_full_src_register *reg )
 {
@@ -1098,7 +1098,7 @@ emit_kilp(
 
 
 static void
-emit_kil(
+emit_kilp(
    struct x86_function *func )
 {
    /* XXX todo / fix me */
@@ -1620,13 +1620,13 @@ emit_instruction(
 
    case TGSI_OPCODE_KILP:
       /* predicated kill */
-      emit_kilp( func, &inst->FullSrcRegisters[0] );
+      emit_kilp( func );
+      return 0; /* XXX fix me */
       break;
 
    case TGSI_OPCODE_KIL:
-      /* unconditional kill */
-      emit_kil( func );
-      return 0; /* XXX fix me */
+      /* conditional kill */
+      emit_kil( func, &inst->FullSrcRegisters[0] );
       break;
 
    case TGSI_OPCODE_PK2H:
-- 
cgit v1.2.3


From 94f9faab31f7fbf5f14d23d993f9830fa51ce076 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 13 Aug 2008 17:16:59 +0200
Subject: tgsi: Opcode information.

---
 src/gallium/auxiliary/tgsi/tgsi_info.c | 161 +++++++++++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_info.h |  53 +++++++++++
 2 files changed, 214 insertions(+)
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_info.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_info.h

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
new file mode 100644
index 00000000000..a4899cd4c2d
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -0,0 +1,161 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "tgsi_info.h"
+
+static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
+{
+   { 1, 1, 0, 0, "ARL" },
+   { 1, 1, 0, 0, "MOV" },
+   { 1, 1, 0, 0, "LIT" },
+   { 1, 1, 0, 0, "RCP" },
+   { 1, 1, 0, 0, "RSQ" },
+   { 1, 1, 0, 0, "EXP" },
+   { 1, 1, 0, 0, "LOG" },
+   { 1, 2, 0, 0, "MUL" },
+   { 1, 2, 0, 0, "ADD" },
+   { 1, 2, 0, 0, "DP3" },
+   { 1, 2, 0, 0, "DP4" },
+   { 1, 2, 0, 0, "DST" },
+   { 1, 2, 0, 0, "MIN" },
+   { 1, 2, 0, 0, "MAX" },
+   { 1, 2, 0, 0, "SLT" },
+   { 1, 2, 0, 0, "SGE" },
+   { 1, 3, 0, 0, "MAD" },
+   { 1, 2, 0, 0, "SUB" },
+   { 1, 3, 0, 0, "LERP" },
+   { 1, 3, 0, 0, "CND" },
+   { 1, 3, 0, 0, "CND0" },
+   { 1, 3, 0, 0, "DOT2ADD" },
+   { 1, 2, 0, 0, "INDEX" },
+   { 1, 1, 0, 0, "NEGATE" },
+   { 1, 1, 0, 0, "FRAC" },
+   { 1, 3, 0, 0, "CLAMP" },
+   { 1, 1, 0, 0, "FLOOR" },
+   { 1, 1, 0, 0, "ROUND" },
+   { 1, 1, 0, 0, "EXPBASE2" },
+   { 1, 1, 0, 0, "LOGBASE2" },
+   { 1, 2, 0, 0, "POWER" },
+   { 1, 2, 0, 0, "CROSSPRODUCT" },
+   { 1, 2, 0, 0, "MULTIPLYMATRIX" },
+   { 1, 1, 0, 0, "ABS" },
+   { 1, 1, 0, 0, "RCC" },
+   { 1, 2, 0, 0, "DPH" },
+   { 1, 1, 0, 0, "COS" },
+   { 1, 1, 0, 0, "DDX" },
+   { 1, 1, 0, 0, "DDY" },
+   { 0, 1, 0, 0, "KILP" },
+   { 1, 1, 0, 0, "PK2H" },
+   { 1, 1, 0, 0, "PK2US" },
+   { 1, 1, 0, 0, "PK4B" },
+   { 1, 1, 0, 0, "PK4UB" },
+   { 1, 2, 0, 0, "RFL" },
+   { 1, 2, 0, 0, "SEQ" },
+   { 1, 2, 0, 0, "SFL" },
+   { 1, 2, 0, 0, "SGT" },
+   { 1, 1, 0, 0, "SIN" },
+   { 1, 2, 0, 0, "SLE" },
+   { 1, 2, 0, 0, "SNE" },
+   { 1, 2, 0, 0, "STR" },
+   { 1, 2, 1, 0, "TEX" },
+   { 1, 4, 1, 0, "TXD" },
+   { 1, 2, 1, 0, "TXP" },
+   { 1, 1, 0, 0, "UP2H" },
+   { 1, 1, 0, 0, "UP2US" },
+   { 1, 1, 0, 0, "UP4B" },
+   { 1, 1, 0, 0, "UP4UB" },
+   { 1, 3, 0, 0, "X2D" },
+   { 1, 1, 0, 0, "ARA" },
+   { 1, 1, 0, 0, "ARR" },
+   { 0, 1, 0, 0, "BRA" },
+   { 0, 0, 0, 1, "CAL" },
+   { 0, 0, 0, 0, "RET" },
+   { 1, 1, 0, 0, "SSG" },
+   { 1, 3, 0, 0, "CMP" },
+   { 1, 1, 0, 0, "SCS" },
+   { 1, 2, 1, 0, "TXB" },
+   { 1, 1, 0, 0, "NRM" },
+   { 1, 2, 0, 0, "DIV" },
+   { 1, 2, 0, 0, "DP2" },
+   { 1, 2, 1, 0, "TXL" },
+   { 0, 0, 0, 0, "BRK" },
+   { 0, 1, 0, 1, "IF" },
+   { 0, 0, 0, 0, "LOOP" },
+   { 0, 1, 0, 0, "REP" },
+   { 0, 0, 0, 1, "ELSE" },
+   { 0, 0, 0, 0, "ENDIF" },
+   { 0, 0, 0, 0, "ENDLOOP" },
+   { 0, 0, 0, 0, "ENDREP" },
+   { 0, 1, 0, 0, "PUSHA" },
+   { 1, 0, 0, 0, "POPA" },
+   { 1, 1, 0, 0, "CEIL" },
+   { 1, 1, 0, 0, "I2F" },
+   { 1, 1, 0, 0, "NOT" },
+   { 1, 1, 0, 0, "TRUNC" },
+   { 1, 2, 0, 0, "SHL" },
+   { 1, 2, 0, 0, "SHR" },
+   { 1, 2, 0, 0, "AND" },
+   { 1, 2, 0, 0, "OR" },
+   { 1, 2, 0, 0, "MOD" },
+   { 1, 2, 0, 0, "XOR" },
+   { 1, 3, 0, 0, "SAD" },
+   { 1, 2, 1, 0, "TXF" },
+   { 1, 2, 1, 0, "TXQ" },
+   { 0, 0, 0, 0, "CONT" },
+   { 0, 0, 0, 0, "EMIT" },
+   { 0, 0, 0, 0, "ENDPRIM" },
+   { 0, 0, 0, 1, "BGNLOOP2" },
+   { 0, 0, 0, 0, "BGNSUB" },
+   { 0, 0, 0, 1, "ENDLOOP2" },
+   { 0, 0, 0, 0, "ENDSUB" },
+   { 1, 1, 0, 0, "NOISE1" },
+   { 1, 1, 0, 0, "NOISE2" },
+   { 1, 1, 0, 0, "NOISE3" },
+   { 1, 1, 0, 0, "NOISE4" },
+   { 0, 0, 0, 0, "NOP" },
+   { 1, 2, 0, 0, "M4X3" },
+   { 1, 2, 0, 0, "M3X4" },
+   { 1, 2, 0, 0, "M3X3" },
+   { 1, 2, 0, 0, "M3X2" },
+   { 1, 1, 0, 0, "NRM4" },
+   { 0, 1, 0, 0, "CALLNZ" },
+   { 0, 1, 0, 0, "IFC" },
+   { 0, 1, 0, 0, "BREAKC" },
+   { 0, 0, 0, 0, "KIL" },
+   { 0, 0, 0, 0, "END" },
+   { 1, 1, 0, 0, "SWZ" }
+};
+
+const struct tgsi_opcode_info *
+tgsi_get_opcode_info( uint opcode )
+{
+   if (opcode < TGSI_OPCODE_LAST)
+      return &opcode_info[opcode];
+   assert( 0 );
+   return NULL;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
new file mode 100644
index 00000000000..7230bdaae33
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -0,0 +1,53 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_INFO_H
+#define TGSI_INFO_H
+
+#include "pipe/p_shader_tokens.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+struct tgsi_opcode_info
+{
+   uint num_dst;
+   uint num_src;
+   boolean is_tex;
+   boolean is_branch;
+   const char *mnemonic;
+};
+
+const struct tgsi_opcode_info *
+tgsi_get_opcode_info( uint opcode );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_INFO_H */
-- 
cgit v1.2.3


From 668ac2572548b7818b89a424f44e9c9c59786df0 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 13 Aug 2008 17:19:20 +0200
Subject: scons: List tgsi_info.c.

---
 src/gallium/auxiliary/tgsi/SConscript | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 03982e21943..45bf3f6d577 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -7,6 +7,7 @@ tgsi = env.ConvenienceLibrary(
 		'tgsi_dump.c',
 		'tgsi_dump_c.c',
 		'tgsi_exec.c',
+		'tgsi_info.c',
 		'tgsi_iterate.c',
 		'tgsi_parse.c',
 		'tgsi_sanity.c',
-- 
cgit v1.2.3


From 6d83a0cc6846adb546794c0483694fdb1d1b4664 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 13 Aug 2008 17:20:14 +0200
Subject: make: List tgsi_info.c.

---
 src/gallium/auxiliary/tgsi/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index bbeff1304d1..806a2bd4c52 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -7,6 +7,7 @@ C_SOURCES = \
 	tgsi_build.c \
 	tgsi_dump.c \
 	tgsi_exec.c \
+	tgsi_info.c \
 	tgsi_iterate.c \
 	tgsi_parse.c \
 	tgsi_scan.c \
-- 
cgit v1.2.3


From 79e52779bdcd1fa0ff8e1cbdc7555746205ee519 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 13 Aug 2008 17:22:34 +0200
Subject: tgsi: Use tgsi_opcode_info.

---
 src/gallium/auxiliary/tgsi/tgsi_text.c | 137 +--------------------------------
 1 file changed, 3 insertions(+), 134 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 35cb3055bb2..7cdf1b67c52 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -28,6 +28,7 @@
 #include "pipe/p_debug.h"
 #include "tgsi_text.h"
 #include "tgsi_build.h"
+#include "tgsi_info.h"
 #include "tgsi_parse.h"
 #include "tgsi_sanity.h"
 #include "tgsi_util.h"
@@ -719,138 +720,6 @@ parse_src_operand(
    return TRUE;
 }
 
-struct opcode_info
-{
-   uint num_dst;
-   uint num_src;
-   uint is_tex;
-   uint is_branch;
-   const char *mnemonic;
-};
-
-static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] =
-{
-   { 1, 1, 0, 0, "ARL" },
-   { 1, 1, 0, 0, "MOV" },
-   { 1, 1, 0, 0, "LIT" },
-   { 1, 1, 0, 0, "RCP" },
-   { 1, 1, 0, 0, "RSQ" },
-   { 1, 1, 0, 0, "EXP" },
-   { 1, 1, 0, 0, "LOG" },
-   { 1, 2, 0, 0, "MUL" },
-   { 1, 2, 0, 0, "ADD" },
-   { 1, 2, 0, 0, "DP3" },
-   { 1, 2, 0, 0, "DP4" },
-   { 1, 2, 0, 0, "DST" },
-   { 1, 2, 0, 0, "MIN" },
-   { 1, 2, 0, 0, "MAX" },
-   { 1, 2, 0, 0, "SLT" },
-   { 1, 2, 0, 0, "SGE" },
-   { 1, 3, 0, 0, "MAD" },
-   { 1, 2, 0, 0, "SUB" },
-   { 1, 3, 0, 0, "LERP" },
-   { 1, 3, 0, 0, "CND" },
-   { 1, 3, 0, 0, "CND0" },
-   { 1, 3, 0, 0, "DOT2ADD" },
-   { 1, 2, 0, 0, "INDEX" },
-   { 1, 1, 0, 0, "NEGATE" },
-   { 1, 1, 0, 0, "FRAC" },
-   { 1, 3, 0, 0, "CLAMP" },
-   { 1, 1, 0, 0, "FLOOR" },
-   { 1, 1, 0, 0, "ROUND" },
-   { 1, 1, 0, 0, "EXPBASE2" },
-   { 1, 1, 0, 0, "LOGBASE2" },
-   { 1, 2, 0, 0, "POWER" },
-   { 1, 2, 0, 0, "CROSSPRODUCT" },
-   { 1, 2, 0, 0, "MULTIPLYMATRIX" },
-   { 1, 1, 0, 0, "ABS" },
-   { 1, 1, 0, 0, "RCC" },
-   { 1, 2, 0, 0, "DPH" },
-   { 1, 1, 0, 0, "COS" },
-   { 1, 1, 0, 0, "DDX" },
-   { 1, 1, 0, 0, "DDY" },
-   { 0, 1, 0, 0, "KILP" },
-   { 1, 1, 0, 0, "PK2H" },
-   { 1, 1, 0, 0, "PK2US" },
-   { 1, 1, 0, 0, "PK4B" },
-   { 1, 1, 0, 0, "PK4UB" },
-   { 1, 2, 0, 0, "RFL" },
-   { 1, 2, 0, 0, "SEQ" },
-   { 1, 2, 0, 0, "SFL" },
-   { 1, 2, 0, 0, "SGT" },
-   { 1, 1, 0, 0, "SIN" },
-   { 1, 2, 0, 0, "SLE" },
-   { 1, 2, 0, 0, "SNE" },
-   { 1, 2, 0, 0, "STR" },
-   { 1, 2, 1, 0, "TEX" },
-   { 1, 4, 1, 0, "TXD" },
-   { 1, 2, 1, 0, "TXP" },
-   { 1, 1, 0, 0, "UP2H" },
-   { 1, 1, 0, 0, "UP2US" },
-   { 1, 1, 0, 0, "UP4B" },
-   { 1, 1, 0, 0, "UP4UB" },
-   { 1, 3, 0, 0, "X2D" },
-   { 1, 1, 0, 0, "ARA" },
-   { 1, 1, 0, 0, "ARR" },
-   { 0, 1, 0, 0, "BRA" },
-   { 0, 0, 0, 1, "CAL" },
-   { 0, 0, 0, 0, "RET" },
-   { 1, 1, 0, 0, "SSG" },
-   { 1, 3, 0, 0, "CMP" },
-   { 1, 1, 0, 0, "SCS" },
-   { 1, 2, 1, 0, "TXB" },
-   { 1, 1, 0, 0, "NRM" },
-   { 1, 2, 0, 0, "DIV" },
-   { 1, 2, 0, 0, "DP2" },
-   { 1, 2, 1, 0, "TXL" },
-   { 0, 0, 0, 0, "BRK" },
-   { 0, 1, 0, 1, "IF" },
-   { 0, 0, 0, 0, "LOOP" },
-   { 0, 1, 0, 0, "REP" },
-   { 0, 0, 0, 1, "ELSE" },
-   { 0, 0, 0, 0, "ENDIF" },
-   { 0, 0, 0, 0, "ENDLOOP" },
-   { 0, 0, 0, 0, "ENDREP" },
-   { 0, 1, 0, 0, "PUSHA" },
-   { 1, 0, 0, 0, "POPA" },
-   { 1, 1, 0, 0, "CEIL" },
-   { 1, 1, 0, 0, "I2F" },
-   { 1, 1, 0, 0, "NOT" },
-   { 1, 1, 0, 0, "TRUNC" },
-   { 1, 2, 0, 0, "SHL" },
-   { 1, 2, 0, 0, "SHR" },
-   { 1, 2, 0, 0, "AND" },
-   { 1, 2, 0, 0, "OR" },
-   { 1, 2, 0, 0, "MOD" },
-   { 1, 2, 0, 0, "XOR" },
-   { 1, 3, 0, 0, "SAD" },
-   { 1, 2, 1, 0, "TXF" },
-   { 1, 2, 1, 0, "TXQ" },
-   { 0, 0, 0, 0, "CONT" },
-   { 0, 0, 0, 0, "EMIT" },
-   { 0, 0, 0, 0, "ENDPRIM" },
-   { 0, 0, 0, 1, "BGNLOOP2" },
-   { 0, 0, 0, 0, "BGNSUB" },
-   { 0, 0, 0, 1, "ENDLOOP2" },
-   { 0, 0, 0, 0, "ENDSUB" },
-   { 1, 1, 0, 0, "NOISE1" },
-   { 1, 1, 0, 0, "NOISE2" },
-   { 1, 1, 0, 0, "NOISE3" },
-   { 1, 1, 0, 0, "NOISE4" },
-   { 0, 0, 0, 0, "NOP" },
-   { 1, 2, 0, 0, "M4X3" },
-   { 1, 2, 0, 0, "M3X4" },
-   { 1, 2, 0, 0, "M3X3" },
-   { 1, 2, 0, 0, "M3X2" },
-   { 1, 1, 0, 0, "NRM4" },
-   { 0, 1, 0, 0, "CALLNZ" },
-   { 0, 1, 0, 0, "IFC" },
-   { 0, 1, 0, 0, "BREAKC" },
-   { 0, 0, 0, 0, "KIL" },
-   { 0, 0, 0, 0, "END" },
-   { 1, 1, 0, 0, "SWZ" }
-};
-
 static const char *texture_names[TGSI_TEXTURE_COUNT] =
 {
    "UNKNOWN",
@@ -871,7 +740,7 @@ parse_instruction(
 {
    uint i;
    uint saturate = TGSI_SAT_NONE;
-   const struct opcode_info *info;
+   const struct tgsi_opcode_info *info;
    struct tgsi_full_instruction inst;
    uint advance;
 
@@ -881,7 +750,7 @@ parse_instruction(
    for (i = 0; i < TGSI_OPCODE_LAST; i++) {
       const char *cur = ctx->cur;
 
-      info = &opcode_info[i];
+      info = tgsi_get_opcode_info( i );
       if (str_match_no_case( &cur, info->mnemonic )) {
          if (str_match_no_case( &cur, "_SATNV" ))
             saturate = TGSI_SAT_MINUS_PLUS_ONE;
-- 
cgit v1.2.3


From 2caaba8195d1019702246bd7f0c02aa95364a8bd Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 13 Aug 2008 17:27:15 +0200
Subject: tgsi: Use tgsi_info to dump opcode names.

---
 src/gallium/auxiliary/tgsi/tgsi_dump.c   | 130 +-----------------------------
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 133 +------------------------------
 2 files changed, 6 insertions(+), 257 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 29bb530b4dd..4309d1bc76f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -28,6 +28,7 @@
 #include "pipe/p_debug.h"
 #include "util/u_string.h"
 #include "tgsi_dump.h"
+#include "tgsi_info.h"
 #include "tgsi_iterate.h"
 
 struct dump_ctx
@@ -103,129 +104,6 @@ static const char *immediate_type_names[] =
    "FLT32"
 };
 
-static const char *opcode_names[TGSI_OPCODE_LAST] =
-{
-   "ARL",
-   "MOV",
-   "LIT",
-   "RCP",
-   "RSQ",
-   "EXP",
-   "LOG",
-   "MUL",
-   "ADD",
-   "DP3",
-   "DP4",
-   "DST",
-   "MIN",
-   "MAX",
-   "SLT",
-   "SGE",
-   "MAD",
-   "SUB",
-   "LERP",
-   "CND",
-   "CND0",
-   "DOT2ADD",
-   "INDEX",
-   "NEGATE",
-   "FRAC",
-   "CLAMP",
-   "FLOOR",
-   "ROUND",
-   "EXPBASE2",
-   "LOGBASE2",
-   "POWER",
-   "CROSSPRODUCT",
-   "MULTIPLYMATRIX",
-   "ABS",
-   "RCC",
-   "DPH",
-   "COS",
-   "DDX",
-   "DDY",
-   "KILP",
-   "PK2H",
-   "PK2US",
-   "PK4B",
-   "PK4UB",
-   "RFL",
-   "SEQ",
-   "SFL",
-   "SGT",
-   "SIN",
-   "SLE",
-   "SNE",
-   "STR",
-   "TEX",
-   "TXD",
-   "TXP",
-   "UP2H",
-   "UP2US",
-   "UP4B",
-   "UP4UB",
-   "X2D",
-   "ARA",
-   "ARR",
-   "BRA",
-   "CAL",
-   "RET",
-   "SSG",
-   "CMP",
-   "SCS",
-   "TXB",
-   "NRM",
-   "DIV",
-   "DP2",
-   "TXL",
-   "BRK",
-   "IF",
-   "LOOP",
-   "REP",
-   "ELSE",
-   "ENDIF",
-   "ENDLOOP",
-   "ENDREP",
-   "PUSHA",
-   "POPA",
-   "CEIL",
-   "I2F",
-   "NOT",
-   "TRUNC",
-   "SHL",
-   "SHR",
-   "AND",
-   "OR",
-   "MOD",
-   "XOR",
-   "SAD",
-   "TXF",
-   "TXQ",
-   "CONT",
-   "EMIT",
-   "ENDPRIM",
-   "BGNLOOP2",
-   "BGNSUB",
-   "ENDLOOP2",
-   "ENDSUB",
-   "NOISE1",
-   "NOISE2",
-   "NOISE3",
-   "NOISE4",
-   "NOP",
-   "M4X3",
-   "M3X4",
-   "M3X3",
-   "M3X2",
-   "NRM4",
-   "CALLNZ",
-   "IFC",
-   "BREAKC",
-   "KIL",
-   "END",
-   "SWZ"
-};
-
 static const char *swizzle_names[] =
 {
    "x",
@@ -444,7 +322,7 @@ iter_instruction(
 
    UID( instno );
    CHR( ':' );
-   ENM( inst->Instruction.Opcode, opcode_names );
+   TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic );
 
    switch (inst->Instruction.Saturate) {
    case TGSI_SAT_NONE:
@@ -622,10 +500,6 @@ tgsi_dump_str(
 
    util_strbuf_init(&sbuf, str, size);
    
-   /* sanity checks */
-   assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 );
-   assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 );
-
    ctx.iter.prolog = prolog;
    ctx.iter.iterate_instruction = iter_instruction;
    ctx.iter.iterate_declaration = iter_declaration;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index eabd74bd6d9..1025866a252 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -29,8 +29,9 @@
 #include "pipe/p_util.h"
 #include "util/u_string.h"
 #include "tgsi_dump_c.h"
-#include "tgsi_parse.h"
 #include "tgsi_build.h"
+#include "tgsi_info.h"
+#include "tgsi_parse.h"
 
 static void
 dump_enum(
@@ -104,128 +105,6 @@ static const char *TGSI_IMMS[] =
    "IMM_FLOAT32"
 };
 
-static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] =
-{
-   "OPCODE_ARL",
-   "OPCODE_MOV",
-   "OPCODE_LIT",
-   "OPCODE_RCP",
-   "OPCODE_RSQ",
-   "OPCODE_EXP",
-   "OPCODE_LOG",
-   "OPCODE_MUL",
-   "OPCODE_ADD",
-   "OPCODE_DP3",
-   "OPCODE_DP4",
-   "OPCODE_DST",
-   "OPCODE_MIN",
-   "OPCODE_MAX",
-   "OPCODE_SLT",
-   "OPCODE_SGE",
-   "OPCODE_MAD",
-   "OPCODE_SUB",
-   "OPCODE_LERP",
-   "OPCODE_CND",
-   "OPCODE_CND0",
-   "OPCODE_DOT2ADD",
-   "OPCODE_INDEX",
-   "OPCODE_NEGATE",
-   "OPCODE_FRAC",
-   "OPCODE_CLAMP",
-   "OPCODE_FLOOR",
-   "OPCODE_ROUND",
-   "OPCODE_EXPBASE2",
-   "OPCODE_LOGBASE2",
-   "OPCODE_POWER",
-   "OPCODE_CROSSPRODUCT",
-   "OPCODE_MULTIPLYMATRIX",
-   "OPCODE_ABS",
-   "OPCODE_RCC",
-   "OPCODE_DPH",
-   "OPCODE_COS",
-   "OPCODE_DDX",
-   "OPCODE_DDY",
-   "OPCODE_KILP",
-   "OPCODE_PK2H",
-   "OPCODE_PK2US",
-   "OPCODE_PK4B",
-   "OPCODE_PK4UB",
-   "OPCODE_RFL",
-   "OPCODE_SEQ",
-   "OPCODE_SFL",
-   "OPCODE_SGT",
-   "OPCODE_SIN",
-   "OPCODE_SLE",
-   "OPCODE_SNE",
-   "OPCODE_STR",
-   "OPCODE_TEX",
-   "OPCODE_TXD",
-   "OPCODE_TXP",
-   "OPCODE_UP2H",
-   "OPCODE_UP2US",
-   "OPCODE_UP4B",
-   "OPCODE_UP4UB",
-   "OPCODE_X2D",
-   "OPCODE_ARA",
-   "OPCODE_ARR",
-   "OPCODE_BRA",
-   "OPCODE_CAL",
-   "OPCODE_RET",
-   "OPCODE_SSG",
-   "OPCODE_CMP",
-   "OPCODE_SCS",
-   "OPCODE_TXB",
-   "OPCODE_NRM",
-   "OPCODE_DIV",
-   "OPCODE_DP2",
-   "OPCODE_TXL",
-   "OPCODE_BRK",
-   "OPCODE_IF",
-   "OPCODE_LOOP",
-   "OPCODE_REP",
-   "OPCODE_ELSE",
-   "OPCODE_ENDIF",
-   "OPCODE_ENDLOOP",
-   "OPCODE_ENDREP",
-   "OPCODE_PUSHA",
-   "OPCODE_POPA",
-   "OPCODE_CEIL",
-   "OPCODE_I2F",
-   "OPCODE_NOT",
-   "OPCODE_TRUNC",
-   "OPCODE_SHL",
-   "OPCODE_SHR",
-   "OPCODE_AND",
-   "OPCODE_OR",
-   "OPCODE_MOD",
-   "OPCODE_XOR",
-   "OPCODE_SAD",
-   "OPCODE_TXF",
-   "OPCODE_TXQ",
-   "OPCODE_CONT",
-   "OPCODE_EMIT",
-   "OPCODE_ENDPRIM",
-   "OPCODE_BGNLOOP2",
-   "OPCODE_BGNSUB",
-   "OPCODE_ENDLOOP2",
-   "OPCODE_ENDSUB",
-   "OPCODE_NOISE1",
-   "OPCODE_NOISE2",
-   "OPCODE_NOISE3",
-   "OPCODE_NOISE4",
-   "OPCODE_NOP",
-   "OPCODE_M4X3",
-   "OPCODE_M3X4",
-   "OPCODE_M3X3",
-   "OPCODE_M3X2",
-   "OPCODE_NRM4",
-   "OPCODE_CALLNZ",
-   "OPCODE_IFC",
-   "OPCODE_BREAKC",
-   "OPCODE_KIL",
-   "OPCODE_END"
-};
-
 static const char *TGSI_SATS[] =
 {
    "SAT_NONE",
@@ -428,8 +307,8 @@ dump_instruction_verbose(
 {
    unsigned i;
 
-   TXT( "\nOpcode     : " );
-   ENM( inst->Instruction.Opcode, TGSI_OPCODES );
+   TXT( "\nOpcode     : OPCODE_" );
+   TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic );
    if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) {
       TXT( "\nSaturate   : " );
       ENM( inst->Instruction.Saturate, TGSI_SATS );
@@ -770,10 +649,6 @@ tgsi_dump_c(
    uint deflt = flags & TGSI_DUMP_C_DEFAULT;
    uint instno = 0;
 
-   /* sanity checks */
-   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
-   assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
-
    tgsi_parse_init( &parse, tokens );
 
    TXT( "tgsi-dump begin -----------------" );
-- 
cgit v1.2.3


From 4b929b32d03a58d80cacbd63c172dbd7221c8a8f Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 13 Aug 2008 17:31:26 +0200
Subject: tgsi: Validate instruction opcode and operand counts.

---
 src/gallium/auxiliary/tgsi/tgsi_sanity.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 2e3ec96b5b5..0b5bdd6ba1c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -27,6 +27,7 @@
 
 #include "pipe/p_debug.h"
 #include "tgsi_sanity.h"
+#include "tgsi_info.h"
 #include "tgsi_iterate.h"
 
 #define MAX_REGISTERS 256
@@ -170,6 +171,7 @@ iter_instruction(
    struct tgsi_full_instruction *inst )
 {
    struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
+   const struct tgsi_opcode_info *info;
    uint i;
 
    /* There must be no other instructions after END.
@@ -181,6 +183,19 @@ iter_instruction(
       ctx->index_of_END = ctx->num_instructions;
    }
 
+   info = tgsi_get_opcode_info( inst->Instruction.Opcode );
+   if (info == NULL) {
+      report_error( ctx, "Invalid instruction opcode" );
+      return TRUE;
+   }
+
+   if (info->num_dst != inst->Instruction.NumDstRegs) {
+      report_error( ctx, "Invalid number of destination operands" );
+   }
+   if (info->num_src != inst->Instruction.NumSrcRegs) {
+      report_error( ctx, "Invalid number of source operands" );
+   }
+
    /* Check destination and source registers' validity.
     * Mark the registers as used.
     */
-- 
cgit v1.2.3


From bfdb1d55d58d70044af9fcd6f8465179145581dd Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 13 Aug 2008 19:28:10 +0200
Subject: tgsi: Fix typo.

---
 src/gallium/auxiliary/tgsi/tgsi_build.c | 4 ++--
 src/gallium/auxiliary/tgsi/tgsi_build.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 742ef14c352..050b448fe7c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -820,7 +820,7 @@ tgsi_build_instruction_ext_nv(
    unsigned cond_swizzle_z,
    unsigned cond_swizzle_w,
    unsigned cond_dst_update,
-   unsigned cond_flow_update,
+   unsigned cond_flow_enable,
    struct tgsi_token *prev_token,
    struct tgsi_instruction *instruction,
    struct tgsi_header *header )
@@ -837,7 +837,7 @@ tgsi_build_instruction_ext_nv(
    instruction_ext_nv.CondSwizzleZ = cond_swizzle_z;
    instruction_ext_nv.CondSwizzleW = cond_swizzle_w;
    instruction_ext_nv.CondDstUpdate = cond_dst_update;
-   instruction_ext_nv.CondFlowEnable = cond_flow_update;
+   instruction_ext_nv.CondFlowEnable = cond_flow_enable;
 
    prev_token->Extended = 1;
    instruction_grow( instruction, header );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index ed258302487..6ae7f324f85 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -170,7 +170,7 @@ tgsi_build_instruction_ext_nv(
    unsigned cond_swizzle_z,
    unsigned cond_swizzle_w,
    unsigned cond_dst_update,
-   unsigned cond_flow_update,
+   unsigned cond_flow_enable,
    struct tgsi_token *prev_token,
    struct tgsi_instruction *instruction,
    struct tgsi_header *header );
-- 
cgit v1.2.3


From 83a5a225d773c119857f58672fd51c1d425f21d3 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 13 Aug 2008 19:31:13 +0200
Subject: tgsi: Use a homebrew version of toupper().

---
 src/gallium/auxiliary/tgsi/tgsi_text.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 7cdf1b67c52..9454563361e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -51,11 +51,18 @@ static boolean is_digit_alpha_underscore( const char *cur )
    return is_digit( cur ) || is_alpha_underscore( cur );
 }
 
+static boolean uprcase( char c )
+{
+   if (c >= 'a' && c <= 'z')
+      return c += 'A' - 'a';
+   return c;
+}
+
 static boolean str_match_no_case( const char **pcur, const char *str )
 {
    const char *cur = *pcur;
 
-   while (*str != '\0' && *str == toupper( *cur )) {
+   while (*str != '\0' && *str == uprcase( *cur )) {
       str++;
       cur++;
    }
@@ -131,7 +138,7 @@ static boolean parse_float( const char **pcur, float *val )
    }
    if (!integral_part && !fractional_part)
       return FALSE;
-   if (toupper( *cur ) == 'E') {
+   if (uprcase( *cur ) == 'E') {
       cur++;
       if (*cur == '-' || *cur == '+')
          cur++;
@@ -258,19 +265,19 @@ parse_opt_writemask(
       cur++;
       *writemask = TGSI_WRITEMASK_NONE;
       eat_opt_white( &cur );
-      if (toupper( *cur ) == 'X') {
+      if (uprcase( *cur ) == 'X') {
          cur++;
          *writemask |= TGSI_WRITEMASK_X;
       }
-      if (toupper( *cur ) == 'Y') {
+      if (uprcase( *cur ) == 'Y') {
          cur++;
          *writemask |= TGSI_WRITEMASK_Y;
       }
-      if (toupper( *cur ) == 'Z') {
+      if (uprcase( *cur ) == 'Z') {
          cur++;
          *writemask |= TGSI_WRITEMASK_Z;
       }
-      if (toupper( *cur ) == 'W') {
+      if (uprcase( *cur ) == 'W') {
          cur++;
          *writemask |= TGSI_WRITEMASK_W;
       }
@@ -516,13 +523,13 @@ parse_optional_swizzle(
       cur++;
       eat_opt_white( &cur );
       for (i = 0; i < 4; i++) {
-         if (toupper( *cur ) == 'X')
+         if (uprcase( *cur ) == 'X')
             swizzle[i] = TGSI_SWIZZLE_X;
-         else if (toupper( *cur ) == 'Y')
+         else if (uprcase( *cur ) == 'Y')
             swizzle[i] = TGSI_SWIZZLE_Y;
-         else if (toupper( *cur ) == 'Z')
+         else if (uprcase( *cur ) == 'Z')
             swizzle[i] = TGSI_SWIZZLE_Z;
-         else if (toupper( *cur ) == 'W')
+         else if (uprcase( *cur ) == 'W')
             swizzle[i] = TGSI_SWIZZLE_W;
          else {
             if (*cur == '0')
-- 
cgit v1.2.3


From 3a1af846fe13c2a9b8c24eb7e37e11a9b42668d5 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 13 Aug 2008 20:09:56 +0200
Subject: tgsi: Initial code for KILP, needs CCs working to be complete.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index c4ba667d32a..df2a7cc4b3e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1234,11 +1234,29 @@ static void
 exec_kilp(struct tgsi_exec_machine *mach,
           const struct tgsi_full_instruction *inst)
 {
-   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
+   if (inst->InstructionExtNv.CondFlowEnable) {
+      uint swizzle[4];
+      uint chan_index;
+      uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
 
-   /* TODO: build kilmask from CC mask */
+      swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
+      swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
+      swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
+      swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
 
-   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
+      for (chan_index = 0; chan_index < 4; chan_index++)
+      {
+         uint i;
+
+         for (i = 0; i < 4; i++) {
+            /* TODO: evaluate the condition code */
+            if (0)
+               kilmask |= 1 << i;
+         }
+      }
+
+      mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
+   }
 }
 
 
-- 
cgit v1.2.3


From 19ff2326e94a7a773bcb1dd9bfd22197b999daf9 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 13 Aug 2008 17:20:40 -0600
Subject: gallium: fix exec_kilp(), fix Exec/FuncMask test for TGSI_OPCODE_RET

Fixes a few glean glsl regressions.
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index df2a7cc4b3e..a96209db1af 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1234,10 +1234,13 @@ static void
 exec_kilp(struct tgsi_exec_machine *mach,
           const struct tgsi_full_instruction *inst)
 {
+   uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
+
    if (inst->InstructionExtNv.CondFlowEnable) {
       uint swizzle[4];
       uint chan_index;
-      uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
+
+      kilmask = 0x0;
 
       swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
       swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
@@ -1254,9 +1257,12 @@ exec_kilp(struct tgsi_exec_machine *mach,
                kilmask |= 1 << i;
          }
       }
-
-      mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
    }
+   else {
+      /* "unconditional" kil */
+      kilmask = mach->ExecMask;
+   }
+   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
 }
 
 
@@ -2176,7 +2182,7 @@ exec_instruction(
       mach->FuncMask &= ~mach->ExecMask;
       UPDATE_EXEC_MASK(mach);
 
-      if (mach->ExecMask == 0x0) {
+      if (mach->FuncMask == 0x0) {
          /* really return now (otherwise, keep executing */
 
          if (mach->CallStackTop == 0) {
-- 
cgit v1.2.3


From e2da7edd642198d7c515dbc0b9ba77d4286c3262 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 18 Aug 2008 15:42:26 +0200
Subject: tgsi: Add condition code (CC) register.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 13 +++++++++++--
 src/gallium/auxiliary/tgsi/tgsi_exec.h | 29 ++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 7 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a96209db1af..7974915211a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 /**
- * TGSI interpretor/executor.
+ * TGSI interpreter/executor.
  *
  * Flow control information:
  *
@@ -88,6 +88,8 @@
 #define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
 #define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
 #define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
+#define TEMP_CC_I          TGSI_EXEC_TEMP_CC_I
+#define TEMP_CC_C          TGSI_EXEC_TEMP_CC_C
 #define TEMP_3_I           TGSI_EXEC_TEMP_THREE_I
 #define TEMP_3_C           TGSI_EXEC_TEMP_THREE_C
 #define TEMP_HALF_I        TGSI_EXEC_TEMP_HALF_I
@@ -2539,6 +2541,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
       mach->Primitives[0] = 0;
    }
 
+   for (i = 0; i < QUAD_SIZE; i++) {
+      mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
+         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
+         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
+         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
+         (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
+   }
 
    /* execute declarations (interpolants) */
    for (i = 0; i < mach->NumDeclarations; i++) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 4f30650b07b..c4e649e69c4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -1,6 +1,6 @@
 /**************************************************************************
  * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,7 +25,7 @@
  * 
  **************************************************************************/
 
-#if !defined TGSI_EXEC_H
+#ifndef TGSI_EXEC_H
 #define TGSI_EXEC_H
 
 #include "pipe/p_compiler.h"
@@ -140,11 +140,30 @@ struct tgsi_exec_labels
 #define TGSI_EXEC_TEMP_PRIMITIVE_I  (TGSI_EXEC_NUM_TEMPS + 2)
 #define TGSI_EXEC_TEMP_PRIMITIVE_C  2
 
-#define TGSI_EXEC_TEMP_THREE_I      (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_THREE_C      3
+/* NVIDIA condition code (CC) vector
+ */
+#define TGSI_EXEC_CC_GT       0x01
+#define TGSI_EXEC_CC_EQ       0x02
+#define TGSI_EXEC_CC_LT       0x04
+#define TGSI_EXEC_CC_UN       0x08
+
+#define TGSI_EXEC_CC_X_MASK   0x000000ff
+#define TGSI_EXEC_CC_X_SHIFT  0
+#define TGSI_EXEC_CC_Y_MASK   0x0000ff00
+#define TGSI_EXEC_CC_Y_SHIFT  8
+#define TGSI_EXEC_CC_Z_MASK   0x00ff0000
+#define TGSI_EXEC_CC_Z_SHIFT  16
+#define TGSI_EXEC_CC_W_MASK   0xff000000
+#define TGSI_EXEC_CC_W_SHIFT  24
+
+#define TGSI_EXEC_TEMP_CC_I         (TGSI_EXEC_NUM_TEMPS + 2)
+#define TGSI_EXEC_TEMP_CC_C         3
+
+#define TGSI_EXEC_TEMP_THREE_I      (TGSI_EXEC_NUM_TEMPS + 3)
+#define TGSI_EXEC_TEMP_THREE_C      0
 
 #define TGSI_EXEC_TEMP_HALF_I       (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_HALF_C       0
+#define TGSI_EXEC_TEMP_HALF_C       1
 
 #define TGSI_EXEC_TEMP_R0           (TGSI_EXEC_NUM_TEMPS + 4)
 
-- 
cgit v1.2.3


From 880b751e8e21cab21a0d522346300f463fd9c634 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 18 Aug 2008 15:57:39 +0200
Subject: tgsi: Cosmetic changes.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 7974915211a..6b19ab5c8e8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -63,6 +63,11 @@
 #define TILE_BOTTOM_LEFT  2
 #define TILE_BOTTOM_RIGHT 3
 
+#define CHAN_X  0
+#define CHAN_Y  1
+#define CHAN_Z  2
+#define CHAN_W  3
+
 /*
  * Shorthand locations of various utility registers (_I = Index, _C = Channel)
  */
@@ -96,9 +101,6 @@
 #define TEMP_HALF_C        TGSI_EXEC_TEMP_HALF_C
 #define TEMP_R0            TGSI_EXEC_TEMP_R0
 
-#define FOR_EACH_CHANNEL(CHAN)\
-   for (CHAN = 0; CHAN < 4; CHAN++)
-
 #define IS_CHANNEL_ENABLED(INST, CHAN)\
    ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
 
@@ -106,11 +108,11 @@
    ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
 
 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
-   FOR_EACH_CHANNEL( CHAN )\
+   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
       if (IS_CHANNEL_ENABLED( INST, CHAN ))
 
 #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
-   FOR_EACH_CHANNEL( CHAN )\
+   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
       if (IS_CHANNEL_ENABLED2( INST, CHAN ))
 
 
@@ -118,14 +120,6 @@
 #define UPDATE_EXEC_MASK(MACH) \
       MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
 
-
-#define CHAN_X  0
-#define CHAN_Y  1
-#define CHAN_Z  2
-#define CHAN_W  3
-
-
-
 /**
  * Initialize machine state by expanding tokens to full instructions,
  * allocating temporary storage, setting up constants, etc.
@@ -1128,7 +1122,7 @@ store_dest(
 {
    union tgsi_exec_channel *dst;
 
-   switch( reg->DstRegister.File ) {
+   switch (reg->DstRegister.File) {
    case TGSI_FILE_NULL:
       return;
 
@@ -1138,7 +1132,7 @@ store_dest(
       break;
 
    case TGSI_FILE_TEMPORARY:
-      assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS);
+      assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
       dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
       break;
 
@@ -1151,8 +1145,7 @@ store_dest(
       return;
    }
 
-   switch (inst->Instruction.Saturate)
-   {
+   switch (inst->Instruction.Saturate) {
    case TGSI_SAT_NONE:
       if (mach->ExecMask & 0x1)
          dst->i[0] = chan->i[0];
@@ -1166,8 +1159,8 @@ store_dest(
 
    case TGSI_SAT_ZERO_ONE:
       /* XXX need to obey ExecMask here */
-      micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
-      micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+      micro_max( dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+      micro_min( dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
       break;
 
    case TGSI_SAT_MINUS_PLUS_ONE:
-- 
cgit v1.2.3


From 6aacca106b0619d87015aece1a0b1d6332910926 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 18 Aug 2008 17:03:56 +0200
Subject: tgsi: Respect condition codes when storing destination register.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 121 ++++++++++++++++++++++++++++++---
 1 file changed, 113 insertions(+), 8 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 6b19ab5c8e8..1217ee71a53 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1120,7 +1120,9 @@ store_dest(
    const struct tgsi_full_instruction *inst,
    uint chan_index )
 {
+   uint i;
    union tgsi_exec_channel *dst;
+   uint execmask = mach->ExecMask;
 
    switch (reg->DstRegister.File) {
    case TGSI_FILE_NULL:
@@ -1145,16 +1147,119 @@ store_dest(
       return;
    }
 
+   if (inst->InstructionExtNv.CondFlowEnable) {
+      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
+      uint swizzle;
+      uint shift;
+      uint mask;
+      uint test;
+
+      /* Only CC0 supported.
+       */
+      assert( inst->InstructionExtNv.CondFlowIndex < 1 );
+
+      switch (chan_index) {
+      case CHAN_X:
+         swizzle = inst->InstructionExtNv.CondSwizzleX;
+         break;
+      case CHAN_Y:
+         swizzle = inst->InstructionExtNv.CondSwizzleY;
+         break;
+      case CHAN_Z:
+         swizzle = inst->InstructionExtNv.CondSwizzleZ;
+         break;
+      case CHAN_W:
+         swizzle = inst->InstructionExtNv.CondSwizzleW;
+         break;
+      default:
+         assert( 0 );
+         return;
+      }
+
+      switch (swizzle) {
+      case TGSI_SWIZZLE_X:
+         shift = TGSI_EXEC_CC_X_SHIFT;
+         mask = TGSI_EXEC_CC_X_MASK;
+         break;
+      case TGSI_SWIZZLE_Y:
+         shift = TGSI_EXEC_CC_Y_SHIFT;
+         mask = TGSI_EXEC_CC_Y_MASK;
+         break;
+      case TGSI_SWIZZLE_Z:
+         shift = TGSI_EXEC_CC_Z_SHIFT;
+         mask = TGSI_EXEC_CC_Z_MASK;
+         break;
+      case TGSI_SWIZZLE_W:
+         shift = TGSI_EXEC_CC_W_SHIFT;
+         mask = TGSI_EXEC_CC_W_MASK;
+         break;
+      default:
+         assert( 0 );
+         return;
+      }
+
+      switch (inst->InstructionExtNv.CondMask) {
+      case TGSI_CC_GT:
+         test = ~(TGSI_EXEC_CC_GT << shift) & mask;
+         for (i = 0; i < QUAD_SIZE; i++)
+            if (cc->u[i] & test)
+               execmask &= ~(1 << i);
+         break;
+
+      case TGSI_CC_EQ:
+         test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
+         for (i = 0; i < QUAD_SIZE; i++)
+            if (cc->u[i] & test)
+               execmask &= ~(1 << i);
+         break;
+
+      case TGSI_CC_LT:
+         test = ~(TGSI_EXEC_CC_LT << shift) & mask;
+         for (i = 0; i < QUAD_SIZE; i++)
+            if (cc->u[i] & test)
+               execmask &= ~(1 << i);
+         break;
+
+      case TGSI_CC_GE:
+         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
+         for (i = 0; i < QUAD_SIZE; i++)
+            if (cc->u[i] & test)
+               execmask &= ~(1 << i);
+         break;
+
+      case TGSI_CC_LE:
+         test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
+         for (i = 0; i < QUAD_SIZE; i++)
+            if (cc->u[i] & test)
+               execmask &= ~(1 << i);
+         break;
+
+      case TGSI_CC_NE:
+         test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
+         for (i = 0; i < QUAD_SIZE; i++)
+            if (cc->u[i] & test)
+               execmask &= ~(1 << i);
+         break;
+
+      case TGSI_CC_TR:
+         break;
+
+      case TGSI_CC_FL:
+         for (i = 0; i < QUAD_SIZE; i++)
+            execmask &= ~(1 << i);
+         break;
+
+      default:
+         assert( 0 );
+         return;
+      }
+   }
+
    switch (inst->Instruction.Saturate) {
    case TGSI_SAT_NONE:
-      if (mach->ExecMask & 0x1)
-         dst->i[0] = chan->i[0];
-      if (mach->ExecMask & 0x2)
-         dst->i[1] = chan->i[1];
-      if (mach->ExecMask & 0x4)
-         dst->i[2] = chan->i[2];
-      if (mach->ExecMask & 0x8)
-         dst->i[3] = chan->i[3];
+      for (i = 0; i < QUAD_SIZE; i++)
+         if (execmask & (1 << i))
+            dst->i[i] = chan->i[i];
       break;
 
    case TGSI_SAT_ZERO_ONE:
-- 
cgit v1.2.3


From 56c30bf17b9f57efdb93ae5d1b801677535a9651 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 18 Aug 2008 17:09:20 +0200
Subject: tgsi: Saturate modifier obeys ExecMask. Implement NVIDIA [-1;+1]
 saturate mode.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 1217ee71a53..dfd22270c8b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1263,13 +1263,27 @@ store_dest(
       break;
 
    case TGSI_SAT_ZERO_ONE:
-      /* XXX need to obey ExecMask here */
-      micro_max( dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-      micro_min( dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+      for (i = 0; i < QUAD_SIZE; i++)
+         if (execmask & (1 << i)) {
+            if (chan->f[i] < 0.0f)
+               dst->f[i] = 0.0f;
+            else if (chan->f[i] > 1.0f)
+               dst->f[i] = 1.0f;
+            else
+               dst->i[i] = chan->i[i];
+         }
       break;
 
    case TGSI_SAT_MINUS_PLUS_ONE:
-      assert( 0 );
+      for (i = 0; i < QUAD_SIZE; i++)
+         if (execmask & (1 << i)) {
+            if (chan->f[i] < -1.0f)
+               dst->f[i] = -1.0f;
+            else if (chan->f[i] > 1.0f)
+               dst->f[i] = 1.0f;
+            else
+               dst->i[i] = chan->i[i];
+         }
       break;
 
    default:
-- 
cgit v1.2.3


From e9ec60097cfed372f202aa64aa04674448881f0e Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 18 Aug 2008 17:18:58 +0200
Subject: tgsi: Update condition code vector when storing dest register.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 49 +++++++++++++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index dfd22270c8b..a2235de059b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1121,12 +1121,14 @@ store_dest(
    uint chan_index )
 {
    uint i;
+   union tgsi_exec_channel null;
    union tgsi_exec_channel *dst;
    uint execmask = mach->ExecMask;
 
    switch (reg->DstRegister.File) {
    case TGSI_FILE_NULL:
-      return;
+      dst = &null;
+      break;
 
    case TGSI_FILE_OUTPUT:
       dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
@@ -1289,6 +1291,51 @@ store_dest(
    default:
       assert( 0 );
    }
+
+   if (inst->InstructionExtNv.CondDstUpdate) {
+      union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
+      uint shift;
+      uint mask;
+
+      /* Only CC0 supported.
+       */
+      assert( inst->InstructionExtNv.CondDstIndex < 1 );
+
+      switch (chan_index) {
+      case CHAN_X:
+         shift = TGSI_EXEC_CC_X_SHIFT;
+         mask = ~TGSI_EXEC_CC_X_MASK;
+         break;
+      case CHAN_Y:
+         shift = TGSI_EXEC_CC_Y_SHIFT;
+         mask = ~TGSI_EXEC_CC_Y_MASK;
+         break;
+      case CHAN_Z:
+         shift = TGSI_EXEC_CC_Z_SHIFT;
+         mask = ~TGSI_EXEC_CC_Z_MASK;
+         break;
+      case CHAN_W:
+         shift = TGSI_EXEC_CC_W_SHIFT;
+         mask = ~TGSI_EXEC_CC_W_MASK;
+         break;
+      default:
+         assert( 0 );
+         return;
+      }
+
+      for (i = 0; i < QUAD_SIZE; i++)
+         if (execmask & (1 << i)) {
+            cc->u[i] &= mask;
+            if (dst->f[i] < 0.0f)
+               cc->u[i] |= TGSI_EXEC_CC_LT << shift;
+            else if (dst->f[i] > 0.0f)
+               cc->u[i] |= TGSI_EXEC_CC_GT << shift;
+            else if (dst->f[i] == 0.0f)
+               cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
+            else
+               cc->u[i] |= TGSI_EXEC_CC_UN << shift;
+         }
+   }
 }
 
 #define FETCH(VAL,INDEX,CHAN)\
-- 
cgit v1.2.3


From 93305bd6808787964c0088a88c428ecd1208aa44 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 18 Aug 2008 18:09:23 +0200
Subject: tgsi: Use NUM_CHANNELS.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 47dc06faf6d..0bf260c4dd8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -44,13 +44,13 @@
 
 
 #define FOR_EACH_CHANNEL( CHAN )\
-   for( CHAN = 0; CHAN < 4; CHAN++ )
+   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
 
 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
    ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
 
 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
-   if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
+   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
 
 #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
    FOR_EACH_CHANNEL( CHAN )\
@@ -1172,7 +1172,7 @@ emit_instruction(
 {
    unsigned chan_index;
 
-   switch( inst->Instruction.Opcode ) {
+   switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_ARL:
 #if 0
       /* XXX this isn't working properly (see glean vertProg1 test) */
-- 
cgit v1.2.3


From 2b512c0135bc8512cc80009ea7430f7cc0c869d6 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Mon, 18 Aug 2008 22:21:32 +0200
Subject: tgsi: Workaround debug output buffer size limitations.

---
 src/gallium/auxiliary/tgsi/tgsi_dump.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 4309d1bc76f..a168c949280 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -518,8 +518,23 @@ tgsi_dump(
    uint flags )
 {
    static char str[4096];
-   
+   uint len;
+   char *p = str;
+
    tgsi_dump_str(tokens, flags, str, sizeof(str));
-   
-   debug_printf("%s", str);
+
+   /* Workaround output buffer size limitations.
+    */
+   len = strlen( str );
+   while (len > 256) {
+      char piggy_bank;
+
+      piggy_bank = p[256];
+      p[256] = '\0';
+      debug_printf( "%s", p );
+      p[256] = piggy_bank;
+      p += 256;
+      len -= 256;
+   }
+   debug_printf( "%s", p );
 }
-- 
cgit v1.2.3


From de3083be7197cccb8dbf223d644ebdb78a8c809d Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Tue, 19 Aug 2008 00:42:30 +0200
Subject: tgsi: Fix ARL opcode in SSE2 codegen.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c |  6 +--
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 87 +++++++++++++++++++++++++---------
 2 files changed, 68 insertions(+), 25 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a2235de059b..b793e1ebaec 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1688,9 +1688,9 @@ exec_instruction(
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_ARL:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 FETCH( &r[0], 0, chan_index );
-	 micro_f2it( &r[0], &r[0] );
-	 STORE( &r[0], 0, chan_index );
+         FETCH( &r[0], 0, chan_index );
+         micro_f2it( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
       }
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 0bf260c4dd8..ca48d422e08 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -62,6 +62,7 @@
 #define CHAN_W 3
 
 #define TEMP_R0   TGSI_EXEC_TEMP_R0
+#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR
 
 /**
  * X86 utility functions.
@@ -215,19 +216,61 @@ emit_ret(
 static void
 emit_const(
    struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movss(
-      func,
-      make_xmm( xmm ),
-      get_const( vec, chan ) );
-   sse_shufps(
-      func,
-      make_xmm( xmm ),
-      make_xmm( xmm ),
-      SHUF( 0, 0, 0, 0 ) );
+   uint xmm,
+   int vec,
+   uint chan,
+   uint indirect,
+   uint indirectFile,
+   int indirectIndex )
+{
+   if (indirect) {
+      struct x86_reg r0 = get_input_base();
+      struct x86_reg r1 = get_output_base();
+      uint i;
+
+      assert( indirectFile == TGSI_FILE_ADDRESS );
+      assert( indirectIndex == 0 );
+
+      x86_push( func, r0 );
+      x86_push( func, r1 );
+
+      for (i = 0; i < QUAD_SIZE; i++) {
+         x86_lea( func, r0, get_const( vec, chan ) );
+         x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) );
+
+         /* Quick hack to multiply by 16 -- need to add SHL to rtasm.
+          */
+         x86_add( func, r1, r1 );
+         x86_add( func, r1, r1 );
+         x86_add( func, r1, r1 );
+         x86_add( func, r1, r1 );
+
+         x86_add( func, r0, r1 );
+         x86_mov( func, r1, x86_deref( r0 ) );
+         x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 );
+      }
+
+      x86_pop( func, r1 );
+      x86_pop( func, r0 );
+
+      sse_movaps(
+         func,
+         make_xmm( xmm ),
+         get_temp( TEMP_R0, CHAN_X ) );
+   }
+   else {
+      assert( vec >= 0 );
+
+      sse_movss(
+         func,
+         make_xmm( xmm ),
+         get_const( vec, chan ) );
+      sse_shufps(
+         func,
+         make_xmm( xmm ),
+         make_xmm( xmm ),
+         SHUF( 0, 0, 0, 0 ) );
+   }
 }
 
 static void
@@ -369,10 +412,12 @@ emit_addrs(
    unsigned vec,
    unsigned chan )
 {
+   assert( vec == 0 );
+
    emit_temps(
       func,
       xmm,
-      vec + TGSI_EXEC_NUM_TEMPS,
+      vec + TGSI_EXEC_TEMP_ADDR,
       chan );
 }
 
@@ -855,18 +900,21 @@ emit_fetch(
 {
    unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
 
-   switch( swizzle ) {
+   switch (swizzle) {
    case TGSI_EXTSWIZZLE_X:
    case TGSI_EXTSWIZZLE_Y:
    case TGSI_EXTSWIZZLE_Z:
    case TGSI_EXTSWIZZLE_W:
-      switch( reg->SrcRegister.File ) {
+      switch (reg->SrcRegister.File) {
       case TGSI_FILE_CONSTANT:
          emit_const(
             func,
             xmm,
             reg->SrcRegister.Index,
-            swizzle );
+            swizzle,
+            reg->SrcRegister.Indirect,
+            reg->SrcRegisterInd.File,
+            reg->SrcRegisterInd.Index );
          break;
 
       case TGSI_FILE_IMMEDIATE:
@@ -1174,16 +1222,11 @@ emit_instruction(
 
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_ARL:
-#if 0
-      /* XXX this isn't working properly (see glean vertProg1 test) */
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 0, 0, chan_index );
          emit_f2it( func, 0 );
          STORE( func, *inst, 0, 0, chan_index );
       }
-#else
-      return 0;
-#endif
       break;
 
    case TGSI_OPCODE_MOV:
-- 
cgit v1.2.3


From 0d9d2045e86a249a36d8eeebf59979a162c164af Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Tue, 19 Aug 2008 11:47:30 +0200
Subject: tgsi: Implement EXP opcode for SSE2.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c |  8 ++---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 64 ++++++++++++++++++++++++++--------
 2 files changed, 53 insertions(+), 19 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index b793e1ebaec..d584a4e4fa4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1756,18 +1756,18 @@ exec_instruction(
       micro_flr( &r[1], &r[0] );  /* r1 = floor(r0) */
       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
          micro_exp2( &r[2], &r[1] );       /* r2 = 2 ^ r1 */
-	 STORE( &r[2], 0, CHAN_X );        /* store r2 */
+         STORE( &r[2], 0, CHAN_X );        /* store r2 */
       }
       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
          micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
-	 STORE( &r[2], 0, CHAN_Y );        /* store r2 */
+         STORE( &r[2], 0, CHAN_Y );        /* store r2 */
       }
       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
          micro_exp2( &r[2], &r[0] );       /* r2 = 2 ^ r0 */
-	 STORE( &r[2], 0, CHAN_Z );        /* store r2 */
+         STORE( &r[2], 0, CHAN_Z );        /* store r2 */
       }
       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
-	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
       }
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index ca48d422e08..684bc081e55 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -61,6 +61,9 @@
 #define CHAN_Z 2
 #define CHAN_W 3
 
+#define TEMP_ONE_I   TGSI_EXEC_TEMP_ONE_I
+#define TEMP_ONE_C   TGSI_EXEC_TEMP_ONE_C
+
 #define TEMP_R0   TGSI_EXEC_TEMP_R0
 #define TEMP_ADDR TGSI_EXEC_TEMP_ADDR
 
@@ -958,8 +961,8 @@ emit_fetch(
       emit_tempf(
          func,
          xmm,
-         TGSI_EXEC_TEMP_ONE_I,
-         TGSI_EXEC_TEMP_ONE_C );
+         TEMP_ONE_I,
+         TEMP_ONE_C );
       break;
 
    default:
@@ -1173,8 +1176,8 @@ emit_setcc(
          func,
          make_xmm( 0 ),
          get_temp(
-            TGSI_EXEC_TEMP_ONE_I,
-            TGSI_EXEC_TEMP_ONE_C ) );
+            TEMP_ONE_I,
+            TEMP_ONE_C ) );
       STORE( func, *inst, 0, 0, chan_index );
    }
 }
@@ -1243,8 +1246,8 @@ emit_instruction(
          emit_tempf(
             func,
             0,
-            TGSI_EXEC_TEMP_ONE_I,
-            TGSI_EXEC_TEMP_ONE_C);
+            TEMP_ONE_I,
+            TEMP_ONE_C);
          if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
             STORE( func, *inst, 0, 0, CHAN_X );
          }
@@ -1329,7 +1332,38 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_EXP:
-      return 0;
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         FETCH( func, *inst, 0, 0, CHAN_X );
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+            emit_MOV( func, 1, 0 );
+            emit_flr( func, 1 );
+            /* dst.x = ex2(floor(src.x)) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
+               emit_MOV( func, 2, 1 );
+               emit_ex2( func, 2 );
+               STORE( func, *inst, 2, 0, CHAN_X );
+            }
+            /* dst.y = src.x - floor(src.x) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+               emit_MOV( func, 2, 0 );
+               emit_sub( func, 2, 1 );
+               STORE( func, *inst, 2, 0, CHAN_Y );
+            }
+         }
+         /* dst.z = ex2(src.x) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+            emit_ex2( func, 0 );
+            STORE( func, *inst, 0, 0, CHAN_Z );
+         }
+      }
+      /* dst.w = 1.0 */
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
       break;
 
    case TGSI_OPCODE_LOG:
@@ -1399,8 +1433,8 @@ emit_instruction(
          emit_tempf(
             func,
             0,
-            TGSI_EXEC_TEMP_ONE_I,
-            TGSI_EXEC_TEMP_ONE_C );
+            TEMP_ONE_I,
+            TEMP_ONE_C );
          STORE( func, *inst, 0, 0, CHAN_X );
       }
       IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
@@ -1603,8 +1637,8 @@ emit_instruction(
 	 emit_tempf(
 	    func,
 	    0,
-	    TGSI_EXEC_TEMP_ONE_I,
-	    TGSI_EXEC_TEMP_ONE_C );
+	    TEMP_ONE_I,
+	    TEMP_ONE_C );
          STORE( func, *inst, 0, 0, CHAN_W );
       }
       break;
@@ -1731,8 +1765,8 @@ emit_instruction(
 	 emit_tempf(
 	    func,
 	    0,
-	    TGSI_EXEC_TEMP_ONE_I,
-	    TGSI_EXEC_TEMP_ONE_C );
+	    TEMP_ONE_I,
+	    TEMP_ONE_C );
 	 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
 	    STORE( func, *inst, 0, 0, chan_index );
 	 }
@@ -1820,8 +1854,8 @@ emit_instruction(
 	 emit_tempf(
 	    func,
 	    0,
-	    TGSI_EXEC_TEMP_ONE_I,
-	    TGSI_EXEC_TEMP_ONE_C );
+	    TEMP_ONE_I,
+	    TEMP_ONE_C );
          STORE( func, *inst, 0, 0, CHAN_W );
       }
       break;
-- 
cgit v1.2.3


From 4405e428e4c2a80172c803cc3c4933d546bf2b4d Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Tue, 19 Aug 2008 12:07:25 +0200
Subject: tgsi: Implement LOG opcode for SSE2 codegen.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c |  8 ++++----
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index d584a4e4fa4..88a34a69613 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1777,18 +1777,18 @@ exec_instruction(
       micro_lg2( &r[1], &r[2] );  /* r1 = lg2(r2) */
       micro_flr( &r[0], &r[1] );  /* r0 = floor(r1) */
       if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-	 STORE( &r[0], 0, CHAN_X );
+         STORE( &r[0], 0, CHAN_X );
       }
       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
          micro_exp2( &r[0], &r[0] );       /* r0 = 2 ^ r0 */
          micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
-	 STORE( &r[0], 0, CHAN_Y );
+         STORE( &r[0], 0, CHAN_Y );
       }
       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-	 STORE( &r[1], 0, CHAN_Z );
+         STORE( &r[1], 0, CHAN_Z );
       }
       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
-	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
       }
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 684bc081e55..485e5a0e6f5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1367,7 +1367,38 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_LOG:
-      return 0;
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         FETCH( func, *inst, 0, 0, CHAN_X );
+         emit_abs( func, 0 );
+         emit_MOV( func, 1, 0 );
+         emit_lg2( func, 1 );
+         /* dst.z = lg2(abs(src.x)) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+            STORE( func, *inst, 1, 0, CHAN_Z );
+         }
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+            emit_flr( func, 1 );
+            /* dst.x = floor(lg2(abs(src.x))) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
+               STORE( func, *inst, 1, 0, CHAN_X );
+            }
+            /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+               emit_ex2( func, 1 );
+               emit_rcp( func, 1, 1 );
+               emit_mul( func, 0, 1 );
+               STORE( func, *inst, 0, 0, CHAN_Y );
+            }
+         }
+      }
+      /* dst.w = 1.0 */
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
       break;
 
    case TGSI_OPCODE_MUL:
-- 
cgit v1.2.3


From 1c2ff4d9e65563c071747a9c3bd907bd24706da0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 22 Aug 2008 15:16:43 -0600
Subject: gallium: use new util_fast_exp2(), _log2(), pow() functions

New code surrounded with #if FAST_MATH to allow comparing against original
code if we need to debug.
---
 src/gallium/auxiliary/draw/draw_vs_aos.c | 66 +++++++++++++++++++++++++++++++-
 src/gallium/auxiliary/tgsi/tgsi_exec.c   | 30 +++++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_sse2.c   | 19 +++++++++
 3 files changed, 113 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 441877d46f0..41bdd012d56 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -31,6 +31,7 @@
 
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_exec.h"
@@ -43,6 +44,7 @@
 
 #ifdef PIPE_ARCH_X86
 #define DISASSEM 0
+#define FAST_MATH 1
 
 static const char *files[] =
 {
@@ -1380,14 +1382,28 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
    return TRUE;
 }
 
+
+
 /* A wrapper for powf().
  * Makes sure it is cdecl and operates on floats.
  */
 static float PIPE_CDECL _powerf( float x, float y )
 {
+#if FAST_MATH
+   return util_fast_pow(x, y);
+#else
    return powf( x, y );
+#endif
 }
 
+#if FAST_MATH
+static float PIPE_CDECL _exp2(float x)
+{
+   return util_fast_exp2(x);
+}
+#endif
+
+
 /* Really not sufficient -- need to check for conditions that could
  * generate inf/nan values, which will slow things down hugely.
  */
@@ -1442,6 +1458,48 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst
 }
 
 
+#if FAST_MATH
+static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
+{
+   uint i;
+
+   /* For absolute correctness, need to spill/invalidate all XMM regs
+    * too.  
+    */
+   for (i = 0; i < 8; i++) {
+      if (cp->xmm[i].dirty) 
+         spill(cp, i);
+      aos_release_xmm_reg(cp, i);
+   }
+
+   /* Push caller-save (ie scratch) regs.  
+    */
+   x86_cdecl_caller_push_regs( cp->func );
+
+   x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) );
+
+   x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
+   x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) );
+
+   /* tmp_EAX has been pushed & will be restored below */
+   x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _exp2 );
+   x86_call( cp->func, cp->tmp_EAX );
+
+   x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 4) );
+
+   x86_cdecl_caller_pop_regs( cp->func );
+
+   /* Note retval on x87 stack:
+    */
+   cp->func->x87_stack++;
+
+   x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
+
+   return TRUE;
+}
+#endif
+
+
 static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
    struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
@@ -1662,7 +1720,9 @@ emit_instruction( struct aos_compilation *cp,
       return emit_RND(cp, inst);
 
    case TGSI_OPCODE_EXPBASE2:
-#if 0
+#if FAST_MATH
+      return emit_EXPBASE2(cp, inst);
+#elif 0
       /* this seems to fail for "larger" exponents.
        * See glean tvertProg1's EX2 test.
        */
@@ -1827,6 +1887,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
    struct aos_compilation cp;
    unsigned fixup, label;
 
+   util_init_math();
+
    tgsi_parse_init( &parse, varient->base.vs->state.tokens );
 
    memset(&cp, 0, sizeof(cp));
@@ -2135,4 +2197,4 @@ struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
 
 
 
-#endif
+#endif /* PIPE_ARCH_X86 */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 88a34a69613..e28b56c842f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -57,6 +57,9 @@
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi_exec.h"
+#include "util/u_math.h"
+
+#define FAST_MATH 1
 
 #define TILE_TOP_LEFT     0
 #define TILE_TOP_RIGHT    1
@@ -145,6 +148,8 @@ tgsi_exec_machine_bind_shader(
    tgsi_dump(tokens, 0);
 #endif
 
+   util_init_math();
+
    mach->Tokens = tokens;
    mach->Samplers = samplers;
 
@@ -448,10 +453,17 @@ micro_exp2(
    union tgsi_exec_channel *dst,
    const union tgsi_exec_channel *src)
 {
+#if FAST_MATH
+   dst->f[0] = util_fast_exp2( src->f[0] );
+   dst->f[1] = util_fast_exp2( src->f[1] );
+   dst->f[2] = util_fast_exp2( src->f[2] );
+   dst->f[3] = util_fast_exp2( src->f[3] );
+#else
    dst->f[0] = powf( 2.0f, src->f[0] );
    dst->f[1] = powf( 2.0f, src->f[1] );
    dst->f[2] = powf( 2.0f, src->f[2] );
    dst->f[3] = powf( 2.0f, src->f[3] );
+#endif
 }
 
 static void
@@ -528,10 +540,17 @@ micro_lg2(
    union tgsi_exec_channel *dst,
    const union tgsi_exec_channel *src )
 {
+#if FAST_MATH
+   dst->f[0] = util_fast_log2( src->f[0] );
+   dst->f[1] = util_fast_log2( src->f[1] );
+   dst->f[2] = util_fast_log2( src->f[2] );
+   dst->f[3] = util_fast_log2( src->f[3] );
+#else
    dst->f[0] = logf( src->f[0] ) * 1.442695f;
    dst->f[1] = logf( src->f[1] ) * 1.442695f;
    dst->f[2] = logf( src->f[2] ) * 1.442695f;
    dst->f[3] = logf( src->f[3] ) * 1.442695f;
+#endif
 }
 
 static void
@@ -796,10 +815,17 @@ micro_pow(
    const union tgsi_exec_channel *src0,
    const union tgsi_exec_channel *src1 )
 {
+#if FAST_MATH
+   dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
+   dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
+   dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
+   dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
+#else
    dst->f[0] = powf( src0->f[0], src1->f[0] );
    dst->f[1] = powf( src0->f[1], src1->f[1] );
    dst->f[2] = powf( src0->f[2], src1->f[2] );
    dst->f[3] = powf( src0->f[3], src1->f[3] );
+#endif
 }
 
 static void
@@ -2024,7 +2050,11 @@ exec_instruction(
     /* TGSI_OPCODE_EX2 */
       FETCH(&r[0], 0, CHAN_X);
 
+#if FAST_MATH
+      micro_exp2( &r[0], &r[0] );
+#else
       micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
+#endif
 
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
 	 STORE( &r[0], 0, chan_index );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 485e5a0e6f5..e3906070237 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -27,6 +27,7 @@
 
 #include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi_exec.h"
@@ -42,6 +43,8 @@
  */
 #define HIGH_PRECISION 1
 
+#define FAST_MATH 1
+
 
 #define FOR_EACH_CHANNEL( CHAN )\
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
@@ -623,10 +626,17 @@ ex24f(
 {
    const unsigned X = 0;
 
+#if FAST_MATH
+   store[X + 0] = util_fast_exp2( store[X + 0] );
+   store[X + 1] = util_fast_exp2( store[X + 1] );
+   store[X + 2] = util_fast_exp2( store[X + 2] );
+   store[X + 3] = util_fast_exp2( store[X + 3] );
+#else
    store[X + 0] = powf( 2.0f, store[X + 0] );
    store[X + 1] = powf( 2.0f, store[X + 1] );
    store[X + 2] = powf( 2.0f, store[X + 2] );
    store[X + 3] = powf( 2.0f, store[X + 3] );
+#endif
 }
 
 static void
@@ -762,10 +772,17 @@ pow4f(
 {
    const unsigned X = 0;
 
+#if FAST_MATH
+   store[X + 0] = util_fast_pow( store[X + 0], store[X + 4] );
+   store[X + 1] = util_fast_pow( store[X + 1], store[X + 5] );
+   store[X + 2] = util_fast_pow( store[X + 2], store[X + 6] );
+   store[X + 3] = util_fast_pow( store[X + 3], store[X + 7] );
+#else
    store[X + 0] = powf( store[X + 0], store[X + 4] );
    store[X + 1] = powf( store[X + 1], store[X + 5] );
    store[X + 2] = powf( store[X + 2], store[X + 6] );
    store[X + 3] = powf( store[X + 3], store[X + 7] );
+#endif
 }
 
 static void
@@ -2235,6 +2252,8 @@ tgsi_emit_sse2(
    unsigned ok = 1;
    uint num_immediates = 0;
 
+   util_init_math();
+
    func->csr = func->store;
 
    tgsi_parse_init( &parse, tokens );
-- 
cgit v1.2.3


From 1a46dcc8a927dfb38ca1381e7b3dafb789f8257c Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 22 Aug 2008 15:25:21 -0600
Subject: gallium: replace LOG2() macro with util_fast_log2() inline func

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c       |  8 ++++----
 src/gallium/drivers/softpipe/sp_context.c    |  3 +++
 src/gallium/drivers/softpipe/sp_tex_sample.c |  3 ++-
 src/gallium/include/pipe/p_util.h            | 15 ---------------
 4 files changed, 9 insertions(+), 20 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index e3906070237..00ed4da4507 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -713,10 +713,10 @@ lg24f(
 {
    const unsigned X = 0;
 
-   store[X + 0] = LOG2( store[X + 0] );
-   store[X + 1] = LOG2( store[X + 1] );
-   store[X + 2] = LOG2( store[X + 2] );
-   store[X + 3] = LOG2( store[X + 3] );
+   store[X + 0] = util_fast_log2( store[X + 0] );
+   store[X + 1] = util_fast_log2( store[X + 1] );
+   store[X + 2] = util_fast_log2( store[X + 2] );
+   store[X + 3] = util_fast_log2( store[X + 3] );
 }
 
 static void
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 626c3a9d4e1..9b1313bc83e 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -33,6 +33,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_util.h"
+#include "util/u_math.h"
 #include "sp_clear.h"
 #include "sp_context.h"
 #include "sp_flush.h"
@@ -128,6 +129,8 @@ softpipe_create( struct pipe_screen *screen,
    struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context);
    uint i;
 
+   util_init_math();
+
 #ifdef PIPE_ARCH_X86
    softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE );
 #else
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 01f4d0ca811..d7680ffa81d 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -41,6 +41,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_util.h"
 #include "tgsi/tgsi_exec.h"
+#include "util/u_math.h"
 
 
 /*
@@ -499,7 +500,7 @@ compute_lambda(struct tgsi_sampler *sampler,
       rho = MAX2(rho, max);
    }
 
-   lambda = LOG2(rho);
+   lambda = util_fast_log2(rho);
    lambda += lodbias + sampler->state->lod_bias;
    lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod);
 
diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h
index 473a8d94abb..660192b28e7 100644
--- a/src/gallium/include/pipe/p_util.h
+++ b/src/gallium/include/pipe/p_util.h
@@ -443,21 +443,6 @@ static INLINE int iround(float f)
 #define FABSF(x)   ((float) fabs(x))
 #endif
 
-/* Pretty fast, and accurate.
- * Based on code from http://www.flipcode.com/totd/
- */
-static INLINE float LOG2(float val)
-{
-   union fi num;
-   int log_2;
-
-   num.f = val;
-   log_2 = ((num.i >> 23) & 255) - 128;
-   num.i &= ~(255 << 23);
-   num.i += 127 << 23;
-   num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3;
-   return num.f + log_2;
-}
 
 #if defined(__GNUC__)
 #define CEILF(x)   ceilf(x)
-- 
cgit v1.2.3


From 4f25420bdd834e81a3e22733304efc5261c2998a Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Sun, 24 Aug 2008 17:48:55 -0600
Subject: gallium: refactor/replace p_util.h with util/u_memory.h and
 util/u_math.h

Also, rename p_tile.[ch] to u_tile.[ch]
---
 src/gallium/README.portability                     |    4 +-
 src/gallium/auxiliary/cso_cache/cso_cache.c        |    3 +-
 src/gallium/auxiliary/cso_cache/cso_context.c      |    2 +-
 src/gallium/auxiliary/cso_cache/cso_hash.c         |    2 +-
 src/gallium/auxiliary/draw/draw_context.c          |    3 +-
 src/gallium/auxiliary/draw/draw_pipe.c             |    1 -
 src/gallium/auxiliary/draw/draw_pipe_aaline.c      |    3 +-
 src/gallium/auxiliary/draw/draw_pipe_aapoint.c     |    4 +-
 src/gallium/auxiliary/draw/draw_pipe_clip.c        |    4 +-
 src/gallium/auxiliary/draw/draw_pipe_cull.c        |    2 +-
 src/gallium/auxiliary/draw/draw_pipe_flatshade.c   |    4 +-
 src/gallium/auxiliary/draw/draw_pipe_offset.c      |    3 +-
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c    |    4 +-
 src/gallium/auxiliary/draw/draw_pipe_stipple.c     |    6 +-
 src/gallium/auxiliary/draw/draw_pipe_twoside.c     |    3 +-
 src/gallium/auxiliary/draw/draw_pipe_unfilled.c    |    2 +-
 src/gallium/auxiliary/draw/draw_pipe_util.c        |    2 +-
 src/gallium/auxiliary/draw/draw_pipe_validate.c    |    2 +-
 src/gallium/auxiliary/draw/draw_pipe_vbuf.c        |    3 +-
 src/gallium/auxiliary/draw/draw_pipe_wide_line.c   |    3 +-
 src/gallium/auxiliary/draw/draw_pipe_wide_point.c  |    3 +-
 src/gallium/auxiliary/draw/draw_pt.c               |    1 -
 src/gallium/auxiliary/draw/draw_pt_emit.c          |    2 +-
 src/gallium/auxiliary/draw/draw_pt_fetch.c         |    2 +-
 src/gallium/auxiliary/draw/draw_pt_fetch_emit.c    |    2 +-
 .../auxiliary/draw/draw_pt_fetch_shade_emit.c      |    3 +-
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |    3 +-
 src/gallium/auxiliary/draw/draw_pt_post_vs.c       |    2 +-
 src/gallium/auxiliary/draw/draw_pt_util.c          |    1 -
 src/gallium/auxiliary/draw/draw_pt_varray.c        |    4 +-
 src/gallium/auxiliary/draw/draw_pt_vcache.c        |    2 +-
 src/gallium/auxiliary/draw/draw_vbuf.h             |    2 -
 src/gallium/auxiliary/draw/draw_vs.c               |    6 +-
 src/gallium/auxiliary/draw/draw_vs_aos.c           |    4 +-
 src/gallium/auxiliary/draw/draw_vs_aos_io.c        |    2 +-
 src/gallium/auxiliary/draw/draw_vs_aos_machine.c   |    3 +-
 src/gallium/auxiliary/draw/draw_vs_exec.c          |    3 +-
 src/gallium/auxiliary/draw/draw_vs_llvm.c          |    1 -
 src/gallium/auxiliary/draw/draw_vs_sse.c           |    3 +-
 src/gallium/auxiliary/draw/draw_vs_varient.c       |    3 +-
 src/gallium/auxiliary/gallivm/gallivm_cpu.cpp      |    3 +-
 src/gallium/auxiliary/gallivm/instructions.cpp     |    2 +-
 src/gallium/auxiliary/gallivm/instructionssoa.cpp  |    2 +-
 .../auxiliary/pipebuffer/pb_buffer_fenced.c        |    2 +-
 .../auxiliary/pipebuffer/pb_buffer_malloc.c        |    2 +-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c   |    2 +-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c |    2 +-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c |    2 +-
 .../auxiliary/pipebuffer/pb_bufmgr_fenced.c        |    2 +-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c    |    2 +-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c  |    2 +-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c  |    2 +-
 src/gallium/auxiliary/pipebuffer/pb_validate.c     |    2 +-
 src/gallium/auxiliary/pipebuffer/pb_winsys.c       |    2 +-
 src/gallium/auxiliary/rtasm/rtasm_execmem.c        |    2 +-
 src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c        |    2 +-
 src/gallium/auxiliary/sct/sct.c                    |    2 +-
 src/gallium/auxiliary/tgsi/tgsi_build.c            |    1 -
 src/gallium/auxiliary/tgsi/tgsi_build.h            |    4 +
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c           |    1 -
 src/gallium/auxiliary/tgsi/tgsi_exec.c             |    2 +-
 src/gallium/auxiliary/tgsi/tgsi_parse.c            |    2 +-
 src/gallium/auxiliary/tgsi/tgsi_scan.c             |    6 +-
 src/gallium/auxiliary/tgsi/tgsi_sse2.c             |    2 +-
 src/gallium/auxiliary/tgsi/tgsi_transform.c        |    1 +
 src/gallium/auxiliary/tgsi/tgsi_transform.h        |    1 -
 src/gallium/auxiliary/tgsi/tgsi_util.c             |    1 -
 src/gallium/auxiliary/translate/translate.c        |    1 -
 src/gallium/auxiliary/translate/translate_cache.c  |    2 +-
 .../auxiliary/translate/translate_generic.c        |    2 +-
 src/gallium/auxiliary/translate/translate_sse.c    |    2 +-
 src/gallium/auxiliary/util/Makefile                |    2 +-
 src/gallium/auxiliary/util/SConscript              |    2 +-
 src/gallium/auxiliary/util/p_debug.c               |    1 -
 src/gallium/auxiliary/util/u_blit.c                |    5 +-
 src/gallium/auxiliary/util/u_gen_mipmap.c          |    2 +-
 src/gallium/auxiliary/util/u_handle_table.c        |    4 +-
 src/gallium/auxiliary/util/u_hash_table.c          |    5 +-
 src/gallium/auxiliary/util/u_math.h                |  240 +++-
 src/gallium/auxiliary/util/u_memory.h              |  222 ++++
 src/gallium/auxiliary/util/u_mm.c                  |    2 +-
 src/gallium/auxiliary/util/u_pack_color.h          |   36 +-
 src/gallium/auxiliary/util/u_pointer.h             |  107 ++
 src/gallium/auxiliary/util/u_rect.c                |    1 -
 src/gallium/auxiliary/util/u_simple_shaders.c      |    2 +-
 src/gallium/auxiliary/util/u_tile.c                | 1169 ++++++++++++++++++++
 src/gallium/auxiliary/util/u_tile.h                |  101 ++
 src/gallium/drivers/cell/common.h                  |    1 -
 src/gallium/drivers/cell/ppu/cell_clear.c          |    2 +-
 src/gallium/drivers/cell/ppu/cell_context.c        |    2 +-
 src/gallium/drivers/cell/ppu/cell_pipe_state.c     |    2 +-
 src/gallium/drivers/cell/ppu/cell_render.c         |    2 +-
 src/gallium/drivers/cell/ppu/cell_screen.c         |    2 +-
 src/gallium/drivers/cell/ppu/cell_state_derived.c  |    2 +-
 src/gallium/drivers/cell/ppu/cell_state_emit.c     |    2 +-
 src/gallium/drivers/cell/ppu/cell_state_shader.c   |    2 +-
 src/gallium/drivers/cell/ppu/cell_surface.c        |    2 +-
 src/gallium/drivers/cell/ppu/cell_texture.c        |    2 +-
 src/gallium/drivers/cell/ppu/cell_winsys.c         |    2 +-
 src/gallium/drivers/cell/spu/spu_exec.c            |    1 -
 src/gallium/drivers/cell/spu/spu_tri.c             |    1 -
 src/gallium/drivers/cell/spu/spu_util.c            |    1 -
 src/gallium/drivers/cell/spu/spu_vertex_fetch.c    |    1 -
 src/gallium/drivers/cell/spu/spu_vertex_shader.c   |    1 -
 src/gallium/drivers/failover/fo_context.c          |    2 +-
 src/gallium/drivers/i915simple/i915_context.c      |    2 +-
 src/gallium/drivers/i915simple/i915_debug_fp.c     |    2 +-
 src/gallium/drivers/i915simple/i915_fpc.h          |    1 -
 .../drivers/i915simple/i915_fpc_translate.c        |    2 +
 src/gallium/drivers/i915simple/i915_prim_emit.c    |    4 +-
 src/gallium/drivers/i915simple/i915_prim_vbuf.c    |    3 +-
 src/gallium/drivers/i915simple/i915_screen.c       |    2 +-
 src/gallium/drivers/i915simple/i915_state.c        |    3 +-
 .../drivers/i915simple/i915_state_derived.c        |    2 +-
 .../drivers/i915simple/i915_state_dynamic.c        |    4 +-
 .../drivers/i915simple/i915_state_immediate.c      |    2 +-
 .../drivers/i915simple/i915_state_sampler.c        |    2 +-
 src/gallium/drivers/i915simple/i915_surface.c      |    3 +-
 src/gallium/drivers/i915simple/i915_texture.c      |    3 +-
 src/gallium/drivers/i965simple/brw_cc.c            |    6 +-
 src/gallium/drivers/i965simple/brw_clip_state.c    |    3 +-
 src/gallium/drivers/i965simple/brw_context.c       |    2 +-
 src/gallium/drivers/i965simple/brw_curbe.c         |    3 +-
 src/gallium/drivers/i965simple/brw_draw_upload.c   |    1 +
 src/gallium/drivers/i965simple/brw_gs_state.c      |    3 +-
 src/gallium/drivers/i965simple/brw_screen.c        |    2 +-
 src/gallium/drivers/i965simple/brw_sf_state.c      |    5 +-
 src/gallium/drivers/i965simple/brw_shader_info.c   |    2 +-
 src/gallium/drivers/i965simple/brw_state.c         |    2 +-
 src/gallium/drivers/i965simple/brw_state_batch.c   |    2 +-
 src/gallium/drivers/i965simple/brw_state_cache.c   |    2 +-
 src/gallium/drivers/i965simple/brw_state_pool.c    |    3 +-
 src/gallium/drivers/i965simple/brw_state_upload.c  |    2 +-
 src/gallium/drivers/i965simple/brw_surface.c       |    3 +-
 src/gallium/drivers/i965simple/brw_tex_layout.c    |    8 +-
 src/gallium/drivers/i965simple/brw_vs_state.c      |    3 +-
 src/gallium/drivers/i965simple/brw_wm.c            |    2 +-
 src/gallium/drivers/i965simple/brw_wm_decl.c       |    3 +-
 src/gallium/drivers/i965simple/brw_wm_glsl.c       |    3 +-
 .../drivers/i965simple/brw_wm_sampler_state.c      |    3 +-
 src/gallium/drivers/i965simple/brw_wm_state.c      |    3 +-
 src/gallium/drivers/softpipe/sp_context.c          |    2 +-
 src/gallium/drivers/softpipe/sp_fs_exec.c          |    2 +-
 src/gallium/drivers/softpipe/sp_fs_llvm.c          |    2 +-
 src/gallium/drivers/softpipe/sp_fs_sse.c           |    2 +-
 src/gallium/drivers/softpipe/sp_prim_setup.c       |    2 +-
 src/gallium/drivers/softpipe/sp_prim_vbuf.c        |    1 +
 src/gallium/drivers/softpipe/sp_quad_alpha_test.c  |    2 +-
 src/gallium/drivers/softpipe/sp_quad_blend.c       |   29 +-
 src/gallium/drivers/softpipe/sp_quad_bufloop.c     |    2 +-
 src/gallium/drivers/softpipe/sp_quad_colormask.c   |    3 +-
 src/gallium/drivers/softpipe/sp_quad_coverage.c    |    2 +-
 src/gallium/drivers/softpipe/sp_quad_depth_test.c  |    2 +-
 src/gallium/drivers/softpipe/sp_quad_earlyz.c      |    2 +-
 src/gallium/drivers/softpipe/sp_quad_fs.c          |    3 +-
 src/gallium/drivers/softpipe/sp_quad_occlusion.c   |    2 +-
 src/gallium/drivers/softpipe/sp_quad_output.c      |    2 +-
 src/gallium/drivers/softpipe/sp_quad_stencil.c     |    2 +-
 src/gallium/drivers/softpipe/sp_quad_stipple.c     |    2 +-
 src/gallium/drivers/softpipe/sp_query.c            |    2 +-
 src/gallium/drivers/softpipe/sp_screen.c           |    2 +-
 src/gallium/drivers/softpipe/sp_setup.c            |    2 +-
 src/gallium/drivers/softpipe/sp_state_blend.c      |    2 +-
 src/gallium/drivers/softpipe/sp_state_derived.c    |    3 +-
 src/gallium/drivers/softpipe/sp_state_fs.c         |    2 +-
 src/gallium/drivers/softpipe/sp_state_rasterizer.c |    2 +-
 src/gallium/drivers/softpipe/sp_state_sampler.c    |    2 +-
 src/gallium/drivers/softpipe/sp_surface.c          |    3 +-
 src/gallium/drivers/softpipe/sp_tex_sample.c       |    2 +-
 src/gallium/drivers/softpipe/sp_texture.c          |    3 +-
 src/gallium/drivers/softpipe/sp_tile_cache.c       |    4 +-
 src/gallium/drivers/trace/tr_context.c             |    2 +-
 src/gallium/drivers/trace/tr_dump.c                |    2 +
 src/gallium/drivers/trace/tr_dump.h                |    1 -
 src/gallium/drivers/trace/tr_screen.c              |    2 +-
 src/gallium/drivers/trace/tr_state.c               |    1 +
 src/gallium/drivers/trace/tr_stream_stdc.c         |    2 +-
 src/gallium/drivers/trace/tr_stream_wd.c           |    2 +-
 src/gallium/drivers/trace/tr_texture.c             |    2 +-
 src/gallium/drivers/trace/tr_winsys.c              |    3 +-
 src/gallium/include/pipe/p_util.h                  |  460 --------
 src/gallium/state_trackers/python/gallium.i        |    2 +-
 src/gallium/state_trackers/python/st_device.c      |    3 +-
 src/gallium/state_trackers/python/st_sample.c      |    5 +-
 .../state_trackers/python/st_softpipe_winsys.c     |    3 +-
 .../winsys/drm/intel/common/intel_be_device.c      |    2 +-
 .../winsys/drm/intel/dri/intel_winsys_softpipe.c   |    2 +-
 src/gallium/winsys/egl_xlib/egl_xlib.c             |    2 +-
 src/gallium/winsys/egl_xlib/sw_winsys.c            |    3 +-
 src/gallium/winsys/gdi/wmesa.c                     |    2 +-
 src/gallium/winsys/xlib/brw_aub.c                  |    1 -
 src/gallium/winsys/xlib/xm_winsys.c                |    3 +-
 src/gallium/winsys/xlib/xm_winsys_aub.c            |    2 +-
 src/mesa/state_tracker/acc2.c                      |  319 ++++++
 src/mesa/state_tracker/st_cb_accum.c               |    2 +-
 src/mesa/state_tracker/st_cb_bitmap.c              |    2 +-
 src/mesa/state_tracker/st_cb_drawpixels.c          |    2 +-
 src/mesa/state_tracker/st_cb_readpixels.c          |    2 +-
 src/mesa/state_tracker/st_cb_texture.c             |    2 +-
 src/mesa/state_tracker/st_program.c                |    2 +-
 src/mesa/state_tracker/st_texture.c                |    1 -
 201 files changed, 2453 insertions(+), 686 deletions(-)
 create mode 100644 src/gallium/auxiliary/util/u_memory.h
 create mode 100644 src/gallium/auxiliary/util/u_pointer.h
 create mode 100644 src/gallium/auxiliary/util/u_tile.c
 create mode 100644 src/gallium/auxiliary/util/u_tile.h
 delete mode 100644 src/gallium/include/pipe/p_util.h
 create mode 100644 src/mesa/state_tracker/acc2.c

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/README.portability b/src/gallium/README.portability
index d5d5987a7ff..adecf4bb798 100644
--- a/src/gallium/README.portability
+++ b/src/gallium/README.portability
@@ -35,8 +35,8 @@ not available in Windows Kernel Mode. Use the appropriate p_*.h include.
 
 * Use MALLOC, CALLOC, FREE instead of the malloc, calloc, free functions.
 
-* Use align_pointer() function defined in p_util.h for aligning pointers in a
-portable way.
+* Use align_pointer() function defined in u_memory.h for aligning pointers
+ in a portable way.
 
 == Debugging ==
 
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index 36dc46ff809..6b1754ea002 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -28,9 +28,10 @@
 /* Authors:  Zack Rusin <zack@tungstengraphics.com>
  */
 
-#include "pipe/p_util.h"
 #include "pipe/p_debug.h"
 
+#include "util/u_memory.h"
+
 #include "cso_cache.h"
 #include "cso_hash.h"
 
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 86e4d46a200..f22ba40824d 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -36,7 +36,7 @@
   */
 
 #include "pipe/p_state.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 #include "tgsi/tgsi_parse.h"
 
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c
index 0646efd9527..7f0044c5a7f 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.c
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.c
@@ -31,7 +31,7 @@
   */
 
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 #include "cso_hash.h"
 
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 2f263cf06a9..1c26cb31a39 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -31,7 +31,8 @@
   */
 
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
 #include "draw_context.h"
 #include "draw_vbuf.h"
 #include "draw_vs.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 1db43876efb..3cde9d36d3a 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -30,7 +30,6 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "pipe/p_util.h"
 #include "draw/draw_private.h"
 #include "draw/draw_pipe.h"
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 991304b2c84..20841bb5d69 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -32,11 +32,12 @@
  */
 
 
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 #include "tgsi/tgsi_transform.h"
 #include "tgsi/tgsi_dump.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index c7f4349cb3e..2c1cacbdb47 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -38,7 +38,6 @@
  */
 
 
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
@@ -47,6 +46,9 @@
 #include "tgsi/tgsi_transform.h"
 #include "tgsi/tgsi_dump.h"
 
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
 #include "draw_context.h"
 #include "draw_vs.h"
 #include "draw_pipe.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index fa10f8efca2..3265dcd154a 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -32,7 +32,9 @@
  */
 
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
 #include "pipe/p_shader_tokens.h"
 
 #include "draw_vs.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c
index d0d22a38e07..053be5f050d 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -33,7 +33,7 @@
  */
 
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_defines.h"
 #include "draw_pipe.h"
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index 4741b22d023..43d1fecc4dd 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -28,7 +28,9 @@
 /* Authors:  Keith Whitwell <keith@tungstengraphics.com>
  */
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
 #include "pipe/p_shader_tokens.h"
 #include "draw_vs.h"
 #include "draw_pipe.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index 2f5865741c1..1fea5e6dcbc 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -32,7 +32,8 @@
  * \author  Brian Paul
  */
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "draw_pipe.h"
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index e97136fa1f8..b764d9c518c 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -34,12 +34,14 @@
  */
 
 
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
 #include "tgsi/tgsi_transform.h"
 #include "tgsi/tgsi_dump.h"
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index bf0db18a684..b65e2aa1021 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -36,10 +36,12 @@
  */
 
 
-#include "pipe/p_util.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
-#include "draw_pipe.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_pipe.h"
 
 
 /** Subclass of draw_stage */
diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index 3ac825f5656..c329d923390 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -28,7 +28,8 @@
 /* Authors:  Keith Whitwell <keith@tungstengraphics.com>
  */
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw_vs.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index 8f97fdedaac..68835fd1a59 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -33,7 +33,7 @@
 /* Authors:  Keith Whitwell <keith@tungstengraphics.com>
  */
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_defines.h"
 #include "draw_private.h"
 #include "draw_pipe.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c
index 04438f4dd08..e22e5fed0c6 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_util.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_util.c
@@ -30,7 +30,7 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "draw/draw_private.h"
 #include "draw/draw_pipe.h"
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index 6be1d369c33..f34c68728ef 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -28,7 +28,7 @@
 /* Authors:  Keith Whitwell <keith@tungstengraphics.com>
  */
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_defines.h"
 #include "draw_private.h"
 #include "draw_pipe.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index a6fde77a0ed..c0cf4269dbb 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -35,7 +35,8 @@
 
 
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 #include "draw_vbuf.h"
 #include "draw_private.h"
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index 48ec2f1239c..184e363594d 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -28,9 +28,10 @@
 /* Authors:  Keith Whitwell <keith@tungstengraphics.com>
  */
 
-#include "pipe/p_util.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "draw_private.h"
 #include "draw_pipe.h"
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index 54590984c6b..4f1326053df 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -28,7 +28,8 @@
 /* Authors:  Keith Whitwell <keith@tungstengraphics.com>
  */
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw_vs.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 85a75525c8b..669c11c993c 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -30,7 +30,6 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "pipe/p_util.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_pt.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 40f05cb9e0b..d4eca80588b 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -25,7 +25,7 @@
  *
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_vbuf.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index 07f4c991642..6377f896fb0 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -25,7 +25,7 @@
  *
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_vbuf.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 4a1f3b09536..0684c93d102 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -30,7 +30,7 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_vbuf.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index fdf9b6fe6a8..87094f30924 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -31,7 +31,8 @@
   */
 
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_vbuf.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index be3535ed9e1..f617aac9f79 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -25,7 +25,8 @@
  *
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
 #include "draw/draw_vertex.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index af6306b1c67..96dc706b998 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -25,7 +25,7 @@
  *
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_context.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
index 32c8a9632cb..3bc7939c556 100644
--- a/src/gallium/auxiliary/draw/draw_pt_util.c
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -30,7 +30,6 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "pipe/p_util.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_pt.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index 46e722a154e..c15afe65f1a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -25,7 +25,9 @@
  *
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_pt.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index cda2987c9ed..b8b5de729d2 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -30,7 +30,7 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_pt.h"
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h
index e90f37872a1..62247ccd9f9 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.h
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -37,8 +37,6 @@
 #define DRAW_VBUF_H_
 
 
-#include "pipe/p_util.h"
-
 
 struct draw_context;
 struct vertex_info;
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index f798b204929..34adbd49b00 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -31,11 +31,15 @@
   *   Brian Paul
   */
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
 #include "pipe/p_shader_tokens.h"
+
 #include "draw_private.h"
 #include "draw_context.h"
 #include "draw_vs.h"
+
 #include "translate/translate.h"
 #include "translate/translate_cache.h"
 
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 41bdd012d56..760fcb389fc 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -29,9 +29,9 @@
  */
 
 
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
+#include "util/u_memory.h"
 #include "util/u_math.h"
+#include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_exec.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index eda677cc62c..ab3c5b94a50 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
index e029b7b4bb5..b358bd2df47 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
@@ -29,8 +29,9 @@
 #include "pipe/p_config.h"
 
 
-#include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_exec.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index e26903d8cc5..44563803f90 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -31,7 +31,8 @@
   *   Brian Paul
   */
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
 
 #include "draw_private.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index fc03473b919..2ce30b9a02b 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -32,7 +32,6 @@
   *   Brian Paul
   */
 
-#include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw_private.h"
 #include "draw_context.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 61f0c084c38..0efabd9de8b 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -31,13 +31,14 @@
   *   Brian Paul
   */
 
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "pipe/p_config.h"
 
 #include "draw_vs.h"
 
 #if defined(PIPE_ARCH_X86)
 
-#include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
 
 #include "draw_private.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 994ce3e8896..4daf05dae7c 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -30,7 +30,8 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_vbuf.h"
diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
index cf5b9788375..e64bfb1c6cb 100644
--- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
@@ -41,11 +41,12 @@
 
 #include "pipe/p_context.h"
 #include "pipe/p_shader_tokens.h"
-#include "pipe/p_util.h"
 
 #include "tgsi/tgsi_exec.h"
 #include "tgsi/tgsi_dump.h"
 
+#include "util/u_memory.h"
+
 #include <llvm/Module.h>
 #include <llvm/CallingConv.h>
 #include <llvm/Constants.h>
diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp
index 035224e8f3c..a82dc30306d 100644
--- a/src/gallium/auxiliary/gallivm/instructions.cpp
+++ b/src/gallium/auxiliary/gallivm/instructions.cpp
@@ -35,7 +35,7 @@
 
 #include "storage.h"
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 #include <llvm/CallingConv.h>
 #include <llvm/Constants.h>
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
index 76049ade7c0..efddc04e818 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
@@ -29,7 +29,7 @@
 #include "storagesoa.h"
 
 #include "pipe/p_shader_tokens.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 #include <llvm/CallingConv.h>
 #include <llvm/Constants.h>
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index ce41418a0f9..8ae052e8750 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -45,7 +45,7 @@
 #include "pipe/p_debug.h"
 #include "pipe/p_winsys.h"
 #include "pipe/p_thread.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "util/u_double_list.h"
 
 #include "pb_buffer.h"
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
index e90d2e56231..20fc87b39d2 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
@@ -35,7 +35,7 @@
 
 
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pb_buffer.h"
 #include "pb_bufmgr.h"
 
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
index 0d2d6c0c1b2..2afaeafa1a1 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
@@ -35,7 +35,7 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 #include "pb_buffer.h"
 #include "pb_bufmgr.h"
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index bed4bec4fe7..b914c2d0fea 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -38,7 +38,7 @@
 #include "pipe/p_debug.h"
 #include "pipe/p_winsys.h"
 #include "pipe/p_thread.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "util/u_double_list.h"
 #include "util/u_time.h"
 
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index d02e3500fff..5e518370d0e 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -37,7 +37,7 @@
 #include "pipe/p_debug.h"
 #include "pipe/p_winsys.h"
 #include "pipe/p_thread.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "util/u_double_list.h"
 #include "util/u_time.h"
 
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
index 05efd8ce41a..8fc63ce648c 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
@@ -35,7 +35,7 @@
 
 
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 #include "pb_buffer.h"
 #include "pb_buffer_fenced.h"
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index c51e582611d..b40eb6cc90c 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -36,7 +36,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_debug.h"
 #include "pipe/p_thread.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "util/u_double_list.h"
 #include "util/u_mm.h"
 #include "pb_buffer.h"
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
index 95af08929a9..93d2cc96351 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
@@ -39,7 +39,7 @@
 #include "pipe/p_debug.h"
 #include "pipe/p_thread.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "util/u_double_list.h"
 
 #include "pb_buffer.h"
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index 598d9ce3105..af307e265a2 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -39,7 +39,7 @@
 #include "pipe/p_debug.h"
 #include "pipe/p_thread.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "util/u_double_list.h"
 #include "util/u_time.h"
 
diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c
index 362fd896f31..1e54fc39d44 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_validate.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c
@@ -35,7 +35,7 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_error.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_debug.h"
 
 #include "pb_buffer.h"
diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c
index 978944091fd..28d137dbc43 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c
@@ -35,7 +35,7 @@
 
 
 #include "pipe/p_winsys.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 #include "pb_buffer.h"
 
diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
index 300c1c2d9d7..dfa5c35ab6a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
@@ -33,7 +33,7 @@
 #include "pipe/p_compiler.h"
 #include "pipe/p_debug.h"
 #include "pipe/p_thread.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 #include "rtasm_execmem.h"
 
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index 7f6bf577b26..285ddc0e3f3 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -30,7 +30,7 @@
  */
 
 #include "pipe/p_compiler.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "rtasm_ppc_spe.h"
 
 #ifdef GALLIUM_CELL
diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c
index 5e4126e014d..49bb7ea92e2 100644
--- a/src/gallium/auxiliary/sct/sct.c
+++ b/src/gallium/auxiliary/sct/sct.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_state.h"
 #include "pipe/p_inlines.h"
 #include "sct.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 050b448fe7c..74614d36884 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -26,7 +26,6 @@
  **************************************************************************/
 
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi_build.h"
 #include "tgsi_parse.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 6ae7f324f85..7d6234746a2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -28,6 +28,10 @@
 #ifndef TGSI_BUILD_H
 #define TGSI_BUILD_H
 
+
+struct tgsi_token;
+
+
 #if defined __cplusplus
 extern "C" {
 #endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index 1025866a252..be25cb45a0a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -26,7 +26,6 @@
  **************************************************************************/
 
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
 #include "util/u_string.h"
 #include "tgsi_dump_c.h"
 #include "tgsi_build.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index e28b56c842f..fb573fe1f0c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -52,11 +52,11 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_state.h"
-#include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi_exec.h"
+#include "util/u_memory.h"
 #include "util/u_math.h"
 
 #define FAST_MATH 1
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index d16f0cdcad4..3757486ba9b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -26,10 +26,10 @@
  **************************************************************************/
 
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi_parse.h"
 #include "tgsi_build.h"
+#include "util/u_memory.h"
 
 void
 tgsi_full_token_init(
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 59bcf10b530..be4870a4983 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -33,11 +33,11 @@
  */
 
 
-#include "tgsi_scan.h"
-#include "tgsi/tgsi_parse.h"
+#include "util/u_math.h"
 #include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
 
-#include "pipe/p_util.h"
 
 
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 00ed4da4507..626724ad4e4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -25,7 +25,7 @@
  * 
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "pipe/p_debug.h"
 #include "pipe/p_shader_tokens.h"
 #include "util/u_math.h"
 #include "tgsi/tgsi_parse.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c
index 357f77b05a6..ea87da31e50 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c
@@ -31,6 +31,7 @@
  * Authors:  Brian Paul
  */
 
+#include "pipe/p_debug.h"
 
 #include "tgsi_transform.h"
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h
index 3da0b382711..a121adbaef4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -29,7 +29,6 @@
 #define TGSI_TRANSFORM_H
 
 
-#include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_build.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 09486e649e1..50101a9bb0c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -26,7 +26,6 @@
  **************************************************************************/
 
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi_parse.h"
 #include "tgsi_build.h"
diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
index b93fbf9033e..7678903f75c 100644
--- a/src/gallium/auxiliary/translate/translate.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -31,7 +31,6 @@
   */
 
 #include "pipe/p_config.h"
-#include "pipe/p_util.h"
 #include "pipe/p_state.h"
 #include "translate.h"
 
diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c
index 115dc9287e0..d8069a149cf 100644
--- a/src/gallium/auxiliary/translate/translate_cache.c
+++ b/src/gallium/auxiliary/translate/translate_cache.c
@@ -25,7 +25,7 @@
  *
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_state.h"
 #include "translate.h"
 #include "translate_cache.h"
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 4c8179ffa87..4d336f47ea3 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -30,7 +30,7 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_state.h"
 #include "translate.h"
 
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 18a212ac1cf..7955186e168 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -28,7 +28,7 @@
 
 #include "pipe/p_config.h"
 #include "pipe/p_compiler.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "util/u_simple_list.h"
 
 #include "translate.h"
diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile
index 6eebf6d29b6..6e5fd26c057 100644
--- a/src/gallium/auxiliary/util/Makefile
+++ b/src/gallium/auxiliary/util/Makefile
@@ -5,7 +5,6 @@ LIBNAME = util
 
 C_SOURCES = \
 	p_debug.c \
-	p_tile.c \
 	u_blit.c \
 	u_draw_quad.c \
 	u_gen_mipmap.c \
@@ -16,6 +15,7 @@ C_SOURCES = \
 	u_rect.c \
 	u_simple_shaders.c \
 	u_snprintf.c \
+	u_tile.c \
 	u_time.c
 
 include ../../Makefile.template
diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript
index 94382fe1f9e..ce3fad7068e 100644
--- a/src/gallium/auxiliary/util/SConscript
+++ b/src/gallium/auxiliary/util/SConscript
@@ -6,7 +6,6 @@ util = env.ConvenienceLibrary(
 		'p_debug.c',
 		'p_debug_mem.c',
 		'p_debug_prof.c',
-		'p_tile.c',
 		'u_blit.c',
 		'u_draw_quad.c',
 		'u_gen_mipmap.c',
@@ -17,6 +16,7 @@ util = env.ConvenienceLibrary(
 		'u_rect.c',
 		'u_simple_shaders.c',
 		'u_snprintf.c',
+		'u_tile.c',
 		'u_time.c',
 	])
 
diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c
index 2c2f2f8931f..7d1dba5a247 100644
--- a/src/gallium/auxiliary/util/p_debug.c
+++ b/src/gallium/auxiliary/util/p_debug.c
@@ -51,7 +51,6 @@
 #endif
 
 #include "pipe/p_compiler.h" 
-#include "pipe/p_util.h" 
 #include "pipe/p_debug.h" 
 #include "pipe/p_format.h" 
 #include "pipe/p_state.h" 
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index ae087df4cf7..05399f9885e 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -37,12 +37,13 @@
 #include "pipe/p_debug.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_winsys.h"
 #include "pipe/p_shader_tokens.h"
 
-#include "util/u_draw_quad.h"
 #include "util/u_blit.h"
+#include "util/u_draw_quad.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "util/u_simple_shaders.h"
 
 #include "cso_cache/cso_context.h"
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 8713ff5d584..c1e2c19f877 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -37,10 +37,10 @@
 #include "pipe/p_debug.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_winsys.h"
 #include "pipe/p_shader_tokens.h"
 
+#include "util/u_memory.h"
 #include "util/u_draw_quad.h"
 #include "util/u_gen_mipmap.h"
 #include "util/u_simple_shaders.h"
diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c
index 2176a009592..2c40011923d 100644
--- a/src/gallium/auxiliary/util/u_handle_table.c
+++ b/src/gallium/auxiliary/util/u_handle_table.c
@@ -35,9 +35,9 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
 
-#include "u_handle_table.h"
+#include "util/u_memory.h"
+#include "util/u_handle_table.h"
 
 
 #define HANDLE_TABLE_INITIAL_SIZE 16  
diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c
index dd5eca7fca3..0bc8de9632c 100644
--- a/src/gallium/auxiliary/util/u_hash_table.c
+++ b/src/gallium/auxiliary/util/u_hash_table.c
@@ -40,10 +40,11 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
 
 #include "cso_cache/cso_hash.h"
-#include "u_hash_table.h"
+
+#include "util/u_memory.h"
+#include "util/u_hash_table.h"
 
 
 struct hash_table
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index a541d30a5d7..9b4ca393714 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -40,8 +40,6 @@
 
 
 #include "pipe/p_compiler.h"
-#include "pipe/p_util.h"
-#include "util/u_math.h"
 
 
 #ifdef __cplusplus
@@ -49,6 +47,132 @@ extern "C" {
 #endif
 
 
+#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
+__inline double ceil(double val)
+{
+   double ceil_val;
+
+   if((val - (long) val) == 0) {
+      ceil_val = val;
+   }
+   else {
+      if(val > 0) {
+         ceil_val = (long) val + 1;
+      }
+      else {
+         ceil_val = (long) val;
+      }
+   }
+
+   return ceil_val;
+}
+
+#ifndef PIPE_SUBSYSTEM_WINDOWS_CE
+__inline double floor(double val)
+{
+   double floor_val;
+
+   if((val - (long) val) == 0) {
+      floor_val = val;
+   }
+   else {
+      if(val > 0) {
+         floor_val = (long) val;
+      }
+      else {
+         floor_val = (long) val - 1;
+      }
+   }
+
+   return floor_val;
+}
+#endif
+
+#pragma function(pow)
+__inline double __cdecl pow(double val, double exponent)
+{
+   /* XXX */
+   assert(0);
+   return 0;
+}
+
+#pragma function(log)
+__inline double __cdecl log(double val)
+{
+   /* XXX */
+   assert(0);
+   return 0;
+}
+
+#pragma function(atan2)
+__inline double __cdecl atan2(double val)
+{
+   /* XXX */
+   assert(0);
+   return 0;
+}
+#else
+#include <math.h>
+#include <stdarg.h>
+#endif
+
+
+#if defined(_MSC_VER) 
+#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
+ 
+static INLINE float cosf( float f ) 
+{
+   return (float) cos( (double) f );
+}
+
+static INLINE float sinf( float f ) 
+{
+   return (float) sin( (double) f );
+}
+
+static INLINE float ceilf( float f ) 
+{
+   return (float) ceil( (double) f );
+}
+
+static INLINE float floorf( float f ) 
+{
+   return (float) floor( (double) f );
+}
+
+static INLINE float powf( float f, float g ) 
+{
+   return (float) pow( (double) f, (double) g );
+}
+
+static INLINE float sqrtf( float f ) 
+{
+   return (float) sqrt( (double) f );
+}
+
+static INLINE float fabsf( float f ) 
+{
+   return (float) fabs( (double) f );
+}
+
+static INLINE float logf( float f ) 
+{
+   return (float) log( (double) f );
+}
+
+#else
+/* Work-around an extra semi-colon in VS 2005 logf definition */
+#ifdef logf
+#undef logf
+#define logf(x) ((float)log((double)(x)))
+#endif /* logf */
+#endif
+#endif /* _MSC_VER */
+
+
+
+
+
 #define POW2_TABLE_SIZE 256
 #define POW2_TABLE_SCALE ((float) (POW2_TABLE_SIZE-1))
 extern float pow2_table[POW2_TABLE_SIZE];
@@ -59,6 +183,11 @@ extern void
 util_init_math(void);
 
 
+union fi {
+   float f;
+   int i;
+   unsigned ui;
+};
 
 
 /**
@@ -195,6 +324,113 @@ util_iround(float f)
 
 
 
+#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
+/**
+ * Find first bit set in word.  Least significant bit is 1.
+ * Return 0 if no bits set.
+ */
+static INLINE
+unsigned ffs( unsigned u )
+{
+   unsigned i;
+
+   if( u == 0 ) {
+      return 0;
+   }
+
+   __asm bsf eax, [u]
+   __asm inc eax
+   __asm mov [i], eax
+
+   return i;
+}
+#endif
+
+
+/**
+ * Return float bits.
+ */
+static INLINE unsigned
+fui( float f )
+{
+   union fi fi;
+   fi.f = f;
+   return fi.ui;
+}
+
+
+
+static INLINE float
+ubyte_to_float(ubyte ub)
+{
+   return (float) ub * (1.0f / 255.0f);
+}
+
+
+/**
+ * Convert float in [0,1] to ubyte in [0,255] with clamping.
+ */
+static INLINE ubyte
+float_to_ubyte(float f)
+{
+   const int ieee_0996 = 0x3f7f0000;   /* 0.996 or so */
+   union fi tmp;
+
+   tmp.f = f;
+   if (tmp.i < 0) {
+      return (ubyte) 0;
+   }
+   else if (tmp.i >= ieee_0996) {
+      return (ubyte) 255;
+   }
+   else {
+      tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f;
+      return (ubyte) tmp.i;
+   }
+}
+
+
+
+#define CLAMP( X, MIN, MAX )  ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) )
+
+#define MIN2( A, B )   ( (A)<(B) ? (A) : (B) )
+#define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )
+
+
+static INLINE int
+align(int value, int alignment)
+{
+   return (value + alignment - 1) & ~(alignment - 1);
+}
+
+
+#ifndef COPY_4V
+#define COPY_4V( DST, SRC )         \
+do {                                \
+   (DST)[0] = (SRC)[0];             \
+   (DST)[1] = (SRC)[1];             \
+   (DST)[2] = (SRC)[2];             \
+   (DST)[3] = (SRC)[3];             \
+} while (0)
+#endif
+
+
+#ifndef COPY_4FV
+#define COPY_4FV( DST, SRC )  COPY_4V(DST, SRC)
+#endif
+
+
+#ifndef ASSIGN_4V
+#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \
+do {                                     \
+   (DST)[0] = (V0);                      \
+   (DST)[1] = (V1);                      \
+   (DST)[2] = (V2);                      \
+   (DST)[3] = (V3);                      \
+} while (0)
+#endif
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h
new file mode 100644
index 00000000000..148a5cb9972
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_memory.h
@@ -0,0 +1,222 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+/**
+ * Memory functions
+ */
+
+
+#ifndef U_MEMORY_H
+#define U_MEMORY_H
+
+
+#include "util/u_pointer.h"
+
+
+ /* Define ENOMEM for WINCE */ 
+#if (_WIN32_WCE < 600)
+#ifndef ENOMEM
+#define ENOMEM 12
+#endif
+#endif
+
+
+
+#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) 
+
+/* memory debugging */
+
+#include "p_debug.h"
+
+#define MALLOC( _size ) \
+   debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size )
+#define CALLOC( _count, _size ) \
+   debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size )
+#define FREE( _ptr ) \
+   debug_free( __FILE__, __LINE__, __FUNCTION__,  _ptr )
+#define REALLOC( _ptr, _old_size, _size ) \
+   debug_realloc( __FILE__, __LINE__, __FUNCTION__,  _ptr, _old_size, _size )
+
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
+
+void * __stdcall
+EngAllocMem(
+    unsigned long Flags,
+    unsigned long MemSize,
+    unsigned long Tag );
+
+void __stdcall
+EngFreeMem(
+    void *Mem );
+
+#define MALLOC( _size ) EngAllocMem( 0, _size, 'D3AG' )
+#define _FREE( _ptr ) EngFreeMem( _ptr )
+
+#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
+
+void *
+ExAllocatePool(
+    unsigned long PoolType, 
+    size_t NumberOfBytes);
+
+void 
+ExFreePool(void *P);
+
+#define MALLOC(_size) ExAllocatePool(0, _size)
+#define _FREE(_ptr) ExFreePool(_ptr)
+
+#else
+
+#define MALLOC( SIZE )  malloc( SIZE )
+#define CALLOC( COUNT, SIZE )   calloc( COUNT, SIZE )
+#define FREE( PTR )  free( PTR )
+#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE )  realloc( OLDPTR, NEWSIZE )
+
+#endif
+
+
+#ifndef CALLOC
+static INLINE void *
+CALLOC( unsigned count, unsigned size )
+{
+   void *ptr = MALLOC( count * size );
+   if( ptr ) {
+      memset( ptr, 0, count * size );
+   }
+   return ptr;
+}
+#endif /* !CALLOC */
+
+#ifndef FREE
+static INLINE void
+FREE( void *ptr )
+{
+   if( ptr ) {
+      _FREE( ptr );
+   }
+}
+#endif /* !FREE */
+
+#ifndef REALLOC
+static INLINE void *
+REALLOC( void *old_ptr, unsigned old_size, unsigned new_size )
+{
+   void *new_ptr = NULL;
+
+   if (new_size != 0) {
+      unsigned copy_size = old_size < new_size ? old_size : new_size;
+      new_ptr = MALLOC( new_size );
+      if (new_ptr && old_ptr && copy_size) {
+         memcpy( new_ptr, old_ptr, copy_size );
+      }
+   }
+
+   FREE( old_ptr );
+   return new_ptr;
+}
+#endif /* !REALLOC */
+
+
+#define MALLOC_STRUCT(T)   (struct T *) MALLOC(sizeof(struct T))
+
+#define CALLOC_STRUCT(T)   (struct T *) CALLOC(1, sizeof(struct T))
+
+
+/**
+ * Return memory on given byte alignment
+ */
+static INLINE void *
+align_malloc(size_t bytes, uint alignment)
+{
+#if defined(HAVE_POSIX_MEMALIGN)
+   void *mem;
+   alignment = (alignment + (uint)sizeof(void*) - 1) & ~((uint)sizeof(void*) - 1);
+   if(posix_memalign(& mem, alignment, bytes) != 0)
+      return NULL;
+   return mem;
+#else
+   char *ptr, *buf;
+
+   assert( alignment > 0 );
+
+   ptr = (char *) MALLOC(bytes + alignment + sizeof(void *));
+   if (!ptr)
+      return NULL;
+
+   buf = (char *) align_pointer( ptr + sizeof(void *), alignment );
+   *(char **)(buf - sizeof(void *)) = ptr;
+
+   return buf;
+#endif /* defined(HAVE_POSIX_MEMALIGN) */
+}
+
+/**
+ * Free memory returned by align_malloc().
+ */
+static INLINE void
+align_free(void *ptr)
+{
+#if defined(HAVE_POSIX_MEMALIGN)
+   FREE(ptr);
+#else
+   void **cubbyHole = (void **) ((char *) ptr - sizeof(void *));
+   void *realAddr = *cubbyHole;
+   FREE(realAddr);
+#endif /* defined(HAVE_POSIX_MEMALIGN) */
+}
+
+
+/**
+ * Duplicate a block of memory.
+ */
+static INLINE void *
+mem_dup(const void *src, uint size)
+{
+   void *dup = MALLOC(size);
+   if (dup)
+      memcpy(dup, src, size);
+   return dup;
+}
+
+
+/**
+ * Number of elements in an array.
+ */
+#ifndef Elements
+#define Elements(x) (sizeof(x)/sizeof((x)[0]))
+#endif
+
+
+/**
+ * Offset of a field in a struct, in bytes.
+ */
+#define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER))
+
+
+
+#endif /* U_MEMORY_H */
diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c
index b49ae074e0e..0f51dd59777 100644
--- a/src/gallium/auxiliary/util/u_mm.c
+++ b/src/gallium/auxiliary/util/u_mm.c
@@ -24,9 +24,9 @@
 
 
 #include "pipe/p_compiler.h"
-#include "pipe/p_util.h"
 #include "pipe/p_debug.h"
 
+#include "util/u_memory.h"
 #include "util/u_mm.h"
 
 
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 06abb34d5a0..39e4ae9d071 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -37,6 +37,7 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_format.h"
+#include "util/u_math.h"
 
 
 /**
@@ -150,10 +151,10 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest)
 
    if (pf_size_x(format) <= 8) {
       /* format uses 8-bit components or less */
-      UNCLAMPED_FLOAT_TO_UBYTE(r, rgba[0]);
-      UNCLAMPED_FLOAT_TO_UBYTE(g, rgba[1]);
-      UNCLAMPED_FLOAT_TO_UBYTE(b, rgba[2]);
-      UNCLAMPED_FLOAT_TO_UBYTE(a, rgba[3]);
+      r = float_to_ubyte(rgba[0]);
+      g = float_to_ubyte(rgba[1]);
+      b = float_to_ubyte(rgba[2]);
+      a = float_to_ubyte(rgba[3]);
    }
 
    switch (format) {
@@ -286,4 +287,31 @@ util_pack_z(enum pipe_format format, double z)
 }
 
 
+/**
+ * Pack 4 ubytes into a 4-byte word
+ */
+static INLINE unsigned
+pack_ub4(ubyte b0, ubyte b1, ubyte b2, ubyte b3)
+{
+   return ((((unsigned int)b0) << 0) |
+	   (((unsigned int)b1) << 8) |
+	   (((unsigned int)b2) << 16) |
+	   (((unsigned int)b3) << 24));
+}
+
+
+/**
+ * Pack/convert 4 floats into one 4-byte word.
+ */
+static INLINE unsigned
+pack_ui32_float4(float a, float b, float c, float d)
+{
+   return pack_ub4( float_to_ubyte(a),
+		    float_to_ubyte(b),
+		    float_to_ubyte(c),
+		    float_to_ubyte(d) );
+}
+
+
+
 #endif /* U_PACK_COLOR_H */
diff --git a/src/gallium/auxiliary/util/u_pointer.h b/src/gallium/auxiliary/util/u_pointer.h
new file mode 100644
index 00000000000..e1af9f11cb9
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_pointer.h
@@ -0,0 +1,107 @@
+/**************************************************************************
+ * 
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef U_POINTER_H
+#define U_POINTER_H
+
+#include "pipe/p_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static INLINE intptr_t
+pointer_to_intptr( const void *p )
+{
+   union {
+      const void *p;
+      intptr_t i;
+   } pi;
+   pi.p = p;
+   return pi.i;
+}
+
+static INLINE void *
+intptr_to_pointer( intptr_t i )
+{
+   union {
+      void *p;
+      intptr_t i;
+   } pi;
+   pi.i = i;
+   return pi.p;
+}
+
+static INLINE uintptr_t
+pointer_to_uintptr( const void *ptr )
+{
+   union {
+      const void *p;
+      uintptr_t u;
+   } pu;
+   pu.p = ptr;
+   return pu.u;
+}
+
+static INLINE void *
+uintptr_to_pointer( uintptr_t u )
+{
+   union {
+      void *p;
+      uintptr_t u;
+   } pu;
+   pu.u = u;
+   return pu.p;
+}
+
+/**
+ * Return a pointer aligned to next multiple of N bytes.
+ */
+static INLINE void *
+align_pointer( const void *unaligned, uintptr_t alignment )
+{
+   uintptr_t aligned = (pointer_to_uintptr( unaligned ) + alignment - 1) & ~(alignment - 1);
+   return uintptr_to_pointer( aligned );
+}
+
+
+/**
+ * Return a pointer aligned to next multiple of 16 bytes.
+ */
+static INLINE void *
+align16( void *unaligned )
+{
+   return align_pointer( unaligned, 16 );
+}
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_POINTER_H */
diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c
index 94e447b9d53..b31ab5415fa 100644
--- a/src/gallium/auxiliary/util/u_rect.c
+++ b/src/gallium/auxiliary/util/u_rect.c
@@ -31,7 +31,6 @@
 
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_format.h"
 #include "util/u_rect.h"
 
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index c34fb6ee334..f06d13c2c4a 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -37,10 +37,10 @@
 #include "pipe/p_debug.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_winsys.h"
 #include "pipe/p_shader_tokens.h"
 
+#include "util/u_memory.h"
 #include "util/u_simple_shaders.h"
 
 #include "tgsi/tgsi_build.h"
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
new file mode 100644
index 00000000000..853c503f4ff
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -0,0 +1,1169 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * RGBA/float tile get/put functions.
+ * Usable both by drivers and state trackers.
+ * Surfaces should already be in a mapped state.
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_rect.h"
+#include "util/u_tile.h"
+
+
+/**
+ * Move raw block of pixels from surface to user memory.
+ * This should be usable by any hw driver that has mappable surfaces.
+ */
+void
+pipe_get_tile_raw(struct pipe_surface *ps,
+                  uint x, uint y, uint w, uint h,
+                  void *dst, int dst_stride)
+{
+   const void *src;
+
+   if (dst_stride == 0)
+      dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size;
+
+   if (pipe_clip_tile(x, y, &w, &h, ps))
+      return;
+
+   src = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ);
+   assert(src);
+   if(!src)
+      return;
+
+   pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y);
+
+   pipe_surface_unmap(ps);
+}
+
+
+/**
+ * Move raw block of pixels from user memory to surface.
+ * This should be usable by any hw driver that has mappable surfaces.
+ */
+void
+pipe_put_tile_raw(struct pipe_surface *ps,
+                  uint x, uint y, uint w, uint h,
+                  const void *src, int src_stride)
+{
+   void *dst;
+
+   if (src_stride == 0)
+      src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size;
+
+   if (pipe_clip_tile(x, y, &w, &h, ps))
+      return;
+
+   dst = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE);
+   assert(dst);
+   if(!dst)
+      return;
+
+   pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0);
+
+   pipe_surface_unmap(ps);
+}
+
+
+
+
+/** Convert short in [-32768,32767] to GLfloat in [-1.0,1.0] */
+#define SHORT_TO_FLOAT(S)   ((2.0F * (S) + 1.0F) * (1.0F/65535.0F))
+
+#define UNCLAMPED_FLOAT_TO_SHORT(us, f)  \
+   us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) )
+
+
+
+/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
+
+static void
+a8r8g8b8_get_tile_rgba(const unsigned *src,
+                       unsigned w, unsigned h,
+                       float *p,
+                       unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         const unsigned pixel = *src++;
+         pRow[0] = ubyte_to_float((pixel >> 16) & 0xff);
+         pRow[1] = ubyte_to_float((pixel >>  8) & 0xff);
+         pRow[2] = ubyte_to_float((pixel >>  0) & 0xff);
+         pRow[3] = ubyte_to_float((pixel >> 24) & 0xff);
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+a8r8g8b8_put_tile_rgba(unsigned *dst,
+                       unsigned w, unsigned h,
+                       const float *p,
+                       unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         unsigned r, g, b, a;
+         r = float_to_ubyte(pRow[0]);
+         g = float_to_ubyte(pRow[1]);
+         b = float_to_ubyte(pRow[2]);
+         a = float_to_ubyte(pRow[3]);
+         *dst++ = (a << 24) | (r << 16) | (g << 8) | b;
+      }
+      p += src_stride;
+   }
+}
+
+
+/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
+
+static void
+x8r8g8b8_get_tile_rgba(const unsigned *src,
+                       unsigned w, unsigned h,
+                       float *p,
+                       unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         const unsigned pixel = *src++;
+         pRow[0] = ubyte_to_float((pixel >> 16) & 0xff);
+         pRow[1] = ubyte_to_float((pixel >>  8) & 0xff);
+         pRow[2] = ubyte_to_float((pixel >>  0) & 0xff);
+         pRow[3] = ubyte_to_float(0xff);
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+x8r8g8b8_put_tile_rgba(unsigned *dst,
+                       unsigned w, unsigned h,
+                       const float *p,
+                       unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         unsigned r, g, b;
+         r = float_to_ubyte(pRow[0]);
+         g = float_to_ubyte(pRow[1]);
+         b = float_to_ubyte(pRow[2]);
+         *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b;
+      }
+      p += src_stride;
+   }
+}
+
+
+/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/
+
+static void
+b8g8r8a8_get_tile_rgba(const unsigned *src,
+                       unsigned w, unsigned h,
+                       float *p,
+                       unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         const unsigned pixel = *src++;
+         pRow[0] = ubyte_to_float((pixel >>  8) & 0xff);
+         pRow[1] = ubyte_to_float((pixel >> 16) & 0xff);
+         pRow[2] = ubyte_to_float((pixel >> 24) & 0xff);
+         pRow[3] = ubyte_to_float((pixel >>  0) & 0xff);
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+b8g8r8a8_put_tile_rgba(unsigned *dst,
+                       unsigned w, unsigned h,
+                       const float *p,
+                       unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         unsigned r, g, b, a;
+         r = float_to_ubyte(pRow[0]);
+         g = float_to_ubyte(pRow[1]);
+         b = float_to_ubyte(pRow[2]);
+         a = float_to_ubyte(pRow[3]);
+         *dst++ = (b << 24) | (g << 16) | (r << 8) | a;
+      }
+      p += src_stride;
+   }
+}
+
+
+/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/
+
+static void
+a1r5g5b5_get_tile_rgba(const ushort *src,
+                       unsigned w, unsigned h,
+                       float *p,
+                       unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         const ushort pixel = *src++;
+         pRow[0] = ((pixel >> 10) & 0x1f) * (1.0f / 31.0f);
+         pRow[1] = ((pixel >>  5) & 0x1f) * (1.0f / 31.0f);
+         pRow[2] = ((pixel      ) & 0x1f) * (1.0f / 31.0f);
+         pRow[3] = ((pixel >> 15)       ) * 1.0f;
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+a1r5g5b5_put_tile_rgba(ushort *dst,
+                       unsigned w, unsigned h,
+                       const float *p,
+                       unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         unsigned r, g, b, a;
+         r = float_to_ubyte(pRow[0]);
+         g = float_to_ubyte(pRow[1]);
+         b = float_to_ubyte(pRow[2]);
+         a = float_to_ubyte(pRow[3]);
+         r = r >> 3;  /* 5 bits */
+         g = g >> 3;  /* 5 bits */
+         b = b >> 3;  /* 5 bits */
+         a = a >> 7;  /* 1 bit */
+         *dst++ = (a << 15) | (r << 10) | (g << 5) | b;
+      }
+      p += src_stride;
+   }
+}
+
+
+/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/
+
+static void
+a4r4g4b4_get_tile_rgba(const ushort *src,
+                       unsigned w, unsigned h,
+                       float *p,
+                       unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         const ushort pixel = *src++;
+         pRow[0] = ((pixel >>  8) & 0xf) * (1.0f / 15.0f);
+         pRow[1] = ((pixel >>  4) & 0xf) * (1.0f / 15.0f);
+         pRow[2] = ((pixel      ) & 0xf) * (1.0f / 15.0f);
+         pRow[3] = ((pixel >> 12)      ) * (1.0f / 15.0f);
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+a4r4g4b4_put_tile_rgba(ushort *dst,
+                       unsigned w, unsigned h,
+                       const float *p,
+                       unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         unsigned r, g, b, a;
+         r = float_to_ubyte(pRow[0]);
+         g = float_to_ubyte(pRow[1]);
+         b = float_to_ubyte(pRow[2]);
+         a = float_to_ubyte(pRow[3]);
+         r >>= 4;
+         g >>= 4;
+         b >>= 4;
+         a >>= 4;
+         *dst++ = (a << 12) | (r << 16) | (g << 4) | b;
+      }
+      p += src_stride;
+   }
+}
+
+
+/*** PIPE_FORMAT_R5G6B5_UNORM ***/
+
+static void
+r5g6b5_get_tile_rgba(const ushort *src,
+                     unsigned w, unsigned h,
+                     float *p,
+                     unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         const ushort pixel = *src++;
+         pRow[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f);
+         pRow[1] = ((pixel >>  5) & 0x3f) * (1.0f / 63.0f);
+         pRow[2] = ((pixel      ) & 0x1f) * (1.0f / 31.0f);
+         pRow[3] = 1.0f;
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+r5g6b5_put_tile_rgba(ushort *dst,
+                     unsigned w, unsigned h,
+                     const float *p,
+                     unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         uint r = (uint) (CLAMP(pRow[0], 0.0, 1.0) * 31.0);
+         uint g = (uint) (CLAMP(pRow[1], 0.0, 1.0) * 63.0);
+         uint b = (uint) (CLAMP(pRow[2], 0.0, 1.0) * 31.0);
+         *dst++ = (r << 11) | (g << 5) | (b);
+      }
+      p += src_stride;
+   }
+}
+
+
+
+/*** PIPE_FORMAT_Z16_UNORM ***/
+
+/**
+ * Return each Z value as four floats in [0,1].
+ */
+static void
+z16_get_tile_rgba(const ushort *src,
+                  unsigned w, unsigned h,
+                  float *p,
+                  unsigned dst_stride)
+{
+   const float scale = 1.0f / 65535.0f;
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         pRow[0] =
+         pRow[1] =
+         pRow[2] =
+         pRow[3] = *src++ * scale;
+      }
+      p += dst_stride;
+   }
+}
+
+
+
+
+/*** PIPE_FORMAT_L8_UNORM ***/
+
+static void
+l8_get_tile_rgba(const ubyte *src,
+                 unsigned w, unsigned h,
+                 float *p,
+                 unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, src++, pRow += 4) {
+         pRow[0] =
+         pRow[1] =
+         pRow[2] = ubyte_to_float(*src);
+         pRow[3] = 1.0;
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+l8_put_tile_rgba(ubyte *dst,
+                 unsigned w, unsigned h,
+                 const float *p,
+                 unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         unsigned r;
+         r = float_to_ubyte(pRow[0]);
+         *dst++ = r;
+      }
+      p += src_stride;
+   }
+}
+
+
+
+/*** PIPE_FORMAT_A8_UNORM ***/
+
+static void
+a8_get_tile_rgba(const ubyte *src,
+                 unsigned w, unsigned h,
+                 float *p,
+                 unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, src++, pRow += 4) {
+         pRow[0] =
+         pRow[1] =
+         pRow[2] = 0.0;
+         pRow[3] = ubyte_to_float(*src);
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+a8_put_tile_rgba(ubyte *dst,
+                 unsigned w, unsigned h,
+                 const float *p,
+                 unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         unsigned a;
+         a = float_to_ubyte(pRow[3]);
+         *dst++ = a;
+      }
+      p += src_stride;
+   }
+}
+
+
+
+/*** PIPE_FORMAT_R16_SNORM ***/
+
+static void
+r16_get_tile_rgba(const short *src,
+                  unsigned w, unsigned h,
+                  float *p,
+                  unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, src++, pRow += 4) {
+         pRow[0] = SHORT_TO_FLOAT(src[0]);
+         pRow[1] =
+         pRow[2] = 0.0;
+         pRow[3] = 1.0;
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+r16_put_tile_rgba(short *dst,
+                  unsigned w, unsigned h,
+                  const float *p,
+                  unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, dst++, pRow += 4) {
+         UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]);
+      }
+      p += src_stride;
+   }
+}
+
+
+/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/
+
+static void
+r16g16b16a16_get_tile_rgba(const short *src,
+                           unsigned w, unsigned h,
+                           float *p,
+                           unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, src += 4, pRow += 4) {
+         pRow[0] = SHORT_TO_FLOAT(src[0]);
+         pRow[1] = SHORT_TO_FLOAT(src[1]);
+         pRow[2] = SHORT_TO_FLOAT(src[2]);
+         pRow[3] = SHORT_TO_FLOAT(src[3]);
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+r16g16b16a16_put_tile_rgba(short *dst,
+                           unsigned w, unsigned h,
+                           const float *p,
+                           unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, dst += 4, pRow += 4) {
+         UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]);
+         UNCLAMPED_FLOAT_TO_SHORT(dst[1], pRow[1]);
+         UNCLAMPED_FLOAT_TO_SHORT(dst[2], pRow[2]);
+         UNCLAMPED_FLOAT_TO_SHORT(dst[3], pRow[3]);
+      }
+      p += src_stride;
+   }
+}
+
+
+
+/*** PIPE_FORMAT_I8_UNORM ***/
+
+static void
+i8_get_tile_rgba(const ubyte *src,
+                 unsigned w, unsigned h,
+                 float *p,
+                 unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, src++, pRow += 4) {
+         pRow[0] =
+         pRow[1] =
+         pRow[2] =
+         pRow[3] = ubyte_to_float(*src);
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+i8_put_tile_rgba(ubyte *dst,
+                 unsigned w, unsigned h,
+                 const float *p,
+                 unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         unsigned r;
+         r = float_to_ubyte(pRow[0]);
+         *dst++ = r;
+      }
+      p += src_stride;
+   }
+}
+
+
+/*** PIPE_FORMAT_A8L8_UNORM ***/
+
+static void
+a8l8_get_tile_rgba(const ushort *src,
+                   unsigned w, unsigned h,
+                   float *p,
+                   unsigned dst_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         ushort p = *src++;
+         pRow[0] =
+         pRow[1] =
+         pRow[2] = ubyte_to_float(p & 0xff);
+         pRow[3] = ubyte_to_float(p >> 8);
+      }
+      p += dst_stride;
+   }
+}
+
+
+static void
+a8l8_put_tile_rgba(ushort *dst,
+                   unsigned w, unsigned h,
+                   const float *p,
+                   unsigned src_stride)
+{
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      const float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         unsigned r, a;
+         r = float_to_ubyte(pRow[0]);
+         a = float_to_ubyte(pRow[3]);
+         *dst++ = (a << 8) | r;
+      }
+      p += src_stride;
+   }
+}
+
+
+
+
+/*** PIPE_FORMAT_Z32_UNORM ***/
+
+/**
+ * Return each Z value as four floats in [0,1].
+ */
+static void
+z32_get_tile_rgba(const unsigned *src,
+                  unsigned w, unsigned h,
+                  float *p,
+                  unsigned dst_stride)
+{
+   const double scale = 1.0 / (double) 0xffffffff;
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         pRow[0] =
+         pRow[1] =
+         pRow[2] =
+         pRow[3] = (float) (*src++ * scale);
+      }
+      p += dst_stride;
+   }
+}
+
+
+/*** PIPE_FORMAT_S8Z24_UNORM ***/
+
+/**
+ * Return Z component as four float in [0,1].  Stencil part ignored.
+ */
+static void
+s8z24_get_tile_rgba(const unsigned *src,
+                    unsigned w, unsigned h,
+                    float *p,
+                    unsigned dst_stride)
+{
+   const double scale = 1.0 / ((1 << 24) - 1);
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         pRow[0] =
+         pRow[1] =
+         pRow[2] =
+         pRow[3] = (float) (scale * (*src++ & 0xffffff));
+      }
+      p += dst_stride;
+   }
+}
+
+
+/*** PIPE_FORMAT_Z24S8_UNORM ***/
+
+/**
+ * Return Z component as four float in [0,1].  Stencil part ignored.
+ */
+static void
+z24s8_get_tile_rgba(const unsigned *src,
+                    unsigned w, unsigned h,
+                    float *p,
+                    unsigned dst_stride)
+{
+   const double scale = 1.0 / ((1 << 24) - 1);
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      for (j = 0; j < w; j++, pRow += 4) {
+         pRow[0] =
+         pRow[1] =
+         pRow[2] =
+         pRow[3] = (float) (scale * (*src++ >> 8));
+      }
+      p += dst_stride;
+   }
+}
+
+
+/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/
+
+/**
+ * Convert YCbCr (or YCrCb) to RGBA.
+ */
+static void
+ycbcr_get_tile_rgba(const ushort *src,
+                    unsigned w, unsigned h,
+                    float *p,
+                    unsigned dst_stride,
+                    boolean rev)
+{
+   const float scale = 1.0f / 255.0f;
+   unsigned i, j;
+
+   for (i = 0; i < h; i++) {
+      float *pRow = p;
+      /* do two texels at a time */
+      for (j = 0; j < (w & ~1); j += 2, src += 2) {
+         const ushort t0 = src[0];
+         const ushort t1 = src[1];
+         const ubyte y0 = (t0 >> 8) & 0xff;  /* luminance */
+         const ubyte y1 = (t1 >> 8) & 0xff;  /* luminance */
+         ubyte cb, cr;
+         float r, g, b;
+
+         if (rev) {
+            cb = t1 & 0xff;         /* chroma U */
+            cr = t0 & 0xff;         /* chroma V */
+         }
+         else {
+            cb = t0 & 0xff;         /* chroma U */
+            cr = t1 & 0xff;         /* chroma V */
+         }
+
+         /* even pixel: y0,cr,cb */
+         r = 1.164f * (y0-16) + 1.596f * (cr-128);
+         g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
+         b = 1.164f * (y0-16) + 2.018f * (cb-128);
+         pRow[0] = r * scale;
+         pRow[1] = g * scale;
+         pRow[2] = b * scale;
+         pRow[3] = 1.0f;
+         pRow += 4;
+
+         /* odd pixel: use y1,cr,cb */
+         r = 1.164f * (y1-16) + 1.596f * (cr-128);
+         g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
+         b = 1.164f * (y1-16) + 2.018f * (cb-128);
+         pRow[0] = r * scale;
+         pRow[1] = g * scale;
+         pRow[2] = b * scale;
+         pRow[3] = 1.0f;
+         pRow += 4;
+
+      }
+      /* do the last texel */
+      if (w & 1) {
+         const ushort t0 = src[0];
+         const ushort t1 = src[1];
+         const ubyte y0 = (t0 >> 8) & 0xff;  /* luminance */
+         ubyte cb, cr;
+         float r, g, b;
+
+         if (rev) {
+            cb = t1 & 0xff;         /* chroma U */
+            cr = t0 & 0xff;         /* chroma V */
+         }
+         else {
+            cb = t0 & 0xff;         /* chroma U */
+            cr = t1 & 0xff;         /* chroma V */
+         }
+
+         /* even pixel: y0,cr,cb */
+         r = 1.164f * (y0-16) + 1.596f * (cr-128);
+         g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
+         b = 1.164f * (y0-16) + 2.018f * (cb-128);
+         pRow[0] = r * scale;
+         pRow[1] = g * scale;
+         pRow[2] = b * scale;
+         pRow[3] = 1.0f;
+         pRow += 4;
+      }
+      p += dst_stride;
+   }
+}
+
+
+void
+pipe_tile_raw_to_rgba(enum pipe_format format,
+                      void *src,
+                      uint w, uint h,
+                      float *dst, unsigned dst_stride)
+{
+   switch (format) {
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+      x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      a4r4g4b4_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_L8_UNORM:
+      l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_A8_UNORM:
+      a8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_I8_UNORM:
+      i8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_A8L8_UNORM:
+      a8l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_R16_SNORM:
+      r16_get_tile_rgba((short *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_R16G16B16A16_SNORM:
+      r16g16b16a16_get_tile_rgba((short *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_Z16_UNORM:
+      z16_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_Z32_UNORM:
+      z32_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+      s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_Z24S8_UNORM:
+      z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+      break;
+   case PIPE_FORMAT_YCBCR:
+      ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE);
+      break;
+   case PIPE_FORMAT_YCBCR_REV:
+      ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE);
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+void
+pipe_get_tile_rgba(struct pipe_surface *ps,
+                   uint x, uint y, uint w, uint h,
+                   float *p)
+{
+   unsigned dst_stride = w * 4;
+   void *packed;
+
+   if (pipe_clip_tile(x, y, &w, &h, ps))
+      return;
+
+   packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size);
+
+   if (!packed)
+      return;
+
+   if(ps->format == PIPE_FORMAT_YCBCR || ps->format == PIPE_FORMAT_YCBCR_REV)
+      assert((x & 1) == 0);
+
+   pipe_get_tile_raw(ps, x, y, w, h, packed, 0);
+
+   pipe_tile_raw_to_rgba(ps->format, packed, w, h, p, dst_stride);
+
+   FREE(packed);
+}
+
+
+void
+pipe_put_tile_rgba(struct pipe_surface *ps,
+                   uint x, uint y, uint w, uint h,
+                   const float *p)
+{
+   unsigned src_stride = w * 4;
+   void *packed;
+
+   if (pipe_clip_tile(x, y, &w, &h, ps))
+      return;
+
+   packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size);
+
+   if (!packed)
+      return;
+
+   switch (ps->format) {
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+      x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      assert(0);
+      break;
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_L8_UNORM:
+      l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_A8_UNORM:
+      a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_I8_UNORM:
+      i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_A8L8_UNORM:
+      a8l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_R16_SNORM:
+      r16_put_tile_rgba((short *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_R16G16B16A16_SNORM:
+      r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride);
+      break;
+   case PIPE_FORMAT_Z16_UNORM:
+      /*z16_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/
+      break;
+   case PIPE_FORMAT_Z32_UNORM:
+      /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
+      break;
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+      /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
+      break;
+   case PIPE_FORMAT_Z24S8_UNORM:
+      /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
+      break;
+   default:
+      assert(0);
+   }
+
+   pipe_put_tile_raw(ps, x, y, w, h, packed, 0);
+
+   FREE(packed);
+}
+
+
+/**
+ * Get a block of Z values, converted to 32-bit range.
+ */
+void
+pipe_get_tile_z(struct pipe_surface *ps,
+                uint x, uint y, uint w, uint h,
+                uint *z)
+{
+   const uint dstStride = w;
+   ubyte *map;
+   uint *pDest = z;
+   uint i, j;
+
+   if (pipe_clip_tile(x, y, &w, &h, ps))
+      return;
+
+   map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ);
+   if (!map) {
+      assert(0);
+      return;
+   }
+
+   switch (ps->format) {
+   case PIPE_FORMAT_Z32_UNORM:
+      {
+         const uint *pSrc
+            = (const uint *)(map  + y * ps->stride + x*4);
+         for (i = 0; i < h; i++) {
+            memcpy(pDest, pSrc, 4 * w);
+            pDest += dstStride;
+            pSrc += ps->stride/4;
+         }
+      }
+      break;
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+      {
+         const uint *pSrc
+            = (const uint *)(map + y * ps->stride + x*4);
+         for (i = 0; i < h; i++) {
+            for (j = 0; j < w; j++) {
+               /* convert 24-bit Z to 32-bit Z */
+               pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff);
+            }
+            pDest += dstStride;
+            pSrc += ps->stride/4;
+         }
+      }
+      break;
+   case PIPE_FORMAT_Z16_UNORM:
+      {
+         const ushort *pSrc
+            = (const ushort *)(map + y * ps->stride + x*2);
+         for (i = 0; i < h; i++) {
+            for (j = 0; j < w; j++) {
+               /* convert 16-bit Z to 32-bit Z */
+               pDest[j] = (pSrc[j] << 16) | pSrc[j];
+            }
+            pDest += dstStride;
+            pSrc += ps->stride/2;
+         }
+      }
+      break;
+   default:
+      assert(0);
+   }
+
+   pipe_surface_unmap(ps);
+}
+
+
+void
+pipe_put_tile_z(struct pipe_surface *ps,
+                uint x, uint y, uint w, uint h,
+                const uint *zSrc)
+{
+   const uint srcStride = w;
+   const uint *pSrc = zSrc;
+   ubyte *map;
+   uint i, j;
+
+   if (pipe_clip_tile(x, y, &w, &h, ps))
+      return;
+
+   map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE);
+   if (!map) {
+      assert(0);
+      return;
+   }
+
+   switch (ps->format) {
+   case PIPE_FORMAT_Z32_UNORM:
+      {
+         uint *pDest = (uint *) (map + y * ps->stride + x*4);
+         for (i = 0; i < h; i++) {
+            memcpy(pDest, pSrc, 4 * w);
+            pDest += ps->stride/4;
+            pSrc += srcStride;
+         }
+      }
+      break;
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+      {
+         uint *pDest = (uint *) (map + y * ps->stride + x*4);
+         for (i = 0; i < h; i++) {
+            for (j = 0; j < w; j++) {
+               /* convert 32-bit Z to 24-bit Z (0 stencil) */
+               pDest[j] = pSrc[j] >> 8;
+            }
+            pDest += ps->stride/4;
+            pSrc += srcStride;
+         }
+      }
+      break;
+   case PIPE_FORMAT_Z16_UNORM:
+      {
+         ushort *pDest = (ushort *) (map + y * ps->stride + x*2);
+         for (i = 0; i < h; i++) {
+            for (j = 0; j < w; j++) {
+               /* convert 32-bit Z to 16-bit Z */
+               pDest[j] = pSrc[j] >> 16;
+            }
+            pDest += ps->stride/2;
+            pSrc += srcStride;
+         }
+      }
+      break;
+   default:
+      assert(0);
+   }
+
+   pipe_surface_unmap(ps);
+}
+
+
diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h
new file mode 100644
index 00000000000..a8ac8053083
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_tile.h
@@ -0,0 +1,101 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef P_TILE_H
+#define P_TILE_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_surface;
+
+
+/**
+ * Clip tile against surface dims.
+ * \return TRUE if tile is totally clipped, FALSE otherwise
+ */
+static INLINE boolean
+pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps)
+{
+   if (x >= ps->width)
+      return TRUE;
+   if (y >= ps->height)
+      return TRUE;
+   if (x + *w > ps->width)
+      *w = ps->width - x;
+   if (y + *h > ps->height)
+      *h = ps->height - y;
+   return FALSE;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void
+pipe_get_tile_raw(struct pipe_surface *ps,
+                  uint x, uint y, uint w, uint h,
+                  void *p, int dst_stride);
+
+void
+pipe_put_tile_raw(struct pipe_surface *ps,
+                  uint x, uint y, uint w, uint h,
+                  const void *p, int src_stride);
+
+
+void
+pipe_get_tile_rgba(struct pipe_surface *ps,
+                   uint x, uint y, uint w, uint h,
+                   float *p);
+
+void
+pipe_put_tile_rgba(struct pipe_surface *ps,
+                   uint x, uint y, uint w, uint h,
+                   const float *p);
+
+
+void
+pipe_get_tile_z(struct pipe_surface *ps,
+                uint x, uint y, uint w, uint h,
+                uint *z);
+
+void
+pipe_put_tile_z(struct pipe_surface *ps,
+                uint x, uint y, uint w, uint h,
+                const uint *z);
+
+void
+pipe_tile_raw_to_rgba(enum pipe_format format,
+                      void *src,
+                      uint w, uint h,
+                      float *dst, unsigned dst_stride);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
index f430e88b9c1..6bace0bb11a 100644
--- a/src/gallium/drivers/cell/common.h
+++ b/src/gallium/drivers/cell/common.h
@@ -34,7 +34,6 @@
 #define CELL_COMMON_H
 
 #include "pipe/p_compiler.h"
-#include "pipe/p_util.h"
 #include "pipe/p_format.h"
 #include "pipe/p_state.h"
 
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c
index 3ffe09add66..cee0917b631 100644
--- a/src/gallium/drivers/cell/ppu/cell_clear.c
+++ b/src/gallium/drivers/cell/ppu/cell_clear.c
@@ -34,7 +34,7 @@
 #include <assert.h>
 #include <stdint.h>
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "cell/common.h"
 #include "cell_clear.h"
 #include "cell_context.h"
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
index 12eb5aa2547..5af95a3c103 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ b/src/gallium/drivers/cell/ppu/cell_context.c
@@ -35,7 +35,7 @@
 
 #include "pipe/p_defines.h"
 #include "pipe/p_format.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_winsys.h"
 #include "pipe/p_screen.h"
 
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
index 67b87f16d7f..971d65d09e4 100644
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
@@ -30,7 +30,7 @@
  *  Brian Paul
  */
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 #include "draw/draw_context.h"
 #include "cell_context.h"
diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c
index b663b376222..dd25ae880e5 100644
--- a/src/gallium/drivers/cell/ppu/cell_render.c
+++ b/src/gallium/drivers/cell/ppu/cell_render.c
@@ -33,7 +33,7 @@
 #include "cell_context.h"
 #include "cell_render.h"
 #include "cell_spu.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "draw/draw_private.h"
 
 
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c
index 2bf441a0c5f..139b3719b62 100644
--- a/src/gallium/drivers/cell/ppu/cell_screen.c
+++ b/src/gallium/drivers/cell/ppu/cell_screen.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_winsys.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c
index 5480534ad90..8ab938a02aa 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_derived.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c
@@ -25,7 +25,7 @@
  * 
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vertex.h"
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 9cae67f0912..3646a0ee4f3 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -25,7 +25,7 @@
  * 
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "cell_context.h"
 #include "cell_state.h"
 #include "cell_state_emit.h"
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
index f5707f2bb8b..cd96b317faa 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_winsys.h"
 #include "draw/draw_context.h"
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c
index 01ffa31c2c2..2d31ad89a66 100644
--- a/src/gallium/drivers/cell/ppu/cell_surface.c
+++ b/src/gallium/drivers/cell/ppu/cell_surface.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_winsys.h"
 #include "util/p_tile.h"
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index 533b64227d6..1add81373db 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -33,7 +33,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_winsys.h"
 
 #include "cell_context.h"
diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.c b/src/gallium/drivers/cell/ppu/cell_winsys.c
index ebabce3c8f5..d570bbd2f91 100644
--- a/src/gallium/drivers/cell/ppu/cell_winsys.c
+++ b/src/gallium/drivers/cell/ppu/cell_winsys.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "cell_winsys.h"
 
 
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 42e5022f30b..89c61136a4c 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -63,7 +63,6 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_state.h"
-#include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index ab4ff8160a9..8944ef171eb 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -32,7 +32,6 @@
 #include <transpose_matrix4x4.h>
 #include "pipe/p_compiler.h"
 #include "pipe/p_format.h"
-#include "pipe/p_util.h"
 #include "spu_colorpack.h"
 #include "spu_main.h"
 #include "spu_texture.h"
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
index 74ab2bbd1f0..dbcf4b0eb93 100644
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ b/src/gallium/drivers/cell/spu/spu_util.c
@@ -1,4 +1,3 @@
-#include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_parse.h"
 //#include "tgsi_build.h"
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
index 219fd90cc0e..26f23637492 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
@@ -32,7 +32,6 @@
   *   Ian Romanick <idr@us.ibm.com>
   */
 
-#include "pipe/p_util.h"
 #include "pipe/p_state.h"
 #include "pipe/p_shader_tokens.h"
 #include "spu_exec.h"
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
index 3119a78c060..a1e81975e65 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
@@ -34,7 +34,6 @@
 
 #include <spu_mfcio.h>
 
-#include "pipe/p_util.h"
 #include "pipe/p_state.h"
 #include "pipe/p_shader_tokens.h"
 #include "spu_vertex_shader.h"
diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c
index 014a3e31d50..10c4ffc2096 100644
--- a/src/gallium/drivers/failover/fo_context.c
+++ b/src/gallium/drivers/failover/fo_context.c
@@ -28,7 +28,7 @@
 
 #include "pipe/p_defines.h"
 #include "pipe/p_winsys.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_context.h"
 
 #include "fo_context.h"
diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c
index e2bf5ab678f..c6776716a2f 100644
--- a/src/gallium/drivers/i915simple/i915_context.c
+++ b/src/gallium/drivers/i915simple/i915_context.c
@@ -35,7 +35,7 @@
 #include "draw/draw_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_winsys.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_screen.h"
 
 
diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c
index c024a051a58..48be3e14727 100644
--- a/src/gallium/drivers/i915simple/i915_debug_fp.c
+++ b/src/gallium/drivers/i915simple/i915_debug_fp.c
@@ -29,7 +29,7 @@
 #include "i915_reg.h"
 #include "i915_debug.h"
 #include "pipe/p_winsys.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 
 static void
diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h
index 80a9576304c..2f0f99d0468 100644
--- a/src/gallium/drivers/i915simple/i915_fpc.h
+++ b/src/gallium/drivers/i915simple/i915_fpc.h
@@ -29,7 +29,6 @@
 #ifndef I915_FPC_H
 #define I915_FPC_H
 
-#include "pipe/p_util.h"
 
 #include "i915_context.h"
 #include "i915_reg.h"
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
index 64432982c42..34b4a846c11 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -33,6 +33,8 @@
 #include "i915_fpc.h"
 
 #include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "util/u_string.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_dump.h"
diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c
index 9ffa4601380..d194c2fb158 100644
--- a/src/gallium/drivers/i915simple/i915_prim_emit.c
+++ b/src/gallium/drivers/i915simple/i915_prim_emit.c
@@ -27,7 +27,9 @@
 
 
 #include "draw/draw_pipe.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
 
 #include "i915_context.h"
 #include "i915_winsys.h"
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
index aef3682bbfc..e4ece550985 100644
--- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -41,9 +41,10 @@
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
 #include "pipe/p_debug.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 #include "i915_context.h"
 #include "i915_reg.h"
diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c
index 0afa17bed85..e9e40c3f0b2 100644
--- a/src/gallium/drivers/i915simple/i915_screen.c
+++ b/src/gallium/drivers/i915simple/i915_screen.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_winsys.h"
 #include "util/u_string.h"
 
diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c
index e8521b385ef..d2487d82778 100644
--- a/src/gallium/drivers/i915simple/i915_state.c
+++ b/src/gallium/drivers/i915simple/i915_state.c
@@ -31,8 +31,9 @@
 
 #include "draw/draw_context.h"
 #include "pipe/p_winsys.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "tgsi/tgsi_parse.h"
 
 #include "i915_context.h"
diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c
index 4daccec6e0c..488615067c5 100644
--- a/src/gallium/drivers/i915simple/i915_state_derived.c
+++ b/src/gallium/drivers/i915simple/i915_state_derived.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vertex.h"
diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915simple/i915_state_dynamic.c
index 8cfbdddd19b..86126a5a152 100644
--- a/src/gallium/drivers/i915simple/i915_state_dynamic.c
+++ b/src/gallium/drivers/i915simple/i915_state_dynamic.c
@@ -30,7 +30,9 @@
 #include "i915_context.h"
 #include "i915_reg.h"
 #include "i915_state.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
 
 #define FILE_DEBUG_FLAG DEBUG_STATE
 
diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c
index 2501f2d7cb8..8c16bb4e271 100644
--- a/src/gallium/drivers/i915simple/i915_state_immediate.c
+++ b/src/gallium/drivers/i915simple/i915_state_immediate.c
@@ -33,7 +33,7 @@
 #include "i915_context.h"
 #include "i915_state.h"
 #include "i915_reg.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 
 /* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet.
diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c
index 7868f21ca6a..c09c10601b4 100644
--- a/src/gallium/drivers/i915simple/i915_state_sampler.c
+++ b/src/gallium/drivers/i915simple/i915_state_sampler.c
@@ -27,7 +27,7 @@
 
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 #include "i915_state_inlines.h"
 #include "i915_context.h"
diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c
index 17b5125e569..62f1926644a 100644
--- a/src/gallium/drivers/i915simple/i915_surface.c
+++ b/src/gallium/drivers/i915simple/i915_surface.c
@@ -30,10 +30,9 @@
 #include "i915_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_winsys.h"
-#include "util/p_tile.h"
+#include "util/u_tile.h"
 #include "util/u_rect.h"
 
 
diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c
index ca0fb8761bf..32344da4d5a 100644
--- a/src/gallium/drivers/i915simple/i915_texture.c
+++ b/src/gallium/drivers/i915simple/i915_texture.c
@@ -34,8 +34,9 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 #include "i915_context.h"
 #include "i915_texture.h"
diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c
index 337e4f95f69..79d4150383a 100644
--- a/src/gallium/drivers/i965simple/brw_cc.c
+++ b/src/gallium/drivers/i965simple/brw_cc.c
@@ -29,7 +29,8 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
@@ -232,8 +233,7 @@ static void upload_cc_unit( struct brw_context *brw )
       cc.cc3.alpha_test_func = 
 	 brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func);
 
-      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], 
-			       brw->attribs.DepthStencil->alpha.ref);
+      cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref);
 
       cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
    }
diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c
index ea5c05a2796..8e78dd51be9 100644
--- a/src/gallium/drivers/i965simple/brw_clip_state.c
+++ b/src/gallium/drivers/i965simple/brw_clip_state.c
@@ -32,7 +32,8 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 
 static void upload_clip_unit( struct brw_context *brw )
diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c
index 8326f7b9c40..96920df0087 100644
--- a/src/gallium/drivers/i965simple/brw_context.c
+++ b/src/gallium/drivers/i965simple/brw_context.c
@@ -39,7 +39,7 @@
 
 #include "pipe/p_winsys.h"
 #include "pipe/p_context.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_screen.h"
 
 
diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c
index 52bbd525c16..824ee7fd6df 100644
--- a/src/gallium/drivers/i965simple/brw_curbe.c
+++ b/src/gallium/drivers/i965simple/brw_curbe.c
@@ -39,7 +39,8 @@
 #include "brw_wm.h"
 #include "pipe/p_state.h"
 #include "pipe/p_winsys.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 #define FILE_DEBUG_FLAG DEBUG_FALLBACKS
 
diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c
index 9c0c78c2369..7c20ea52af5 100644
--- a/src/gallium/drivers/i965simple/brw_draw_upload.c
+++ b/src/gallium/drivers/i965simple/brw_draw_upload.c
@@ -33,6 +33,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 
+
 struct brw_array_state {
    union header_union header;
 
diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c
index 3932e9e9394..5b8016b2e93 100644
--- a/src/gallium/drivers/i965simple/brw_gs_state.c
+++ b/src/gallium/drivers/i965simple/brw_gs_state.c
@@ -34,7 +34,8 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 
 
diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c
index fadfbf94ab3..ab7cd624b27 100644
--- a/src/gallium/drivers/i965simple/brw_screen.c
+++ b/src/gallium/drivers/i965simple/brw_screen.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_winsys.h"
 #include "util/u_string.h"
 
diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c
index 9acd3ea61b2..2a5de61c219 100644
--- a/src/gallium/drivers/i965simple/brw_sf_state.c
+++ b/src/gallium/drivers/i965simple/brw_sf_state.c
@@ -30,11 +30,12 @@
   */
 
 
-
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
 
 static void upload_sf_vp(struct brw_context *brw)
 {
diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c
index 30f37a99d46..86d877d7efd 100644
--- a/src/gallium/drivers/i965simple/brw_shader_info.c
+++ b/src/gallium/drivers/i965simple/brw_shader_info.c
@@ -1,7 +1,7 @@
 
 #include "brw_context.h"
 #include "brw_state.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_parse.h"
 
diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c
index 27ca32843da..af46cb546fa 100644
--- a/src/gallium/drivers/i965simple/brw_state.c
+++ b/src/gallium/drivers/i965simple/brw_state.c
@@ -31,7 +31,7 @@
 
 
 #include "pipe/p_winsys.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_dump.h"
diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c
index 35db76b5941..43a1c89fc40 100644
--- a/src/gallium/drivers/i965simple/brw_state_batch.c
+++ b/src/gallium/drivers/i965simple/brw_state_batch.c
@@ -32,7 +32,7 @@
 #include "brw_state.h"
 #include "brw_winsys.h"
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 /* A facility similar to the data caching code above, which aims to
  * prevent identical commands being issued repeatedly.
diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c
index b3a5124461d..094248fa691 100644
--- a/src/gallium/drivers/i965simple/brw_state_cache.c
+++ b/src/gallium/drivers/i965simple/brw_state_cache.c
@@ -38,7 +38,7 @@
 #include "brw_sf.h"
 #include "brw_gs.h"
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 
 
diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c
index f3174bfe0ae..78d4c0e411b 100644
--- a/src/gallium/drivers/i965simple/brw_state_pool.c
+++ b/src/gallium/drivers/i965simple/brw_state_pool.c
@@ -43,7 +43,8 @@
  */
 
 #include "pipe/p_winsys.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 #include "brw_context.h"
 #include "brw_state.h"
diff --git a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c
index e727601e1e7..bac9161b5f1 100644
--- a/src/gallium/drivers/i965simple/brw_state_upload.c
+++ b/src/gallium/drivers/i965simple/brw_state_upload.c
@@ -33,7 +33,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 /* This is used to initialize brw->state.atoms[].  We could use this
  * list directly except for a single atom, brw_constant_buffer, which
diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c
index 69da2522859..b89756c47b0 100644
--- a/src/gallium/drivers/i965simple/brw_surface.c
+++ b/src/gallium/drivers/i965simple/brw_surface.c
@@ -29,10 +29,9 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_winsys.h"
-#include "util/p_tile.h"
+#include "util/u_tile.h"
 #include "util/u_rect.h"
 
 
diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c
index 9b6cf817238..05eda9d1f26 100644
--- a/src/gallium/drivers/i965simple/brw_tex_layout.c
+++ b/src/gallium/drivers/i965simple/brw_tex_layout.c
@@ -33,16 +33,16 @@
 /* Code to layout images in a mipmap tree for i965.
  */
 
-#include "brw_tex_layout.h"
-
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_winsys.h"
-
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "brw_context.h"
+#include "brw_tex_layout.h"
+
 
 #define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c
index c73469929ce..1eaff878928 100644
--- a/src/gallium/drivers/i965simple/brw_vs_state.c
+++ b/src/gallium/drivers/i965simple/brw_vs_state.c
@@ -34,7 +34,8 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 static void upload_vs_unit( struct brw_context *brw )
 {
diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c
index 7fc5f59a98c..8de565b96cd 100644
--- a/src/gallium/drivers/i965simple/brw_wm.c
+++ b/src/gallium/drivers/i965simple/brw_wm.c
@@ -35,7 +35,7 @@
 #include "brw_wm.h"
 #include "brw_eu.h"
 #include "brw_state.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 
 
diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c
index e6f1a44817d..d50e66f613f 100644
--- a/src/gallium/drivers/i965simple/brw_wm_decl.c
+++ b/src/gallium/drivers/i965simple/brw_wm_decl.c
@@ -2,7 +2,8 @@
 #include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_wm.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_parse.h"
 
diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c
index 6a4a5aef092..ab6410aa607 100644
--- a/src/gallium/drivers/i965simple/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c
@@ -2,7 +2,8 @@
 #include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_wm.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_parse.h"
 
diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c
index b9eaee56ee8..52b2909a651 100644
--- a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c
@@ -34,7 +34,8 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 
 #define COMPAREFUNC_ALWAYS		0
diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c
index f3aa36b07f8..37a9bf919cd 100644
--- a/src/gallium/drivers/i965simple/brw_wm_state.c
+++ b/src/gallium/drivers/i965simple/brw_wm_state.c
@@ -34,7 +34,8 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "brw_wm.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 /***********************************************************************
  * WM unit - fragment programs and rasterization
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 9b1313bc83e..dda90f760a3 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -32,8 +32,8 @@
 #include "draw/draw_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
 #include "util/u_math.h"
+#include "util/u_memory.h"
 #include "sp_clear.h"
 #include "sp_context.h"
 #include "sp_flush.h"
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index cc171bbc396..d0456731bea 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -34,7 +34,7 @@
 
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 #include "tgsi/tgsi_exec.h"
 #include "tgsi/tgsi_parse.h"
diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c
index 20226da78c3..34adac5226c 100644
--- a/src/gallium/drivers/softpipe/sp_fs_llvm.c
+++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c
@@ -36,7 +36,7 @@
 
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 #include "tgsi/tgsi_sse2.h"
 
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 8b7da7c7473..35653a8e48c 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -34,7 +34,7 @@
 
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 #include "tgsi/tgsi_exec.h"
 #include "tgsi/tgsi_sse2.h"
diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c
index 941ab62e00c..038ff04d4f1 100644
--- a/src/gallium/drivers/softpipe/sp_prim_setup.c
+++ b/src/gallium/drivers/softpipe/sp_prim_setup.c
@@ -41,7 +41,7 @@
 #include "sp_prim_setup.h"
 #include "draw/draw_pipe.h"
 #include "draw/draw_vertex.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 /**
  * Triangle setup info (derived from draw_stage).
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index e9fae951e0b..425e13cd283 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -43,6 +43,7 @@
 #include "sp_setup.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
+#include "util/u_memory.h"
 
 
 #define SP_MAX_VBUF_INDEXES 1024
diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
index 7a42b08ef53..7d3580fb4f2 100644
--- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c
@@ -7,7 +7,7 @@
 #include "sp_headers.h"
 #include "sp_quad.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 
 static void
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index 74c6bff84ae..a834accb863 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -31,7 +31,8 @@
  */
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "sp_context.h"
 #include "sp_headers.h"
 #include "sp_surface.h"
@@ -128,15 +129,15 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad)
 
       /* convert to ubyte */
       for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
-         UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */
-         UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */
-         UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */
-         UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */
-
-         UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */
-         UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */
-         UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */
-         UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */
+         dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
+         dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
+         dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
+         dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
+
+         src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
+         src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
+         src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
+         src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
       }
 
       switch (softpipe->blend->logicop_func) {
@@ -209,10 +210,10 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad)
       }
 
       for (j = 0; j < 4; j++) {
-         quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]);
-         quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]);
-         quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]);
-         quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]);
+         quadColor[j][0] = ubyte_to_float(res[j][0]);
+         quadColor[j][1] = ubyte_to_float(res[j][1]);
+         quadColor[j][2] = ubyte_to_float(res[j][2]);
+         quadColor[j][3] = ubyte_to_float(res[j][3]);
       }
    }
 
diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c
index b3db428ef18..92e9af09c18 100644
--- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c
+++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c
@@ -1,5 +1,5 @@
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "sp_context.h"
 #include "sp_headers.h"
 #include "sp_surface.h"
diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c
index 7fe080990bd..f72f31db973 100644
--- a/src/gallium/drivers/softpipe/sp_quad_colormask.c
+++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c
@@ -31,7 +31,8 @@
  */
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "sp_context.h"
 #include "sp_headers.h"
 #include "sp_surface.h"
diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c
index dd5ebb22961..ad907ec25fe 100644
--- a/src/gallium/drivers/softpipe/sp_quad_coverage.c
+++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c
@@ -33,7 +33,7 @@
 
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "sp_context.h"
 #include "sp_headers.h"
 #include "sp_quad.h"
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
index 0c82692c6ee..227cb2014e1 100644
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -30,7 +30,7 @@
  */
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "sp_context.h"
 #include "sp_headers.h"
 #include "sp_surface.h"
diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c
index 22ea99049fe..5a66a866993 100644
--- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c
+++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c
@@ -30,7 +30,7 @@
  */
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "sp_headers.h"
 #include "sp_quad.h"
 
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 8c88c192f8f..5499ba5361f 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -35,7 +35,8 @@
  * all the enabled attributes run contiguously.
  */
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 
diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c
index 54254df1f11..db13e73ae35 100644
--- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c
+++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c
@@ -33,7 +33,7 @@
 
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "sp_context.h"
 #include "sp_headers.h"
 #include "sp_surface.h"
diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c
index 40083138a43..b64646a449f 100644
--- a/src/gallium/drivers/softpipe/sp_quad_output.c
+++ b/src/gallium/drivers/softpipe/sp_quad_output.c
@@ -25,7 +25,7 @@
  * 
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "sp_context.h"
 #include "sp_headers.h"
 #include "sp_surface.h"
diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c
index b4c7e942fa5..ce9562e07c6 100644
--- a/src/gallium/drivers/softpipe/sp_quad_stencil.c
+++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c
@@ -10,7 +10,7 @@
 #include "sp_tile_cache.h"
 #include "sp_quad.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 
 /** Only 8-bit stencil supported */
diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c
index f1e9b80e09a..a39ecc2e9d4 100644
--- a/src/gallium/drivers/softpipe/sp_quad_stipple.c
+++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c
@@ -7,7 +7,7 @@
 #include "sp_headers.h"
 #include "sp_quad.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 
 /**
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index adf9ccf64c6..2106ee1d235 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -32,7 +32,7 @@
 #include "draw/draw_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "sp_context.h"
 #include "sp_query.h"
 
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index f6b3d7ac24f..9644dbd168f 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_winsys.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index c8c55fa6e85..87336ab6e31 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -42,9 +42,9 @@
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_vertex.h"
-#include "pipe/p_util.h"
 #include "pipe/p_shader_tokens.h"
 #include "util/u_math.h"
+#include "util/u_memory.h"
 
 
 #define DEBUG_VERTS 0
diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c
index 2d40d6bd8f5..384fe559afd 100644
--- a/src/gallium/drivers/softpipe/sp_state_blend.c
+++ b/src/gallium/drivers/softpipe/sp_state_blend.c
@@ -28,7 +28,7 @@
 /* Authors:  Keith Whitwell <keith@tungstengraphics.com>
  */
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "sp_context.h"
 #include "sp_state.h"
 
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index f10a1fa4718..6b6a4c3ff34 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -25,7 +25,8 @@
  * 
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vertex.h"
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index 76fe6bfef9f..1be461b3a46 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -30,7 +30,7 @@
 #include "sp_fs.h"
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_winsys.h"
 #include "pipe/p_shader_tokens.h"
diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
index 98e04352dbc..87b72196838 100644
--- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c
+++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "sp_context.h"
 #include "sp_state.h"
 #include "draw/draw_context.h"
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index 033288a0aa3..99a28c0d7e0 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -29,7 +29,7 @@
  *  Brian Paul
  */
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 
 #include "draw/draw_context.h"
diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c
index bfbae234f1d..389aceb27ce 100644
--- a/src/gallium/drivers/softpipe/sp_surface.c
+++ b/src/gallium/drivers/softpipe/sp_surface.c
@@ -26,10 +26,9 @@
  **************************************************************************/
 
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_winsys.h"
-#include "util/p_tile.h"
+#include "util/u_tile.h"
 #include "util/u_rect.h"
 #include "sp_context.h"
 #include "sp_surface.h"
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 58a95d13e13..49250ec084c 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -39,9 +39,9 @@
 #include "sp_tile_cache.h"
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
 #include "tgsi/tgsi_exec.h"
 #include "util/u_math.h"
+#include "util/u_memory.h"
 
 
 /*
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index f775591352b..3a737d6f722 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -33,8 +33,9 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 #include "sp_context.h"
 #include "sp_state.h"
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 57c12ffe335..b50c9845133 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -32,9 +32,9 @@
  *    Brian Paul
  */
 
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
-#include "util/p_tile.h"
+#include "util/u_memory.h"
+#include "util/u_tile.h"
 #include "sp_context.h"
 #include "sp_surface.h"
 #include "sp_texture.h"
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index f16359e8ad6..1dd77193791 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -25,7 +25,7 @@
  *
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_screen.h"
 
 #include "tr_dump.h"
diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c
index 1613a626df4..48032c1617f 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -45,6 +45,8 @@
 #endif
 
 #include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "util/u_memory.h"
 #include "util/u_string.h"
 
 #include "tr_stream.h"
diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h
index 6ddc8fc15c4..76a53731b31 100644
--- a/src/gallium/drivers/trace/tr_dump.h
+++ b/src/gallium/drivers/trace/tr_dump.h
@@ -35,7 +35,6 @@
 
 
 #include "pipe/p_compiler.h"
-#include "pipe/p_util.h"
 
 
 boolean trace_dump_trace_begin(void);
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index a6467ec35fd..8789f86b1a8 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -25,7 +25,7 @@
  *
  **************************************************************************/
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 #include "tr_dump.h"
 #include "tr_state.h"
diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c
index 30ab5a8fdcb..986d939e0c6 100644
--- a/src/gallium/drivers/trace/tr_state.c
+++ b/src/gallium/drivers/trace/tr_state.c
@@ -27,6 +27,7 @@
 
 
 #include "pipe/p_compiler.h"
+#include "util/u_memory.h"
 #include "tgsi/tgsi_dump.h"
 
 #include "tr_dump.h"
diff --git a/src/gallium/drivers/trace/tr_stream_stdc.c b/src/gallium/drivers/trace/tr_stream_stdc.c
index 4c77e1c995c..4c19ec0b243 100644
--- a/src/gallium/drivers/trace/tr_stream_stdc.c
+++ b/src/gallium/drivers/trace/tr_stream_stdc.c
@@ -36,7 +36,7 @@
 
 #include <stdio.h>
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 
 #include "tr_stream.h"
 
diff --git a/src/gallium/drivers/trace/tr_stream_wd.c b/src/gallium/drivers/trace/tr_stream_wd.c
index b3b65f09716..704eb15bd71 100644
--- a/src/gallium/drivers/trace/tr_stream_wd.c
+++ b/src/gallium/drivers/trace/tr_stream_wd.c
@@ -37,7 +37,7 @@
 #include <windows.h>
 #include <winddi.h>
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "util/u_string.h"
 
 #include "tr_stream.h"
diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c
index 99ba74d3667..440a78704ab 100644
--- a/src/gallium/drivers/trace/tr_texture.c
+++ b/src/gallium/drivers/trace/tr_texture.c
@@ -25,9 +25,9 @@
  *
  **************************************************************************/
 
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "util/u_hash_table.h"
+#include "util/u_memory.h"
 
 #include "tr_screen.h"
 #include "tr_texture.h"
diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c
index 2c7a6f893b0..177835854e1 100644
--- a/src/gallium/drivers/trace/tr_winsys.c
+++ b/src/gallium/drivers/trace/tr_winsys.c
@@ -25,8 +25,7 @@
  *
  **************************************************************************/
 
-#include "pipe/p_util.h"
-#include "pipe/p_state.h"
+#include "util/u_memory.h"
 #include "util/u_hash_table.h"
 
 #include "tr_dump.h"
diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h
deleted file mode 100644
index 4a3fca59621..00000000000
--- a/src/gallium/include/pipe/p_util.h
+++ /dev/null
@@ -1,460 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef P_UTIL_H
-#define P_UTIL_H
-
-#include "p_config.h"
-#include "p_compiler.h"
-#include "p_debug.h"
-#include "p_pointer.h"
-
-#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
-__inline double ceil(double val)
-{
-	double ceil_val;
-
-	if((val - (long) val) == 0) {
-		ceil_val = val;
-	} else {
-		if(val > 0) {
-			ceil_val = (long) val + 1;
-		} else {
-			ceil_val = (long) val;
-		}
-	}
-
-	return ceil_val;
-}
-
-#ifndef PIPE_SUBSYSTEM_WINDOWS_CE
-__inline double floor(double val)
-{
-	double floor_val;
-
-	if((val - (long) val) == 0) {
-		floor_val = val;
-	} else {
-		if(val > 0) {
-			floor_val = (long) val;
-		} else {
-			floor_val = (long) val - 1;
-		}
-	}
-
-	return floor_val;
-}
-#endif
-
-#pragma function(pow)
-__inline double __cdecl pow(double val, double exponent)
-{
-	/* XXX */
-	assert(0);
-	return 0;
-}
-
-#pragma function(log)
-__inline double __cdecl log(double val)
-{
-	/* XXX */
-	assert(0);
-	return 0;
-}
-
-#pragma function(atan2)
-__inline double __cdecl atan2(double val)
-{
-	/* XXX */
-	assert(0);
-	return 0;
-}
-#else
-#include <math.h>
-#include <stdarg.h>
-#endif
-
- /* Define ENOMEM for WINCE */ 
-#if (_WIN32_WCE < 600)
-#ifndef ENOMEM
-#define ENOMEM 12
-#endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) 
-
-/* memory debugging */
-
-#include "p_debug.h"
-
-#define MALLOC( _size ) \
-   debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size )
-#define CALLOC( _count, _size ) \
-   debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size )
-#define FREE( _ptr ) \
-   debug_free( __FILE__, __LINE__, __FUNCTION__,  _ptr )
-#define REALLOC( _ptr, _old_size, _size ) \
-   debug_realloc( __FILE__, __LINE__, __FUNCTION__,  _ptr, _old_size, _size )
-
-#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
-
-void * __stdcall
-EngAllocMem(
-    unsigned long Flags,
-    unsigned long MemSize,
-    unsigned long Tag );
-
-void __stdcall
-EngFreeMem(
-    void *Mem );
-
-#define MALLOC( _size ) EngAllocMem( 0, _size, 'D3AG' )
-#define _FREE( _ptr ) EngFreeMem( _ptr )
-
-#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
-
-void *
-ExAllocatePool(
-    unsigned long PoolType, 
-    size_t NumberOfBytes);
-
-void 
-ExFreePool(void *P);
-
-#define MALLOC(_size) ExAllocatePool(0, _size)
-#define _FREE(_ptr) ExFreePool(_ptr)
-
-#else
-
-#define MALLOC( SIZE )  malloc( SIZE )
-#define CALLOC( COUNT, SIZE )   calloc( COUNT, SIZE )
-#define FREE( PTR )  free( PTR )
-#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE )  realloc( OLDPTR, NEWSIZE )
-
-#endif
-
-
-#ifndef CALLOC
-static INLINE void *
-CALLOC( unsigned count, unsigned size )
-{
-   void *ptr = MALLOC( count * size );
-   if( ptr ) {
-      memset( ptr, 0, count * size );
-   }
-   return ptr;
-}
-#endif /* !CALLOC */
-
-#ifndef FREE
-static INLINE void
-FREE( void *ptr )
-{
-   if( ptr ) {
-      _FREE( ptr );
-   }
-}
-#endif /* !FREE */
-
-#ifndef REALLOC
-static INLINE void *
-REALLOC( void *old_ptr, unsigned old_size, unsigned new_size )
-{
-   void *new_ptr = NULL;
-
-   if (new_size != 0) {
-      unsigned copy_size = old_size < new_size ? old_size : new_size;
-      new_ptr = MALLOC( new_size );
-      if (new_ptr && old_ptr && copy_size) {
-         memcpy( new_ptr, old_ptr, copy_size );
-      }
-   }
-
-   FREE( old_ptr );
-   return new_ptr;
-}
-#endif /* !REALLOC */
-
-
-#define MALLOC_STRUCT(T)   (struct T *) MALLOC(sizeof(struct T))
-
-#define CALLOC_STRUCT(T)   (struct T *) CALLOC(1, sizeof(struct T))
-
-
-/**
- * Return memory on given byte alignment
- */
-static INLINE void *
-align_malloc(size_t bytes, uint alignment)
-{
-#if defined(HAVE_POSIX_MEMALIGN)
-   void *mem;
-   alignment = (alignment + (uint)sizeof(void*) - 1) & ~((uint)sizeof(void*) - 1);
-   if(posix_memalign(& mem, alignment, bytes) != 0)
-      return NULL;
-   return mem;
-#else
-   char *ptr, *buf;
-
-   assert( alignment > 0 );
-
-   ptr = (char *) MALLOC(bytes + alignment + sizeof(void *));
-   if (!ptr)
-      return NULL;
-
-   buf = (char *) align_pointer( ptr + sizeof(void *), alignment );
-   *(char **)(buf - sizeof(void *)) = ptr;
-
-   return buf;
-#endif /* defined(HAVE_POSIX_MEMALIGN) */
-}
-
-/**
- * Free memory returned by align_malloc().
- */
-static INLINE void
-align_free(void *ptr)
-{
-#if defined(HAVE_POSIX_MEMALIGN)
-   FREE(ptr);
-#else
-   void **cubbyHole = (void **) ((char *) ptr - sizeof(void *));
-   void *realAddr = *cubbyHole;
-   FREE(realAddr);
-#endif /* defined(HAVE_POSIX_MEMALIGN) */
-}
-
-
-
-/**
- * Duplicate a block of memory.
- */
-static INLINE void *
-mem_dup(const void *src, uint size)
-{
-   void *dup = MALLOC(size);
-   if (dup)
-      memcpy(dup, src, size);
-   return dup;
-}
-
-
-
-#define CLAMP( X, MIN, MAX )  ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) )
-#define MIN2( A, B )   ( (A)<(B) ? (A) : (B) )
-#define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )
-
-#ifndef Elements
-#define Elements(x) (sizeof(x)/sizeof((x)[0]))
-#endif
-#define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER))
-
-/**
- * Return a pointer aligned to next multiple of 16 bytes.
- */
-static INLINE void *
-align16( void *unaligned )
-{
-   return align_pointer( unaligned, 16 );
-}
-
-
-static INLINE int align(int value, int alignment)
-{
-   return (value + alignment - 1) & ~(alignment - 1);
-}
-
-
-
-
-#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
-static INLINE unsigned ffs( unsigned u )
-{
-   unsigned i;
-
-   if( u == 0 ) {
-      return 0;
-   }
-
-   __asm bsf eax, [u]
-   __asm inc eax
-   __asm mov [i], eax
-
-   return i;
-}
-#endif
-
-union fi {
-   float f;
-   int i;
-   unsigned ui;
-};
-
-#define UBYTE_TO_FLOAT( ub ) ((float)(ub) / 255.0F)
-
-#define IEEE_0996 0x3f7f0000	/* 0.996 or so */
-
-/* This function/macro is sensitive to precision.  Test very carefully
- * if you change it!
- */
-#define UNCLAMPED_FLOAT_TO_UBYTE(UB, F)					\
-        do {								\
-           union fi __tmp;						\
-           __tmp.f = (F);						\
-           if (__tmp.i < 0)						\
-              UB = (ubyte) 0;						\
-           else if (__tmp.i >= IEEE_0996)				\
-              UB = (ubyte) 255;					\
-           else {							\
-              __tmp.f = __tmp.f * (255.0f/256.0f) + 32768.0f;		\
-              UB = (ubyte) __tmp.i;					\
-           }								\
-        } while (0)
-
-
-
-static INLINE unsigned pack_ub4( unsigned char b0,
-				 unsigned char b1,
-				 unsigned char b2,
-				 unsigned char b3 )
-{
-   return ((((unsigned int)b0) << 0) |
-	   (((unsigned int)b1) << 8) |
-	   (((unsigned int)b2) << 16) |
-	   (((unsigned int)b3) << 24));
-}
-
-static INLINE unsigned fui( float f )
-{
-   union fi fi;
-   fi.f = f;
-   return fi.ui;
-}
-
-static INLINE unsigned char float_to_ubyte( float f )
-{
-   unsigned char ub;
-   UNCLAMPED_FLOAT_TO_UBYTE(ub, f);
-   return ub;
-}
-
-static INLINE unsigned pack_ui32_float4( float a,
-					 float b, 
-					 float c, 
-					 float d )
-{
-   return pack_ub4( float_to_ubyte(a),
-		    float_to_ubyte(b),
-		    float_to_ubyte(c),
-		    float_to_ubyte(d) );
-}
-
-#define COPY_4V( DST, SRC )         \
-do {                                \
-   (DST)[0] = (SRC)[0];             \
-   (DST)[1] = (SRC)[1];             \
-   (DST)[2] = (SRC)[2];             \
-   (DST)[3] = (SRC)[3];             \
-} while (0)
-
-
-#define COPY_4FV( DST, SRC )  COPY_4V(DST, SRC)
-
-
-#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \
-do {                                     \
-   (DST)[0] = (V0);                      \
-   (DST)[1] = (V1);                      \
-   (DST)[2] = (V2);                      \
-   (DST)[3] = (V3);                      \
-} while (0)
-
-
-
-#if defined(_MSC_VER) 
-#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
- 
-static INLINE float cosf( float f ) 
-{
-   return (float) cos( (double) f );
-}
-
-static INLINE float sinf( float f ) 
-{
-   return (float) sin( (double) f );
-}
-
-static INLINE float ceilf( float f ) 
-{
-   return (float) ceil( (double) f );
-}
-
-static INLINE float floorf( float f ) 
-{
-   return (float) floor( (double) f );
-}
-
-static INLINE float powf( float f, float g ) 
-{
-   return (float) pow( (double) f, (double) g );
-}
-
-static INLINE float sqrtf( float f ) 
-{
-   return (float) sqrt( (double) f );
-}
-
-static INLINE float fabsf( float f ) 
-{
-   return (float) fabs( (double) f );
-}
-
-static INLINE float logf( float f ) 
-{
-   return (float) log( (double) f );
-}
-
-#else
-/* Work-around an extra semi-colon in VS 2005 logf definition */
-#ifdef logf
-#undef logf
-#define logf(x) ((float)log((double)(x)))
-#endif /* logf */
-#endif
-#endif /* _MSC_VER */
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i
index 641b19e9406..a67372c6239 100644
--- a/src/gallium/state_trackers/python/gallium.i
+++ b/src/gallium/state_trackers/python/gallium.i
@@ -42,7 +42,7 @@
 #include "pipe/p_screen.h"
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h" 
 #include "cso_cache/cso_context.h"
 #include "util/u_draw_quad.h" 
diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c
index a1889539dce..f71d85dd9b4 100644
--- a/src/gallium/state_trackers/python/st_device.c
+++ b/src/gallium/state_trackers/python/st_device.c
@@ -26,12 +26,13 @@
  **************************************************************************/
 
 
-#include "pipe/p_util.h"
 #include "pipe/p_winsys.h"
 #include "pipe/p_context.h"
 #include "pipe/p_shader_tokens.h"
 #include "pipe/p_inlines.h"
 #include "cso_cache/cso_context.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "util/u_simple_shaders.h"
 #include "trace/tr_screen.h"
 #include "trace/tr_context.h"
diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c
index b47c7be2933..7765df3c4a4 100644
--- a/src/gallium/state_trackers/python/st_sample.c
+++ b/src/gallium/state_trackers/python/st_sample.c
@@ -29,9 +29,10 @@
 #include "pipe/p_compiler.h"
 #include "pipe/p_format.h"
 #include "pipe/p_state.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
-#include "util/p_tile.h"
+#include "util/u_tile.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 #include "st_sample.h"
 
diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c
index 6ea3c9a5cf0..2d4f5434b35 100644
--- a/src/gallium/state_trackers/python/st_softpipe_winsys.c
+++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c
@@ -39,8 +39,9 @@
 #include "pipe/p_winsys.h"
 #include "pipe/p_format.h"
 #include "pipe/p_context.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "softpipe/sp_winsys.h"
 #include "st_winsys.h"
 
diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.c b/src/gallium/winsys/drm/intel/common/intel_be_device.c
index 8db03296156..019ee5cbd2e 100644
--- a/src/gallium/winsys/drm/intel/common/intel_be_device.c
+++ b/src/gallium/winsys/drm/intel/common/intel_be_device.c
@@ -13,8 +13,8 @@
 #include "pipe/p_winsys.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
+#include "util/u_memory.h"
 
 #include "i915simple/i915_screen.h"
 
diff --git a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c
index 0d98d16cf1f..20920a20529 100644
--- a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c
+++ b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c
@@ -32,8 +32,8 @@
 #include "intel_context.h"
 #include "intel_winsys_softpipe.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_format.h"
+#include "util/u_memory.h"
 #include "softpipe/sp_winsys.h"
 
 
diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c
index 829732eea8b..e9f821d2764 100644
--- a/src/gallium/winsys/egl_xlib/egl_xlib.c
+++ b/src/gallium/winsys/egl_xlib/egl_xlib.c
@@ -38,8 +38,8 @@
 #include "pipe/p_compiler.h"
 #include "pipe/p_format.h"
 #include "pipe/p_state.h"
-#include "pipe/p_util.h"
 #include "pipe/p_winsys.h"
+#include "util/u_memory.h"
 #include "softpipe/sp_winsys.h"
 
 #include "eglconfig.h"
diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c
index f4199e6f891..ae81d7f801b 100644
--- a/src/gallium/winsys/egl_xlib/sw_winsys.c
+++ b/src/gallium/winsys/egl_xlib/sw_winsys.c
@@ -37,8 +37,9 @@
 
 #include "pipe/p_winsys.h"
 #include "pipe/p_state.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 #include "sw_winsys.h"
 
diff --git a/src/gallium/winsys/gdi/wmesa.c b/src/gallium/winsys/gdi/wmesa.c
index ff52ceb8c41..730fb1b5417 100644
--- a/src/gallium/winsys/gdi/wmesa.c
+++ b/src/gallium/winsys/gdi/wmesa.c
@@ -12,8 +12,8 @@
 #include "pipe/p_winsys.h"
 #include "pipe/p_format.h"
 #include "pipe/p_context.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
+#include "util/u_memory.h"
 #include "softpipe/sp_winsys.h"
 #include "glapi/glapi.h"
 #include "colors.h"
diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/brw_aub.c
index 6e814ce5d11..f3198029629 100644
--- a/src/gallium/winsys/xlib/brw_aub.c
+++ b/src/gallium/winsys/xlib/brw_aub.c
@@ -34,7 +34,6 @@
 #include "brw_aub.h"
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
-#include "pipe/p_util.h"
 #include "pipe/p_debug.h"
 
 
diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c
index 4b4dc56e843..68ead7f528e 100644
--- a/src/gallium/winsys/xlib/xm_winsys.c
+++ b/src/gallium/winsys/xlib/xm_winsys.c
@@ -42,8 +42,9 @@
 #include "pipe/p_winsys.h"
 #include "pipe/p_format.h"
 #include "pipe/p_context.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "softpipe/sp_winsys.h"
 
 #ifdef GALLIUM_CELL
diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c
index 7fc9debdd54..3439367636d 100644
--- a/src/gallium/winsys/xlib/xm_winsys_aub.c
+++ b/src/gallium/winsys/xlib/xm_winsys_aub.c
@@ -37,7 +37,7 @@
 #include "xmesaP.h"
 
 #include "pipe/p_winsys.h"
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
 #include "pipe/p_inlines.h"
 #include "i965simple/brw_winsys.h"
 #include "i965simple/brw_screen.h"
diff --git a/src/mesa/state_tracker/acc2.c b/src/mesa/state_tracker/acc2.c
new file mode 100644
index 00000000000..fa5de2b764c
--- /dev/null
+++ b/src/mesa/state_tracker/acc2.c
@@ -0,0 +1,319 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+ /*
+  * Authors:
+  *   Brian Paul
+  */
+
+#include "main/imports.h"
+#include "main/image.h"
+#include "main/macros.h"
+
+#include "st_context.h"
+#include "st_cb_accum.h"
+#include "st_cb_fbo.h"
+#include "st_draw.h"
+#include "st_format.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "util/p_tile.h"
+
+
+#define UNCLAMPED_FLOAT_TO_SHORT(us, f)  \
+   us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) )
+
+
+/**
+ * For hardware that supports deep color buffers, we could accelerate
+ * most/all the accum operations with blending/texturing.
+ * For now, just use the get/put_tile() functions and do things in software.
+ */
+
+
+static void
+acc_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps,
+                  uint x, uint y, uint w, uint h, float *p)
+{
+   const enum pipe_format f = acc_ps->format;
+   const int cpp = acc_ps->cpp;
+
+   acc_ps->format = PIPE_FORMAT_R16G16B16A16_SNORM;
+   acc_ps->cpp = 8;
+
+   pipe_get_tile_rgba(pipe, acc_ps, x, y, w, h, p);
+
+   acc_ps->format = f;
+   acc_ps->cpp = cpp;
+}
+
+
+static void
+acc_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps,
+                  uint x, uint y, uint w, uint h, const float *p)
+{
+   enum pipe_format f = acc_ps->format;
+   const int cpp = acc_ps->cpp;
+
+   acc_ps->format = PIPE_FORMAT_R16G16B16A16_SNORM;
+   acc_ps->cpp = 8;
+
+   pipe_put_tile_rgba(pipe, acc_ps, x, y, w, h, p);
+
+   acc_ps->format = f;
+   acc_ps->cpp = cpp;
+}
+
+
+
+void
+st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb)
+{
+   struct pipe_context *pipe = ctx->st->pipe;
+   struct st_renderbuffer *acc_strb = st_renderbuffer(rb);
+   struct pipe_surface *acc_ps = acc_strb->surface;
+   const GLint xpos = ctx->DrawBuffer->_Xmin;
+   const GLint ypos = ctx->DrawBuffer->_Ymin;
+   const GLint width = ctx->DrawBuffer->_Xmax - xpos;
+   const GLint height = ctx->DrawBuffer->_Ymax - ypos;
+   const GLfloat r = ctx->Accum.ClearColor[0];
+   const GLfloat g = ctx->Accum.ClearColor[1];
+   const GLfloat b = ctx->Accum.ClearColor[2];
+   const GLfloat a = ctx->Accum.ClearColor[3];
+   GLfloat *accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
+   int i;
+
+#if 1
+   GLvoid *map;
+
+   map = pipe_surface_map(acc_ps);
+   switch (acc_strb->format) {
+   case PIPE_FORMAT_R16G16B16A16_SNORM:
+      {
+         GLshort r = FLOAT_TO_SHORT(ctx->Accum.ClearColor[0]);
+         GLshort g = FLOAT_TO_SHORT(ctx->Accum.ClearColor[1]);
+         GLshort b = FLOAT_TO_SHORT(ctx->Accum.ClearColor[2]);
+         GLshort a = FLOAT_TO_SHORT(ctx->Accum.ClearColor[3]);
+         int i, j;
+         for (i = 0; i < height; i++) {
+            GLshort *dst = ((GLshort *) map
+                            + ((ypos + i) * acc_ps->pitch + xpos) * 4);
+            for (j = 0; j < width; j++) {
+               dst[0] = r;
+               dst[1] = g;
+               dst[2] = b;
+               dst[3] = a;
+               dst += 4;
+            }
+         }
+      }
+      break;
+   default:
+      _mesa_problem(ctx, "unexpected format in st_clear_accum_buffer()");
+   }
+
+   pipe_surface_unmap(acc_ps);
+
+#else
+   for (i = 0; i < width * height; i++) {
+      accBuf[i*4+0] = r;
+      accBuf[i*4+1] = g;
+      accBuf[i*4+2] = b;
+      accBuf[i*4+3] = a;
+   }
+
+   acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf);
+#endif
+}
+
+
+/** For ADD/MULT */
+static void
+accum_mad(struct pipe_context *pipe, GLfloat scale, GLfloat bias,
+          GLint xpos, GLint ypos, GLint width, GLint height,
+          struct pipe_surface *acc_ps)
+{
+   GLfloat *accBuf;
+   GLint i;
+
+   accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
+
+   pipe_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf);
+
+   for (i = 0; i < 4 * width * height; i++) {
+      accBuf[i] = accBuf[i] * scale + bias;
+   }
+
+   pipe_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf);
+
+   free(accBuf);
+}
+
+
+static void
+accum_accum(struct pipe_context *pipe, GLfloat value,
+            GLint xpos, GLint ypos, GLint width, GLint height,
+            struct pipe_surface *acc_ps,
+            struct pipe_surface *color_ps)
+{
+   GLfloat *colorBuf, *accBuf;
+   GLint i;
+
+   colorBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
+   accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
+
+   pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, colorBuf);
+   acc_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf);
+
+   for (i = 0; i < 4 * width * height; i++) {
+      accBuf[i] = accBuf[i] + colorBuf[i] * value;
+   }
+
+   acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf);
+
+   free(colorBuf);
+   free(accBuf);
+}
+
+
+static void
+accum_load(struct pipe_context *pipe, GLfloat value,
+           GLint xpos, GLint ypos, GLint width, GLint height,
+           struct pipe_surface *acc_ps,
+           struct pipe_surface *color_ps)
+{
+   GLfloat *buf;
+   GLint i;
+
+   buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
+
+   pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, buf);
+
+   for (i = 0; i < 4 * width * height; i++) {
+      buf[i] = buf[i] * value;
+   }
+
+   acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, buf);
+
+   free(buf);
+}
+
+
+static void
+accum_return(GLcontext *ctx, GLfloat value,
+             GLint xpos, GLint ypos, GLint width, GLint height,
+             struct pipe_surface *acc_ps,
+             struct pipe_surface *color_ps)
+{
+   struct pipe_context *pipe = ctx->st->pipe;
+   const GLubyte *colormask = ctx->Color.ColorMask;
+   GLfloat *abuf, *cbuf = NULL;
+   GLint i, ch;
+
+   abuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
+
+   acc_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, abuf);
+
+   if (!colormask[0] || !colormask[1] || !colormask[2] || !colormask[3]) {
+      cbuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
+      pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, cbuf);
+   }
+
+   for (i = 0; i < width * height; i++) {
+      for (ch = 0; ch < 4; ch++) {
+         if (colormask[ch]) {
+            GLfloat val = abuf[i * 4 + ch] * value;
+            abuf[i * 4 + ch] = CLAMP(val, 0.0, 1.0);
+         }
+         else {
+            abuf[i * 4 + ch] = cbuf[i * 4 + ch];
+         }
+      }
+   }
+
+   pipe_put_tile_rgba(pipe, color_ps, xpos, ypos, width, height, abuf);
+
+   free(abuf);
+   if (cbuf)
+      free(cbuf);
+}
+
+
+static void
+st_Accum(GLcontext *ctx, GLenum op, GLfloat value)
+{
+   struct st_context *st = ctx->st;
+   struct pipe_context *pipe = st->pipe;
+   struct st_renderbuffer *acc_strb
+     = st_renderbuffer(ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer);
+   struct st_renderbuffer *color_strb
+      = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+   struct pipe_surface *acc_ps = acc_strb->surface;
+   struct pipe_surface *color_ps = color_strb->surface;
+
+   const GLint xpos = ctx->DrawBuffer->_Xmin;
+   const GLint ypos = ctx->DrawBuffer->_Ymin;
+   const GLint width = ctx->DrawBuffer->_Xmax - xpos;
+   const GLint height = ctx->DrawBuffer->_Ymax - ypos;
+
+   /* make sure color bufs aren't cached */
+   pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+
+   switch (op) {
+   case GL_ADD:
+      if (value != 0.0F) {
+         accum_mad(pipe, 1.0, value, xpos, ypos, width, height, acc_ps);
+      }
+      break;
+   case GL_MULT:
+      if (value != 1.0F) {
+         accum_mad(pipe, value, 0.0, xpos, ypos, width, height, acc_ps);
+      }
+      break;
+   case GL_ACCUM:
+      if (value != 0.0F) {
+         accum_accum(pipe, value, xpos, ypos, width, height, acc_ps, color_ps);
+      }
+      break;
+   case GL_LOAD:
+      accum_load(pipe, value, xpos, ypos, width, height, acc_ps, color_ps);
+      break;
+   case GL_RETURN:
+      accum_return(ctx, value, xpos, ypos, width, height, acc_ps, color_ps);
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+
+void st_init_accum_functions(struct dd_function_table *functions)
+{
+   functions->Accum = st_Accum;
+}
diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c
index a992e08ff68..cf3a99e7e9d 100644
--- a/src/mesa/state_tracker/st_cb_accum.c
+++ b/src/mesa/state_tracker/st_cb_accum.c
@@ -42,7 +42,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "util/p_tile.h"
+#include "util/u_tile.h"
 
 
 #define UNCLAMPED_FLOAT_TO_SHORT(us, f)  \
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index d5696a909f7..a0c305d66ff 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -50,7 +50,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "util/p_tile.h"
+#include "util/u_tile.h"
 #include "util/u_draw_quad.h"
 #include "util/u_simple_shaders.h"
 #include "shader/prog_instruction.h"
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 0c5e21d4ff0..4ec7c752dfe 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -55,7 +55,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "util/p_tile.h"
+#include "util/u_tile.h"
 #include "util/u_draw_quad.h"
 #include "shader/prog_instruction.h"
 #include "cso_cache/cso_context.h"
diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c
index 39f5856f94e..c8015327885 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -41,7 +41,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "util/p_tile.h"
+#include "util/u_tile.h"
 #include "st_context.h"
 #include "st_cb_bitmap.h"
 #include "st_cb_readpixels.h"
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 6177ac63f03..16bbf3d80f2 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -51,7 +51,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "util/p_tile.h"
+#include "util/u_tile.h"
 #include "util/u_blit.h"
 
 
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 325d95e8658..936a6e32ea1 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -55,7 +55,7 @@
 #define TGSI_DEBUG 0
 
 
-/** XXX we should use the version of this from p_util.h but including
+/** XXX we should use the version of this from u_memory.h but including
  * that header causes symbol collisions.
  */
 static INLINE void *
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index 63046a0ecce..73cebff33f7 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -36,7 +36,6 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
-#include "pipe/p_util.h"
 #include "pipe/p_inlines.h"
 #include "util/u_rect.h"
 
-- 
cgit v1.2.3


From e3509fd4d09a19293ac3f2e0c92d758bc3ef308c Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Tue, 2 Sep 2008 18:05:09 -0600
Subject: gallium: increase string buffer size to 16000 to avoid truncated
 output of long shaders

---
 src/gallium/auxiliary/tgsi/tgsi_dump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index a168c949280..58693db13aa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -517,7 +517,7 @@ tgsi_dump(
    const struct tgsi_token *tokens,
    uint flags )
 {
-   static char str[4096];
+   static char str[16000];
    uint len;
    char *p = str;
 
-- 
cgit v1.2.3


From b8a7eef242f6bb97d90f6e0303d270b2cbc58421 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Fri, 5 Sep 2008 10:29:17 +0900
Subject: tgsi: Refactor tgsi_dump to avoid using string buffers when dumping.

This fixes a stack overflow when dumping shaders.

It ended up being pretty much as the original code Michal had before,
before I went on a cleanup rampage on it and took things that ended up
needing...
---
 src/gallium/auxiliary/tgsi/tgsi_dump.c | 160 +++++++++++++++++----------------
 1 file changed, 83 insertions(+), 77 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 58693db13aa..afc8ffa553c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -37,30 +37,40 @@ struct dump_ctx
 
    uint instno;
    
-   struct util_strbuf *sbuf;
+   void (*printf)(struct dump_ctx *ctx, const char *format, ...);
 };
 
+static void 
+dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
+{
+   va_list ap;
+   (void)ctx;
+   va_start(ap, format);
+   debug_vprintf(format, ap);
+   va_end(ap);
+}
+
 static void
 dump_enum(
-   struct util_strbuf *sbuf,
+   struct dump_ctx *ctx,
    uint e,
    const char **enums,
    uint enum_count )
 {
    if (e >= enum_count)
-      util_strbuf_printf( sbuf, "%u", e );
+      ctx->printf( ctx, "%u", e );
    else
-      util_strbuf_printf( sbuf, "%s", enums[e] );
+      ctx->printf( ctx, "%s", enums[e] );
 }
 
-#define EOL()           util_strbuf_printf( sbuf, "\n" )
-#define TXT(S)          util_strbuf_printf( sbuf, "%s", S )
-#define CHR(C)          util_strbuf_printf( sbuf, "%c", C )
-#define UIX(I)          util_strbuf_printf( sbuf, "0x%x", I )
-#define UID(I)          util_strbuf_printf( sbuf, "%u", I )
-#define SID(I)          util_strbuf_printf( sbuf, "%d", I )
-#define FLT(F)          util_strbuf_printf( sbuf, "%10.4f", F )
-#define ENM(E,ENUMS)    dump_enum( sbuf, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
+#define EOL()           ctx->printf( ctx, "\n" )
+#define TXT(S)          ctx->printf( ctx, "%s", S )
+#define CHR(C)          ctx->printf( ctx, "%c", C )
+#define UIX(I)          ctx->printf( ctx, "0x%x", I )
+#define UID(I)          ctx->printf( ctx, "%u", I )
+#define SID(I)          ctx->printf( ctx, "%d", I )
+#define FLT(F)          ctx->printf( ctx, "%10.4f", F )
+#define ENM(E,ENUMS)    dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
 
 static const char *processor_type_names[] =
 {
@@ -148,7 +158,7 @@ static const char *modulate_names[TGSI_MODULATE_COUNT] =
 
 static void
 _dump_register(
-   struct util_strbuf *sbuf,
+   struct dump_ctx *ctx,
    uint file,
    int first,
    int last )
@@ -165,7 +175,7 @@ _dump_register(
 
 static void
 _dump_register_ind(
-   struct util_strbuf *sbuf,
+   struct dump_ctx *ctx,
    uint file,
    int index,
    uint ind_file,
@@ -187,7 +197,7 @@ _dump_register_ind(
 
 static void
 _dump_writemask(
-   struct util_strbuf *sbuf,
+   struct dump_ctx *ctx,
    uint writemask )
 {
    if (writemask != TGSI_WRITEMASK_XYZW) {
@@ -208,18 +218,17 @@ iter_declaration(
    struct tgsi_iterate_context *iter,
    struct tgsi_full_declaration *decl )
 {
-   struct dump_ctx *ctx = (struct dump_ctx *) iter;
-   struct util_strbuf *sbuf = ctx->sbuf;
+   struct dump_ctx *ctx = (struct dump_ctx *)iter;
 
    TXT( "DCL " );
 
    _dump_register(
-      sbuf,
+      ctx,
       decl->Declaration.File,
       decl->DeclarationRange.First,
       decl->DeclarationRange.Last );
    _dump_writemask(
-      sbuf,
+      ctx,
       decl->Declaration.UsageMask );
 
    if (decl->Declaration.Semantic) {
@@ -245,17 +254,11 @@ void
 tgsi_dump_declaration(
    const struct tgsi_full_declaration *decl )
 {
-   static char str[1024];
-   struct util_strbuf sbuf;
    struct dump_ctx ctx;
 
-   util_strbuf_init(&sbuf, str, sizeof(str));
-   
-   ctx.sbuf = &sbuf;
+   ctx.printf = dump_ctx_printf;
 
    iter_declaration( &ctx.iter, (struct tgsi_full_declaration *)decl );
-   
-   debug_printf("%s", str);
 }
 
 static boolean
@@ -264,7 +267,6 @@ iter_immediate(
    struct tgsi_full_immediate *imm )
 {
    struct dump_ctx *ctx = (struct dump_ctx *) iter;
-   struct util_strbuf *sbuf = ctx->sbuf;
 
    uint i;
 
@@ -295,17 +297,11 @@ void
 tgsi_dump_immediate(
    const struct tgsi_full_immediate *imm )
 {
-   static char str[1024];
-   struct util_strbuf sbuf;
    struct dump_ctx ctx;
 
-   util_strbuf_init(&sbuf, str, sizeof(str));
-   
-   ctx.sbuf = &sbuf;
+   ctx.printf = dump_ctx_printf;
 
    iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm );
-   
-   debug_printf("%s", str);
 }
 
 static boolean
@@ -314,7 +310,6 @@ iter_instruction(
    struct tgsi_full_instruction *inst )
 {
    struct dump_ctx *ctx = (struct dump_ctx *) iter;
-   struct util_strbuf *sbuf = ctx->sbuf;
    uint instno = ctx->instno++;
    
    uint i;
@@ -345,12 +340,12 @@ iter_instruction(
       CHR( ' ' );
 
       _dump_register(
-         sbuf,
+         ctx,
          dst->DstRegister.File,
          dst->DstRegister.Index,
          dst->DstRegister.Index );
       ENM( dst->DstRegisterExtModulate.Modulate, modulate_names );
-      _dump_writemask( sbuf, dst->DstRegister.WriteMask );
+      _dump_writemask( ctx, dst->DstRegister.WriteMask );
 
       first_reg = FALSE;
    }
@@ -377,7 +372,7 @@ iter_instruction(
 
       if (src->SrcRegister.Indirect) {
          _dump_register_ind(
-            sbuf,
+            ctx,
             src->SrcRegister.File,
             src->SrcRegister.Index,
             src->SrcRegisterInd.File,
@@ -385,7 +380,7 @@ iter_instruction(
       }
       else {
          _dump_register(
-            sbuf,
+            ctx,
             src->SrcRegister.File,
             src->SrcRegister.Index,
             src->SrcRegister.Index );
@@ -460,18 +455,12 @@ tgsi_dump_instruction(
    const struct tgsi_full_instruction *inst,
    uint instno )
 {
-   static char str[1024];
-   struct util_strbuf sbuf;
    struct dump_ctx ctx;
 
-   util_strbuf_init(&sbuf, str, sizeof(str));
-   
    ctx.instno = instno;
-   ctx.sbuf = &sbuf;
+   ctx.printf = dump_ctx_printf;
 
    iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst );
-   
-   debug_printf("%s", str);
 }
 
 static boolean
@@ -479,7 +468,6 @@ prolog(
    struct tgsi_iterate_context *iter )
 {
    struct dump_ctx *ctx = (struct dump_ctx *) iter;
-   struct util_strbuf *sbuf = ctx->sbuf;
    ENM( iter->processor.Processor, processor_type_names );
    UID( iter->version.MajorVersion );
    CHR( '.' );
@@ -489,17 +477,12 @@ prolog(
 }
 
 void
-tgsi_dump_str(
+tgsi_dump(
    const struct tgsi_token *tokens,
-   uint flags,
-   char *str,
-   size_t size)
+   uint flags )
 {
-   struct util_strbuf sbuf;
    struct dump_ctx ctx;
 
-   util_strbuf_init(&sbuf, str, size);
-   
    ctx.iter.prolog = prolog;
    ctx.iter.iterate_instruction = iter_instruction;
    ctx.iter.iterate_declaration = iter_declaration;
@@ -507,34 +490,57 @@ tgsi_dump_str(
    ctx.iter.epilog = NULL;
 
    ctx.instno = 0;
-   ctx.sbuf = &sbuf;
+   ctx.printf = dump_ctx_printf;
 
    tgsi_iterate_shader( tokens, &ctx.iter );
 }
 
+struct str_dump_ctx
+{
+   struct dump_ctx base;
+   char *str;
+   char *ptr;
+   size_t left;
+};
+
+static void
+str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
+{
+   struct str_dump_ctx *sctx = (struct str_dump_ctx *)ctx;
+   
+   if(sctx->left > 1) {
+      size_t written;
+      va_list ap;
+      va_start(ap, format);
+      written = util_vsnprintf(sctx->ptr, sctx->left, format, ap);
+      va_end(ap);
+      sctx->ptr += written;
+      sctx->left -= written;
+   }
+}
+
 void
-tgsi_dump(
+tgsi_dump_str(
    const struct tgsi_token *tokens,
-   uint flags )
+   uint flags,
+   char *str,
+   size_t size)
 {
-   static char str[16000];
-   uint len;
-   char *p = str;
-
-   tgsi_dump_str(tokens, flags, str, sizeof(str));
-
-   /* Workaround output buffer size limitations.
-    */
-   len = strlen( str );
-   while (len > 256) {
-      char piggy_bank;
-
-      piggy_bank = p[256];
-      p[256] = '\0';
-      debug_printf( "%s", p );
-      p[256] = piggy_bank;
-      p += 256;
-      len -= 256;
-   }
-   debug_printf( "%s", p );
+   struct str_dump_ctx ctx;
+
+   ctx.base.iter.prolog = prolog;
+   ctx.base.iter.iterate_instruction = iter_instruction;
+   ctx.base.iter.iterate_declaration = iter_declaration;
+   ctx.base.iter.iterate_immediate = iter_immediate;
+   ctx.base.iter.epilog = NULL;
+
+   ctx.base.instno = 0;
+   ctx.base.printf = &str_dump_ctx_printf;
+
+   ctx.str = str;
+   ctx.str[0] = 0;
+   ctx.ptr = str;
+   ctx.left = size;
+
+   tgsi_iterate_shader( tokens, &ctx.base.iter );
 }
-- 
cgit v1.2.3


From 5a25628bd20684ac67adcb542647a0a2d7649a37 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@ubuntu-vbox.(none)>
Date: Fri, 5 Sep 2008 17:08:50 +0200
Subject: tgsi: Cleanup code.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 87 +++++++++++++++-------------------
 1 file changed, 37 insertions(+), 50 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 626724ad4e4..4681b29f52b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -601,12 +601,10 @@ static void PIPE_CDECL
 cos4f(
    float *store )
 {
-   const unsigned X = 0;
-
-   store[X + 0] = cosf( store[X + 0] );
-   store[X + 1] = cosf( store[X + 1] );
-   store[X + 2] = cosf( store[X + 2] );
-   store[X + 3] = cosf( store[X + 3] );
+   store[0] = cosf( store[0] );
+   store[1] = cosf( store[1] );
+   store[2] = cosf( store[2] );
+   store[3] = cosf( store[3] );
 }
 
 static void
@@ -624,18 +622,16 @@ static void PIPE_CDECL
 ex24f(
    float *store )
 {
-   const unsigned X = 0;
-
 #if FAST_MATH
-   store[X + 0] = util_fast_exp2( store[X + 0] );
-   store[X + 1] = util_fast_exp2( store[X + 1] );
-   store[X + 2] = util_fast_exp2( store[X + 2] );
-   store[X + 3] = util_fast_exp2( store[X + 3] );
+   store[0] = util_fast_exp2( store[0] );
+   store[1] = util_fast_exp2( store[1] );
+   store[2] = util_fast_exp2( store[2] );
+   store[3] = util_fast_exp2( store[3] );
 #else
-   store[X + 0] = powf( 2.0f, store[X + 0] );
-   store[X + 1] = powf( 2.0f, store[X + 1] );
-   store[X + 2] = powf( 2.0f, store[X + 2] );
-   store[X + 3] = powf( 2.0f, store[X + 3] );
+   store[0] = powf( 2.0f, store[0] );
+   store[1] = powf( 2.0f, store[1] );
+   store[2] = powf( 2.0f, store[2] );
+   store[3] = powf( 2.0f, store[3] );
 #endif
 }
 
@@ -665,12 +661,10 @@ static void PIPE_CDECL
 flr4f(
    float *store )
 {
-   const unsigned X = 0;
-
-   store[X + 0] = floorf( store[X + 0] );
-   store[X + 1] = floorf( store[X + 1] );
-   store[X + 2] = floorf( store[X + 2] );
-   store[X + 3] = floorf( store[X + 3] );
+   store[0] = floorf( store[0] );
+   store[1] = floorf( store[1] );
+   store[2] = floorf( store[2] );
+   store[3] = floorf( store[3] );
 }
 
 static void
@@ -688,12 +682,10 @@ static void PIPE_CDECL
 frc4f(
    float *store )
 {
-   const unsigned X = 0;
-
-   store[X + 0] -= floorf( store[X + 0] );
-   store[X + 1] -= floorf( store[X + 1] );
-   store[X + 2] -= floorf( store[X + 2] );
-   store[X + 3] -= floorf( store[X + 3] );
+   store[0] -= floorf( store[0] );
+   store[1] -= floorf( store[1] );
+   store[2] -= floorf( store[2] );
+   store[3] -= floorf( store[3] );
 }
 
 static void
@@ -711,12 +703,10 @@ static void PIPE_CDECL
 lg24f(
    float *store )
 {
-   const unsigned X = 0;
-
-   store[X + 0] = util_fast_log2( store[X + 0] );
-   store[X + 1] = util_fast_log2( store[X + 1] );
-   store[X + 2] = util_fast_log2( store[X + 2] );
-   store[X + 3] = util_fast_log2( store[X + 3] );
+   store[0] = util_fast_log2( store[0] );
+   store[1] = util_fast_log2( store[1] );
+   store[2] = util_fast_log2( store[2] );
+   store[3] = util_fast_log2( store[3] );
 }
 
 static void
@@ -770,18 +760,16 @@ static void PIPE_CDECL
 pow4f(
    float *store )
 {
-   const unsigned X = 0;
-
 #if FAST_MATH
-   store[X + 0] = util_fast_pow( store[X + 0], store[X + 4] );
-   store[X + 1] = util_fast_pow( store[X + 1], store[X + 5] );
-   store[X + 2] = util_fast_pow( store[X + 2], store[X + 6] );
-   store[X + 3] = util_fast_pow( store[X + 3], store[X + 7] );
+   store[0] = util_fast_pow( store[0], store[4] );
+   store[1] = util_fast_pow( store[1], store[5] );
+   store[2] = util_fast_pow( store[2], store[6] );
+   store[3] = util_fast_pow( store[3], store[7] );
 #else
-   store[X + 0] = powf( store[X + 0], store[X + 4] );
-   store[X + 1] = powf( store[X + 1], store[X + 5] );
-   store[X + 2] = powf( store[X + 2], store[X + 6] );
-   store[X + 3] = powf( store[X + 3], store[X + 7] );
+   store[0] = powf( store[0], store[4] );
+   store[1] = powf( store[1], store[5] );
+   store[2] = powf( store[2], store[6] );
+   store[3] = powf( store[3], store[7] );
 #endif
 }
 
@@ -877,12 +865,10 @@ static void PIPE_CDECL
 sin4f(
    float *store )
 {
-   const unsigned X = 0;
-
-   store[X + 0] = sinf( store[X + 0] );
-   store[X + 1] = sinf( store[X + 1] );
-   store[X + 2] = sinf( store[X + 2] );
-   store[X + 3] = sinf( store[X + 3] );
+   store[0] = sinf( store[0] );
+   store[1] = sinf( store[1] );
+   store[2] = sinf( store[2] );
+   store[3] = sinf( store[3] );
 }
 
 static void
@@ -2416,3 +2402,4 @@ tgsi_emit_sse2(
 }
 
 #endif /* PIPE_ARCH_X86 */
+
-- 
cgit v1.2.3


From dc1834a873726b9920c2cae6ffad86e09d4637ee Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Wed, 10 Sep 2008 10:32:52 +0900
Subject: tgsi: Verify constants are set before attempting to read them.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index fb573fe1f0c..df002939c6b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -957,6 +957,7 @@ fetch_src_file_channel(
    case TGSI_EXTSWIZZLE_W:
       switch( file ) {
       case TGSI_FILE_CONSTANT:
+         assert(mach->Consts);
          chan->f[0] = mach->Consts[index->i[0]][swizzle];
          chan->f[1] = mach->Consts[index->i[1]][swizzle];
          chan->f[2] = mach->Consts[index->i[2]][swizzle];
-- 
cgit v1.2.3


From eb5b16d278e0f7ee0121049e43dfee1d52f2b0f7 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Wed, 10 Sep 2008 10:33:03 +0900
Subject: tgsi: Fix newline pos.

---
 src/gallium/auxiliary/tgsi/tgsi_sanity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 0b5bdd6ba1c..c6590272969 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -322,7 +322,7 @@ epilog(
    /* Print totals, if any.
     */
    if (ctx->errors || ctx->warnings)
-      debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings );
+      debug_printf( "%u errors, %u warnings\n", ctx->errors, ctx->warnings );
 
    return TRUE;
 }
-- 
cgit v1.2.3


From 50f78fcc2e3da24fa6dc076f0985355b3f64e9fd Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 12 Sep 2008 11:01:31 -0600
Subject: gallium: silence warning

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index df002939c6b..1a5294eabc3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1674,6 +1674,7 @@ exec_declaration(
             break;
 
          default:
+            eval = NULL;
             assert( 0 );
          }
 
-- 
cgit v1.2.3


From 31d2e5b954ece02070555b51c06ee427cf951b1f Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 12 Sep 2008 11:02:18 -0600
Subject: gallium: use new compare32() function to fix warnings about type
 punning and aliasing

---
 src/gallium/auxiliary/tgsi/tgsi_build.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 74614d36884..38fcaf88293 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -793,10 +793,14 @@ tgsi_default_instruction_ext_nv( void )
    return instruction_ext_nv;
 }
 
-union token_u32
+
+/** test for inequality of 32-bit values pointed to by a and b */
+static INLINE boolean
+compare32(const void *a, const void *b)
 {
-   unsigned u32;
-};
+   return *((uint32_t *) a) != *((uint32_t *) b);
+}
+
 
 unsigned
 tgsi_compare_instruction_ext_nv(
@@ -805,7 +809,7 @@ tgsi_compare_instruction_ext_nv(
 {
    a.Padding = b.Padding = 0;
    a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+   return compare32(&a, &b);
 }
 
 struct tgsi_instruction_ext_nv
@@ -864,7 +868,7 @@ tgsi_compare_instruction_ext_label(
 {
    a.Padding = b.Padding = 0;
    a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+   return compare32(&a, &b);
 }
 
 struct tgsi_instruction_ext_label
@@ -905,7 +909,7 @@ tgsi_compare_instruction_ext_texture(
 {
    a.Padding = b.Padding = 0;
    a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+   return compare32(&a, &b);
 }
 
 struct tgsi_instruction_ext_texture
@@ -1027,7 +1031,7 @@ tgsi_compare_src_register_ext_swz(
 {
    a.Padding = b.Padding = 0;
    a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+   return compare32(&a, &b);
 }
 
 struct tgsi_src_register_ext_swz
@@ -1095,7 +1099,7 @@ tgsi_compare_src_register_ext_mod(
 {
    a.Padding = b.Padding = 0;
    a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+   return compare32(&a, &b);
 }
 
 struct tgsi_src_register_ext_mod
@@ -1241,7 +1245,7 @@ tgsi_compare_dst_register_ext_concode(
 {
    a.Padding = b.Padding = 0;
    a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+   return compare32(&a, &b);
 }
 
 struct tgsi_dst_register_ext_concode
@@ -1299,7 +1303,7 @@ tgsi_compare_dst_register_ext_modulate(
 {
    a.Padding = b.Padding = 0;
    a.Extended = b.Extended = 0;
-   return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+   return compare32(&a, &b);
 }
 
 struct tgsi_dst_register_ext_modulate
-- 
cgit v1.2.3


From bd34b8a4febb7aadec0545250fd8b6b06ad774e8 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 12 Sep 2008 11:40:31 -0600
Subject: gallium: use copy_token() function to avoid type punning/aliasing
 problems

This fixes parsing errors seen with optimized builds on PPC (which led to crashes).
The memcpy() is heavy-handed, but works.  A lighter uint assignment could
be used on x86...
---
 src/gallium/auxiliary/tgsi/tgsi_parse.c | 54 ++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 21 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 3757486ba9b..2cd56e413a5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -88,16 +88,33 @@ tgsi_parse_end_of_tokens(
       1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize;
 }
 
+
+/**
+ * This function is used to avoid and work-around type punning/aliasing
+ * warnings.  The warnings seem harmless on x86 but on PPC they cause
+ * real failures.
+ */
+static INLINE void
+copy_token(void *dst, const void *src)
+{
+   memcpy(dst, src, 4);
+}
+
+
+/**
+ * Get next 4-byte token, return it at address specified by 'token'
+ */
 static void
 next_token(
    struct tgsi_parse_context *ctx,
    void *token )
 {
    assert( !tgsi_parse_end_of_tokens( ctx ) );
-
-   *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++];
+   copy_token(token, &ctx->Tokens[ctx->Position]);
+   ctx->Position++;
 }
 
+
 void
 tgsi_parse_token(
    struct tgsi_parse_context *ctx )
@@ -116,7 +133,7 @@ tgsi_parse_token(
       struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration;
 
       *decl = tgsi_default_full_declaration();
-      decl->Declaration = *(struct tgsi_declaration *) &token;
+      copy_token(&decl->Declaration, &token);
 
       next_token( ctx, &decl->DeclarationRange );
 
@@ -132,8 +149,7 @@ tgsi_parse_token(
       struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate;
 
       *imm = tgsi_default_full_immediate();
-      imm->Immediate = *(struct tgsi_immediate *) &token;
-
+      copy_token(&imm->Immediate, &token);
       assert( !imm->Immediate.Extended );
 
       switch (imm->Immediate.DataType) {
@@ -158,8 +174,7 @@ tgsi_parse_token(
       unsigned extended;
 
       *inst = tgsi_default_full_instruction();
-      inst->Instruction = *(struct tgsi_instruction *) &token;
-
+      copy_token(&inst->Instruction, &token);
       extended = inst->Instruction.Extended;
 
       while( extended ) {
@@ -169,18 +184,15 @@ tgsi_parse_token(
 
          switch( token.Type ) {
          case TGSI_INSTRUCTION_EXT_TYPE_NV:
-            inst->InstructionExtNv =
-               *(struct tgsi_instruction_ext_nv *) &token;
+            copy_token(&inst->InstructionExtNv, &token);
             break;
 
          case TGSI_INSTRUCTION_EXT_TYPE_LABEL:
-            inst->InstructionExtLabel =
-               *(struct tgsi_instruction_ext_label *) &token;
+            copy_token(&inst->InstructionExtLabel, &token);
             break;
 
          case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE:
-            inst->InstructionExtTexture =
-               *(struct tgsi_instruction_ext_texture *) &token;
+            copy_token(&inst->InstructionExtTexture, &token);
             break;
 
          default:
@@ -212,13 +224,13 @@ tgsi_parse_token(
 
             switch( token.Type ) {
             case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE:
-               inst->FullDstRegisters[i].DstRegisterExtConcode =
-                  *(struct tgsi_dst_register_ext_concode *) &token;
+               copy_token(&inst->FullDstRegisters[i].DstRegisterExtConcode,
+                          &token);
                break;
 
             case TGSI_DST_REGISTER_EXT_TYPE_MODULATE:
-               inst->FullDstRegisters[i].DstRegisterExtModulate =
-                  *(struct tgsi_dst_register_ext_modulate *) &token;
+               copy_token(&inst->FullDstRegisters[i].DstRegisterExtModulate,
+                          &token);
                break;
 
             default:
@@ -245,13 +257,13 @@ tgsi_parse_token(
 
             switch( token.Type ) {
             case TGSI_SRC_REGISTER_EXT_TYPE_SWZ:
-               inst->FullSrcRegisters[i].SrcRegisterExtSwz =
-                  *(struct tgsi_src_register_ext_swz *) &token;
+               copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtSwz,
+                          &token);
                break;
 
             case TGSI_SRC_REGISTER_EXT_TYPE_MOD:
-               inst->FullSrcRegisters[i].SrcRegisterExtMod =
-                  *(struct tgsi_src_register_ext_mod *) &token;
+               copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod,
+                          &token);
                break;
 
             default:
-- 
cgit v1.2.3


From ad16ecbbe4fe8c1bcb18ed8fbbd672c68a0b17fa Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@tungstengraphics.com>
Date: Tue, 16 Sep 2008 16:16:54 +0200
Subject: tgsi: Make tgsi_sanity.c compile with make

---
 src/gallium/auxiliary/tgsi/Makefile      | 1 +
 src/gallium/auxiliary/tgsi/tgsi_sanity.c | 6 ++++++
 2 files changed, 7 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index 806a2bd4c52..c5d2082087c 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -4,6 +4,7 @@ include $(TOP)/configs/current
 LIBNAME = tgsi
 
 C_SOURCES = \
+	tgsi_sanity.c \
 	tgsi_build.c \
 	tgsi_dump.c \
 	tgsi_exec.c \
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index c6590272969..20b32477be3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -152,6 +152,12 @@ check_register_usage(
 {
    if (!check_file_name( ctx, file ))
       return FALSE;
+
+   if (index < 0 || index > MAX_REGISTERS) {
+      report_error( ctx, "%s[%i]: Invalid index %s", file_names[file], index, name );
+      return FALSE;
+   }
+
    if (indirect_access) {
       if (!is_any_register_declared( ctx, file ))
          report_error( ctx, "%s: Undeclared %s register", file_names[file], name );
-- 
cgit v1.2.3


From 8cdab20c9a0d8794d5d85dbeef478b982ce39506 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Tue, 16 Sep 2008 12:52:19 -0600
Subject: gallium: fix info entries for KIL, KILP

KIL takes 1 src register.  KILP uses no registers (uses cond codes).
---
 src/gallium/auxiliary/tgsi/tgsi_info.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index a4899cd4c2d..68c7a6b7f58 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -69,7 +69,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 1, 1, 0, 0, "COS" },
    { 1, 1, 0, 0, "DDX" },
    { 1, 1, 0, 0, "DDY" },
-   { 0, 1, 0, 0, "KILP" },
+   { 0, 0, 0, 0, "KILP" },
    { 1, 1, 0, 0, "PK2H" },
    { 1, 1, 0, 0, "PK2US" },
    { 1, 1, 0, 0, "PK4B" },
@@ -146,7 +146,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 0, 1, 0, 0, "CALLNZ" },
    { 0, 1, 0, 0, "IFC" },
    { 0, 1, 0, 0, "BREAKC" },
-   { 0, 0, 0, 0, "KIL" },
+   { 0, 1, 0, 0, "KIL" },
    { 0, 0, 0, 0, "END" },
    { 1, 1, 0, 0, "SWZ" }
 };
-- 
cgit v1.2.3


From e6a120fefea44078b3a8d4292d83671e6c41357f Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 17 Sep 2008 13:14:57 -0600
Subject: gallium: fix tgsi sanity checker with respect to END.

Subroutine code may be found after the END instruction so it's not always
the last instruction.
At least check for presence of exactly one END instruction though.
---
 src/gallium/auxiliary/tgsi/tgsi_sanity.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 20b32477be3..11659247c0c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -180,12 +180,10 @@ iter_instruction(
    const struct tgsi_opcode_info *info;
    uint i;
 
-   /* There must be no other instructions after END.
-    */
-   if (ctx->index_of_END != ~0) {
-      report_error( ctx, "Unexpected instruction after END" );
-   }
-   else if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
+   if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
+      if (ctx->index_of_END != ~0) {
+         report_error( ctx, "Too many END instructions" );
+      }
       ctx->index_of_END = ctx->num_instructions;
    }
 
@@ -307,10 +305,10 @@ epilog(
    struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
    uint file;
 
-   /* There must be an END instruction at the end.
+   /* There must be an END instruction somewhere.
     */
-   if (ctx->index_of_END == ~0 || ctx->index_of_END != ctx->num_instructions - 1) {
-      report_error( ctx, "Expected END at end of instruction sequence" );
+   if (ctx->index_of_END == ~0) {
+      report_error( ctx, "Missing END instruction" );
    }
 
    /* Check if all declared registers were used.
-- 
cgit v1.2.3


From 5e1ef85dc430a4439cd60b66262eab9062dd5f4f Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@tungstengraphics.com>
Date: Thu, 18 Sep 2008 14:48:45 +0200
Subject: tgsi: Make tgsi dumps look more like mesa shader dumps.

---
 src/gallium/auxiliary/tgsi/tgsi_dump.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index afc8ffa553c..3177f549523 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -68,6 +68,7 @@ dump_enum(
 #define CHR(C)          ctx->printf( ctx, "%c", C )
 #define UIX(I)          ctx->printf( ctx, "0x%x", I )
 #define UID(I)          ctx->printf( ctx, "%u", I )
+#define INSTID(I)          ctx->printf( ctx, "% 3u", I )
 #define SID(I)          ctx->printf( ctx, "%d", I )
 #define FLT(F)          ctx->printf( ctx, "%10.4f", F )
 #define ENM(E,ENUMS)    dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
@@ -315,8 +316,8 @@ iter_instruction(
    uint i;
    boolean first_reg = TRUE;
 
-   UID( instno );
-   CHR( ':' );
+   INSTID( instno );
+   TXT( ": " );
    TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic );
 
    switch (inst->Instruction.Saturate) {
-- 
cgit v1.2.3


From 0b8e19ffc51c29543796d4f1e3243e97d8c32671 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@tungstengraphics.com>
Date: Thu, 18 Sep 2008 16:28:16 +0200
Subject: tgsi: Build tgsi_text with make

---
 src/gallium/auxiliary/tgsi/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index c5d2082087c..c7155a93168 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -13,6 +13,7 @@ C_SOURCES = \
 	tgsi_parse.c \
 	tgsi_scan.c \
 	tgsi_sse2.c \
+	tgsi_text.c \
 	tgsi_transform.c \
 	tgsi_util.c
 
-- 
cgit v1.2.3


From 5dc8e67078be8b8c42a809311debd275ac7d64a7 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Tue, 30 Sep 2008 01:12:52 +0900
Subject: tgsi: SSE2 optimized exp2, log2 and pow implementations.

Special care must be taken when calling compiler generated SSE2 functions
from the runtime generated SSE2: saving the xmm registers, and notify gcc
the stack is not 16byte aligned.

It would be more efficient to keep the stack pointer 16byte aligned, but
too hairy, and not consistent in all x86 architectures.

This has been tested in linux x86 and windows x86 userspace. Not tested on
x86-64 because it is broken for other reasons (even without this change).
---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 287 ++++++++++++++++++++++++---------
 1 file changed, 211 insertions(+), 76 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 4681b29f52b..4fdad3a5c78 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -28,6 +28,7 @@
 #include "pipe/p_debug.h"
 #include "pipe/p_shader_tokens.h"
 #include "util/u_math.h"
+#include "util/u_sse.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi_exec.h"
@@ -480,10 +481,31 @@ emit_coef_dady(
  * Function call helpers.
  */
 
+/**
+ * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be 
+ * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee
+ * that the stack pointer is 16 byte aligned, as expected.
+ */
 static void
-emit_push_gp(
-   struct x86_function *func )
+emit_func_call_dst(
+   struct x86_function *func,
+   unsigned xmm_save,
+   unsigned xmm_dst,
+   void (PIPE_CDECL *code)() )
 {
+   struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+   unsigned i, n, xmm;
+   unsigned xmm_mask;
+   
+   /* Bitmask of the xmm registers to save */
+   xmm_mask = (1 << xmm_save) - 1;
+   xmm_mask &= ~(1 << xmm_dst);
+
+   sse_movaps(
+      func,
+      get_temp( TEMP_R0, 0 ),
+      make_xmm( xmm_dst ) );
+
    x86_push(
       func,
       x86_make_reg( file_REG32, reg_AX) );
@@ -493,12 +515,49 @@ emit_push_gp(
    x86_push(
       func,
       x86_make_reg( file_REG32, reg_DX) );
-}
+   
+   for(i = 0, n = 0; i < 8; ++i)
+      if(xmm_mask & (1 << i))
+         ++n;
+   
+   x86_sub_imm(
+      func, 
+      x86_make_reg( file_REG32, reg_SP ),
+      n*16);
+
+   for(i = 0, n = 0; i < 8; ++i)
+      if(xmm_mask & (1 << i)) {
+         sse_movups(
+            func,
+            x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ),
+            make_xmm( xmm ) );
+         ++n;
+      }
+   
+   x86_lea(
+      func,
+      ecx,
+      get_temp( TEMP_R0, 0 ) );
+   
+   x86_push( func, ecx );
+   x86_mov_reg_imm( func, ecx, (unsigned long) code );
+   x86_call( func, ecx );
+   x86_pop(func, ecx );
+   
+   for(i = 0, n = 0; i < 8; ++i)
+      if(xmm_mask & (1 << i)) {
+         sse_movups(
+            func,
+            make_xmm( xmm ),
+            x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) );
+         ++n;
+      }
+   
+   x86_add_imm(
+      func, 
+      x86_make_reg( file_REG32, reg_SP ),
+      n*16);
 
-static void
-x86_pop_gp(
-   struct x86_function *func )
-{
    /* Restore GP registers in a reverse order.
     */
    x86_pop(
@@ -510,39 +569,6 @@ x86_pop_gp(
    x86_pop(
       func,
       x86_make_reg( file_REG32, reg_AX) );
-}
-
-static void
-emit_func_call_dst(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   void (PIPE_CDECL *code)() )
-{
-   sse_movaps(
-      func,
-      get_temp( TEMP_R0, 0 ),
-      make_xmm( xmm_dst ) );
-
-   emit_push_gp(
-      func );
-
-   {
-      struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-
-      x86_lea(
-         func,
-         ecx,
-         get_temp( TEMP_R0, 0 ) );
-
-      x86_push( func, ecx );
-      x86_mov_reg_imm( func, ecx, (unsigned long) code );
-      x86_call( func, ecx );
-      x86_pop(func, ecx ); 
-   }
-
-
-   x86_pop_gp(
-      func );
 
    sse_movaps(
       func,
@@ -553,6 +579,7 @@ emit_func_call_dst(
 static void
 emit_func_call_dst_src(
    struct x86_function *func,
+   unsigned xmm_save, 
    unsigned xmm_dst,
    unsigned xmm_src,
    void (PIPE_CDECL *code)() )
@@ -564,10 +591,111 @@ emit_func_call_dst_src(
 
    emit_func_call_dst(
       func,
+      xmm_save,
       xmm_dst,
       code );
 }
 
+/*
+ * Fast SSE2 implementation of special math functions.
+ */
+
+#define POLY0(x, c0) _mm_set1_ps(c0)
+#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0))
+#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0))
+#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0))
+#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0))
+#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0))
+
+#define EXP_POLY_DEGREE 3
+#define LOG_POLY_DEGREE 5
+
+/**
+ * See http://www.devmaster.net/forums/showthread.php?p=43580
+ */
+static INLINE __m128 
+exp2f4(__m128 x)
+{
+   __m128i ipart;
+   __m128 fpart, expipart, expfpart;
+
+   x = _mm_min_ps(x, _mm_set1_ps( 129.00000f));
+   x = _mm_max_ps(x, _mm_set1_ps(-126.99999f));
+
+   /* ipart = int(x - 0.5) */
+   ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f)));
+
+   /* fpart = x - ipart */
+   fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart));
+
+   /* expipart = (float) (1 << ipart) */
+   expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23));
+
+   /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */
+#if EXP_POLY_DEGREE == 5
+   expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f);
+#elif EXP_POLY_DEGREE == 4
+   expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f);
+#elif EXP_POLY_DEGREE == 3
+   expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f);
+#elif EXP_POLY_DEGREE == 2
+   expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f);
+#else
+#error
+#endif
+
+   return _mm_mul_ps(expipart, expfpart);
+}
+
+/**
+ * See http://www.devmaster.net/forums/showthread.php?p=43580
+ */
+static INLINE __m128 
+log2f4(__m128 x)
+{
+   __m128i expmask = _mm_set1_epi32(0x7f800000);
+   __m128i mantmask = _mm_set1_epi32(0x007fffff);
+   __m128 one = _mm_set1_ps(1.0f);
+
+   __m128i i = _mm_castps_si128(x);
+
+   /* exp = (float) exponent(x) */
+   __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127)));
+
+   /* mant = (float) mantissa(x) */
+   __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one);
+
+   __m128 logmant;
+
+   /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ 
+    * These coefficients can be generate with 
+    * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
+    */
+#if LOG_POLY_DEGREE == 6
+   logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f);
+#elif LOG_POLY_DEGREE == 5
+   logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
+#elif LOG_POLY_DEGREE == 4
+   logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
+#elif LOG_POLY_DEGREE == 3
+   logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
+#else
+#error
+#endif
+
+   /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+   logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one));
+
+   return _mm_add_ps(logmant, exp);
+}
+
+static INLINE __m128
+powf4(__m128 x, __m128 y)
+{
+   return exp2f4(_mm_mul_ps(log2f4(x), y));
+}
+
+
 /**
  * Low-level instruction translators.
  */
@@ -610,38 +738,35 @@ cos4f(
 static void
 emit_cos(
    struct x86_function *func,
+   unsigned xmm_save, 
    unsigned xmm_dst )
 {
    emit_func_call_dst(
       func,
+      xmm_save, 
       xmm_dst,
       cos4f );
 }
 
 static void PIPE_CDECL
+#if defined(PIPE_CC_GCC)
+__attribute__((force_align_arg_pointer))
+#endif
 ex24f(
    float *store )
 {
-#if FAST_MATH
-   store[0] = util_fast_exp2( store[0] );
-   store[1] = util_fast_exp2( store[1] );
-   store[2] = util_fast_exp2( store[2] );
-   store[3] = util_fast_exp2( store[3] );
-#else
-   store[0] = powf( 2.0f, store[0] );
-   store[1] = powf( 2.0f, store[1] );
-   store[2] = powf( 2.0f, store[2] );
-   store[3] = powf( 2.0f, store[3] );
-#endif
+   _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) ));
 }
 
 static void
 emit_ex2(
    struct x86_function *func,
+   unsigned xmm_save, 
    unsigned xmm_dst )
 {
    emit_func_call_dst(
       func,
+      xmm_save,
       xmm_dst,
       ex24f );
 }
@@ -670,10 +795,12 @@ flr4f(
 static void
 emit_flr(
    struct x86_function *func,
+   unsigned xmm_save, 
    unsigned xmm_dst )
 {
    emit_func_call_dst(
       func,
+      xmm_save,
       xmm_dst,
       flr4f );
 }
@@ -691,31 +818,35 @@ frc4f(
 static void
 emit_frc(
    struct x86_function *func,
+   unsigned xmm_save, 
    unsigned xmm_dst )
 {
    emit_func_call_dst(
       func,
+      xmm_save,
       xmm_dst,
       frc4f );
 }
 
 static void PIPE_CDECL
+#if defined(PIPE_CC_GCC)
+__attribute__((force_align_arg_pointer))
+#endif
 lg24f(
    float *store )
 {
-   store[0] = util_fast_log2( store[0] );
-   store[1] = util_fast_log2( store[1] );
-   store[2] = util_fast_log2( store[2] );
-   store[3] = util_fast_log2( store[3] );
+   _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) ));
 }
 
 static void
 emit_lg2(
    struct x86_function *func,
+   unsigned xmm_save, 
    unsigned xmm_dst )
 {
    emit_func_call_dst(
       func,
+      xmm_save,
       xmm_dst,
       lg24f );
 }
@@ -757,14 +888,14 @@ emit_neg(
 }
 
 static void PIPE_CDECL
+#if defined(PIPE_CC_GCC)
+__attribute__((force_align_arg_pointer))
+#endif
 pow4f(
    float *store )
 {
-#if FAST_MATH
-   store[0] = util_fast_pow( store[0], store[4] );
-   store[1] = util_fast_pow( store[1], store[5] );
-   store[2] = util_fast_pow( store[2], store[6] );
-   store[3] = util_fast_pow( store[3], store[7] );
+#if 1
+   _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) ));
 #else
    store[0] = powf( store[0], store[4] );
    store[1] = powf( store[1], store[5] );
@@ -776,11 +907,13 @@ pow4f(
 static void
 emit_pow(
    struct x86_function *func,
+   unsigned xmm_save, 
    unsigned xmm_dst,
    unsigned xmm_src )
 {
    emit_func_call_dst_src(
       func,
+      xmm_save,
       xmm_dst,
       xmm_src,
       pow4f );
@@ -873,10 +1006,12 @@ sin4f(
 
 static void
 emit_sin (struct x86_function *func,
+          unsigned xmm_save, 
           unsigned xmm_dst)
 {
    emit_func_call_dst(
       func,
+      xmm_save,
       xmm_dst,
       sin4f );
 }
@@ -1296,7 +1431,7 @@ emit_instruction(
                get_temp(
                   TGSI_EXEC_TEMP_MINUS_128_I,
                   TGSI_EXEC_TEMP_MINUS_128_C ) );
-            emit_pow( func, 1, 2 );
+            emit_pow( func, 3, 1, 2 );
             FETCH( func, *inst, 0, 0, CHAN_X );
             sse_xorps(
                func,
@@ -1342,11 +1477,11 @@ emit_instruction(
          if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
              IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
             emit_MOV( func, 1, 0 );
-            emit_flr( func, 1 );
+            emit_flr( func, 2, 1 );
             /* dst.x = ex2(floor(src.x)) */
             if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
                emit_MOV( func, 2, 1 );
-               emit_ex2( func, 2 );
+               emit_ex2( func, 3, 2 );
                STORE( func, *inst, 2, 0, CHAN_X );
             }
             /* dst.y = src.x - floor(src.x) */
@@ -1358,7 +1493,7 @@ emit_instruction(
          }
          /* dst.z = ex2(src.x) */
          if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-            emit_ex2( func, 0 );
+            emit_ex2( func, 3, 0 );
             STORE( func, *inst, 0, 0, CHAN_Z );
          }
       }
@@ -1376,21 +1511,21 @@ emit_instruction(
          FETCH( func, *inst, 0, 0, CHAN_X );
          emit_abs( func, 0 );
          emit_MOV( func, 1, 0 );
-         emit_lg2( func, 1 );
+         emit_lg2( func, 2, 1 );
          /* dst.z = lg2(abs(src.x)) */
          if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
             STORE( func, *inst, 1, 0, CHAN_Z );
          }
          if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
              IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-            emit_flr( func, 1 );
+            emit_flr( func, 2, 1 );
             /* dst.x = floor(lg2(abs(src.x))) */
             if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
                STORE( func, *inst, 1, 0, CHAN_X );
             }
             /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
             if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-               emit_ex2( func, 1 );
+               emit_ex2( func, 2, 1 );
                emit_rcp( func, 1, 1 );
                emit_mul( func, 0, 1 );
                STORE( func, *inst, 0, 0, CHAN_Y );
@@ -1580,7 +1715,7 @@ emit_instruction(
    /* TGSI_OPCODE_FRC */
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 0, 0, chan_index );
-         emit_frc( func, 0 );
+         emit_frc( func, 0, 0 );
          STORE( func, *inst, 0, 0, chan_index );
       }
       break;
@@ -1593,7 +1728,7 @@ emit_instruction(
    /* TGSI_OPCODE_FLR */
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 0, 0, chan_index );
-         emit_flr( func, 0 );
+         emit_flr( func, 0, 0 );
          STORE( func, *inst, 0, 0, chan_index );
       }
       break;
@@ -1605,7 +1740,7 @@ emit_instruction(
    case TGSI_OPCODE_EXPBASE2:
    /* TGSI_OPCODE_EX2 */
       FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_ex2( func, 0 );
+      emit_ex2( func, 0, 0 );
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          STORE( func, *inst, 0, 0, chan_index );
       }
@@ -1614,7 +1749,7 @@ emit_instruction(
    case TGSI_OPCODE_LOGBASE2:
    /* TGSI_OPCODE_LG2 */
       FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_lg2( func, 0 );
+      emit_lg2( func, 0, 0 );
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          STORE( func, *inst, 0, 0, chan_index );
       }
@@ -1624,7 +1759,7 @@ emit_instruction(
    /* TGSI_OPCODE_POW */
       FETCH( func, *inst, 0, 0, CHAN_X );
       FETCH( func, *inst, 1, 1, CHAN_X );
-      emit_pow( func, 0, 1 );
+      emit_pow( func, 0, 0, 1 );
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          STORE( func, *inst, 0, 0, chan_index );
       }
@@ -1715,7 +1850,7 @@ emit_instruction(
 
    case TGSI_OPCODE_COS:
       FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_cos( func, 0 );
+      emit_cos( func, 0, 0 );
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          STORE( func, *inst, 0, 0, chan_index );
       }
@@ -1774,7 +1909,7 @@ emit_instruction(
 
    case TGSI_OPCODE_SIN:
       FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_sin( func, 0 );
+      emit_sin( func, 0, 0 );
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          STORE( func, *inst, 0, 0, chan_index );
       }
@@ -1868,12 +2003,12 @@ emit_instruction(
    case TGSI_OPCODE_SCS:
       IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
          FETCH( func, *inst, 0, 0, CHAN_X );
-         emit_cos( func, 0 );
+         emit_cos( func, 0, 0 );
          STORE( func, *inst, 0, 0, CHAN_X );
       }
       IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
          FETCH( func, *inst, 0, 0, CHAN_X );
-         emit_sin( func, 0 );
+         emit_sin( func, 0, 0 );
          STORE( func, *inst, 0, 0, CHAN_Y );
       }
       IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
-- 
cgit v1.2.3


From 5e585719ebab17959d972e2e69c04203ecd3f2f3 Mon Sep 17 00:00:00 2001
From: Jonathan White <jwhite@tungstengraphics.com>
Date: Tue, 30 Sep 2008 14:07:09 -0600
Subject: cell:  Moved X86 checks to wrap #include section so that Cell targets
 will compile again.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 4fdad3a5c78..94b2df2dbbf 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -25,6 +25,8 @@
  * 
  **************************************************************************/
 
+#ifdef PIPE_ARCH_X86
+
 #include "pipe/p_debug.h"
 #include "pipe/p_shader_tokens.h"
 #include "util/u_math.h"
@@ -36,8 +38,6 @@
 
 #include "rtasm/rtasm_x86sse.h"
 
-#ifdef PIPE_ARCH_X86
-
 /* for 1/sqrt()
  *
  * This costs about 100fps (close to 10%) in gears:
-- 
cgit v1.2.3


From cbfce4175bf72788842bb45fa11c7e19caa8e6a8 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Wed, 1 Oct 2008 08:27:20 +0900
Subject: tgsi: Include p_config.h.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 94b2df2dbbf..79f424b692a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -25,6 +25,8 @@
  * 
  **************************************************************************/
 
+#include "pipe/p_config.h"
+
 #ifdef PIPE_ARCH_X86
 
 #include "pipe/p_debug.h"
-- 
cgit v1.2.3


From 4d7394f89292131323fc8e39efa511a2eeb8cc60 Mon Sep 17 00:00:00 2001
From: José Fonseca <jrfonseca@tungstengraphics.com>
Date: Tue, 7 Oct 2008 14:25:09 +0900
Subject: gallium: Introduce PIPE_ARCH_SSE define for SSE support.

Besides meaning x86 and x86-64 architecture, it also depends on SSE2
support enabled on gcc.

This fixes the linux-debug build.
---
 src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +-
 src/gallium/auxiliary/tgsi/tgsi_sse2.c   | 2 +-
 src/gallium/auxiliary/util/u_sse.h       | 2 +-
 src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +-
 src/gallium/include/pipe/p_config.h      | 8 ++++++++
 5 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 0efabd9de8b..b11ae316627 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -37,7 +37,7 @@
 
 #include "draw_vs.h"
 
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE)
 
 #include "pipe/p_shader_tokens.h"
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 79f424b692a..f79170b9d65 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -27,7 +27,7 @@
 
 #include "pipe/p_config.h"
 
-#ifdef PIPE_ARCH_X86
+#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE)
 
 #include "pipe/p_debug.h"
 #include "pipe/p_shader_tokens.h"
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
index 68e56f08165..e2a8491e62c 100644
--- a/src/gallium/auxiliary/util/u_sse.h
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -39,7 +39,7 @@
 
 #include "pipe/p_config.h"
 
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+#if defined(PIPE_ARCH_SSE)
 
 #include <xmmintrin.h>
 #include <emmintrin.h>
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 496ed43df26..0111469405f 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -40,7 +40,7 @@
 #include "tgsi/tgsi_sse2.h"
 
 
-#ifdef PIPE_ARCH_X86
+#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE)
 
 #include "rtasm/rtasm_x86sse.h"
 
diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h
index af3746c0265..ef05547819d 100644
--- a/src/gallium/include/pipe/p_config.h
+++ b/src/gallium/include/pipe/p_config.h
@@ -85,6 +85,14 @@
 #define PIPE_ARCH_X86_64
 #endif
 
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+#if defined(PIPE_CC_GCC) && !defined(__SSE2__)
+/* #warning SSE2 support requires -msse -msse2 compiler options */
+#else
+#define PIPE_ARCH_SSE
+#endif
+#endif
+
 #if 0 /* FIXME */
 #define PIPE_ARCH_PPC
 #endif
-- 
cgit v1.2.3


From 70f4ad44985e3ec6dabc1b0e55a5bf85803a4cd4 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 22 Oct 2008 11:07:35 -0600
Subject: gallium: TGSI to PPC code generation

Based on the TGSIto SSE2 code generator.
Incomplete and lots of SSE stuff still hanging around but the basic dozen
or so TGSI opcodes are functioning.
---
 src/gallium/auxiliary/tgsi/Makefile   |    1 +
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 2781 +++++++++++++++++++++++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_ppc.h |   48 +
 3 files changed, 2830 insertions(+)
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_ppc.c
 create mode 100644 src/gallium/auxiliary/tgsi/tgsi_ppc.h

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index c7155a93168..d7df9490cfa 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -11,6 +11,7 @@ C_SOURCES = \
 	tgsi_info.c \
 	tgsi_iterate.c \
 	tgsi_parse.c \
+	tgsi_ppc.c \
 	tgsi_scan.c \
 	tgsi_sse2.c \
 	tgsi_text.c \
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
new file mode 100644
index 00000000000..112e7365233
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -0,0 +1,2781 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * TGSI to PowerPC code generation.
+ */
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_PPC)
+
+#include "pipe/p_debug.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_sse.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi_exec.h"
+#include "tgsi_ppc.h"
+#include "rtasm/rtasm_ppc.h"
+
+
+/* for 1/sqrt()
+ *
+ * This costs about 100fps (close to 10%) in gears:
+ */
+#define HIGH_PRECISION 1
+
+#define FAST_MATH 1
+
+
+#define FOR_EACH_CHANNEL( CHAN )\
+   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
+
+#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
+   FOR_EACH_CHANNEL( CHAN )\
+      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
+
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+#define TEMP_ONE_I   TGSI_EXEC_TEMP_ONE_I
+#define TEMP_ONE_C   TGSI_EXEC_TEMP_ONE_C
+
+#define TEMP_R0   TGSI_EXEC_TEMP_R0
+#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR
+
+
+/**
+ * Context/state used during code gen.
+ */
+struct gen_context
+{
+   struct ppc_function *f;
+   int inputs_reg;    /**< register pointing to input params */
+   int outputs_reg;   /**< register pointing to output params */
+   int temps_reg;     /**< register pointing to temporary "registers" */
+   int immed_reg;     /**< register pointing to immediates buffer */
+   int const_reg;     /**< register pointing to constants buffer */
+};
+
+
+
+#if 0000
+
+/**
+ * X86 utility functions.
+ */
+
+static struct x86_reg
+make_xmm(
+   unsigned xmm )
+{
+   return x86_make_reg(
+      file_XMM,
+      (enum x86_reg_name) xmm );
+}
+
+/**
+ * X86 register mapping helpers.
+ */
+
+static struct x86_reg
+get_const_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_CX );
+}
+
+static struct x86_reg
+get_input_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_AX );
+}
+
+static struct x86_reg
+get_output_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_DX );
+}
+
+static struct x86_reg
+get_temp_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_BX );
+}
+
+static struct x86_reg
+get_coef_base( void )
+{
+   return get_output_base();
+}
+
+static struct x86_reg
+get_immediate_base( void )
+{
+   return x86_make_reg(
+      file_REG32,
+      reg_DI );
+}
+
+
+/**
+ * Data access helpers.
+ */
+
+
+static struct x86_reg
+get_immediate(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_immediate_base(),
+      (vec * 4 + chan) * 4 );
+}
+
+static struct x86_reg
+get_const(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_const_base(),
+      (vec * 4 + chan) * 4 );
+}
+
+static struct x86_reg
+get_input(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_input_base(),
+      (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_output(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_output_base(),
+      (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_temp(
+   unsigned vec,
+   unsigned chan )
+{
+   return x86_make_disp(
+      get_temp_base(),
+      (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_coef(
+   unsigned vec,
+   unsigned chan,
+   unsigned member )
+{
+   return x86_make_disp(
+      get_coef_base(),
+      ((vec * 3 + member) * 4 + chan) * 4 );
+}
+
+
+static void
+emit_ret(
+   struct x86_function  *func )
+{
+   x86_ret( func );
+}
+
+#endif
+
+/**
+ * Data fetch helpers.
+ */
+
+#if 00
+/**
+ * Copy a shader constant to xmm register
+ * \param xmm  the destination xmm register
+ * \param vec  the src const buffer index
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ */
+static void
+emit_const(
+   struct x86_function *func,
+   uint xmm,
+   int vec,
+   uint chan,
+   uint indirect,
+   uint indirectFile,
+   int indirectIndex )
+{
+   if (indirect) {
+      struct x86_reg r0 = get_input_base();
+      struct x86_reg r1 = get_output_base();
+      uint i;
+
+      assert( indirectFile == TGSI_FILE_ADDRESS );
+      assert( indirectIndex == 0 );
+
+      x86_push( func, r0 );
+      x86_push( func, r1 );
+
+      for (i = 0; i < QUAD_SIZE; i++) {
+         x86_lea( func, r0, get_const( vec, chan ) );
+         x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) );
+
+         /* Quick hack to multiply by 16 -- need to add SHL to rtasm.
+          */
+         x86_add( func, r1, r1 );
+         x86_add( func, r1, r1 );
+         x86_add( func, r1, r1 );
+         x86_add( func, r1, r1 );
+
+         x86_add( func, r0, r1 );
+         x86_mov( func, r1, x86_deref( r0 ) );
+         x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 );
+      }
+
+      x86_pop( func, r1 );
+      x86_pop( func, r0 );
+
+      sse_movaps(
+         func,
+         make_xmm( xmm ),
+         get_temp( TEMP_R0, CHAN_X ) );
+   }
+   else {
+      assert( vec >= 0 );
+
+      sse_movss(
+         func,
+         make_xmm( xmm ),
+         get_const( vec, chan ) );
+      sse_shufps(
+         func,
+         make_xmm( xmm ),
+         make_xmm( xmm ),
+         SHUF( 0, 0, 0, 0 ) );
+   }
+}
+
+static void
+emit_immediate(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movss(
+      func,
+      make_xmm( xmm ),
+      get_immediate( vec, chan ) );
+   sse_shufps(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ),
+      SHUF( 0, 0, 0, 0 ) );
+}
+
+
+/**
+ * Copy a shader input to xmm register
+ * \param xmm  the destination xmm register
+ * \param vec  the src input attrib
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ */
+static void
+emit_inputf(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movups(
+      func,
+      make_xmm( xmm ),
+      get_input( vec, chan ) );
+}
+
+/**
+ * Store an xmm register to a shader output
+ * \param xmm  the source xmm register
+ * \param vec  the dest output attrib
+ * \param chan  src dest channel to store (X, Y, Z or W)
+ */
+static void
+emit_output(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movups(
+      func,
+      get_output( vec, chan ),
+      make_xmm( xmm ) );
+}
+
+/**
+ * Copy a shader temporary to xmm register
+ * \param xmm  the destination xmm register
+ * \param vec  the src temp register
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ */
+static void
+emit_tempf(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movaps(
+      func,
+      make_xmm( xmm ),
+      get_temp( vec, chan ) );
+}
+
+/**
+ * Load an xmm register with an input attrib coefficient (a0, dadx or dady)
+ * \param xmm  the destination xmm register
+ * \param vec  the src input/attribute coefficient index
+ * \param chan  src channel to fetch (X, Y, Z or W)
+ * \param member  0=a0, 1=dadx, 2=dady
+ */
+static void
+emit_coef(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan,
+   unsigned member )
+{
+   sse_movss(
+      func,
+      make_xmm( xmm ),
+      get_coef( vec, chan, member ) );
+   sse_shufps(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ),
+      SHUF( 0, 0, 0, 0 ) );
+}
+
+/**
+ * Data store helpers.
+ */
+
+static void
+emit_inputs(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movups(
+      func,
+      get_input( vec, chan ),
+      make_xmm( xmm ) );
+}
+
+static void
+emit_temps(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   sse_movaps(
+      func,
+      get_temp( vec, chan ),
+      make_xmm( xmm ) );
+}
+
+static void
+emit_addrs(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   assert( vec == 0 );
+
+   emit_temps(
+      func,
+      xmm,
+      vec + TGSI_EXEC_TEMP_ADDR,
+      chan );
+}
+
+/**
+ * Coefficent fetch helpers.
+ */
+
+static void
+emit_coef_a0(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_coef(
+      func,
+      xmm,
+      vec,
+      chan,
+      0 );
+}
+
+static void
+emit_coef_dadx(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_coef(
+      func,
+      xmm,
+      vec,
+      chan,
+      1 );
+}
+
+static void
+emit_coef_dady(
+   struct x86_function *func,
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
+{
+   emit_coef(
+      func,
+      xmm,
+      vec,
+      chan,
+      2 );
+}
+#endif
+
+
+/**
+ * Function call helpers.
+ */
+
+#if 00
+/**
+ * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be 
+ * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee
+ * that the stack pointer is 16 byte aligned, as expected.
+ */
+static void
+emit_func_call_dst(
+   struct x86_function *func,
+   unsigned xmm_save,
+   unsigned xmm_dst,
+   void (PIPE_CDECL *code)() )
+{
+   struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+   unsigned i, n, xmm;
+   unsigned xmm_mask;
+   
+   /* Bitmask of the xmm registers to save */
+   xmm_mask = (1 << xmm_save) - 1;
+   xmm_mask &= ~(1 << xmm_dst);
+
+   sse_movaps(
+      func,
+      get_temp( TEMP_R0, 0 ),
+      make_xmm( xmm_dst ) );
+
+   x86_push(
+      func,
+      x86_make_reg( file_REG32, reg_AX) );
+   x86_push(
+      func,
+      x86_make_reg( file_REG32, reg_CX) );
+   x86_push(
+      func,
+      x86_make_reg( file_REG32, reg_DX) );
+   
+   for(i = 0, n = 0; i < 8; ++i)
+      if(xmm_mask & (1 << i))
+         ++n;
+   
+   x86_sub_imm(
+      func, 
+      x86_make_reg( file_REG32, reg_SP ),
+      n*16);
+
+   for(i = 0, n = 0; i < 8; ++i)
+      if(xmm_mask & (1 << i)) {
+         sse_movups(
+            func,
+            x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ),
+            make_xmm( xmm ) );
+         ++n;
+      }
+   
+   x86_lea(
+      func,
+      ecx,
+      get_temp( TEMP_R0, 0 ) );
+   
+   x86_push( func, ecx );
+   x86_mov_reg_imm( func, ecx, (unsigned long) code );
+   x86_call( func, ecx );
+   x86_pop(func, ecx );
+   
+   for(i = 0, n = 0; i < 8; ++i)
+      if(xmm_mask & (1 << i)) {
+         sse_movups(
+            func,
+            make_xmm( xmm ),
+            x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) );
+         ++n;
+      }
+   
+   x86_add_imm(
+      func, 
+      x86_make_reg( file_REG32, reg_SP ),
+      n*16);
+
+   /* Restore GP registers in a reverse order.
+    */
+   x86_pop(
+      func,
+      x86_make_reg( file_REG32, reg_DX) );
+   x86_pop(
+      func,
+      x86_make_reg( file_REG32, reg_CX) );
+   x86_pop(
+      func,
+      x86_make_reg( file_REG32, reg_AX) );
+
+   sse_movaps(
+      func,
+      make_xmm( xmm_dst ),
+      get_temp( TEMP_R0, 0 ) );
+}
+
+static void
+emit_func_call_dst_src(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst,
+   unsigned xmm_src,
+   void (PIPE_CDECL *code)() )
+{
+   sse_movaps(
+      func,
+      get_temp( TEMP_R0, 1 ),
+      make_xmm( xmm_src ) );
+
+   emit_func_call_dst(
+      func,
+      xmm_save,
+      xmm_dst,
+      code );
+}
+
+/*
+ * Fast SSE2 implementation of special math functions.
+ */
+
+#define POLY0(x, c0) _mm_set1_ps(c0)
+#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0))
+#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0))
+#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0))
+#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0))
+#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0))
+
+#define EXP_POLY_DEGREE 3
+#define LOG_POLY_DEGREE 5
+
+/**
+ * See http://www.devmaster.net/forums/showthread.php?p=43580
+ */
+static INLINE __m128 
+exp2f4(__m128 x)
+{
+   __m128i ipart;
+   __m128 fpart, expipart, expfpart;
+
+   x = _mm_min_ps(x, _mm_set1_ps( 129.00000f));
+   x = _mm_max_ps(x, _mm_set1_ps(-126.99999f));
+
+   /* ipart = int(x - 0.5) */
+   ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f)));
+
+   /* fpart = x - ipart */
+   fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart));
+
+   /* expipart = (float) (1 << ipart) */
+   expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23));
+
+   /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */
+#if EXP_POLY_DEGREE == 5
+   expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f);
+#elif EXP_POLY_DEGREE == 4
+   expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f);
+#elif EXP_POLY_DEGREE == 3
+   expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f);
+#elif EXP_POLY_DEGREE == 2
+   expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f);
+#else
+#error
+#endif
+
+   return _mm_mul_ps(expipart, expfpart);
+}
+
+/**
+ * See http://www.devmaster.net/forums/showthread.php?p=43580
+ */
+static INLINE __m128 
+log2f4(__m128 x)
+{
+   __m128i expmask = _mm_set1_epi32(0x7f800000);
+   __m128i mantmask = _mm_set1_epi32(0x007fffff);
+   __m128 one = _mm_set1_ps(1.0f);
+
+   __m128i i = _mm_castps_si128(x);
+
+   /* exp = (float) exponent(x) */
+   __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127)));
+
+   /* mant = (float) mantissa(x) */
+   __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one);
+
+   __m128 logmant;
+
+   /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ 
+    * These coefficients can be generate with 
+    * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
+    */
+#if LOG_POLY_DEGREE == 6
+   logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f);
+#elif LOG_POLY_DEGREE == 5
+   logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
+#elif LOG_POLY_DEGREE == 4
+   logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
+#elif LOG_POLY_DEGREE == 3
+   logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
+#else
+#error
+#endif
+
+   /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+   logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one));
+
+   return _mm_add_ps(logmant, exp);
+}
+
+static INLINE __m128
+powf4(__m128 x, __m128 y)
+{
+   return exp2f4(_mm_mul_ps(log2f4(x), y));
+}
+
+
+/**
+ * Low-level instruction translators.
+ */
+
+static void
+emit_abs(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse_andps(
+      func,
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_7FFFFFFF_I,
+         TGSI_EXEC_TEMP_7FFFFFFF_C ) );
+}
+
+static void
+emit_add(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   sse_addps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void PIPE_CDECL
+cos4f(
+   float *store )
+{
+   store[0] = cosf( store[0] );
+   store[1] = cosf( store[1] );
+   store[2] = cosf( store[2] );
+   store[3] = cosf( store[3] );
+}
+
+static void
+emit_cos(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_save, 
+      xmm_dst,
+      cos4f );
+}
+
+static void PIPE_CDECL
+#if defined(PIPE_CC_GCC)
+__attribute__((force_align_arg_pointer))
+#endif
+ex24f(
+   float *store )
+{
+   _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) ));
+}
+
+static void
+emit_ex2(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_save,
+      xmm_dst,
+      ex24f );
+}
+
+static void
+emit_f2it(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse2_cvttps2dq(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ) );
+}
+
+static void PIPE_CDECL
+flr4f(
+   float *store )
+{
+   store[0] = floorf( store[0] );
+   store[1] = floorf( store[1] );
+   store[2] = floorf( store[2] );
+   store[3] = floorf( store[3] );
+}
+
+static void
+emit_flr(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_save,
+      xmm_dst,
+      flr4f );
+}
+
+static void PIPE_CDECL
+frc4f(
+   float *store )
+{
+   store[0] -= floorf( store[0] );
+   store[1] -= floorf( store[1] );
+   store[2] -= floorf( store[2] );
+   store[3] -= floorf( store[3] );
+}
+
+static void
+emit_frc(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_save,
+      xmm_dst,
+      frc4f );
+}
+
+static void PIPE_CDECL
+#if defined(PIPE_CC_GCC)
+__attribute__((force_align_arg_pointer))
+#endif
+lg24f(
+   float *store )
+{
+   _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) ));
+}
+
+static void
+emit_lg2(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_save,
+      xmm_dst,
+      lg24f );
+}
+
+static void
+emit_MOV(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   sse_movups(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void
+emit_mul (struct x86_function *func,
+          unsigned xmm_dst,
+          unsigned xmm_src)
+{
+   sse_mulps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void
+emit_neg(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse_xorps(
+      func,
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_80000000_I,
+         TGSI_EXEC_TEMP_80000000_C ) );
+}
+
+static void PIPE_CDECL
+#if defined(PIPE_CC_GCC)
+__attribute__((force_align_arg_pointer))
+#endif
+pow4f(
+   float *store )
+{
+#if 1
+   _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) ));
+#else
+   store[0] = powf( store[0], store[4] );
+   store[1] = powf( store[1], store[5] );
+   store[2] = powf( store[2], store[6] );
+   store[3] = powf( store[3], store[7] );
+#endif
+}
+
+static void
+emit_pow(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   emit_func_call_dst_src(
+      func,
+      xmm_save,
+      xmm_dst,
+      xmm_src,
+      pow4f );
+}
+
+static void
+emit_rcp (
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   /* On Intel CPUs at least, this is only accurate to 12 bits -- not
+    * good enough.  Need to either emit a proper divide or use the
+    * iterative technique described below in emit_rsqrt().
+    */
+   sse2_rcpps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void
+emit_rsqrt(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+#if HIGH_PRECISION
+   /* Although rsqrtps() and rcpps() are low precision on some/all SSE
+    * implementations, it is possible to improve its precision at
+    * fairly low cost, using a newton/raphson step, as below:
+    * 
+    * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
+    * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
+    *
+    * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
+    */
+   {
+      struct x86_reg dst = make_xmm( xmm_dst );
+      struct x86_reg src = make_xmm( xmm_src );
+      struct x86_reg tmp0 = make_xmm( 2 );
+      struct x86_reg tmp1 = make_xmm( 3 );
+
+      assert( xmm_dst != xmm_src );
+      assert( xmm_dst != 2 && xmm_dst != 3 );
+      assert( xmm_src != 2 && xmm_src != 3 );
+
+      sse_movaps(  func, dst,  get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
+      sse_movaps(  func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
+      sse_rsqrtps( func, tmp1, src  );
+      sse_mulps(   func, src,  tmp1 );
+      sse_mulps(   func, dst,  tmp1 );
+      sse_mulps(   func, src,  tmp1 );
+      sse_subps(   func, tmp0, src  );
+      sse_mulps(   func, dst,  tmp0 );
+   }
+#else
+   /* On Intel CPUs at least, this is only accurate to 12 bits -- not
+    * good enough.
+    */
+   sse_rsqrtps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+#endif
+}
+
+static void
+emit_setsign(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse_orps(
+      func,
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_80000000_I,
+         TGSI_EXEC_TEMP_80000000_C ) );
+}
+
+static void PIPE_CDECL
+sin4f(
+   float *store )
+{
+   store[0] = sinf( store[0] );
+   store[1] = sinf( store[1] );
+   store[2] = sinf( store[2] );
+   store[3] = sinf( store[3] );
+}
+
+static void
+emit_sin (struct x86_function *func,
+          unsigned xmm_save, 
+          unsigned xmm_dst)
+{
+   emit_func_call_dst(
+      func,
+      xmm_save,
+      xmm_dst,
+      sin4f );
+}
+
+static void
+emit_sub(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   sse_subps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+#endif
+
+
+/**
+ * Register fetch.
+ */
+static void
+emit_fetch(struct gen_context *gen,
+           unsigned vec_reg,
+           const struct tgsi_full_src_register *reg,
+           const unsigned chan_index)
+{
+   uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index);
+
+   switch (swizzle) {
+   case TGSI_EXTSWIZZLE_X:
+   case TGSI_EXTSWIZZLE_Y:
+   case TGSI_EXTSWIZZLE_Z:
+   case TGSI_EXTSWIZZLE_W:
+      switch (reg->SrcRegister.File) {
+      case TGSI_FILE_INPUT:
+         {
+            int offset_reg = ppc_allocate_register(gen->f);
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            ppc_li(gen->f, offset_reg, offset);
+            ppc_lvx(gen->f, vec_reg, gen->inputs_reg, offset_reg);
+            ppc_release_register(gen->f, offset_reg);
+         }
+         break;
+      case TGSI_FILE_TEMPORARY:
+         {
+            int offset_reg = ppc_allocate_register(gen->f);
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            ppc_li(gen->f, offset_reg, offset);
+            ppc_lvx(gen->f, vec_reg, gen->temps_reg, offset_reg);
+            ppc_release_register(gen->f, offset_reg);
+         }
+         break;
+      case TGSI_FILE_IMMEDIATE:
+         {
+            int offset_reg = ppc_allocate_register(gen->f);
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            ppc_li(gen->f, offset_reg, offset);
+            ppc_lvx(gen->f, vec_reg, gen->immed_reg, offset_reg);
+            ppc_release_register(gen->f, offset_reg);
+         }
+         break;
+      case TGSI_FILE_CONSTANT:
+         {
+            int offset_reg = ppc_allocate_register(gen->f);
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
+            ppc_li(gen->f, offset_reg, offset);
+            /* load vector word */
+            ppc_lvewx(gen->f, vec_reg, gen->const_reg, offset_reg);
+            /* splat word[0] across vector */
+            ppc_vspltw(gen->f, vec_reg, vec_reg, 0);
+            ppc_release_register(gen->f, offset_reg);
+         }
+         break;
+      default:
+         assert( 0 );
+      }
+      break;
+
+   case TGSI_EXTSWIZZLE_ZERO:
+#if 0
+      emit_tempf(
+         func,
+         xmm,
+         TGSI_EXEC_TEMP_00000000_I,
+         TGSI_EXEC_TEMP_00000000_C );
+#endif
+      break;
+
+   case TGSI_EXTSWIZZLE_ONE:
+#if 0
+      emit_tempf(
+         func,
+         xmm,
+         TEMP_ONE_I,
+         TEMP_ONE_C );
+#endif
+      break;
+
+   default:
+      assert( 0 );
+   }
+
+#if 0
+   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
+   case TGSI_UTIL_SIGN_CLEAR:
+      emit_abs( func, xmm );
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      emit_setsign( func, xmm );
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      emit_neg( func, xmm );
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+      break;
+   }
+#endif
+}
+
+#define FETCH( GEN, INST, VEC_REG, SRC_REG, CHAN ) \
+   emit_fetch( GEN, VEC_REG, &(INST).FullSrcRegisters[SRC_REG], CHAN )
+
+
+
+/**
+ * Register store.
+ */
+static void
+emit_store(struct gen_context *gen,
+           unsigned vec_reg,
+           const struct tgsi_full_dst_register *reg,
+           const struct tgsi_full_instruction *inst,
+           unsigned chan_index)
+{
+   switch (reg->DstRegister.File) {
+   case TGSI_FILE_OUTPUT:
+      {
+         int offset_reg = ppc_allocate_register(gen->f);
+         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
+         ppc_li(gen->f, offset_reg, offset);
+         ppc_stvx(gen->f, vec_reg, gen->outputs_reg, offset_reg);
+         ppc_release_register(gen->f, offset_reg);
+      }
+      break;
+   case TGSI_FILE_TEMPORARY:
+      {
+         int offset_reg = ppc_allocate_register(gen->f);
+         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
+         ppc_li(gen->f, offset_reg, offset);
+         ppc_stvx(gen->f, vec_reg, gen->temps_reg, offset_reg);
+         ppc_release_register(gen->f, offset_reg);
+      }
+      break;
+#if 0
+   case TGSI_FILE_ADDRESS:
+      emit_addrs(
+         func,
+         xmm,
+         reg->DstRegister.Index,
+         chan_index );
+      break;
+#endif
+   default:
+      assert( 0 );
+   }
+
+#if 0
+   switch( inst->Instruction.Saturate ) {
+   case TGSI_SAT_NONE:
+      break;
+
+   case TGSI_SAT_ZERO_ONE:
+      /* assert( 0 ); */
+      break;
+
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      assert( 0 );
+      break;
+   }
+#endif
+}
+
+
+#define STORE( GEN, INST, XMM, INDEX, CHAN )\
+   emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
+
+
+
+#if 000
+/**
+ * High-level instruction translators.
+ */
+
+static void
+emit_kil(
+   struct x86_function *func,
+   const struct tgsi_full_src_register *reg )
+{
+   unsigned uniquemask;
+   unsigned registers[4];
+   unsigned nextregister = 0;
+   unsigned firstchan = ~0;
+   unsigned chan_index;
+
+   /* This mask stores component bits that were already tested. Note that
+    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
+    * tested. */
+   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+
+   FOR_EACH_CHANNEL( chan_index ) {
+      unsigned swizzle;
+
+      /* unswizzle channel */
+      swizzle = tgsi_util_get_full_src_register_extswizzle(
+         reg,
+         chan_index );
+
+      /* check if the component has not been already tested */
+      if( !(uniquemask & (1 << swizzle)) ) {
+         uniquemask |= 1 << swizzle;
+
+         /* allocate register */
+         registers[chan_index] = nextregister;
+         emit_fetch(
+            func,
+            nextregister,
+            reg,
+            chan_index );
+         nextregister++;
+
+         /* mark the first channel used */
+         if( firstchan == ~0 ) {
+            firstchan = chan_index;
+         }
+      }
+   }
+
+   x86_push(
+      func,
+      x86_make_reg( file_REG32, reg_AX ) );
+   x86_push(
+      func,
+      x86_make_reg( file_REG32, reg_DX ) );
+
+   FOR_EACH_CHANNEL( chan_index ) {
+      if( uniquemask & (1 << chan_index) ) {
+         sse_cmpps(
+            func,
+            make_xmm( registers[chan_index] ),
+            get_temp(
+               TGSI_EXEC_TEMP_00000000_I,
+               TGSI_EXEC_TEMP_00000000_C ),
+            cc_LessThan );
+
+         if( chan_index == firstchan ) {
+            sse_pmovmskb(
+               func,
+               x86_make_reg( file_REG32, reg_AX ),
+               make_xmm( registers[chan_index] ) );
+         }
+         else {
+            sse_pmovmskb(
+               func,
+               x86_make_reg( file_REG32, reg_DX ),
+               make_xmm( registers[chan_index] ) );
+            x86_or(
+               func,
+               x86_make_reg( file_REG32, reg_AX ),
+               x86_make_reg( file_REG32, reg_DX ) );
+         }
+      }
+   }
+
+   x86_or(
+      func,
+      get_temp(
+         TGSI_EXEC_TEMP_KILMASK_I,
+         TGSI_EXEC_TEMP_KILMASK_C ),
+      x86_make_reg( file_REG32, reg_AX ) );
+
+   x86_pop(
+      func,
+      x86_make_reg( file_REG32, reg_DX ) );
+   x86_pop(
+      func,
+      x86_make_reg( file_REG32, reg_AX ) );
+}
+
+
+static void
+emit_kilp(
+   struct x86_function *func )
+{
+   /* XXX todo / fix me */
+}
+
+
+static void
+emit_setcc(
+   struct x86_function *func,
+   struct tgsi_full_instruction *inst,
+   enum sse_cc cc )
+{
+   unsigned chan_index;
+
+   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+      FETCH( func, *inst, 0, 0, chan_index );
+      FETCH( func, *inst, 1, 1, chan_index );
+      sse_cmpps(
+         func,
+         make_xmm( 0 ),
+         make_xmm( 1 ),
+         cc );
+      sse_andps(
+         func,
+         make_xmm( 0 ),
+         get_temp(
+            TEMP_ONE_I,
+            TEMP_ONE_C ) );
+      STORE( func, *inst, 0, 0, chan_index );
+   }
+}
+
+static void
+emit_cmp(
+   struct x86_function *func,
+   struct tgsi_full_instruction *inst )
+{
+   unsigned chan_index;
+
+   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+      FETCH( func, *inst, 0, 0, chan_index );
+      FETCH( func, *inst, 1, 1, chan_index );
+      FETCH( func, *inst, 2, 2, chan_index );
+      sse_cmpps(
+         func,
+         make_xmm( 0 ),
+         get_temp(
+            TGSI_EXEC_TEMP_00000000_I,
+            TGSI_EXEC_TEMP_00000000_C ),
+         cc_LessThan );
+      sse_andps(
+         func,
+         make_xmm( 1 ),
+         make_xmm( 0 ) );
+      sse_andnps(
+         func,
+         make_xmm( 0 ),
+         make_xmm( 2 ) );
+      sse_orps(
+         func,
+         make_xmm( 0 ),
+         make_xmm( 1 ) );
+      STORE( func, *inst, 0, 0, chan_index );
+   }
+}
+#endif
+
+
+static void
+emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int v0 = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+      FETCH(gen, *inst, 0, 0, chan_index);   /* v0 = srcreg[0] */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_ABS:
+         /* turn off the most significant bit of each vector float word */
+         {
+            int v1 = ppc_allocate_vec_register(gen->f);
+            ppc_vspltisw(gen->f, v1, -1);  /* v1 = {-1, -1, -1, -1} */
+            ppc_vslw(gen->f, v1, v1, v1);  /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */
+            ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */
+            ppc_release_vec_register(gen->f, v1);
+         }
+         break;
+      case TGSI_OPCODE_FLOOR:
+         ppc_vrfim(gen->f, v0, v0);         /* v0 = floor(v0) */
+         break;
+      case TGSI_OPCODE_FRAC:
+         {
+            int v1 = ppc_allocate_vec_register(gen->f);
+            ppc_vrfim(gen->f, v1, v0);         /* v1 = floor(v0) */
+            ppc_vsubfp(gen->f, v0, v0, v1);    /* v0 = v0 - v1 */
+            ppc_release_vec_register(gen->f, v1);
+         }
+         break;
+      case TGSI_OPCODE_EXPBASE2:
+         ppc_vexptefp(gen->f, v0, v0);      /* v0 = 2^v0 */
+         break;
+      case TGSI_OPCODE_LOGBASE2:
+         /* XXX this may be broken! */
+         ppc_vlogefp(gen->f, v0, v0);      /* v0 = log2(v0) */
+         break;
+      case TGSI_OPCODE_MOV:
+         /* nothing */
+         break;
+      default:
+         assert(0);
+      }
+      STORE(gen, *inst, v0, 0, chan_index);   /* store v0 */
+   }
+   ppc_release_vec_register(gen->f, v0);
+}
+
+
+static void
+emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int v0 = ppc_allocate_vec_register(gen->f);
+   int v1 = ppc_allocate_vec_register(gen->f);
+   int v2 = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
+      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_ADD:
+         ppc_vaddfp(gen->f, v2, v0, v1);
+         break;
+      case TGSI_OPCODE_SUB:
+         ppc_vsubfp(gen->f, v2, v0, v1);
+         break;
+      case TGSI_OPCODE_MUL:
+         ppc_vxor(gen->f, v2, v2, v2);        /* v2 = {0, 0, 0, 0} */
+         ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */
+         break;
+      case TGSI_OPCODE_MIN:
+         ppc_vminfp(gen->f, v2, v0, v1);
+         break;
+      case TGSI_OPCODE_MAX:
+         ppc_vmaxfp(gen->f, v2, v0, v1);
+         break;
+      default:
+         assert(0);
+      }
+      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
+   }
+   ppc_release_vec_register(gen->f, v0);
+   ppc_release_vec_register(gen->f, v1);
+   ppc_release_vec_register(gen->f, v2);
+}
+
+
+static void
+emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int v0 = ppc_allocate_vec_register(gen->f);
+   int v1 = ppc_allocate_vec_register(gen->f);
+   int v2 = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
+
+   ppc_vxor(gen->f, v2, v2, v2);           /* v2 = {0, 0, 0, 0} */
+
+   FETCH(gen, *inst, v0, 0, CHAN_X);       /* v0 = src0.XXXX */
+   FETCH(gen, *inst, v1, 1, CHAN_X);       /* v1 = src1.XXXX */
+   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
+
+   FETCH(gen, *inst, v0, 0, CHAN_Y);       /* v0 = src0.YYYY */
+   FETCH(gen, *inst, v1, 1, CHAN_Y);       /* v1 = src1.YYYY */
+   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
+
+   FETCH(gen, *inst, v0, 0, CHAN_Z);       /* v0 = src0.ZZZZ */
+   FETCH(gen, *inst, v1, 1, CHAN_Z);       /* v1 = src1.ZZZZ */
+   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) {
+      FETCH(gen, *inst, v0, 0, CHAN_W);    /* v0 = src0.WWWW */
+      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
+      ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
+   }
+   else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) {
+      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
+      ppc_vaddfp(gen->f, v2, v2, v1);      /* v2 = v2 + v1 */
+   }
+
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+      STORE(gen, *inst, v2, 0, chan_index);  /* store v2 */
+   }
+   ppc_release_vec_register(gen->f, v0);
+   ppc_release_vec_register(gen->f, v1);
+   ppc_release_vec_register(gen->f, v2);
+}
+
+
+static void
+emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int v0 = ppc_allocate_vec_register(gen->f);
+   int v1 = ppc_allocate_vec_register(gen->f);
+   int v2 = ppc_allocate_vec_register(gen->f);
+   int v3 = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
+      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
+      FETCH(gen, *inst, v2, 2, chan_index);   /* v2 = srcreg[2] */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_MAD:
+         ppc_vmaddfp(gen->f, v3, v0, v1, v2);   /* v3 = v0 * v1 + v2 */
+         break;
+      case TGSI_OPCODE_LRP:
+         ppc_vsubfp(gen->f, v3, v1, v2);        /* v3 = v1 - v2 */
+         ppc_vmaddfp(gen->f, v3, v0, v3, v2);   /* v3 = v0 * v3 + v2 */
+         break;
+      default:
+         assert(0);
+      }
+      STORE(gen, *inst, v3, 0, chan_index);   /* store v3 */
+   }
+   ppc_release_vec_register(gen->f, v0);
+   ppc_release_vec_register(gen->f, v1);
+   ppc_release_vec_register(gen->f, v2);
+   ppc_release_vec_register(gen->f, v3);
+}
+
+
+static int
+emit_instruction(struct gen_context *gen,
+                 struct tgsi_full_instruction *inst)
+{
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_ABS:
+   case TGSI_OPCODE_FLOOR:
+   case TGSI_OPCODE_FRAC:
+   case TGSI_OPCODE_EXPBASE2:
+   case TGSI_OPCODE_LOGBASE2:
+      emit_unaryop(gen, inst);
+      break;
+   case TGSI_OPCODE_ADD:
+   case TGSI_OPCODE_SUB:
+   case TGSI_OPCODE_MUL:
+   case TGSI_OPCODE_MIN:
+   case TGSI_OPCODE_MAX:
+      emit_binop(gen, inst);
+      break;
+   case TGSI_OPCODE_MAD:
+   case TGSI_OPCODE_LRP:
+      emit_triop(gen, inst);
+      break;
+   case TGSI_OPCODE_DP3:
+   case TGSI_OPCODE_DP4:
+   case TGSI_OPCODE_DPH:
+      emit_dotprod(gen, inst);
+      break;
+   case TGSI_OPCODE_END:
+      /* normal end */
+      return 1;
+   default:
+      return 0;
+   }
+
+#if 0
+   unsigned chan_index;
+
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ARL:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_f2it( func, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_SWZ:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LIT:
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+         emit_tempf(
+            func,
+            0,
+            TEMP_ONE_I,
+            TEMP_ONE_C);
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
+            STORE( func, *inst, 0, 0, CHAN_X );
+         }
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+            STORE( func, *inst, 0, 0, CHAN_W );
+         }
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+            FETCH( func, *inst, 0, 0, CHAN_X );
+            sse_maxps(
+               func,
+               make_xmm( 0 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_00000000_I,
+                  TGSI_EXEC_TEMP_00000000_C ) );
+            STORE( func, *inst, 0, 0, CHAN_Y );
+         }
+         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+            /* XMM[1] = SrcReg[0].yyyy */
+            FETCH( func, *inst, 1, 0, CHAN_Y );
+            /* XMM[1] = max(XMM[1], 0) */
+            sse_maxps(
+               func,
+               make_xmm( 1 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_00000000_I,
+                  TGSI_EXEC_TEMP_00000000_C ) );
+            /* XMM[2] = SrcReg[0].wwww */
+            FETCH( func, *inst, 2, 0, CHAN_W );
+            /* XMM[2] = min(XMM[2], 128.0) */
+            sse_minps(
+               func,
+               make_xmm( 2 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_128_I,
+                  TGSI_EXEC_TEMP_128_C ) );
+            /* XMM[2] = max(XMM[2], -128.0) */
+            sse_maxps(
+               func,
+               make_xmm( 2 ),
+               get_temp(
+                  TGSI_EXEC_TEMP_MINUS_128_I,
+                  TGSI_EXEC_TEMP_MINUS_128_C ) );
+            emit_pow( func, 3, 1, 2 );
+            FETCH( func, *inst, 0, 0, CHAN_X );
+            sse_xorps(
+               func,
+               make_xmm( 2 ),
+               make_xmm( 2 ) );
+            sse_cmpps(
+               func,
+               make_xmm( 2 ),
+               make_xmm( 0 ),
+               cc_LessThanEqual );
+            sse_andps(
+               func,
+               make_xmm( 2 ),
+               make_xmm( 1 ) );
+            STORE( func, *inst, 2, 0, CHAN_Z );
+         }
+      }
+      break;
+
+   case TGSI_OPCODE_RCP:
+   /* TGSI_OPCODE_RECIP */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_rcp( func, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_RSQ:
+   /* TGSI_OPCODE_RECIPSQRT */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_rsqrt( func, 1, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 1, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_EXP:
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         FETCH( func, *inst, 0, 0, CHAN_X );
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+            emit_MOV( func, 1, 0 );
+            emit_flr( func, 2, 1 );
+            /* dst.x = ex2(floor(src.x)) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
+               emit_MOV( func, 2, 1 );
+               emit_ex2( func, 3, 2 );
+               STORE( func, *inst, 2, 0, CHAN_X );
+            }
+            /* dst.y = src.x - floor(src.x) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+               emit_MOV( func, 2, 0 );
+               emit_sub( func, 2, 1 );
+               STORE( func, *inst, 2, 0, CHAN_Y );
+            }
+         }
+         /* dst.z = ex2(src.x) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+            emit_ex2( func, 3, 0 );
+            STORE( func, *inst, 0, 0, CHAN_Z );
+         }
+      }
+      /* dst.w = 1.0 */
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_LOG:
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         FETCH( func, *inst, 0, 0, CHAN_X );
+         emit_abs( func, 0 );
+         emit_MOV( func, 1, 0 );
+         emit_lg2( func, 2, 1 );
+         /* dst.z = lg2(abs(src.x)) */
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+            STORE( func, *inst, 1, 0, CHAN_Z );
+         }
+         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+            emit_flr( func, 2, 1 );
+            /* dst.x = floor(lg2(abs(src.x))) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
+               STORE( func, *inst, 1, 0, CHAN_X );
+            }
+            /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
+            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+               emit_ex2( func, 2, 1 );
+               emit_rcp( func, 1, 1 );
+               emit_mul( func, 0, 1 );
+               STORE( func, *inst, 0, 0, CHAN_Y );
+            }
+         }
+      }
+      /* dst.w = 1.0 */
+      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MUL:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         emit_mul( func, 0, 1 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_ADD:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         emit_add( func, 0, 1 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DP3:
+   /* TGSI_OPCODE_DOT3 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_mul( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Y );
+      FETCH( func, *inst, 2, 1, CHAN_Y );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Z );
+      FETCH( func, *inst, 2, 1, CHAN_Z );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DP4:
+   /* TGSI_OPCODE_DOT4 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_mul( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Y );
+      FETCH( func, *inst, 2, 1, CHAN_Y );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Z );
+      FETCH( func, *inst, 2, 1, CHAN_Z );
+      emit_mul(func, 1, 2 );
+      emit_add(func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_W );
+      FETCH( func, *inst, 2, 1, CHAN_W );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DST:
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         emit_tempf(
+            func,
+            0,
+            TEMP_ONE_I,
+            TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         FETCH( func, *inst, 0, 0, CHAN_Y );
+         FETCH( func, *inst, 1, 1, CHAN_Y );
+         emit_mul( func, 0, 1 );
+         STORE( func, *inst, 0, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+         FETCH( func, *inst, 0, 0, CHAN_Z );
+         STORE( func, *inst, 0, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+         FETCH( func, *inst, 0, 1, CHAN_W );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MIN:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         sse_minps(
+            func,
+            make_xmm( 0 ),
+            make_xmm( 1 ) );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MAX:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         sse_maxps(
+            func,
+            make_xmm( 0 ),
+            make_xmm( 1 ) );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLT:
+   /* TGSI_OPCODE_SETLT */
+      emit_setcc( func, inst, cc_LessThan );
+      break;
+
+   case TGSI_OPCODE_SGE:
+   /* TGSI_OPCODE_SETGE */
+      emit_setcc( func, inst, cc_NotLessThan );
+      break;
+
+   case TGSI_OPCODE_MAD:
+   /* TGSI_OPCODE_MADD */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         FETCH( func, *inst, 2, 2, chan_index );
+         emit_mul( func, 0, 1 );
+         emit_add( func, 0, 2 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SUB:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         emit_sub( func, 0, 1 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LERP:
+   /* TGSI_OPCODE_LRP */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         FETCH( func, *inst, 1, 1, chan_index );
+         FETCH( func, *inst, 2, 2, chan_index );
+         emit_sub( func, 1, 2 );
+         emit_mul( func, 0, 1 );
+         emit_add( func, 0, 2 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CND:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CND0:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DOT2ADD:
+   /* TGSI_OPCODE_DP2A */
+      return 0;
+      break;
+
+   case TGSI_OPCODE_INDEX:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_NEGATE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_FRAC:
+   /* TGSI_OPCODE_FRC */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_frc( func, 0, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CLAMP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_FLOOR:
+   /* TGSI_OPCODE_FLR */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_flr( func, 0, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_ROUND:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_EXPBASE2:
+   /* TGSI_OPCODE_EX2 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_ex2( func, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LOGBASE2:
+   /* TGSI_OPCODE_LG2 */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_lg2( func, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_POWER:
+   /* TGSI_OPCODE_POW */
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_pow( func, 0, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CROSSPRODUCT:
+   /* TGSI_OPCODE_XPD */
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+         FETCH( func, *inst, 1, 1, CHAN_Z );
+         FETCH( func, *inst, 3, 0, CHAN_Z );
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         FETCH( func, *inst, 0, 0, CHAN_Y );
+         FETCH( func, *inst, 4, 1, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         emit_MOV( func, 2, 0 );
+         emit_mul( func, 2, 1 );
+         emit_MOV( func, 5, 3 );
+         emit_mul( func, 5, 4 );
+         emit_sub( func, 2, 5 );
+         STORE( func, *inst, 2, 0, CHAN_X );
+      }
+      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         FETCH( func, *inst, 2, 1, CHAN_X );
+         FETCH( func, *inst, 5, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         emit_mul( func, 3, 2 );
+         emit_mul( func, 1, 5 );
+         emit_sub( func, 3, 1 );
+         STORE( func, *inst, 3, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+         emit_mul( func, 5, 4 );
+         emit_mul( func, 0, 2 );
+         emit_sub( func, 5, 0 );
+         STORE( func, *inst, 5, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+	 emit_tempf(
+	    func,
+	    0,
+	    TEMP_ONE_I,
+	    TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MULTIPLYMATRIX:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ABS:
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_abs( func, 0) ;
+
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_RCC:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DPH:
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      FETCH( func, *inst, 1, 1, CHAN_X );
+      emit_mul( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Y );
+      FETCH( func, *inst, 2, 1, CHAN_Y );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 0, CHAN_Z );
+      FETCH( func, *inst, 2, 1, CHAN_Z );
+      emit_mul( func, 1, 2 );
+      emit_add( func, 0, 1 );
+      FETCH( func, *inst, 1, 1, CHAN_W );
+      emit_add( func, 0, 1 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_COS:
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_cos( func, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DDX:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DDY:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_KILP:
+      /* predicated kill */
+      emit_kilp( func );
+      return 0; /* XXX fix me */
+      break;
+
+   case TGSI_OPCODE_KIL:
+      /* conditional kill */
+      emit_kil( func, &inst->FullSrcRegisters[0] );
+      break;
+
+   case TGSI_OPCODE_PK2H:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK2US:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK4B:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PK4UB:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_RFL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SEQ:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SFL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SGT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SIN:
+      FETCH( func, *inst, 0, 0, CHAN_X );
+      emit_sin( func, 0, 0 );
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SNE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_STR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TEX:
+      if (0) {
+	 /* Disable dummy texture code: 
+	  */
+	 emit_tempf(
+	    func,
+	    0,
+	    TEMP_ONE_I,
+	    TEMP_ONE_C );
+	 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+	    STORE( func, *inst, 0, 0, chan_index );
+	 }
+      }
+      else {
+	 return 0;
+      }
+      break;
+
+   case TGSI_OPCODE_TXD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP2H:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP2US:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP4B:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_UP4UB:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_X2D:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ARA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ARR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_BRA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CAL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_RET:
+      emit_ret( func );
+      break;
+
+   case TGSI_OPCODE_END:
+      break;
+
+   case TGSI_OPCODE_SSG:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CMP:
+      emit_cmp (func, inst);
+      break;
+
+   case TGSI_OPCODE_SCS:
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+         FETCH( func, *inst, 0, 0, CHAN_X );
+         emit_cos( func, 0, 0 );
+         STORE( func, *inst, 0, 0, CHAN_X );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+         FETCH( func, *inst, 0, 0, CHAN_X );
+         emit_sin( func, 0, 0 );
+         STORE( func, *inst, 0, 0, CHAN_Y );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+	 emit_tempf(
+	    func,
+	    0,
+	    TGSI_EXEC_TEMP_00000000_I,
+	    TGSI_EXEC_TEMP_00000000_C );
+         STORE( func, *inst, 0, 0, CHAN_Z );
+      }
+      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+	 emit_tempf(
+	    func,
+	    0,
+	    TEMP_ONE_I,
+	    TEMP_ONE_C );
+         STORE( func, *inst, 0, 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_TXB:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_NRM:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DIV:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_DP2:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TXL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_BRK:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_IF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_LOOP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_REP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ELSE:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDIF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDLOOP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDREP:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_PUSHA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_POPA:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CEIL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_I2F:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_NOT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TRUNC:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SHL:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SHR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_AND:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_OR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_MOD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_XOR:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_SAD:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TXF:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_TXQ:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_CONT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_EMIT:
+      return 0;
+      break;
+
+   case TGSI_OPCODE_ENDPRIM:
+      return 0;
+      break;
+
+   default:
+      return 0;
+   }
+#endif
+   
+   return 1;
+}
+
+static void
+emit_declaration(
+   struct ppc_function *func,
+   struct tgsi_full_declaration *decl )
+{
+   if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+#if 0
+      unsigned first, last, mask;
+      unsigned i, j;
+
+      first = decl->DeclarationRange.First;
+      last = decl->DeclarationRange.Last;
+      mask = decl->Declaration.UsageMask;
+
+      for( i = first; i <= last; i++ ) {
+         for( j = 0; j < NUM_CHANNELS; j++ ) {
+            if( mask & (1 << j) ) {
+               switch( decl->Declaration.Interpolate ) {
+               case TGSI_INTERPOLATE_CONSTANT:
+                  emit_coef_a0( func, 0, i, j );
+                  emit_inputs( func, 0, i, j );
+                  break;
+
+               case TGSI_INTERPOLATE_LINEAR:
+                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
+                  emit_coef_dadx( func, 1, i, j );
+                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
+                  emit_coef_dady( func, 3, i, j );
+                  emit_mul( func, 0, 1 );    /* x * dadx */
+                  emit_coef_a0( func, 4, i, j );
+                  emit_mul( func, 2, 3 );    /* y * dady */
+                  emit_add( func, 0, 4 );    /* x * dadx + a0 */
+                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
+                  emit_inputs( func, 0, i, j );
+                  break;
+
+               case TGSI_INTERPOLATE_PERSPECTIVE:
+                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
+                  emit_coef_dadx( func, 1, i, j );
+                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
+                  emit_coef_dady( func, 3, i, j );
+                  emit_mul( func, 0, 1 );    /* x * dadx */
+                  emit_tempf( func, 4, 0, TGSI_SWIZZLE_W );
+                  emit_coef_a0( func, 5, i, j );
+                  emit_rcp( func, 4, 4 );    /* 1.0 / w */
+                  emit_mul( func, 2, 3 );    /* y * dady */
+                  emit_add( func, 0, 5 );    /* x * dadx + a0 */
+                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
+                  emit_mul( func, 0, 4 );    /* (x * dadx + y * dady + a0) / w */
+                  emit_inputs( func, 0, i, j );
+                  break;
+
+               default:
+                  assert( 0 );
+		  break;
+               }
+            }
+         }
+      }
+#endif
+   }
+}
+
+#if 0
+static void aos_to_soa( struct x86_function *func, 
+                        uint arg_aos,
+                        uint arg_soa, 
+                        uint arg_num, 
+                        uint arg_stride )
+{
+   struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX );
+   struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX );
+   struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX );
+   struct x86_reg stride = x86_make_reg( file_REG32, reg_DX );
+   int inner_loop;
+
+
+   /* Save EBX */
+   x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
+
+   x86_mov( func, aos_input,  x86_fn_arg( func, arg_aos ) );
+   x86_mov( func, soa_input,  x86_fn_arg( func, arg_soa ) );
+   x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
+   x86_mov( func, stride,     x86_fn_arg( func, arg_stride ) );
+
+   /* do */
+   inner_loop = x86_get_label( func );
+   {
+      x86_push( func, aos_input );
+      sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
+      sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
+      x86_add( func, aos_input, stride );
+      sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
+      sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
+      x86_add( func, aos_input, stride );
+      sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
+      sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
+      x86_add( func, aos_input, stride );
+      sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
+      sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
+      x86_pop( func, aos_input );
+
+      sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
+      sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
+      sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 );
+      sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd );
+      sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 );
+      sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd );
+
+      sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) );
+      sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) );
+      sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) );
+      sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) );
+
+      /* Advance to next input */
+      x86_lea( func, aos_input, x86_make_disp(aos_input, 16) );
+      x86_lea( func, soa_input, x86_make_disp(soa_input, 64) );
+   }
+   /* while --num_inputs */
+   x86_dec( func, num_inputs );
+   x86_jcc( func, cc_NE, inner_loop );
+
+   /* Restore EBX */
+   x86_pop( func, aos_input );
+}
+#endif
+
+#if 0
+static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
+{
+   struct x86_reg soa_output;
+   struct x86_reg aos_output;
+   struct x86_reg num_outputs;
+   struct x86_reg temp;
+   int inner_loop;
+
+   soa_output = x86_make_reg( file_REG32, reg_AX );
+   aos_output = x86_make_reg( file_REG32, reg_BX );
+   num_outputs = x86_make_reg( file_REG32, reg_CX );
+   temp = x86_make_reg( file_REG32, reg_DX );
+
+   /* Save EBX */
+   x86_push( func, aos_output );
+
+   x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
+   x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
+   x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
+
+   /* do */
+   inner_loop = x86_get_label( func );
+   {
+      sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) );
+      sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) );
+      sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) );
+      sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) );
+
+      sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
+      sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
+      sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) );
+      sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) );
+      sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
+      sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
+
+      x86_mov( func, temp, x86_fn_arg( func, stride ) );
+      x86_push( func, aos_output );
+      sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
+      sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
+      x86_add( func, aos_output, temp );
+      sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
+      sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
+      x86_add( func, aos_output, temp );
+      sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
+      sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
+      x86_add( func, aos_output, temp );
+      sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
+      sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
+      x86_pop( func, aos_output );
+
+      /* Advance to next output */
+      x86_lea( func, aos_output, x86_make_disp(aos_output, 16) );
+      x86_lea( func, soa_output, x86_make_disp(soa_output, 64) );
+   }
+   /* while --num_outputs */
+   x86_dec( func, num_outputs );
+   x86_jcc( func, cc_NE, inner_loop );
+
+   /* Restore EBX */
+   x86_pop( func, aos_output );
+}
+#endif
+
+
+static void
+emit_prologue(struct ppc_function *func)
+{
+   /* XXX set up stack frame */
+}
+
+
+static void
+emit_epilogue(struct ppc_function *func)
+{
+   ppc_return(func);
+   /* XXX restore prev stack frame */
+}
+
+
+
+/**
+ * Translate a TGSI vertex/fragment shader to PPC code.
+ *
+ * \param tokens  the TGSI input shader
+ * \param func  the output PPC code/function
+ * \param immediates  buffer to place immediates, later passed to PPC func
+ * \return TRUE for success, FALSE if translation failed
+ */
+boolean
+tgsi_emit_ppc(const struct tgsi_token *tokens,
+              struct ppc_function *func,
+              float (*immediates)[4],
+              boolean do_swizzles )
+{
+   struct tgsi_parse_context parse;
+   /*boolean instruction_phase = FALSE;*/
+   unsigned ok = 1;
+   uint num_immediates = 0;
+   struct gen_context gen;
+
+   util_init_math();
+
+   tgsi_parse_init( &parse, tokens );
+
+   gen.f = func;
+   gen.inputs_reg = 3;   /* first function param */
+   gen.outputs_reg = 4;  /* second function param */
+   gen.temps_reg = 5;    /* ... */
+   gen.immed_reg = 6;
+   gen.const_reg = 7;
+
+   emit_prologue(func);
+
+   /*
+    * Different function args for vertex/fragment shaders:
+    */
+#if 0
+   if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+      /* DECLARATION phase, do not load output argument. */
+      x86_mov(
+         func,
+         get_input_base(),
+         x86_fn_arg( func, 1 ) );
+      /* skipping outputs argument here */
+      x86_mov(
+         func,
+         get_const_base(),
+         x86_fn_arg( func, 3 ) );
+      x86_mov(
+         func,
+         get_temp_base(),
+         x86_fn_arg( func, 4 ) );
+      x86_mov(
+         func,
+         get_coef_base(),
+         x86_fn_arg( func, 5 ) );
+      x86_mov(
+         func,
+         get_immediate_base(),
+         x86_fn_arg( func, 6 ) );
+   }
+   else {
+      assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
+
+      if (do_swizzles)
+         aos_to_soa( func, 
+                     6,         /* aos_input */
+                     1,         /* machine->input */
+                     7,         /* num_inputs */
+                     8 );       /* input_stride */
+
+      x86_mov(
+         func,
+         get_input_base(),
+         x86_fn_arg( func, 1 ) );
+      x86_mov(
+         func,
+         get_output_base(),
+         x86_fn_arg( func, 2 ) );
+      x86_mov(
+         func,
+         get_const_base(),
+         x86_fn_arg( func, 3 ) );
+      x86_mov(
+         func,
+         get_temp_base(),
+         x86_fn_arg( func, 4 ) );
+      x86_mov(
+         func,
+         get_immediate_base(),
+         x86_fn_arg( func, 5 ) );
+   }
+#endif
+
+   while (!tgsi_parse_end_of_tokens(&parse) && ok) {
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+            emit_declaration(func, &parse.FullToken.FullDeclaration );
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
+#if 0
+            if( !instruction_phase ) {
+               /* INSTRUCTION phase, overwrite coeff with output. */
+               instruction_phase = TRUE;
+               x86_mov(
+                  func,
+                  get_output_base(),
+                  x86_fn_arg( func, 2 ) );
+            }
+#endif
+         }
+
+         ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);
+
+	 if (!ok) {
+	    debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n", 
+			 parse.FullToken.FullInstruction.Instruction.Opcode,
+                         parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
+                         "vertex shader" : "fragment shader");
+	 }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* splat each immediate component into a float[4] vector for SoA */
+         {
+            const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+            float *imm = (float *) immediates;
+            uint i;
+            assert(size <= 4);
+            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
+            for (i = 0; i < size; i++) {
+               const float value =
+                  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+               imm[num_immediates * 4 + 0] = 
+               imm[num_immediates * 4 + 1] = 
+               imm[num_immediates * 4 + 2] = 
+               imm[num_immediates * 4 + 3] = value;
+               num_immediates++;
+            }
+         }
+         break;
+
+      default:
+	 ok = 0;
+         assert( 0 );
+      }
+   }
+
+#if 0
+   if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
+      if (do_swizzles)
+         soa_to_aos( func, 9, 2, 10, 11 );
+   }
+#endif
+
+   emit_epilogue(func);
+
+   tgsi_parse_free( &parse );
+
+   return ok;
+}
+
+#endif /* PIPE_ARCH_PPC */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h
new file mode 100644
index 00000000000..7cd2bf9aff2
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h
@@ -0,0 +1,48 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef TGSI_PPC_H
+#define TGSI_PPC_H
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+struct tgsi_token;
+struct ppc_function;
+
+boolean
+tgsi_emit_ppc(const struct tgsi_token *tokens,
+              struct ppc_function *function,
+              float (*immediates)[4],
+              boolean do_swizzles);
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_PPC_H */
-- 
cgit v1.2.3


From da63edd720fc154820fcbf699e1056ac9357a03f Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 22 Oct 2008 13:59:11 -0600
Subject: gallium: fix broken TGSI_FILE_CONSTANT case, use
 ppc_reserver_register()

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 112e7365233..dbf215c0d57 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -1108,10 +1108,15 @@ emit_fetch(struct gen_context *gen,
             int offset_reg = ppc_allocate_register(gen->f);
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
             ppc_li(gen->f, offset_reg, offset);
-            /* load vector word */
+            /* Load 4-byte word into vector register.
+             * The vector slot depends on the effective address we load from.
+             * We know that our constants start at a 16-byte boundary so we
+             * know that 'swizzle' tells us which vector slot will have the
+             * loaded word.  The other vector slots will be undefined.
+             */
             ppc_lvewx(gen->f, vec_reg, gen->const_reg, offset_reg);
-            /* splat word[0] across vector */
-            ppc_vspltw(gen->f, vec_reg, vec_reg, 0);
+            /* splat word[swizzle] across the vector reg */
+            ppc_vspltw(gen->f, vec_reg, vec_reg, swizzle);
             ppc_release_register(gen->f, offset_reg);
          }
          break;
@@ -2635,11 +2640,11 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
    tgsi_parse_init( &parse, tokens );
 
    gen.f = func;
-   gen.inputs_reg = 3;   /* first function param */
-   gen.outputs_reg = 4;  /* second function param */
-   gen.temps_reg = 5;    /* ... */
-   gen.immed_reg = 6;
-   gen.const_reg = 7;
+   gen.inputs_reg = ppc_reserve_register(func, 3);   /* first function param */
+   gen.outputs_reg = ppc_reserve_register(func, 4);  /* second function param */
+   gen.temps_reg = ppc_reserve_register(func, 5);    /* ... */
+   gen.immed_reg = ppc_reserve_register(func, 6);
+   gen.const_reg = ppc_reserve_register(func, 7);
 
    emit_prologue(func);
 
-- 
cgit v1.2.3


From 51840065607337210fbba5ba1c01874293fbb42e Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 22 Oct 2008 14:48:58 -0600
Subject: gallium: TGSI->PPC inequality operators

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 70 +++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index dbf215c0d57..9bf364b8c4a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -1495,6 +1495,68 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 }
 
 
+/**
+ * Vector comparisons, resulting in 1.0 or 0.0 values.
+ */
+static void
+emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int v0 = ppc_allocate_vec_register(gen->f);
+   int v1 = ppc_allocate_vec_register(gen->f);
+   int v2 = ppc_allocate_vec_register(gen->f);
+   int v_one = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
+   boolean complement = FALSE;
+
+   /* v_one = splat(1.0) */
+   ppc_vload_float(gen->f, v_one, 1.0f);
+
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
+      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
+
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_SNE:
+         complement = TRUE;
+         /* fall-through */
+      case TGSI_OPCODE_SEQ:
+         ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */
+         break;
+
+      case TGSI_OPCODE_SGE:
+         complement = TRUE;
+         /* fall-through */
+      case TGSI_OPCODE_SLT:
+         ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */
+         break;
+
+      case TGSI_OPCODE_SLE:
+         complement = TRUE;
+         /* fall-through */
+      case TGSI_OPCODE_SGT:
+         ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */
+         break;
+      default:
+         assert(0);
+      }
+
+      /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */
+
+      if (complement)
+         ppc_vandc(gen->f, v2, v_one, v2);    /* v2 = v_one & ~v2 */
+      else
+         ppc_vand(gen->f, v2, v_one, v2);     /* v2 = v_one & v2 */
+
+      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
+   }
+
+   ppc_release_vec_register(gen->f, v0);
+   ppc_release_vec_register(gen->f, v1);
+   ppc_release_vec_register(gen->f, v2);
+   ppc_release_vec_register(gen->f, v_one);
+}
+
+
 static void
 emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
@@ -1588,6 +1650,14 @@ emit_instruction(struct gen_context *gen,
    case TGSI_OPCODE_MAX:
       emit_binop(gen, inst);
       break;
+   case TGSI_OPCODE_SEQ:
+   case TGSI_OPCODE_SNE:
+   case TGSI_OPCODE_SLT:
+   case TGSI_OPCODE_SGT:
+   case TGSI_OPCODE_SLE:
+   case TGSI_OPCODE_SGE:
+      emit_inequality(gen, inst);
+      break;
    case TGSI_OPCODE_MAD:
    case TGSI_OPCODE_LRP:
       emit_triop(gen, inst);
-- 
cgit v1.2.3


From c6ff870836e7c970f1030e9e0fbdd0cb5df40d29 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 22 Oct 2008 15:21:22 -0600
Subject: cell: TGSI->PPC for RSQ, RCP and src register sign modes

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 162 ++++++++++++++++++++++++----------
 1 file changed, 116 insertions(+), 46 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 9bf364b8c4a..36377721022 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -84,11 +84,14 @@
 struct gen_context
 {
    struct ppc_function *f;
-   int inputs_reg;    /**< register pointing to input params */
-   int outputs_reg;   /**< register pointing to output params */
-   int temps_reg;     /**< register pointing to temporary "registers" */
-   int immed_reg;     /**< register pointing to immediates buffer */
-   int const_reg;     /**< register pointing to constants buffer */
+   int inputs_reg;    /**< GP register pointing to input params */
+   int outputs_reg;   /**< GP register pointing to output params */
+   int temps_reg;     /**< GP register pointing to temporary "registers" */
+   int immed_reg;     /**< GP register pointing to immediates buffer */
+   int const_reg;     /**< GP register pointing to constants buffer */
+
+   int one_vec;       /**< vector register with {1.0, 1.0, 1.0, 1.0} */
+   int bit31_vec;     /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */
 };
 
 
@@ -1059,6 +1062,35 @@ emit_sub(
 #endif
 
 
+/**
+ * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}.
+ */
+static int
+gen_one_vec(struct gen_context *gen)
+{
+   if (gen->one_vec < 0) {
+      gen->one_vec = ppc_allocate_vec_register(gen->f);
+      ppc_vload_float(gen->f, gen->one_vec, 1.0f);
+   }
+   return gen->one_vec;
+}
+
+/**
+ * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}.
+ */
+static int
+gen_get_bit31_vec(struct gen_context *gen)
+{
+   if (gen->bit31_vec < 0) {
+      gen->bit31_vec = ppc_allocate_vec_register(gen->f);
+      ppc_vspltisw(gen->f, gen->bit31_vec, -1);
+      ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec);
+   }
+   return gen->bit31_vec;
+}
+
+
+
 /**
  * Register fetch.
  */
@@ -1124,49 +1156,42 @@ emit_fetch(struct gen_context *gen,
          assert( 0 );
       }
       break;
-
    case TGSI_EXTSWIZZLE_ZERO:
-#if 0
-      emit_tempf(
-         func,
-         xmm,
-         TGSI_EXEC_TEMP_00000000_I,
-         TGSI_EXEC_TEMP_00000000_C );
-#endif
+      ppc_vload_float(gen->f, vec_reg, 0.0f);
       break;
-
    case TGSI_EXTSWIZZLE_ONE:
-#if 0
-      emit_tempf(
-         func,
-         xmm,
-         TEMP_ONE_I,
-         TEMP_ONE_C );
-#endif
+      {
+         int one_vec = gen_one_vec(gen);
+         ppc_vecmove(gen->f, vec_reg, one_vec);
+      }
       break;
-
    default:
       assert( 0 );
    }
 
-#if 0
-   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
-   case TGSI_UTIL_SIGN_CLEAR:
-      emit_abs( func, xmm );
-      break;
-
-   case TGSI_UTIL_SIGN_SET:
-      emit_setsign( func, xmm );
-      break;
-
-   case TGSI_UTIL_SIGN_TOGGLE:
-      emit_neg( func, xmm );
-      break;
-
-   case TGSI_UTIL_SIGN_KEEP:
-      break;
+   {
+      uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
+      if (sign_op != TGSI_UTIL_SIGN_KEEP) {
+         int bit31_vec = gen_get_bit31_vec(gen);
+
+         switch (sign_op) {
+         case TGSI_UTIL_SIGN_CLEAR:
+            /* vec = vec & ~bit31 */
+            ppc_vandc(gen->f, vec_reg, vec_reg, bit31_vec);
+            break;
+         case TGSI_UTIL_SIGN_SET:
+            /* vec = vec | bit31 */
+            ppc_vor(gen->f, vec_reg, vec_reg, bit31_vec);
+            break;
+         case TGSI_UTIL_SIGN_TOGGLE:
+            /* vec = vec ^ bit31 */
+            ppc_vxor(gen->f, vec_reg, vec_reg, bit31_vec);
+            break;
+         default:
+            assert(0);
+         }
+      }
    }
-#endif
 }
 
 #define FETCH( GEN, INST, VEC_REG, SRC_REG, CHAN ) \
@@ -1409,6 +1434,36 @@ emit_cmp(
 #endif
 
 
+static void
+emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int v0 = ppc_allocate_vec_register(gen->f);
+   int v1 = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
+
+   FETCH(gen, *inst, v0, 0, CHAN_X);
+
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_RSQ:
+      /* v1 = 1.0 / sqrt(v0) */
+      ppc_vrsqrtefp(gen->f, v1, v0);
+      break;
+   case TGSI_OPCODE_RCP:
+      /* v1 = 1.0 / v0 */
+      ppc_vrefp(gen->f, v1, v0);
+      break;
+   default:
+      assert(0);
+   }
+
+   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+      STORE(gen, *inst, v1, 0, chan_index);
+   }
+   ppc_release_vec_register(gen->f, v0);
+   ppc_release_vec_register(gen->f, v1);
+}
+
+
 static void
 emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
@@ -1504,12 +1559,9 @@ emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
    int v0 = ppc_allocate_vec_register(gen->f);
    int v1 = ppc_allocate_vec_register(gen->f);
    int v2 = ppc_allocate_vec_register(gen->f);
-   int v_one = ppc_allocate_vec_register(gen->f);
    uint chan_index;
    boolean complement = FALSE;
-
-   /* v_one = splat(1.0) */
-   ppc_vload_float(gen->f, v_one, 1.0f);
+   int one_vec = gen_one_vec(gen);
 
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
       FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
@@ -1543,9 +1595,9 @@ emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
       /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */
 
       if (complement)
-         ppc_vandc(gen->f, v2, v_one, v2);    /* v2 = v_one & ~v2 */
+         ppc_vandc(gen->f, v2, one_vec, v2);    /* v2 = one_vec & ~v2 */
       else
-         ppc_vand(gen->f, v2, v_one, v2);     /* v2 = v_one & v2 */
+         ppc_vand(gen->f, v2, one_vec, v2);     /* v2 = one_vec & v2 */
 
       STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
    }
@@ -1553,7 +1605,6 @@ emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
    ppc_release_vec_register(gen->f, v0);
    ppc_release_vec_register(gen->f, v1);
    ppc_release_vec_register(gen->f, v2);
-   ppc_release_vec_register(gen->f, v_one);
 }
 
 
@@ -1630,6 +1681,14 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 }
 
 
+/*
+static void
+emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+}
+*/
+
+
 static int
 emit_instruction(struct gen_context *gen,
                  struct tgsi_full_instruction *inst)
@@ -1643,6 +1702,10 @@ emit_instruction(struct gen_context *gen,
    case TGSI_OPCODE_LOGBASE2:
       emit_unaryop(gen, inst);
       break;
+   case TGSI_OPCODE_RSQ:
+   case TGSI_OPCODE_RCP:
+      emit_scalar_unaryop(gen, inst);
+      break;
    case TGSI_OPCODE_ADD:
    case TGSI_OPCODE_SUB:
    case TGSI_OPCODE_MUL:
@@ -1667,6 +1730,11 @@ emit_instruction(struct gen_context *gen,
    case TGSI_OPCODE_DPH:
       emit_dotprod(gen, inst);
       break;
+      /*
+   case TGSI_OPCODE_LIT:
+      emit_lit(gen, inst);
+      break;
+      */
    case TGSI_OPCODE_END:
       /* normal end */
       return 1;
@@ -2715,6 +2783,8 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
    gen.temps_reg = ppc_reserve_register(func, 5);    /* ... */
    gen.immed_reg = ppc_reserve_register(func, 6);
    gen.const_reg = ppc_reserve_register(func, 7);
+   gen.one_vec = -1;
+   gen.bit31_vec = -1;
 
    emit_prologue(func);
 
-- 
cgit v1.2.3


From 519c2dbed57b3c5e1717a62df5d5f8b908a1acd6 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 22 Oct 2008 15:30:00 -0600
Subject: gallium: remove SSE remnants from tgsi_ppc.c

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 2987 +++++----------------------------
 1 file changed, 417 insertions(+), 2570 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 36377721022..432ec7459b3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -44,14 +44,6 @@
 #include "rtasm/rtasm_ppc.h"
 
 
-/* for 1/sqrt()
- *
- * This costs about 100fps (close to 10%) in gears:
- */
-#define HIGH_PRECISION 1
-
-#define FAST_MATH 1
-
 
 #define FOR_EACH_CHANNEL( CHAN )\
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
@@ -95,2452 +87,515 @@ struct gen_context
 };
 
 
-
-#if 0000
-
 /**
- * X86 utility functions.
+ * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}.
  */
-
-static struct x86_reg
-make_xmm(
-   unsigned xmm )
+static int
+gen_one_vec(struct gen_context *gen)
 {
-   return x86_make_reg(
-      file_XMM,
-      (enum x86_reg_name) xmm );
+   if (gen->one_vec < 0) {
+      gen->one_vec = ppc_allocate_vec_register(gen->f);
+      ppc_vload_float(gen->f, gen->one_vec, 1.0f);
+   }
+   return gen->one_vec;
 }
 
 /**
- * X86 register mapping helpers.
+ * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}.
  */
-
-static struct x86_reg
-get_const_base( void )
-{
-   return x86_make_reg(
-      file_REG32,
-      reg_CX );
-}
-
-static struct x86_reg
-get_input_base( void )
-{
-   return x86_make_reg(
-      file_REG32,
-      reg_AX );
-}
-
-static struct x86_reg
-get_output_base( void )
-{
-   return x86_make_reg(
-      file_REG32,
-      reg_DX );
-}
-
-static struct x86_reg
-get_temp_base( void )
-{
-   return x86_make_reg(
-      file_REG32,
-      reg_BX );
-}
-
-static struct x86_reg
-get_coef_base( void )
+static int
+gen_get_bit31_vec(struct gen_context *gen)
 {
-   return get_output_base();
+   if (gen->bit31_vec < 0) {
+      gen->bit31_vec = ppc_allocate_vec_register(gen->f);
+      ppc_vspltisw(gen->f, gen->bit31_vec, -1);
+      ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec);
+   }
+   return gen->bit31_vec;
 }
 
-static struct x86_reg
-get_immediate_base( void )
-{
-   return x86_make_reg(
-      file_REG32,
-      reg_DI );
-}
 
 
 /**
- * Data access helpers.
+ * Register fetch.
  */
-
-
-static struct x86_reg
-get_immediate(
-   unsigned vec,
-   unsigned chan )
-{
-   return x86_make_disp(
-      get_immediate_base(),
-      (vec * 4 + chan) * 4 );
-}
-
-static struct x86_reg
-get_const(
-   unsigned vec,
-   unsigned chan )
-{
-   return x86_make_disp(
-      get_const_base(),
-      (vec * 4 + chan) * 4 );
-}
-
-static struct x86_reg
-get_input(
-   unsigned vec,
-   unsigned chan )
+static void
+emit_fetch(struct gen_context *gen,
+           unsigned vec_reg,
+           const struct tgsi_full_src_register *reg,
+           const unsigned chan_index)
 {
-   return x86_make_disp(
-      get_input_base(),
-      (vec * 4 + chan) * 16 );
-}
+   uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index);
 
-static struct x86_reg
-get_output(
-   unsigned vec,
-   unsigned chan )
-{
-   return x86_make_disp(
-      get_output_base(),
-      (vec * 4 + chan) * 16 );
-}
+   switch (swizzle) {
+   case TGSI_EXTSWIZZLE_X:
+   case TGSI_EXTSWIZZLE_Y:
+   case TGSI_EXTSWIZZLE_Z:
+   case TGSI_EXTSWIZZLE_W:
+      switch (reg->SrcRegister.File) {
+      case TGSI_FILE_INPUT:
+         {
+            int offset_reg = ppc_allocate_register(gen->f);
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            ppc_li(gen->f, offset_reg, offset);
+            ppc_lvx(gen->f, vec_reg, gen->inputs_reg, offset_reg);
+            ppc_release_register(gen->f, offset_reg);
+         }
+         break;
+      case TGSI_FILE_TEMPORARY:
+         {
+            int offset_reg = ppc_allocate_register(gen->f);
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            ppc_li(gen->f, offset_reg, offset);
+            ppc_lvx(gen->f, vec_reg, gen->temps_reg, offset_reg);
+            ppc_release_register(gen->f, offset_reg);
+         }
+         break;
+      case TGSI_FILE_IMMEDIATE:
+         {
+            int offset_reg = ppc_allocate_register(gen->f);
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            ppc_li(gen->f, offset_reg, offset);
+            ppc_lvx(gen->f, vec_reg, gen->immed_reg, offset_reg);
+            ppc_release_register(gen->f, offset_reg);
+         }
+         break;
+      case TGSI_FILE_CONSTANT:
+         {
+            int offset_reg = ppc_allocate_register(gen->f);
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
+            ppc_li(gen->f, offset_reg, offset);
+            /* Load 4-byte word into vector register.
+             * The vector slot depends on the effective address we load from.
+             * We know that our constants start at a 16-byte boundary so we
+             * know that 'swizzle' tells us which vector slot will have the
+             * loaded word.  The other vector slots will be undefined.
+             */
+            ppc_lvewx(gen->f, vec_reg, gen->const_reg, offset_reg);
+            /* splat word[swizzle] across the vector reg */
+            ppc_vspltw(gen->f, vec_reg, vec_reg, swizzle);
+            ppc_release_register(gen->f, offset_reg);
+         }
+         break;
+      default:
+         assert( 0 );
+      }
+      break;
+   case TGSI_EXTSWIZZLE_ZERO:
+      ppc_vload_float(gen->f, vec_reg, 0.0f);
+      break;
+   case TGSI_EXTSWIZZLE_ONE:
+      {
+         int one_vec = gen_one_vec(gen);
+         ppc_vecmove(gen->f, vec_reg, one_vec);
+      }
+      break;
+   default:
+      assert( 0 );
+   }
 
-static struct x86_reg
-get_temp(
-   unsigned vec,
-   unsigned chan )
-{
-   return x86_make_disp(
-      get_temp_base(),
-      (vec * 4 + chan) * 16 );
-}
+   {
+      uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
+      if (sign_op != TGSI_UTIL_SIGN_KEEP) {
+         int bit31_vec = gen_get_bit31_vec(gen);
 
-static struct x86_reg
-get_coef(
-   unsigned vec,
-   unsigned chan,
-   unsigned member )
-{
-   return x86_make_disp(
-      get_coef_base(),
-      ((vec * 3 + member) * 4 + chan) * 4 );
+         switch (sign_op) {
+         case TGSI_UTIL_SIGN_CLEAR:
+            /* vec = vec & ~bit31 */
+            ppc_vandc(gen->f, vec_reg, vec_reg, bit31_vec);
+            break;
+         case TGSI_UTIL_SIGN_SET:
+            /* vec = vec | bit31 */
+            ppc_vor(gen->f, vec_reg, vec_reg, bit31_vec);
+            break;
+         case TGSI_UTIL_SIGN_TOGGLE:
+            /* vec = vec ^ bit31 */
+            ppc_vxor(gen->f, vec_reg, vec_reg, bit31_vec);
+            break;
+         default:
+            assert(0);
+         }
+      }
+   }
 }
 
+#define FETCH( GEN, INST, VEC_REG, SRC_REG, CHAN ) \
+   emit_fetch( GEN, VEC_REG, &(INST).FullSrcRegisters[SRC_REG], CHAN )
 
-static void
-emit_ret(
-   struct x86_function  *func )
-{
-   x86_ret( func );
-}
-
-#endif
 
-/**
- * Data fetch helpers.
- */
 
-#if 00
 /**
- * Copy a shader constant to xmm register
- * \param xmm  the destination xmm register
- * \param vec  the src const buffer index
- * \param chan  src channel to fetch (X, Y, Z or W)
+ * Register store.
  */
 static void
-emit_const(
-   struct x86_function *func,
-   uint xmm,
-   int vec,
-   uint chan,
-   uint indirect,
-   uint indirectFile,
-   int indirectIndex )
+emit_store(struct gen_context *gen,
+           unsigned vec_reg,
+           const struct tgsi_full_dst_register *reg,
+           const struct tgsi_full_instruction *inst,
+           unsigned chan_index)
 {
-   if (indirect) {
-      struct x86_reg r0 = get_input_base();
-      struct x86_reg r1 = get_output_base();
-      uint i;
-
-      assert( indirectFile == TGSI_FILE_ADDRESS );
-      assert( indirectIndex == 0 );
-
-      x86_push( func, r0 );
-      x86_push( func, r1 );
-
-      for (i = 0; i < QUAD_SIZE; i++) {
-         x86_lea( func, r0, get_const( vec, chan ) );
-         x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) );
-
-         /* Quick hack to multiply by 16 -- need to add SHL to rtasm.
-          */
-         x86_add( func, r1, r1 );
-         x86_add( func, r1, r1 );
-         x86_add( func, r1, r1 );
-         x86_add( func, r1, r1 );
-
-         x86_add( func, r0, r1 );
-         x86_mov( func, r1, x86_deref( r0 ) );
-         x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 );
+   switch (reg->DstRegister.File) {
+   case TGSI_FILE_OUTPUT:
+      {
+         int offset_reg = ppc_allocate_register(gen->f);
+         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
+         ppc_li(gen->f, offset_reg, offset);
+         ppc_stvx(gen->f, vec_reg, gen->outputs_reg, offset_reg);
+         ppc_release_register(gen->f, offset_reg);
       }
-
-      x86_pop( func, r1 );
-      x86_pop( func, r0 );
-
-      sse_movaps(
+      break;
+   case TGSI_FILE_TEMPORARY:
+      {
+         int offset_reg = ppc_allocate_register(gen->f);
+         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
+         ppc_li(gen->f, offset_reg, offset);
+         ppc_stvx(gen->f, vec_reg, gen->temps_reg, offset_reg);
+         ppc_release_register(gen->f, offset_reg);
+      }
+      break;
+#if 0
+   case TGSI_FILE_ADDRESS:
+      emit_addrs(
          func,
-         make_xmm( xmm ),
-         get_temp( TEMP_R0, CHAN_X ) );
+         xmm,
+         reg->DstRegister.Index,
+         chan_index );
+      break;
+#endif
+   default:
+      assert( 0 );
    }
-   else {
-      assert( vec >= 0 );
 
-      sse_movss(
-         func,
-         make_xmm( xmm ),
-         get_const( vec, chan ) );
-      sse_shufps(
-         func,
-         make_xmm( xmm ),
-         make_xmm( xmm ),
-         SHUF( 0, 0, 0, 0 ) );
+#if 0
+   switch( inst->Instruction.Saturate ) {
+   case TGSI_SAT_NONE:
+      break;
+
+   case TGSI_SAT_ZERO_ONE:
+      /* assert( 0 ); */
+      break;
+
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      assert( 0 );
+      break;
    }
+#endif
 }
 
-static void
-emit_immediate(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movss(
-      func,
-      make_xmm( xmm ),
-      get_immediate( vec, chan ) );
-   sse_shufps(
-      func,
-      make_xmm( xmm ),
-      make_xmm( xmm ),
-      SHUF( 0, 0, 0, 0 ) );
-}
 
+#define STORE( GEN, INST, XMM, INDEX, CHAN )\
+   emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
 
-/**
- * Copy a shader input to xmm register
- * \param xmm  the destination xmm register
- * \param vec  the src input attrib
- * \param chan  src channel to fetch (X, Y, Z or W)
- */
-static void
-emit_inputf(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movups(
-      func,
-      make_xmm( xmm ),
-      get_input( vec, chan ) );
-}
 
-/**
- * Store an xmm register to a shader output
- * \param xmm  the source xmm register
- * \param vec  the dest output attrib
- * \param chan  src dest channel to store (X, Y, Z or W)
- */
-static void
-emit_output(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movups(
-      func,
-      get_output( vec, chan ),
-      make_xmm( xmm ) );
-}
 
-/**
- * Copy a shader temporary to xmm register
- * \param xmm  the destination xmm register
- * \param vec  the src temp register
- * \param chan  src channel to fetch (X, Y, Z or W)
- */
 static void
-emit_tempf(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
+emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   sse_movaps(
-      func,
-      make_xmm( xmm ),
-      get_temp( vec, chan ) );
-}
+   int v0 = ppc_allocate_vec_register(gen->f);
+   int v1 = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
 
-/**
- * Load an xmm register with an input attrib coefficient (a0, dadx or dady)
- * \param xmm  the destination xmm register
- * \param vec  the src input/attribute coefficient index
- * \param chan  src channel to fetch (X, Y, Z or W)
- * \param member  0=a0, 1=dadx, 2=dady
- */
-static void
-emit_coef(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan,
-   unsigned member )
-{
-   sse_movss(
-      func,
-      make_xmm( xmm ),
-      get_coef( vec, chan, member ) );
-   sse_shufps(
-      func,
-      make_xmm( xmm ),
-      make_xmm( xmm ),
-      SHUF( 0, 0, 0, 0 ) );
+   FETCH(gen, *inst, v0, 0, CHAN_X);
+
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_RSQ:
+      /* v1 = 1.0 / sqrt(v0) */
+      ppc_vrsqrtefp(gen->f, v1, v0);
+      break;
+   case TGSI_OPCODE_RCP:
+      /* v1 = 1.0 / v0 */
+      ppc_vrefp(gen->f, v1, v0);
+      break;
+   default:
+      assert(0);
+   }
+
+   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+      STORE(gen, *inst, v1, 0, chan_index);
+   }
+   ppc_release_vec_register(gen->f, v0);
+   ppc_release_vec_register(gen->f, v1);
 }
 
-/**
- * Data store helpers.
- */
 
 static void
-emit_inputs(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movups(
-      func,
-      get_input( vec, chan ),
-      make_xmm( xmm ) );
-}
-
-static void
-emit_temps(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movaps(
-      func,
-      get_temp( vec, chan ),
-      make_xmm( xmm ) );
-}
-
-static void
-emit_addrs(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   assert( vec == 0 );
-
-   emit_temps(
-      func,
-      xmm,
-      vec + TGSI_EXEC_TEMP_ADDR,
-      chan );
-}
-
-/**
- * Coefficent fetch helpers.
- */
-
-static void
-emit_coef_a0(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   emit_coef(
-      func,
-      xmm,
-      vec,
-      chan,
-      0 );
-}
-
-static void
-emit_coef_dadx(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   emit_coef(
-      func,
-      xmm,
-      vec,
-      chan,
-      1 );
-}
-
-static void
-emit_coef_dady(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   emit_coef(
-      func,
-      xmm,
-      vec,
-      chan,
-      2 );
-}
-#endif
-
-
-/**
- * Function call helpers.
- */
-
-#if 00
-/**
- * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be 
- * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee
- * that the stack pointer is 16 byte aligned, as expected.
- */
-static void
-emit_func_call_dst(
-   struct x86_function *func,
-   unsigned xmm_save,
-   unsigned xmm_dst,
-   void (PIPE_CDECL *code)() )
+emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-   unsigned i, n, xmm;
-   unsigned xmm_mask;
-   
-   /* Bitmask of the xmm registers to save */
-   xmm_mask = (1 << xmm_save) - 1;
-   xmm_mask &= ~(1 << xmm_dst);
-
-   sse_movaps(
-      func,
-      get_temp( TEMP_R0, 0 ),
-      make_xmm( xmm_dst ) );
-
-   x86_push(
-      func,
-      x86_make_reg( file_REG32, reg_AX) );
-   x86_push(
-      func,
-      x86_make_reg( file_REG32, reg_CX) );
-   x86_push(
-      func,
-      x86_make_reg( file_REG32, reg_DX) );
-   
-   for(i = 0, n = 0; i < 8; ++i)
-      if(xmm_mask & (1 << i))
-         ++n;
-   
-   x86_sub_imm(
-      func, 
-      x86_make_reg( file_REG32, reg_SP ),
-      n*16);
-
-   for(i = 0, n = 0; i < 8; ++i)
-      if(xmm_mask & (1 << i)) {
-         sse_movups(
-            func,
-            x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ),
-            make_xmm( xmm ) );
-         ++n;
-      }
-   
-   x86_lea(
-      func,
-      ecx,
-      get_temp( TEMP_R0, 0 ) );
-   
-   x86_push( func, ecx );
-   x86_mov_reg_imm( func, ecx, (unsigned long) code );
-   x86_call( func, ecx );
-   x86_pop(func, ecx );
-   
-   for(i = 0, n = 0; i < 8; ++i)
-      if(xmm_mask & (1 << i)) {
-         sse_movups(
-            func,
-            make_xmm( xmm ),
-            x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) );
-         ++n;
+   int v0 = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+      FETCH(gen, *inst, 0, 0, chan_index);   /* v0 = srcreg[0] */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_ABS:
+         /* turn off the most significant bit of each vector float word */
+         {
+            int v1 = ppc_allocate_vec_register(gen->f);
+            ppc_vspltisw(gen->f, v1, -1);  /* v1 = {-1, -1, -1, -1} */
+            ppc_vslw(gen->f, v1, v1, v1);  /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */
+            ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */
+            ppc_release_vec_register(gen->f, v1);
+         }
+         break;
+      case TGSI_OPCODE_FLOOR:
+         ppc_vrfim(gen->f, v0, v0);         /* v0 = floor(v0) */
+         break;
+      case TGSI_OPCODE_FRAC:
+         {
+            int v1 = ppc_allocate_vec_register(gen->f);
+            ppc_vrfim(gen->f, v1, v0);         /* v1 = floor(v0) */
+            ppc_vsubfp(gen->f, v0, v0, v1);    /* v0 = v0 - v1 */
+            ppc_release_vec_register(gen->f, v1);
+         }
+         break;
+      case TGSI_OPCODE_EXPBASE2:
+         ppc_vexptefp(gen->f, v0, v0);      /* v0 = 2^v0 */
+         break;
+      case TGSI_OPCODE_LOGBASE2:
+         /* XXX this may be broken! */
+         ppc_vlogefp(gen->f, v0, v0);      /* v0 = log2(v0) */
+         break;
+      case TGSI_OPCODE_MOV:
+         /* nothing */
+         break;
+      default:
+         assert(0);
       }
-   
-   x86_add_imm(
-      func, 
-      x86_make_reg( file_REG32, reg_SP ),
-      n*16);
-
-   /* Restore GP registers in a reverse order.
-    */
-   x86_pop(
-      func,
-      x86_make_reg( file_REG32, reg_DX) );
-   x86_pop(
-      func,
-      x86_make_reg( file_REG32, reg_CX) );
-   x86_pop(
-      func,
-      x86_make_reg( file_REG32, reg_AX) );
-
-   sse_movaps(
-      func,
-      make_xmm( xmm_dst ),
-      get_temp( TEMP_R0, 0 ) );
-}
-
-static void
-emit_func_call_dst_src(
-   struct x86_function *func,
-   unsigned xmm_save, 
-   unsigned xmm_dst,
-   unsigned xmm_src,
-   void (PIPE_CDECL *code)() )
-{
-   sse_movaps(
-      func,
-      get_temp( TEMP_R0, 1 ),
-      make_xmm( xmm_src ) );
-
-   emit_func_call_dst(
-      func,
-      xmm_save,
-      xmm_dst,
-      code );
-}
-
-/*
- * Fast SSE2 implementation of special math functions.
- */
-
-#define POLY0(x, c0) _mm_set1_ps(c0)
-#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0))
-#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0))
-#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0))
-#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0))
-#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0))
-
-#define EXP_POLY_DEGREE 3
-#define LOG_POLY_DEGREE 5
-
-/**
- * See http://www.devmaster.net/forums/showthread.php?p=43580
- */
-static INLINE __m128 
-exp2f4(__m128 x)
-{
-   __m128i ipart;
-   __m128 fpart, expipart, expfpart;
-
-   x = _mm_min_ps(x, _mm_set1_ps( 129.00000f));
-   x = _mm_max_ps(x, _mm_set1_ps(-126.99999f));
-
-   /* ipart = int(x - 0.5) */
-   ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f)));
-
-   /* fpart = x - ipart */
-   fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart));
-
-   /* expipart = (float) (1 << ipart) */
-   expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23));
-
-   /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */
-#if EXP_POLY_DEGREE == 5
-   expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f);
-#elif EXP_POLY_DEGREE == 4
-   expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f);
-#elif EXP_POLY_DEGREE == 3
-   expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f);
-#elif EXP_POLY_DEGREE == 2
-   expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f);
-#else
-#error
-#endif
-
-   return _mm_mul_ps(expipart, expfpart);
-}
-
-/**
- * See http://www.devmaster.net/forums/showthread.php?p=43580
- */
-static INLINE __m128 
-log2f4(__m128 x)
-{
-   __m128i expmask = _mm_set1_epi32(0x7f800000);
-   __m128i mantmask = _mm_set1_epi32(0x007fffff);
-   __m128 one = _mm_set1_ps(1.0f);
-
-   __m128i i = _mm_castps_si128(x);
-
-   /* exp = (float) exponent(x) */
-   __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127)));
-
-   /* mant = (float) mantissa(x) */
-   __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one);
-
-   __m128 logmant;
-
-   /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ 
-    * These coefficients can be generate with 
-    * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
-    */
-#if LOG_POLY_DEGREE == 6
-   logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f);
-#elif LOG_POLY_DEGREE == 5
-   logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
-#elif LOG_POLY_DEGREE == 4
-   logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
-#elif LOG_POLY_DEGREE == 3
-   logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
-#else
-#error
-#endif
-
-   /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
-   logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one));
-
-   return _mm_add_ps(logmant, exp);
-}
-
-static INLINE __m128
-powf4(__m128 x, __m128 y)
-{
-   return exp2f4(_mm_mul_ps(log2f4(x), y));
-}
-
-
-/**
- * Low-level instruction translators.
- */
-
-static void
-emit_abs(
-   struct x86_function *func,
-   unsigned xmm )
-{
-   sse_andps(
-      func,
-      make_xmm( xmm ),
-      get_temp(
-         TGSI_EXEC_TEMP_7FFFFFFF_I,
-         TGSI_EXEC_TEMP_7FFFFFFF_C ) );
-}
-
-static void
-emit_add(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-   sse_addps(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-}
-
-static void PIPE_CDECL
-cos4f(
-   float *store )
-{
-   store[0] = cosf( store[0] );
-   store[1] = cosf( store[1] );
-   store[2] = cosf( store[2] );
-   store[3] = cosf( store[3] );
-}
-
-static void
-emit_cos(
-   struct x86_function *func,
-   unsigned xmm_save, 
-   unsigned xmm_dst )
-{
-   emit_func_call_dst(
-      func,
-      xmm_save, 
-      xmm_dst,
-      cos4f );
-}
-
-static void PIPE_CDECL
-#if defined(PIPE_CC_GCC)
-__attribute__((force_align_arg_pointer))
-#endif
-ex24f(
-   float *store )
-{
-   _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) ));
-}
-
-static void
-emit_ex2(
-   struct x86_function *func,
-   unsigned xmm_save, 
-   unsigned xmm_dst )
-{
-   emit_func_call_dst(
-      func,
-      xmm_save,
-      xmm_dst,
-      ex24f );
-}
-
-static void
-emit_f2it(
-   struct x86_function *func,
-   unsigned xmm )
-{
-   sse2_cvttps2dq(
-      func,
-      make_xmm( xmm ),
-      make_xmm( xmm ) );
-}
-
-static void PIPE_CDECL
-flr4f(
-   float *store )
-{
-   store[0] = floorf( store[0] );
-   store[1] = floorf( store[1] );
-   store[2] = floorf( store[2] );
-   store[3] = floorf( store[3] );
-}
-
-static void
-emit_flr(
-   struct x86_function *func,
-   unsigned xmm_save, 
-   unsigned xmm_dst )
-{
-   emit_func_call_dst(
-      func,
-      xmm_save,
-      xmm_dst,
-      flr4f );
-}
-
-static void PIPE_CDECL
-frc4f(
-   float *store )
-{
-   store[0] -= floorf( store[0] );
-   store[1] -= floorf( store[1] );
-   store[2] -= floorf( store[2] );
-   store[3] -= floorf( store[3] );
-}
-
-static void
-emit_frc(
-   struct x86_function *func,
-   unsigned xmm_save, 
-   unsigned xmm_dst )
-{
-   emit_func_call_dst(
-      func,
-      xmm_save,
-      xmm_dst,
-      frc4f );
-}
-
-static void PIPE_CDECL
-#if defined(PIPE_CC_GCC)
-__attribute__((force_align_arg_pointer))
-#endif
-lg24f(
-   float *store )
-{
-   _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) ));
-}
-
-static void
-emit_lg2(
-   struct x86_function *func,
-   unsigned xmm_save, 
-   unsigned xmm_dst )
-{
-   emit_func_call_dst(
-      func,
-      xmm_save,
-      xmm_dst,
-      lg24f );
-}
-
-static void
-emit_MOV(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-   sse_movups(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-}
-
-static void
-emit_mul (struct x86_function *func,
-          unsigned xmm_dst,
-          unsigned xmm_src)
-{
-   sse_mulps(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-}
-
-static void
-emit_neg(
-   struct x86_function *func,
-   unsigned xmm )
-{
-   sse_xorps(
-      func,
-      make_xmm( xmm ),
-      get_temp(
-         TGSI_EXEC_TEMP_80000000_I,
-         TGSI_EXEC_TEMP_80000000_C ) );
-}
-
-static void PIPE_CDECL
-#if defined(PIPE_CC_GCC)
-__attribute__((force_align_arg_pointer))
-#endif
-pow4f(
-   float *store )
-{
-#if 1
-   _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) ));
-#else
-   store[0] = powf( store[0], store[4] );
-   store[1] = powf( store[1], store[5] );
-   store[2] = powf( store[2], store[6] );
-   store[3] = powf( store[3], store[7] );
-#endif
-}
-
-static void
-emit_pow(
-   struct x86_function *func,
-   unsigned xmm_save, 
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-   emit_func_call_dst_src(
-      func,
-      xmm_save,
-      xmm_dst,
-      xmm_src,
-      pow4f );
-}
-
-static void
-emit_rcp (
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-   /* On Intel CPUs at least, this is only accurate to 12 bits -- not
-    * good enough.  Need to either emit a proper divide or use the
-    * iterative technique described below in emit_rsqrt().
-    */
-   sse2_rcpps(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-}
-
-static void
-emit_rsqrt(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-#if HIGH_PRECISION
-   /* Although rsqrtps() and rcpps() are low precision on some/all SSE
-    * implementations, it is possible to improve its precision at
-    * fairly low cost, using a newton/raphson step, as below:
-    * 
-    * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
-    * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
-    *
-    * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
-    */
-   {
-      struct x86_reg dst = make_xmm( xmm_dst );
-      struct x86_reg src = make_xmm( xmm_src );
-      struct x86_reg tmp0 = make_xmm( 2 );
-      struct x86_reg tmp1 = make_xmm( 3 );
-
-      assert( xmm_dst != xmm_src );
-      assert( xmm_dst != 2 && xmm_dst != 3 );
-      assert( xmm_src != 2 && xmm_src != 3 );
-
-      sse_movaps(  func, dst,  get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
-      sse_movaps(  func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
-      sse_rsqrtps( func, tmp1, src  );
-      sse_mulps(   func, src,  tmp1 );
-      sse_mulps(   func, dst,  tmp1 );
-      sse_mulps(   func, src,  tmp1 );
-      sse_subps(   func, tmp0, src  );
-      sse_mulps(   func, dst,  tmp0 );
+      STORE(gen, *inst, v0, 0, chan_index);   /* store v0 */
    }
-#else
-   /* On Intel CPUs at least, this is only accurate to 12 bits -- not
-    * good enough.
-    */
-   sse_rsqrtps(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-#endif
-}
-
-static void
-emit_setsign(
-   struct x86_function *func,
-   unsigned xmm )
-{
-   sse_orps(
-      func,
-      make_xmm( xmm ),
-      get_temp(
-         TGSI_EXEC_TEMP_80000000_I,
-         TGSI_EXEC_TEMP_80000000_C ) );
-}
-
-static void PIPE_CDECL
-sin4f(
-   float *store )
-{
-   store[0] = sinf( store[0] );
-   store[1] = sinf( store[1] );
-   store[2] = sinf( store[2] );
-   store[3] = sinf( store[3] );
+   ppc_release_vec_register(gen->f, v0);
 }
 
-static void
-emit_sin (struct x86_function *func,
-          unsigned xmm_save, 
-          unsigned xmm_dst)
-{
-   emit_func_call_dst(
-      func,
-      xmm_save,
-      xmm_dst,
-      sin4f );
-}
 
 static void
-emit_sub(
-   struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
+emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   sse_subps(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
+   int v0 = ppc_allocate_vec_register(gen->f);
+   int v1 = ppc_allocate_vec_register(gen->f);
+   int v2 = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
+      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_ADD:
+         ppc_vaddfp(gen->f, v2, v0, v1);
+         break;
+      case TGSI_OPCODE_SUB:
+         ppc_vsubfp(gen->f, v2, v0, v1);
+         break;
+      case TGSI_OPCODE_MUL:
+         ppc_vxor(gen->f, v2, v2, v2);        /* v2 = {0, 0, 0, 0} */
+         ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */
+         break;
+      case TGSI_OPCODE_MIN:
+         ppc_vminfp(gen->f, v2, v0, v1);
+         break;
+      case TGSI_OPCODE_MAX:
+         ppc_vmaxfp(gen->f, v2, v0, v1);
+         break;
+      default:
+         assert(0);
+      }
+      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
+   }
+   ppc_release_vec_register(gen->f, v0);
+   ppc_release_vec_register(gen->f, v1);
+   ppc_release_vec_register(gen->f, v2);
 }
-#endif
 
 
 /**
- * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}.
+ * Vector comparisons, resulting in 1.0 or 0.0 values.
  */
-static int
-gen_one_vec(struct gen_context *gen)
-{
-   if (gen->one_vec < 0) {
-      gen->one_vec = ppc_allocate_vec_register(gen->f);
-      ppc_vload_float(gen->f, gen->one_vec, 1.0f);
-   }
-   return gen->one_vec;
-}
-
-/**
- * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}.
- */
-static int
-gen_get_bit31_vec(struct gen_context *gen)
-{
-   if (gen->bit31_vec < 0) {
-      gen->bit31_vec = ppc_allocate_vec_register(gen->f);
-      ppc_vspltisw(gen->f, gen->bit31_vec, -1);
-      ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec);
-   }
-   return gen->bit31_vec;
-}
-
-
-
-/**
- * Register fetch.
- */
-static void
-emit_fetch(struct gen_context *gen,
-           unsigned vec_reg,
-           const struct tgsi_full_src_register *reg,
-           const unsigned chan_index)
-{
-   uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index);
-
-   switch (swizzle) {
-   case TGSI_EXTSWIZZLE_X:
-   case TGSI_EXTSWIZZLE_Y:
-   case TGSI_EXTSWIZZLE_Z:
-   case TGSI_EXTSWIZZLE_W:
-      switch (reg->SrcRegister.File) {
-      case TGSI_FILE_INPUT:
-         {
-            int offset_reg = ppc_allocate_register(gen->f);
-            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
-            ppc_li(gen->f, offset_reg, offset);
-            ppc_lvx(gen->f, vec_reg, gen->inputs_reg, offset_reg);
-            ppc_release_register(gen->f, offset_reg);
-         }
-         break;
-      case TGSI_FILE_TEMPORARY:
-         {
-            int offset_reg = ppc_allocate_register(gen->f);
-            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
-            ppc_li(gen->f, offset_reg, offset);
-            ppc_lvx(gen->f, vec_reg, gen->temps_reg, offset_reg);
-            ppc_release_register(gen->f, offset_reg);
-         }
-         break;
-      case TGSI_FILE_IMMEDIATE:
-         {
-            int offset_reg = ppc_allocate_register(gen->f);
-            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
-            ppc_li(gen->f, offset_reg, offset);
-            ppc_lvx(gen->f, vec_reg, gen->immed_reg, offset_reg);
-            ppc_release_register(gen->f, offset_reg);
-         }
-         break;
-      case TGSI_FILE_CONSTANT:
-         {
-            int offset_reg = ppc_allocate_register(gen->f);
-            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
-            ppc_li(gen->f, offset_reg, offset);
-            /* Load 4-byte word into vector register.
-             * The vector slot depends on the effective address we load from.
-             * We know that our constants start at a 16-byte boundary so we
-             * know that 'swizzle' tells us which vector slot will have the
-             * loaded word.  The other vector slots will be undefined.
-             */
-            ppc_lvewx(gen->f, vec_reg, gen->const_reg, offset_reg);
-            /* splat word[swizzle] across the vector reg */
-            ppc_vspltw(gen->f, vec_reg, vec_reg, swizzle);
-            ppc_release_register(gen->f, offset_reg);
-         }
-         break;
-      default:
-         assert( 0 );
-      }
-      break;
-   case TGSI_EXTSWIZZLE_ZERO:
-      ppc_vload_float(gen->f, vec_reg, 0.0f);
-      break;
-   case TGSI_EXTSWIZZLE_ONE:
-      {
-         int one_vec = gen_one_vec(gen);
-         ppc_vecmove(gen->f, vec_reg, one_vec);
-      }
-      break;
-   default:
-      assert( 0 );
-   }
-
-   {
-      uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
-      if (sign_op != TGSI_UTIL_SIGN_KEEP) {
-         int bit31_vec = gen_get_bit31_vec(gen);
-
-         switch (sign_op) {
-         case TGSI_UTIL_SIGN_CLEAR:
-            /* vec = vec & ~bit31 */
-            ppc_vandc(gen->f, vec_reg, vec_reg, bit31_vec);
-            break;
-         case TGSI_UTIL_SIGN_SET:
-            /* vec = vec | bit31 */
-            ppc_vor(gen->f, vec_reg, vec_reg, bit31_vec);
-            break;
-         case TGSI_UTIL_SIGN_TOGGLE:
-            /* vec = vec ^ bit31 */
-            ppc_vxor(gen->f, vec_reg, vec_reg, bit31_vec);
-            break;
-         default:
-            assert(0);
-         }
-      }
-   }
-}
-
-#define FETCH( GEN, INST, VEC_REG, SRC_REG, CHAN ) \
-   emit_fetch( GEN, VEC_REG, &(INST).FullSrcRegisters[SRC_REG], CHAN )
-
-
-
-/**
- * Register store.
- */
-static void
-emit_store(struct gen_context *gen,
-           unsigned vec_reg,
-           const struct tgsi_full_dst_register *reg,
-           const struct tgsi_full_instruction *inst,
-           unsigned chan_index)
-{
-   switch (reg->DstRegister.File) {
-   case TGSI_FILE_OUTPUT:
-      {
-         int offset_reg = ppc_allocate_register(gen->f);
-         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
-         ppc_li(gen->f, offset_reg, offset);
-         ppc_stvx(gen->f, vec_reg, gen->outputs_reg, offset_reg);
-         ppc_release_register(gen->f, offset_reg);
-      }
-      break;
-   case TGSI_FILE_TEMPORARY:
-      {
-         int offset_reg = ppc_allocate_register(gen->f);
-         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
-         ppc_li(gen->f, offset_reg, offset);
-         ppc_stvx(gen->f, vec_reg, gen->temps_reg, offset_reg);
-         ppc_release_register(gen->f, offset_reg);
-      }
-      break;
-#if 0
-   case TGSI_FILE_ADDRESS:
-      emit_addrs(
-         func,
-         xmm,
-         reg->DstRegister.Index,
-         chan_index );
-      break;
-#endif
-   default:
-      assert( 0 );
-   }
-
-#if 0
-   switch( inst->Instruction.Saturate ) {
-   case TGSI_SAT_NONE:
-      break;
-
-   case TGSI_SAT_ZERO_ONE:
-      /* assert( 0 ); */
-      break;
-
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      assert( 0 );
-      break;
-   }
-#endif
-}
-
-
-#define STORE( GEN, INST, XMM, INDEX, CHAN )\
-   emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
-
-
-
-#if 000
-/**
- * High-level instruction translators.
- */
-
-static void
-emit_kil(
-   struct x86_function *func,
-   const struct tgsi_full_src_register *reg )
-{
-   unsigned uniquemask;
-   unsigned registers[4];
-   unsigned nextregister = 0;
-   unsigned firstchan = ~0;
-   unsigned chan_index;
-
-   /* This mask stores component bits that were already tested. Note that
-    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
-    * tested. */
-   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
-
-   FOR_EACH_CHANNEL( chan_index ) {
-      unsigned swizzle;
-
-      /* unswizzle channel */
-      swizzle = tgsi_util_get_full_src_register_extswizzle(
-         reg,
-         chan_index );
-
-      /* check if the component has not been already tested */
-      if( !(uniquemask & (1 << swizzle)) ) {
-         uniquemask |= 1 << swizzle;
-
-         /* allocate register */
-         registers[chan_index] = nextregister;
-         emit_fetch(
-            func,
-            nextregister,
-            reg,
-            chan_index );
-         nextregister++;
-
-         /* mark the first channel used */
-         if( firstchan == ~0 ) {
-            firstchan = chan_index;
-         }
-      }
-   }
-
-   x86_push(
-      func,
-      x86_make_reg( file_REG32, reg_AX ) );
-   x86_push(
-      func,
-      x86_make_reg( file_REG32, reg_DX ) );
-
-   FOR_EACH_CHANNEL( chan_index ) {
-      if( uniquemask & (1 << chan_index) ) {
-         sse_cmpps(
-            func,
-            make_xmm( registers[chan_index] ),
-            get_temp(
-               TGSI_EXEC_TEMP_00000000_I,
-               TGSI_EXEC_TEMP_00000000_C ),
-            cc_LessThan );
-
-         if( chan_index == firstchan ) {
-            sse_pmovmskb(
-               func,
-               x86_make_reg( file_REG32, reg_AX ),
-               make_xmm( registers[chan_index] ) );
-         }
-         else {
-            sse_pmovmskb(
-               func,
-               x86_make_reg( file_REG32, reg_DX ),
-               make_xmm( registers[chan_index] ) );
-            x86_or(
-               func,
-               x86_make_reg( file_REG32, reg_AX ),
-               x86_make_reg( file_REG32, reg_DX ) );
-         }
-      }
-   }
-
-   x86_or(
-      func,
-      get_temp(
-         TGSI_EXEC_TEMP_KILMASK_I,
-         TGSI_EXEC_TEMP_KILMASK_C ),
-      x86_make_reg( file_REG32, reg_AX ) );
-
-   x86_pop(
-      func,
-      x86_make_reg( file_REG32, reg_DX ) );
-   x86_pop(
-      func,
-      x86_make_reg( file_REG32, reg_AX ) );
-}
-
-
-static void
-emit_kilp(
-   struct x86_function *func )
-{
-   /* XXX todo / fix me */
-}
-
-
-static void
-emit_setcc(
-   struct x86_function *func,
-   struct tgsi_full_instruction *inst,
-   enum sse_cc cc )
-{
-   unsigned chan_index;
-
-   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-      FETCH( func, *inst, 0, 0, chan_index );
-      FETCH( func, *inst, 1, 1, chan_index );
-      sse_cmpps(
-         func,
-         make_xmm( 0 ),
-         make_xmm( 1 ),
-         cc );
-      sse_andps(
-         func,
-         make_xmm( 0 ),
-         get_temp(
-            TEMP_ONE_I,
-            TEMP_ONE_C ) );
-      STORE( func, *inst, 0, 0, chan_index );
-   }
-}
-
-static void
-emit_cmp(
-   struct x86_function *func,
-   struct tgsi_full_instruction *inst )
-{
-   unsigned chan_index;
-
-   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-      FETCH( func, *inst, 0, 0, chan_index );
-      FETCH( func, *inst, 1, 1, chan_index );
-      FETCH( func, *inst, 2, 2, chan_index );
-      sse_cmpps(
-         func,
-         make_xmm( 0 ),
-         get_temp(
-            TGSI_EXEC_TEMP_00000000_I,
-            TGSI_EXEC_TEMP_00000000_C ),
-         cc_LessThan );
-      sse_andps(
-         func,
-         make_xmm( 1 ),
-         make_xmm( 0 ) );
-      sse_andnps(
-         func,
-         make_xmm( 0 ),
-         make_xmm( 2 ) );
-      sse_orps(
-         func,
-         make_xmm( 0 ),
-         make_xmm( 1 ) );
-      STORE( func, *inst, 0, 0, chan_index );
-   }
-}
-#endif
-
-
-static void
-emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+static void
+emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
    int v0 = ppc_allocate_vec_register(gen->f);
-   int v1 = ppc_allocate_vec_register(gen->f);
-   uint chan_index;
-
-   FETCH(gen, *inst, v0, 0, CHAN_X);
-
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_RSQ:
-      /* v1 = 1.0 / sqrt(v0) */
-      ppc_vrsqrtefp(gen->f, v1, v0);
-      break;
-   case TGSI_OPCODE_RCP:
-      /* v1 = 1.0 / v0 */
-      ppc_vrefp(gen->f, v1, v0);
-      break;
-   default:
-      assert(0);
-   }
-
-   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-      STORE(gen, *inst, v1, 0, chan_index);
-   }
-   ppc_release_vec_register(gen->f, v0);
-   ppc_release_vec_register(gen->f, v1);
-}
-
-
-static void
-emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
-{
-   int v0 = ppc_allocate_vec_register(gen->f);
-   uint chan_index;
-   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
-      FETCH(gen, *inst, 0, 0, chan_index);   /* v0 = srcreg[0] */
-      switch (inst->Instruction.Opcode) {
-      case TGSI_OPCODE_ABS:
-         /* turn off the most significant bit of each vector float word */
-         {
-            int v1 = ppc_allocate_vec_register(gen->f);
-            ppc_vspltisw(gen->f, v1, -1);  /* v1 = {-1, -1, -1, -1} */
-            ppc_vslw(gen->f, v1, v1, v1);  /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */
-            ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */
-            ppc_release_vec_register(gen->f, v1);
-         }
-         break;
-      case TGSI_OPCODE_FLOOR:
-         ppc_vrfim(gen->f, v0, v0);         /* v0 = floor(v0) */
-         break;
-      case TGSI_OPCODE_FRAC:
-         {
-            int v1 = ppc_allocate_vec_register(gen->f);
-            ppc_vrfim(gen->f, v1, v0);         /* v1 = floor(v0) */
-            ppc_vsubfp(gen->f, v0, v0, v1);    /* v0 = v0 - v1 */
-            ppc_release_vec_register(gen->f, v1);
-         }
-         break;
-      case TGSI_OPCODE_EXPBASE2:
-         ppc_vexptefp(gen->f, v0, v0);      /* v0 = 2^v0 */
-         break;
-      case TGSI_OPCODE_LOGBASE2:
-         /* XXX this may be broken! */
-         ppc_vlogefp(gen->f, v0, v0);      /* v0 = log2(v0) */
-         break;
-      case TGSI_OPCODE_MOV:
-         /* nothing */
-         break;
-      default:
-         assert(0);
-      }
-      STORE(gen, *inst, v0, 0, chan_index);   /* store v0 */
-   }
-   ppc_release_vec_register(gen->f, v0);
-}
-
-
-static void
-emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
-{
-   int v0 = ppc_allocate_vec_register(gen->f);
-   int v1 = ppc_allocate_vec_register(gen->f);
-   int v2 = ppc_allocate_vec_register(gen->f);
-   uint chan_index;
-   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
-      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
-      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
-      switch (inst->Instruction.Opcode) {
-      case TGSI_OPCODE_ADD:
-         ppc_vaddfp(gen->f, v2, v0, v1);
-         break;
-      case TGSI_OPCODE_SUB:
-         ppc_vsubfp(gen->f, v2, v0, v1);
-         break;
-      case TGSI_OPCODE_MUL:
-         ppc_vxor(gen->f, v2, v2, v2);        /* v2 = {0, 0, 0, 0} */
-         ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */
-         break;
-      case TGSI_OPCODE_MIN:
-         ppc_vminfp(gen->f, v2, v0, v1);
-         break;
-      case TGSI_OPCODE_MAX:
-         ppc_vmaxfp(gen->f, v2, v0, v1);
-         break;
-      default:
-         assert(0);
-      }
-      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
-   }
-   ppc_release_vec_register(gen->f, v0);
-   ppc_release_vec_register(gen->f, v1);
-   ppc_release_vec_register(gen->f, v2);
-}
-
-
-/**
- * Vector comparisons, resulting in 1.0 or 0.0 values.
- */
-static void
-emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
-{
-   int v0 = ppc_allocate_vec_register(gen->f);
-   int v1 = ppc_allocate_vec_register(gen->f);
-   int v2 = ppc_allocate_vec_register(gen->f);
-   uint chan_index;
-   boolean complement = FALSE;
-   int one_vec = gen_one_vec(gen);
-
-   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
-      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
-      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
-
-      switch (inst->Instruction.Opcode) {
-      case TGSI_OPCODE_SNE:
-         complement = TRUE;
-         /* fall-through */
-      case TGSI_OPCODE_SEQ:
-         ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */
-         break;
-
-      case TGSI_OPCODE_SGE:
-         complement = TRUE;
-         /* fall-through */
-      case TGSI_OPCODE_SLT:
-         ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */
-         break;
-
-      case TGSI_OPCODE_SLE:
-         complement = TRUE;
-         /* fall-through */
-      case TGSI_OPCODE_SGT:
-         ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */
-         break;
-      default:
-         assert(0);
-      }
-
-      /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */
-
-      if (complement)
-         ppc_vandc(gen->f, v2, one_vec, v2);    /* v2 = one_vec & ~v2 */
-      else
-         ppc_vand(gen->f, v2, one_vec, v2);     /* v2 = one_vec & v2 */
-
-      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
-   }
-
-   ppc_release_vec_register(gen->f, v0);
-   ppc_release_vec_register(gen->f, v1);
-   ppc_release_vec_register(gen->f, v2);
-}
-
-
-static void
-emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
-{
-   int v0 = ppc_allocate_vec_register(gen->f);
-   int v1 = ppc_allocate_vec_register(gen->f);
-   int v2 = ppc_allocate_vec_register(gen->f);
-   uint chan_index;
-
-   ppc_vxor(gen->f, v2, v2, v2);           /* v2 = {0, 0, 0, 0} */
-
-   FETCH(gen, *inst, v0, 0, CHAN_X);       /* v0 = src0.XXXX */
-   FETCH(gen, *inst, v1, 1, CHAN_X);       /* v1 = src1.XXXX */
-   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
-
-   FETCH(gen, *inst, v0, 0, CHAN_Y);       /* v0 = src0.YYYY */
-   FETCH(gen, *inst, v1, 1, CHAN_Y);       /* v1 = src1.YYYY */
-   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
-
-   FETCH(gen, *inst, v0, 0, CHAN_Z);       /* v0 = src0.ZZZZ */
-   FETCH(gen, *inst, v1, 1, CHAN_Z);       /* v1 = src1.ZZZZ */
-   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
-
-   if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) {
-      FETCH(gen, *inst, v0, 0, CHAN_W);    /* v0 = src0.WWWW */
-      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
-      ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
-   }
-   else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) {
-      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
-      ppc_vaddfp(gen->f, v2, v2, v1);      /* v2 = v2 + v1 */
-   }
-
-   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
-      STORE(gen, *inst, v2, 0, chan_index);  /* store v2 */
-   }
-   ppc_release_vec_register(gen->f, v0);
-   ppc_release_vec_register(gen->f, v1);
-   ppc_release_vec_register(gen->f, v2);
-}
-
-
-static void
-emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
-{
-   int v0 = ppc_allocate_vec_register(gen->f);
-   int v1 = ppc_allocate_vec_register(gen->f);
-   int v2 = ppc_allocate_vec_register(gen->f);
-   int v3 = ppc_allocate_vec_register(gen->f);
-   uint chan_index;
-   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
-      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
-      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
-      FETCH(gen, *inst, v2, 2, chan_index);   /* v2 = srcreg[2] */
-      switch (inst->Instruction.Opcode) {
-      case TGSI_OPCODE_MAD:
-         ppc_vmaddfp(gen->f, v3, v0, v1, v2);   /* v3 = v0 * v1 + v2 */
-         break;
-      case TGSI_OPCODE_LRP:
-         ppc_vsubfp(gen->f, v3, v1, v2);        /* v3 = v1 - v2 */
-         ppc_vmaddfp(gen->f, v3, v0, v3, v2);   /* v3 = v0 * v3 + v2 */
-         break;
-      default:
-         assert(0);
-      }
-      STORE(gen, *inst, v3, 0, chan_index);   /* store v3 */
-   }
-   ppc_release_vec_register(gen->f, v0);
-   ppc_release_vec_register(gen->f, v1);
-   ppc_release_vec_register(gen->f, v2);
-   ppc_release_vec_register(gen->f, v3);
-}
-
-
-/*
-static void
-emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
-{
-}
-*/
-
-
-static int
-emit_instruction(struct gen_context *gen,
-                 struct tgsi_full_instruction *inst)
-{
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_MOV:
-   case TGSI_OPCODE_ABS:
-   case TGSI_OPCODE_FLOOR:
-   case TGSI_OPCODE_FRAC:
-   case TGSI_OPCODE_EXPBASE2:
-   case TGSI_OPCODE_LOGBASE2:
-      emit_unaryop(gen, inst);
-      break;
-   case TGSI_OPCODE_RSQ:
-   case TGSI_OPCODE_RCP:
-      emit_scalar_unaryop(gen, inst);
-      break;
-   case TGSI_OPCODE_ADD:
-   case TGSI_OPCODE_SUB:
-   case TGSI_OPCODE_MUL:
-   case TGSI_OPCODE_MIN:
-   case TGSI_OPCODE_MAX:
-      emit_binop(gen, inst);
-      break;
-   case TGSI_OPCODE_SEQ:
-   case TGSI_OPCODE_SNE:
-   case TGSI_OPCODE_SLT:
-   case TGSI_OPCODE_SGT:
-   case TGSI_OPCODE_SLE:
-   case TGSI_OPCODE_SGE:
-      emit_inequality(gen, inst);
-      break;
-   case TGSI_OPCODE_MAD:
-   case TGSI_OPCODE_LRP:
-      emit_triop(gen, inst);
-      break;
-   case TGSI_OPCODE_DP3:
-   case TGSI_OPCODE_DP4:
-   case TGSI_OPCODE_DPH:
-      emit_dotprod(gen, inst);
-      break;
-      /*
-   case TGSI_OPCODE_LIT:
-      emit_lit(gen, inst);
-      break;
-      */
-   case TGSI_OPCODE_END:
-      /* normal end */
-      return 1;
-   default:
-      return 0;
-   }
-
-#if 0
-   unsigned chan_index;
-
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_ARL:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         emit_f2it( func, 0 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_MOV:
-   case TGSI_OPCODE_SWZ:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_LIT:
-      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
-         emit_tempf(
-            func,
-            0,
-            TEMP_ONE_I,
-            TEMP_ONE_C);
-         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
-            STORE( func, *inst, 0, 0, CHAN_X );
-         }
-         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
-            STORE( func, *inst, 0, 0, CHAN_W );
-         }
-      }
-      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
-         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
-            FETCH( func, *inst, 0, 0, CHAN_X );
-            sse_maxps(
-               func,
-               make_xmm( 0 ),
-               get_temp(
-                  TGSI_EXEC_TEMP_00000000_I,
-                  TGSI_EXEC_TEMP_00000000_C ) );
-            STORE( func, *inst, 0, 0, CHAN_Y );
-         }
-         if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
-            /* XMM[1] = SrcReg[0].yyyy */
-            FETCH( func, *inst, 1, 0, CHAN_Y );
-            /* XMM[1] = max(XMM[1], 0) */
-            sse_maxps(
-               func,
-               make_xmm( 1 ),
-               get_temp(
-                  TGSI_EXEC_TEMP_00000000_I,
-                  TGSI_EXEC_TEMP_00000000_C ) );
-            /* XMM[2] = SrcReg[0].wwww */
-            FETCH( func, *inst, 2, 0, CHAN_W );
-            /* XMM[2] = min(XMM[2], 128.0) */
-            sse_minps(
-               func,
-               make_xmm( 2 ),
-               get_temp(
-                  TGSI_EXEC_TEMP_128_I,
-                  TGSI_EXEC_TEMP_128_C ) );
-            /* XMM[2] = max(XMM[2], -128.0) */
-            sse_maxps(
-               func,
-               make_xmm( 2 ),
-               get_temp(
-                  TGSI_EXEC_TEMP_MINUS_128_I,
-                  TGSI_EXEC_TEMP_MINUS_128_C ) );
-            emit_pow( func, 3, 1, 2 );
-            FETCH( func, *inst, 0, 0, CHAN_X );
-            sse_xorps(
-               func,
-               make_xmm( 2 ),
-               make_xmm( 2 ) );
-            sse_cmpps(
-               func,
-               make_xmm( 2 ),
-               make_xmm( 0 ),
-               cc_LessThanEqual );
-            sse_andps(
-               func,
-               make_xmm( 2 ),
-               make_xmm( 1 ) );
-            STORE( func, *inst, 2, 0, CHAN_Z );
-         }
-      }
-      break;
-
-   case TGSI_OPCODE_RCP:
-   /* TGSI_OPCODE_RECIP */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_rcp( func, 0, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_RSQ:
-   /* TGSI_OPCODE_RECIPSQRT */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_rsqrt( func, 1, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 1, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_EXP:
-      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-         FETCH( func, *inst, 0, 0, CHAN_X );
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-            emit_MOV( func, 1, 0 );
-            emit_flr( func, 2, 1 );
-            /* dst.x = ex2(floor(src.x)) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
-               emit_MOV( func, 2, 1 );
-               emit_ex2( func, 3, 2 );
-               STORE( func, *inst, 2, 0, CHAN_X );
-            }
-            /* dst.y = src.x - floor(src.x) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-               emit_MOV( func, 2, 0 );
-               emit_sub( func, 2, 1 );
-               STORE( func, *inst, 2, 0, CHAN_Y );
-            }
-         }
-         /* dst.z = ex2(src.x) */
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-            emit_ex2( func, 3, 0 );
-            STORE( func, *inst, 0, 0, CHAN_Z );
-         }
-      }
-      /* dst.w = 1.0 */
-      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
-         emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C );
-         STORE( func, *inst, 0, 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_LOG:
-      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-         FETCH( func, *inst, 0, 0, CHAN_X );
-         emit_abs( func, 0 );
-         emit_MOV( func, 1, 0 );
-         emit_lg2( func, 2, 1 );
-         /* dst.z = lg2(abs(src.x)) */
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-            STORE( func, *inst, 1, 0, CHAN_Z );
-         }
-         if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-             IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-            emit_flr( func, 2, 1 );
-            /* dst.x = floor(lg2(abs(src.x))) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
-               STORE( func, *inst, 1, 0, CHAN_X );
-            }
-            /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
-            if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-               emit_ex2( func, 2, 1 );
-               emit_rcp( func, 1, 1 );
-               emit_mul( func, 0, 1 );
-               STORE( func, *inst, 0, 0, CHAN_Y );
-            }
-         }
-      }
-      /* dst.w = 1.0 */
-      if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
-         emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C );
-         STORE( func, *inst, 0, 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_MUL:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         emit_mul( func, 0, 1 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_ADD:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         emit_add( func, 0, 1 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DP3:
-   /* TGSI_OPCODE_DOT3 */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      FETCH( func, *inst, 1, 1, CHAN_X );
-      emit_mul( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Y );
-      FETCH( func, *inst, 2, 1, CHAN_Y );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Z );
-      FETCH( func, *inst, 2, 1, CHAN_Z );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DP4:
-   /* TGSI_OPCODE_DOT4 */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      FETCH( func, *inst, 1, 1, CHAN_X );
-      emit_mul( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Y );
-      FETCH( func, *inst, 2, 1, CHAN_Y );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Z );
-      FETCH( func, *inst, 2, 1, CHAN_Z );
-      emit_mul(func, 1, 2 );
-      emit_add(func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_W );
-      FETCH( func, *inst, 2, 1, CHAN_W );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DST:
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
-         emit_tempf(
-            func,
-            0,
-            TEMP_ONE_I,
-            TEMP_ONE_C );
-         STORE( func, *inst, 0, 0, CHAN_X );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
-         FETCH( func, *inst, 0, 0, CHAN_Y );
-         FETCH( func, *inst, 1, 1, CHAN_Y );
-         emit_mul( func, 0, 1 );
-         STORE( func, *inst, 0, 0, CHAN_Y );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
-         FETCH( func, *inst, 0, 0, CHAN_Z );
-         STORE( func, *inst, 0, 0, CHAN_Z );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
-         FETCH( func, *inst, 0, 1, CHAN_W );
-         STORE( func, *inst, 0, 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_MIN:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         sse_minps(
-            func,
-            make_xmm( 0 ),
-            make_xmm( 1 ) );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_MAX:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         sse_maxps(
-            func,
-            make_xmm( 0 ),
-            make_xmm( 1 ) );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SLT:
-   /* TGSI_OPCODE_SETLT */
-      emit_setcc( func, inst, cc_LessThan );
-      break;
-
-   case TGSI_OPCODE_SGE:
-   /* TGSI_OPCODE_SETGE */
-      emit_setcc( func, inst, cc_NotLessThan );
-      break;
-
-   case TGSI_OPCODE_MAD:
-   /* TGSI_OPCODE_MADD */
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         FETCH( func, *inst, 2, 2, chan_index );
-         emit_mul( func, 0, 1 );
-         emit_add( func, 0, 2 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SUB:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         emit_sub( func, 0, 1 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_LERP:
-   /* TGSI_OPCODE_LRP */
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         FETCH( func, *inst, 1, 1, chan_index );
-         FETCH( func, *inst, 2, 2, chan_index );
-         emit_sub( func, 1, 2 );
-         emit_mul( func, 0, 1 );
-         emit_add( func, 0, 2 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_CND:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_CND0:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_DOT2ADD:
-   /* TGSI_OPCODE_DP2A */
-      return 0;
-      break;
-
-   case TGSI_OPCODE_INDEX:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_NEGATE:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_FRAC:
-   /* TGSI_OPCODE_FRC */
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         emit_frc( func, 0, 0 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_CLAMP:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_FLOOR:
-   /* TGSI_OPCODE_FLR */
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         emit_flr( func, 0, 0 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_ROUND:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_EXPBASE2:
-   /* TGSI_OPCODE_EX2 */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_ex2( func, 0, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_LOGBASE2:
-   /* TGSI_OPCODE_LG2 */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_lg2( func, 0, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_POWER:
-   /* TGSI_OPCODE_POW */
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      FETCH( func, *inst, 1, 1, CHAN_X );
-      emit_pow( func, 0, 0, 1 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_CROSSPRODUCT:
-   /* TGSI_OPCODE_XPD */
-      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
-         FETCH( func, *inst, 1, 1, CHAN_Z );
-         FETCH( func, *inst, 3, 0, CHAN_Z );
-      }
-      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
-         FETCH( func, *inst, 0, 0, CHAN_Y );
-         FETCH( func, *inst, 4, 1, CHAN_Y );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
-         emit_MOV( func, 2, 0 );
-         emit_mul( func, 2, 1 );
-         emit_MOV( func, 5, 3 );
-         emit_mul( func, 5, 4 );
-         emit_sub( func, 2, 5 );
-         STORE( func, *inst, 2, 0, CHAN_X );
-      }
-      if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
-          IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
-         FETCH( func, *inst, 2, 1, CHAN_X );
-         FETCH( func, *inst, 5, 0, CHAN_X );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
-         emit_mul( func, 3, 2 );
-         emit_mul( func, 1, 5 );
-         emit_sub( func, 3, 1 );
-         STORE( func, *inst, 3, 0, CHAN_Y );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
-         emit_mul( func, 5, 4 );
-         emit_mul( func, 0, 2 );
-         emit_sub( func, 5, 0 );
-         STORE( func, *inst, 5, 0, CHAN_Z );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
-	 emit_tempf(
-	    func,
-	    0,
-	    TEMP_ONE_I,
-	    TEMP_ONE_C );
-         STORE( func, *inst, 0, 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_MULTIPLYMATRIX:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_ABS:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         emit_abs( func, 0) ;
-
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_RCC:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_DPH:
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      FETCH( func, *inst, 1, 1, CHAN_X );
-      emit_mul( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Y );
-      FETCH( func, *inst, 2, 1, CHAN_Y );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FETCH( func, *inst, 1, 0, CHAN_Z );
-      FETCH( func, *inst, 2, 1, CHAN_Z );
-      emit_mul( func, 1, 2 );
-      emit_add( func, 0, 1 );
-      FETCH( func, *inst, 1, 1, CHAN_W );
-      emit_add( func, 0, 1 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_COS:
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_cos( func, 0, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DDX:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_DDY:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_KILP:
-      /* predicated kill */
-      emit_kilp( func );
-      return 0; /* XXX fix me */
-      break;
-
-   case TGSI_OPCODE_KIL:
-      /* conditional kill */
-      emit_kil( func, &inst->FullSrcRegisters[0] );
-      break;
-
-   case TGSI_OPCODE_PK2H:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_PK2US:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_PK4B:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_PK4UB:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_RFL:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SEQ:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SFL:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SGT:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SIN:
-      FETCH( func, *inst, 0, 0, CHAN_X );
-      emit_sin( func, 0, 0 );
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( func, *inst, 0, 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SLE:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_SNE:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_STR:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_TEX:
-      if (0) {
-	 /* Disable dummy texture code: 
-	  */
-	 emit_tempf(
-	    func,
-	    0,
-	    TEMP_ONE_I,
-	    TEMP_ONE_C );
-	 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-	    STORE( func, *inst, 0, 0, chan_index );
-	 }
-      }
-      else {
-	 return 0;
-      }
-      break;
-
-   case TGSI_OPCODE_TXD:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_UP2H:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_UP2US:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_UP4B:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_UP4UB:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_X2D:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_ARA:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_ARR:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_BRA:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_CAL:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_RET:
-      emit_ret( func );
-      break;
-
-   case TGSI_OPCODE_END:
-      break;
-
-   case TGSI_OPCODE_SSG:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_CMP:
-      emit_cmp (func, inst);
-      break;
-
-   case TGSI_OPCODE_SCS:
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
-         FETCH( func, *inst, 0, 0, CHAN_X );
-         emit_cos( func, 0, 0 );
-         STORE( func, *inst, 0, 0, CHAN_X );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
-         FETCH( func, *inst, 0, 0, CHAN_X );
-         emit_sin( func, 0, 0 );
-         STORE( func, *inst, 0, 0, CHAN_Y );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
-	 emit_tempf(
-	    func,
-	    0,
-	    TGSI_EXEC_TEMP_00000000_I,
-	    TGSI_EXEC_TEMP_00000000_C );
-         STORE( func, *inst, 0, 0, CHAN_Z );
-      }
-      IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
-	 emit_tempf(
-	    func,
-	    0,
-	    TEMP_ONE_I,
-	    TEMP_ONE_C );
-         STORE( func, *inst, 0, 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_TXB:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_NRM:
-      return 0;
-      break;
-
-   case TGSI_OPCODE_DIV:
-      return 0;
-      break;
+   int v1 = ppc_allocate_vec_register(gen->f);
+   int v2 = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
+   boolean complement = FALSE;
+   int one_vec = gen_one_vec(gen);
 
-   case TGSI_OPCODE_DP2:
-      return 0;
-      break;
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
+      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
 
-   case TGSI_OPCODE_TXL:
-      return 0;
-      break;
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_SNE:
+         complement = TRUE;
+         /* fall-through */
+      case TGSI_OPCODE_SEQ:
+         ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */
+         break;
 
-   case TGSI_OPCODE_BRK:
-      return 0;
-      break;
+      case TGSI_OPCODE_SGE:
+         complement = TRUE;
+         /* fall-through */
+      case TGSI_OPCODE_SLT:
+         ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */
+         break;
 
-   case TGSI_OPCODE_IF:
-      return 0;
-      break;
+      case TGSI_OPCODE_SLE:
+         complement = TRUE;
+         /* fall-through */
+      case TGSI_OPCODE_SGT:
+         ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */
+         break;
+      default:
+         assert(0);
+      }
 
-   case TGSI_OPCODE_LOOP:
-      return 0;
-      break;
+      /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */
 
-   case TGSI_OPCODE_REP:
-      return 0;
-      break;
+      if (complement)
+         ppc_vandc(gen->f, v2, one_vec, v2);    /* v2 = one_vec & ~v2 */
+      else
+         ppc_vand(gen->f, v2, one_vec, v2);     /* v2 = one_vec & v2 */
 
-   case TGSI_OPCODE_ELSE:
-      return 0;
-      break;
+      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
+   }
 
-   case TGSI_OPCODE_ENDIF:
-      return 0;
-      break;
+   ppc_release_vec_register(gen->f, v0);
+   ppc_release_vec_register(gen->f, v1);
+   ppc_release_vec_register(gen->f, v2);
+}
 
-   case TGSI_OPCODE_ENDLOOP:
-      return 0;
-      break;
 
-   case TGSI_OPCODE_ENDREP:
-      return 0;
-      break;
+static void
+emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int v0 = ppc_allocate_vec_register(gen->f);
+   int v1 = ppc_allocate_vec_register(gen->f);
+   int v2 = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
 
-   case TGSI_OPCODE_PUSHA:
-      return 0;
-      break;
+   ppc_vxor(gen->f, v2, v2, v2);           /* v2 = {0, 0, 0, 0} */
 
-   case TGSI_OPCODE_POPA:
-      return 0;
-      break;
+   FETCH(gen, *inst, v0, 0, CHAN_X);       /* v0 = src0.XXXX */
+   FETCH(gen, *inst, v1, 1, CHAN_X);       /* v1 = src1.XXXX */
+   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
 
-   case TGSI_OPCODE_CEIL:
-      return 0;
-      break;
+   FETCH(gen, *inst, v0, 0, CHAN_Y);       /* v0 = src0.YYYY */
+   FETCH(gen, *inst, v1, 1, CHAN_Y);       /* v1 = src1.YYYY */
+   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
 
-   case TGSI_OPCODE_I2F:
-      return 0;
-      break;
+   FETCH(gen, *inst, v0, 0, CHAN_Z);       /* v0 = src0.ZZZZ */
+   FETCH(gen, *inst, v1, 1, CHAN_Z);       /* v1 = src1.ZZZZ */
+   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
 
-   case TGSI_OPCODE_NOT:
-      return 0;
-      break;
+   if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) {
+      FETCH(gen, *inst, v0, 0, CHAN_W);    /* v0 = src0.WWWW */
+      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
+      ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
+   }
+   else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) {
+      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
+      ppc_vaddfp(gen->f, v2, v2, v1);      /* v2 = v2 + v1 */
+   }
 
-   case TGSI_OPCODE_TRUNC:
-      return 0;
-      break;
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+      STORE(gen, *inst, v2, 0, chan_index);  /* store v2 */
+   }
+   ppc_release_vec_register(gen->f, v0);
+   ppc_release_vec_register(gen->f, v1);
+   ppc_release_vec_register(gen->f, v2);
+}
 
-   case TGSI_OPCODE_SHL:
-      return 0;
-      break;
 
-   case TGSI_OPCODE_SHR:
-      return 0;
-      break;
+static void
+emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int v0 = ppc_allocate_vec_register(gen->f);
+   int v1 = ppc_allocate_vec_register(gen->f);
+   int v2 = ppc_allocate_vec_register(gen->f);
+   int v3 = ppc_allocate_vec_register(gen->f);
+   uint chan_index;
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
+      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
+      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
+      FETCH(gen, *inst, v2, 2, chan_index);   /* v2 = srcreg[2] */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_MAD:
+         ppc_vmaddfp(gen->f, v3, v0, v1, v2);   /* v3 = v0 * v1 + v2 */
+         break;
+      case TGSI_OPCODE_LRP:
+         ppc_vsubfp(gen->f, v3, v1, v2);        /* v3 = v1 - v2 */
+         ppc_vmaddfp(gen->f, v3, v0, v3, v2);   /* v3 = v0 * v3 + v2 */
+         break;
+      default:
+         assert(0);
+      }
+      STORE(gen, *inst, v3, 0, chan_index);   /* store v3 */
+   }
+   ppc_release_vec_register(gen->f, v0);
+   ppc_release_vec_register(gen->f, v1);
+   ppc_release_vec_register(gen->f, v2);
+   ppc_release_vec_register(gen->f, v3);
+}
 
-   case TGSI_OPCODE_AND:
-      return 0;
-      break;
 
-   case TGSI_OPCODE_OR:
-      return 0;
-      break;
+/*
+static void
+emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+}
+*/
 
-   case TGSI_OPCODE_MOD:
-      return 0;
-      break;
 
-   case TGSI_OPCODE_XOR:
-      return 0;
+static int
+emit_instruction(struct gen_context *gen,
+                 struct tgsi_full_instruction *inst)
+{
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_ABS:
+   case TGSI_OPCODE_FLOOR:
+   case TGSI_OPCODE_FRAC:
+   case TGSI_OPCODE_EXPBASE2:
+   case TGSI_OPCODE_LOGBASE2:
+      emit_unaryop(gen, inst);
       break;
-
-   case TGSI_OPCODE_SAD:
-      return 0;
+   case TGSI_OPCODE_RSQ:
+   case TGSI_OPCODE_RCP:
+      emit_scalar_unaryop(gen, inst);
       break;
-
-   case TGSI_OPCODE_TXF:
-      return 0;
+   case TGSI_OPCODE_ADD:
+   case TGSI_OPCODE_SUB:
+   case TGSI_OPCODE_MUL:
+   case TGSI_OPCODE_MIN:
+   case TGSI_OPCODE_MAX:
+      emit_binop(gen, inst);
       break;
-
-   case TGSI_OPCODE_TXQ:
-      return 0;
+   case TGSI_OPCODE_SEQ:
+   case TGSI_OPCODE_SNE:
+   case TGSI_OPCODE_SLT:
+   case TGSI_OPCODE_SGT:
+   case TGSI_OPCODE_SLE:
+   case TGSI_OPCODE_SGE:
+      emit_inequality(gen, inst);
       break;
-
-   case TGSI_OPCODE_CONT:
-      return 0;
+   case TGSI_OPCODE_MAD:
+   case TGSI_OPCODE_LRP:
+      emit_triop(gen, inst);
       break;
-
-   case TGSI_OPCODE_EMIT:
-      return 0;
+   case TGSI_OPCODE_DP3:
+   case TGSI_OPCODE_DP4:
+   case TGSI_OPCODE_DPH:
+      emit_dotprod(gen, inst);
       break;
-
-   case TGSI_OPCODE_ENDPRIM:
-      return 0;
+      /*
+   case TGSI_OPCODE_LIT:
+      emit_lit(gen, inst);
       break;
-
+      */
+   case TGSI_OPCODE_END:
+      /* normal end */
+      return 1;
    default:
       return 0;
    }
-#endif
+
    
    return 1;
 }
@@ -2608,133 +663,6 @@ emit_declaration(
    }
 }
 
-#if 0
-static void aos_to_soa( struct x86_function *func, 
-                        uint arg_aos,
-                        uint arg_soa, 
-                        uint arg_num, 
-                        uint arg_stride )
-{
-   struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX );
-   struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX );
-   struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX );
-   struct x86_reg stride = x86_make_reg( file_REG32, reg_DX );
-   int inner_loop;
-
-
-   /* Save EBX */
-   x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
-
-   x86_mov( func, aos_input,  x86_fn_arg( func, arg_aos ) );
-   x86_mov( func, soa_input,  x86_fn_arg( func, arg_soa ) );
-   x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
-   x86_mov( func, stride,     x86_fn_arg( func, arg_stride ) );
-
-   /* do */
-   inner_loop = x86_get_label( func );
-   {
-      x86_push( func, aos_input );
-      sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
-      sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
-      x86_add( func, aos_input, stride );
-      sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
-      sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
-      x86_add( func, aos_input, stride );
-      sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
-      sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
-      x86_add( func, aos_input, stride );
-      sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
-      sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
-      x86_pop( func, aos_input );
-
-      sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
-      sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
-      sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 );
-      sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd );
-      sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 );
-      sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd );
-
-      sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) );
-      sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) );
-      sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) );
-      sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) );
-
-      /* Advance to next input */
-      x86_lea( func, aos_input, x86_make_disp(aos_input, 16) );
-      x86_lea( func, soa_input, x86_make_disp(soa_input, 64) );
-   }
-   /* while --num_inputs */
-   x86_dec( func, num_inputs );
-   x86_jcc( func, cc_NE, inner_loop );
-
-   /* Restore EBX */
-   x86_pop( func, aos_input );
-}
-#endif
-
-#if 0
-static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
-{
-   struct x86_reg soa_output;
-   struct x86_reg aos_output;
-   struct x86_reg num_outputs;
-   struct x86_reg temp;
-   int inner_loop;
-
-   soa_output = x86_make_reg( file_REG32, reg_AX );
-   aos_output = x86_make_reg( file_REG32, reg_BX );
-   num_outputs = x86_make_reg( file_REG32, reg_CX );
-   temp = x86_make_reg( file_REG32, reg_DX );
-
-   /* Save EBX */
-   x86_push( func, aos_output );
-
-   x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
-   x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
-   x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
-
-   /* do */
-   inner_loop = x86_get_label( func );
-   {
-      sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) );
-      sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) );
-      sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) );
-      sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) );
-
-      sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
-      sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
-      sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) );
-      sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) );
-      sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
-      sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
-
-      x86_mov( func, temp, x86_fn_arg( func, stride ) );
-      x86_push( func, aos_output );
-      sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
-      sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
-      x86_add( func, aos_output, temp );
-      sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
-      sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
-      x86_add( func, aos_output, temp );
-      sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
-      sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
-      x86_add( func, aos_output, temp );
-      sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
-      sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
-      x86_pop( func, aos_output );
-
-      /* Advance to next output */
-      x86_lea( func, aos_output, x86_make_disp(aos_output, 16) );
-      x86_lea( func, soa_output, x86_make_disp(soa_output, 64) );
-   }
-   /* while --num_outputs */
-   x86_dec( func, num_outputs );
-   x86_jcc( func, cc_NE, inner_loop );
-
-   /* Restore EBX */
-   x86_pop( func, aos_output );
-}
-#endif
 
 
 static void
@@ -2788,67 +716,6 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
 
    emit_prologue(func);
 
-   /*
-    * Different function args for vertex/fragment shaders:
-    */
-#if 0
-   if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
-      /* DECLARATION phase, do not load output argument. */
-      x86_mov(
-         func,
-         get_input_base(),
-         x86_fn_arg( func, 1 ) );
-      /* skipping outputs argument here */
-      x86_mov(
-         func,
-         get_const_base(),
-         x86_fn_arg( func, 3 ) );
-      x86_mov(
-         func,
-         get_temp_base(),
-         x86_fn_arg( func, 4 ) );
-      x86_mov(
-         func,
-         get_coef_base(),
-         x86_fn_arg( func, 5 ) );
-      x86_mov(
-         func,
-         get_immediate_base(),
-         x86_fn_arg( func, 6 ) );
-   }
-   else {
-      assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
-
-      if (do_swizzles)
-         aos_to_soa( func, 
-                     6,         /* aos_input */
-                     1,         /* machine->input */
-                     7,         /* num_inputs */
-                     8 );       /* input_stride */
-
-      x86_mov(
-         func,
-         get_input_base(),
-         x86_fn_arg( func, 1 ) );
-      x86_mov(
-         func,
-         get_output_base(),
-         x86_fn_arg( func, 2 ) );
-      x86_mov(
-         func,
-         get_const_base(),
-         x86_fn_arg( func, 3 ) );
-      x86_mov(
-         func,
-         get_temp_base(),
-         x86_fn_arg( func, 4 ) );
-      x86_mov(
-         func,
-         get_immediate_base(),
-         x86_fn_arg( func, 5 ) );
-   }
-#endif
-
    while (!tgsi_parse_end_of_tokens(&parse) && ok) {
       tgsi_parse_token(&parse);
 
@@ -2860,19 +727,6 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
-         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
-#if 0
-            if( !instruction_phase ) {
-               /* INSTRUCTION phase, overwrite coeff with output. */
-               instruction_phase = TRUE;
-               x86_mov(
-                  func,
-                  get_output_base(),
-                  x86_fn_arg( func, 2 ) );
-            }
-#endif
-         }
-
          ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);
 
 	 if (!ok) {
@@ -2909,13 +763,6 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
       }
    }
 
-#if 0
-   if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
-      if (do_swizzles)
-         soa_to_aos( func, 9, 2, 10, 11 );
-   }
-#endif
-
    emit_epilogue(func);
 
    tgsi_parse_free( &parse );
-- 
cgit v1.2.3


From 77160cd97b7f2181b7953bcc8d13e86055b819e3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 22 Oct 2008 15:34:16 -0600
Subject: gallium: var renaming in tgsi_ppc.c

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 36 +++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 432ec7459b3..c1e707657b5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -117,11 +117,11 @@ gen_get_bit31_vec(struct gen_context *gen)
 
 
 /**
- * Register fetch.
+ * Register fetch, put result in 'dst_vec'.
  */
 static void
 emit_fetch(struct gen_context *gen,
-           unsigned vec_reg,
+           unsigned dst_vec,
            const struct tgsi_full_src_register *reg,
            const unsigned chan_index)
 {
@@ -138,7 +138,7 @@ emit_fetch(struct gen_context *gen,
             int offset_reg = ppc_allocate_register(gen->f);
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
             ppc_li(gen->f, offset_reg, offset);
-            ppc_lvx(gen->f, vec_reg, gen->inputs_reg, offset_reg);
+            ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
             ppc_release_register(gen->f, offset_reg);
          }
          break;
@@ -147,7 +147,7 @@ emit_fetch(struct gen_context *gen,
             int offset_reg = ppc_allocate_register(gen->f);
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
             ppc_li(gen->f, offset_reg, offset);
-            ppc_lvx(gen->f, vec_reg, gen->temps_reg, offset_reg);
+            ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg);
             ppc_release_register(gen->f, offset_reg);
          }
          break;
@@ -156,7 +156,7 @@ emit_fetch(struct gen_context *gen,
             int offset_reg = ppc_allocate_register(gen->f);
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
             ppc_li(gen->f, offset_reg, offset);
-            ppc_lvx(gen->f, vec_reg, gen->immed_reg, offset_reg);
+            ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg);
             ppc_release_register(gen->f, offset_reg);
          }
          break;
@@ -171,9 +171,9 @@ emit_fetch(struct gen_context *gen,
              * know that 'swizzle' tells us which vector slot will have the
              * loaded word.  The other vector slots will be undefined.
              */
-            ppc_lvewx(gen->f, vec_reg, gen->const_reg, offset_reg);
+            ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg);
             /* splat word[swizzle] across the vector reg */
-            ppc_vspltw(gen->f, vec_reg, vec_reg, swizzle);
+            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
             ppc_release_register(gen->f, offset_reg);
          }
          break;
@@ -182,12 +182,12 @@ emit_fetch(struct gen_context *gen,
       }
       break;
    case TGSI_EXTSWIZZLE_ZERO:
-      ppc_vload_float(gen->f, vec_reg, 0.0f);
+      ppc_vload_float(gen->f, dst_vec, 0.0f);
       break;
    case TGSI_EXTSWIZZLE_ONE:
       {
          int one_vec = gen_one_vec(gen);
-         ppc_vecmove(gen->f, vec_reg, one_vec);
+         ppc_vecmove(gen->f, dst_vec, one_vec);
       }
       break;
    default:
@@ -202,15 +202,15 @@ emit_fetch(struct gen_context *gen,
          switch (sign_op) {
          case TGSI_UTIL_SIGN_CLEAR:
             /* vec = vec & ~bit31 */
-            ppc_vandc(gen->f, vec_reg, vec_reg, bit31_vec);
+            ppc_vandc(gen->f, dst_vec, dst_vec, bit31_vec);
             break;
          case TGSI_UTIL_SIGN_SET:
             /* vec = vec | bit31 */
-            ppc_vor(gen->f, vec_reg, vec_reg, bit31_vec);
+            ppc_vor(gen->f, dst_vec, dst_vec, bit31_vec);
             break;
          case TGSI_UTIL_SIGN_TOGGLE:
             /* vec = vec ^ bit31 */
-            ppc_vxor(gen->f, vec_reg, vec_reg, bit31_vec);
+            ppc_vxor(gen->f, dst_vec, dst_vec, bit31_vec);
             break;
          default:
             assert(0);
@@ -219,17 +219,17 @@ emit_fetch(struct gen_context *gen,
    }
 }
 
-#define FETCH( GEN, INST, VEC_REG, SRC_REG, CHAN ) \
-   emit_fetch( GEN, VEC_REG, &(INST).FullSrcRegisters[SRC_REG], CHAN )
+#define FETCH( GEN, INST, DST_VEC, SRC_REG, CHAN ) \
+   emit_fetch( GEN, DST_VEC, &(INST).FullSrcRegisters[SRC_REG], CHAN )
 
 
 
 /**
- * Register store.
+ * Register store.  Store 'src_vec' at location indicated by 'reg'.
  */
 static void
 emit_store(struct gen_context *gen,
-           unsigned vec_reg,
+           unsigned src_vec,
            const struct tgsi_full_dst_register *reg,
            const struct tgsi_full_instruction *inst,
            unsigned chan_index)
@@ -240,7 +240,7 @@ emit_store(struct gen_context *gen,
          int offset_reg = ppc_allocate_register(gen->f);
          int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
          ppc_li(gen->f, offset_reg, offset);
-         ppc_stvx(gen->f, vec_reg, gen->outputs_reg, offset_reg);
+         ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg);
          ppc_release_register(gen->f, offset_reg);
       }
       break;
@@ -249,7 +249,7 @@ emit_store(struct gen_context *gen,
          int offset_reg = ppc_allocate_register(gen->f);
          int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
          ppc_li(gen->f, offset_reg, offset);
-         ppc_stvx(gen->f, vec_reg, gen->temps_reg, offset_reg);
+         ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg);
          ppc_release_register(gen->f, offset_reg);
       }
       break;
-- 
cgit v1.2.3


From 9e3ee82305b4602feca0253dc0e0c27f9bc9b05e Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 22 Oct 2008 16:57:22 -0600
Subject: gallium: PPC LIT instruction (not quite complete yet)

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 89 +++++++++++++++++++++++++++++++++--
 1 file changed, 85 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index c1e707657b5..edd535a8846 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -535,12 +535,95 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 }
 
 
-/*
+
+/** Approximation for vr = pow(va, vb) */
+static void
+ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb)
+{
+   /* pow(a,b) ~= exp2(log2(a) * b) */
+   int t_vec = ppc_allocate_vec_register(f);
+   int zero_vec = ppc_allocate_vec_register(f);
+
+   ppc_vload_float(f, zero_vec, 0.0f);
+
+   ppc_vlogefp(f, t_vec, va);                   /* t = log2(va) */
+   ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec);  /* t = t * vb */
+   ppc_vexptefp(f, vr, t_vec);                  /* vr = 2^t */
+
+   ppc_release_vec_register(f, t_vec);
+   ppc_release_vec_register(f, zero_vec);
+}
+
+
 static void
 emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
+   int one_vec = gen_one_vec(gen);
+
+   /* Compute X */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+      STORE(gen, *inst, one_vec, 0, CHAN_X);
+   }
+
+   /* Compute Y, Z */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+      int x_vec = ppc_allocate_vec_register(gen->f);
+      int zero_vec = ppc_allocate_vec_register(gen->f);
+
+      FETCH(gen, *inst, x_vec, 0, CHAN_X);        /* x_vec = src[0].x */
+
+      ppc_vload_float(gen->f, zero_vec, 0.0f);    /* zero = {0,0,0,0} */
+      ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */
+
+      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+         STORE(gen, *inst, x_vec, 0, CHAN_Y);        /* store Y */
+      }
+
+      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+         int y_vec = ppc_allocate_vec_register(gen->f);
+         int z_vec = ppc_allocate_vec_register(gen->f);
+         int w_vec = ppc_allocate_vec_register(gen->f);
+         int pow_vec = ppc_allocate_vec_register(gen->f);
+         int pos_vec = ppc_allocate_vec_register(gen->f);
+         int c128_vec = ppc_allocate_vec_register(gen->f);
+
+         FETCH(gen, *inst, y_vec, 0, CHAN_Y);        /* y_vec = src[0].y */
+         ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */
+
+         FETCH(gen, *inst, w_vec, 0, CHAN_W);        /* w_vec = src[0].w */
+
+         /* XXX clamp Y to [-128, 128] */
+         ppc_vload_float(gen->f, c128_vec, 128.0f);
+
+         /* if temp.x > 0
+          *    pow(tmp.y, tmp.w)
+          * else
+          *   0.0
+          */
+
+         ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec);      /* pow = pow(y, w) */
+         ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */
+         ppc_vand(gen->f, z_vec, pow_vec, pos_vec);       /* z = pow & pos */
+
+         STORE(gen, *inst, z_vec, 0, CHAN_Z);             /* store Z */
+
+         ppc_release_vec_register(gen->f, y_vec);
+         ppc_release_vec_register(gen->f, z_vec);
+         ppc_release_vec_register(gen->f, w_vec);
+         ppc_release_vec_register(gen->f, pow_vec);
+         ppc_release_vec_register(gen->f, pos_vec);
+      }
+
+      ppc_release_vec_register(gen->f, x_vec);
+      ppc_release_vec_register(gen->f, zero_vec);
+   }
+
+   /* Compute W */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
+      STORE(gen, *inst, one_vec, 0, CHAN_W);
+   }
 }
-*/
 
 
 static int
@@ -584,11 +667,9 @@ emit_instruction(struct gen_context *gen,
    case TGSI_OPCODE_DPH:
       emit_dotprod(gen, inst);
       break;
-      /*
    case TGSI_OPCODE_LIT:
       emit_lit(gen, inst);
       break;
-      */
    case TGSI_OPCODE_END:
       /* normal end */
       return 1;
-- 
cgit v1.2.3


From ae81aeb12868db219cbdc02437c481714cfed3f5 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 22 Oct 2008 16:58:05 -0600
Subject: gallium: GALLIUM_NOPPC debug var to disable PPC codegen

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index edd535a8846..9d7de41fe7f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -776,15 +776,21 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
               float (*immediates)[4],
               boolean do_swizzles )
 {
+   static int use_ppc_asm = -1;
    struct tgsi_parse_context parse;
    /*boolean instruction_phase = FALSE;*/
    unsigned ok = 1;
    uint num_immediates = 0;
    struct gen_context gen;
 
-   util_init_math();
+   if (use_ppc_asm < 0) {
+      /* If GALLIUM_NOPPC is set, don't use PPC codegen */
+      use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE);
+   }
+   if (!use_ppc_asm)
+      return FALSE;
 
-   tgsi_parse_init( &parse, tokens );
+   util_init_math();
 
    gen.f = func;
    gen.inputs_reg = ppc_reserve_register(func, 3);   /* first function param */
@@ -797,6 +803,8 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
 
    emit_prologue(func);
 
+   tgsi_parse_init( &parse, tokens );
+
    while (!tgsi_parse_end_of_tokens(&parse) && ok) {
       tgsi_parse_token(&parse);
 
-- 
cgit v1.2.3


From abbbe876ac98596b143da295abf6887e0a4e50d2 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 22 Oct 2008 17:19:12 -0600
Subject: gallium: new PPC built-in constants array

It's hard to form PPC vector immediates so load them from an array.
---
 src/gallium/auxiliary/draw/draw_vs_ppc.c |  8 +++--
 src/gallium/auxiliary/tgsi/tgsi_ppc.c    | 61 ++++++++++++++++++++++++++++----
 src/gallium/auxiliary/tgsi/tgsi_ppc.h    |  3 ++
 3 files changed, 63 insertions(+), 9 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index fcc9cbfec54..8eff6d4fda3 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -55,7 +55,8 @@ typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4],
                                              float (*outputs)[4][4],
                                              float (*temps)[4][4],
                                              float (*immeds)[4][4],
-                                             float (*consts)[4]);
+                                             float (*consts)[4],
+                                             const float *builtins);
 
 #if 0
    const struct tgsi_exec_vector *input,
@@ -151,7 +152,8 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
 #else
       shader->func(inputs_soa, outputs_soa, temps_soa,
 		   (float (*)[4][4]) shader->base.immediates,
-		   (float (*)[4]) constants);
+		   (float (*)[4]) constants,
+                   ppc_builtin_constants);
 
       /*output[0][0] = input[0][0] * 0.5;*/
 #endif
@@ -246,7 +248,9 @@ draw_create_vs_ppc(struct draw_context *draw,
    return &vs->base;
 
 fail:
+   /*
    debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n");
+   */
 
    ppc_release_func( &vs->ppc_program );
    
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 9d7de41fe7f..6b05fd16cf9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -36,6 +36,7 @@
 #include "pipe/p_debug.h"
 #include "pipe/p_shader_tokens.h"
 #include "util/u_math.h"
+#include "util/u_memory.h"
 #include "util/u_sse.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
@@ -44,6 +45,14 @@
 #include "rtasm/rtasm_ppc.h"
 
 
+/**
+ * Since it's pretty much impossible to form PPC vector immediates, load
+ * them from memory here:
+ */
+const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
+   1.0f, -128.0f, 128.0, 0.0
+};
+
 
 #define FOR_EACH_CHANNEL( CHAN )\
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
@@ -81,12 +90,46 @@ struct gen_context
    int temps_reg;     /**< GP register pointing to temporary "registers" */
    int immed_reg;     /**< GP register pointing to immediates buffer */
    int const_reg;     /**< GP register pointing to constants buffer */
+   int builtins_reg;  /**< GP register pointint to built-in constants */
 
    int one_vec;       /**< vector register with {1.0, 1.0, 1.0, 1.0} */
    int bit31_vec;     /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */
 };
 
 
+/**
+ * Load the given vector register with {value, value, value, value}.
+ * The value must be in the ppu_builtin_constants[] array.
+ * We wouldn't need this if there was a simple way to load PPC vector
+ * registers with immediate values!
+ */
+static void
+load_constant_vec(struct gen_context *gen, int dst_vec, float value)
+{
+   uint pos;
+   for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) {
+      if (ppc_builtin_constants[pos] == value) {
+         int offset_reg = ppc_allocate_register(gen->f);
+         int offset = pos * 4;
+
+         ppc_li(gen->f, offset_reg, offset);
+         /* Load 4-byte word into vector register.
+          * The vector slot depends on the effective address we load from.
+          * We know that our builtins start at a 16-byte boundary so we
+          * know that 'swizzle' tells us which vector slot will have the
+          * loaded word.  The other vector slots will be undefined.
+          */
+         ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg);
+         /* splat word[pos % 4] across the vector reg */
+         ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4);
+         ppc_release_register(gen->f, offset_reg);
+         return;
+      }
+   }
+   assert(0 && "Need to add new constant to ppc_builtin_constants array");
+}
+
+
 /**
  * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}.
  */
@@ -95,7 +138,7 @@ gen_one_vec(struct gen_context *gen)
 {
    if (gen->one_vec < 0) {
       gen->one_vec = ppc_allocate_vec_register(gen->f);
-      ppc_vload_float(gen->f, gen->one_vec, 1.0f);
+      load_constant_vec(gen, gen->one_vec, 1.0f);
    }
    return gen->one_vec;
 }
@@ -115,7 +158,6 @@ gen_get_bit31_vec(struct gen_context *gen)
 }
 
 
-
 /**
  * Register fetch, put result in 'dst_vec'.
  */
@@ -182,7 +224,7 @@ emit_fetch(struct gen_context *gen,
       }
       break;
    case TGSI_EXTSWIZZLE_ZERO:
-      ppc_vload_float(gen->f, dst_vec, 0.0f);
+      ppc_vzero(gen->f, dst_vec);
       break;
    case TGSI_EXTSWIZZLE_ONE:
       {
@@ -544,7 +586,7 @@ ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb)
    int t_vec = ppc_allocate_vec_register(f);
    int zero_vec = ppc_allocate_vec_register(f);
 
-   ppc_vload_float(f, zero_vec, 0.0f);
+   ppc_vzero(f, zero_vec);
 
    ppc_vlogefp(f, t_vec, va);                   /* t = log2(va) */
    ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec);  /* t = t * vb */
@@ -573,7 +615,7 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
 
       FETCH(gen, *inst, x_vec, 0, CHAN_X);        /* x_vec = src[0].x */
 
-      ppc_vload_float(gen->f, zero_vec, 0.0f);    /* zero = {0,0,0,0} */
+      ppc_vzero(gen->f, zero_vec);                /* zero = {0,0,0,0} */
       ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */
 
       if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
@@ -586,7 +628,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
          int w_vec = ppc_allocate_vec_register(gen->f);
          int pow_vec = ppc_allocate_vec_register(gen->f);
          int pos_vec = ppc_allocate_vec_register(gen->f);
-         int c128_vec = ppc_allocate_vec_register(gen->f);
+         int p128_vec = ppc_allocate_vec_register(gen->f);
+         int n128_vec = ppc_allocate_vec_register(gen->f);
 
          FETCH(gen, *inst, y_vec, 0, CHAN_Y);        /* y_vec = src[0].y */
          ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */
@@ -594,7 +637,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
          FETCH(gen, *inst, w_vec, 0, CHAN_W);        /* w_vec = src[0].w */
 
          /* XXX clamp Y to [-128, 128] */
-         ppc_vload_float(gen->f, c128_vec, 128.0f);
+         load_constant_vec(gen, p128_vec, 128.0f);
+         load_constant_vec(gen, n128_vec, -128.0f);
 
          /* if temp.x > 0
           *    pow(tmp.y, tmp.w)
@@ -613,6 +657,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
          ppc_release_vec_register(gen->f, w_vec);
          ppc_release_vec_register(gen->f, pow_vec);
          ppc_release_vec_register(gen->f, pos_vec);
+         ppc_release_vec_register(gen->f, p128_vec);
+         ppc_release_vec_register(gen->f, n128_vec);
       }
 
       ppc_release_vec_register(gen->f, x_vec);
@@ -798,6 +844,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
    gen.temps_reg = ppc_reserve_register(func, 5);    /* ... */
    gen.immed_reg = ppc_reserve_register(func, 6);
    gen.const_reg = ppc_reserve_register(func, 7);
+   gen.builtins_reg = ppc_reserve_register(func, 8);
    gen.one_vec = -1;
    gen.bit31_vec = -1;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h
index 7cd2bf9aff2..829ec075e7f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h
@@ -35,6 +35,9 @@ extern "C" {
 struct tgsi_token;
 struct ppc_function;
 
+extern const float ppc_builtin_constants[];
+
+
 boolean
 tgsi_emit_ppc(const struct tgsi_token *tokens,
               struct ppc_function *function,
-- 
cgit v1.2.3


From f8ab4feb75f4a592e23859813c093dcdbd4b8988 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 22 Oct 2008 17:21:43 -0600
Subject: gallium: remove ppc_vload_float(), rename ppc_vecmove() ->
 ppc_vmove().

---
 src/gallium/auxiliary/rtasm/rtasm_ppc.c | 19 +------------------
 src/gallium/auxiliary/rtasm/rtasm_ppc.h |  6 +-----
 src/gallium/auxiliary/tgsi/tgsi_ppc.c   |  2 +-
 3 files changed, 3 insertions(+), 24 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
index 51d9b536573..7dd82637494 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
@@ -603,23 +603,6 @@ ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb)
    emit_x(p, 31, vr, ra, rb, 71);
 }
 
-/** vector load float: vr = splats(imm) */
-void
-ppc_vload_float(struct ppc_function *p, uint vr, float imm)
-{
-   if (imm == 0.0f) {
-      ppc_vxor(p, vr, vr, vr);
-   }
-   else if (imm == 1.0f) {
-      /* use 2^0=1 to get 1.0 */
-      ppc_vxor(p, vr, vr, vr);  /* vr = {0,0,0,0} */
-      ppc_vexptefp(p, vr, vr);  /* vr = 0^0 */
-   }
-   else {
-      assert(0);
-   }
-}
-
 
 
 
@@ -664,7 +647,7 @@ ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB)
 
 /** Pseudo-instruction: vector move */
 void
-ppc_vecmove(struct ppc_function *p, uint vD, uint vA)
+ppc_vmove(struct ppc_function *p, uint vD, uint vA)
 {
    ppc_vor(p, vD, vA, vA);
 }
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
index f194d3be134..f938d8d759c 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
@@ -158,10 +158,6 @@ ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB);
 extern void
 ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB);
 
-/** vector load float: vr = splats(imm) */
-extern void
-ppc_vload_float(struct ppc_function *p, uint vr, float imm);
-
 
 
 /**
@@ -191,7 +187,7 @@ ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB);
 
 /** Pseudo-instruction: vector move */
 extern void
-ppc_vecmove(struct ppc_function *p, uint vD, uint vA);
+ppc_vmove(struct ppc_function *p, uint vD, uint vA);
 
 /** Set vector register to {0,0,0,0} */
 extern void
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 6b05fd16cf9..96beec0cc66 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -229,7 +229,7 @@ emit_fetch(struct gen_context *gen,
    case TGSI_EXTSWIZZLE_ONE:
       {
          int one_vec = gen_one_vec(gen);
-         ppc_vecmove(gen->f, dst_vec, one_vec);
+         ppc_vmove(gen->f, dst_vec, one_vec);
       }
       break;
    default:
-- 
cgit v1.2.3


From 0ac99457811eb766e9bdd3903857b5c0fdef7694 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 22 Oct 2008 17:29:37 -0600
Subject: gallium: PPC: clamp y to [-128,128] for LIT

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 96beec0cc66..9ad7ecd7cfe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -636,16 +636,17 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
 
          FETCH(gen, *inst, w_vec, 0, CHAN_W);        /* w_vec = src[0].w */
 
-         /* XXX clamp Y to [-128, 128] */
+         /* clamp Y to [-128, 128] */
          load_constant_vec(gen, p128_vec, 128.0f);
          load_constant_vec(gen, n128_vec, -128.0f);
+         ppc_vmaxfp(gen->f, y_vec, y_vec, n128_vec); /* y = max(y, -128) */
+         ppc_vminfp(gen->f, y_vec, y_vec, p128_vec); /* y = min(y, 128) */
 
          /* if temp.x > 0
-          *    pow(tmp.y, tmp.w)
+          *    z = pow(tmp.y, tmp.w)
           * else
-          *   0.0
+          *    z = 0.0
           */
-
          ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec);      /* pow = pow(y, w) */
          ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */
          ppc_vand(gen->f, z_vec, pow_vec, pos_vec);       /* z = pow & pos */
-- 
cgit v1.2.3


From 6b69e3c71741d99a54c6f4dcb605a3c241239aeb Mon Sep 17 00:00:00 2001
From: Michel Dänzer <michel@tungstengraphics.com>
Date: Thu, 23 Oct 2008 10:28:48 +0200
Subject: scons: ppc support.

---
 SConstruct                             | 2 ++
 common.py                              | 3 ++-
 scons/gallium.py                       | 1 +
 src/gallium/auxiliary/draw/SConscript  | 1 +
 src/gallium/auxiliary/rtasm/SConscript | 1 +
 src/gallium/auxiliary/tgsi/SConscript  | 1 +
 src/mesa/SConscript                    | 4 ++++
 7 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/SConstruct b/SConstruct
index c1dc6246519..8c96817daef 100644
--- a/SConstruct
+++ b/SConstruct
@@ -70,12 +70,14 @@ platform = env['platform']
 
 # derived options
 x86 = machine == 'x86'
+ppc = machine == 'ppc'
 gcc = platform in ('linux', 'freebsd', 'darwin')
 msvc = platform in ('windows', 'winddk')
 
 Export([
 	'debug', 
 	'x86', 
+	'ppc', 
 	'dri', 
 	'llvm',
 	'platform',
diff --git a/common.py b/common.py
index dd64e0f434d..cc2582f1a4e 100644
--- a/common.py
+++ b/common.py
@@ -24,6 +24,7 @@ _machine_map = {
 	'i486': 'x86',
 	'i586': 'x86',
 	'i686': 'x86',
+	'ppc' : 'ppc',
 	'x86_64': 'x86_64',
 }
 if 'PROCESSOR_ARCHITECTURE' in os.environ:
@@ -56,7 +57,7 @@ def AddOptions(opts):
 	opts.Add(BoolOption('profile', 'profile build', 'no'))
 	#opts.Add(BoolOption('quiet', 'quiet command lines', 'no'))
 	opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine,
-											 allowed_values=('generic', 'x86', 'x86_64')))
+											 allowed_values=('generic', 'ppc', 'x86', 'x86_64')))
 	opts.Add(EnumOption('platform', 'target platform', default_platform,
 											 allowed_values=('linux', 'cell', 'windows', 'winddk', 'wince')))
 	opts.Add(BoolOption('llvm', 'use LLVM', 'no'))
diff --git a/scons/gallium.py b/scons/gallium.py
index 3631607e666..2a42bdf2bb9 100644
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -175,6 +175,7 @@ def generate(env):
     machine = env['machine']
     platform = env['platform']
     x86 = env['machine'] == 'x86'
+    ppc = env['machine'] == 'ppc'
     gcc = env['platform'] in ('linux', 'freebsd', 'darwin')
     msvc = env['platform'] in ('windows', 'winddk', 'wince')
 
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index 544a04918b6..5f05aa324a5 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -38,6 +38,7 @@ draw = env.ConvenienceLibrary(
 		'draw_vs_aos_machine.c',
 		'draw_vs_exec.c',
 		'draw_vs_llvm.c',
+		'draw_vs_ppc.c',
 		'draw_vs_sse.c',
 		'draw_vs_varient.c'
 	])
diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript
index 8ea25922aa1..eb48368accb 100644
--- a/src/gallium/auxiliary/rtasm/SConscript
+++ b/src/gallium/auxiliary/rtasm/SConscript
@@ -6,6 +6,7 @@ rtasm = env.ConvenienceLibrary(
 		'rtasm_cpu.c',
 		'rtasm_execmem.c',
 		'rtasm_x86sse.c',
+		'rtasm_ppc.c',
 		'rtasm_ppc_spe.c',
 	])
 
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 45bf3f6d577..8200cce42f5 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -12,6 +12,7 @@ tgsi = env.ConvenienceLibrary(
 		'tgsi_parse.c',
 		'tgsi_sanity.c',
 		'tgsi_scan.c',
+		'tgsi_ppc.c',
 		'tgsi_sse2.c',
 		'tgsi_text.c',
 		'tgsi_transform.c',
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index af8dfcb4930..89b98b37ab0 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -283,6 +283,10 @@ if env['platform'] != 'winddk':
 			'x86-64/glapi_x86-64.S'
 		]
 	elif gcc and env['machine'] == 'ppc':
+		env.Append(CPPDEFINES = [
+			'USE_PPC_ASM', 
+			'USE_VMX_ASM', 
+		])
 		mesa_sources += [
 			'ppc/common_ppc.c',
 		]
-- 
cgit v1.2.3


From 604be5561f17042f61db42b31caf4d720cf66389 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Mon, 27 Oct 2008 15:36:25 -0600
Subject: gallium: ppc: use a src register cache to avoid redundant loads

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 300 +++++++++++++++++++++++-----------
 1 file changed, 204 insertions(+), 96 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 9ad7ecd7cfe..000ddba9359 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -72,11 +72,14 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
 #define CHAN_Z 2
 #define CHAN_W 3
 
-#define TEMP_ONE_I   TGSI_EXEC_TEMP_ONE_I
-#define TEMP_ONE_C   TGSI_EXEC_TEMP_ONE_C
 
-#define TEMP_R0   TGSI_EXEC_TEMP_R0
-#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR
+
+struct reg_chan_vec
+{
+   struct tgsi_full_src_register src;
+   uint chan;
+   uint vec;
+};
 
 
 /**
@@ -94,6 +97,18 @@ struct gen_context
 
    int one_vec;       /**< vector register with {1.0, 1.0, 1.0, 1.0} */
    int bit31_vec;     /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */
+
+
+   /**
+    * Cache of src registers.
+    * This is used to avoid redundant load instructions.
+    */
+   struct {
+      struct tgsi_full_src_register src;
+      uint chan;
+      uint vec;
+   } regs[12];  /* 3 src regs, 4 channels */
+   uint num_regs;
 };
 
 
@@ -261,8 +276,83 @@ emit_fetch(struct gen_context *gen,
    }
 }
 
-#define FETCH( GEN, INST, DST_VEC, SRC_REG, CHAN ) \
-   emit_fetch( GEN, DST_VEC, &(INST).FullSrcRegisters[SRC_REG], CHAN )
+
+
+/**
+ * Test if two TGSI src registers refer to the same memory location.
+ * We use this to avoid redundant register loads.
+ */
+static boolean
+equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a,
+               const struct tgsi_full_src_register *b, uint chan_b)
+{
+   int swz_a, swz_b;
+   int sign_a, sign_b;
+   if (a->SrcRegister.File != b->SrcRegister.File)
+      return FALSE;
+   if (a->SrcRegister.Index != b->SrcRegister.Index)
+      return FALSE;
+   swz_a = tgsi_util_get_full_src_register_extswizzle(a, chan_a);
+   swz_b = tgsi_util_get_full_src_register_extswizzle(b, chan_b);
+   if (swz_a != swz_b)
+      return FALSE;
+   sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a);
+   sign_b = tgsi_util_get_full_src_register_sign_mode(b, chan_b);
+   if (sign_a != sign_b)
+      return FALSE;
+   return TRUE;
+}
+
+
+/**
+ * Given a TGSI src register and channel index, return the PPC vector
+ * register containing the value.  We use a cache to prevent re-loading
+ * the same register multiple times.
+ * \return index of PPC vector register with the desired src operand
+ */
+static int
+get_src_vec(struct gen_context *gen,
+            struct tgsi_full_instruction *inst, int src_reg, uint chan)
+{
+   const const struct tgsi_full_src_register *src = 
+      &inst->FullSrcRegisters[src_reg];
+   int vec;
+   uint i;
+
+   /* check the cache */
+   for (i = 0; i < gen->num_regs; i++) {
+      if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) {
+         /* cache hit */
+         return gen->regs[i].vec;
+      }
+   }
+
+   /* cache miss: allocate new vec reg and emit fetch/load code */
+   vec = ppc_allocate_vec_register(gen->f);
+   gen->regs[gen->num_regs].src = *src;
+   gen->regs[gen->num_regs].chan = chan;
+   gen->regs[gen->num_regs].vec = vec;
+   gen->num_regs++;
+   emit_fetch(gen, vec, src, chan);
+
+   assert(gen->num_regs <= Elements(gen->regs));
+
+   return vec;
+}
+
+
+/**
+ * Clear the src operand cache.  To be called at the end of each emit function.
+ */
+static void
+release_src_vecs(struct gen_context *gen)
+{
+   uint i;
+   for (i = 0; i < gen->num_regs; i++) {
+      ppc_release_vec_register(gen->f, gen->regs[i].vec);
+   }
+   gen->num_regs = 0;
+}
 
 
 
@@ -333,11 +423,10 @@ emit_store(struct gen_context *gen,
 static void
 emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   int v0 = ppc_allocate_vec_register(gen->f);
-   int v1 = ppc_allocate_vec_register(gen->f);
+   int v0, v1 = ppc_allocate_vec_register(gen->f);
    uint chan_index;
 
-   FETCH(gen, *inst, v0, 0, CHAN_X);
+   v0 = get_src_vec(gen, inst, 0, CHAN_X);
 
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_RSQ:
@@ -355,7 +444,8 @@ emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
    FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
       STORE(gen, *inst, v1, 0, chan_index);
    }
-   ppc_release_vec_register(gen->f, v0);
+
+   release_src_vecs(gen);
    ppc_release_vec_register(gen->f, v1);
 }
 
@@ -363,10 +453,9 @@ emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 static void
 emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   int v0 = ppc_allocate_vec_register(gen->f);
    uint chan_index;
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
-      FETCH(gen, *inst, 0, 0, chan_index);   /* v0 = srcreg[0] */
+      int v0 = get_src_vec(gen, inst, 0, chan_index);   /* v0 = srcreg[0] */
       switch (inst->Instruction.Opcode) {
       case TGSI_OPCODE_ABS:
          /* turn off the most significant bit of each vector float word */
@@ -404,20 +493,30 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
       }
       STORE(gen, *inst, v0, 0, chan_index);   /* store v0 */
    }
-   ppc_release_vec_register(gen->f, v0);
+
+   release_src_vecs(gen);
 }
 
 
 static void
 emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   int v0 = ppc_allocate_vec_register(gen->f);
-   int v1 = ppc_allocate_vec_register(gen->f);
-   int v2 = ppc_allocate_vec_register(gen->f);
-   uint chan_index;
-   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
-      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
-      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
+   int v2, zero_vec = -1;
+   uint chan;
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
+      zero_vec = ppc_allocate_vec_register(gen->f);
+      ppc_vzero(gen->f, zero_vec);
+   }
+
+   v2 = ppc_allocate_vec_register(gen->f);
+
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
+      /* fetch src operands */
+      int v0 = get_src_vec(gen, inst, 0, chan);
+      int v1 = get_src_vec(gen, inst, 1, chan);
+
+      /* emit binop */
       switch (inst->Instruction.Opcode) {
       case TGSI_OPCODE_ADD:
          ppc_vaddfp(gen->f, v2, v0, v1);
@@ -426,8 +525,7 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
          ppc_vsubfp(gen->f, v2, v0, v1);
          break;
       case TGSI_OPCODE_MUL:
-         ppc_vxor(gen->f, v2, v2, v2);        /* v2 = {0, 0, 0, 0} */
-         ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */
+         ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec);
          break;
       case TGSI_OPCODE_MIN:
          ppc_vminfp(gen->f, v2, v0, v1);
@@ -438,11 +536,54 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
       default:
          assert(0);
       }
-      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
+
+      /* store v2 */
+      STORE(gen, *inst, v2, 0, chan);
    }
-   ppc_release_vec_register(gen->f, v0);
-   ppc_release_vec_register(gen->f, v1);
+
    ppc_release_vec_register(gen->f, v2);
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_MUL)
+      ppc_release_vec_register(gen->f, zero_vec);
+
+   release_src_vecs(gen);
+}
+
+
+static void
+emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int v3;
+   uint chan;
+
+   v3 = ppc_allocate_vec_register(gen->f);
+
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
+      /* fetch src operands */
+      int v0 = get_src_vec(gen, inst, 0, chan);
+      int v1 = get_src_vec(gen, inst, 1, chan);
+      int v2 = get_src_vec(gen, inst, 2, chan);
+
+      /* emit ALU */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_MAD:
+         ppc_vmaddfp(gen->f, v3, v0, v1, v2);   /* v3 = v0 * v1 + v2 */
+         break;
+      case TGSI_OPCODE_LRP:
+         ppc_vsubfp(gen->f, v3, v1, v2);        /* v3 = v1 - v2 */
+         ppc_vmaddfp(gen->f, v3, v0, v3, v2);   /* v3 = v0 * v3 + v2 */
+         break;
+      default:
+         assert(0);
+      }
+
+      /* store v3 */
+      STORE(gen, *inst, v3, 0, chan);
+   }
+
+   ppc_release_vec_register(gen->f, v3);
+
+   release_src_vecs(gen);
 }
 
 
@@ -452,16 +593,17 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 static void
 emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   int v0 = ppc_allocate_vec_register(gen->f);
-   int v1 = ppc_allocate_vec_register(gen->f);
-   int v2 = ppc_allocate_vec_register(gen->f);
-   uint chan_index;
-   boolean complement = FALSE;
+   int v2;
+   uint chan;
    int one_vec = gen_one_vec(gen);
 
-   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
-      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
-      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
+   v2 = ppc_allocate_vec_register(gen->f);
+
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
+      /* fetch src operands */
+      int v0 = get_src_vec(gen, inst, 0, chan);
+      int v1 = get_src_vec(gen, inst, 1, chan);
+      boolean complement = FALSE;
 
       switch (inst->Instruction.Opcode) {
       case TGSI_OPCODE_SNE:
@@ -495,89 +637,58 @@ emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
       else
          ppc_vand(gen->f, v2, one_vec, v2);     /* v2 = one_vec & v2 */
 
-      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
+      /* store v2 */
+      STORE(gen, *inst, v2, 0, chan);
    }
 
-   ppc_release_vec_register(gen->f, v0);
-   ppc_release_vec_register(gen->f, v1);
    ppc_release_vec_register(gen->f, v2);
+
+   release_src_vecs(gen);
 }
 
 
 static void
 emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   int v0 = ppc_allocate_vec_register(gen->f);
-   int v1 = ppc_allocate_vec_register(gen->f);
-   int v2 = ppc_allocate_vec_register(gen->f);
+   int v0, v1, v2;
    uint chan_index;
 
+   v2 = ppc_allocate_vec_register(gen->f);
+
    ppc_vxor(gen->f, v2, v2, v2);           /* v2 = {0, 0, 0, 0} */
 
-   FETCH(gen, *inst, v0, 0, CHAN_X);       /* v0 = src0.XXXX */
-   FETCH(gen, *inst, v1, 1, CHAN_X);       /* v1 = src1.XXXX */
+   v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */
+   v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */
    ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
 
-   FETCH(gen, *inst, v0, 0, CHAN_Y);       /* v0 = src0.YYYY */
-   FETCH(gen, *inst, v1, 1, CHAN_Y);       /* v1 = src1.YYYY */
+   v0 = get_src_vec(gen, inst, 0, CHAN_Y); /* v0 = src0.YYYY */
+   v1 = get_src_vec(gen, inst, 1, CHAN_Y); /* v1 = src1.YYYY */
    ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
 
-   FETCH(gen, *inst, v0, 0, CHAN_Z);       /* v0 = src0.ZZZZ */
-   FETCH(gen, *inst, v1, 1, CHAN_Z);       /* v1 = src1.ZZZZ */
+   v0 = get_src_vec(gen, inst, 0, CHAN_Z); /* v0 = src0.ZZZZ */
+   v1 = get_src_vec(gen, inst, 1, CHAN_Z); /* v1 = src1.ZZZZ */
    ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
 
    if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) {
-      FETCH(gen, *inst, v0, 0, CHAN_W);    /* v0 = src0.WWWW */
-      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
-      ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
+      v0 = get_src_vec(gen, inst, 0, CHAN_W); /* v0 = src0.WWWW */
+      v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */
+      ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
    }
    else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) {
-      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
-      ppc_vaddfp(gen->f, v2, v2, v1);      /* v2 = v2 + v1 */
+      v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */
+      ppc_vaddfp(gen->f, v2, v2, v1);         /* v2 = v2 + v1 */
    }
 
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
       STORE(gen, *inst, v2, 0, chan_index);  /* store v2 */
    }
-   ppc_release_vec_register(gen->f, v0);
-   ppc_release_vec_register(gen->f, v1);
-   ppc_release_vec_register(gen->f, v2);
-}
 
+   release_src_vecs(gen);
 
-static void
-emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
-{
-   int v0 = ppc_allocate_vec_register(gen->f);
-   int v1 = ppc_allocate_vec_register(gen->f);
-   int v2 = ppc_allocate_vec_register(gen->f);
-   int v3 = ppc_allocate_vec_register(gen->f);
-   uint chan_index;
-   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
-      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
-      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
-      FETCH(gen, *inst, v2, 2, chan_index);   /* v2 = srcreg[2] */
-      switch (inst->Instruction.Opcode) {
-      case TGSI_OPCODE_MAD:
-         ppc_vmaddfp(gen->f, v3, v0, v1, v2);   /* v3 = v0 * v1 + v2 */
-         break;
-      case TGSI_OPCODE_LRP:
-         ppc_vsubfp(gen->f, v3, v1, v2);        /* v3 = v1 - v2 */
-         ppc_vmaddfp(gen->f, v3, v0, v3, v2);   /* v3 = v0 * v3 + v2 */
-         break;
-      default:
-         assert(0);
-      }
-      STORE(gen, *inst, v3, 0, chan_index);   /* store v3 */
-   }
-   ppc_release_vec_register(gen->f, v0);
-   ppc_release_vec_register(gen->f, v1);
    ppc_release_vec_register(gen->f, v2);
-   ppc_release_vec_register(gen->f, v3);
 }
 
 
-
 /** Approximation for vr = pow(va, vb) */
 static void
 ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb)
@@ -610,10 +721,10 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
    /* Compute Y, Z */
    if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
        IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
-      int x_vec = ppc_allocate_vec_register(gen->f);
+      int x_vec;
       int zero_vec = ppc_allocate_vec_register(gen->f);
 
-      FETCH(gen, *inst, x_vec, 0, CHAN_X);        /* x_vec = src[0].x */
+      x_vec = get_src_vec(gen, inst, 0, CHAN_X);  /* x_vec = src[0].x */
 
       ppc_vzero(gen->f, zero_vec);                /* zero = {0,0,0,0} */
       ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */
@@ -623,18 +734,16 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
       }
 
       if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
-         int y_vec = ppc_allocate_vec_register(gen->f);
-         int z_vec = ppc_allocate_vec_register(gen->f);
-         int w_vec = ppc_allocate_vec_register(gen->f);
+         int y_vec, z_vec, w_vec;
          int pow_vec = ppc_allocate_vec_register(gen->f);
          int pos_vec = ppc_allocate_vec_register(gen->f);
          int p128_vec = ppc_allocate_vec_register(gen->f);
          int n128_vec = ppc_allocate_vec_register(gen->f);
 
-         FETCH(gen, *inst, y_vec, 0, CHAN_Y);        /* y_vec = src[0].y */
+         y_vec = get_src_vec(gen, inst, 0, CHAN_Y);  /* y_vec = src[0].y */
          ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */
 
-         FETCH(gen, *inst, w_vec, 0, CHAN_W);        /* w_vec = src[0].w */
+         w_vec = get_src_vec(gen, inst, 0, CHAN_W);  /* w_vec = src[0].w */
 
          /* clamp Y to [-128, 128] */
          load_constant_vec(gen, p128_vec, 128.0f);
@@ -653,16 +762,12 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
 
          STORE(gen, *inst, z_vec, 0, CHAN_Z);             /* store Z */
 
-         ppc_release_vec_register(gen->f, y_vec);
-         ppc_release_vec_register(gen->f, z_vec);
-         ppc_release_vec_register(gen->f, w_vec);
          ppc_release_vec_register(gen->f, pow_vec);
          ppc_release_vec_register(gen->f, pos_vec);
          ppc_release_vec_register(gen->f, p128_vec);
          ppc_release_vec_register(gen->f, n128_vec);
       }
 
-      ppc_release_vec_register(gen->f, x_vec);
       ppc_release_vec_register(gen->f, zero_vec);
    }
 
@@ -670,6 +775,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
    if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
       STORE(gen, *inst, one_vec, 0, CHAN_W);
    }
+
+   release_src_vecs(gen);
 }
 
 
@@ -723,11 +830,10 @@ emit_instruction(struct gen_context *gen,
    default:
       return 0;
    }
-
-   
    return 1;
 }
 
+
 static void
 emit_declaration(
    struct ppc_function *func,
@@ -805,6 +911,7 @@ emit_epilogue(struct ppc_function *func)
 {
    ppc_return(func);
    /* XXX restore prev stack frame */
+   debug_printf("PPC: Emitted %u instructions\n", func->num_inst);
 }
 
 
@@ -839,6 +946,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
 
    util_init_math();
 
+   memset(&gen, 0, sizeof(gen));
    gen.f = func;
    gen.inputs_reg = ppc_reserve_register(func, 3);   /* first function param */
    gen.outputs_reg = ppc_reserve_register(func, 4);  /* second function param */
-- 
cgit v1.2.3


From a1754424b6597219f436091dec1de4713719c4b8 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Mon, 27 Oct 2008 15:58:00 -0600
Subject: gallium: ppc: emit fewer 'li' instructions prior to vector
 loads/stores

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 106 ++++++++++++++++++++++++----------
 1 file changed, 75 insertions(+), 31 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 000ddba9359..06b7a41b1b9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -95,6 +95,9 @@ struct gen_context
    int const_reg;     /**< GP register pointing to constants buffer */
    int builtins_reg;  /**< GP register pointint to built-in constants */
 
+   int offset_reg;    /**< used to reduce redundant li instructions */
+   int offset_value;
+
    int one_vec;       /**< vector register with {1.0, 1.0, 1.0, 1.0} */
    int bit31_vec;     /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */
 
@@ -112,6 +115,70 @@ struct gen_context
 };
 
 
+/**
+ * Initialize code generation context.
+ */
+static void
+init_gen_context(struct gen_context *gen, struct ppc_function *func)
+{
+   memset(gen, 0, sizeof(*gen));
+   gen->f = func;
+   gen->inputs_reg = ppc_reserve_register(func, 3);   /* first function param */
+   gen->outputs_reg = ppc_reserve_register(func, 4);  /* second function param */
+   gen->temps_reg = ppc_reserve_register(func, 5);    /* ... */
+   gen->immed_reg = ppc_reserve_register(func, 6);
+   gen->const_reg = ppc_reserve_register(func, 7);
+   gen->builtins_reg = ppc_reserve_register(func, 8);
+   gen->one_vec = -1;
+   gen->bit31_vec = -1;
+   gen->offset_reg = -1;
+   gen->offset_value = -9999999;
+}
+
+
+/**
+ * All PPC vector load/store instructions form an effective address
+ * by adding the contents of two registers.  For example:
+ *    lvx v2,r8,r9   # v2 = memory[r8 + r9]
+ *    stvx v2,r8,r9  # memory[r8 + r9] = v2;
+ * So our lvx/stvx instructions are typically preceded by an 'li' instruction
+ * to load r9 (above) with an immediate (an offset).
+ * This code emits that 'li' instruction, but only if the offset value is
+ * different than the previous 'li'.
+ * This optimization seems to save about 10% in the instruction count.
+ * Note that we need to unconditionally emit an 'li' inside basic blocks
+ * (such as inside loops).
+ */
+static int
+emit_li_offset(struct gen_context *gen, int offset)
+{
+   if (gen->offset_reg <= 0) {
+      /* allocate a GP register for storing load/store offset */
+      gen->offset_reg = ppc_allocate_register(gen->f);
+   }
+
+   /* emit new 'li' if offset is changing */
+   if (gen->offset_value < 0 || gen->offset_value != offset) {
+      gen->offset_value = offset;
+      ppc_li(gen->f, gen->offset_reg, offset);
+   }
+
+   return gen->offset_reg;
+}
+
+
+/**
+ * Forces subsequent emit_li_offset() calls to emit an 'li'.
+ * To be called at the top of basic blocks.
+ */
+static int
+reset_li_offset(struct gen_context *gen)
+{
+   gen->offset_value = -9999999;
+}
+
+
+
 /**
  * Load the given vector register with {value, value, value, value}.
  * The value must be in the ppu_builtin_constants[] array.
@@ -124,10 +191,9 @@ load_constant_vec(struct gen_context *gen, int dst_vec, float value)
    uint pos;
    for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) {
       if (ppc_builtin_constants[pos] == value) {
-         int offset_reg = ppc_allocate_register(gen->f);
          int offset = pos * 4;
+         int offset_reg = emit_li_offset(gen, offset);
 
-         ppc_li(gen->f, offset_reg, offset);
          /* Load 4-byte word into vector register.
           * The vector slot depends on the effective address we load from.
           * We know that our builtins start at a 16-byte boundary so we
@@ -137,7 +203,6 @@ load_constant_vec(struct gen_context *gen, int dst_vec, float value)
          ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg);
          /* splat word[pos % 4] across the vector reg */
          ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4);
-         ppc_release_register(gen->f, offset_reg);
          return;
       }
    }
@@ -192,36 +257,29 @@ emit_fetch(struct gen_context *gen,
       switch (reg->SrcRegister.File) {
       case TGSI_FILE_INPUT:
          {
-            int offset_reg = ppc_allocate_register(gen->f);
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
-            ppc_li(gen->f, offset_reg, offset);
+            int offset_reg = emit_li_offset(gen, offset);
             ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
-            ppc_release_register(gen->f, offset_reg);
          }
          break;
       case TGSI_FILE_TEMPORARY:
          {
-            int offset_reg = ppc_allocate_register(gen->f);
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
-            ppc_li(gen->f, offset_reg, offset);
+            int offset_reg = emit_li_offset(gen, offset);
             ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg);
-            ppc_release_register(gen->f, offset_reg);
          }
          break;
       case TGSI_FILE_IMMEDIATE:
          {
-            int offset_reg = ppc_allocate_register(gen->f);
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
-            ppc_li(gen->f, offset_reg, offset);
+            int offset_reg = emit_li_offset(gen, offset);
             ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg);
-            ppc_release_register(gen->f, offset_reg);
          }
          break;
       case TGSI_FILE_CONSTANT:
          {
-            int offset_reg = ppc_allocate_register(gen->f);
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
-            ppc_li(gen->f, offset_reg, offset);
+            int offset_reg = emit_li_offset(gen, offset);
             /* Load 4-byte word into vector register.
              * The vector slot depends on the effective address we load from.
              * We know that our constants start at a 16-byte boundary so we
@@ -231,7 +289,6 @@ emit_fetch(struct gen_context *gen,
             ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg);
             /* splat word[swizzle] across the vector reg */
             ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
-            ppc_release_register(gen->f, offset_reg);
          }
          break;
       default:
@@ -369,20 +426,16 @@ emit_store(struct gen_context *gen,
    switch (reg->DstRegister.File) {
    case TGSI_FILE_OUTPUT:
       {
-         int offset_reg = ppc_allocate_register(gen->f);
          int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
-         ppc_li(gen->f, offset_reg, offset);
+         int offset_reg = emit_li_offset(gen, offset);
          ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg);
-         ppc_release_register(gen->f, offset_reg);
       }
       break;
    case TGSI_FILE_TEMPORARY:
       {
-         int offset_reg = ppc_allocate_register(gen->f);
          int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
-         ppc_li(gen->f, offset_reg, offset);
+         int offset_reg = emit_li_offset(gen, offset);
          ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg);
-         ppc_release_register(gen->f, offset_reg);
       }
       break;
 #if 0
@@ -946,16 +999,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
 
    util_init_math();
 
-   memset(&gen, 0, sizeof(gen));
-   gen.f = func;
-   gen.inputs_reg = ppc_reserve_register(func, 3);   /* first function param */
-   gen.outputs_reg = ppc_reserve_register(func, 4);  /* second function param */
-   gen.temps_reg = ppc_reserve_register(func, 5);    /* ... */
-   gen.immed_reg = ppc_reserve_register(func, 6);
-   gen.const_reg = ppc_reserve_register(func, 7);
-   gen.builtins_reg = ppc_reserve_register(func, 8);
-   gen.one_vec = -1;
-   gen.bit31_vec = -1;
+   init_gen_context(&gen, func);
 
    emit_prologue(func);
 
-- 
cgit v1.2.3


From c46583416a749f2e7f76a1eaadb54a8b9e76fb11 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Tue, 28 Oct 2008 13:17:48 -0600
Subject: gallium: use some PPC vec registers to store TGSI temps

This could be a lot better, but already makes for better code.
---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 184 ++++++++++++++++++++++------------
 1 file changed, 122 insertions(+), 62 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 06b7a41b1b9..0de9b972b4b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -40,6 +40,7 @@
 #include "util/u_sse.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
+#include "tgsi_dump.h"
 #include "tgsi_exec.h"
 #include "tgsi_ppc.h"
 #include "rtasm/rtasm_ppc.h"
@@ -73,6 +74,12 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
 #define CHAN_W 3
 
 
+/**
+ * How many TGSI temps should be implemented with real PPC vector registers
+ * rather than memory.
+ */
+#define MAX_PPC_TEMPS 4
+
 
 struct reg_chan_vec
 {
@@ -101,6 +108,12 @@ struct gen_context
    int one_vec;       /**< vector register with {1.0, 1.0, 1.0, 1.0} */
    int bit31_vec;     /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */
 
+   /**
+    * Map TGSI temps to PPC vector temps.
+    * We have 32 PPC vector regs.  Use 16 of them for storing 4 TGSI temps.
+    * XXX currently only do this for TGSI temps [0..MAX_PPC_TEMPS-1].
+    */
+   int temps_map[MAX_PPC_TEMPS][4];
 
    /**
     * Cache of src registers.
@@ -121,6 +134,8 @@ struct gen_context
 static void
 init_gen_context(struct gen_context *gen, struct ppc_function *func)
 {
+   uint i;
+
    memset(gen, 0, sizeof(*gen));
    gen->f = func;
    gen->inputs_reg = ppc_reserve_register(func, 3);   /* first function param */
@@ -133,6 +148,12 @@ init_gen_context(struct gen_context *gen, struct ppc_function *func)
    gen->bit31_vec = -1;
    gen->offset_reg = -1;
    gen->offset_value = -9999999;
+   for (i = 0; i < MAX_PPC_TEMPS; i++) {
+      gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f);
+      gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f);
+      gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f);
+      gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f);
+   }
 }
 
 
@@ -171,7 +192,7 @@ emit_li_offset(struct gen_context *gen, int offset)
  * Forces subsequent emit_li_offset() calls to emit an 'li'.
  * To be called at the top of basic blocks.
  */
-static int
+static void
 reset_li_offset(struct gen_context *gen)
 {
    gen->offset_value = -9999999;
@@ -239,15 +260,15 @@ gen_get_bit31_vec(struct gen_context *gen)
 
 
 /**
- * Register fetch, put result in 'dst_vec'.
+ * Register fetch.  Return PPC vector register with result.
  */
-static void
+static int
 emit_fetch(struct gen_context *gen,
-           unsigned dst_vec,
            const struct tgsi_full_src_register *reg,
            const unsigned chan_index)
 {
    uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index);
+   int dst_vec = -1;
 
    switch (swizzle) {
    case TGSI_EXTSWIZZLE_X:
@@ -259,13 +280,20 @@ emit_fetch(struct gen_context *gen,
          {
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
             int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
             ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
          }
          break;
       case TGSI_FILE_TEMPORARY:
-         {
+         if (reg->SrcRegister.Index < MAX_PPC_TEMPS) {
+            /* use PPC vec register */
+            dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle];
+         }
+         else {
+            /* use memory-based temp register "file" */
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
             int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
             ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg);
          }
          break;
@@ -273,6 +301,7 @@ emit_fetch(struct gen_context *gen,
          {
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
             int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
             ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg);
          }
          break;
@@ -280,6 +309,7 @@ emit_fetch(struct gen_context *gen,
          {
             int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
             int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
             /* Load 4-byte word into vector register.
              * The vector slot depends on the effective address we load from.
              * We know that our constants start at a 16-byte boundary so we
@@ -301,6 +331,7 @@ emit_fetch(struct gen_context *gen,
    case TGSI_EXTSWIZZLE_ONE:
       {
          int one_vec = gen_one_vec(gen);
+         dst_vec = ppc_allocate_vec_register(gen->f);
          ppc_vmove(gen->f, dst_vec, one_vec);
       }
       break;
@@ -308,6 +339,8 @@ emit_fetch(struct gen_context *gen,
       assert( 0 );
    }
 
+   assert(dst_vec >= 0);
+
    {
       uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
       if (sign_op != TGSI_UTIL_SIGN_KEEP) {
@@ -331,6 +364,8 @@ emit_fetch(struct gen_context *gen,
          }
       }
    }
+
+   return dst_vec;
 }
 
 
@@ -380,20 +415,22 @@ get_src_vec(struct gen_context *gen,
    for (i = 0; i < gen->num_regs; i++) {
       if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) {
          /* cache hit */
+         assert(gen->regs[i].vec >= 0);
          return gen->regs[i].vec;
       }
    }
 
    /* cache miss: allocate new vec reg and emit fetch/load code */
-   vec = ppc_allocate_vec_register(gen->f);
+   vec = emit_fetch(gen, src, chan);
    gen->regs[gen->num_regs].src = *src;
    gen->regs[gen->num_regs].chan = chan;
    gen->regs[gen->num_regs].vec = vec;
    gen->num_regs++;
-   emit_fetch(gen, vec, src, chan);
 
    assert(gen->num_regs <= Elements(gen->regs));
 
+   assert(vec >= 0);
+
    return vec;
 }
 
@@ -406,23 +443,48 @@ release_src_vecs(struct gen_context *gen)
 {
    uint i;
    for (i = 0; i < gen->num_regs; i++) {
-      ppc_release_vec_register(gen->f, gen->regs[i].vec);
+      const const struct tgsi_full_src_register src = gen->regs[i].src;
+      if (!(src.SrcRegister.File == TGSI_FILE_TEMPORARY &&
+            src.SrcRegister.Index < MAX_PPC_TEMPS)) {
+         ppc_release_vec_register(gen->f, gen->regs[i].vec);
+      }
    }
    gen->num_regs = 0;
 }
 
 
 
+static int
+get_dst_vec(struct gen_context *gen, 
+            const struct tgsi_full_instruction *inst,
+            unsigned chan_index)
+{
+   const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0];
+
+   if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
+       reg->DstRegister.Index < MAX_PPC_TEMPS) {
+      int vec = gen->temps_map[reg->DstRegister.Index][chan_index];
+      return vec;
+   }
+   else {
+      return ppc_allocate_vec_register(gen->f);
+   }
+}
+
+
 /**
  * Register store.  Store 'src_vec' at location indicated by 'reg'.
+ * \param free_vec  Should the src_vec be released when done?
  */
 static void
 emit_store(struct gen_context *gen,
-           unsigned src_vec,
-           const struct tgsi_full_dst_register *reg,
+           int src_vec,
            const struct tgsi_full_instruction *inst,
-           unsigned chan_index)
+           unsigned chan_index,
+           boolean free_vec)
 {
+   const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0];
+
    switch (reg->DstRegister.File) {
    case TGSI_FILE_OUTPUT:
       {
@@ -432,7 +494,15 @@ emit_store(struct gen_context *gen,
       }
       break;
    case TGSI_FILE_TEMPORARY:
-      {
+      if (reg->DstRegister.Index < MAX_PPC_TEMPS) {
+         if (!free_vec) {
+            int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index];
+            if (dst_vec != src_vec)
+               ppc_vmove(gen->f, dst_vec, src_vec);
+         }
+         free_vec = FALSE;
+      }
+      else {
          int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
          int offset_reg = emit_li_offset(gen, offset);
          ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg);
@@ -465,21 +535,20 @@ emit_store(struct gen_context *gen,
       break;
    }
 #endif
-}
-
-
-#define STORE( GEN, INST, XMM, INDEX, CHAN )\
-   emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
 
+   if (free_vec)
+      ppc_release_vec_register(gen->f, src_vec);
+}
 
 
 static void
 emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   int v0, v1 = ppc_allocate_vec_register(gen->f);
+   int v0, v1;
    uint chan_index;
 
    v0 = get_src_vec(gen, inst, 0, CHAN_X);
+   v1 = ppc_allocate_vec_register(gen->f);
 
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_RSQ:
@@ -495,7 +564,7 @@ emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
    }
 
    FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-      STORE(gen, *inst, v1, 0, chan_index);
+      emit_store(gen, v1, inst, chan_index, FALSE);
    }
 
    release_src_vecs(gen);
@@ -509,42 +578,37 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
    uint chan_index;
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
       int v0 = get_src_vec(gen, inst, 0, chan_index);   /* v0 = srcreg[0] */
+      int v1 = get_dst_vec(gen, inst, chan_index);
       switch (inst->Instruction.Opcode) {
       case TGSI_OPCODE_ABS:
          /* turn off the most significant bit of each vector float word */
          {
-            int v1 = ppc_allocate_vec_register(gen->f);
-            ppc_vspltisw(gen->f, v1, -1);  /* v1 = {-1, -1, -1, -1} */
-            ppc_vslw(gen->f, v1, v1, v1);  /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */
-            ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */
-            ppc_release_vec_register(gen->f, v1);
+            int bit31_vec = gen_get_bit31_vec(gen);
+            ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */
          }
          break;
       case TGSI_OPCODE_FLOOR:
-         ppc_vrfim(gen->f, v0, v0);         /* v0 = floor(v0) */
+         ppc_vrfim(gen->f, v1, v0);         /* v1 = floor(v0) */
          break;
       case TGSI_OPCODE_FRAC:
-         {
-            int v1 = ppc_allocate_vec_register(gen->f);
-            ppc_vrfim(gen->f, v1, v0);         /* v1 = floor(v0) */
-            ppc_vsubfp(gen->f, v0, v0, v1);    /* v0 = v0 - v1 */
-            ppc_release_vec_register(gen->f, v1);
-         }
+         ppc_vrfim(gen->f, v1, v0);      /* tmp = floor(v0) */
+         ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */
          break;
       case TGSI_OPCODE_EXPBASE2:
-         ppc_vexptefp(gen->f, v0, v0);      /* v0 = 2^v0 */
+         ppc_vexptefp(gen->f, v1, v0);     /* v1 = 2^v0 */
          break;
       case TGSI_OPCODE_LOGBASE2:
          /* XXX this may be broken! */
-         ppc_vlogefp(gen->f, v0, v0);      /* v0 = log2(v0) */
+         ppc_vlogefp(gen->f, v1, v0);      /* v1 = log2(v0) */
          break;
       case TGSI_OPCODE_MOV:
-         /* nothing */
+         if (v0 != v1)
+            ppc_vmove(gen->f, v1, v0);
          break;
       default:
          assert(0);
       }
-      STORE(gen, *inst, v0, 0, chan_index);   /* store v0 */
+      emit_store(gen, v1, inst, chan_index, TRUE);  /* store v0 */
    }
 
    release_src_vecs(gen);
@@ -554,7 +618,7 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 static void
 emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   int v2, zero_vec = -1;
+   int zero_vec = -1;
    uint chan;
 
    if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
@@ -562,12 +626,11 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
       ppc_vzero(gen->f, zero_vec);
    }
 
-   v2 = ppc_allocate_vec_register(gen->f);
-
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
       /* fetch src operands */
       int v0 = get_src_vec(gen, inst, 0, chan);
       int v1 = get_src_vec(gen, inst, 1, chan);
+      int v2 = get_dst_vec(gen, inst, chan);
 
       /* emit binop */
       switch (inst->Instruction.Opcode) {
@@ -591,11 +654,9 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
       }
 
       /* store v2 */
-      STORE(gen, *inst, v2, 0, chan);
+      emit_store(gen, v2, inst, chan, TRUE);
    }
 
-   ppc_release_vec_register(gen->f, v2);
-
    if (inst->Instruction.Opcode == TGSI_OPCODE_MUL)
       ppc_release_vec_register(gen->f, zero_vec);
 
@@ -606,16 +667,14 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 static void
 emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   int v3;
    uint chan;
 
-   v3 = ppc_allocate_vec_register(gen->f);
-
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
       /* fetch src operands */
       int v0 = get_src_vec(gen, inst, 0, chan);
       int v1 = get_src_vec(gen, inst, 1, chan);
       int v2 = get_src_vec(gen, inst, 2, chan);
+      int v3 = get_dst_vec(gen, inst, chan);
 
       /* emit ALU */
       switch (inst->Instruction.Opcode) {
@@ -631,11 +690,9 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
       }
 
       /* store v3 */
-      STORE(gen, *inst, v3, 0, chan);
+      emit_store(gen, v3, inst, chan, TRUE);
    }
 
-   ppc_release_vec_register(gen->f, v3);
-
    release_src_vecs(gen);
 }
 
@@ -646,16 +703,14 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 static void
 emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
-   int v2;
    uint chan;
    int one_vec = gen_one_vec(gen);
 
-   v2 = ppc_allocate_vec_register(gen->f);
-
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
       /* fetch src operands */
       int v0 = get_src_vec(gen, inst, 0, chan);
       int v1 = get_src_vec(gen, inst, 1, chan);
+      int v2 = get_dst_vec(gen, inst, chan);
       boolean complement = FALSE;
 
       switch (inst->Instruction.Opcode) {
@@ -691,11 +746,9 @@ emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
          ppc_vand(gen->f, v2, one_vec, v2);     /* v2 = one_vec & v2 */
 
       /* store v2 */
-      STORE(gen, *inst, v2, 0, chan);
+      emit_store(gen, v2, inst, chan, TRUE);
    }
 
-   ppc_release_vec_register(gen->f, v2);
-
    release_src_vecs(gen);
 }
 
@@ -733,7 +786,7 @@ emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
    }
 
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
-      STORE(gen, *inst, v2, 0, chan_index);  /* store v2 */
+      emit_store(gen, v2, inst, chan_index, FALSE);  /* store v2, free v2 later */
    }
 
    release_src_vecs(gen);
@@ -768,7 +821,7 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
 
    /* Compute X */
    if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
-      STORE(gen, *inst, one_vec, 0, CHAN_X);
+      emit_store(gen, one_vec, inst, CHAN_X, FALSE);
    }
 
    /* Compute Y, Z */
@@ -783,11 +836,12 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
       ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */
 
       if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
-         STORE(gen, *inst, x_vec, 0, CHAN_Y);        /* store Y */
+         emit_store(gen, x_vec, inst, CHAN_Y, FALSE);
       }
 
       if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
-         int y_vec, z_vec, w_vec;
+         int y_vec, w_vec;
+         int z_vec = ppc_allocate_vec_register(gen->f);
          int pow_vec = ppc_allocate_vec_register(gen->f);
          int pos_vec = ppc_allocate_vec_register(gen->f);
          int p128_vec = ppc_allocate_vec_register(gen->f);
@@ -798,11 +852,11 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
 
          w_vec = get_src_vec(gen, inst, 0, CHAN_W);  /* w_vec = src[0].w */
 
-         /* clamp Y to [-128, 128] */
+         /* clamp W to [-128, 128] */
          load_constant_vec(gen, p128_vec, 128.0f);
          load_constant_vec(gen, n128_vec, -128.0f);
-         ppc_vmaxfp(gen->f, y_vec, y_vec, n128_vec); /* y = max(y, -128) */
-         ppc_vminfp(gen->f, y_vec, y_vec, p128_vec); /* y = min(y, 128) */
+         ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec); /* w = max(w, -128) */
+         ppc_vminfp(gen->f, w_vec, w_vec, p128_vec); /* w = min(w, 128) */
 
          /* if temp.x > 0
           *    z = pow(tmp.y, tmp.w)
@@ -813,8 +867,9 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
          ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */
          ppc_vand(gen->f, z_vec, pow_vec, pos_vec);       /* z = pow & pos */
 
-         STORE(gen, *inst, z_vec, 0, CHAN_Z);             /* store Z */
+         emit_store(gen, z_vec, inst, CHAN_Z, FALSE);
 
+         ppc_release_vec_register(gen->f, z_vec);
          ppc_release_vec_register(gen->f, pow_vec);
          ppc_release_vec_register(gen->f, pos_vec);
          ppc_release_vec_register(gen->f, p128_vec);
@@ -826,7 +881,7 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
 
    /* Compute W */
    if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
-      STORE(gen, *inst, one_vec, 0, CHAN_W);
+      emit_store(gen, one_vec, inst, CHAN_W, FALSE);
    }
 
    release_src_vecs(gen);
@@ -997,6 +1052,11 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
    if (!use_ppc_asm)
       return FALSE;
 
+   if (0) {
+      debug_printf("\n********* TGSI->PPC ********\n");
+      tgsi_dump(tokens, 0);
+   }
+
    util_init_math();
 
    init_gen_context(&gen, func);
-- 
cgit v1.2.3


From f4e9526addc617dc78af9b1af781ffe09ce62504 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Tue, 28 Oct 2008 18:21:03 -0600
Subject: gallium: ppc: don't replicate/smear immediate values, use vspltw
 instruction as with constants

---
 src/gallium/auxiliary/draw/draw_vs_ppc.c |  8 ++++----
 src/gallium/auxiliary/tgsi/tgsi_ppc.c    | 22 +++++++++++++---------
 2 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index 8eff6d4fda3..ff402634001 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -54,7 +54,7 @@
 typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4],
                                              float (*outputs)[4][4],
                                              float (*temps)[4][4],
-                                             float (*immeds)[4][4],
+                                             float (*immeds)[4],
                                              float (*consts)[4],
                                              const float *builtins);
 
@@ -151,7 +151,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
                    output_stride );
 #else
       shader->func(inputs_soa, outputs_soa, temps_soa,
-		   (float (*)[4][4]) shader->base.immediates,
+		   (float (*)[4]) shader->base.immediates,
 		   (float (*)[4]) constants,
                    ppc_builtin_constants);
 
@@ -227,7 +227,7 @@ draw_create_vs_ppc(struct draw_context *draw,
    vs->base.run_linear = vs_ppc_run_linear;
    vs->base.delete = vs_ppc_delete;
    
-   vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 *
+   vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
                                       sizeof(float), 16);
 
    vs->machine = &draw->vs.machine;
@@ -236,7 +236,7 @@ draw_create_vs_ppc(struct draw_context *draw,
 
    if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens,
 			&vs->ppc_program, 
-                        (float (*)[4])vs->base.immediates, 
+                       (float (*)[4]) vs->base.immediates, 
                         TRUE )) 
       goto fail;
       
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 0de9b972b4b..dd574ac02a6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -299,10 +299,18 @@ emit_fetch(struct gen_context *gen,
          break;
       case TGSI_FILE_IMMEDIATE:
          {
-            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
             int offset_reg = emit_li_offset(gen, offset);
             dst_vec = ppc_allocate_vec_register(gen->f);
-            ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg);
+            /* Load 4-byte word into vector register.
+             * The vector slot depends on the effective address we load from.
+             * We know that our immediates start at a 16-byte boundary so we
+             * know that 'swizzle' tells us which vector slot will have the
+             * loaded word.  The other vector slots will be undefined.
+             */
+            ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg);
+            /* splat word[swizzle] across the vector reg */
+            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
          }
          break;
       case TGSI_FILE_CONSTANT:
@@ -1095,14 +1103,10 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
             assert(size <= 4);
             assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
             for (i = 0; i < size; i++) {
-               const float value =
-                  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
-               imm[num_immediates * 4 + 0] = 
-               imm[num_immediates * 4 + 1] = 
-               imm[num_immediates * 4 + 2] = 
-               imm[num_immediates * 4 + 3] = value;
-               num_immediates++;
+               immediates[num_immediates][i] =
+		  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
             }
+            num_immediates++;
          }
          break;
 
-- 
cgit v1.2.3


From 5db0372b3cffec9b5c28699a580da77dcfbd938d Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Tue, 28 Oct 2008 18:57:54 -0600
Subject: gallium: ppc: implement TGSI_OPCODE_LOG/EXP

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 111 +++++++++++++++++++++++++++++++++-
 1 file changed, 110 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index dd574ac02a6..e64fb5ac916 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -896,6 +896,110 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
 }
 
 
+static void
+emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   const int one_vec = gen_one_vec(gen);
+   int src_vec;
+
+   /* get src arg */
+   src_vec = get_src_vec(gen, inst, 0, CHAN_X);
+
+   /* Compute X = 2^floor(src) */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+      int dst_vec = get_dst_vec(gen, inst, CHAN_X);
+      int tmp_vec = ppc_allocate_vec_register(gen->f);
+      ppc_vrfim(gen->f, tmp_vec, src_vec);             /* tmp = floor(src); */
+      ppc_vexptefp(gen->f, dst_vec, tmp_vec);          /* dst = 2 ^ tmp */
+      emit_store(gen, dst_vec, inst, CHAN_X, TRUE);
+      ppc_release_vec_register(gen->f, tmp_vec);
+   }
+
+   /* Compute Y = src - floor(src) */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+      int dst_vec = get_dst_vec(gen, inst, CHAN_Y);
+      int tmp_vec = ppc_allocate_vec_register(gen->f);
+      ppc_vrfim(gen->f, tmp_vec, src_vec);             /* tmp = floor(src); */
+      ppc_vsubfp(gen->f, dst_vec, src_vec, tmp_vec);   /* dst = src - tmp */
+      emit_store(gen, dst_vec, inst, CHAN_Y, TRUE);
+      ppc_release_vec_register(gen->f, tmp_vec);
+   }
+
+   /* Compute Z = RoughApprox2ToX(src) */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+      int dst_vec = get_dst_vec(gen, inst, CHAN_Z);
+      ppc_vexptefp(gen->f, dst_vec, src_vec);          /* dst = 2 ^ src */
+      emit_store(gen, dst_vec, inst, CHAN_Z, TRUE);
+   }
+
+   /* Compute W = 1.0 */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
+      emit_store(gen, one_vec, inst, CHAN_W, FALSE);
+   }
+
+   release_src_vecs(gen);
+}
+
+
+static void
+emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   const int bit31_vec = gen_get_bit31_vec(gen);
+   const int one_vec = gen_one_vec(gen);
+   int src_vec, abs_vec;
+
+   /* get src arg */
+   src_vec = get_src_vec(gen, inst, 0, CHAN_X);
+
+   /* compute abs(src) */
+   abs_vec = ppc_allocate_vec_register(gen->f);
+   ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec);     /* abs = src & ~bit31 */
+
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) &&
+       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+
+      /* compute tmp = floor(log2(abs)) */
+      int tmp_vec = ppc_allocate_vec_register(gen->f);
+      ppc_vlogefp(gen->f, tmp_vec, abs_vec);           /* tmp = log2(abs) */
+      ppc_vrfim(gen->f, tmp_vec, tmp_vec);             /* tmp = floor(tmp); */
+
+      /* Compute X = tmp */
+      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
+         emit_store(gen, tmp_vec, inst, CHAN_X, FALSE);
+      }
+      
+      /* Compute Y = abs / 2^tmp */
+      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+         const int zero_vec = ppc_allocate_vec_register(gen->f);
+         ppc_vzero(gen->f, zero_vec);
+         ppc_vexptefp(gen->f, tmp_vec, tmp_vec);       /* tmp = 2 ^ tmp */
+         ppc_vrefp(gen->f, tmp_vec, tmp_vec);          /* tmp = 1 / tmp */
+         /* tmp = abs * tmp + zero */
+         ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec);
+         emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE);
+         ppc_release_vec_register(gen->f, zero_vec);
+      }
+
+      ppc_release_vec_register(gen->f, tmp_vec);
+   }
+
+   /* Compute Z = RoughApproxLog2(abs) */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+      int dst_vec = get_dst_vec(gen, inst, CHAN_Z);
+      ppc_vlogefp(gen->f, dst_vec, abs_vec);           /* dst = log2(abs) */
+      emit_store(gen, dst_vec, inst, CHAN_Z, TRUE);
+   }
+
+   /* Compute W = 1.0 */
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
+      emit_store(gen, one_vec, inst, CHAN_W, FALSE);
+   }
+
+   ppc_release_vec_register(gen->f, abs_vec);
+   release_src_vecs(gen);
+}
+
+
 static int
 emit_instruction(struct gen_context *gen,
                  struct tgsi_full_instruction *inst)
@@ -940,6 +1044,12 @@ emit_instruction(struct gen_context *gen,
    case TGSI_OPCODE_LIT:
       emit_lit(gen, inst);
       break;
+   case TGSI_OPCODE_LOG:
+      emit_log(gen, inst);
+      break;
+   case TGSI_OPCODE_EXP:
+      emit_exp(gen, inst);
+      break;
    case TGSI_OPCODE_END:
       /* normal end */
       return 1;
@@ -1098,7 +1208,6 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
          /* splat each immediate component into a float[4] vector for SoA */
          {
             const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
-            float *imm = (float *) immediates;
             uint i;
             assert(size <= 4);
             assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
-- 
cgit v1.2.3


From 4e1c33700d8885c91d8a1db4cbaefa1ff9f1b5fc Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 29 Oct 2008 11:05:34 -0600
Subject: gallium: added PPC support for SWZ, XPD, POW

That's the last of the ARB_v_p opcodes, except for ARL.
---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 86 +++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index e64fb5ac916..5d130709221 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -610,6 +610,7 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
          ppc_vlogefp(gen->f, v1, v0);      /* v1 = log2(v0) */
          break;
       case TGSI_OPCODE_MOV:
+      case TGSI_OPCODE_SWZ:
          if (v0 != v1)
             ppc_vmove(gen->f, v1, v0);
          break;
@@ -1000,12 +1001,91 @@ emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst)
 }
 
 
+static void
+emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int s0_vec = get_src_vec(gen, inst, 0, CHAN_X);
+   int s1_vec = get_src_vec(gen, inst, 1, CHAN_X);
+   int pow_vec = ppc_allocate_vec_register(gen->f);
+   int chan;
+
+   ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec);
+
+   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
+      emit_store(gen, pow_vec, inst, chan, FALSE);
+   }
+
+   ppc_release_vec_register(gen->f, pow_vec);
+
+   release_src_vecs(gen);
+}
+
+
+static void
+emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst)
+{
+   int x0_vec, y0_vec, z0_vec;
+   int x1_vec, y1_vec, z1_vec;
+   int zero_vec, tmp_vec;
+   int tmp2_vec;
+
+   zero_vec = ppc_allocate_vec_register(gen->f);
+   ppc_vzero(gen->f, zero_vec);
+
+   tmp_vec = ppc_allocate_vec_register(gen->f);
+   tmp2_vec = ppc_allocate_vec_register(gen->f);
+
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+      x0_vec = get_src_vec(gen, inst, 0, CHAN_X);
+      x1_vec = get_src_vec(gen, inst, 1, CHAN_X);
+   }
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
+       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+      y0_vec = get_src_vec(gen, inst, 0, CHAN_Y);
+      y1_vec = get_src_vec(gen, inst, 1, CHAN_Y);
+   }
+   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
+       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+      z0_vec = get_src_vec(gen, inst, 0, CHAN_Z);
+      z1_vec = get_src_vec(gen, inst, 1, CHAN_Z);
+   }
+
+   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) {
+      /* tmp = y0 * z1 */
+      ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec);
+      /* tmp = tmp - z0 * y1*/
+      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec);
+      emit_store(gen, tmp_vec, inst, CHAN_X, FALSE);
+   }
+   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) {
+      /* tmp = z0 * x1 */
+      ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec);
+      /* tmp = tmp - x0 * z1 */
+      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec);
+      emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE);
+   }
+   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) {
+      /* tmp = x0 * y1 */
+      ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec);
+      /* tmp = tmp - y0 * x1 */
+      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec);
+      emit_store(gen, tmp_vec, inst, CHAN_Z, FALSE);
+   }
+   /* W is undefined */
+
+   ppc_release_vec_register(gen->f, tmp_vec);
+   ppc_release_vec_register(gen->f, zero_vec);
+   release_src_vecs(gen);
+}
+
 static int
 emit_instruction(struct gen_context *gen,
                  struct tgsi_full_instruction *inst)
 {
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_SWZ:
    case TGSI_OPCODE_ABS:
    case TGSI_OPCODE_FLOOR:
    case TGSI_OPCODE_FRAC:
@@ -1050,6 +1130,12 @@ emit_instruction(struct gen_context *gen,
    case TGSI_OPCODE_EXP:
       emit_exp(gen, inst);
       break;
+   case TGSI_OPCODE_POW:
+      emit_pow(gen, inst);
+      break;
+   case TGSI_OPCODE_XPD:
+      emit_xpd(gen, inst);
+      break;
    case TGSI_OPCODE_END:
       /* normal end */
       return 1;
-- 
cgit v1.2.3


From a5d920297a2affe34c535d30a2c49588f92f69ad Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 29 Oct 2008 16:26:10 -0600
Subject: gallium: use execmem for PPC code, grow instruction buffer as needed

---
 src/gallium/auxiliary/rtasm/rtasm_ppc.c | 70 +++++++++++++++++++++++----------
 src/gallium/auxiliary/rtasm/rtasm_ppc.h |  1 +
 src/gallium/auxiliary/tgsi/tgsi_ppc.c   |  8 ++++
 3 files changed, 58 insertions(+), 21 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
index a90b5587b0f..e73ed71a0b6 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
@@ -38,6 +38,7 @@
 #include <stdio.h>
 #include "util/u_memory.h"
 #include "pipe/p_debug.h"
+#include "rtasm_execmem.h"
 #include "rtasm_ppc.h"
 
 
@@ -46,9 +47,9 @@ ppc_init_func(struct ppc_function *p, unsigned max_inst)
 {
    uint i;
 
-   p->store = align_malloc(max_inst * PPC_INST_SIZE, 16);
    p->num_inst = 0;
-   p->max_inst = max_inst;
+   p->max_inst = 100; /* first guess at buffer size */
+   p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
    p->reg_used = 0x0;
    p->fp_used = 0x0;
    p->vec_used = 0x0;
@@ -66,12 +67,19 @@ ppc_release_func(struct ppc_function *p)
 {
    assert(p->num_inst <= p->max_inst);
    if (p->store != NULL) {
-      align_free(p->store);
+      rtasm_exec_free(p->store);
    }
    p->store = NULL;
 }
 
 
+uint
+ppc_num_instructions(const struct ppc_function *p)
+{
+   return p->num_inst;
+}
+
+
 void (*ppc_get_func(struct ppc_function *p))(void)
 {
 #if 0
@@ -202,6 +210,35 @@ ppc_release_vec_register(struct ppc_function *p, int reg)
 }
 
 
+/**
+ * Append instruction to instruction buffer.  Grow buffer if out of room.
+ */
+static void
+emit_instruction(struct ppc_function *p, uint32_t inst_bits)
+{
+   if (!p->store)
+      return;  /* out of memory, drop the instruction */
+
+   if (p->num_inst == p->max_inst) {
+      /* allocate larger buffer */
+      uint32_t *newbuf;
+      p->max_inst *= 2;  /* 2x larger */
+      newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
+      if (newbuf) {
+         memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE);
+      }
+      rtasm_exec_free(p->store);
+      p->store = newbuf;
+      if (!p->store) {
+         /* out of memory */
+         p->num_inst = 0;
+         return;
+      }
+   }
+
+   p->store[p->num_inst++] = inst_bits;
+}
+
 
 union vx_inst {
    uint32_t bits;
@@ -223,8 +260,7 @@ emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB)
    inst.inst.vA = vA;
    inst.inst.vB = vB;
    inst.inst.op2 = op2;
-   p->store[p->num_inst++] = inst.bits;
-   assert(p->num_inst <= p->max_inst);
+   emit_instruction(p, inst.bits);
 };
 
 
@@ -250,8 +286,7 @@ emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB)
    inst.inst.vB = vB;
    inst.inst.rC = 0;
    inst.inst.op2 = op2;
-   p->store[p->num_inst++] = inst.bits;
-   assert(p->num_inst <= p->max_inst);
+   emit_instruction(p, inst.bits);
 };
 
 
@@ -277,8 +312,7 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC)
    inst.inst.vB = vB;
    inst.inst.vC = vC;
    inst.inst.op2 = op2;
-   p->store[p->num_inst++] = inst.bits;
-   assert(p->num_inst <= p->max_inst);
+   emit_instruction(p, inst.bits);
 };
 
 
@@ -300,8 +334,7 @@ emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk)
    inst.inst.li = li;
    inst.inst.aa = aa;
    inst.inst.lk = lk;
-   p->store[p->num_inst++] = inst.bits;
-   assert(p->num_inst <= p->max_inst);
+   emit_instruction(p, inst.bits);
 }
 
 
@@ -330,8 +363,7 @@ emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh,
    inst.inst.bh = bh;
    inst.inst.op2 = op2;
    inst.inst.lk = lk;
-   p->store[p->num_inst++] = inst.bits;
-   assert(p->num_inst <= p->max_inst);
+   emit_instruction(p, inst.bits);
 }
 
 static INLINE void
@@ -373,8 +405,7 @@ emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2)
    inst.inst.rb = rb;
    inst.inst.op2 = op2;
    inst.inst.unused = 0x0;
-   p->store[p->num_inst++] = inst.bits;
-   assert(p->num_inst <= p->max_inst);
+   emit_instruction(p, inst.bits);
 }
 
 
@@ -398,8 +429,7 @@ emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si)
    inst.inst.rt = rt;
    inst.inst.ra = ra;
    inst.inst.si = (unsigned) (si & 0xffff);
-   p->store[p->num_inst++] = inst.bits;
-   assert(p->num_inst <= p->max_inst);
+   emit_instruction(p, inst.bits);
 };
 
 
@@ -428,8 +458,7 @@ emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
    inst.inst.unused = 0x0;
    inst.inst.op2 = op2;
    inst.inst.rc = rc;
-   p->store[p->num_inst++] = inst.bits;
-   assert(p->num_inst <= p->max_inst);
+   emit_instruction(p, inst.bits);
 };
 
 
@@ -458,8 +487,7 @@ emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe,
    inst.inst.oe = oe;
    inst.inst.op2 = op2;
    inst.inst.rc = rc;
-   p->store[p->num_inst++] = inst.bits;
-   assert(p->num_inst <= p->max_inst);
+   emit_instruction(p, inst.bits);
 }
 
 
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
index 561e139bce7..d0477dec941 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
@@ -64,6 +64,7 @@ struct ppc_function
 
 extern void ppc_init_func(struct ppc_function *p, unsigned max_inst);
 extern void ppc_release_func(struct ppc_function *p);
+extern uint ppc_num_instructions(const struct ppc_function *p);
 extern void (*ppc_get_func( struct ppc_function *p ))( void );
 extern void ppc_dump_func(const struct ppc_function *p);
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 5d130709221..a92b1902e3d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -1315,6 +1315,14 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
 
    tgsi_parse_free( &parse );
 
+   if (ppc_num_instructions(func) == 0) {
+      /* ran out of memory for instructions */
+      ok = FALSE;
+   }
+
+   if (!ok)
+      debug_printf("TGSI->PPC translation failed\n");
+
    return ok;
 }
 
-- 
cgit v1.2.3


From 502974b345dae8a3ca641083b4df5183b04ca825 Mon Sep 17 00:00:00 2001
From: michal <michal@quad.(none)>
Date: Wed, 5 Nov 2008 11:48:56 +0100
Subject: tgsi: Implement OPCODE_TRUNC.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 4681b29f52b..c115956c5d7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -657,6 +657,17 @@ emit_f2it(
       make_xmm( xmm ) );
 }
 
+static void
+emit_i2f(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse2_cvtdq2ps(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ) );
+}
+
 static void PIPE_CDECL
 flr4f(
    float *store )
@@ -1967,7 +1978,12 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_TRUNC:
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_f2it( func, 0 );
+         emit_i2f( func, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_SHL:
-- 
cgit v1.2.3


From de2ace201fe26d36a2a75211a7d8447940a47fbe Mon Sep 17 00:00:00 2001
From: michal <michal@quad.(none)>
Date: Wed, 5 Nov 2008 11:48:56 +0100
Subject: tgsi: Implement OPCODE_TRUNC.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index f79170b9d65..47e52c84247 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -784,6 +784,17 @@ emit_f2it(
       make_xmm( xmm ) );
 }
 
+static void
+emit_i2f(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse2_cvtdq2ps(
+      func,
+      make_xmm( xmm ),
+      make_xmm( xmm ) );
+}
+
 static void PIPE_CDECL
 flr4f(
    float *store )
@@ -2104,7 +2115,12 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_TRUNC:
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_f2it( func, 0 );
+         emit_i2f( func, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_SHL:
-- 
cgit v1.2.3


From a137f03c56688c190f3542fb6b7c9a4ff4c80cff Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 5 Nov 2008 13:55:56 -0700
Subject: gallium: added some sanity check assertions for constant buffer
 indexing

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index df002939c6b..ea5a44fb8ab 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -958,6 +958,10 @@ fetch_src_file_channel(
       switch( file ) {
       case TGSI_FILE_CONSTANT:
          assert(mach->Consts);
+         assert(index->i[0] >= 0);
+         assert(index->i[1] >= 0);
+         assert(index->i[2] >= 0);
+         assert(index->i[3] >= 0);
          chan->f[0] = mach->Consts[index->i[0]][swizzle];
          chan->f[1] = mach->Consts[index->i[1]][swizzle];
          chan->f[2] = mach->Consts[index->i[2]][swizzle];
-- 
cgit v1.2.3


From 03c0ce4c61fd970509d605fe78166e828fc1df57 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 5 Nov 2008 13:56:20 -0700
Subject: gallium: added tgsi_set_exec_mask()

---
 src/gallium/auxiliary/tgsi/tgsi_exec.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index c4e649e69c4..fc40a25e09f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -165,6 +165,10 @@ struct tgsi_exec_labels
 #define TGSI_EXEC_TEMP_HALF_I       (TGSI_EXEC_NUM_TEMPS + 3)
 #define TGSI_EXEC_TEMP_HALF_C       1
 
+/* execution mask, each value is either 0 or ~0 */
+#define TGSI_EXEC_MASK_I            (TGSI_EXEC_NUM_TEMPS + 3)
+#define TGSI_EXEC_MASK_C            2
+
 #define TGSI_EXEC_TEMP_R0           (TGSI_EXEC_NUM_TEMPS + 4)
 
 #define TGSI_EXEC_TEMP_ADDR         (TGSI_EXEC_NUM_TEMPS + 5)
@@ -265,6 +269,27 @@ void
 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
 
 
+static INLINE void
+tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
+{
+   mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] =
+      mask;
+}
+
+
+/** Set execution mask values prior to executing the shader */
+static INLINE void
+tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
+                   boolean ch0, boolean ch1, boolean ch2, boolean ch3)
+{
+   int *mask = mach->Temps[TGSI_EXEC_MASK_I].xyzw[TGSI_EXEC_MASK_C].i;
+   mask[0] = ch0 ? ~0 : 0;
+   mask[1] = ch1 ? ~0 : 0;
+   mask[2] = ch2 ? ~0 : 0;
+   mask[3] = ch3 ? ~0 : 0;
+}
+
+
 #if defined __cplusplus
 } /* extern "C" */
 #endif
-- 
cgit v1.2.3


From f0debbb0bb951bfc6dc0ae467564b3b1230324cf Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 5 Nov 2008 14:02:07 -0700
Subject: gallium: call tgsi_set_exec_mask() and use exec mask in SSE ARL code

This prevents vertex shaders from referencing invalid memory locations when
the shader is operating on less than four vertices or fragments.
---
 src/gallium/auxiliary/draw/draw_vs_exec.c |  6 ++++++
 src/gallium/auxiliary/draw/draw_vs_sse.c  | 14 +++++++++++++
 src/gallium/auxiliary/tgsi/tgsi_sse2.c    | 35 ++++++++++++++++++++++++++++---
 src/gallium/drivers/softpipe/sp_fs_sse.c  |  3 ++-
 4 files changed, 54 insertions(+), 4 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 44563803f90..82d27d44934 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -120,6 +120,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
 	 input = (const float (*)[4])((const char *)input + input_stride);
       } 
 
+      tgsi_set_exec_mask(machine,
+                         1,
+                         max_vertices > 1,
+                         max_vertices > 2,
+                         max_vertices > 3);
+
       /* run interpreter */
       tgsi_exec_machine_run( machine );
 
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 0efabd9de8b..77ba5152f9f 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -99,9 +99,23 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
    struct tgsi_exec_machine *machine = shader->machine;
    unsigned int i;
 
+   /* By default, execute all channels.  XXX move this inside the loop
+    * below when we support shader conditionals/loops.
+    */
+   tgsi_set_exec_mask(machine, 1, 1, 1, 1);
+
    for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
       unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
 
+      if (max_vertices < 4) {
+         /* disable the unused execution channels */
+         tgsi_set_exec_mask(machine,
+                            1,
+                            max_vertices > 1,
+                            max_vertices > 2,
+                            0);
+      }
+
       /* run compiled shader
        */
       shader->func(machine->Inputs,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index c115956c5d7..4d59106dbfa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -69,6 +69,9 @@
 
 #define TEMP_R0   TGSI_EXEC_TEMP_R0
 #define TEMP_ADDR TGSI_EXEC_TEMP_ADDR
+#define TEMP_EXEC_MASK_I TGSI_EXEC_MASK_I
+#define TEMP_EXEC_MASK_C TGSI_EXEC_MASK_C
+
 
 /**
  * X86 utility functions.
@@ -230,6 +233,9 @@ emit_const(
    int indirectIndex )
 {
    if (indirect) {
+      /* 'vec' is the offset from the address register's value.
+       * We're loading CONST[ADDR+vec] into an xmm register.
+       */
       struct x86_reg r0 = get_input_base();
       struct x86_reg r1 = get_output_base();
       uint i;
@@ -240,18 +246,40 @@ emit_const(
       x86_push( func, r0 );
       x86_push( func, r1 );
 
+      /*
+       * Loop over the four pixels or vertices in the quad.
+       * Get the value of the address (offset) register for pixel/vertex[i],
+       * add it to the src offset and index into the constant buffer.
+       * Note that we're working on SOA data.
+       * If any of the pixel/vertex execution channels are unused their
+       * values will be garbage.  It's very important that we don't use
+       * those garbage values as indexes into the constant buffer since
+       * that'll cause segfaults.
+       * The solution is to bitwise-AND the offset with the execution mask
+       * register whose values are either 0 or ~0.
+       * The caller must setup the execution mask register to indicate
+       * which channels are valid/alive before running the shader.
+       * The execution mask will also figure into loops and conditionals
+       * someday.
+       */
       for (i = 0; i < QUAD_SIZE; i++) {
-         x86_lea( func, r0, get_const( vec, chan ) );
+         /* r1 = address register[i] */
          x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) );
+         /* r0 = execution mask[i] */
+         x86_mov( func, r0, x86_make_disp( get_temp( TEMP_EXEC_MASK_I, TEMP_EXEC_MASK_C ), i * 4 ) );
+         /* r1 = r1 & r0 */
+         x86_and( func, r1, r0 );
+         /* r0 = 'vec', the offset */
+         x86_lea( func, r0, get_const( vec, chan ) );
 
-         /* Quick hack to multiply by 16 -- need to add SHL to rtasm.
+         /* Quick hack to multiply r1 by 16 -- need to add SHL to rtasm.
           */
          x86_add( func, r1, r1 );
          x86_add( func, r1, r1 );
          x86_add( func, r1, r1 );
          x86_add( func, r1, r1 );
 
-         x86_add( func, r0, r1 );
+         x86_add( func, r0, r1 );  /* r0 = r0 + r1 */
          x86_mov( func, r1, x86_deref( r0 ) );
          x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 );
       }
@@ -265,6 +293,7 @@ emit_const(
          get_temp( TEMP_R0, CHAN_X ) );
    }
    else {
+      /* 'vec' is the index into the src register file, such as TEMP[vec] */
       assert( vec >= 0 );
 
       sse_movss(
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 496ed43df26..50eb2c07bcb 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -92,7 +92,8 @@ fs_sse_run( const struct sp_fragment_shader *base,
 		       machine->Temps);
 
    /* init kill mask */
-   machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = 0x0;
+   tgsi_set_kill_mask(machine, 0x0);
+   tgsi_set_exec_mask(machine, 1, 1, 1, 1);
 
    shader->func( machine->Inputs,
 		 machine->Outputs,
-- 
cgit v1.2.3


From 639a2b0ec853eda49e3e7150b2ed7f8f40d101af Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Wed, 5 Nov 2008 19:26:20 -0700
Subject: gallium: don't range check tgsi register index for indirect accesses

Fixes progs/vp/arl.txt test.
---
 src/gallium/auxiliary/tgsi/tgsi_sanity.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 11659247c0c..bc7b941b785 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -153,17 +153,21 @@ check_register_usage(
    if (!check_file_name( ctx, file ))
       return FALSE;
 
-   if (index < 0 || index > MAX_REGISTERS) {
-      report_error( ctx, "%s[%i]: Invalid index %s", file_names[file], index, name );
-      return FALSE;
-   }
-
    if (indirect_access) {
+      /* Note that 'index' is an offset relative to the value of the
+       * address register.  No range checking done here.
+       */
       if (!is_any_register_declared( ctx, file ))
          report_error( ctx, "%s: Undeclared %s register", file_names[file], name );
       ctx->regs_ind_used[file] = TRUE;
    }
    else {
+      if (index < 0 || index > MAX_REGISTERS) {
+         report_error( ctx, "%s[%i]: Invalid index %s",
+                       file_names[file], index, name );
+         return FALSE;
+      }
+
       if (!is_register_declared( ctx, file, index ))
          report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name );
       ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG));
-- 
cgit v1.2.3


From 5b2b064a5c1328449e3eb8179afc2ba366f18ae6 Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Wed, 5 Nov 2008 20:04:49 -0700
Subject: gallium: check execution mask in indirect register loads

Zero-out the index for disabled execution channels to avoid using potential
garbage values (thus avoiding bad array indexing).
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index ea5a44fb8ab..53e92b96aee 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1045,12 +1045,16 @@ fetch_source(
    if (reg->SrcRegister.Indirect) {
       union tgsi_exec_channel index2;
       union tgsi_exec_channel indir_index;
+      const uint execmask = mach->ExecMask;
+      uint i;
 
+      /* which address register (always zero now) */
       index2.i[0] =
       index2.i[1] =
       index2.i[2] =
       index2.i[3] = reg->SrcRegisterInd.Index;
 
+      /* get current value of address register[swizzle] */
       swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
       fetch_src_file_channel(
          mach,
@@ -1059,10 +1063,19 @@ fetch_source(
          &index2,
          &indir_index );
 
+      /* add value of address register to the offset */
       index.i[0] += indir_index.i[0];
       index.i[1] += indir_index.i[1];
       index.i[2] += indir_index.i[2];
       index.i[3] += indir_index.i[3];
+
+      /* for disabled execution channels, zero-out the index to
+       * avoid using a potential garbage value.
+       */
+      for (i = 0; i < QUAD_SIZE; i++) {
+         if ((execmask & (1 << i)) == 0)
+            index.i[i] = 0;
+      }
    }
 
    if( reg->SrcRegister.Dimension ) {
@@ -1091,6 +1104,8 @@ fetch_source(
       if (reg->SrcRegisterDim.Indirect) {
          union tgsi_exec_channel index2;
          union tgsi_exec_channel indir_index;
+         const uint execmask = mach->ExecMask;
+         uint i;
 
          index2.i[0] =
          index2.i[1] =
@@ -1109,6 +1124,14 @@ fetch_source(
          index.i[1] += indir_index.i[1];
          index.i[2] += indir_index.i[2];
          index.i[3] += indir_index.i[3];
+
+         /* for disabled execution channels, zero-out the index to
+          * avoid using a potential garbage value.
+          */
+         for (i = 0; i < QUAD_SIZE; i++) {
+            if ((execmask & (1 << i)) == 0)
+               index.i[i] = 0;
+         }
       }
    }
 
-- 
cgit v1.2.3


From bb8a9ce705f309a3b38d10c61c3865db79a0f71c Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Thu, 6 Nov 2008 19:24:47 -0700
Subject: gallium: implement TGSI_OPCODE_NRM/NRM4 in tgsi_exec.c

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 59 +++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 53e92b96aee..41dffc3dbaf 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2462,7 +2462,64 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_NRM:
-      assert (0);
+      /* 3-component vector normalize */
+      {
+         union tgsi_exec_channel tmp, dot;
+
+         /* tmp = dp3(src0, src0): */
+         FETCH( &r[0], 0, CHAN_X );
+         micro_mul( &tmp, &r[0], &r[0] );
+
+         FETCH( &r[1], 0, CHAN_Y );
+         micro_mul( &dot, &r[1], &r[1] );
+         micro_add( &tmp, &tmp, &dot );
+
+         FETCH( &r[2], 0, CHAN_Z );
+         micro_mul( &dot, &r[2], &r[2] );
+         micro_add( &tmp, &tmp, &dot );
+
+         /* tmp = 1 / tmp */
+         micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
+
+         /* note: w channel is undefined */
+         FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+            /* chan = chan * tmp */
+            micro_mul( &r[chan_index], &tmp, &r[chan_index] );
+            STORE( &r[chan_index], 0, chan_index );
+         }
+      }
+      break;
+
+   case TGSI_OPCODE_NRM4:
+      /* 4-component vector normalize */
+      {
+         union tgsi_exec_channel tmp, dot;
+
+         /* tmp = dp4(src0, src0): */
+         FETCH( &r[0], 0, CHAN_X );
+         micro_mul( &tmp, &r[0], &r[0] );
+
+         FETCH( &r[1], 0, CHAN_Y );
+         micro_mul( &dot, &r[1], &r[1] );
+         micro_add( &tmp, &tmp, &dot );
+
+         FETCH( &r[2], 0, CHAN_Z );
+         micro_mul( &dot, &r[2], &r[2] );
+         micro_add( &tmp, &tmp, &dot );
+
+         FETCH( &r[3], 0, CHAN_W );
+         micro_mul( &dot, &r[3], &r[3] );
+         micro_add( &tmp, &tmp, &dot );
+
+         /* tmp = 1 / tmp */
+         micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
+
+         FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+            /* chan = chan * tmp */
+            micro_mul( &r[chan_index], &tmp, &r[chan_index] );
+            STORE( &r[chan_index], 0, chan_index );
+         }
+      }
       break;
 
    case TGSI_OPCODE_DIV:
-- 
cgit v1.2.3


From cf9836cf09790de70732963ea571b83719c0c03c Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 7 Nov 2008 13:02:43 -0700
Subject: gallium: implement TGSI_OPCODE_DP2A, add sqrt to NRM3/NRM4

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 8d135f87776..1da04ab7e0e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2034,7 +2034,21 @@ exec_instruction(
 
    case TGSI_OPCODE_DOT2ADD:
       /* TGSI_OPCODE_DP2A */
-      assert (0);
+      FETCH( &r[0], 0, CHAN_X );
+      FETCH( &r[1], 1, CHAN_X );
+      micro_mul( &r[0], &r[0], &r[1] );
+
+      FETCH( &r[1], 0, CHAN_Y );
+      FETCH( &r[2], 1, CHAN_Y );
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FETCH( &r[2], 2, CHAN_X );
+      micro_add( &r[0], &r[0], &r[2] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_INDEX:
@@ -2479,7 +2493,8 @@ exec_instruction(
          micro_mul( &dot, &r[2], &r[2] );
          micro_add( &tmp, &tmp, &dot );
 
-         /* tmp = 1 / tmp */
+         /* tmp = 1 / sqrt(tmp) */
+         micro_sqrt( &tmp, &tmp );
          micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
 
          /* note: w channel is undefined */
@@ -2512,7 +2527,8 @@ exec_instruction(
          micro_mul( &dot, &r[3], &r[3] );
          micro_add( &tmp, &tmp, &dot );
 
-         /* tmp = 1 / tmp */
+         /* tmp = 1 / sqrt(tmp) */
+         micro_sqrt( &tmp, &tmp );
          micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
 
          FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-- 
cgit v1.2.3


From a52a6d7bcdaa47604151b9af07ebcd394316e784 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 7 Nov 2008 13:03:07 -0700
Subject: gallium: added SSE for DP2, DP2A

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 3df0c5db3fa..b8e4ab6d833 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1742,7 +1742,18 @@ emit_instruction(
 
    case TGSI_OPCODE_DOT2ADD:
    /* TGSI_OPCODE_DP2A */
-      return 0;
+      FETCH( func, *inst, 0, 0, CHAN_X );  /* xmm0 = src[0].x */
+      FETCH( func, *inst, 1, 1, CHAN_X );  /* xmm1 = src[1].x */
+      emit_mul( func, 0, 1 );              /* xmm0 = xmm0 * xmm1 */
+      FETCH( func, *inst, 1, 0, CHAN_Y );  /* xmm1 = src[0].y */
+      FETCH( func, *inst, 2, 1, CHAN_Y );  /* xmm2 = src[1].y */
+      emit_mul( func, 1, 2 );              /* xmm1 = xmm1 * xmm2 */
+      emit_add( func, 0, 1 );              /* xmm0 = xmm0 + xmm1 */
+      FETCH( func, *inst, 1, 2, CHAN_X );  /* xmm1 = src[2].x */
+      emit_add( func, 0, 1 );              /* xmm0 = xmm0 + xmm1 */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );  /* dest[ch] = xmm0 */
+      }
       break;
 
    case TGSI_OPCODE_INDEX:
@@ -2084,7 +2095,16 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_DP2:
-      return 0;
+      FETCH( func, *inst, 0, 0, CHAN_X );  /* xmm0 = src[0].x */
+      FETCH( func, *inst, 1, 1, CHAN_X );  /* xmm1 = src[1].x */
+      emit_mul( func, 0, 1 );              /* xmm0 = xmm0 * xmm1 */
+      FETCH( func, *inst, 1, 0, CHAN_Y );  /* xmm1 = src[0].y */
+      FETCH( func, *inst, 2, 1, CHAN_Y );  /* xmm2 = src[1].y */
+      emit_mul( func, 1, 2 );              /* xmm1 = xmm1 * xmm2 */
+      emit_add( func, 0, 1 );              /* xmm0 = xmm0 + xmm1 */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( func, *inst, 0, 0, chan_index );  /* dest[ch] = xmm0 */
+      }
       break;
 
    case TGSI_OPCODE_TXL:
-- 
cgit v1.2.3


From a58dbf34ca88656739a8f8e5f4259e760365c9d0 Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Sat, 8 Nov 2008 10:29:23 -0700
Subject: gallium: implement SSE codegen for TGSI_OPCODE_NRM/NRM4

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index b8e4ab6d833..3ce2c1c27bd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2087,7 +2087,39 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_NRM:
-      return 0;
+      /* fall-through */
+   case TGSI_OPCODE_NRM4:
+      /* 3 or 4-component normalization */
+      {
+         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
+         /* note: cannot use xmm regs 2/3 here (see emit_rsqrt() above) */
+         FETCH( func, *inst, 4, 0, CHAN_X );    /* xmm4 = src[0].x */
+         FETCH( func, *inst, 5, 0, CHAN_Y );    /* xmm5 = src[0].y */
+         FETCH( func, *inst, 6, 0, CHAN_Z );    /* xmm6 = src[0].z */
+         if (dims == 4) {
+            FETCH( func, *inst, 7, 0, CHAN_W ); /* xmm7 = src[0].w */
+         }
+         emit_MOV( func, 0, 4 );                /* xmm0 = xmm3 */
+         emit_mul( func, 0, 4 );                /* xmm0 *= xmm3 */
+         emit_MOV( func, 1, 5 );                /* xmm1 = xmm4 */
+         emit_mul( func, 1, 5 );                /* xmm1 *= xmm4 */
+         emit_add( func, 0, 1 );                /* xmm0 += xmm1 */
+         emit_MOV( func, 1, 6 );                /* xmm1 = xmm5 */
+         emit_mul( func, 1, 6 );                /* xmm1 *= xmm5 */
+         emit_add( func, 0, 1 );                /* xmm0 += xmm1 */
+         if (dims == 4) {
+            emit_MOV( func, 1, 7 );             /* xmm1 = xmm7 */
+            emit_mul( func, 1, 7 );             /* xmm1 *= xmm7 */
+            emit_add( func, 0, 0 );             /* xmm0 += xmm1 */
+         }
+         emit_rsqrt( func, 1, 0 );              /* xmm1 = 1/sqrt(xmm0) */
+         FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+            if (chan_index < dims) {
+               emit_mul( func, 4+chan_index, 1); /* xmm[4+ch] *= xmm1 */
+               STORE( func, *inst, 4+chan_index, 0, chan_index );
+            }
+         }
+      }
       break;
 
    case TGSI_OPCODE_DIV:
-- 
cgit v1.2.3


From 399da3a337932c6074a69ac73e711138271308eb Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Sun, 9 Nov 2008 09:36:22 -0700
Subject: gallium: use PIPE_ARCH_SSE to protect use of SSE instrinsics only

This allows us to use SSE codegen with debug builds again.
When PIPE_ARCH_SSE is set (w/ gcc -msse -msse2) we will also use the
gcc SSE intrinsic functions.
---
 src/gallium/auxiliary/draw/draw_vs_sse.c |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_sse2.c   | 42 +++++++++++++++++++++++++-------
 src/gallium/drivers/softpipe/sp_fs_sse.c |  2 +-
 3 files changed, 35 insertions(+), 11 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 0e2036f12a3..77ba5152f9f 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -37,7 +37,7 @@
 
 #include "draw_vs.h"
 
-#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE)
+#if defined(PIPE_ARCH_X86)
 
 #include "pipe/p_shader_tokens.h"
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 3ce2c1c27bd..f93db18114c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -27,12 +27,14 @@
 
 #include "pipe/p_config.h"
 
-#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE)
+#if defined(PIPE_ARCH_X86)
 
 #include "pipe/p_debug.h"
 #include "pipe/p_shader_tokens.h"
 #include "util/u_math.h"
+#if defined(PIPE_ARCH_SSE)
 #include "util/u_sse.h"
+#endif
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi_exec.h"
@@ -627,6 +629,9 @@ emit_func_call_dst_src(
       code );
 }
 
+
+#if defined(PIPE_ARCH_SSE)
+
 /*
  * Fast SSE2 implementation of special math functions.
  */
@@ -678,6 +683,7 @@ exp2f4(__m128 x)
    return _mm_mul_ps(expipart, expfpart);
 }
 
+
 /**
  * See http://www.devmaster.net/forums/showthread.php?p=43580
  */
@@ -720,12 +726,16 @@ log2f4(__m128 x)
    return _mm_add_ps(logmant, exp);
 }
 
+
 static INLINE __m128
 powf4(__m128 x, __m128 y)
 {
    return exp2f4(_mm_mul_ps(log2f4(x), y));
 }
 
+#endif /* PIPE_ARCH_SSE */
+
+
 
 /**
  * Low-level instruction translators.
@@ -780,13 +790,20 @@ emit_cos(
 }
 
 static void PIPE_CDECL
-#if defined(PIPE_CC_GCC)
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
 __attribute__((force_align_arg_pointer))
 #endif
 ex24f(
    float *store )
 {
+#if defined(PIPE_ARCH_SSE)
    _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) ));
+#else
+   store[0] = util_fast_exp2( store[0] );
+   store[1] = util_fast_exp2( store[1] );
+   store[2] = util_fast_exp2( store[2] );
+   store[3] = util_fast_exp2( store[3] );
+#endif
 }
 
 static void
@@ -871,13 +888,20 @@ emit_frc(
 }
 
 static void PIPE_CDECL
-#if defined(PIPE_CC_GCC)
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
 __attribute__((force_align_arg_pointer))
 #endif
 lg24f(
    float *store )
 {
+#if defined(PIPE_ARCH_SSE)
    _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) ));
+#else
+   store[0] = util_fast_log2( store[0] );
+   store[1] = util_fast_log2( store[1] );
+   store[2] = util_fast_log2( store[2] );
+   store[3] = util_fast_log2( store[3] );
+#endif
 }
 
 static void
@@ -930,19 +954,19 @@ emit_neg(
 }
 
 static void PIPE_CDECL
-#if defined(PIPE_CC_GCC)
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
 __attribute__((force_align_arg_pointer))
 #endif
 pow4f(
    float *store )
 {
-#if 1
+#if defined(PIPE_ARCH_SSE)
    _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) ));
 #else
-   store[0] = powf( store[0], store[4] );
-   store[1] = powf( store[1], store[5] );
-   store[2] = powf( store[2], store[6] );
-   store[3] = powf( store[3], store[7] );
+   store[0] = util_fast_pow( store[0], store[4] );
+   store[1] = util_fast_pow( store[1], store[5] );
+   store[2] = util_fast_pow( store[2], store[6] );
+   store[3] = util_fast_pow( store[3], store[7] );
 #endif
 }
 
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 8aa597f633a..31908a517b7 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -40,7 +40,7 @@
 #include "tgsi/tgsi_sse2.h"
 
 
-#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE)
+#if defined(PIPE_ARCH_X86)
 
 #include "rtasm/rtasm_x86sse.h"
 
-- 
cgit v1.2.3


From 1bfe7c36bac4b8e5ddfcce537603aa8a5f35529d Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 12 Nov 2008 18:19:20 +0100
Subject: tgsi: Fix a bug with saving/restoring xmm registers upon func call.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index f93db18114c..8dfd2ced089 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -527,7 +527,7 @@ emit_func_call_dst(
    void (PIPE_CDECL *code)() )
 {
    struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-   unsigned i, n, xmm;
+   unsigned i, n;
    unsigned xmm_mask;
    
    /* Bitmask of the xmm registers to save */
@@ -563,7 +563,7 @@ emit_func_call_dst(
          sse_movups(
             func,
             x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ),
-            make_xmm( xmm ) );
+            make_xmm( i ) );
          ++n;
       }
    
@@ -581,7 +581,7 @@ emit_func_call_dst(
       if(xmm_mask & (1 << i)) {
          sse_movups(
             func,
-            make_xmm( xmm ),
+            make_xmm( i ),
             x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) );
          ++n;
       }
-- 
cgit v1.2.3


From 26c8593093bd9e42d06a54ed8cfdedce2fb44332 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 12 Nov 2008 23:23:49 +0100
Subject: tgsi: More comments on source register indirect and 2D indexing.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 65 ++++++++++++++++++++++++++++------
 src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 ++++++
 2 files changed, 65 insertions(+), 10 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 1da04ab7e0e..4a217454dda 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1037,11 +1037,28 @@ fetch_source(
    union tgsi_exec_channel index;
    uint swizzle;
 
+   /* We start with a direct index into a register file.
+    *
+    *    file[1],
+    *    where:
+    *       file = SrcRegister.File
+    *       [1] = SrcRegister.Index
+    */
    index.i[0] =
    index.i[1] =
    index.i[2] =
    index.i[3] = reg->SrcRegister.Index;
 
+   /* There is an extra source register that indirectly subscripts
+    * a register file. The direct index now becomes an offset
+    * that is being added to the indirect register.
+    *
+    *    file[ind[2].x+1],
+    *    where:
+    *       ind = SrcRegisterInd.File
+    *       [2] = SrcRegisterInd.Index
+    *       .x = SrcRegisterInd.SwizzleX
+    */
    if (reg->SrcRegister.Indirect) {
       union tgsi_exec_channel index2;
       union tgsi_exec_channel indir_index;
@@ -1078,19 +1095,31 @@ fetch_source(
       }
    }
 
-   if( reg->SrcRegister.Dimension ) {
-      switch( reg->SrcRegister.File ) {
+   /* There is an extra source register that is a second
+    * subscript to a register file. Effectively it means that
+    * the register file is actually a 2D array of registers.
+    *
+    *    file[1][3] == file[1*sizeof(file[1])+3],
+    *    where:
+    *       [3] = SrcRegisterDim.Index
+    */
+   if (reg->SrcRegister.Dimension) {
+      /* The size of the first-order array depends on the register file type.
+       * We need to multiply the index to the first array to get an effective,
+       * "flat" index that points to the beginning of the second-order array.
+       */
+      switch (reg->SrcRegister.File) {
       case TGSI_FILE_INPUT:
-         index.i[0] *= 17;
-         index.i[1] *= 17;
-         index.i[2] *= 17;
-         index.i[3] *= 17;
+         index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+         index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+         index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+         index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
          break;
       case TGSI_FILE_CONSTANT:
-         index.i[0] *= 4096;
-         index.i[1] *= 4096;
-         index.i[2] *= 4096;
-         index.i[3] *= 4096;
+         index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
+         index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
+         index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
+         index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
          break;
       default:
          assert( 0 );
@@ -1101,6 +1130,17 @@ fetch_source(
       index.i[2] += reg->SrcRegisterDim.Index;
       index.i[3] += reg->SrcRegisterDim.Index;
 
+      /* Again, the second subscript index can be addressed indirectly
+       * identically to the first one.
+       * Nothing stops us from indirectly addressing the indirect register,
+       * but there is no need for that, so we won't exercise it.
+       *
+       *    file[1][ind[4].y+3],
+       *    where:
+       *       ind = SrcRegisterDimInd.File
+       *       [4] = SrcRegisterDimInd.Index
+       *       .y = SrcRegisterDimInd.SwizzleX
+       */
       if (reg->SrcRegisterDim.Indirect) {
          union tgsi_exec_channel index2;
          union tgsi_exec_channel indir_index;
@@ -1133,6 +1173,11 @@ fetch_source(
                index.i[i] = 0;
          }
       }
+
+      /* If by any chance there was a need for a 3D array of register
+       * files, we would have to check whether SrcRegisterDim is followed
+       * by a dimension register and continue the saga.
+       */
    }
 
    swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index fc40a25e09f..ac4b239910f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -178,6 +178,16 @@ struct tgsi_exec_labels
 #define TGSI_EXEC_MAX_LOOP_NESTING  20
 #define TGSI_EXEC_MAX_CALL_NESTING  20
 
+/* The maximum number of input attributes per vertex. For 2D
+ * input register files, this is the stride between two 1D
+ * arrays.
+ */
+#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17
+
+/* The maximum number of constant vectors per constant buffer.
+ */
+#define TGSI_EXEC_MAX_CONST_BUFFER  4096
+
 /**
  * Run-time virtual machine state for executing TGSI shader.
  */
-- 
cgit v1.2.3


From 957f7d7d94e8d092ba98433e61b21ac704453519 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Fri, 14 Nov 2008 13:26:01 +0100
Subject: tgsi: Keep address register as a floating point.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 4a217454dda..177cf206b33 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -466,17 +466,6 @@ micro_exp2(
 #endif
 }
 
-static void
-micro_f2it(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->i[0] = (int) src->f[0];
-   dst->i[1] = (int) src->f[1];
-   dst->i[2] = (int) src->f[2];
-   dst->i[3] = (int) src->f[3];
-}
-
 static void
 micro_f2ut(
    union tgsi_exec_channel *dst,
@@ -1081,10 +1070,10 @@ fetch_source(
          &indir_index );
 
       /* add value of address register to the offset */
-      index.i[0] += indir_index.i[0];
-      index.i[1] += indir_index.i[1];
-      index.i[2] += indir_index.i[2];
-      index.i[3] += indir_index.i[3];
+      index.i[0] += (int) indir_index.f[0];
+      index.i[1] += (int) indir_index.f[1];
+      index.i[2] += (int) indir_index.f[2];
+      index.i[3] += (int) indir_index.f[3];
 
       /* for disabled execution channels, zero-out the index to
        * avoid using a potential garbage value.
@@ -1160,10 +1149,10 @@ fetch_source(
             &index2,
             &indir_index );
 
-         index.i[0] += indir_index.i[0];
-         index.i[1] += indir_index.i[1];
-         index.i[2] += indir_index.i[2];
-         index.i[3] += indir_index.i[3];
+         index.i[0] += (int) indir_index.f[0];
+         index.i[1] += (int) indir_index.f[1];
+         index.i[2] += (int) indir_index.f[2];
+         index.i[3] += (int) indir_index.f[3];
 
          /* for disabled execution channels, zero-out the index to
           * avoid using a potential garbage value.
@@ -1789,7 +1778,7 @@ exec_instruction(
    case TGSI_OPCODE_ARL:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_f2it( &r[0], &r[0] );
+         micro_trunc( &r[0], &r[0] );
          STORE( &r[0], 0, chan_index );
       }
       break;
-- 
cgit v1.2.3


From d86ffcffb365d1f9fc383e450c8e08bf86169726 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Fri, 14 Nov 2008 13:31:06 +0100
Subject: tgsi: Return 0.0 for negative constant register indices.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 177cf206b33..96567772b75 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -947,14 +947,22 @@ fetch_src_file_channel(
       switch( file ) {
       case TGSI_FILE_CONSTANT:
          assert(mach->Consts);
-         assert(index->i[0] >= 0);
-         assert(index->i[1] >= 0);
-         assert(index->i[2] >= 0);
-         assert(index->i[3] >= 0);
-         chan->f[0] = mach->Consts[index->i[0]][swizzle];
-         chan->f[1] = mach->Consts[index->i[1]][swizzle];
-         chan->f[2] = mach->Consts[index->i[2]][swizzle];
-         chan->f[3] = mach->Consts[index->i[3]][swizzle];
+         if (index->i[0] < 0)
+            chan->f[0] = 0.0f;
+         else
+            chan->f[0] = mach->Consts[index->i[0]][swizzle];
+         if (index->i[1] < 0)
+            chan->f[1] = 0.0f;
+         else
+            chan->f[1] = mach->Consts[index->i[1]][swizzle];
+         if (index->i[2] < 0)
+            chan->f[2] = 0.0f;
+         else
+            chan->f[2] = mach->Consts[index->i[2]][swizzle];
+         if (index->i[3] < 0)
+            chan->f[3] = 0.0f;
+         else
+            chan->f[3] = mach->Consts[index->i[3]][swizzle];
          break;
 
       case TGSI_FILE_INPUT:
-- 
cgit v1.2.3


From 0b9e96fae9493d5d58f046e01c983a3c4267090e Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Sun, 23 Nov 2008 19:15:15 -0700
Subject: softpipe: remove old/unneeded dependencies between TGSI exec and
 softpipe

Use tgsi_sampler struct as a base class.  Softpipe subclasses it and adds
the fields it needs.
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c       |   8 +-
 src/gallium/auxiliary/tgsi/tgsi_exec.h       |  11 +-
 src/gallium/drivers/softpipe/sp_fs_exec.c    |   2 +-
 src/gallium/drivers/softpipe/sp_fs_sse.c     |   2 +-
 src/gallium/drivers/softpipe/sp_quad_fs.c    |  23 +--
 src/gallium/drivers/softpipe/sp_tex_sample.c | 262 +++++++++++++++------------
 src/gallium/drivers/softpipe/sp_tex_sample.h |  50 ++++-
 src/gallium/drivers/softpipe/sp_tile_cache.c |   4 +-
 src/gallium/drivers/softpipe/sp_tile_cache.h |   2 +-
 9 files changed, 216 insertions(+), 148 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 96567772b75..0fdfb91d393 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -133,7 +133,7 @@ tgsi_exec_machine_bind_shader(
    struct tgsi_exec_machine *mach,
    const struct tgsi_token *tokens,
    uint numSamplers,
-   struct tgsi_sampler *samplers)
+   struct tgsi_sampler **samplers)
 {
    uint k;
    struct tgsi_parse_context parse;
@@ -1581,7 +1581,7 @@ exec_tex(struct tgsi_exec_machine *mach,
       else
          lodBias = 0.0;
 
-      fetch_texel(&mach->Samplers[unit],
+      fetch_texel(mach->Samplers[unit],
                   &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
                   &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
       break;
@@ -1607,7 +1607,7 @@ exec_tex(struct tgsi_exec_machine *mach,
       else
          lodBias = 0.0;
 
-      fetch_texel(&mach->Samplers[unit],
+      fetch_texel(mach->Samplers[unit],
                   &r[0], &r[1], &r[2], lodBias,  /* inputs */
                   &r[0], &r[1], &r[2], &r[3]);  /* outputs */
       break;
@@ -1633,7 +1633,7 @@ exec_tex(struct tgsi_exec_machine *mach,
       else
          lodBias = 0.0;
 
-      fetch_texel(&mach->Samplers[unit],
+      fetch_texel(mach->Samplers[unit],
                   &r[0], &r[1], &r[2], lodBias,
                   &r[0], &r[1], &r[2], &r[3]);
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index ac4b239910f..4ffd4efbffa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -68,17 +68,12 @@ struct tgsi_interp_coef
    float dady[NUM_CHANNELS];
 };
 
-
-struct softpipe_tile_cache;  /**< Opaque to TGSI */
-
 /**
  * Information for sampling textures, which must be implemented
  * by code outside the TGSI executor.
  */
 struct tgsi_sampler
 {
-   const struct pipe_sampler_state *state;
-   struct pipe_texture *texture;
    /** Get samples for four fragments in a quad */
    void (*get_samples)(struct tgsi_sampler *sampler,
                        const float s[QUAD_SIZE],
@@ -86,8 +81,6 @@ struct tgsi_sampler
                        const float p[QUAD_SIZE],
                        float lodbias,
                        float rgba[NUM_CHANNELS][QUAD_SIZE]);
-   void *pipe; /*XXX temporary*/
-   struct softpipe_tile_cache *cache;
 };
 
 /**
@@ -205,7 +198,7 @@ struct tgsi_exec_machine
    struct tgsi_exec_vector       *Temps;
    struct tgsi_exec_vector       *Addrs;
 
-   struct tgsi_sampler           *Samplers;
+   struct tgsi_sampler           **Samplers;
 
    float                         Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
    unsigned                      ImmLimit;
@@ -268,7 +261,7 @@ tgsi_exec_machine_bind_shader(
    struct tgsi_exec_machine *mach,
    const struct tgsi_token *tokens,
    uint numSamplers,
-   struct tgsi_sampler *samplers);
+   struct tgsi_sampler **samplers);
 
 uint
 tgsi_exec_machine_run(
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index f472dd0ed2a..453b0373f0f 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -92,7 +92,7 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef,
 static void
 exec_prepare( const struct sp_fragment_shader *base,
 	      struct tgsi_exec_machine *machine,
-	      struct tgsi_sampler *samplers )
+	      struct tgsi_sampler **samplers )
 {
    /*
     * Bind tokens/shader to the interpreter's machine state.
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 31908a517b7..9a273c87643 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -69,7 +69,7 @@ struct sp_sse_fragment_shader {
 static void
 fs_sse_prepare( const struct sp_fragment_shader *base,
 		struct tgsi_exec_machine *machine,
-		struct tgsi_sampler *samplers )
+		struct tgsi_sampler **samplers )
 {
 }
 
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 1f0cb3e0355..730fa0cf49f 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -50,8 +50,9 @@
 
 struct quad_shade_stage
 {
-   struct quad_stage stage;
-   struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];
+   struct quad_stage stage;  /**< base class */
+   struct sp_shader_sampler samplers[PIPE_MAX_SAMPLERS];
+   struct sp_shader_sampler *samplers_list[PIPE_MAX_SAMPLERS];
    struct tgsi_exec_machine machine;
    struct tgsi_exec_vector *inputs, *outputs;
 };
@@ -147,18 +148,10 @@ static void shade_begin(struct quad_stage *qs)
 {
    struct quad_shade_stage *qss = quad_shade_stage(qs);
    struct softpipe_context *softpipe = qs->softpipe;
-   unsigned i;
-   unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers);
-
-   /* set TGSI sampler state that varies */
-   for (i = 0; i < num; i++) {
-      qss->samplers[i].state = softpipe->sampler[i];
-      qss->samplers[i].texture = softpipe->texture[i];
-   }
 
    softpipe->fs->prepare( softpipe->fs, 
 			  &qss->machine,
-			  qss->samplers );
+			  qss->samplers_list );
 
    qs->next->begin(qs->next);
 }
@@ -191,12 +184,14 @@ struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
    qss->stage.run = shade_quad;
    qss->stage.destroy = shade_destroy;
 
-   /* set TGSI sampler state that's constant */
+   /* setup TGSI sampler state */
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
       assert(softpipe->tex_cache[i]);
-      qss->samplers[i].get_samples = sp_get_samples;
-      qss->samplers[i].pipe = &softpipe->pipe;
+      qss->samplers[i].base.get_samples = sp_get_samples;
+      qss->samplers[i].unit = i;
+      qss->samplers[i].sp = softpipe;
       qss->samplers[i].cache = softpipe->tex_cache[i];
+      qss->samplers_list[i] = &qss->samplers[i];
    }
 
    tgsi_exec_machine_init( &qss->machine );
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 49250ec084c..b66caf95078 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -35,6 +35,7 @@
 #include "sp_context.h"
 #include "sp_headers.h"
 #include "sp_surface.h"
+#include "sp_texture.h"
 #include "sp_tex_sample.h"
 #include "sp_tile_cache.h"
 #include "pipe/p_context.h"
@@ -463,7 +464,8 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
  * This is only done for fragment shaders, not vertex shaders.
  */
 static float
-compute_lambda(struct tgsi_sampler *sampler,
+compute_lambda(const struct pipe_texture *tex,
+               const struct pipe_sampler_state *sampler,
                const float s[QUAD_SIZE],
                const float t[QUAD_SIZE],
                const float p[QUAD_SIZE],
@@ -471,7 +473,7 @@ compute_lambda(struct tgsi_sampler *sampler,
 {
    float rho, lambda;
 
-   assert(sampler->state->normalized_coords);
+   assert(sampler->normalized_coords);
 
    assert(s);
    {
@@ -479,7 +481,7 @@ compute_lambda(struct tgsi_sampler *sampler,
       float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
       dsdx = fabsf(dsdx);
       dsdy = fabsf(dsdy);
-      rho = MAX2(dsdx, dsdy) * sampler->texture->width[0];
+      rho = MAX2(dsdx, dsdy) * tex->width[0];
    }
    if (t) {
       float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
@@ -487,7 +489,7 @@ compute_lambda(struct tgsi_sampler *sampler,
       float max;
       dtdx = fabsf(dtdx);
       dtdy = fabsf(dtdy);
-      max = MAX2(dtdx, dtdy) * sampler->texture->height[0];
+      max = MAX2(dtdx, dtdy) * tex->height[0];
       rho = MAX2(rho, max);
    }
    if (p) {
@@ -496,13 +498,13 @@ compute_lambda(struct tgsi_sampler *sampler,
       float max;
       dpdx = fabsf(dpdx);
       dpdy = fabsf(dpdy);
-      max = MAX2(dpdx, dpdy) * sampler->texture->depth[0];
+      max = MAX2(dpdx, dpdy) * tex->depth[0];
       rho = MAX2(rho, max);
    }
 
    lambda = util_fast_log2(rho);
-   lambda += lodbias + sampler->state->lod_bias;
-   lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod);
+   lambda += lodbias + sampler->lod_bias;
+   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 
    return lambda;
 }
@@ -516,7 +518,8 @@ compute_lambda(struct tgsi_sampler *sampler,
  * 4. Return image filter to use within mipmap images
  */
 static void
-choose_mipmap_levels(struct tgsi_sampler *sampler,
+choose_mipmap_levels(const struct pipe_texture *texture,
+                     const struct pipe_sampler_state *sampler,
                      const float s[QUAD_SIZE],
                      const float t[QUAD_SIZE],
                      const float p[QUAD_SIZE],
@@ -524,25 +527,26 @@ choose_mipmap_levels(struct tgsi_sampler *sampler,
                      unsigned *level0, unsigned *level1, float *levelBlend,
                      unsigned *imgFilter)
 {
-   if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+
+   if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
       /* no mipmap selection needed */
-      *level0 = *level1 = CLAMP((int) sampler->state->min_lod,
-                                0, (int) sampler->texture->last_level);
+      *level0 = *level1 = CLAMP((int) sampler->min_lod,
+                                0, (int) texture->last_level);
 
-      if (sampler->state->min_img_filter != sampler->state->mag_img_filter) {
+      if (sampler->min_img_filter != sampler->mag_img_filter) {
          /* non-mipmapped texture, but still need to determine if doing
           * minification or magnification.
           */
-         float lambda = compute_lambda(sampler, s, t, p, lodbias);
+         float lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
          if (lambda <= 0.0) {
-            *imgFilter = sampler->state->mag_img_filter;
+            *imgFilter = sampler->mag_img_filter;
          }
          else {
-            *imgFilter = sampler->state->min_img_filter;
+            *imgFilter = sampler->min_img_filter;
          }
       }
       else {
-         *imgFilter = sampler->state->mag_img_filter;
+         *imgFilter = sampler->mag_img_filter;
       }
    }
    else {
@@ -550,32 +554,32 @@ choose_mipmap_levels(struct tgsi_sampler *sampler,
 
       if (1)
          /* fragment shader */
-         lambda = compute_lambda(sampler, s, t, p, lodbias);
+         lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
       else
          /* vertex shader */
          lambda = lodbias; /* not really a bias, but absolute LOD */
 
       if (lambda <= 0.0) { /* XXX threshold depends on the filter */
          /* magnifying */
-         *imgFilter = sampler->state->mag_img_filter;
+         *imgFilter = sampler->mag_img_filter;
          *level0 = *level1 = 0;
       }
       else {
          /* minifying */
-         *imgFilter = sampler->state->min_img_filter;
+         *imgFilter = sampler->min_img_filter;
 
          /* choose mipmap level(s) and compute the blend factor between them */
-         if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+         if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
             /* Nearest mipmap level */
             const int lvl = (int) (lambda + 0.5);
             *level0 =
-            *level1 = CLAMP(lvl, 0, (int) sampler->texture->last_level);
+            *level1 = CLAMP(lvl, 0, (int) texture->last_level);
          }
          else {
             /* Linear interpolation between mipmap levels */
             const int lvl = (int) lambda;
-            *level0 = CLAMP(lvl,     0, (int) sampler->texture->last_level);
-            *level1 = CLAMP(lvl + 1, 0, (int) sampler->texture->last_level);
+            *level0 = CLAMP(lvl,     0, (int) texture->last_level);
+            *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
             *levelBlend = FRAC(lambda);  /* blending weight between levels */
          }
       }
@@ -585,6 +589,7 @@ choose_mipmap_levels(struct tgsi_sampler *sampler,
 
 /**
  * Get a texel from a texture, using the texture tile cache.
+ * Called by the TGSI interpreter.
  *
  * \param face  the cube face in 0..5
  * \param level  the mipmap level
@@ -598,23 +603,29 @@ choose_mipmap_levels(struct tgsi_sampler *sampler,
  * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
  */
 static void
-get_texel(struct tgsi_sampler *sampler,
+get_texel(struct tgsi_sampler *tgsi_sampler,
           unsigned face, unsigned level, int x, int y, int z,
           float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
 {
-   if (x < 0 || x >= (int) sampler->texture->width[level] ||
-       y < 0 || y >= (int) sampler->texture->height[level] ||
-       z < 0 || z >= (int) sampler->texture->depth[level]) {
-      rgba[0][j] = sampler->state->border_color[0];
-      rgba[1][j] = sampler->state->border_color[1];
-      rgba[2][j] = sampler->state->border_color[2];
-      rgba[3][j] = sampler->state->border_color[3];
+   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+   const struct softpipe_context *sp = samp->sp;
+   const uint unit = samp->unit;
+   const struct pipe_texture *texture = sp->texture[unit];
+   const struct pipe_sampler_state *sampler = sp->sampler[unit];
+
+   if (x < 0 || x >= (int) texture->width[level] ||
+       y < 0 || y >= (int) texture->height[level] ||
+       z < 0 || z >= (int) texture->depth[level]) {
+      rgba[0][j] = sampler->border_color[0];
+      rgba[1][j] = sampler->border_color[1];
+      rgba[2][j] = sampler->border_color[2];
+      rgba[3][j] = sampler->border_color[3];
    }
    else {
       const int tx = x % TILE_SIZE;
       const int ty = y % TILE_SIZE;
       const struct softpipe_cached_tile *tile
-         = sp_get_cached_tile_tex(sampler->pipe, sampler->cache,
+         = sp_get_cached_tile_tex(samp->sp, samp->cache,
                                   x, y, z, face, level);
       rgba[0][j] = tile->data.color[ty][tx][0];
       rgba[1][j] = tile->data.color[ty][tx][1];
@@ -624,7 +635,7 @@ get_texel(struct tgsi_sampler *sampler,
       {
          debug_printf("Get texel %f %f %f %f from %s\n",
                       rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
-                      pf_name(sampler->texture->format));
+                      pf_name(texture->format));
       }
    }
 }
@@ -682,7 +693,7 @@ shadow_compare(uint compare_func,
  * Could probably extend for 3D...
  */
 static void
-sp_get_samples_2d_common(struct tgsi_sampler *sampler,
+sp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
                          const float s[QUAD_SIZE],
                          const float t[QUAD_SIZE],
                          const float p[QUAD_SIZE],
@@ -690,28 +701,33 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler,
                          float rgba[NUM_CHANNELS][QUAD_SIZE],
                          const unsigned faces[4])
 {
-   const uint compare_func = sampler->state->compare_func;
+   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+   const struct softpipe_context *sp = samp->sp;
+   const uint unit = samp->unit;
+   const struct pipe_texture *texture = sp->texture[unit];
+   const struct pipe_sampler_state *sampler = sp->sampler[unit];
+   const uint compare_func = sampler->compare_func;
    unsigned level0, level1, j, imgFilter;
    int width, height;
    float levelBlend;
 
-   choose_mipmap_levels(sampler, s, t, p, lodbias,
+   choose_mipmap_levels(texture, sampler, s, t, p, lodbias,
                         &level0, &level1, &levelBlend, &imgFilter);
 
-   assert(sampler->state->normalized_coords);
+   assert(sampler->normalized_coords);
 
-   width = sampler->texture->width[level0];
-   height = sampler->texture->height[level0];
+   width = texture->width[level0];
+   height = texture->height[level0];
 
    assert(width > 0);
 
    switch (imgFilter) {
    case PIPE_TEX_FILTER_NEAREST:
       for (j = 0; j < QUAD_SIZE; j++) {
-         int x = nearest_texcoord(sampler->state->wrap_s, s[j], width);
-         int y = nearest_texcoord(sampler->state->wrap_t, t[j], height);
-         get_texel(sampler, faces[j], level0, x, y, 0, rgba, j);
-         if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+         int x = nearest_texcoord(sampler->wrap_s, s[j], width);
+         int y = nearest_texcoord(sampler->wrap_t, t[j], height);
+         get_texel(tgsi_sampler, faces[j], level0, x, y, 0, rgba, j);
+         if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
             shadow_compare(compare_func, rgba, p, j);
          }
 
@@ -721,8 +737,8 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler,
             unsigned c;
             x = x / 2;
             y = y / 2;
-            get_texel(sampler, faces[j], level1, x, y, 0, rgba2, j);
-            if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+            get_texel(tgsi_sampler, faces[j], level1, x, y, 0, rgba2, j);
+            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
                shadow_compare(compare_func, rgba2, p, j);
             }
 
@@ -737,13 +753,13 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler,
       for (j = 0; j < QUAD_SIZE; j++) {
          float tx[4][4], a, b;
          int x0, y0, x1, y1, c;
-         linear_texcoord(sampler->state->wrap_s, s[j], width,  &x0, &x1, &a);
-         linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &b);
-         get_texel(sampler, faces[j], level0, x0, y0, 0, tx, 0);
-         get_texel(sampler, faces[j], level0, x1, y0, 0, tx, 1);
-         get_texel(sampler, faces[j], level0, x0, y1, 0, tx, 2);
-         get_texel(sampler, faces[j], level0, x1, y1, 0, tx, 3);
-         if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+         linear_texcoord(sampler->wrap_s, s[j], width,  &x0, &x1, &a);
+         linear_texcoord(sampler->wrap_t, t[j], height, &y0, &y1, &b);
+         get_texel(tgsi_sampler, faces[j], level0, x0, y0, 0, tx, 0);
+         get_texel(tgsi_sampler, faces[j], level0, x1, y0, 0, tx, 1);
+         get_texel(tgsi_sampler, faces[j], level0, x0, y1, 0, tx, 2);
+         get_texel(tgsi_sampler, faces[j], level0, x1, y1, 0, tx, 3);
+         if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
             shadow_compare(compare_func, tx, p, 0);
             shadow_compare(compare_func, tx, p, 1);
             shadow_compare(compare_func, tx, p, 2);
@@ -761,11 +777,11 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler,
             y0 = y0 / 2;
             x1 = x1 / 2;
             y1 = y1 / 2;
-            get_texel(sampler, faces[j], level1, x0, y0, 0, tx, 0);
-            get_texel(sampler, faces[j], level1, x1, y0, 0, tx, 1);
-            get_texel(sampler, faces[j], level1, x0, y1, 0, tx, 2);
-            get_texel(sampler, faces[j], level1, x1, y1, 0, tx, 3);
-            if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
+            get_texel(tgsi_sampler, faces[j], level1, x0, y0, 0, tx, 0);
+            get_texel(tgsi_sampler, faces[j], level1, x1, y0, 0, tx, 1);
+            get_texel(tgsi_sampler, faces[j], level1, x0, y1, 0, tx, 2);
+            get_texel(tgsi_sampler, faces[j], level1, x1, y1, 0, tx, 3);
+            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
                shadow_compare(compare_func, tx, p, 0);
                shadow_compare(compare_func, tx, p, 1);
                shadow_compare(compare_func, tx, p, 2);
@@ -817,27 +833,32 @@ sp_get_samples_2d(struct tgsi_sampler *sampler,
 
 
 static void
-sp_get_samples_3d(struct tgsi_sampler *sampler,
+sp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
                   const float s[QUAD_SIZE],
                   const float t[QUAD_SIZE],
                   const float p[QUAD_SIZE],
                   float lodbias,
                   float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
+   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+   const struct softpipe_context *sp = samp->sp;
+   const uint unit = samp->unit;
+   const struct pipe_texture *texture = sp->texture[unit];
+   const struct pipe_sampler_state *sampler = sp->sampler[unit];
    /* get/map pipe_surfaces corresponding to 3D tex slices */
    unsigned level0, level1, j, imgFilter;
    int width, height, depth;
    float levelBlend;
    const uint face = 0;
 
-   choose_mipmap_levels(sampler, s, t, p, lodbias,
+   choose_mipmap_levels(texture, sampler, s, t, p, lodbias,
                         &level0, &level1, &levelBlend, &imgFilter);
 
-   assert(sampler->state->normalized_coords);
+   assert(sampler->normalized_coords);
 
-   width = sampler->texture->width[level0];
-   height = sampler->texture->height[level0];
-   depth = sampler->texture->depth[level0];
+   width = texture->width[level0];
+   height = texture->height[level0];
+   depth = texture->depth[level0];
 
    assert(width > 0);
    assert(height > 0);
@@ -846,10 +867,10 @@ sp_get_samples_3d(struct tgsi_sampler *sampler,
    switch (imgFilter) {
    case PIPE_TEX_FILTER_NEAREST:
       for (j = 0; j < QUAD_SIZE; j++) {
-         int x = nearest_texcoord(sampler->state->wrap_s, s[j], width);
-         int y = nearest_texcoord(sampler->state->wrap_t, t[j], height);
-         int z = nearest_texcoord(sampler->state->wrap_r, p[j], depth);
-         get_texel(sampler, face, level0, x, y, z, rgba, j);
+         int x = nearest_texcoord(sampler->wrap_s, s[j], width);
+         int y = nearest_texcoord(sampler->wrap_t, t[j], height);
+         int z = nearest_texcoord(sampler->wrap_r, p[j], depth);
+         get_texel(tgsi_sampler, face, level0, x, y, z, rgba, j);
 
          if (level0 != level1) {
             /* get texels from second mipmap level and blend */
@@ -858,7 +879,7 @@ sp_get_samples_3d(struct tgsi_sampler *sampler,
             x /= 2;
             y /= 2;
             z /= 2;
-            get_texel(sampler, face, level1, x, y, z, rgba2, j);
+            get_texel(tgsi_sampler, face, level1, x, y, z, rgba2, j);
             for (c = 0; c < NUM_CHANNELS; c++) {
                rgba[c][j] = LERP(levelBlend, rgba2[c][j], rgba[c][j]);
             }
@@ -871,17 +892,17 @@ sp_get_samples_3d(struct tgsi_sampler *sampler,
          float texel0[4][4], texel1[4][4];
          float xw, yw, zw; /* interpolation weights */
          int x0, x1, y0, y1, z0, z1, c;
-         linear_texcoord(sampler->state->wrap_s, s[j], width,  &x0, &x1, &xw);
-         linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &yw);
-         linear_texcoord(sampler->state->wrap_r, p[j], depth,  &z0, &z1, &zw);
-         get_texel(sampler, face, level0, x0, y0, z0, texel0, 0);
-         get_texel(sampler, face, level0, x1, y0, z0, texel0, 1);
-         get_texel(sampler, face, level0, x0, y1, z0, texel0, 2);
-         get_texel(sampler, face, level0, x1, y1, z0, texel0, 3);
-         get_texel(sampler, face, level0, x0, y0, z1, texel1, 0);
-         get_texel(sampler, face, level0, x1, y0, z1, texel1, 1);
-         get_texel(sampler, face, level0, x0, y1, z1, texel1, 2);
-         get_texel(sampler, face, level0, x1, y1, z1, texel1, 3);
+         linear_texcoord(sampler->wrap_s, s[j], width,  &x0, &x1, &xw);
+         linear_texcoord(sampler->wrap_t, t[j], height, &y0, &y1, &yw);
+         linear_texcoord(sampler->wrap_r, p[j], depth,  &z0, &z1, &zw);
+         get_texel(tgsi_sampler, face, level0, x0, y0, z0, texel0, 0);
+         get_texel(tgsi_sampler, face, level0, x1, y0, z0, texel0, 1);
+         get_texel(tgsi_sampler, face, level0, x0, y1, z0, texel0, 2);
+         get_texel(tgsi_sampler, face, level0, x1, y1, z0, texel0, 3);
+         get_texel(tgsi_sampler, face, level0, x0, y0, z1, texel1, 0);
+         get_texel(tgsi_sampler, face, level0, x1, y0, z1, texel1, 1);
+         get_texel(tgsi_sampler, face, level0, x0, y1, z1, texel1, 2);
+         get_texel(tgsi_sampler, face, level0, x1, y1, z1, texel1, 3);
 
          /* 3D lerp */
          for (c = 0; c < 4; c++) {
@@ -904,14 +925,14 @@ sp_get_samples_3d(struct tgsi_sampler *sampler,
             x1 /= 2;
             y1 /= 2;
             z1 /= 2;
-            get_texel(sampler, face, level1, x0, y0, z0, texel0, 0);
-            get_texel(sampler, face, level1, x1, y0, z0, texel0, 1);
-            get_texel(sampler, face, level1, x0, y1, z0, texel0, 2);
-            get_texel(sampler, face, level1, x1, y1, z0, texel0, 3);
-            get_texel(sampler, face, level1, x0, y0, z1, texel1, 0);
-            get_texel(sampler, face, level1, x1, y0, z1, texel1, 1);
-            get_texel(sampler, face, level1, x0, y1, z1, texel1, 2);
-            get_texel(sampler, face, level1, x1, y1, z1, texel1, 3);
+            get_texel(tgsi_sampler, face, level1, x0, y0, z0, texel0, 0);
+            get_texel(tgsi_sampler, face, level1, x1, y0, z0, texel0, 1);
+            get_texel(tgsi_sampler, face, level1, x0, y1, z0, texel0, 2);
+            get_texel(tgsi_sampler, face, level1, x1, y1, z0, texel0, 3);
+            get_texel(tgsi_sampler, face, level1, x0, y0, z1, texel1, 0);
+            get_texel(tgsi_sampler, face, level1, x1, y0, z1, texel1, 1);
+            get_texel(tgsi_sampler, face, level1, x0, y1, z1, texel1, 2);
+            get_texel(tgsi_sampler, face, level1, x1, y1, z1, texel1, 3);
 
             /* 3D lerp */
             for (c = 0; c < 4; c++) {
@@ -956,38 +977,43 @@ sp_get_samples_cube(struct tgsi_sampler *sampler,
 
 
 static void
-sp_get_samples_rect(struct tgsi_sampler *sampler,
+sp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
                     const float s[QUAD_SIZE],
                     const float t[QUAD_SIZE],
                     const float p[QUAD_SIZE],
                     float lodbias,
                     float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
+   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+   const struct softpipe_context *sp = samp->sp;
+   const uint unit = samp->unit;
+   const struct pipe_texture *texture = sp->texture[unit];
+   const struct pipe_sampler_state *sampler = sp->sampler[unit];
    //sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces);
    static const uint face = 0;
-   const uint compare_func = sampler->state->compare_func;
+   const uint compare_func = sampler->compare_func;
    unsigned level0, level1, j, imgFilter;
    int width, height;
    float levelBlend;
 
-   choose_mipmap_levels(sampler, s, t, p, lodbias,
+   choose_mipmap_levels(texture, sampler, s, t, p, lodbias,
                         &level0, &level1, &levelBlend, &imgFilter);
 
    /* texture RECTS cannot be mipmapped */
    assert(level0 == level1);
 
-   width = sampler->texture->width[level0];
-   height = sampler->texture->height[level0];
+   width = texture->width[level0];
+   height = texture->height[level0];
 
    assert(width > 0);
 
    switch (imgFilter) {
    case PIPE_TEX_FILTER_NEAREST:
       for (j = 0; j < QUAD_SIZE; j++) {
-         int x = nearest_texcoord_unnorm(sampler->state->wrap_s, s[j], width);
-         int y = nearest_texcoord_unnorm(sampler->state->wrap_t, t[j], height);
-         get_texel(sampler, face, level0, x, y, 0, rgba, j);
-         if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+         int x = nearest_texcoord_unnorm(sampler->wrap_s, s[j], width);
+         int y = nearest_texcoord_unnorm(sampler->wrap_t, t[j], height);
+         get_texel(tgsi_sampler, face, level0, x, y, 0, rgba, j);
+         if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
             shadow_compare(compare_func, rgba, p, j);
          }
       }
@@ -997,13 +1023,13 @@ sp_get_samples_rect(struct tgsi_sampler *sampler,
       for (j = 0; j < QUAD_SIZE; j++) {
          float tx[4][4], a, b;
          int x0, y0, x1, y1, c;
-         linear_texcoord_unnorm(sampler->state->wrap_s, s[j], width,  &x0, &x1, &a);
-         linear_texcoord_unnorm(sampler->state->wrap_t, t[j], height, &y0, &y1, &b);
-         get_texel(sampler, face, level0, x0, y0, 0, tx, 0);
-         get_texel(sampler, face, level0, x1, y0, 0, tx, 1);
-         get_texel(sampler, face, level0, x0, y1, 0, tx, 2);
-         get_texel(sampler, face, level0, x1, y1, 0, tx, 3);
-         if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+         linear_texcoord_unnorm(sampler->wrap_s, s[j], width,  &x0, &x1, &a);
+         linear_texcoord_unnorm(sampler->wrap_t, t[j], height, &y0, &y1, &b);
+         get_texel(tgsi_sampler, face, level0, x0, y0, 0, tx, 0);
+         get_texel(tgsi_sampler, face, level0, x1, y0, 0, tx, 1);
+         get_texel(tgsi_sampler, face, level0, x0, y1, 0, tx, 2);
+         get_texel(tgsi_sampler, face, level0, x1, y1, 0, tx, 3);
+         if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
             shadow_compare(compare_func, tx, p, 0);
             shadow_compare(compare_func, tx, p, 1);
             shadow_compare(compare_func, tx, p, 2);
@@ -1036,34 +1062,40 @@ sp_get_samples_rect(struct tgsi_sampler *sampler,
  * a new tgsi_sampler object for each state combo it finds....
  */
 void
-sp_get_samples(struct tgsi_sampler *sampler,
+sp_get_samples(struct tgsi_sampler *tgsi_sampler,
                const float s[QUAD_SIZE],
                const float t[QUAD_SIZE],
                const float p[QUAD_SIZE],
                float lodbias,
                float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
-   if (!sampler->texture)
+   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
+   const struct softpipe_context *sp = samp->sp;
+   const uint unit = samp->unit;
+   const struct pipe_texture *texture = sp->texture[unit];
+   const struct pipe_sampler_state *sampler = sp->sampler[unit];
+
+   if (!texture)
       return;
 
-   switch (sampler->texture->target) {
+   switch (texture->target) {
    case PIPE_TEXTURE_1D:
-      assert(sampler->state->normalized_coords);
-      sp_get_samples_1d(sampler, s, t, p, lodbias, rgba);
+      assert(sampler->normalized_coords);
+      sp_get_samples_1d(tgsi_sampler, s, t, p, lodbias, rgba);
       break;
    case PIPE_TEXTURE_2D:
-      if (sampler->state->normalized_coords)
-         sp_get_samples_2d(sampler, s, t, p, lodbias, rgba);
+      if (sampler->normalized_coords)
+         sp_get_samples_2d(tgsi_sampler, s, t, p, lodbias, rgba);
       else
-         sp_get_samples_rect(sampler, s, t, p, lodbias, rgba);
+         sp_get_samples_rect(tgsi_sampler, s, t, p, lodbias, rgba);
       break;
    case PIPE_TEXTURE_3D:
-      assert(sampler->state->normalized_coords);
-      sp_get_samples_3d(sampler, s, t, p, lodbias, rgba);
+      assert(sampler->normalized_coords);
+      sp_get_samples_3d(tgsi_sampler, s, t, p, lodbias, rgba);
       break;
    case PIPE_TEXTURE_CUBE:
-      assert(sampler->state->normalized_coords);
-      sp_get_samples_cube(sampler, s, t, p, lodbias, rgba);
+      assert(sampler->normalized_coords);
+      sp_get_samples_cube(tgsi_sampler, s, t, p, lodbias, rgba);
       break;
    default:
       assert(0);
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 404bfd0c365..783169c9397 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -1,8 +1,56 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
 #ifndef SP_TEX_SAMPLE_H
 #define SP_TEX_SAMPLE_H
 
 
-struct tgsi_sampler;
+#include "tgsi/tgsi_exec.h"
+
+
+/**
+ * Subclass of tgsi_sampler
+ */
+struct sp_shader_sampler
+{
+   struct tgsi_sampler base;  /**< base class */
+
+   uint unit;
+   struct softpipe_context *sp;
+   struct softpipe_tile_cache *cache;
+};
+
+
+
+static INLINE struct sp_shader_sampler *
+sp_shader_sampler(struct tgsi_sampler *sampler)
+{
+   return (struct sp_shader_sampler *) sampler;
+}
 
 
 extern void
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index b50c9845133..78b0efa46d2 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -494,11 +494,11 @@ tex_cache_pos(int x, int y, int z, int face, int level)
  * Tiles are read-only and indexed with more params.
  */
 const struct softpipe_cached_tile *
-sp_get_cached_tile_tex(struct pipe_context *pipe,
+sp_get_cached_tile_tex(struct softpipe_context *sp,
                        struct softpipe_tile_cache *tc, int x, int y, int z,
                        int face, int level)
 {
-   struct pipe_screen *screen = pipe->screen;
+   struct pipe_screen *screen = sp->pipe.screen;
    /* tile pos in framebuffer: */
    const int tile_x = x & ~(TILE_SIZE - 1);
    const int tile_y = y & ~(TILE_SIZE - 1);
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h
index bc96c941f61..a66bb50bcc1 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.h
@@ -96,7 +96,7 @@ sp_get_cached_tile(struct softpipe_context *softpipe,
                    struct softpipe_tile_cache *tc, int x, int y);
 
 extern const struct softpipe_cached_tile *
-sp_get_cached_tile_tex(struct pipe_context *pipe,
+sp_get_cached_tile_tex(struct softpipe_context *softpipe,
                        struct softpipe_tile_cache *tc, int x, int y, int z,
                        int face, int level);
 
-- 
cgit v1.2.3


From 434e255eae90b0f3d836d452b7d3b0c5aadf78b8 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Mon, 24 Nov 2008 10:02:44 -0700
Subject: tgsi: add tgsi_declaration fields for centroid sampling, invariant
 optimization

---
 src/gallium/auxiliary/tgsi/tgsi_build.c    | 2 ++
 src/gallium/auxiliary/tgsi/tgsi_dump.c     | 8 ++++++++
 src/gallium/include/pipe/p_shader_tokens.h | 4 +++-
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 38fcaf88293..eee2db7771a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -119,6 +119,8 @@ tgsi_default_declaration( void )
    declaration.UsageMask = TGSI_WRITEMASK_XYZW;
    declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT;
    declaration.Semantic = 0;
+   declaration.Centroid = 0;
+   declaration.Invariant = 0;
    declaration.Padding = 0;
    declaration.Extended = 0;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 3177f549523..c2a0ac5aff8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -246,6 +246,14 @@ iter_declaration(
    TXT( ", " );
    ENM( decl->Declaration.Interpolate, interpolate_names );
 
+   if (decl->Declaration.Centroid) {
+      TXT( ", CENTROID" );
+   }
+
+   if (decl->Declaration.Invariant) {
+      TXT( ", INVARIANT" );
+   }
+
    EOL();
 
    return TRUE;
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 78c20de3e27..1292367c665 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -112,7 +112,9 @@ struct tgsi_declaration
    unsigned UsageMask   : 4;  /* bitmask of TGSI_WRITEMASK_x flags */
    unsigned Interpolate : 4;  /* TGSI_INTERPOLATE_ */
    unsigned Semantic    : 1;  /* BOOL, any semantic info? */
-   unsigned Padding     : 6;
+   unsigned Centroid    : 1;  /* centroid sampling */
+   unsigned Invariant   : 1;  /* invariant optimization */
+   unsigned Padding     : 4;
    unsigned Extended    : 1;  /* BOOL */
 };
 
-- 
cgit v1.2.3


From 4de360e67d83cd6503fb8ad053bb8afe507db5fa Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Tue, 25 Nov 2008 09:02:27 -0700
Subject: gallium: added centroid/invarient fields to declarations

---
 src/gallium/auxiliary/tgsi/tgsi_build.c | 6 ++++++
 src/gallium/auxiliary/tgsi/tgsi_build.h | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index eee2db7771a..fd02c2c87c8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -133,6 +133,8 @@ tgsi_build_declaration(
    unsigned usage_mask,
    unsigned interpolate,
    unsigned semantic,
+   unsigned centroid,
+   unsigned invariant,
    struct tgsi_header *header )
 {
    struct tgsi_declaration declaration;
@@ -145,6 +147,8 @@ tgsi_build_declaration(
    declaration.UsageMask = usage_mask;
    declaration.Interpolate = interpolate;
    declaration.Semantic = semantic;
+   declaration.Centroid = centroid;
+   declaration.Invariant = invariant;
 
    header_bodysize_grow( header );
 
@@ -196,6 +200,8 @@ tgsi_build_full_declaration(
       full_decl->Declaration.UsageMask,
       full_decl->Declaration.Interpolate,
       full_decl->Declaration.Semantic,
+      full_decl->Declaration.Centroid,
+      full_decl->Declaration.Invariant,
       header );
 
    if (maxsize <= size)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 7d6234746a2..0fd6fabd83d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -71,6 +71,8 @@ tgsi_build_declaration(
    unsigned usage_mask,
    unsigned interpolate,
    unsigned semantic,
+   unsigned centroid,
+   unsigned invariant,
    struct tgsi_header *header );
 
 struct tgsi_full_declaration
-- 
cgit v1.2.3


From 18a1389077c72717dfbe6ae10793f3329d13b848 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 26 Nov 2008 12:56:23 +0100
Subject: tgsi: Implement OPCODE_ROUND for SSE2 backend.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 8dfd2ced089..8574d79da1b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1001,6 +1001,29 @@ emit_rcp (
       make_xmm( xmm_src ) );
 }
 
+static void PIPE_CDECL
+rnd4f(
+   float *store )
+{
+   store[0] = floorf( store[0] + 0.5f );
+   store[1] = floorf( store[1] + 0.5f );
+   store[2] = floorf( store[2] + 0.5f );
+   store[3] = floorf( store[3] + 0.5f );
+}
+
+static void
+emit_rnd(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_save,
+      xmm_dst,
+      rnd4f );
+}
+
 static void
 emit_rsqrt(
    struct x86_function *func,
@@ -1811,7 +1834,11 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_ROUND:
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_rnd( func, 0, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_EXPBASE2:
-- 
cgit v1.2.3


From adf14090fb6e43a25eabb13796d5a9385d8511ea Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 26 Nov 2008 13:17:25 +0100
Subject: tgsi: Implement OPCODE_ARR.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 5 +----
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 7 ++++++-
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 0fdfb91d393..1981d8b68f9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2124,6 +2124,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_ROUND:
+   case TGSI_OPCODE_ARR:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          micro_rnd( &r[0], &r[0] );
@@ -2424,10 +2425,6 @@ exec_instruction(
       assert (0);
       break;
 
-   case TGSI_OPCODE_ARR:
-      assert (0);
-      break;
-
    case TGSI_OPCODE_BRA:
       assert (0);
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 8574d79da1b..d2d431980b1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2078,7 +2078,12 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_ARR:
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_rnd( func, 0, 0 );
+         emit_f2it( func, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_BRA:
-- 
cgit v1.2.3


From 685fd2c035e284db2447ede0f6da278adaa70a0d Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 26 Nov 2008 12:56:23 +0100
Subject: tgsi: Implement OPCODE_ROUND for SSE2 backend.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 4d59106dbfa..151d2b1c379 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -842,6 +842,29 @@ emit_rcp (
       make_xmm( xmm_src ) );
 }
 
+static void PIPE_CDECL
+rnd4f(
+   float *store )
+{
+   store[0] = floorf( store[0] + 0.5f );
+   store[1] = floorf( store[1] + 0.5f );
+   store[2] = floorf( store[2] + 0.5f );
+   store[3] = floorf( store[3] + 0.5f );
+}
+
+static void
+emit_rnd(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_save,
+      xmm_dst,
+      rnd4f );
+}
+
 static void
 emit_rsqrt(
    struct x86_function *func,
@@ -1639,7 +1662,11 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_ROUND:
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_rnd( func, 0, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_EXPBASE2:
-- 
cgit v1.2.3


From eee3d216049f21507a3ff6908f1d506c683efad0 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 26 Nov 2008 13:17:25 +0100
Subject: tgsi: Implement OPCODE_ARR.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 5 +----
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 7 ++++++-
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 41dffc3dbaf..7114119fe29 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2067,6 +2067,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_ROUND:
+   case TGSI_OPCODE_ARR:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          micro_rnd( &r[0], &r[0] );
@@ -2367,10 +2368,6 @@ exec_instruction(
       assert (0);
       break;
 
-   case TGSI_OPCODE_ARR:
-      assert (0);
-      break;
-
    case TGSI_OPCODE_BRA:
       assert (0);
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 151d2b1c379..752c7c3c315 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1906,7 +1906,12 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_ARR:
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_rnd( func, 0, 0 );
+         emit_f2it( func, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_BRA:
-- 
cgit v1.2.3


From 823aac36d5580ea46f76ccec3fd31c91f168274e Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 26 Nov 2008 13:54:28 +0100
Subject: tgsi: Implement OPCODE_SSG/SGN.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 18 +++++++++++++++++-
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 30 +++++++++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 1981d8b68f9..9fd0497043f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -828,6 +828,17 @@ micro_rnd(
    dst->f[3] = floorf( src->f[3] + 0.5f );
 }
 
+static void
+micro_sgn(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
+   dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
+   dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
+   dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
+}
+
 static void
 micro_shl(
    union tgsi_exec_channel *dst,
@@ -2480,7 +2491,12 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_SSG:
-      assert (0);
+   /* TGSI_OPCODE_SGN */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_sgn( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_CMP:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index d2d431980b1..cac44af7f41 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1083,6 +1083,29 @@ emit_setsign(
          TGSI_EXEC_TEMP_80000000_C ) );
 }
 
+static void PIPE_CDECL
+sgn4f(
+   float *store )
+{
+   store[0] = store[0] < 0.0f ? -1.0f : store[0] > 0.0f ? 1.0f : 0.0f;
+   store[1] = store[1] < 0.0f ? -1.0f : store[1] > 0.0f ? 1.0f : 0.0f;
+   store[2] = store[2] < 0.0f ? -1.0f : store[2] > 0.0f ? 1.0f : 0.0f;
+   store[3] = store[3] < 0.0f ? -1.0f : store[3] > 0.0f ? 1.0f : 0.0f;
+}
+
+static void
+emit_sgn(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_save,
+      xmm_dst,
+      sgn4f );
+}
+
 static void PIPE_CDECL
 sin4f(
    float *store )
@@ -2102,7 +2125,12 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_SSG:
-      return 0;
+   /* TGSI_OPCODE_SGN */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_sgn( func, 0, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_CMP:
-- 
cgit v1.2.3


From 6e96bd70e56f6ba4ff444c584376475a136bca26 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 26 Nov 2008 07:38:31 -0700
Subject: Revert "tgsi: Implement OPCODE_ROUND for SSE2 backend."

This reverts commit 685fd2c035e284db2447ede0f6da278adaa70a0d.

Does not compile since emit_rnd() is trying to pass 4 params to
emit_func_call_dst() which takes 3 params.
---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 29 +----------------------------
 1 file changed, 1 insertion(+), 28 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 752c7c3c315..4bda756dbc1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -842,29 +842,6 @@ emit_rcp (
       make_xmm( xmm_src ) );
 }
 
-static void PIPE_CDECL
-rnd4f(
-   float *store )
-{
-   store[0] = floorf( store[0] + 0.5f );
-   store[1] = floorf( store[1] + 0.5f );
-   store[2] = floorf( store[2] + 0.5f );
-   store[3] = floorf( store[3] + 0.5f );
-}
-
-static void
-emit_rnd(
-   struct x86_function *func,
-   unsigned xmm_save, 
-   unsigned xmm_dst )
-{
-   emit_func_call_dst(
-      func,
-      xmm_save,
-      xmm_dst,
-      rnd4f );
-}
-
 static void
 emit_rsqrt(
    struct x86_function *func,
@@ -1662,11 +1639,7 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_ROUND:
-      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( func, *inst, 0, 0, chan_index );
-         emit_rnd( func, 0, 0 );
-         STORE( func, *inst, 0, 0, chan_index );
-      }
+      return 0;
       break;
 
    case TGSI_OPCODE_EXPBASE2:
-- 
cgit v1.2.3


From 1250526e3012f6958679c5dcdcb990387b53479b Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Wed, 26 Nov 2008 07:41:19 -0700
Subject: gallium: disable TGSI_OPCODE_ARR case until emit_rnd() is redone.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 4bda756dbc1..29442b4ec4f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1878,6 +1878,7 @@ emit_instruction(
       return 0;
       break;
 
+#if 0
    case TGSI_OPCODE_ARR:
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 0, 0, chan_index );
@@ -1886,7 +1887,7 @@ emit_instruction(
          STORE( func, *inst, 0, 0, chan_index );
       }
       break;
-
+#endif
    case TGSI_OPCODE_BRA:
       return 0;
       break;
-- 
cgit v1.2.3


From 1347439a87a26f261ab07c914ea4e69965703ee2 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 26 Nov 2008 13:54:28 +0100
Subject: tgsi: Implement OPCODE_SSG/SGN.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 18 +++++++++++++++++-
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 30 +++++++++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 7114119fe29..65a0f39fdb4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -839,6 +839,17 @@ micro_rnd(
    dst->f[3] = floorf( src->f[3] + 0.5f );
 }
 
+static void
+micro_sgn(
+   union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src )
+{
+   dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
+   dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
+   dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
+   dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
+}
+
 static void
 micro_shl(
    union tgsi_exec_channel *dst,
@@ -2423,7 +2434,12 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_SSG:
-      assert (0);
+   /* TGSI_OPCODE_SGN */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         micro_sgn( &r[0], &r[0] );
+         STORE( &r[0], 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_CMP:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 29442b4ec4f..499d8655421 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -901,6 +901,29 @@ emit_setsign(
          TGSI_EXEC_TEMP_80000000_C ) );
 }
 
+static void PIPE_CDECL
+sgn4f(
+   float *store )
+{
+   store[0] = store[0] < 0.0f ? -1.0f : store[0] > 0.0f ? 1.0f : 0.0f;
+   store[1] = store[1] < 0.0f ? -1.0f : store[1] > 0.0f ? 1.0f : 0.0f;
+   store[2] = store[2] < 0.0f ? -1.0f : store[2] > 0.0f ? 1.0f : 0.0f;
+   store[3] = store[3] < 0.0f ? -1.0f : store[3] > 0.0f ? 1.0f : 0.0f;
+}
+
+static void
+emit_sgn(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_save,
+      xmm_dst,
+      sgn4f );
+}
+
 static void PIPE_CDECL
 sin4f(
    float *store )
@@ -1904,7 +1927,12 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_SSG:
-      return 0;
+   /* TGSI_OPCODE_SGN */
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_sgn( func, 0, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_CMP:
-- 
cgit v1.2.3


From 972922b1bf28346568bedfadc2198ed93230f5d7 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 26 Nov 2008 12:56:23 +0100
Subject: tgsi: Implement OPCODE_ROUND for SSE2 backend.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 499d8655421..31480c40da0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -842,6 +842,29 @@ emit_rcp (
       make_xmm( xmm_src ) );
 }
 
+static void PIPE_CDECL
+rnd4f(
+   float *store )
+{
+   store[0] = floorf( store[0] + 0.5f );
+   store[1] = floorf( store[1] + 0.5f );
+   store[2] = floorf( store[2] + 0.5f );
+   store[3] = floorf( store[3] + 0.5f );
+}
+
+static void
+emit_rnd(
+   struct x86_function *func,
+   unsigned xmm_save, 
+   unsigned xmm_dst )
+{
+   emit_func_call_dst(
+      func,
+      xmm_save,
+      xmm_dst,
+      rnd4f );
+}
+
 static void
 emit_rsqrt(
    struct x86_function *func,
@@ -1662,7 +1685,11 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_ROUND:
-      return 0;
+      FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( func, *inst, 0, 0, chan_index );
+         emit_rnd( func, 0, 0 );
+         STORE( func, *inst, 0, 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_EXPBASE2:
-- 
cgit v1.2.3


From 527e76a7ec7f330bd321fe9632a0fadedbab1d41 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 26 Nov 2008 17:20:07 +0100
Subject: tgsi: Fix build.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 31480c40da0..68f5510351f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -855,12 +855,10 @@ rnd4f(
 static void
 emit_rnd(
    struct x86_function *func,
-   unsigned xmm_save, 
    unsigned xmm_dst )
 {
    emit_func_call_dst(
       func,
-      xmm_save,
       xmm_dst,
       rnd4f );
 }
@@ -937,12 +935,10 @@ sgn4f(
 static void
 emit_sgn(
    struct x86_function *func,
-   unsigned xmm_save, 
    unsigned xmm_dst )
 {
    emit_func_call_dst(
       func,
-      xmm_save,
       xmm_dst,
       sgn4f );
 }
@@ -1687,7 +1683,7 @@ emit_instruction(
    case TGSI_OPCODE_ROUND:
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 0, 0, chan_index );
-         emit_rnd( func, 0, 0 );
+         emit_rnd( func, 0 );
          STORE( func, *inst, 0, 0, chan_index );
       }
       break;
@@ -1957,7 +1953,7 @@ emit_instruction(
    /* TGSI_OPCODE_SGN */
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 0, 0, chan_index );
-         emit_sgn( func, 0, 0 );
+         emit_sgn( func, 0 );
          STORE( func, *inst, 0, 0, chan_index );
       }
       break;
-- 
cgit v1.2.3


From 158a5f75d8436facfe8163845a942979899213fe Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Wed, 26 Nov 2008 22:29:49 +0100
Subject: tgsi: Reenable OPCODE_ARR.

---
 src/gallium/auxiliary/tgsi/tgsi_sse2.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 68f5510351f..ff869c8312f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1924,16 +1924,15 @@ emit_instruction(
       return 0;
       break;
 
-#if 0
    case TGSI_OPCODE_ARR:
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 0, 0, chan_index );
-         emit_rnd( func, 0, 0 );
+         emit_rnd( func, 0 );
          emit_f2it( func, 0 );
          STORE( func, *inst, 0, 0, chan_index );
       }
       break;
-#endif
+
    case TGSI_OPCODE_BRA:
       return 0;
       break;
-- 
cgit v1.2.3


From dd55083ac1c13723dba6be71f161e2ca7cac7c66 Mon Sep 17 00:00:00 2001
From: Brian <brian.paul@tungstengraphics.com>
Date: Fri, 28 Nov 2008 10:28:44 -0700
Subject: gallium: minor texture-related clean-ups, comments, etc

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c       |  4 ++--
 src/gallium/drivers/softpipe/sp_tex_sample.c | 28 ++++++++++++++--------------
 src/gallium/drivers/softpipe/sp_tex_sample.h |  6 +++---
 3 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 9fd0497043f..a2d2cfd1fcc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1535,7 +1535,7 @@ exec_kilp(struct tgsi_exec_machine *mach,
 
 
 /*
- * Fetch a texel using STR texture coordinates.
+ * Fetch a four texture samples using STR texture coordinates.
  */
 static void
 fetch_texel( struct tgsi_sampler *sampler,
@@ -1569,7 +1569,7 @@ exec_tex(struct tgsi_exec_machine *mach,
          boolean projected)
 {
    const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
-   union tgsi_exec_channel r[8];
+   union tgsi_exec_channel r[4];
    uint chan_index;
    float lodBias;
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index b66caf95078..181247739b5 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -516,6 +516,10 @@ compute_lambda(const struct pipe_texture *tex,
  * 2. Determine if we're minifying or magnifying
  * 3. If minifying, choose mipmap levels
  * 4. Return image filter to use within mipmap images
+ * \param level0  Returns first mipmap level to sample from
+ * \param level1  Returns second mipmap level to sample from
+ * \param levelBlend  Returns blend factor between levels, in [0,1]
+ * \param imgFilter  Returns either the min or mag filter, depending on lambda
  */
 static void
 choose_mipmap_levels(const struct pipe_texture *texture,
@@ -527,7 +531,6 @@ choose_mipmap_levels(const struct pipe_texture *texture,
                      unsigned *level0, unsigned *level1, float *levelBlend,
                      unsigned *imgFilter)
 {
-
    if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
       /* no mipmap selection needed */
       *level0 = *level1 = CLAMP((int) sampler->min_lod,
@@ -589,7 +592,6 @@ choose_mipmap_levels(const struct pipe_texture *texture,
 
 /**
  * Get a texel from a texture, using the texture tile cache.
- * Called by the TGSI interpreter.
  *
  * \param face  the cube face in 0..5
  * \param level  the mipmap level
@@ -603,7 +605,7 @@ choose_mipmap_levels(const struct pipe_texture *texture,
  * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
  */
 static void
-get_texel(struct tgsi_sampler *tgsi_sampler,
+get_texel(const struct tgsi_sampler *tgsi_sampler,
           unsigned face, unsigned level, int x, int y, int z,
           float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
 {
@@ -693,7 +695,7 @@ shadow_compare(uint compare_func,
  * Could probably extend for 3D...
  */
 static void
-sp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
+sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler,
                          const float s[QUAD_SIZE],
                          const float t[QUAD_SIZE],
                          const float p[QUAD_SIZE],
@@ -805,8 +807,8 @@ sp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
 }
 
 
-static void
-sp_get_samples_1d(struct tgsi_sampler *sampler,
+static INLINE void
+sp_get_samples_1d(const struct tgsi_sampler *sampler,
                   const float s[QUAD_SIZE],
                   const float t[QUAD_SIZE],
                   const float p[QUAD_SIZE],
@@ -819,8 +821,8 @@ sp_get_samples_1d(struct tgsi_sampler *sampler,
 }
 
 
-static void
-sp_get_samples_2d(struct tgsi_sampler *sampler,
+static INLINE void
+sp_get_samples_2d(const struct tgsi_sampler *sampler,
                   const float s[QUAD_SIZE],
                   const float t[QUAD_SIZE],
                   const float p[QUAD_SIZE],
@@ -832,8 +834,8 @@ sp_get_samples_2d(struct tgsi_sampler *sampler,
 }
 
 
-static void
-sp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
+static INLINE void
+sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler,
                   const float s[QUAD_SIZE],
                   const float t[QUAD_SIZE],
                   const float p[QUAD_SIZE],
@@ -960,7 +962,7 @@ sp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
 
 
 static void
-sp_get_samples_cube(struct tgsi_sampler *sampler,
+sp_get_samples_cube(const struct tgsi_sampler *sampler,
                     const float s[QUAD_SIZE],
                     const float t[QUAD_SIZE],
                     const float p[QUAD_SIZE],
@@ -977,7 +979,7 @@ sp_get_samples_cube(struct tgsi_sampler *sampler,
 
 
 static void
-sp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
+sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler,
                     const float s[QUAD_SIZE],
                     const float t[QUAD_SIZE],
                     const float p[QUAD_SIZE],
@@ -1047,8 +1049,6 @@ sp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
 }
 
 
-
-
 /**
  * Called via tgsi_sampler::get_samples()
  * Use the sampler's state setting to get a filtered RGBA value
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 783169c9397..f0a8a787782 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -46,10 +46,10 @@ struct sp_shader_sampler
 
 
 
-static INLINE struct sp_shader_sampler *
-sp_shader_sampler(struct tgsi_sampler *sampler)
+static INLINE const struct sp_shader_sampler *
+sp_shader_sampler(const struct tgsi_sampler *sampler)
 {
-   return (struct sp_shader_sampler *) sampler;
+   return (const struct sp_shader_sampler *) sampler;
 }
 
 
-- 
cgit v1.2.3


From 3616fb08da8ef392db1d5ccab55b8eb9f6a6f32b Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Tue, 16 Dec 2008 15:39:14 -0700
Subject: tgsi: use flr(), not trunc() for ARL

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a2d2cfd1fcc..989b6eec27b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1797,7 +1797,7 @@ exec_instruction(
    case TGSI_OPCODE_ARL:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_trunc( &r[0], &r[0] );
+         micro_flr( &r[0], &r[0] );
          STORE( &r[0], 0, chan_index );
       }
       break;
-- 
cgit v1.2.3


From db99ca3bc999137e6d523aa24e13cc5cfbb2b52c Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Dec 2008 18:06:38 -0700
Subject: tgsi: scan for additional info: uses_fogcoord, uses_frontfacing

---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 49 ++++++++++++++++++++++++----------
 src/gallium/auxiliary/tgsi/tgsi_scan.h |  3 ++-
 2 files changed, 37 insertions(+), 15 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index be4870a4983..cfc7ea8e898 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -2,6 +2,7 @@
  * 
  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
+ * Copyright 2008 VMware, Inc.  All rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -79,38 +80,61 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_INSTRUCTION:
          {
-            struct tgsi_full_instruction *fullinst
+            const struct tgsi_full_instruction *fullinst
                = &parse.FullToken.FullInstruction;
 
             assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
             info->opcode_count[fullinst->Instruction.Opcode]++;
+
+            /* special case: scan fragment shaders for use of the fog
+             * input/attribute.  The X component is fog, the Y component
+             * is the front/back-face flag.
+             */
+            if (procType == TGSI_PROCESSOR_FRAGMENT) {
+               uint i;
+               for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
+                  const struct tgsi_full_src_register *src =
+                     &fullinst->FullSrcRegisters[i];
+                  if (src->SrcRegister.File == TGSI_FILE_INPUT) {
+                     const int ind = src->SrcRegister.Index;
+                     if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) {
+                        if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_X) {
+                           info->uses_fogcoord = TRUE;
+                        }
+                        else if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_Y) {
+                           info->uses_frontfacing = TRUE;
+                        }
+                     }
+                  }
+               }
+            }
          }
          break;
 
       case TGSI_TOKEN_TYPE_DECLARATION:
          {
-            struct tgsi_full_declaration *fulldecl
+            const struct tgsi_full_declaration *fulldecl
                = &parse.FullToken.FullDeclaration;
             uint file = fulldecl->Declaration.File;
-            uint i;
-            for (i = fulldecl->DeclarationRange.First;
-                 i <= fulldecl->DeclarationRange.Last;
-                 i++) {
+            uint reg;
+            for (reg = fulldecl->DeclarationRange.First;
+                 reg <= fulldecl->DeclarationRange.Last;
+                 reg++) {
 
                /* only first 32 regs will appear in this bitfield */
-               info->file_mask[file] |= (1 << i);
+               info->file_mask[file] |= (1 << reg);
                info->file_count[file]++;
                info->file_max[file] = MAX2(info->file_max[file], (int)i);
 
                if (file == TGSI_FILE_INPUT) {
-                  info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
-                  info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
+                  info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
+                  info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
                   info->num_inputs++;
                }
 
                if (file == TGSI_FILE_OUTPUT) {
-                  info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
-                  info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
+                  info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
+                  info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
                   info->num_outputs++;
                }
 
@@ -133,9 +157,6 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
       }
    }
 
-   assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs );
-   assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs );
-
    info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] ||
                       info->opcode_count[TGSI_OPCODE_KILP]);
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 5cb6efb3439..2c1a75bc812 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -41,7 +41,6 @@ struct tgsi_shader_info
 {
    uint num_tokens;
 
-   /* XXX eventually remove the corresponding fields from pipe_shader_state: */
    ubyte num_inputs;
    ubyte num_outputs;
    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
@@ -59,6 +58,8 @@ struct tgsi_shader_info
 
    boolean writes_z;  /**< does fragment shader write Z value? */
    boolean uses_kill;  /**< KIL or KILP instruction used? */
+   boolean uses_fogcoord; /**< fragment shader uses fog coord? */
+   boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */
 };
 
 
-- 
cgit v1.2.3


From 59a168d5c9b5f478e4e8bedcd8522e359e98987e Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Dec 2008 18:06:38 -0700
Subject: tgsi: scan for additional info: uses_fogcoord, uses_frontfacing

---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 49 ++++++++++++++++++++++++----------
 src/gallium/auxiliary/tgsi/tgsi_scan.h |  3 ++-
 2 files changed, 37 insertions(+), 15 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index be4870a4983..cfc7ea8e898 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -2,6 +2,7 @@
  * 
  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
+ * Copyright 2008 VMware, Inc.  All rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -79,38 +80,61 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_INSTRUCTION:
          {
-            struct tgsi_full_instruction *fullinst
+            const struct tgsi_full_instruction *fullinst
                = &parse.FullToken.FullInstruction;
 
             assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
             info->opcode_count[fullinst->Instruction.Opcode]++;
+
+            /* special case: scan fragment shaders for use of the fog
+             * input/attribute.  The X component is fog, the Y component
+             * is the front/back-face flag.
+             */
+            if (procType == TGSI_PROCESSOR_FRAGMENT) {
+               uint i;
+               for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
+                  const struct tgsi_full_src_register *src =
+                     &fullinst->FullSrcRegisters[i];
+                  if (src->SrcRegister.File == TGSI_FILE_INPUT) {
+                     const int ind = src->SrcRegister.Index;
+                     if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) {
+                        if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_X) {
+                           info->uses_fogcoord = TRUE;
+                        }
+                        else if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_Y) {
+                           info->uses_frontfacing = TRUE;
+                        }
+                     }
+                  }
+               }
+            }
          }
          break;
 
       case TGSI_TOKEN_TYPE_DECLARATION:
          {
-            struct tgsi_full_declaration *fulldecl
+            const struct tgsi_full_declaration *fulldecl
                = &parse.FullToken.FullDeclaration;
             uint file = fulldecl->Declaration.File;
-            uint i;
-            for (i = fulldecl->DeclarationRange.First;
-                 i <= fulldecl->DeclarationRange.Last;
-                 i++) {
+            uint reg;
+            for (reg = fulldecl->DeclarationRange.First;
+                 reg <= fulldecl->DeclarationRange.Last;
+                 reg++) {
 
                /* only first 32 regs will appear in this bitfield */
-               info->file_mask[file] |= (1 << i);
+               info->file_mask[file] |= (1 << reg);
                info->file_count[file]++;
                info->file_max[file] = MAX2(info->file_max[file], (int)i);
 
                if (file == TGSI_FILE_INPUT) {
-                  info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
-                  info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
+                  info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
+                  info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
                   info->num_inputs++;
                }
 
                if (file == TGSI_FILE_OUTPUT) {
-                  info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName;
-                  info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex;
+                  info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
+                  info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
                   info->num_outputs++;
                }
 
@@ -133,9 +157,6 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
       }
    }
 
-   assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs );
-   assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs );
-
    info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] ||
                       info->opcode_count[TGSI_OPCODE_KILP]);
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 5cb6efb3439..2c1a75bc812 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -41,7 +41,6 @@ struct tgsi_shader_info
 {
    uint num_tokens;
 
-   /* XXX eventually remove the corresponding fields from pipe_shader_state: */
    ubyte num_inputs;
    ubyte num_outputs;
    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
@@ -59,6 +58,8 @@ struct tgsi_shader_info
 
    boolean writes_z;  /**< does fragment shader write Z value? */
    boolean uses_kill;  /**< KIL or KILP instruction used? */
+   boolean uses_fogcoord; /**< fragment shader uses fog coord? */
+   boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */
 };
 
 
-- 
cgit v1.2.3


From b8e68f2e55ed22a97b7f976fe9556b2abcc49ea9 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Fri, 14 Nov 2008 13:26:01 +0100
Subject: tgsi: Keep address register as a floating point.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 65a0f39fdb4..0756d7db176 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -466,17 +466,6 @@ micro_exp2(
 #endif
 }
 
-static void
-micro_f2it(
-   union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src )
-{
-   dst->i[0] = (int) src->f[0];
-   dst->i[1] = (int) src->f[1];
-   dst->i[2] = (int) src->f[2];
-   dst->i[3] = (int) src->f[3];
-}
-
 static void
 micro_f2ut(
    union tgsi_exec_channel *dst,
@@ -1075,10 +1064,10 @@ fetch_source(
          &indir_index );
 
       /* add value of address register to the offset */
-      index.i[0] += indir_index.i[0];
-      index.i[1] += indir_index.i[1];
-      index.i[2] += indir_index.i[2];
-      index.i[3] += indir_index.i[3];
+      index.i[0] += (int) indir_index.f[0];
+      index.i[1] += (int) indir_index.f[1];
+      index.i[2] += (int) indir_index.f[2];
+      index.i[3] += (int) indir_index.f[3];
 
       /* for disabled execution channels, zero-out the index to
        * avoid using a potential garbage value.
@@ -1131,10 +1120,10 @@ fetch_source(
             &index2,
             &indir_index );
 
-         index.i[0] += indir_index.i[0];
-         index.i[1] += indir_index.i[1];
-         index.i[2] += indir_index.i[2];
-         index.i[3] += indir_index.i[3];
+         index.i[0] += (int) indir_index.f[0];
+         index.i[1] += (int) indir_index.f[1];
+         index.i[2] += (int) indir_index.f[2];
+         index.i[3] += (int) indir_index.f[3];
 
          /* for disabled execution channels, zero-out the index to
           * avoid using a potential garbage value.
@@ -1754,7 +1743,7 @@ exec_instruction(
    case TGSI_OPCODE_ARL:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_f2it( &r[0], &r[0] );
+         micro_trunc( &r[0], &r[0] );
          STORE( &r[0], 0, chan_index );
       }
       break;
-- 
cgit v1.2.3


From 4b3c74b4d6786475bc45f883612e76069e722cbd Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@tungstengraphics.com>
Date: Fri, 14 Nov 2008 13:31:06 +0100
Subject: tgsi: Return 0.0 for negative constant register indices.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 0756d7db176..d55c3372077 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -958,14 +958,22 @@ fetch_src_file_channel(
       switch( file ) {
       case TGSI_FILE_CONSTANT:
          assert(mach->Consts);
-         assert(index->i[0] >= 0);
-         assert(index->i[1] >= 0);
-         assert(index->i[2] >= 0);
-         assert(index->i[3] >= 0);
-         chan->f[0] = mach->Consts[index->i[0]][swizzle];
-         chan->f[1] = mach->Consts[index->i[1]][swizzle];
-         chan->f[2] = mach->Consts[index->i[2]][swizzle];
-         chan->f[3] = mach->Consts[index->i[3]][swizzle];
+         if (index->i[0] < 0)
+            chan->f[0] = 0.0f;
+         else
+            chan->f[0] = mach->Consts[index->i[0]][swizzle];
+         if (index->i[1] < 0)
+            chan->f[1] = 0.0f;
+         else
+            chan->f[1] = mach->Consts[index->i[1]][swizzle];
+         if (index->i[2] < 0)
+            chan->f[2] = 0.0f;
+         else
+            chan->f[2] = mach->Consts[index->i[2]][swizzle];
+         if (index->i[3] < 0)
+            chan->f[3] = 0.0f;
+         else
+            chan->f[3] = mach->Consts[index->i[3]][swizzle];
          break;
 
       case TGSI_FILE_INPUT:
-- 
cgit v1.2.3


From ed7ba03256fc4503d5d7483d032014ac9e8242fe Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 23 Dec 2008 15:13:59 +0100
Subject: tgsi: Dump indirect register swizzle.

---
 src/gallium/auxiliary/tgsi/tgsi_dump.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 3177f549523..485e96379c0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -180,14 +180,16 @@ _dump_register_ind(
    uint file,
    int index,
    uint ind_file,
-   int ind_index )
+   int ind_index,
+   uint ind_swizzle )
 {
    ENM( file, file_names );
    CHR( '[' );
    ENM( ind_file, file_names );
    CHR( '[' );
    SID( ind_index );
-   CHR( ']' );
+   TXT( "]." );
+   ENM( ind_swizzle, swizzle_names );
    if (index != 0) {
       if (index > 0)
          CHR( '+' );
@@ -377,7 +379,8 @@ iter_instruction(
             src->SrcRegister.File,
             src->SrcRegister.Index,
             src->SrcRegisterInd.File,
-            src->SrcRegisterInd.Index );
+            src->SrcRegisterInd.Index,
+            src->SrcRegisterInd.SwizzleX );
       }
       else {
          _dump_register(
-- 
cgit v1.2.3


From fc4cea08fe8320438c72de7f4af2d7091681dca3 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Tue, 23 Dec 2008 18:16:49 +0000
Subject: tgsi: fix incomplete rename of loop counter variable

---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index cfc7ea8e898..1239f6c0765 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -124,7 +124,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                /* only first 32 regs will appear in this bitfield */
                info->file_mask[file] |= (1 << reg);
                info->file_count[file]++;
-               info->file_max[file] = MAX2(info->file_max[file], (int)i);
+               info->file_max[file] = MAX2(info->file_max[file], (int)reg);
 
                if (file == TGSI_FILE_INPUT) {
                   info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
-- 
cgit v1.2.3


From 49c40b10c72e64977971ccb96abfc8767ed4c6ea Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 7 Nov 2008 13:02:43 -0700
Subject: gallium: implement TGSI_OPCODE_DP2A, add sqrt to NRM3/NRM4

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index d55c3372077..f98b66dc0b4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2041,7 +2041,21 @@ exec_instruction(
 
    case TGSI_OPCODE_DOT2ADD:
       /* TGSI_OPCODE_DP2A */
-      assert (0);
+      FETCH( &r[0], 0, CHAN_X );
+      FETCH( &r[1], 1, CHAN_X );
+      micro_mul( &r[0], &r[0], &r[1] );
+
+      FETCH( &r[1], 0, CHAN_Y );
+      FETCH( &r[2], 1, CHAN_Y );
+      micro_mul( &r[1], &r[1], &r[2] );
+      micro_add( &r[0], &r[0], &r[1] );
+
+      FETCH( &r[2], 2, CHAN_X );
+      micro_add( &r[0], &r[0], &r[2] );
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
       break;
 
    case TGSI_OPCODE_INDEX:
@@ -2488,7 +2502,8 @@ exec_instruction(
          micro_mul( &dot, &r[2], &r[2] );
          micro_add( &tmp, &tmp, &dot );
 
-         /* tmp = 1 / tmp */
+         /* tmp = 1 / sqrt(tmp) */
+         micro_sqrt( &tmp, &tmp );
          micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
 
          /* note: w channel is undefined */
@@ -2521,7 +2536,8 @@ exec_instruction(
          micro_mul( &dot, &r[3], &r[3] );
          micro_add( &tmp, &tmp, &dot );
 
-         /* tmp = 1 / tmp */
+         /* tmp = 1 / sqrt(tmp) */
+         micro_sqrt( &tmp, &tmp );
          micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
 
          FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-- 
cgit v1.2.3


From 0e0fb49c4515e14c54f23c1d3f8b2e981fe404a2 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 30 Dec 2008 17:15:34 +0000
Subject: gallium: Remove unused variables.

---
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c  | 1 -
 src/gallium/drivers/softpipe/sp_quad_fs.c | 1 -
 src/mesa/state_tracker/st_framebuffer.c   | 1 -
 3 files changed, 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index be25cb45a0a..c575b6c3e1f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -646,7 +646,6 @@ tgsi_dump_c(
    struct tgsi_full_declaration fd;
    uint ignored = flags & TGSI_DUMP_C_IGNORED;
    uint deflt = flags & TGSI_DUMP_C_DEFAULT;
-   uint instno = 0;
 
    tgsi_parse_init( &parse, tokens );
 
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 40329a95627..5dacbbe55f8 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -171,7 +171,6 @@ static void shade_destroy(struct quad_stage *qs)
 struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
 {
    struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
-   uint i;
 
    /* allocate storage for program inputs/outputs, aligned to 16 bytes */
    qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16);
diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c
index 43ac195e67c..ea22a943039 100644
--- a/src/mesa/state_tracker/st_framebuffer.c
+++ b/src/mesa/state_tracker/st_framebuffer.c
@@ -170,7 +170,6 @@ st_set_framebuffer_surface(struct st_framebuffer *stfb,
                            uint surfIndex, struct pipe_surface *surf)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct st_context *st;
    static const GLuint invalid_size = 9999999;
    struct st_renderbuffer *strb;
    GLuint width, height, i;
-- 
cgit v1.2.3


From dc48ae97dcd84acf691b33b0ea2e76c5fdfe18e1 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Mon, 24 Nov 2008 10:02:44 -0700
Subject: tgsi: add tgsi_declaration fields for centroid sampling, invariant
 optimization

(cherry picked from commit 434e255eae90b0f3d836d452b7d3b0c5aadf78b8)
---
 src/gallium/auxiliary/tgsi/tgsi_build.c    | 2 ++
 src/gallium/auxiliary/tgsi/tgsi_dump.c     | 8 ++++++++
 src/gallium/include/pipe/p_shader_tokens.h | 4 +++-
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 74614d36884..1219c7da182 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -119,6 +119,8 @@ tgsi_default_declaration( void )
    declaration.UsageMask = TGSI_WRITEMASK_XYZW;
    declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT;
    declaration.Semantic = 0;
+   declaration.Centroid = 0;
+   declaration.Invariant = 0;
    declaration.Padding = 0;
    declaration.Extended = 0;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 485e96379c0..2ed8c2bf07b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -248,6 +248,14 @@ iter_declaration(
    TXT( ", " );
    ENM( decl->Declaration.Interpolate, interpolate_names );
 
+   if (decl->Declaration.Centroid) {
+      TXT( ", CENTROID" );
+   }
+
+   if (decl->Declaration.Invariant) {
+      TXT( ", INVARIANT" );
+   }
+
    EOL();
 
    return TRUE;
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index a562e3a6b14..d591f046fbc 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -85,7 +85,9 @@ struct tgsi_declaration
    unsigned UsageMask   : 4;  /* bitmask of TGSI_WRITEMASK_x flags */
    unsigned Interpolate : 4;  /* TGSI_INTERPOLATE_ */
    unsigned Semantic    : 1;  /* BOOL, any semantic info? */
-   unsigned Padding     : 6;
+   unsigned Centroid    : 1;  /* centroid sampling */
+   unsigned Invariant   : 1;  /* invariant optimization */
+   unsigned Padding     : 4;
    unsigned Extended    : 1;  /* BOOL */
 };
 
-- 
cgit v1.2.3


From b8cf2f00760b4d2299f0880b7e6896bb2d71407b Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Tue, 25 Nov 2008 09:02:27 -0700
Subject: gallium: added centroid/invarient fields to declarations

(cherry picked from commit 4de360e67d83cd6503fb8ad053bb8afe507db5fa)
---
 src/gallium/auxiliary/tgsi/tgsi_build.c | 6 ++++++
 src/gallium/auxiliary/tgsi/tgsi_build.h | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 1219c7da182..ed8fc5ac25b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -133,6 +133,8 @@ tgsi_build_declaration(
    unsigned usage_mask,
    unsigned interpolate,
    unsigned semantic,
+   unsigned centroid,
+   unsigned invariant,
    struct tgsi_header *header )
 {
    struct tgsi_declaration declaration;
@@ -145,6 +147,8 @@ tgsi_build_declaration(
    declaration.UsageMask = usage_mask;
    declaration.Interpolate = interpolate;
    declaration.Semantic = semantic;
+   declaration.Centroid = centroid;
+   declaration.Invariant = invariant;
 
    header_bodysize_grow( header );
 
@@ -196,6 +200,8 @@ tgsi_build_full_declaration(
       full_decl->Declaration.UsageMask,
       full_decl->Declaration.Interpolate,
       full_decl->Declaration.Semantic,
+      full_decl->Declaration.Centroid,
+      full_decl->Declaration.Invariant,
       header );
 
    if (maxsize <= size)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 7d6234746a2..0fd6fabd83d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -71,6 +71,8 @@ tgsi_build_declaration(
    unsigned usage_mask,
    unsigned interpolate,
    unsigned semantic,
+   unsigned centroid,
+   unsigned invariant,
    struct tgsi_header *header );
 
 struct tgsi_full_declaration
-- 
cgit v1.2.3


From 1922ea965ac5c411cf5a3ed0ac7c8dbb873dba6c Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 9 Jan 2009 21:46:08 -0700
Subject: gallium: emit comments in TGSI->PPC codegen

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 92 ++++++++++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 2 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index a92b1902e3d..1a265047389 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -555,6 +555,18 @@ emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
    int v0, v1;
    uint chan_index;
 
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_RSQ:
+      ppc_comment(gen->f, -4, "RSQ:");
+      break;
+   case TGSI_OPCODE_RCP:
+      ppc_comment(gen->f, -4, "LCP:");
+      break;
+   default:
+      assert(0);
+   }
+
+
    v0 = get_src_vec(gen, inst, 0, CHAN_X);
    v1 = ppc_allocate_vec_register(gen->f);
 
@@ -584,6 +596,33 @@ static void
 emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
    uint chan_index;
+
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ABS:
+      ppc_comment(gen->f, -4, "ABS:");
+      break;
+   case TGSI_OPCODE_FLOOR:
+      ppc_comment(gen->f, -4, "FLOOR:");
+      break;
+   case TGSI_OPCODE_FRAC:
+      ppc_comment(gen->f, -4, "FRAC:");
+      break;
+   case TGSI_OPCODE_EXPBASE2:
+      ppc_comment(gen->f, -4, "EXPBASE2:");
+      break;
+   case TGSI_OPCODE_LOGBASE2:
+      ppc_comment(gen->f, -4, "LOGBASE2:");
+      break;
+   case TGSI_OPCODE_MOV:
+      ppc_comment(gen->f, -4, "MOV:");
+      break;
+   case TGSI_OPCODE_SWZ:
+      ppc_comment(gen->f, -4, "SWZ:");
+      break;
+   default:
+      assert(0);
+   }
+
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
       int v0 = get_src_vec(gen, inst, 0, chan_index);   /* v0 = srcreg[0] */
       int v1 = get_dst_vec(gen, inst, chan_index);
@@ -630,6 +669,26 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
    int zero_vec = -1;
    uint chan;
 
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ADD:
+      ppc_comment(gen->f, -4, "ADD:");
+      break;
+   case TGSI_OPCODE_SUB:
+      ppc_comment(gen->f, -4, "SUB:");
+      break;
+   case TGSI_OPCODE_MUL:
+      ppc_comment(gen->f, -4, "MUL:");
+      break;
+   case TGSI_OPCODE_MIN:
+      ppc_comment(gen->f, -4, "MIN:");
+      break;
+   case TGSI_OPCODE_MAX:
+      ppc_comment(gen->f, -4, "MAX:");
+      break;
+   default:
+      assert(0);
+   }
+
    if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
       zero_vec = ppc_allocate_vec_register(gen->f);
       ppc_vzero(gen->f, zero_vec);
@@ -678,6 +737,17 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
    uint chan;
 
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_MAD:
+      ppc_comment(gen->f, -4, "MAD:");
+      break;
+   case TGSI_OPCODE_LRP:
+      ppc_comment(gen->f, -4, "LRP:");
+      break;
+   default:
+      assert(0);
+   }
+
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
       /* fetch src operands */
       int v0 = get_src_vec(gen, inst, 0, chan);
@@ -768,9 +838,23 @@ emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
    int v0, v1, v2;
    uint chan_index;
 
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_DP3:
+      ppc_comment(gen->f, -4, "DP3:");
+      break;
+   case TGSI_OPCODE_DP4:
+      ppc_comment(gen->f, -4, "DP4:");
+      break;
+   case TGSI_OPCODE_DPH:
+      ppc_comment(gen->f, -4, "DPH:");
+      break;
+   default:
+      assert(0);
+   }
+
    v2 = ppc_allocate_vec_register(gen->f);
 
-   ppc_vxor(gen->f, v2, v2, v2);           /* v2 = {0, 0, 0, 0} */
+   ppc_vzero(gen->f, v2);                  /* v2 = {0, 0, 0, 0} */
 
    v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */
    v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */
@@ -812,10 +896,11 @@ ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb)
    int t_vec = ppc_allocate_vec_register(f);
    int zero_vec = ppc_allocate_vec_register(f);
 
+   ppc_comment(f, -4, "POW:");
    ppc_vzero(f, zero_vec);
 
    ppc_vlogefp(f, t_vec, va);                   /* t = log2(va) */
-   ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec);  /* t = t * vb */
+   ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec);  /* t = t * vb + zero */
    ppc_vexptefp(f, vr, t_vec);                  /* vr = 2^t */
 
    ppc_release_vec_register(f, t_vec);
@@ -1221,9 +1306,12 @@ emit_prologue(struct ppc_function *func)
 static void
 emit_epilogue(struct ppc_function *func)
 {
+   ppc_comment(func, -4, "Epilogue:");
    ppc_return(func);
    /* XXX restore prev stack frame */
+#if 0
    debug_printf("PPC: Emitted %u instructions\n", func->num_inst);
+#endif
 }
 
 
-- 
cgit v1.2.3


From 0c71313970be3d097814839577cd141d46666783 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 9 Jan 2009 21:48:54 -0700
Subject: gallium: fix register clobber bug in TGSI->PPC codegen

When negating a src vector that's stored in a altivec register, need to put
negated value into a new register so we don't upset the original value.
This solves the dark colors in the mandelbrot GLSL demo.
Also, use new predicate functions to check if a TGSI temp is stored in
an altivec register.
---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 51 ++++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 10 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 1a265047389..af180adbedc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -78,7 +78,7 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
  * How many TGSI temps should be implemented with real PPC vector registers
  * rather than memory.
  */
-#define MAX_PPC_TEMPS 4
+#define MAX_PPC_TEMPS 3
 
 
 struct reg_chan_vec
@@ -157,6 +157,29 @@ init_gen_context(struct gen_context *gen, struct ppc_function *func)
 }
 
 
+/**
+ * Is the given TGSI register stored as a real PPC vector register?
+ */
+static boolean
+is_ppc_vec_temporary(const struct tgsi_full_src_register *reg)
+{
+   return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
+           reg->SrcRegister.Index < MAX_PPC_TEMPS);
+}
+
+
+/**
+ * Is the given TGSI register stored as a real PPC vector register?
+ */
+static boolean
+is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg)
+{
+   return (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
+           reg->DstRegister.Index < MAX_PPC_TEMPS);
+}
+
+
+
 /**
  * All PPC vector load/store instructions form an effective address
  * by adding the contents of two registers.  For example:
@@ -285,7 +308,7 @@ emit_fetch(struct gen_context *gen,
          }
          break;
       case TGSI_FILE_TEMPORARY:
-         if (reg->SrcRegister.Index < MAX_PPC_TEMPS) {
+         if (is_ppc_vec_temporary(reg)) {
             /* use PPC vec register */
             dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle];
          }
@@ -353,23 +376,33 @@ emit_fetch(struct gen_context *gen,
       uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
       if (sign_op != TGSI_UTIL_SIGN_KEEP) {
          int bit31_vec = gen_get_bit31_vec(gen);
+         int dst_vec2;
+
+         if (is_ppc_vec_temporary(reg)) {
+            /* need to use a new temp */
+            dst_vec2 = ppc_allocate_vec_register(gen->f);
+         }
+         else {
+            dst_vec2 = dst_vec;
+         }
 
          switch (sign_op) {
          case TGSI_UTIL_SIGN_CLEAR:
             /* vec = vec & ~bit31 */
-            ppc_vandc(gen->f, dst_vec, dst_vec, bit31_vec);
+            ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec);
             break;
          case TGSI_UTIL_SIGN_SET:
             /* vec = vec | bit31 */
-            ppc_vor(gen->f, dst_vec, dst_vec, bit31_vec);
+            ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec);
             break;
          case TGSI_UTIL_SIGN_TOGGLE:
             /* vec = vec ^ bit31 */
-            ppc_vxor(gen->f, dst_vec, dst_vec, bit31_vec);
+            ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec);
             break;
          default:
             assert(0);
          }
+         return dst_vec2;
       }
    }
 
@@ -452,8 +485,7 @@ release_src_vecs(struct gen_context *gen)
    uint i;
    for (i = 0; i < gen->num_regs; i++) {
       const const struct tgsi_full_src_register src = gen->regs[i].src;
-      if (!(src.SrcRegister.File == TGSI_FILE_TEMPORARY &&
-            src.SrcRegister.Index < MAX_PPC_TEMPS)) {
+      if (!is_ppc_vec_temporary(&src)) {
          ppc_release_vec_register(gen->f, gen->regs[i].vec);
       }
    }
@@ -469,8 +501,7 @@ get_dst_vec(struct gen_context *gen,
 {
    const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0];
 
-   if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
-       reg->DstRegister.Index < MAX_PPC_TEMPS) {
+   if (is_ppc_vec_temporary_dst(reg)) {
       int vec = gen->temps_map[reg->DstRegister.Index][chan_index];
       return vec;
    }
@@ -502,7 +533,7 @@ emit_store(struct gen_context *gen,
       }
       break;
    case TGSI_FILE_TEMPORARY:
-      if (reg->DstRegister.Index < MAX_PPC_TEMPS) {
+      if (is_ppc_vec_temporary_dst(reg)) {
          if (!free_vec) {
             int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index];
             if (dst_vec != src_vec)
-- 
cgit v1.2.3


From d4394bb768f17ac6a7e99116f2bc79c40dca3c5b Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 9 Jan 2009 21:51:22 -0700
Subject: gallium: remove unused struct type

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index af180adbedc..638fe098616 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -81,14 +81,6 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
 #define MAX_PPC_TEMPS 3
 
 
-struct reg_chan_vec
-{
-   struct tgsi_full_src_register src;
-   uint chan;
-   uint vec;
-};
-
-
 /**
  * Context/state used during code gen.
  */
-- 
cgit v1.2.3


From 2acf07983f8bb134d639c9e652e7e0e3307e20f3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 9 Jan 2009 22:03:48 -0700
Subject: gallium: use tgsi_dump_instruction() instead of ppc_comment()

---
 src/gallium/auxiliary/tgsi/tgsi_ppc.c | 91 +++--------------------------------
 1 file changed, 7 insertions(+), 84 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 638fe098616..1a4db47501c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -578,18 +578,6 @@ emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
    int v0, v1;
    uint chan_index;
 
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_RSQ:
-      ppc_comment(gen->f, -4, "RSQ:");
-      break;
-   case TGSI_OPCODE_RCP:
-      ppc_comment(gen->f, -4, "LCP:");
-      break;
-   default:
-      assert(0);
-   }
-
-
    v0 = get_src_vec(gen, inst, 0, CHAN_X);
    v1 = ppc_allocate_vec_register(gen->f);
 
@@ -620,32 +608,6 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
    uint chan_index;
 
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_ABS:
-      ppc_comment(gen->f, -4, "ABS:");
-      break;
-   case TGSI_OPCODE_FLOOR:
-      ppc_comment(gen->f, -4, "FLOOR:");
-      break;
-   case TGSI_OPCODE_FRAC:
-      ppc_comment(gen->f, -4, "FRAC:");
-      break;
-   case TGSI_OPCODE_EXPBASE2:
-      ppc_comment(gen->f, -4, "EXPBASE2:");
-      break;
-   case TGSI_OPCODE_LOGBASE2:
-      ppc_comment(gen->f, -4, "LOGBASE2:");
-      break;
-   case TGSI_OPCODE_MOV:
-      ppc_comment(gen->f, -4, "MOV:");
-      break;
-   case TGSI_OPCODE_SWZ:
-      ppc_comment(gen->f, -4, "SWZ:");
-      break;
-   default:
-      assert(0);
-   }
-
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
       int v0 = get_src_vec(gen, inst, 0, chan_index);   /* v0 = srcreg[0] */
       int v1 = get_dst_vec(gen, inst, chan_index);
@@ -692,26 +654,6 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
    int zero_vec = -1;
    uint chan;
 
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_ADD:
-      ppc_comment(gen->f, -4, "ADD:");
-      break;
-   case TGSI_OPCODE_SUB:
-      ppc_comment(gen->f, -4, "SUB:");
-      break;
-   case TGSI_OPCODE_MUL:
-      ppc_comment(gen->f, -4, "MUL:");
-      break;
-   case TGSI_OPCODE_MIN:
-      ppc_comment(gen->f, -4, "MIN:");
-      break;
-   case TGSI_OPCODE_MAX:
-      ppc_comment(gen->f, -4, "MAX:");
-      break;
-   default:
-      assert(0);
-   }
-
    if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
       zero_vec = ppc_allocate_vec_register(gen->f);
       ppc_vzero(gen->f, zero_vec);
@@ -760,17 +702,6 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
 {
    uint chan;
 
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_MAD:
-      ppc_comment(gen->f, -4, "MAD:");
-      break;
-   case TGSI_OPCODE_LRP:
-      ppc_comment(gen->f, -4, "LRP:");
-      break;
-   default:
-      assert(0);
-   }
-
    FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
       /* fetch src operands */
       int v0 = get_src_vec(gen, inst, 0, chan);
@@ -861,20 +792,6 @@ emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
    int v0, v1, v2;
    uint chan_index;
 
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_DP3:
-      ppc_comment(gen->f, -4, "DP3:");
-      break;
-   case TGSI_OPCODE_DP4:
-      ppc_comment(gen->f, -4, "DP4:");
-      break;
-   case TGSI_OPCODE_DPH:
-      ppc_comment(gen->f, -4, "DPH:");
-      break;
-   default:
-      assert(0);
-   }
-
    v2 = ppc_allocate_vec_register(gen->f);
 
    ppc_vzero(gen->f, v2);                  /* v2 = {0, 0, 0, 0} */
@@ -919,7 +836,6 @@ ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb)
    int t_vec = ppc_allocate_vec_register(f);
    int zero_vec = ppc_allocate_vec_register(f);
 
-   ppc_comment(f, -4, "POW:");
    ppc_vzero(f, zero_vec);
 
    ppc_vlogefp(f, t_vec, va);                   /* t = log2(va) */
@@ -1359,6 +1275,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
    unsigned ok = 1;
    uint num_immediates = 0;
    struct gen_context gen;
+   uint ic = 0;
 
    if (use_ppc_asm < 0) {
       /* If GALLIUM_NOPPC is set, don't use PPC codegen */
@@ -1391,6 +1308,12 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (func->print) {
+            _debug_printf("# ");
+            ic++;
+            tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
+         }
+
          ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);
 
 	 if (!ok) {
-- 
cgit v1.2.3


From 1907135235a684872706d1a28ea8e2b4e1b6e7d3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 12 Jan 2009 12:01:09 -0700
Subject: tgsi: change an if to an else-if, added const qual, added comments

---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 1239f6c0765..d02205a63ef 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -43,6 +43,9 @@
 
 
 /**
+ * Scan the given TGSI shader to collect information such as number of
+ * registers used, special instructions used, etc.
+ * \return info  the result of the scan
  */
 void
 tgsi_scan_shader(const struct tgsi_token *tokens,
@@ -115,7 +118,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
          {
             const struct tgsi_full_declaration *fulldecl
                = &parse.FullToken.FullDeclaration;
-            uint file = fulldecl->Declaration.File;
+            const uint file = fulldecl->Declaration.File;
             uint reg;
             for (reg = fulldecl->DeclarationRange.First;
                  reg <= fulldecl->DeclarationRange.Last;
@@ -131,8 +134,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                   info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
                   info->num_inputs++;
                }
-
-               if (file == TGSI_FILE_OUTPUT) {
+               else if (file == TGSI_FILE_OUTPUT) {
                   info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
                   info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
                   info->num_outputs++;
-- 
cgit v1.2.3


From 4d710dd3cf3187e94e5765b46e4dd6899a7a41d6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Tue, 27 Jan 2009 11:15:52 +0000
Subject: tgsi: silence some warnings

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 37 ++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 989b6eec27b..a182e679daf 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -320,6 +320,7 @@ micro_add(
    dst->f[3] = src0->f[3] + src1->f[3];
 }
 
+#if 0
 static void
 micro_iadd(
    union tgsi_exec_channel *dst,
@@ -331,6 +332,7 @@ micro_iadd(
    dst->i[2] = src0->i[2] + src1->i[2];
    dst->i[3] = src0->i[3] + src1->i[3];
 }
+#endif
 
 static void
 micro_and(
@@ -408,6 +410,7 @@ micro_div(
    }
 }
 
+#if 0
 static void
 micro_udiv(
    union tgsi_exec_channel *dst,
@@ -419,6 +422,7 @@ micro_udiv(
    dst->u[2] = src0->u[2] / src1->u[2];
    dst->u[3] = src0->u[3] / src1->u[3];
 }
+#endif
 
 static void
 micro_eq(
@@ -434,6 +438,7 @@ micro_eq(
    dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
 }
 
+#if 0
 static void
 micro_ieq(
    union tgsi_exec_channel *dst,
@@ -447,6 +452,7 @@ micro_ieq(
    dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
    dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
 }
+#endif
 
 static void
 micro_exp2(
@@ -466,6 +472,7 @@ micro_exp2(
 #endif
 }
 
+#if 0
 static void
 micro_f2ut(
    union tgsi_exec_channel *dst,
@@ -476,6 +483,7 @@ micro_f2ut(
    dst->u[2] = (uint) src->f[2];
    dst->u[3] = (uint) src->f[3];
 }
+#endif
 
 static void
 micro_flr(
@@ -570,6 +578,7 @@ micro_lt(
    dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
 }
 
+#if 0
 static void
 micro_ilt(
    union tgsi_exec_channel *dst,
@@ -583,7 +592,9 @@ micro_ilt(
    dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
    dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
 }
+#endif
 
+#if 0
 static void
 micro_ult(
    union tgsi_exec_channel *dst,
@@ -597,6 +608,7 @@ micro_ult(
    dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
    dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
 }
+#endif
 
 static void
 micro_max(
@@ -610,6 +622,7 @@ micro_max(
    dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
 }
 
+#if 0
 static void
 micro_imax(
    union tgsi_exec_channel *dst,
@@ -621,7 +634,9 @@ micro_imax(
    dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
    dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
 }
+#endif
 
+#if 0
 static void
 micro_umax(
    union tgsi_exec_channel *dst,
@@ -633,6 +648,7 @@ micro_umax(
    dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
    dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
 }
+#endif
 
 static void
 micro_min(
@@ -646,6 +662,7 @@ micro_min(
    dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
 }
 
+#if 0
 static void
 micro_imin(
    union tgsi_exec_channel *dst,
@@ -657,7 +674,9 @@ micro_imin(
    dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
    dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
 }
+#endif
 
+#if 0
 static void
 micro_umin(
    union tgsi_exec_channel *dst,
@@ -669,7 +688,9 @@ micro_umin(
    dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
    dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
 }
+#endif
 
+#if 0
 static void
 micro_umod(
    union tgsi_exec_channel *dst,
@@ -681,6 +702,7 @@ micro_umod(
    dst->u[2] = src0->u[2] % src1->u[2];
    dst->u[3] = src0->u[3] % src1->u[3];
 }
+#endif
 
 static void
 micro_mul(
@@ -694,6 +716,7 @@ micro_mul(
    dst->f[3] = src0->f[3] * src1->f[3];
 }
 
+#if 0
 static void
 micro_imul(
    union tgsi_exec_channel *dst,
@@ -705,7 +728,9 @@ micro_imul(
    dst->i[2] = src0->i[2] * src1->i[2];
    dst->i[3] = src0->i[3] * src1->i[3];
 }
+#endif
 
+#if 0
 static void
 micro_imul64(
    union tgsi_exec_channel *dst0,
@@ -722,7 +747,9 @@ micro_imul64(
    dst0->i[2] = 0;
    dst0->i[3] = 0;
 }
+#endif
 
+#if 0
 static void
 micro_umul64(
    union tgsi_exec_channel *dst0,
@@ -739,7 +766,10 @@ micro_umul64(
    dst0->u[2] = 0;
    dst0->u[3] = 0;
 }
+#endif
 
+
+#if 0
 static void
 micro_movc(
    union tgsi_exec_channel *dst,
@@ -752,6 +782,7 @@ micro_movc(
    dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
    dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
 }
+#endif
 
 static void
 micro_neg(
@@ -764,6 +795,7 @@ micro_neg(
    dst->f[3] = -src->f[3];
 }
 
+#if 0
 static void
 micro_ineg(
    union tgsi_exec_channel *dst,
@@ -774,6 +806,7 @@ micro_ineg(
    dst->i[2] = -src->i[2];
    dst->i[3] = -src->i[3];
 }
+#endif
 
 static void
 micro_not(
@@ -874,6 +907,7 @@ micro_trunc(
    dst->f[3] = (float) (int) src0->f[3];
 }
 
+#if 0
 static void
 micro_ushr(
    union tgsi_exec_channel *dst,
@@ -885,6 +919,7 @@ micro_ushr(
    dst->u[2] = src0->u[2] >> src1->u[2];
    dst->u[3] = src0->u[3] >> src1->u[3];
 }
+#endif
 
 static void
 micro_sin(
@@ -919,6 +954,7 @@ micro_sub(
    dst->f[3] = src0->f[3] - src1->f[3];
 }
 
+#if 0
 static void
 micro_u2f(
    union tgsi_exec_channel *dst,
@@ -929,6 +965,7 @@ micro_u2f(
    dst->f[2] = (float) src->u[2];
    dst->f[3] = (float) src->u[3];
 }
+#endif
 
 static void
 micro_xor(
-- 
cgit v1.2.3


From 5ecd0b0890fd48656e46c017830a9f5d6b906aae Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 10 Feb 2009 15:14:31 +0100
Subject: tgsi: Fix build -- rename Size to NrTokens.

---
 src/gallium/auxiliary/tgsi/tgsi_build.c  | 20 ++++++++++----------
 src/gallium/auxiliary/tgsi/tgsi_dump.c   |  4 ++--
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c |  4 ++--
 src/gallium/auxiliary/tgsi/tgsi_exec.c   |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_parse.c  |  4 ++--
 src/gallium/auxiliary/tgsi/tgsi_ppc.c    |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_sse2.c   |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_text.c   |  2 +-
 8 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'src/gallium/auxiliary/tgsi')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index fd02c2c87c8..17886540cf7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -114,7 +114,7 @@ tgsi_default_declaration( void )
    struct tgsi_declaration declaration;
 
    declaration.Type = TGSI_TOKEN_TYPE_DECLARATION;
-   declaration.Size = 1;
+   declaration.NrTokens = 1;
    declaration.File = TGSI_FILE_NULL;
    declaration.UsageMask = TGSI_WRITEMASK_XYZW;
    declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT;
@@ -160,9 +160,9 @@ declaration_grow(
    struct tgsi_declaration *declaration,
    struct tgsi_header *header )
 {
-   assert( declaration->Size < 0xFF );
+   assert( declaration->NrTokens < 0xFF );
 
-   declaration->Size++;
+   declaration->NrTokens++;
 
    header_bodysize_grow( header );
 }
@@ -308,7 +308,7 @@ tgsi_default_immediate( void )
    struct tgsi_immediate immediate;
 
    immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
-   immediate.Size = 1;
+   immediate.NrTokens = 1;
    immediate.DataType = TGSI_IMM_FLOAT32;
    immediate.Padding = 0;
    immediate.Extended = 0;
@@ -345,9 +345,9 @@ immediate_grow(
    struct tgsi_immediate *immediate,
    struct tgsi_header *header )
 {
-   assert( immediate->Size < 0xFF );
+   assert( immediate->NrTokens < 0xFF );
 
-   immediate->Size++;
+   immediate->NrTokens++;
 
    header_bodysize_grow( header );
 }
@@ -384,7 +384,7 @@ tgsi_build_full_immediate(
 
    *immediate = tgsi_build_immediate( header );
 
-   for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) {
+   for( i = 0; i < full_imm->Immediate.NrTokens - 1; i++ ) {
       struct tgsi_immediate_float32 *if32;
 
       if( maxsize <= size )
@@ -411,7 +411,7 @@ tgsi_default_instruction( void )
    struct tgsi_instruction instruction;
 
    instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
-   instruction.Size = 1;
+   instruction.NrTokens = 1;
    instruction.Opcode = TGSI_OPCODE_MOV;
    instruction.Saturate = TGSI_SAT_NONE;
    instruction.NumDstRegs = 1;
@@ -453,9 +453,9 @@ instruction_grow(
    struct tgsi_instruction *instruction,
    struct tgsi_header *header )
 {
-   assert (instruction->Size <   0xFF);
+   assert (instruction->NrTokens <   0xFF);
 
-   instruction->Size++;
+   instruction->NrTokens++;
 
    header_bodysize_grow( header );
 }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 2ed8c2bf07b..ab2b1f2c58a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -285,7 +285,7 @@ iter_immediate(
    ENM( imm->Immediate.DataType, immediate_type_names );
 
    TXT( " { " );
-   for (i = 0; i < imm->Immediate.Size - 1; i++) {
+   for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
       switch (imm->Immediate.DataType) {
       case TGSI_IMM_FLOAT32:
          FLT( imm->u.ImmediateFloat32[i].Float );
@@ -294,7 +294,7 @@ iter_immediate(
          assert( 0 );
       }
 
-      if (i < imm->Immediate.Size - 2)
+      if (i < imm->Immediate.NrTokens - 2)
          TXT( ", " );
    }
    TXT( " }" );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index c575b6c3e1f..2ecf1e2f14b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -283,7 +283,7 @@ dump_immediate_verbose(
       UIX( imm->Immediate.Padding );
    }
 
-   for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+   for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) {
       EOL();
       switch( imm->Immediate.DataType ) {
       case TGSI_IMM_FLOAT32:
@@ -675,7 +675,7 @@ tgsi_dump_c(
       ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES );
       if( ignored ) {
          TXT( "\nSize       : " );
-         UID( parse.FullToken.Token.Size );
+         UID( parse.FullToken.Token.NrTokens );
          if( deflt || parse.FullToken.Token.Extended ) {
             TXT( "\nExtended   : " );
             UID( parse.FullToken.Token.Extended );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a182e679daf..ab641efb603 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -202,7 +202,7 @@ tgsi_exec_machine_bind_shader(
 
       case TGSI_TOKEN_TYPE_IMMEDIATE:
          {
-            uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+            uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
             assert( size % 4 == 0 );
             assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 2cd56e413a5..d374b16f9a1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -155,8 +155,8 @@ tgsi_parse_token(
       switch (imm->Immediate.DataType) {
       case TGSI_IMM_FLOAT32:
          imm->u.Pointer = MALLOC(
-            sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) );
-         for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+            sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.NrTokens - 1) );
+         for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) {
             next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] );
          }
          break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 1a4db47501c..f365030e526 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -1327,7 +1327,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
       case TGSI_TOKEN_TYPE_IMMEDIATE:
          /* splat each immediate component into a float[4] vector for SoA */
          {
-            const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
             uint i;
             assert(size <= 4);
             assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index cac44af7f41..481ba89c5e7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2671,7 +2671,7 @@ tgsi_emit_sse2(
       case TGSI_TOKEN_TYPE_IMMEDIATE:
          /* simply copy the immediate values into the next immediates[] slot */
          {
-            const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
             uint i;
             assert(size <= 4);
             assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 9454563361e..1e822fbbea1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -1023,7 +1023,7 @@ static boolean parse_immediate( struct translate_ctx *ctx )
    ctx->cur++;
 
    imm = tgsi_default_full_immediate();
-   imm.Immediate.Size += 4;
+   imm.Immediate.NrTokens += 4;
    imm.Immediate.DataType = TGSI_IMM_FLOAT32;
    imm.u.Pointer = values;
 
-- 
cgit v1.2.3