6 files changed, 100 insertions, 41 deletions
diff --git a/src/mesa/pipe/draw/draw_vertex_fetch.c b/src/mesa/pipe/draw/draw_vertex_fetch.c
index 0dbbdf17f2f..ce402d681f0 100644
--- a/src/mesa/pipe/draw/draw_vertex_fetch.c
+++ b/src/mesa/pipe/draw/draw_vertex_fetch.c
@@ -67,6 +67,10 @@ fetch_attrib4(const void *ptr, unsigned format, float attrib[4])
    }
 }
 
+
+/**
+ * Fetch vertex attributes for 'count' vertices.
+ */
 void draw_vertex_fetch( struct draw_context *draw,
 			struct tgsi_exec_machine *machine,
 			const unsigned *elts,
@@ -74,27 +78,26 @@ void draw_vertex_fetch( struct draw_context *draw,
 {
    unsigned j;
 
-
-   /* load machine inputs */
+   /* loop over vertices */
    for (j = 0; j < count; j++) {
-      unsigned attr;
-      for (attr = 0; attr < 16; attr++) {
-         if (draw->vertex_shader.inputs_read & (1 << attr)) {
-            unsigned buf = draw->vertex_element[attr].vertex_buffer_index;
-            const void *src
-               = (const void *) ((const ubyte *) draw->mapped_vbuffer[buf]
-                                 + draw->vertex_buffer[buf].buffer_offset
-                                 + draw->vertex_element[attr].src_offset
-                                 + elts[j] * draw->vertex_buffer[buf].pitch);
-            float p[4];
+      uint attr;
+      /* loop over vertex attributes (vertex shader inputs) */
+      for (attr = 0; attr < draw->vertex_shader.num_inputs; attr++) {
+
+         unsigned buf = draw->vertex_element[attr].vertex_buffer_index;
+         const void *src
+            = (const void *) ((const ubyte *) draw->mapped_vbuffer[buf]
+                              + draw->vertex_buffer[buf].buffer_offset
+                              + draw->vertex_element[attr].src_offset
+                              + elts[j] * draw->vertex_buffer[buf].pitch);
+         float p[4];
 
-            fetch_attrib4(src, draw->vertex_element[attr].src_format, p);
+         fetch_attrib4(src, draw->vertex_element[attr].src_format, p);
 
-            machine->Inputs[attr].xyzw[0].f[j] = p[0]; /*X*/
-            machine->Inputs[attr].xyzw[1].f[j] = p[1]; /*Y*/
-            machine->Inputs[attr].xyzw[2].f[j] = p[2]; /*Z*/
-            machine->Inputs[attr].xyzw[3].f[j] = p[3]; /*W*/
-         }
+         machine->Inputs[attr].xyzw[0].f[j] = p[0]; /*X*/
+         machine->Inputs[attr].xyzw[1].f[j] = p[1]; /*Y*/
+         machine->Inputs[attr].xyzw[2].f[j] = p[2]; /*Z*/
+         machine->Inputs[attr].xyzw[3].f[j] = p[3]; /*W*/
       }
    }
 }
diff --git a/src/mesa/pipe/draw/draw_vertex_shader.c b/src/mesa/pipe/draw/draw_vertex_shader.c
index 8effc74cbee..cb6c605b8db 100644
--- a/src/mesa/pipe/draw/draw_vertex_shader.c
+++ b/src/mesa/pipe/draw/draw_vertex_shader.c
@@ -114,7 +114,6 @@ run_vertex_program(struct draw_context *draw,
 
    draw_vertex_fetch( draw, &machine, elts, count );
 
-
    /* run shader */
    if( draw->vertex_shader.executable != NULL ) {
 #if defined(USE_X86_ASM) || defined(SLANG_X86)
@@ -159,14 +158,23 @@ run_vertex_program(struct draw_context *draw,
       vOut[j]->data[0][2] = z * scale[2] + trans[2];
       vOut[j]->data[0][3] = w;
 
-      /* remaining attributes are packed into sequential post-transform
+      /* Remaining attributes are packed into sequential post-transform
        * vertex attrib slots.
+       * Skip 0 since we just did it above.
+       * Subtract two because of the VERTEX_HEADER, CLIP_POS attribs.
        */
-      for (slot = 1; slot < draw->vertex_info.num_attribs; slot++) {
+      for (slot = 1; slot < draw->vertex_info.num_attribs - 2; slot++) {
          vOut[j]->data[slot][0] = machine.Outputs[slot].xyzw[0].f[j];
          vOut[j]->data[slot][1] = machine.Outputs[slot].xyzw[1].f[j];
          vOut[j]->data[slot][2] = machine.Outputs[slot].xyzw[2].f[j];
          vOut[j]->data[slot][3] = machine.Outputs[slot].xyzw[3].f[j];
+         /*
+         printf("output %d: %f %f %f %f\n", slot,
+                vOut[j]->data[slot][0],
+                vOut[j]->data[slot][1],
+                vOut[j]->data[slot][2],
+                vOut[j]->data[slot][3]);
+         */
       }
    } /* loop over vertices */
 }
diff --git a/src/mesa/pipe/p_state.h b/src/mesa/pipe/p_state.h
index 048feede3ba..d2cc76a59b2 100644
--- a/src/mesa/pipe/p_state.h
+++ b/src/mesa/pipe/p_state.h
@@ -144,6 +144,9 @@ struct pipe_shader_state {
    unsigned outputs_written;               /**< TGSI_ATTRIB_ bits */
    const struct tgsi_token *tokens;
    void *executable;
+
+   uint num_inputs;
+   uint num_outputs;
 };
 
 struct pipe_depth_stencil_state
diff --git a/src/mesa/pipe/softpipe/sp_prim_setup.c b/src/mesa/pipe/softpipe/sp_prim_setup.c
index d0baf0734b0..8a4be79d114 100644
--- a/src/mesa/pipe/softpipe/sp_prim_setup.c
+++ b/src/mesa/pipe/softpipe/sp_prim_setup.c
@@ -448,6 +448,14 @@ static void tri_persp_coeff( struct setup_stage *setup,
    float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
    float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
       
+   /*
+   printf("tri persp %d,%d: %f %f %f\n", slot, i,
+          setup->vmin->data[slot][i],
+          setup->vmid->data[slot][i],
+          setup->vmax->data[slot][i]
+          );
+   */
+
    assert(slot < TGSI_ATTRIB_MAX);
    assert(i <= 3);
 
diff --git a/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.c b/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.c
index 28207065372..1f8d937bc6b 100644
--- a/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.c
+++ b/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.c
@@ -211,6 +211,7 @@ tgsi_mesa_translate_fragment_output(GLuint attrib)
 }
 
 
+#if 01
 uint
 tgsi_mesa_translate_vertex_input_mask(GLbitfield mask)
 {
@@ -224,7 +225,7 @@ tgsi_mesa_translate_vertex_input_mask(GLbitfield mask)
    }
    return tgsiMask;
 }
-
+#endif
 
 uint
 tgsi_mesa_translate_vertex_output_mask(GLbitfield mask)
@@ -318,7 +319,9 @@ map_register_file_index(
    GLuint processor,
    GLuint file,
    GLuint index,
-   GLbitfield usage_bitmask )
+   GLbitfield usage_bitmask,
+   const GLuint inputMapping[],
+   const GLuint outputMapping[])
 {
    GLuint mapped_index;
    GLuint i;
@@ -337,6 +340,12 @@ map_register_file_index(
        * etc.
        */
       assert( index < 32 );
+      if (inputMapping) {
+         printf("New map %d input %d to %d\n", processor, index,
+                inputMapping[index]);
+         return inputMapping[index];
+      }
+
       assert( usage_bitmask & (1 << index) );
       mapped_index = 0;
       for( i = 0; i < index; i++ ) {
@@ -344,7 +353,7 @@ map_register_file_index(
             mapped_index++;
          }
       }
-      printf("Map input %d to %d\n", index, mapped_index);
+      printf("Map %d input %d to %d\n", processor, index, mapped_index);
       break;
 
    case TGSI_FILE_OUTPUT:
@@ -372,6 +381,8 @@ map_register_file_index(
                mapped_index++;
             }
          }
+         printf("Map VP output from %d to %d\n", index, mapped_index);
+         assert(outputMapping[index] == mapped_index);
       }
       break;
 
@@ -443,6 +454,8 @@ compile_instruction(
    struct tgsi_full_instruction *fullinst,
    GLuint inputs_read,
    GLuint outputs_written,
+   const GLuint inputMapping[],
+   const GLuint outputMapping[],
    GLuint preamble_size,
    GLuint processor )
 {
@@ -462,7 +475,9 @@ compile_instruction(
       processor,
       fulldst->DstRegister.File,
       inst->DstReg.Index,
-      outputs_written
+      outputs_written,
+      NULL,
+      outputMapping
       );
    fulldst->DstRegister.WriteMask = convert_writemask( inst->DstReg.WriteMask );
 
@@ -475,7 +490,9 @@ compile_instruction(
          processor,
          fullsrc->SrcRegister.File,
          inst->SrcReg[i].Index,
-         inputs_read );
+         inputs_read,
+         inputMapping,
+         outputMapping );
 
       for( j = 0; j < 4; j++ ) {
          GLuint swz;
@@ -789,9 +806,20 @@ make_frag_output_decl(
    return decl;
 }
 
+
+/**
+ * Convert Mesa fragment program to TGSI format.
+ * \param inputMapping  array mapping the original Mesa fragment program input
+ *                      registers to TGSI generic input indexes
+ * \param interpMode  array[FRAG_ATTRIB_x] of TGSI_INTERPOLATE_LINEAR/PERSPECTIVE.
+ * \param outputMapping  forwarded, with inputMapping, to compile_instruction()
+ */
 GLboolean
 tgsi_mesa_compile_fp_program(
    const struct gl_fragment_program *program,
+   const GLuint inputMapping[],
+   const GLuint interpMode[],
+   const GLuint outputMapping[],
    struct tgsi_token *tokens,
    GLuint maxTokens )
 {
@@ -800,8 +828,10 @@ tgsi_mesa_compile_fp_program(
    struct tgsi_processor *processor;
    struct tgsi_full_declaration fulldecl;
    struct tgsi_full_instruction fullinst;
+   /*
    struct tgsi_full_dst_register *fulldst;
    struct tgsi_full_src_register *fullsrc;
+   */
    GLuint inputs_read;
    GLboolean reads_wpos;
    GLuint preamble_size = 0;
@@ -822,7 +852,7 @@ tgsi_mesa_compile_fp_program(
    /*
     * Declare input attributes. Note that we do not interpolate fragment position.
     */
-
+   reads_wpos = 1;
    /* Fragment position. */
    if( reads_wpos ) {
       fulldecl = make_frag_input_decl(
@@ -853,20 +883,16 @@ tgsi_mesa_compile_fp_program(
    for( i = 1; i < 32; i++ ) {
       if( inputs_read & (1 << i) ) {
          count++;
+         fulldecl = make_frag_input_decl(count,
+                                         count,
+                                         interpMode[i],
+                                         TGSI_WRITEMASK_XYZW );
+         ti += tgsi_build_full_declaration(&fulldecl,
+                                           &tokens[ti],
+                                           header,
+                                           maxTokens - ti );
       }
-   }
-   if( count > 0 ) {
-      fulldecl = make_frag_input_decl(
-         1,
-         1 + count - 1,
-         TGSI_INTERPOLATE_PERSPECTIVE,
-         TGSI_WRITEMASK_XYZW );
-      ti += tgsi_build_full_declaration(
-         &fulldecl,
-         &tokens[ti],
-         header,
-         maxTokens - ti );
-   }
+   }         
 
    /*
     * Declare output attributes.
@@ -932,6 +958,8 @@ tgsi_mesa_compile_fp_program(
             &fullinst,
             inputs_read,
             ~0, /*outputs_written*/
+            inputMapping,
+            outputMapping,
             preamble_size,
             TGSI_PROCESSOR_FRAGMENT ) ) {
          assert( i == program->Base.NumInstructions - 1 );
@@ -955,8 +983,10 @@ tgsi_mesa_compile_fp_program(
 GLboolean
 tgsi_mesa_compile_vp_program(
    const struct gl_vertex_program *program,
+   const GLuint inputMapping[],
+   const GLuint outputMapping[],
    struct tgsi_token *tokens,
-   GLuint maxTokens )
+   GLuint maxTokens)
 {
    GLuint i, ti;
    struct tgsi_header *header;
@@ -983,6 +1013,8 @@ tgsi_mesa_compile_vp_program(
             &fullinst,
             inputs_read,
             outputs_written,
+            inputMapping,
+            outputMapping,
             0,
             TGSI_PROCESSOR_VERTEX ) ) {
          assert( i == program->Base.NumInstructions - 1 );
diff --git a/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.h b/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.h
index fda3fa397f8..017cfce72ea 100644
--- a/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.h
+++ b/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.h
@@ -10,12 +10,17 @@ struct tgsi_token;
 GLboolean
 tgsi_mesa_compile_fp_program(
    const struct gl_fragment_program *program,
+   const GLuint inputMapping[],
+   const GLuint interpMode[],
+   const GLuint outputMapping[],
    struct tgsi_token *tokens,
    GLuint maxTokens );
 
 GLboolean
 tgsi_mesa_compile_vp_program(
    const struct gl_vertex_program *program,
+   const GLuint inputMapping[],
+   const GLuint outputMapping[],
    struct tgsi_token *tokens,
    GLuint maxTokens );