summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/pipe/draw
diff options
context:
space:
mode:
author Brian <[email protected]> 2007-10-02 11:46:11 -0600
committer Brian <[email protected]> 2007-10-02 11:46:11 -0600
commit 0d13ade0cdd38759936a74824efbd6ac8b563aed (patch)
tree 1fe31314186843a968470d42f0151ccea410ad7d /src/mesa/pipe/draw
parent 57d3770f35730bef17e5d93bd424a59eb6daec4c (diff)
Move tgsi machine state init/allocations so they're done less frequently.
This, plus expanding all instructions ahead of time, seems to have improved the performance of program execution by 8x or so.
Diffstat (limited to 'src/mesa/pipe/draw')
-rw-r--r-- src/mesa/pipe/draw/draw_private.h | 4
-rw-r--r-- src/mesa/pipe/draw/draw_vertex_shader.c | 52
2 files changed, 35 insertions, 21 deletions
diff --git a/src/mesa/pipe/draw/draw_private.h b/src/mesa/pipe/draw/draw_private.h
index 12a970a6711..a54fef41e74 100644
--- a/src/mesa/pipe/draw/draw_private.h
+++ b/src/mesa/pipe/draw/draw_private.h
@@ -47,6 +47,8 @@
#include "draw_vertex.h"
#include "x86/rtasm/x86sse.h"
+#include "pipe/tgsi/exec/tgsi_core.h"
+
/**
* Basic vertex info.
@@ -187,6 +189,8 @@ struct draw_context
unsigned prim; /**< current prim type: PIPE_PRIM_x */
unsigned reduced_prim;
+ /** TGSI program interpreter runtime state */
+ struct tgsi_exec_machine machine;
/* Post-tnl vertex cache:
*/
diff --git a/src/mesa/pipe/draw/draw_vertex_shader.c b/src/mesa/pipe/draw/draw_vertex_shader.c
index 3518bd52a3f..e3bcd353341 100644
--- a/src/mesa/pipe/draw/draw_vertex_shader.c
+++ b/src/mesa/pipe/draw/draw_vertex_shader.c
@@ -86,7 +86,7 @@ run_vertex_program(struct draw_context *draw,
unsigned elts[4], unsigned count,
struct vertex_header *vOut[])
{
- struct tgsi_exec_machine machine;
+ struct tgsi_exec_machine *machine = &draw->machine;
unsigned int j;
ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
@@ -98,35 +98,39 @@ run_vertex_program(struct draw_context *draw,
assert(draw->vertex_shader->state->output_semantic_name[0]
== TGSI_SEMANTIC_POSITION);
-#ifdef DEBUG
- memset( &machine, 0, sizeof( machine ) );
+#ifdef DEBUG_foo
+ memset( machine, 0, sizeof( *machine ) );
#endif
+#if 0
/* init machine state */
- tgsi_exec_machine_init(&machine,
+ tgsi_exec_machine_init(machine,
draw->vertex_shader->state->tokens,
PIPE_MAX_SAMPLERS,
NULL /*samplers*/ );
+#endif
/* Consts does not require 16 byte alignment. */
- machine.Consts = (float (*)[4]) draw->mapped_constants;
+ machine->Consts = (float (*)[4]) draw->mapped_constants;
- machine.Inputs = ALIGN16_ASSIGN(inputs);
- machine.Outputs = ALIGN16_ASSIGN(outputs);
+ machine->Inputs = ALIGN16_ASSIGN(inputs);
+ machine->Outputs = ALIGN16_ASSIGN(outputs);
- draw_vertex_fetch( draw, &machine, elts, count );
+ draw_vertex_fetch( draw, machine, elts, count );
/* run shader */
if( draw->vertex_shader->state->executable != NULL ) {
+ /* SSE */
codegen_function func = (codegen_function) draw->vertex_shader->state->executable;
func(
- machine.Inputs,
- machine.Outputs,
- machine.Consts,
- machine.Temps );
+ machine->Inputs,
+ machine->Outputs,
+ machine->Consts,
+ machine->Temps );
}
else {
- tgsi_exec_machine_run( &machine );
+ /* interpreter */
+ tgsi_exec_machine_run( machine );
}
@@ -136,10 +140,10 @@ run_vertex_program(struct draw_context *draw,
float x, y, z, w;
/* Handle attr[0] (position) specially: */
- x = vOut[j]->clip[0] = machine.Outputs[0].xyzw[0].f[j];
- y = vOut[j]->clip[1] = machine.Outputs[0].xyzw[1].f[j];
- z = vOut[j]->clip[2] = machine.Outputs[0].xyzw[2].f[j];
- w = vOut[j]->clip[3] = machine.Outputs[0].xyzw[3].f[j];
+ x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
+ y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
+ z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
+ w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
vOut[j]->clipmask = compute_clipmask(x, y, z, w) | draw->user_clipmask;
vOut[j]->edgeflag = 1;
@@ -162,10 +166,10 @@ run_vertex_program(struct draw_context *draw,
* Subtract two because of the VERTEX_HEADER, CLIP_POS attribs.
*/
for (slot = 1; slot < draw->vertex_info.num_attribs - 2; slot++) {
- vOut[j]->data[slot][0] = machine.Outputs[slot].xyzw[0].f[j];
- vOut[j]->data[slot][1] = machine.Outputs[slot].xyzw[1].f[j];
- vOut[j]->data[slot][2] = machine.Outputs[slot].xyzw[2].f[j];
- vOut[j]->data[slot][3] = machine.Outputs[slot].xyzw[3].f[j];
+ vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
/*
printf("output %d: %f %f %f %f\n", slot,
vOut[j]->data[slot][0],
@@ -235,6 +239,12 @@ void draw_bind_vertex_shader(struct draw_context *draw,
{
draw_flush(draw);
draw->vertex_shader = (struct draw_vertex_shader*)(vcso);
+
+ /* init machine state */
+ tgsi_exec_machine_init(&draw->machine,
+ draw->vertex_shader->state->tokens,
+ PIPE_MAX_SAMPLERS,
+ NULL /*samplers*/ );
}
void draw_delete_vertex_shader(struct draw_context *draw,