diff options
author | Brian <[email protected]> | 2007-10-02 11:46:11 -0600 |
---|---|---|
committer | Brian <[email protected]> | 2007-10-02 11:46:11 -0600 |
commit | 0d13ade0cdd38759936a74824efbd6ac8b563aed (patch) | |
tree | 1fe31314186843a968470d42f0151ccea410ad7d /src/mesa/pipe/draw | |
parent | 57d3770f35730bef17e5d93bd424a59eb6daec4c (diff) |
Move tgsi machine state init/allocations so they're done less frequently.
This, plus expanding all instructions ahead of time, seems to have improved
the performance of program execution by 8x or so.
Diffstat (limited to 'src/mesa/pipe/draw')
-rw-r--r-- | src/mesa/pipe/draw/draw_private.h | 4 | ||||
-rw-r--r-- | src/mesa/pipe/draw/draw_vertex_shader.c | 52 |
2 files changed, 35 insertions, 21 deletions
diff --git a/src/mesa/pipe/draw/draw_private.h b/src/mesa/pipe/draw/draw_private.h index 12a970a6711..a54fef41e74 100644 --- a/src/mesa/pipe/draw/draw_private.h +++ b/src/mesa/pipe/draw/draw_private.h @@ -47,6 +47,8 @@ #include "draw_vertex.h" #include "x86/rtasm/x86sse.h" +#include "pipe/tgsi/exec/tgsi_core.h" + /** * Basic vertex info. @@ -187,6 +189,8 @@ struct draw_context unsigned prim; /**< current prim type: PIPE_PRIM_x */ unsigned reduced_prim; + /** TGSI program interpreter runtime state */ + struct tgsi_exec_machine machine; /* Post-tnl vertex cache: */ diff --git a/src/mesa/pipe/draw/draw_vertex_shader.c b/src/mesa/pipe/draw/draw_vertex_shader.c index 3518bd52a3f..e3bcd353341 100644 --- a/src/mesa/pipe/draw/draw_vertex_shader.c +++ b/src/mesa/pipe/draw/draw_vertex_shader.c @@ -86,7 +86,7 @@ run_vertex_program(struct draw_context *draw, unsigned elts[4], unsigned count, struct vertex_header *vOut[]) { - struct tgsi_exec_machine machine; + struct tgsi_exec_machine *machine = &draw->machine; unsigned int j; ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); @@ -98,35 +98,39 @@ run_vertex_program(struct draw_context *draw, assert(draw->vertex_shader->state->output_semantic_name[0] == TGSI_SEMANTIC_POSITION); -#ifdef DEBUG - memset( &machine, 0, sizeof( machine ) ); +#ifdef DEBUG_foo + memset( machine, 0, sizeof( *machine ) ); #endif +#if 0 /* init machine state */ - tgsi_exec_machine_init(&machine, + tgsi_exec_machine_init(machine, draw->vertex_shader->state->tokens, PIPE_MAX_SAMPLERS, NULL /*samplers*/ ); +#endif /* Consts does not require 16 byte alignment. */ - machine.Consts = (float (*)[4]) draw->mapped_constants; + machine->Consts = (float (*)[4]) draw->mapped_constants; - machine.Inputs = ALIGN16_ASSIGN(inputs); - machine.Outputs = ALIGN16_ASSIGN(outputs); + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); - draw_vertex_fetch( draw, &machine, elts, count ); + draw_vertex_fetch( draw, machine, elts, count ); /* run shader */ if( draw->vertex_shader->state->executable != NULL ) { + /* SSE */ codegen_function func = (codegen_function) draw->vertex_shader->state->executable; func( - machine.Inputs, - machine.Outputs, - machine.Consts, - machine.Temps ); + machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps ); } else { - tgsi_exec_machine_run( &machine ); + /* interpreter */ + tgsi_exec_machine_run( machine ); } @@ -136,10 +140,10 @@ run_vertex_program(struct draw_context *draw, float x, y, z, w; /* Handle attr[0] (position) specially: */ - x = vOut[j]->clip[0] = machine.Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine.Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine.Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine.Outputs[0].xyzw[3].f[j]; + x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; vOut[j]->clipmask = compute_clipmask(x, y, z, w) | draw->user_clipmask; vOut[j]->edgeflag = 1; @@ -162,10 +166,10 @@ run_vertex_program(struct draw_context *draw, * Subtract two because of the VERTEX_HEADER, CLIP_POS attribs. */ for (slot = 1; slot < draw->vertex_info.num_attribs - 2; slot++) { - vOut[j]->data[slot][0] = machine.Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine.Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine.Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine.Outputs[slot].xyzw[3].f[j]; + vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; /* printf("output %d: %f %f %f %f\n", slot, vOut[j]->data[slot][0], @@ -235,6 +239,12 @@ void draw_bind_vertex_shader(struct draw_context *draw, { draw_flush(draw); draw->vertex_shader = (struct draw_vertex_shader*)(vcso); + + /* init machine state */ + tgsi_exec_machine_init(&draw->machine, + draw->vertex_shader->state->tokens, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/ ); } void draw_delete_vertex_shader(struct draw_context *draw, |