about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Brian <[email protected]> 2007-10-02 11:46:11 -0600
committer: Brian <[email protected]> 2007-10-02 11:46:11 -0600
commit: 0d13ade0cdd38759936a74824efbd6ac8b563aed (patch)
tree: 1fe31314186843a968470d42f0151ccea410ad7d
parent: 57d3770f35730bef17e5d93bd424a59eb6daec4c (diff)
Move tgsi machine state init/allocations so they're done less frequently.
This, plus expanding all instructions ahead of time, seems to have improved the performance of program execution by 8x or so.
-rw-r--r-- src/mesa/pipe/draw/draw_private.h 4
-rw-r--r-- src/mesa/pipe/draw/draw_vertex_shader.c 52
-rwxr-xr-x src/mesa/pipe/softpipe/sp_quad_fs.c 81
-rw-r--r-- src/mesa/pipe/tgsi/exec/tgsi_exec.c 176
-rw-r--r-- src/mesa/pipe/tgsi/exec/tgsi_exec.h 8
5 files changed, 179 insertions, 142 deletions
diff --git a/src/mesa/pipe/draw/draw_private.h b/src/mesa/pipe/draw/draw_private.h
index 12a970a6711..a54fef41e74 100644
--- a/src/mesa/pipe/draw/draw_private.h
+++ b/src/mesa/pipe/draw/draw_private.h
@@ -47,6 +47,8 @@
#include "draw_vertex.h"
#include "x86/rtasm/x86sse.h"
+#include "pipe/tgsi/exec/tgsi_core.h"
+
/**
* Basic vertex info.
@@ -187,6 +189,8 @@ struct draw_context
unsigned prim; /**< current prim type: PIPE_PRIM_x */
unsigned reduced_prim;
+ /** TGSI program interpreter runtime state */
+ struct tgsi_exec_machine machine;
/* Post-tnl vertex cache:
*/
diff --git a/src/mesa/pipe/draw/draw_vertex_shader.c b/src/mesa/pipe/draw/draw_vertex_shader.c
index 3518bd52a3f..e3bcd353341 100644
--- a/src/mesa/pipe/draw/draw_vertex_shader.c
+++ b/src/mesa/pipe/draw/draw_vertex_shader.c
@@ -86,7 +86,7 @@ run_vertex_program(struct draw_context *draw,
unsigned elts[4], unsigned count,
struct vertex_header *vOut[])
{
- struct tgsi_exec_machine machine;
+ struct tgsi_exec_machine *machine = &draw->machine;
unsigned int j;
ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
@@ -98,35 +98,39 @@ run_vertex_program(struct draw_context *draw,
assert(draw->vertex_shader->state->output_semantic_name[0]
== TGSI_SEMANTIC_POSITION);
-#ifdef DEBUG
- memset( &machine, 0, sizeof( machine ) );
+#ifdef DEBUG_foo
+ memset( machine, 0, sizeof( *machine ) );
#endif
+#if 0
/* init machine state */
- tgsi_exec_machine_init(&machine,
+ tgsi_exec_machine_init(machine,
draw->vertex_shader->state->tokens,
PIPE_MAX_SAMPLERS,
NULL /*samplers*/ );
+#endif
/* Consts does not require 16 byte alignment. */
- machine.Consts = (float (*)[4]) draw->mapped_constants;
+ machine->Consts = (float (*)[4]) draw->mapped_constants;
- machine.Inputs = ALIGN16_ASSIGN(inputs);
- machine.Outputs = ALIGN16_ASSIGN(outputs);
+ machine->Inputs = ALIGN16_ASSIGN(inputs);
+ machine->Outputs = ALIGN16_ASSIGN(outputs);
- draw_vertex_fetch( draw, &machine, elts, count );
+ draw_vertex_fetch( draw, machine, elts, count );
/* run shader */
if( draw->vertex_shader->state->executable != NULL ) {
+ /* SSE */
codegen_function func = (codegen_function) draw->vertex_shader->state->executable;
func(
- machine.Inputs,
- machine.Outputs,
- machine.Consts,
- machine.Temps );
+ machine->Inputs,
+ machine->Outputs,
+ machine->Consts,
+ machine->Temps );
}
else {
- tgsi_exec_machine_run( &machine );
+ /* interpreter */
+ tgsi_exec_machine_run( machine );
}
@@ -136,10 +140,10 @@ run_vertex_program(struct draw_context *draw,
float x, y, z, w;
/* Handle attr[0] (position) specially: */
- x = vOut[j]->clip[0] = machine.Outputs[0].xyzw[0].f[j];
- y = vOut[j]->clip[1] = machine.Outputs[0].xyzw[1].f[j];
- z = vOut[j]->clip[2] = machine.Outputs[0].xyzw[2].f[j];
- w = vOut[j]->clip[3] = machine.Outputs[0].xyzw[3].f[j];
+ x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
+ y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
+ z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
+ w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
vOut[j]->clipmask = compute_clipmask(x, y, z, w) | draw->user_clipmask;
vOut[j]->edgeflag = 1;
@@ -162,10 +166,10 @@ run_vertex_program(struct draw_context *draw,
* Subtract two because of the VERTEX_HEADER, CLIP_POS attribs.
*/
for (slot = 1; slot < draw->vertex_info.num_attribs - 2; slot++) {
- vOut[j]->data[slot][0] = machine.Outputs[slot].xyzw[0].f[j];
- vOut[j]->data[slot][1] = machine.Outputs[slot].xyzw[1].f[j];
- vOut[j]->data[slot][2] = machine.Outputs[slot].xyzw[2].f[j];
- vOut[j]->data[slot][3] = machine.Outputs[slot].xyzw[3].f[j];
+ vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
/*
printf("output %d: %f %f %f %f\n", slot,
vOut[j]->data[slot][0],
@@ -235,6 +239,12 @@ void draw_bind_vertex_shader(struct draw_context *draw,
{
draw_flush(draw);
draw->vertex_shader = (struct draw_vertex_shader*)(vcso);
+
+ /* init machine state */
+ tgsi_exec_machine_init(&draw->machine,
+ draw->vertex_shader->state->tokens,
+ PIPE_MAX_SAMPLERS,
+ NULL /*samplers*/ );
}
void draw_delete_vertex_shader(struct draw_context *draw,
diff --git a/src/mesa/pipe/softpipe/sp_quad_fs.c b/src/mesa/pipe/softpipe/sp_quad_fs.c
index 673d339f412..57c01dcfcc3 100755
--- a/src/mesa/pipe/softpipe/sp_quad_fs.c
+++ b/src/mesa/pipe/softpipe/sp_quad_fs.c
@@ -45,6 +45,8 @@ struct quad_shade_stage
{
struct quad_stage stage;
struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];
+ struct tgsi_exec_machine machine;
+ struct tgsi_exec_vector *inputs, *outputs;
};
@@ -83,58 +85,41 @@ shade_quad(
struct softpipe_context *softpipe = qs->softpipe;
const float fx = (float) quad->x0;
const float fy = (float) quad->y0;
- struct tgsi_exec_machine machine;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
-
-#ifdef DEBUG
- memset( &machine, 0, sizeof( machine ) );
-#endif
-
- /* init machine state */
- tgsi_exec_machine_init(
- &machine,
- softpipe->fs->tokens,
- PIPE_MAX_SAMPLERS,
- qss->samplers );
+ struct tgsi_exec_machine *machine = &qss->machine;
/* Consts does not require 16 byte alignment. */
- machine.Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
-
- machine.Inputs = ALIGN16_ASSIGN(inputs);
- machine.Outputs = ALIGN16_ASSIGN(outputs);
+ machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
- machine.InterpCoefs = quad->coef;
+ machine->InterpCoefs = quad->coef;
- machine.Inputs[0].xyzw[0].f[0] = fx;
- machine.Inputs[0].xyzw[0].f[1] = fx + 1.0f;
- machine.Inputs[0].xyzw[0].f[2] = fx;
- machine.Inputs[0].xyzw[0].f[3] = fx + 1.0f;
+ machine->Inputs[0].xyzw[0].f[0] = fx;
+ machine->Inputs[0].xyzw[0].f[1] = fx + 1.0f;
+ machine->Inputs[0].xyzw[0].f[2] = fx;
+ machine->Inputs[0].xyzw[0].f[3] = fx + 1.0f;
- machine.Inputs[0].xyzw[1].f[0] = fy;
- machine.Inputs[0].xyzw[1].f[1] = fy;
- machine.Inputs[0].xyzw[1].f[2] = fy + 1.0f;
- machine.Inputs[0].xyzw[1].f[3] = fy + 1.0f;
+ machine->Inputs[0].xyzw[1].f[0] = fy;
+ machine->Inputs[0].xyzw[1].f[1] = fy;
+ machine->Inputs[0].xyzw[1].f[2] = fy + 1.0f;
+ machine->Inputs[0].xyzw[1].f[3] = fy + 1.0f;
/* run shader */
if( softpipe->fs->executable != NULL ) {
codegen_function func = (codegen_function) softpipe->fs->executable;
func(
- machine.Inputs,
- machine.Outputs,
- machine.Consts,
- machine.Temps,
- machine.InterpCoefs );
+ machine->Inputs,
+ machine->Outputs,
+ machine->Consts,
+ machine->Temps,
+ machine->InterpCoefs );
}
else {
- tgsi_exec_machine_run( &machine );
+ tgsi_exec_machine_run( machine );
}
/* store result color (always in output[1]) */
memcpy(
quad->outputs.color,
- &machine.Outputs[1].xyzw[0].f[0],
+ &machine->Outputs[1].xyzw[0].f[0],
sizeof( quad->outputs.color ) );
#if 0
@@ -142,14 +127,14 @@ shade_quad(
/* XXX temporary */
memcpy(
quad->outputs.depth,
- &machine.Outputs[0].xyzw[2],
+ machine->Outputs[0].xyzw[2],
sizeof( quad->outputs.depth ) );
}
#else
{
uint i;
for (i = 0; i < 4; i++) {
- quad->outputs.depth[i] = machine.Inputs[0].xyzw[2].f[i];
+ quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i];
#if 0
printf("output z %f\n", quad->outputs.depth[i]);
#endif
@@ -188,6 +173,12 @@ static void shade_begin(struct quad_stage *qs)
}
}
+ /* XXX only do this if the fragment shader changes... */
+ tgsi_exec_machine_init(&qss->machine,
+ softpipe->fs->tokens,
+ PIPE_MAX_SAMPLERS,
+ qss->samplers );
+
if (qs->next)
qs->next->begin(qs->next);
}
@@ -195,11 +186,17 @@ static void shade_begin(struct quad_stage *qs)
struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
{
- struct quad_shade_stage *stage = CALLOC_STRUCT(quad_shade_stage);
+ struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
+
+ /* allocate storage for program inputs/outputs, aligned to 16 bytes */
+ qss->inputs = malloc(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16);
+ qss->outputs = malloc(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16);
+ qss->machine.Inputs = align16(qss->inputs);
+ qss->machine.Outputs = align16(qss->outputs);
- stage->stage.softpipe = softpipe;
- stage->stage.begin = shade_begin;
- stage->stage.run = shade_quad;
+ qss->stage.softpipe = softpipe;
+ qss->stage.begin = shade_begin;
+ qss->stage.run = shade_quad;
- return &stage->stage;
+ return &qss->stage;
}
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.c b/src/mesa/pipe/tgsi/exec/tgsi_exec.c
index 77a24ec1d8b..1c515a26e33 100644
--- a/src/mesa/pipe/tgsi/exec/tgsi_exec.c
+++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.c
@@ -65,6 +65,80 @@
#define CHAN_Z 2
#define CHAN_W 3
+
+static void
+expand_program(struct tgsi_exec_machine *mach )
+{
+ struct tgsi_full_instruction *instructions;
+ struct tgsi_full_declaration *declarations;
+ struct tgsi_parse_context parse;
+ uint k;
+ uint maxInstructions = 10, numInstructions = 0;
+ uint maxDeclarations = 10, numDeclarations = 0;
+
+ k = tgsi_parse_init( &parse, mach->Tokens );
+ if (k != TGSI_PARSE_OK) {
+ printf("Problem parsing!\n");
+ return;
+ }
+
+ declarations = (struct tgsi_full_declaration *)
+ malloc(maxDeclarations * sizeof(struct tgsi_full_declaration));
+
+ instructions = (struct tgsi_full_instruction *)
+ malloc(maxInstructions * sizeof(struct tgsi_full_instruction));
+
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ /*
+ exec_declaration( mach, &parse.FullToken.FullDeclaration );
+ */
+ if (numDeclarations == maxDeclarations) {
+ maxDeclarations += 10;
+ declarations = realloc(declarations,
+ maxDeclarations
+ * sizeof(struct tgsi_full_instruction));
+ }
+ memcpy(declarations + numDeclarations,
+ &parse.FullToken.FullInstruction,
+ sizeof(declarations[0]));
+ numDeclarations++;
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (numInstructions == maxInstructions) {
+ maxInstructions += 10;
+ instructions = realloc(instructions,
+ maxInstructions
+ * sizeof(struct tgsi_full_instruction));
+ }
+ memcpy(instructions + numInstructions,
+ &parse.FullToken.FullInstruction,
+ sizeof(instructions[0]));
+ numInstructions++;
+ break;
+ default:
+ assert( 0 );
+ }
+ }
+ tgsi_parse_free (&parse);
+
+ assert(!mach->Instructions);
+ mach->Instructions = instructions;
+ mach->NumInstructions = numInstructions;
+ mach->Declarations = declarations;
+ mach->NumDeclarations = numDeclarations;
+}
+
+
+/**
+ * Initialize machine state by expanding tokens to full instructions,
+ * allocating temporary storage, setting up constants, etc.
+ * After this, we can call tgsi_exec_machine_run() many times.
+ */
void
tgsi_exec_machine_init(
struct tgsi_exec_machine *mach,
@@ -103,16 +177,32 @@ tgsi_exec_machine_init(
mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
}
+ if (mach->Declarations) {
+ free(mach->Declarations);
+ mach->Declarations = NULL;
+ mach->NumDeclarations = 0;
+ }
+ if (mach->Instructions) {
+ free(mach->Instructions);
+ mach->Instructions = NULL;
+ mach->NumInstructions = 0;
+ }
+
mach->CondMask = 0xf;
mach->LoopMask = 0xf;
mach->ExecMask = 0xf;
+
+#if 01
+ tgsi_exec_prepare( mach );
+ expand_program(mach);
+#endif
}
void
tgsi_exec_prepare(
- struct tgsi_exec_machine *mach,
- struct tgsi_exec_labels *labels )
+ struct tgsi_exec_machine *mach )
{
+ struct tgsi_exec_labels *labels = &mach->Labels;
struct tgsi_parse_context parse;
GLuint k;
GLuint instno = 0;
@@ -164,10 +254,10 @@ void
tgsi_exec_machine_run(
struct tgsi_exec_machine *mach )
{
- struct tgsi_exec_labels labels;
-
- tgsi_exec_prepare( mach, &labels );
- tgsi_exec_machine_run2( mach, &labels );
+#if 0
+ tgsi_exec_prepare( mach );
+#endif
+ tgsi_exec_machine_run2( mach );
}
static void
@@ -2170,77 +2260,9 @@ exec_instruction(
}
-static void
-expand_program(struct tgsi_exec_machine *mach )
-{
- struct tgsi_full_instruction *instructions;
- struct tgsi_full_declaration *declarations;
- struct tgsi_parse_context parse;
- uint k;
- uint maxInstructions = 10, numInstructions = 0;
- uint maxDeclarations = 10, numDeclarations = 0;
-
- k = tgsi_parse_init( &parse, mach->Tokens );
- if (k != TGSI_PARSE_OK) {
- printf("Problem parsing!\n");
- return;
- }
-
- declarations = (struct tgsi_full_declaration *)
- malloc(maxDeclarations * sizeof(struct tgsi_full_declaration));
-
- instructions = (struct tgsi_full_instruction *)
- malloc(maxInstructions * sizeof(struct tgsi_full_instruction));
-
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
- tgsi_parse_token( &parse );
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- /*
- exec_declaration( mach, &parse.FullToken.FullDeclaration );
- */
- if (numDeclarations == maxDeclarations) {
- maxDeclarations += 10;
- declarations = realloc(declarations,
- maxDeclarations
- * sizeof(struct tgsi_full_instruction));
- }
- memcpy(declarations + numDeclarations,
- &parse.FullToken.FullInstruction,
- sizeof(declarations[0]));
- numDeclarations++;
- break;
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- break;
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (numInstructions == maxInstructions) {
- maxInstructions += 10;
- instructions = realloc(instructions,
- maxInstructions
- * sizeof(struct tgsi_full_instruction));
- }
- memcpy(instructions + numInstructions,
- &parse.FullToken.FullInstruction,
- sizeof(instructions[0]));
- numInstructions++;
- break;
- default:
- assert( 0 );
- }
- }
- tgsi_parse_free (&parse);
-
- mach->Instructions = instructions;
- mach->NumInstructions = numInstructions;
- mach->Declarations = declarations;
- mach->NumDeclarations = numDeclarations;
-}
-
-
void
tgsi_exec_machine_run2(
- struct tgsi_exec_machine *mach,
- struct tgsi_exec_labels *labels )
+ struct tgsi_exec_machine *mach )
{
#if 0 && MESA
GET_CURRENT_CONTEXT(ctx);
@@ -2255,9 +2277,11 @@ tgsi_exec_machine_run2(
GLuint k;
#endif
+#if 0
if (!mach->Instructions) {
expand_program(mach);
}
+#endif
mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
@@ -2305,8 +2329,10 @@ tgsi_exec_machine_run2(
exec_instruction( mach, mach->Instructions + pc, &pc );
}
+#if 0
free(mach->Declarations);
free(mach->Instructions);
+#endif
}
#endif
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.h b/src/mesa/pipe/tgsi/exec/tgsi_exec.h
index 8997ea9c090..2b493ff6821 100644
--- a/src/mesa/pipe/tgsi/exec/tgsi_exec.h
+++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.h
@@ -154,6 +154,8 @@ struct tgsi_exec_machine
struct tgsi_full_declaration *Declarations;
uint NumDeclarations;
+
+ struct tgsi_exec_labels Labels;
};
@@ -166,8 +168,7 @@ tgsi_exec_machine_init(
void
tgsi_exec_prepare(
- struct tgsi_exec_machine *mach,
- struct tgsi_exec_labels *labels );
+ struct tgsi_exec_machine *mach );
void
tgsi_exec_machine_run(
@@ -175,8 +176,7 @@ tgsi_exec_machine_run(
void
tgsi_exec_machine_run2(
- struct tgsi_exec_machine *mach,
- struct tgsi_exec_labels *labels );
+ struct tgsi_exec_machine *mach );
#if defined __cplusplus
} // extern "C"