From a773f06e969a3992451dd7fe6fd55ea96b2774fa Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 17 Apr 2008 23:44:32 +0100 Subject: draw: split off all the extra functionality in the vertex shader This will at least allow us to make the initial gains to get decent vertex performance much more quickly & with higher confidence of getting it right. At some later point can look again at code-generating all the fetch/cliptest/viewport extras in the same block as the vertex shader. For now, just need to get some decent baseline performance. --- src/gallium/auxiliary/draw/draw_vs_llvm.c | 39 ++++++++++++++----------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_llvm.c') diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index d0baca715e6..4dbfa955a4e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -47,6 +47,7 @@ struct draw_llvm_vertex_shader { struct draw_vertex_shader base; struct gallivm_prog *llvm_prog; + struct tgsi_exec_machine *machine; }; @@ -77,12 +78,9 @@ vs_llvm_run( struct draw_vertex_shader *base, struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; - struct tgsi_exec_machine *machine = &draw->machine; + struct tgsi_exec_machine *machine = shader->machine; unsigned int j; unsigned int clipped = 0; - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -93,13 +91,12 @@ vs_llvm_run( struct draw_vertex_shader *base, /* Consts does not require 16 byte alignment. */ machine->Consts = (float (*)[4]) draw->user.constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); if (draw->rasterizer->bypass_vs) { /* outputs are just the inputs */ - machine->Outputs = machine->Inputs; + outputs = machine->Inputs; } else { - machine->Outputs = ALIGN16_ASSIGN(outputs); + outputs = machine->Outputs; } @@ -119,10 +116,10 @@ vs_llvm_run( struct draw_vertex_shader *base, unsigned slot; float x, y, z, w; - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + x = vOut[j]->clip[0] = outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = outputs[0].xyzw[3].f[j]; if (!draw->rasterizer->bypass_clipping) { vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, @@ -159,10 +156,10 @@ vs_llvm_run( struct draw_vertex_shader *base, * vertex attrib slots. */ for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + vOut[j]->data[slot][0] = outputs[slot].xyzw[0].f[j]; + vOut[j]->data[slot][1] = outputs[slot].xyzw[1].f[j]; + vOut[j]->data[slot][2] = outputs[slot].xyzw[2].f[j]; + vOut[j]->data[slot][3] = outputs[slot].xyzw[3].f[j]; } } /* loop over vertices */ return clipped != 0; @@ -183,7 +180,7 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; - struct tgsi_exec_machine *machine = &draw->machine; + struct tgsi_exec_machine *machine = shader->machine; unsigned int j; @@ -199,6 +196,8 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; } + + input = (const float (*)[4])((const char *)input + input_stride); } /* run shader */ @@ -216,12 +215,9 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - /* Advance input, output pointers: - */ - input = (const float (*)[4])((const char *)input + input_stride); - output = (float (*)[4])((char *)output + output_stride); + output = (float (*)[4])((char *)output + output_stride); + } } } @@ -263,6 +259,7 @@ draw_create_vs_llvm(struct draw_context *draw, vs->base.run = vs_llvm_run; vs->base.run_linear = vs_llvm_run_linear; vs->base.delete = vs_llvm_delete; + vs->machine = &draw->machine; { struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); -- cgit v1.2.3