diff options
author | Brian <[email protected]> | 2007-08-20 18:53:41 -0600 |
---|---|---|
committer | Brian <[email protected]> | 2007-08-20 18:53:41 -0600 |
commit | 3cb6fc3f9aaa94236be611bad04125c17e01ad6c (patch) | |
tree | aa3c52f8c6faa493acfe36d68390704fa200c8db /src/mesa/pipe/draw/draw_prim.c | |
parent | 3772441c498b997947f68e98830f72649bbc8442 (diff) |
Move vertex transformation/shader code into draw_prim.c to avoid need for vs_flush() function pointer.
Diffstat (limited to 'src/mesa/pipe/draw/draw_prim.c')
-rw-r--r-- | src/mesa/pipe/draw/draw_prim.c | 273 |
1 files changed, 270 insertions, 3 deletions
diff --git a/src/mesa/pipe/draw/draw_prim.c b/src/mesa/pipe/draw/draw_prim.c index d296780fd75..fd8a39106dd 100644 --- a/src/mesa/pipe/draw/draw_prim.c +++ b/src/mesa/pipe/draw/draw_prim.c @@ -35,6 +35,10 @@ #include "draw_context.h" #include "draw_prim.h" +#include "pipe/tgsi/core/tgsi_exec.h" +#include "pipe/tgsi/core/tgsi_build.h" +#include "pipe/tgsi/core/tgsi_util.h" + #define RP_NONE 0 #define RP_POINT 1 @@ -56,6 +60,261 @@ static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { }; + +/** XXX remove */ +#define VERT_RESULT_HPOS 0 +#define VERT_RESULT_MAX 24 + + +static INLINE unsigned +compute_clipmask(float cx, float cy, float cz, float cw) +{ + unsigned mask; +#if defined(macintosh) || defined(__powerpc__) + /* on powerpc cliptest is 17% faster in this way. */ + mask = (((cw < cx) << CLIP_RIGHT_SHIFT)); + mask |= (((cw < -cx) << CLIP_LEFT_SHIFT)); + mask |= (((cw < cy) << CLIP_TOP_SHIFT)); + mask |= (((cw < -cy) << CLIP_BOTTOM_SHIFT)); + mask |= (((cw < cz) << CLIP_FAR_SHIFT)); + mask |= (((cw < -cz) << CLIP_NEAR_SHIFT)); +#else /* !defined(macintosh)) */ + mask = 0x0; + if (-cx + cw < 0) mask |= CLIP_RIGHT_BIT; + if ( cx + cw < 0) mask |= CLIP_LEFT_BIT; + if (-cy + cw < 0) mask |= CLIP_TOP_BIT; + if ( cy + cw < 0) mask |= CLIP_BOTTOM_BIT; + if (-cz + cw < 0) mask |= CLIP_FAR_BIT; + if ( cz + cw < 0) mask |= CLIP_NEAR_BIT; +#endif /* defined(macintosh) */ + return mask; +} + + +/** + * Fetch a float[4] vertex attribute from memory, doing format/type + * conversion as needed. + * XXX this might be a temporary thing. + */ +static void +fetch_attrib4(const void *ptr, unsigned format, float attrib[4]) +{ + /* defaults */ + attrib[1] = 0.0; + attrib[2] = 0.0; + attrib[3] = 1.0; + switch (format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + attrib[3] = ((float *) ptr)[3]; + /* fall-through */ + case PIPE_FORMAT_R32G32B32_FLOAT: + attrib[2] = ((float *) ptr)[2]; + /* fall-through */ + case PIPE_FORMAT_R32G32_FLOAT: + attrib[1] = ((float *) ptr)[1]; + /* fall-through */ + case PIPE_FORMAT_R32_FLOAT: + attrib[0] = ((float *) ptr)[0]; + break; + default: + assert(0); + } +} + + +/** + * Transform vertices with the current vertex program/shader + * Up to four vertices can be shaded at a time. + * \param vbuffer the input vertex data + * \param elts indexes of four input vertices + * \param count number of vertices to shade [1..4] + * \param vOut array of pointers to four output vertices + */ +static void +run_vertex_program(struct draw_context *draw, + unsigned elts[4], unsigned count, + struct vertex_header *vOut[]) +{ + struct tgsi_exec_machine machine; + unsigned int j; + + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + assert(count <= 4); + +#ifdef DEBUG + memset( &machine, 0, sizeof( machine ) ); +#endif + + /* init machine state */ + tgsi_exec_machine_init(&machine, + draw->vertex_shader.tokens, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/ ); + + /* Consts does not require 16 byte alignment. */ + machine.Consts = draw->vertex_shader.constants->constant; + + machine.Inputs = ALIGN16_ASSIGN(inputs); + machine.Outputs = ALIGN16_ASSIGN(outputs); + + + if (0) + { + unsigned attr; + for (attr = 0; attr < 16; attr++) { + if (draw->vertex_shader.inputs_read & (1 << attr)) { + printf("attr %d: buf_off %d src_off %d pitch %d\n", + attr, + draw->vertex_buffer[attr].buffer_offset, + draw->vertex_element[attr].src_offset, + draw->vertex_buffer[attr].pitch); + } + } + } + + /* load machine inputs */ + for (j = 0; j < count; j++) { + unsigned attr; + for (attr = 0; attr < 16; attr++) { + if (draw->vertex_shader.inputs_read & (1 << attr)) { + unsigned buf = draw->vertex_element[attr].vertex_buffer_index; + const void *src + = (const void *) ((const ubyte *) draw->mapped_vbuffer[buf] + + draw->vertex_buffer[buf].buffer_offset + + draw->vertex_element[attr].src_offset + + elts[j] * draw->vertex_buffer[buf].pitch); + float p[4]; + + fetch_attrib4(src, draw->vertex_element[attr].src_format, p); + + machine.Inputs[attr].xyzw[0].f[j] = p[0]; /*X*/ + machine.Inputs[attr].xyzw[1].f[j] = p[1]; /*Y*/ + machine.Inputs[attr].xyzw[2].f[j] = p[2]; /*Z*/ + machine.Inputs[attr].xyzw[3].f[j] = p[3]; /*W*/ +#if 0 + if (attr == 0) { + printf("Input vertex %d: %f %f %f\n", + j, p[0], p[1], p[2]); + } +#endif + } + } + } + +#if 0 + printf("Consts:\n"); + for (i = 0; i < 4; i++) { + printf(" %d: %f %f %f %f\n", i, + machine.Consts[i][0], + machine.Consts[i][1], + machine.Consts[i][2], + machine.Consts[i][3]); + } +#endif + + /* run shader */ + tgsi_exec_machine_run( &machine ); + +#if 0 + printf("VS result: %f %f %f %f\n", + outputs[0].xyzw[0].f[0], + outputs[0].xyzw[1].f[0], + outputs[0].xyzw[2].f[0], + outputs[0].xyzw[3].f[0]); +#endif + + /* store machine results */ + assert(draw->vertex_shader.outputs_written & (1 << VERT_RESULT_HPOS)); + for (j = 0; j < count; j++) { + unsigned attr, slot; + float x, y, z, w; + + /* Handle attr[0] (position) specially: */ + x = vOut[j]->clip[0] = outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = outputs[0].xyzw[3].f[j]; + + vOut[j]->clipmask = compute_clipmask(x, y, z, w); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0 / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; +#if 0 + printf("wincoord: %f %f %f %f\n", + vOut[j]->data[0][0], + vOut[j]->data[0][1], + vOut[j]->data[0][2], + vOut[j]->data[0][3]); +#endif + + /* remaining attributes: */ + /* pack into sequential post-transform attrib slots */ + slot = 1; + for (attr = 1; attr < VERT_RESULT_MAX; attr++) { + if (draw->vertex_shader.outputs_written & (1 << attr)) { + assert(slot < draw->nr_attrs); + vOut[j]->data[slot][0] = outputs[attr].xyzw[0].f[j]; + vOut[j]->data[slot][1] = outputs[attr].xyzw[1].f[j]; + vOut[j]->data[slot][2] = outputs[attr].xyzw[2].f[j]; + vOut[j]->data[slot][3] = outputs[attr].xyzw[3].f[j]; + slot++; + } + } + } + +#if 0 + memcpy( + quad->outputs.color, + &machine.Outputs[1].xyzw[0].f[0], + sizeof( quad->outputs.color ) ); +#endif +} + + +/** + * Called by the draw module when the vertx cache needs to be flushed. + * This involves running the vertex shader. + */ +static void transform_vertices( struct draw_context *draw ) +{ + unsigned i, j; + + /* run vertex shader on vertex cache entries, four per invokation */ + for (i = 0; i < draw->vs.queue_nr; i += 4) { + struct vertex_header *dests[4]; + unsigned elts[4]; + int n; + + for (j = 0; j < 4; j++) { + elts[j] = draw->vs.queue[i + j].elt; + dests[j] = draw->vs.queue[i + j].dest; + } + + n = MIN2(4, draw->vs.queue_nr - i); + assert(n > 0); + assert(n <= 4); + + run_vertex_program(draw, elts, n, dests); + } + + draw->vs.queue_nr = 0; +} + + void draw_flush( struct draw_context *draw ) { struct draw_stage *first = draw->pipeline.first; @@ -63,9 +322,7 @@ void draw_flush( struct draw_context *draw ) /* Make sure all vertices are available: */ - assert(draw->vs_flush); - draw->vs_flush( draw ); - + transform_vertices(draw); switch (draw->reduced_prim) { case RP_TRI: @@ -459,6 +716,16 @@ draw_set_mapped_element_buffer( struct draw_context *draw, } +/** + * Tell drawing context where to find mapped vertex buffers. + */ +void draw_set_mapped_vertex_buffer(struct draw_context *draw, + unsigned attr, const void *buffer) +{ + draw->mapped_vbuffer[attr] = buffer; +} + + unsigned draw_prim_info(unsigned prim, unsigned *first, unsigned *incr) { |