From ec8cbd79ac4065111365a6720c9564de56855cc8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 4 Jan 2012 17:38:55 +0000 Subject: draw/softpipe: EXT_transform_feedback support (v2) This replaces the current code with an implementation compatible with the new gallium interface. I've left some of the remains of the interface intact so llvmpipe keeps building correctly, and I'll take a look at fixing llvmpipe up later. v2: fixup as per Brian's review Signed-off-by: Dave Airlie --- src/gallium/auxiliary/draw/draw_context.c | 21 +++-- src/gallium/auxiliary/draw/draw_context.h | 19 ++++- src/gallium/auxiliary/draw/draw_private.h | 4 +- src/gallium/auxiliary/draw/draw_pt_so_emit.c | 114 ++++++++++++++++----------- src/gallium/auxiliary/draw/draw_vbuf.h | 3 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 1 + 6 files changed, 105 insertions(+), 57 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index f91e408cbf0..10a20f76178 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -724,17 +724,26 @@ draw_get_rasterizer_no_cull( struct draw_context *draw, return draw->rasterizer_no_cull[scissor][flatshade]; } +void +draw_set_mapped_so_targets(struct draw_context *draw, + int num_targets, + struct draw_so_target *targets[PIPE_MAX_SO_BUFFERS]) +{ + int i; + + for (i = 0; i < num_targets; i++) + draw->so.targets[i] = targets[i]; + for (i = num_targets; i < PIPE_MAX_SO_BUFFERS; i++) + draw->so.targets[i] = NULL; + + draw->so.num_targets = num_targets; +} + void draw_set_mapped_so_buffers(struct draw_context *draw, void *buffers[PIPE_MAX_SO_BUFFERS], unsigned num_buffers) { - int i; - - for (i = 0; i < num_buffers; ++i) { - draw->so.buffers[i] = buffers[i]; - } - draw->so.num_buffers = num_buffers; } void diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 
7655ad0a90d..02c176e8f4b 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -50,7 +50,18 @@ struct draw_fragment_shader; struct tgsi_sampler; struct gallivm_state; - +/* + * structure to contain driver internal information + * for stream out support. mapping stores the pointer + * to the buffer contents, and internal_offset + * stores an internal counter of how much of the stream + * out buffer is used (in bytes). + */ +struct draw_so_target { + struct pipe_stream_output_target target; + void *mapping; + int internal_offset; +}; struct draw_context *draw_create( struct pipe_context *pipe ); @@ -202,6 +213,12 @@ void draw_set_mapped_so_buffers(struct draw_context *draw, void *buffers[PIPE_MAX_SO_BUFFERS], unsigned num_buffers); + +void +draw_set_mapped_so_targets(struct draw_context *draw, + int num_targets, + struct draw_so_target *targets[PIPE_MAX_SO_BUFFERS]); + void draw_set_so_state(struct draw_context *draw, struct pipe_stream_output_info *state); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index c0ef18e3ed2..91112eb96b4 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -271,8 +271,8 @@ struct draw_context /** Stream output (vertex feedback) state */ struct { struct pipe_stream_output_info state; - void *buffers[PIPE_MAX_SO_BUFFERS]; - uint num_buffers; + struct draw_so_target *targets[PIPE_MAX_SO_BUFFERS]; + uint num_targets; } so; /* Clip derived state: diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c index 2dc9e299f56..d4182a843ca 100644 --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -25,29 +25,29 @@ * **************************************************************************/ -#include "draw/draw_context.h" #include "draw/draw_private.h" +#include "draw/draw_vs.h" +#include 
"draw/draw_context.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "pipe/p_state.h" + #include "util/u_math.h" #include "util/u_memory.h" struct pt_so_emit { struct draw_context *draw; - void *buffers[PIPE_MAX_SO_BUFFERS]; - unsigned input_vertex_stride; const float (*inputs)[4]; boolean has_so; - boolean single_buffer; - unsigned emitted_primitives; unsigned emitted_vertices; + unsigned generated_primitives; }; @@ -55,15 +55,15 @@ void draw_pt_so_emit_prepare(struct pt_so_emit *emit) { struct draw_context *draw = emit->draw; - emit->has_so = (draw->so.state.num_outputs > 0); + emit->has_so = (draw->vs.vertex_shader->state.stream_output.num_outputs > 0); /* if we have a state with outputs make sure we have * buffers to output to */ if (emit->has_so) { boolean has_valid_buffer = FALSE; unsigned i; - for (i = 0; i < draw->so.num_buffers; ++i) { - if (draw->so.buffers[i]) { + for (i = 0; i < draw->so.num_targets; ++i) { + if (draw->so.targets[i]) { has_valid_buffer = TRUE; break; } @@ -122,6 +122,29 @@ is_component_writable(unsigned mask, } } +static INLINE int mask_num_comps(int register_mask) +{ + int comps = 0; + switch (register_mask) { + case TGSI_WRITEMASK_XYZW: + comps = 4; + break; + case TGSI_WRITEMASK_XYZ: + comps = 3; + break; + case TGSI_WRITEMASK_XY: + comps = 2; + break; + case TGSI_WRITEMASK_X: + comps = 1; + break; + default: + assert(0); + break; + } + return comps; +} + static void so_emit_prim(struct pt_so_emit *so, unsigned *indices, unsigned num_vertices) @@ -131,57 +154,58 @@ static void so_emit_prim(struct pt_so_emit *so, struct draw_context *draw = so->draw; const float (*input_ptr)[4]; const struct pipe_stream_output_info *state = - &draw->so.state; - float **buffer = 0; + &draw->vs.vertex_shader->state.stream_output; + float *buffer; + int buffer_total_bytes[PIPE_MAX_SO_BUFFERS]; input_ptr = so->inputs; + ++so->generated_primitives; + + for (i = 0; i < draw->so.num_targets; i++) { + struct 
draw_so_target *target = draw->so.targets[i]; + buffer_total_bytes[i] = target->internal_offset; + } + + /* check have we space to emit prim first - if not don't do anything */ + for (i = 0; i < num_vertices; ++i) { + for (slot = 0; slot < state->num_outputs; ++slot) { + unsigned writemask = state->output[slot].register_mask; + int ob = state->output[slot].output_buffer; + + if ((buffer_total_bytes[ob] + mask_num_comps(writemask) * sizeof(float)) > + draw->so.targets[ob]->target.buffer_size) { + return; + } + buffer_total_bytes[ob] += mask_num_comps(writemask) * sizeof(float); + } + } + for (i = 0; i < num_vertices; ++i) { const float (*input)[4]; unsigned total_written_compos = 0; /*debug_printf("%d) vertex index = %d (prim idx = %d)\n", i, indices[i], prim_idx);*/ input = (const float (*)[4])( (const char *)input_ptr + (indices[i] * input_vertex_stride)); + for (slot = 0; slot < state->num_outputs; ++slot) { unsigned idx = state->output[slot].register_index; unsigned writemask = state->output[slot].register_mask; unsigned written_compos = 0; unsigned compo; + int ob = state->output[slot].output_buffer; - buffer = (float**)&so->buffers[state->output[slot].output_buffer]; - - /*debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", - slot, vs_slot, idx);*/ -#if 1 - assert(!util_is_inf_or_nan(input[idx][0])); - assert(!util_is_inf_or_nan(input[idx][1])); - assert(!util_is_inf_or_nan(input[idx][2])); - assert(!util_is_inf_or_nan(input[idx][3])); -#endif + buffer = (float *)((char *)draw->so.targets[ob]->mapping + + draw->so.targets[ob]->target.buffer_offset + + draw->so.targets[ob]->internal_offset); for (compo = 0; compo < 4; ++compo) { if (is_component_writable(writemask, compo)) { - float *buf = *buffer; - buf[written_compos++] = input[idx][compo]; + buffer[written_compos++] = input[idx][compo]; } } -#if 0 - debug_printf("\t\t(writemask = %d)%f %f %f %f\n", - writemask, - input[idx][0], - input[idx][1], - input[idx][2], - input[idx][3]); -#endif - *buffer += 
written_compos; + draw->so.targets[ob]->internal_offset += written_compos * sizeof(float); total_written_compos += written_compos; } - if (so->single_buffer) { - int stride = (int)state->stride - - sizeof(float) * total_written_compos; - - debug_assert(stride >= 0); - *buffer = (float*) (((char*)*buffer) + stride); - } } so->emitted_vertices += num_vertices; ++so->emitted_primitives; @@ -235,23 +259,18 @@ void draw_pt_so_emit( struct pt_so_emit *emit, { struct draw_context *draw = emit->draw; struct vbuf_render *render = draw->render; + struct pipe_stream_output_info *so; unsigned start, i; if (!emit->has_so) return; + so = &draw->vs.vertex_shader->state.stream_output; emit->emitted_vertices = 0; emit->emitted_primitives = 0; + emit->generated_primitives = 0; emit->input_vertex_stride = input_verts->stride; emit->inputs = (const float (*)[4])input_verts->verts->data; - for (i = 0; i < draw->so.num_buffers; ++i) { - emit->buffers[i] = draw->so.buffers[i]; - } - emit->single_buffer = TRUE; - for (i = 0; i < draw->so.state.num_outputs; ++i) { - if (draw->so.state.output[i].output_buffer != 0) - emit->single_buffer = FALSE; - } /* XXX: need to flush to get prim_vbuf.c to release its allocation??*/ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); @@ -272,7 +291,8 @@ void draw_pt_so_emit( struct pt_so_emit *emit, render->set_stream_output_info(render, emit->emitted_primitives, - emit->emitted_vertices); + emit->emitted_vertices, + emit->generated_primitives); } diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 1f1a7b4972f..f10d185868d 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -124,7 +124,8 @@ struct vbuf_render { */ void (*set_stream_output_info)( struct vbuf_render *vbufr, unsigned primitive_count, - unsigned vertices_count ); + unsigned vertices_count, + unsigned primitive_generated ); }; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c 
b/src/gallium/auxiliary/draw/draw_vs_exec.c index d9c4209a42b..3f89881534b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -217,6 +217,7 @@ draw_create_vs_exec(struct draw_context *draw, tgsi_scan_shader(state->tokens, &vs->base.info); + vs->base.state.stream_output = state->stream_output; vs->base.draw = draw; vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; -- cgit v1.2.3