diff options
-rw-r--r-- | src/gallium/drivers/svga/svga_draw.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_draw.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_draw_private.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_pipe_draw.c | 130 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_resource_buffer.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_state_vdecl.c | 119 |
6 files changed, 208 insertions, 71 deletions
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 28ba470d8c7..aa096692888 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -242,6 +242,11 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) } +void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl, + int index_bias) +{ + hwtnl->index_bias = index_bias; +} @@ -265,15 +270,16 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, unsigned size = vb ? vb->width0 : 0; unsigned offset = hwtnl->cmd.vdecl[i].array.offset; unsigned stride = hwtnl->cmd.vdecl[i].array.stride; - unsigned index_bias = range->indexBias; + int index_bias = (int) range->indexBias + hwtnl->index_bias; unsigned width; assert(vb); assert(size); assert(offset < size); - assert(index_bias >= 0); assert(min_index <= max_index); - assert(offset + index_bias*stride < size); + if (index_bias >= 0) { + assert(offset + index_bias*stride < size); + } if (min_index != ~0) { assert(offset + (index_bias + min_index) * stride < size); } @@ -394,6 +400,7 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index; hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range; + hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias; pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib); hwtnl->cmd.prim_count++; diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h index a2403d802be..1dac17421e1 100644 --- a/src/gallium/drivers/svga/svga_draw.h +++ b/src/gallium/drivers/svga/svga_draw.h @@ -79,5 +79,8 @@ svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl, enum pipe_error svga_hwtnl_flush( struct svga_hwtnl *hwtnl ); +void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl, + int index_bias); + #endif /* SVGA_DRAW_H_ */ diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h index ca658ac6745..8126f7ee23c 100644 --- a/src/gallium/drivers/svga/svga_draw_private.h +++ b/src/gallium/drivers/svga/svga_draw_private.h @@ -116,6 +116,13 @@ struct draw_cmd { struct svga_hwtnl { struct svga_context *svga; struct u_upload_mgr *upload_ib; + + /* Additional negative index bias due to partial buffer uploads + * This is compensated for in the offset associated with all + * vertex buffers. + */ + + int index_bias; /* Flatshade information: */ diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index a632fb12c94..8e1c764ef5f 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -37,6 +37,116 @@ #include "svga_state.h" #include "svga_swtnl.h" #include "svga_debug.h" +#include "svga_resource_buffer.h" +#include "util/u_upload_mgr.h" + +/** + * svga_upload_user_buffers - upload parts of user buffers + * + * This function streams a part of a user buffer to hw and sets + * svga_buffer::source_offset to the first byte uploaded. After upload + * also svga_buffer::uploaded::buffer is set to !NULL + */ + +static int +svga_upload_user_buffers(struct svga_context *svga, + unsigned start, + unsigned count, + unsigned instance_count) +{ + const struct pipe_vertex_element *ve = svga->curr.velems->velem; + unsigned i; + int ret; + + for (i=0; i < svga->curr.velems->count; i++) { + struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + unsigned first, size; + boolean flushed; + unsigned instance_div = ve[i].instance_divisor; + + svga->dirty |= SVGA_NEW_VBUFFER; + + if (instance_div) { + first = 0; + size = vb->stride * + (instance_count + instance_div - 1) / instance_div; + } else if (vb->stride) { + first = vb->stride * start; + size = vb->stride * count; + } else { + /* Only a single vertex! + * Upload with the largest vertex size the hw supports, + * if possible. + */ + first = 0; + size = MIN2(16, vb->buffer->width0); + } + + ret = u_upload_buffer( svga->upload_vb, + 0, first, size, + &buffer->b.b, + &buffer->uploaded.offset, + &buffer->uploaded.buffer, + &flushed); + + if (ret) + return ret; + + if (0) + debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sofs %d" + " sz %d\n", + __FUNCTION__, + i, + buffer, + buffer->uploaded.buffer, + buffer->uploaded.offset, + first, + size); + + vb->buffer_offset = buffer->uploaded.offset; + buffer->source_offset = first; + } + } + + return PIPE_OK; +} + +/** + * svga_release_user_upl_buffers - release uploaded parts of user buffers + * + * This function releases the hw copy of the uploaded fraction of the + * user-buffer. It's important to do this as soon as all draw calls + * affecting the uploaded fraction are issued, as this allows for + * efficient reuse of the hardware surface backing the uploaded fraction. + * + * svga_buffer::source_offset is set to 0, and svga_buffer::uploaded::buffer + * is set to 0. + */ + +static void +svga_release_user_upl_buffers(struct svga_context *svga) +{ + unsigned i; + unsigned nr; + + nr = svga->curr.num_vertex_buffers; + + for (i = 0; i < nr; ++i) { + struct pipe_vertex_buffer *vb = &svga->curr.vb[i]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + + buffer->source_offset = 0; + if (buffer->uploaded.buffer) + pipe_resource_reference(&buffer->uploaded.buffer, NULL); + } + } +} @@ -50,6 +160,7 @@ retry_draw_range_elements( struct svga_context *svga, unsigned prim, unsigned start, unsigned count, + unsigned instance_count, boolean do_retry ) { enum pipe_error ret = 0; @@ -61,6 +172,10 @@ retry_draw_range_elements( struct svga_context *svga, svga->curr.rast->templ.flatshade, svga->curr.rast->templ.flatshade_first ); + ret = svga_upload_user_buffers( svga, min_index + index_bias, + max_index - min_index + 1, instance_count ); + if (ret != PIPE_OK) + goto retry; ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret) @@ -84,7 +199,7 @@ retry: index_buffer, index_size, index_bias, min_index, max_index, prim, start, count, - FALSE ); + instance_count, FALSE ); } return ret; @@ -96,6 +211,7 @@ retry_draw_arrays( struct svga_context *svga, unsigned prim, unsigned start, unsigned count, + unsigned instance_count, boolean do_retry ) { enum pipe_error ret; @@ -107,6 +223,11 @@ retry_draw_arrays( struct svga_context *svga, svga->curr.rast->templ.flatshade, svga->curr.rast->templ.flatshade_first ); + ret = svga_upload_user_buffers( svga, start, count, instance_count ); + + if (ret != PIPE_OK) + goto retry; + ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret) goto retry; @@ -127,6 +248,7 @@ retry: prim, start, count, + instance_count, FALSE ); } @@ -183,6 +305,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) svga_context_flush(svga, NULL); } + /* Avoid leaking the previous hwtnl bias to swtnl */ + svga_hwtnl_set_index_bias( svga->hwtnl, 0 ); ret = svga_swtnl_draw_vbo( svga, info ); } else { @@ -201,6 +325,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start + offset, info->count, + info->instance_count, TRUE ); } else { @@ -208,10 +333,13 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, + info->instance_count, TRUE ); } } + svga_release_user_upl_buffers( svga ); + if (SVGA_DEBUG & DEBUG_FLUSH) { svga_hwtnl_flush_retry( svga ); svga_context_flush(svga, NULL); diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h index 95032213fa5..2ae44d2a5e9 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/src/gallium/drivers/svga/svga_resource_buffer.h @@ -132,6 +132,13 @@ struct svga_buffer } uploaded; /** + * For user buffers, this is the offset to the data about to be + * referenced by the next draw command, and hence the data that needs + * to be uploaded. + */ + unsigned source_offset; + + /** * DMA'ble memory. * * A piece of GMR memory, with the same size of the buffer. It is created diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index 7c393a1da8d..2375a022f97 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -38,57 +38,6 @@ #include "svga_hw_reg.h" -static int -upload_user_buffers( struct svga_context *svga ) -{ - enum pipe_error ret = PIPE_OK; - int i; - int nr; - - if (0) - debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers); - - nr = svga->curr.num_vertex_buffers; - - for (i = 0; i < nr; i++) - { - if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer)) - { - struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer); - - if (!buffer->uploaded.buffer) { - boolean flushed; - ret = u_upload_buffer( svga->upload_vb, - 0, 0, - buffer->b.b.width0, - &buffer->b.b, - &buffer->uploaded.offset, - &buffer->uploaded.buffer, - &flushed); - if (ret) - return ret; - - if (0) - debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n", - __FUNCTION__, - i, - buffer, - buffer->uploaded.buffer, - buffer->uploaded.offset, - buffer->b.b.width0); - } - - svga->curr.vb[i].buffer_offset = buffer->uploaded.offset; - } - } - - if (0) - debug_printf("%s: DONE\n", __FUNCTION__); - - return ret; -} - - /*********************************************************************** */ @@ -99,6 +48,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, const struct pipe_vertex_element *ve = svga->curr.velems->velem; SVGA3dVertexDecl decl; unsigned i; + unsigned neg_bias = 0; assert(svga->curr.velems->count >= svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]); @@ -106,12 +56,50 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, svga_hwtnl_reset_vdecl( svga->hwtnl, svga->curr.velems->count ); + /** + * We can't set the VDECL offset to something negative, so we + * must calculate a common negative additional index bias, and modify + * the VDECL offsets accordingly so they *all* end up positive. + * + * Note that the exact value of the negative index bias is not that + * important, since we compensate for it when we calculate the vertex + * buffer offset below. The important thing is that all vertex buffer + * offsets remain positive. + * + * Note that we use a negative bias variable in order to make the + * rounding maths more easy to follow, and to avoid int / unsigned + * confusion. + */ + for (i = 0; i < svga->curr.velems->count; i++) { - const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; + const struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + struct svga_buffer *buffer; + unsigned int offset = vb->buffer_offset + ve[i].src_offset; + unsigned tmp_neg_bias = 0; + + if (!vb->buffer) + continue; + + buffer = svga_buffer(vb->buffer); + if (buffer->source_offset > offset) { + tmp_neg_bias = buffer->source_offset - offset; + if (vb->stride) + tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride; + neg_bias = MAX2(neg_bias, tmp_neg_bias); + } + } + + for (i = 0; i < svga->curr.velems->count; i++) { + const struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; unsigned usage, index; - struct svga_buffer *buffer = svga_buffer(vb->buffer); + struct svga_buffer *buffer; + if (!vb->buffer) + continue; + buffer= svga_buffer(vb->buffer); svga_generate_vdecl_semantics( i, &usage, &index ); /* SVGA_NEW_VELEMENT @@ -121,8 +109,16 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, decl.identity.usage = usage; decl.identity.usageIndex = index; decl.array.stride = vb->stride; - decl.array.offset = (vb->buffer_offset + - ve[i].src_offset); + + /* Compensate for partially uploaded vbo, and + * for the negative index bias. + */ + decl.array.offset = (vb->buffer_offset + + ve[i].src_offset + + neg_bias * vb->stride + - buffer->source_offset); + + assert(decl.array.offset >= 0); svga_hwtnl_vdecl( svga->hwtnl, i, @@ -131,6 +127,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, vb->buffer ); } + svga_hwtnl_set_index_bias( svga->hwtnl, -neg_bias ); return 0; } @@ -138,23 +135,11 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, static int emit_hw_vdecl( struct svga_context *svga, unsigned dirty ) { - int ret = 0; - /* SVGA_NEW_NEED_SWTNL */ if (svga->state.sw.need_swtnl) return 0; /* Do not emit during swtnl */ - /* If we get to here, we know that we're going to draw. Upload - * userbuffers now and try to combine multiple userbuffers from - * multiple draw calls into a single host buffer for performance. - */ - if (svga->curr.any_user_vertex_buffers) { - ret = upload_user_buffers( svga ); - if (ret) - return ret; - } - return emit_hw_vs_vdecl( svga, dirty ); } |