diff options
author | Marek Olšák <[email protected]> | 2013-02-27 23:50:15 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2013-03-01 13:46:32 +0100 |
commit | 89e2898e9ecfcf93c337b99542b06892a8e30cbe (patch) | |
tree | cc04229ada84b24ff72eeb2969f2a1a8723fe7e6 /src/gallium | |
parent | 44f37261fc34763003314245a811cfd21ce6fc87 (diff) |
r600g: always map uninitialized buffer range as unsynchronized
Any driver can implement this simple and efficient optimization.
Team Fortress 2 always hits this path. The DISCARD_RANGE codepath is no longer
used with TF2 at all, so we avoid a ton of useless buffer copies.
Tested-by: Andreas Boll <[email protected]>
NOTE: This is a candidate for the 9.1 branch.
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/r600/evergreen_hw_context.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_buffer.c | 17 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 4 |
6 files changed, 45 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index b0f64b4ee53..f81d7f3216b 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -241,4 +241,7 @@ void evergreen_dma_copy(struct r600_context *rctx, src_offset += csize << shift; size -= csize; } + + util_range_add(&rdst->valid_buffer_range, dst_offset, + dst_offset + size); } diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 41dd70e5ab1..97f91df374c 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1377,6 +1377,10 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx, * elements. */ surf->cb_color_dim = pipe_buffer->width0; + /* Set the buffer range the GPU will have access to: */ + util_range_add(&r600_resource(pipe_buffer)->valid_buffer_range, + 0, pipe_buffer->width0); + surf->cb_color_cmask = surf->cb_color_base; surf->cb_color_cmask_slice = 0; surf->cb_color_fmask = surf->cb_color_base; diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index d018ebb0298..15196f7579f 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -28,6 +28,7 @@ #include "../../winsys/radeon/drm/radeon_winsys.h" #include "util/u_double_list.h" +#include "util/u_range.h" #include "util/u_transfer.h" #define R600_ERR(fmt, args...) \ @@ -50,6 +51,16 @@ struct r600_resource { /* Resource state. */ unsigned domains; + + /* The buffer range which is initialized (with a write transfer, + * streamout, DMA, or as a random access target). The rest of + * the buffer is considered invalid and can be mapped unsynchronized. + * + * This allows unsychronized mapping of a buffer range which hasn't + * been used yet. It's for applications which forget to use + * the unsynchronized map flag and expect the driver to figure it out. 
+ */ + struct util_range valid_buffer_range; }; #define R600_BLOCK_MAX_BO 32 diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 3267c37cc6d..7574cd6c889 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -34,6 +34,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, { struct r600_resource *rbuffer = r600_resource(buf); + util_range_destroy(&rbuffer->valid_buffer_range); pb_reference(&rbuffer->buf, NULL); FREE(rbuffer); } @@ -98,6 +99,14 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, assert(box->x + box->width <= resource->width0); + /* See if the buffer range being mapped has never been initialized, + * in which case it can be mapped unsynchronized. */ + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && + usage & PIPE_TRANSFER_WRITE && + !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) { + usage |= PIPE_TRANSFER_UNSYNCHRONIZED; + } + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE && !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { assert(usage & PIPE_TRANSFER_WRITE); @@ -180,6 +189,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, { struct r600_context *rctx = (struct r600_context*)pipe; struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + struct r600_resource *rbuffer = r600_resource(transfer->resource); if (rtransfer->staging) { struct pipe_resource *dst, *src; @@ -205,6 +215,11 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, } pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL); } + + if (transfer->usage & PIPE_TRANSFER_WRITE) { + util_range_add(&rbuffer->valid_buffer_range, transfer->box.x, + transfer->box.x + transfer->box.width); + } util_slab_free(&rctx->pool_transfers, transfer); } @@ -261,6 +276,7 @@ bool r600_init_resource(struct r600_screen *rscreen, res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf); res->domains 
= domains; + util_range_set_empty(&res->valid_buffer_range); return true; } @@ -277,6 +293,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, pipe_reference_init(&rbuffer->b.b.reference, 1); rbuffer->b.b.screen = screen; rbuffer->b.vtbl = &r600_buffer_vtbl; + util_range_init(&rbuffer->valid_buffer_range); if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE, templ->usage)) { FREE(rbuffer); diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 0ff34353c4a..c2f3aab7813 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -1148,6 +1148,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, /* Invalidate the read caches. */ rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES; + + util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, + dst_offset + size); } void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw) @@ -1194,4 +1197,7 @@ void r600_dma_copy(struct r600_context *rctx, src_offset += csize << shift; size -= csize; } + + util_range_add(&rdst->valid_buffer_range, dst_offset, + dst_offset + size); } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index b067e50746c..22ac8466e4c 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -977,6 +977,7 @@ r600_create_so_target(struct pipe_context *ctx, { struct r600_context *rctx = (struct r600_context *)ctx; struct r600_so_target *t; + struct r600_resource *rbuffer = (struct r600_resource*)buffer; t = CALLOC_STRUCT(r600_so_target); if (!t) { @@ -996,6 +997,9 @@ r600_create_so_target(struct pipe_context *ctx, pipe_resource_reference(&t->b.buffer, buffer); t->b.buffer_offset = buffer_offset; t->b.buffer_size = buffer_size; + + util_range_add(&rbuffer->valid_buffer_range, buffer_offset, + buffer_offset + buffer_size); return &t->b; } |