diff options
author | Marek Olšák <[email protected]> | 2013-11-29 17:28:23 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2013-12-12 18:34:11 +0100 |
commit | 91aca8c662faf0ec311968b2897a72a6d08b199d (patch) | |
tree | 97a5c26785cb01493158a8e2cb3c2c5965c5b08d /src/gallium/drivers/radeon | |
parent | 12806449fa35aff47ad6f4615ede55776c9f66c8 (diff) |
r600g,radeonsi: consolidate buffer code, add handling of DISCARD_RANGE for SI
This adds 2 optimizations for radeonsi:
- handling of DISCARD_RANGE
- mapping an uninitialized buffer range is automatically UNSYNCHRONIZED
Reviewed-by: Michel Dänzer <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeon')
-rw-r--r-- | src/gallium/drivers/radeon/r600_buffer_common.c | 174 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/r600_pipe_common.c | 17 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/r600_pipe_common.h | 8 |
3 files changed, 199 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 82b69d86244..ac5fbcc0dd0 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -25,6 +25,8 @@ */ #include "r600_cs.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" #include <inttypes.h> boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx, @@ -146,3 +148,175 @@ bool r600_init_resource(struct r600_common_screen *rscreen, } return true; } + /* Resource-destroy hook for buffers (see r600_buffer_vtbl): destroys the valid_buffer_range tracker, drops the reference held in rbuffer->buf and frees the r600_resource itself. The screen argument is not used. */ +static void r600_buffer_destroy(struct pipe_screen *screen, + struct pipe_resource *buf) +{ + struct r600_resource *rbuffer = r600_resource(buf); + + util_range_destroy(&rbuffer->valid_buffer_range); + pb_reference(&rbuffer->buf, NULL); + FREE(rbuffer); +} + /* Takes an r600_transfer from rctx->pool_transfers, fills in the pipe_transfer fields (stride and layer_stride are set to 0), records the staging resource and its offset, stores the transfer in *ptransfer and returns data unchanged. NOTE(review): the util_slab_alloc() result is written through without a NULL check -- confirm the allocator cannot fail here. */ +static void *r600_buffer_get_transfer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer, + void *data, struct r600_resource *staging, + unsigned offset) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct r600_transfer *transfer = util_slab_alloc(&rctx->pool_transfers); + + transfer->transfer.resource = resource; + transfer->transfer.level = level; + transfer->transfer.usage = usage; + transfer->transfer.box = *box; + transfer->transfer.stride = 0; + transfer->transfer.layer_stride = 0; + transfer->offset = offset; + transfer->staging = staging; + *ptransfer = &transfer->transfer; + return data; +} + /* Maps the byte range box->x .. box->x + box->width of a buffer. Three special cases are tried in order: (1) a write to a range that does not intersect valid_buffer_range is promoted to UNSYNCHRONIZED; (2) DISCARD_WHOLE_RESOURCE calls rctx->invalidate_buffer() when the buffer is referenced by the rings or reported busy; (3) DISCARD_RANGE on a referenced or busy buffer returns memory from rctx->uploader instead, which r600_buffer_transfer_unmap() copies back. Everything else goes through r600_buffer_map_sync_with_rings(). Returns the pointer for box->x and sets *ptransfer, or returns NULL if that final mapping fails. */ +static void *r600_buffer_transfer_map(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; + struct r600_resource *rbuffer = r600_resource(resource); + uint8_t *data; + 
+ assert(box->x + box->width <= resource->width0); + + /* See if the buffer range being mapped has never been initialized, + * in which case it can be mapped unsynchronized. */ + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && + usage & PIPE_TRANSFER_WRITE && + !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) { + usage |= PIPE_TRANSFER_UNSYNCHRONIZED; + } + + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE && + !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + assert(usage & PIPE_TRANSFER_WRITE); + + /* Check if mapping this buffer would cause waiting for the GPU. */ + if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) || + rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) { + rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b); + } + } + else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && + !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && + !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) && + (rscreen->has_cp_dma || + (rscreen->has_streamout && + /* The buffer range must be aligned to 4 with streamout. */ + box->x % 4 == 0 && box->width % 4 == 0))) { + assert(usage & PIPE_TRANSFER_WRITE); + + /* Check if mapping this buffer would cause waiting for the GPU. */ + if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) || + rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) { + /* Do a wait-free write-only transfer using a temporary buffer. 
*/ + unsigned offset; + struct r600_resource *staging = NULL; + /* The allocation is padded by box->x % R600_MAP_BUFFER_ALIGNMENT and data is advanced by the same amount below; r600_buffer_transfer_unmap() adds the same term to the source offset. NOTE(review): presumably this keeps the returned pointer at the same misalignment as box->x -- confirm. */ + u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT), + &offset, (struct pipe_resource**)&staging, (void**)&data); + + if (staging) { + data += box->x % R600_MAP_BUFFER_ALIGNMENT; + return r600_buffer_get_transfer(ctx, resource, level, usage, box, + ptransfer, data, staging, offset); + } + } + } + /* Reached by every case that did not return a staging mapping above, including the case where staging is still NULL after u_upload_alloc(). */ + data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage); + if (!data) { + return NULL; + } + data += box->x; + + return r600_buffer_get_transfer(ctx, resource, level, usage, box, + ptransfer, data, NULL, 0); +} + /* Ends a mapping created by r600_buffer_transfer_map(). If a staging buffer was used, its contents are copied into the real buffer at box.x: rctx->dma_copy() is tried when size and both offsets are multiples of 4, and resource_copy_region() is used when that condition fails or dma_copy() returns zero; the staging reference is then dropped. For write mappings the mapped range is added to valid_buffer_range. Finally the transfer goes back to rctx->pool_transfers. */ +static void r600_buffer_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct r600_common_context *rctx = (struct r600_common_context*)ctx; + struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + struct r600_resource *rbuffer = r600_resource(transfer->resource); + + if (rtransfer->staging) { + struct pipe_resource *dst, *src; + unsigned soffset, doffset, size; + struct pipe_box box; + + dst = transfer->resource; + src = &rtransfer->staging->b.b; + size = transfer->box.width; + doffset = transfer->box.x; + soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT; + + u_box_1d(soffset, size, &box); + + /* Copy the staging buffer into the original one. */ + if (!(size % 4) && !(doffset % 4) && !(soffset % 4) && + rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box)) { + /* DONE. 
*/ + } else { + ctx->resource_copy_region(ctx, dst, 0, doffset, 0, 0, src, 0, &box); + } + pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL); + } + + if (transfer->usage & PIPE_TRANSFER_WRITE) { + util_range_add(&rbuffer->valid_buffer_range, transfer->box.x, + transfer->box.x + transfer->box.width); + } + util_slab_free(&rctx->pool_transfers, transfer); +} + /* Function table that r600_buffer_create() stores in rbuffer->b.vtbl; the entries set to NULL have no implementation in this file. */ +static const struct u_resource_vtbl r600_buffer_vtbl = +{ + NULL, /* get_handle */ + r600_buffer_destroy, /* resource_destroy */ + r600_buffer_transfer_map, /* transfer_map */ + NULL, /* transfer_flush_region */ + r600_buffer_transfer_unmap, /* transfer_unmap */ + NULL /* transfer_inline_write */ +}; + /* Creates a buffer resource from templ: copies the template, sets the reference count to 1, installs r600_buffer_vtbl, initializes valid_buffer_range and calls r600_init_resource() with templ->width0 as the size. Returns the embedded pipe_resource, or NULL after freeing the struct when r600_init_resource() fails. NOTE(review): the MALLOC_STRUCT result is dereferenced without a NULL check, and the failure path frees the struct without calling util_range_destroy() -- confirm whether either matters. */ +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ, + unsigned alignment) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct r600_resource *rbuffer; + + rbuffer = MALLOC_STRUCT(r600_resource); + + rbuffer->b.b = *templ; + pipe_reference_init(&rbuffer->b.b.reference, 1); + rbuffer->b.b.screen = screen; + rbuffer->b.vtbl = &r600_buffer_vtbl; + util_range_init(&rbuffer->valid_buffer_range); + + if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE, templ->usage)) { + FREE(rbuffer); + return NULL; + } + return &rbuffer->b.b; +} diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 1ad47e1481b..28921beea82 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -28,6 +28,7 @@ #include "r600_cs.h" #include "tgsi/tgsi_parse.h" #include "util/u_format_s3tc.h" +#include "util/u_upload_mgr.h" #include <inttypes.h> static const struct debug_named_value common_debug_options[] = { @@ -223,6 +224,10 @@ void r600_common_screen_cleanup(struct r600_common_screen *rscreen) bool r600_common_context_init(struct r600_common_context *rctx, struct r600_common_screen *rscreen) { + 
util_slab_create(&rctx->pool_transfers, + sizeof(struct r600_transfer), 64, + UTIL_SLAB_SINGLETHREADED); + rctx->ws = rscreen->ws; rctx->family = rscreen->family; rctx->chip_class = rscreen->chip_class; @@ -234,11 +239,23 @@ bool r600_common_context_init(struct r600_common_context *rctx, if (!rctx->allocator_so_filled_size) return false; + rctx->uploader = u_upload_create(&rctx->b, 1024 * 1024, 256, + PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER); /* NOTE(review): the early return below leaves pool_transfers and allocator_so_filled_size in place; presumably the caller runs r600_common_context_cleanup() on failure -- confirm. */ + if (!rctx->uploader) + return false; + return true; } void r600_common_context_cleanup(struct r600_common_context *rctx) { + if (rctx->uploader) { + u_upload_destroy(rctx->uploader); + } + + util_slab_destroy(&rctx->pool_transfers); + if (rctx->allocator_so_filled_size) { u_suballocator_destroy(rctx->allocator_so_filled_size); } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 172dd937bbd..08144823d6e 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -35,6 +35,7 @@ #include "../../winsys/radeon/drm/radeon_winsys.h" #include "util/u_range.h" +#include "util/u_slab.h" #include "util/u_suballoc.h" #include "util/u_transfer.h" @@ -77,6 +78,8 @@ #define DBG_NO_DISCARD_RANGE (1 << 14) /* The maximum allowed bit is 15. */ +#define R600_MAP_BUFFER_ALIGNMENT 64 /* Modulus applied to box->x by the buffer transfer map/unmap code when it uses a staging allocation. */ + struct r600_common_context; struct r600_resource { @@ -225,7 +228,9 @@ struct r600_common_context { enum chip_class chip_class; struct r600_rings rings; + struct u_upload_mgr *uploader; struct u_suballocator *allocator_so_filled_size; + struct util_slab_mempool pool_transfers; /* Current unaccounted memory usage. 
*/ uint64_t vram; @@ -273,6 +278,9 @@ bool r600_init_resource(struct r600_common_screen *rscreen, struct r600_resource *res, unsigned size, unsigned alignment, bool use_reusable_pool, unsigned usage); +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ, + unsigned alignment); /* r600_common_pipe.c */ bool r600_common_screen_init(struct r600_common_screen *rscreen, |