diff options
author | Marek Olšák <marek.olsak@amd.com> | 2016-12-24 22:52:45 +0100 |
---|---|---|
committer | Marek Olšák <marek.olsak@amd.com> | 2017-01-05 18:43:23 +0100 |
commit | d4c0ad4de8c4eeec1cc0478b12ce542e9a7faa0f (patch) | |
tree | 31c5a2d5025b412fddfeb158bd1ad24b6c4dfcf9 | |
parent | 431742dbba46b66e61a22f7186ec82a818685a31 (diff) |
radeonsi: implement SDMA-based buffer clearing for CIK-VI
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
-rw-r--r-- | src/gallium/drivers/radeon/r600_pipe_common.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/r600_pipe_common.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/cik_sdma.c | 42 |
3 files changed, 54 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index e0b914c50fd..a3198450923 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -533,6 +533,14 @@ bool r600_check_device_reset(struct r600_common_context *rctx)
 	return true;
 }
 
+static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx,
+					   struct pipe_resource *dst,
+					   uint64_t offset, uint64_t size,
+					   unsigned value)
+{
+	ctx->clear_buffer(ctx, dst, offset, size, &value, 4);
+}
+
 bool r600_common_context_init(struct r600_common_context *rctx,
 			      struct r600_common_screen *rscreen,
 			      unsigned context_flags)
@@ -559,6 +567,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
 	rctx->b.memory_barrier = r600_memory_barrier;
 	rctx->b.flush = r600_flush_from_st;
 	rctx->b.set_debug_callback = r600_set_debug_callback;
+	rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
 
 	/* evergreen_compute.c has a special codepath for global buffers.
 	 * Everything else can use the direct path.
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 25d40da931e..da4b63c0b69 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -626,6 +626,9 @@ struct r600_common_context {
 			 unsigned src_level,
 			 const struct pipe_box *src_box);
 
+	void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
+				 uint64_t offset, uint64_t size, unsigned value);
+
 	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
 			     uint64_t offset, uint64_t size, unsigned value,
 			     enum r600_coherency coher);
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index b3eaed58fc0..698f8f6bbb2 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -82,6 +82,47 @@ static void cik_sdma_copy_buffer(struct si_context *ctx,
 	r600_dma_emit_wait_idle(&ctx->b);
 }
 
+static void cik_sdma_clear_buffer(struct pipe_context *ctx,
+				  struct pipe_resource *dst,
+				  uint64_t offset,
+				  uint64_t size,
+				  unsigned clear_value)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct radeon_winsys_cs *cs = sctx->b.dma.cs;
+	unsigned i, ncopy, csize;
+	struct r600_resource *rdst = r600_resource(dst);
+
+	if (!cs || offset % 4 != 0 || size % 4 != 0) {
+		ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
+		return;
+	}
+
+	/* Mark the buffer range of destination as valid (initialized),
+	 * so that transfer_map knows it should wait for the GPU when mapping
+	 * that range. */
+	util_range_add(&rdst->valid_buffer_range, offset, offset + size);
+
+	offset += rdst->gpu_address;
+
+	/* the same maximum size as for copying */
+	ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
+	r600_need_dma_space(&sctx->b, ncopy * 5, rdst, NULL);
+
+	for (i = 0; i < ncopy; i++) {
+		csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
+		radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_PACKET_CONSTANT_FILL, 0,
+						0x8000 /* dword copy */));
+		radeon_emit(cs, offset);
+		radeon_emit(cs, offset >> 32);
+		radeon_emit(cs, clear_value);
+		radeon_emit(cs, csize);
+		offset += csize;
+		size -= csize;
+	}
+	r600_dma_emit_wait_idle(&sctx->b);
+}
+
 static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
 {
 	width = u_minify(width, level);
@@ -525,4 +566,5 @@ fallback:
 void cik_init_sdma_functions(struct si_context *sctx)
 {
 	sctx->b.dma_copy = cik_sdma_copy;
+	sctx->b.dma_clear_buffer = cik_sdma_clear_buffer;
 }