summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2016-12-27 17:51:43 +0100
committerMarek Olšák <[email protected]>2017-01-05 18:43:24 +0100
commitcba9d59362130e1b44cd9cfc4f38ad3773111442 (patch)
tree20621757dce0b5a601ddcf689ef76a06d52c34fe /src
parent29d6a367a6508a6814f44039166a65a4b69f53b1 (diff)
radeonsi: implement SDMA-based buffer clearing for SI
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/radeonsi/si_cp_dma.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_dma.c40
2 files changed, 41 insertions, 1 deletions
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 722ac4d2c23..596e32c957c 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -196,7 +196,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
/* dma_clear_buffer can use clear_buffer on failure. Make sure that
* doesn't happen. We don't want an infinite recursion: */
- if (sctx->b.chip_class >= CIK && sctx->b.dma.cs &&
+ if (sctx->b.dma.cs &&
/* CP DMA is very slow. Always use SDMA for big clears. This
* alone improves DeusEx:MD performance by 70%. */
(size > 128 * 1024 ||
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index b6aab00cadc..9dbee3af381 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -78,6 +78,45 @@ static void si_dma_copy_buffer(struct si_context *ctx,
}
}
+static void si_dma_clear_buffer(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ uint64_t offset,
+ uint64_t size,
+ unsigned clear_value)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct radeon_winsys_cs *cs = sctx->b.dma.cs;
+ unsigned i, ncopy, csize;
+ struct r600_resource *rdst = r600_resource(dst);
+
+ if (!cs || offset % 4 != 0 || size % 4 != 0) {
+ ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
+ return;
+ }
+
+ /* Mark the buffer range of destination as valid (initialized),
+ * so that transfer_map knows it should wait for the GPU when mapping
+ * that range. */
+ util_range_add(&rdst->valid_buffer_range, offset, offset + size);
+
+ offset += rdst->gpu_address;
+
+ /* the same maximum size as for copying */
+ ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
+ r600_need_dma_space(&sctx->b, ncopy * 4, rdst, NULL);
+
+ for (i = 0; i < ncopy; i++) {
+ csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
+ radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_CONSTANT_FILL, 0,
+ csize / 4));
+ radeon_emit(cs, offset);
+ radeon_emit(cs, clear_value);
+ radeon_emit(cs, (offset >> 32) << 16);
+ offset += csize;
+ size -= csize;
+ }
+}
+
static void si_dma_copy_tile(struct si_context *ctx,
struct pipe_resource *dst,
unsigned dst_level,
@@ -285,4 +324,5 @@ fallback:
void si_init_dma_functions(struct si_context *sctx)
{
sctx->b.dma_copy = si_dma_copy;
+ sctx->b.dma_clear_buffer = si_dma_clear_buffer;
}