aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeonsi/si_cp_dma.c
diff options
context:
space:
mode:
authorBas Nieuwenhuizen <[email protected]>2016-03-27 11:14:34 +0200
committerBas Nieuwenhuizen <[email protected]>2016-04-19 18:10:31 +0200
commit7a92c0842892bf55a82b7d95ab5a3b7dfbb83407 (patch)
tree67bea5002d5768b610169a98f6e69e59f502dac3 /src/gallium/drivers/radeonsi/si_cp_dma.c
parente764ee13ae21e3c1dbda24daeb2d08c5e7c81871 (diff)
radeonsi: do not do two full flushes on every compute dispatch
v2: Add more CS_PARTIAL_FLUSH events. Essentially every place with waits on finishing for pixel shaders also has a write after read hazard with compute shaders. Invalidating L2 waits implicitly on pixel and compute shaders, so, we don't need a CS_PARTIAL_FLUSH for switching FBO. v3: Add CS_PARTIAL_FLUSH events even if we already have INV_GLOBAL_L2. According to Marek the INV_GLOBAL_L2 events don't wait for compute shaders to finish, so wait for them explicitly. Signed-off-by: Bas Nieuwenhuizen <[email protected]> Reviewed-by: Marek Olšák <[email protected]> Reviewed-by: Nicolai Hähnle <[email protected]> Reviewed-by: Edward O'Callaghan <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_cp_dma.c')
-rw-r--r--src/gallium/drivers/radeonsi/si_cp_dma.c6
1 files changed, 4 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 001ddd4bfae..38e0ee60d64 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -190,7 +190,8 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
uint64_t va = r600_resource(dst)->gpu_address + offset;
/* Flush the caches. */
- sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags;
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+ SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
while (size) {
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
@@ -296,7 +297,8 @@ void si_copy_buffer(struct si_context *sctx,
}
/* Flush the caches. */
- sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags;
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+ SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
/* This is the main part doing the copying. Src is always aligned. */
main_dst_offset = dst_offset + skipped_size;