summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2016-04-30 01:21:22 +0200
committerMarek Olšák <[email protected]>2016-05-10 17:20:09 +0200
commit60946c0d60610b03bc297df17ec7a3cca1e5f6e8 (patch)
tree531093de54a63ab4ae98b5ac1495df343beb80c7
parentbb74152597de44ee877b8928587b1cece8b49656 (diff)
gallium/radeon: add a heuristic for better (S)DMA performance
Reviewed-by: Alex Deucher <[email protected]> Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c14
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h2
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_cs.c8
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_cs.c8
4 files changed, 32 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index ce8d76963a9..bc7f5f53e5d 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -176,6 +176,20 @@ void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
/* done at the end of DMA calls, so increment this. */
rctx->num_dma_calls++;
+ /* IBs using too little memory are limited by the IB submission overhead.
+ * IBs using too much memory are limited by the kernel/TTM overhead.
+ * Too long IBs create CPU-GPU pipeline bubbles and add latency.
+ *
+ * This heuristic makes sure that DMA requests are executed
+ * very soon after the call is made and lowers memory usage.
+ * It improves texture upload performance by keeping the DMA
+ * engine busy while uploads are being submitted.
+ */
+ if (rctx->ws->cs_query_memory_usage(rctx->dma.cs) > 64 * 1024 * 1024) {
+ rctx->dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+ return;
+ }
+
r600_need_dma_space(rctx, 1, NULL, NULL);
if (cs->cdw == 0) /* empty queue */
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 442a4613e96..e73fa14a17e 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -683,6 +683,8 @@ struct radeon_winsys {
*/
boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
+ uint64_t (*cs_query_memory_usage)(struct radeon_winsys_cs *cs);
+
/**
* Return the buffer list.
*
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 9534b972a46..a5d703347c7 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -563,6 +563,13 @@ static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64
return gtt < ws->info.gart_size * 0.7;
}
+static uint64_t amdgpu_cs_query_memory_usage(struct radeon_winsys_cs *rcs)
+{
+ struct amdgpu_cs *cs = amdgpu_cs(rcs);
+
+ return cs->used_vram + cs->used_gart;
+}
+
static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
struct radeon_bo_list_item *list)
{
@@ -828,6 +835,7 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer;
ws->base.cs_validate = amdgpu_cs_validate;
ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
+ ws->base.cs_query_memory_usage = amdgpu_cs_query_memory_usage;
ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
ws->base.cs_flush = amdgpu_cs_flush;
ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 7a901a175f8..9ac50816f86 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -398,6 +398,13 @@ static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, ui
return gtt < cs->ws->info.gart_size * 0.7;
}
+static uint64_t radeon_drm_cs_query_memory_usage(struct radeon_winsys_cs *rcs)
+{
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+
+ return cs->csc->used_vram + cs->csc->used_gart;
+}
+
static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
struct radeon_bo_list_item *list)
{
@@ -671,6 +678,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
ws->base.cs_validate = radeon_drm_cs_validate;
ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
+ ws->base.cs_query_memory_usage = radeon_drm_cs_query_memory_usage;
ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
ws->base.cs_flush = radeon_drm_cs_flush;
ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;