summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2017-02-15 19:50:15 +0100
committerMarek Olšák <[email protected]>2017-02-18 01:22:08 +0100
commitac6007460adaf4bb21028a3281ec622d1e43df49 (patch)
tree01ee6b099afebfdbb3b784d2404517f067745d30 /src/gallium/drivers
parenta550fbb510c998a0e484e9bf996e2d884ce3230e (diff)
radeonsi: upload constants into VRAM instead of GTT
This lowers lgkm wait cycles by 30% on VI under normal conditions. There might be a measurable improvement when CE is disabled (radeon) or under L2 thrashing.
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/radeon/r600_pipe_common.c 11
-rw-r--r-- src/gallium/drivers/radeonsi/si_compute.c 4
-rw-r--r-- src/gallium/drivers/radeonsi/si_descriptors.c 6
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c 7
4 files changed, 18 insertions, 10 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index d573b39d7c0..1781584f5ff 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -607,7 +607,11 @@ bool r600_common_context_init(struct r600_common_context *rctx,
0, PIPE_USAGE_STREAM);
if (!rctx->b.stream_uploader)
return false;
- rctx->b.const_uploader = rctx->b.stream_uploader;
+
+ rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
+ 0, PIPE_USAGE_DEFAULT);
+ if (!rctx->b.const_uploader)
+ return false;
rctx->ctx = rctx->ws->ctx_create(rctx->ws);
if (!rctx->ctx)
@@ -649,9 +653,10 @@ void r600_common_context_cleanup(struct r600_common_context *rctx)
if (rctx->ctx)
rctx->ws->ctx_destroy(rctx->ctx);
- if (rctx->b.stream_uploader) {
+ if (rctx->b.stream_uploader)
u_upload_destroy(rctx->b.stream_uploader);
- }
+ if (rctx->b.const_uploader)
+ u_upload_destroy(rctx->b.const_uploader);
slab_destroy_child(&rctx->pool_transfers);
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 381837c8a57..88d72c1ea2a 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -503,7 +503,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx,
dispatch.kernarg_address = kernel_args_va;
- u_upload_data(sctx->b.b.stream_uploader, 0, sizeof(dispatch),
+ u_upload_data(sctx->b.b.const_uploader, 0, sizeof(dispatch),
256, &dispatch, &dispatch_offset,
(struct pipe_resource**)&dispatch_buf);
@@ -565,7 +565,7 @@ static void si_upload_compute_input(struct si_context *sctx,
/* The extra num_work_size_bytes are for work group / work item size information */
kernel_args_size = program->input_size + num_work_size_bytes;
- u_upload_alloc(sctx->b.b.stream_uploader, 0, kernel_args_size,
+ u_upload_alloc(sctx->b.b.const_uploader, 0, kernel_args_size,
sctx->screen->b.info.tcc_cache_line_size,
&kernel_args_offset,
(struct pipe_resource**)&input_buffer, &kernel_args_ptr);
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index b4f1fbfb213..a41b243eec8 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -235,7 +235,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
} else {
void *ptr;
- u_upload_alloc(sctx->b.b.stream_uploader, 0, list_size,
+ u_upload_alloc(sctx->b.b.const_uploader, 0, list_size,
sctx->screen->b.info.tcc_cache_line_size,
&desc->buffer_offset,
(struct pipe_resource**)&desc->buffer, &ptr);
@@ -963,7 +963,7 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
* directly through a staging buffer and don't go through
* the fine-grained upload path.
*/
- u_upload_alloc(sctx->b.b.stream_uploader, 0,
+ u_upload_alloc(sctx->b.b.const_uploader, 0,
desc_list_byte_size,
si_optimal_tcc_alignment(sctx, desc_list_byte_size),
&desc->buffer_offset,
@@ -1051,7 +1051,7 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf
{
void *tmp;
- u_upload_alloc(sctx->b.b.stream_uploader, 0, size,
+ u_upload_alloc(sctx->b.b.const_uploader, 0, size,
si_optimal_tcc_alignment(sctx, size),
const_offset,
(struct pipe_resource**)rbuffer, &tmp);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index f53f8dd8ee9..81592a7e858 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3526,8 +3526,11 @@ static void si_set_vertex_buffers(struct pipe_context *ctx,
assert(src->stride == 0);
/* Assume the attrib has 4 dwords like the vbo
- * module. This is also a good upper bound. */
- u_upload_data(sctx->b.b.stream_uploader, 0, 16, 16,
+ * module. This is also a good upper bound.
+ *
+ * Use const_uploader to upload into VRAM directly.
+ */
+ u_upload_data(sctx->b.b.const_uploader, 0, 16, 16,
src->user_buffer,
&dsti->buffer_offset,
&dsti->buffer);