diff options
author | Marek Olšák <[email protected]> | 2017-02-15 19:50:15 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2017-02-18 01:22:08 +0100 |
commit | ac6007460adaf4bb21028a3281ec622d1e43df49 (patch) | |
tree | 01ee6b099afebfdbb3b784d2404517f067745d30 /src | |
parent | a550fbb510c998a0e484e9bf996e2d884ce3230e (diff) |
radeonsi: upload constants into VRAM instead of GTT
This lowers lgkm wait cycles by 30% on VI and normal conditions.
There might be a measurable improvement when CE is disabled (radeon)
or under L2 thrashing.
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/radeon/r600_pipe_common.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 7 |
4 files changed, 18 insertions, 10 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index d573b39d7c0..1781584f5ff 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -607,7 +607,11 @@ bool r600_common_context_init(struct r600_common_context *rctx, 0, PIPE_USAGE_STREAM); if (!rctx->b.stream_uploader) return false; - rctx->b.const_uploader = rctx->b.stream_uploader; + + rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024, + 0, PIPE_USAGE_DEFAULT); + if (!rctx->b.const_uploader) + return false; rctx->ctx = rctx->ws->ctx_create(rctx->ws); if (!rctx->ctx) @@ -649,9 +653,10 @@ void r600_common_context_cleanup(struct r600_common_context *rctx) if (rctx->ctx) rctx->ws->ctx_destroy(rctx->ctx); - if (rctx->b.stream_uploader) { + if (rctx->b.stream_uploader) u_upload_destroy(rctx->b.stream_uploader); - } + if (rctx->b.const_uploader) + u_upload_destroy(rctx->b.const_uploader); slab_destroy_child(&rctx->pool_transfers); diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 381837c8a57..88d72c1ea2a 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -503,7 +503,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx, dispatch.kernarg_address = kernel_args_va; - u_upload_data(sctx->b.b.stream_uploader, 0, sizeof(dispatch), + u_upload_data(sctx->b.b.const_uploader, 0, sizeof(dispatch), 256, &dispatch, &dispatch_offset, (struct pipe_resource**)&dispatch_buf); @@ -565,7 +565,7 @@ static void si_upload_compute_input(struct si_context *sctx, /* The extra num_work_size_bytes are for work group / work item size information */ kernel_args_size = program->input_size + num_work_size_bytes; - u_upload_alloc(sctx->b.b.stream_uploader, 0, kernel_args_size, + u_upload_alloc(sctx->b.b.const_uploader, 0, kernel_args_size, sctx->screen->b.info.tcc_cache_line_size, &kernel_args_offset, 
(struct pipe_resource**)&input_buffer, &kernel_args_ptr); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index b4f1fbfb213..a41b243eec8 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -235,7 +235,7 @@ static bool si_upload_descriptors(struct si_context *sctx, } else { void *ptr; - u_upload_alloc(sctx->b.b.stream_uploader, 0, list_size, + u_upload_alloc(sctx->b.b.const_uploader, 0, list_size, sctx->screen->b.info.tcc_cache_line_size, &desc->buffer_offset, (struct pipe_resource**)&desc->buffer, &ptr); @@ -963,7 +963,7 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) * directly through a staging buffer and don't go through * the fine-grained upload path. */ - u_upload_alloc(sctx->b.b.stream_uploader, 0, + u_upload_alloc(sctx->b.b.const_uploader, 0, desc_list_byte_size, si_optimal_tcc_alignment(sctx, desc_list_byte_size), &desc->buffer_offset, @@ -1051,7 +1051,7 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf { void *tmp; - u_upload_alloc(sctx->b.b.stream_uploader, 0, size, + u_upload_alloc(sctx->b.b.const_uploader, 0, size, si_optimal_tcc_alignment(sctx, size), const_offset, (struct pipe_resource**)rbuffer, &tmp); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index f53f8dd8ee9..81592a7e858 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3526,8 +3526,11 @@ static void si_set_vertex_buffers(struct pipe_context *ctx, assert(src->stride == 0); /* Assume the attrib has 4 dwords like the vbo - * module. This is also a good upper bound. */ - u_upload_data(sctx->b.b.stream_uploader, 0, 16, 16, + * module. This is also a good upper bound. + * + * Use const_uploader to upload into VRAM directly. 
+ */ + u_upload_data(sctx->b.b.const_uploader, 0, 16, 16, src->user_buffer, &dsti->buffer_offset, &dsti->buffer); |