aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeonsi/si_descriptors.c
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2017-10-08 03:44:07 +0200
committerMarek Olšák <[email protected]>2017-10-17 22:03:03 +0200
commit2f4705afde707e8eb41b9414c25df91aa1ea2fb3 (patch)
tree1d33d30de07cd513b46438ccc1ca8924f0205027 /src/gallium/drivers/radeonsi/si_descriptors.c
parent854593b8eb2cf27d1666fd0ca1931eee80750019 (diff)
radeonsi: if there's just const buffer 0, set it in place of CONST/SSBO pointer
SI_SGPR_CONST_AND_SHADER_BUFFERS now contains the pointer to const buffer 0 if there is no other buffer there. Benefits: - there is no constbuf descriptor upload and shader load It's assumed that all constant addresses are within bounds. Non-constant addresses are clamped against the last declared CONST variable. This only works if the state tracker ensures the bound constant buffer matches what the shader needs. Once we get 32-bit pointers, we can only do this for user constant buffers where the driver is in charge of the upload so that it can guarantee a 32-bit address. The real performance benefit might not be measurable. These apps get 100% theoretical benefit in all shaders (except where noted): - antichamber - batman arkham origins - borderlands 2 - borderlands pre-sequel - brutal legend - civilization BE - CS:GO - deadcore - dota 2 -- most shaders - europa universalis - grid autosport -- most shaders - left 4 dead 2 - legend of grimrock - life is strange - payday 2 - portal - rocket league - serious sam 3 bfe - talos principle - team fortress 2 - thea - unigine heaven - unigine valley -- also sanctuary and tropics - wasteland 2 - xcom: enemy unknown & enemy within - tesseract - unity (engine) Changed stats only: SGPRS: 2059998 -> 2086238 (1.27 %) VGPRS: 1626888 -> 1626904 (0.00 %) Spilled SGPRs: 7902 -> 7865 (-0.47 %) Code Size: 60924520 -> 60982660 (0.10 %) bytes Max Waves: 374539 -> 374526 (-0.00 %) Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_descriptors.c')
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c23
1 files changed, 20 insertions, 3 deletions
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 0c1fca87181..da6efa83947 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -126,6 +126,7 @@ static void si_init_descriptors(struct si_descriptors *desc,
desc->element_dw_size = element_dw_size;
desc->num_elements = num_elements;
desc->shader_userdata_offset = shader_userdata_index * 4;
+ desc->slot_index_to_bind_directly = -1;
}
static void si_release_descriptors(struct si_descriptors *desc)
@@ -148,6 +149,20 @@ static bool si_upload_descriptors(struct si_context *sctx,
if (!upload_size)
return true;
+ /* If there is just one active descriptor, bind it directly. */
+ if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly &&
+ desc->num_active_slots == 1) {
+ uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly *
+ desc->element_dw_size];
+
+ /* The buffer is already in the buffer list. */
+ r600_resource_reference(&desc->buffer, NULL);
+ desc->gpu_list = NULL;
+ desc->gpu_address = si_desc_extract_buffer_address(descriptor);
+ si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
+ return true;
+ }
+
uint32_t *ptr;
int buffer_offset;
u_upload_alloc(sctx->b.b.const_uploader, 0, upload_size,
@@ -2531,14 +2546,15 @@ void si_init_all_descriptors(struct si_context *sctx)
bool gfx9_gs = false;
unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
+ struct si_descriptors *desc;
if (sctx->b.chip_class >= GFX9) {
gfx9_tcs = i == PIPE_SHADER_TESS_CTRL;
gfx9_gs = i == PIPE_SHADER_GEOMETRY;
}
- si_init_buffer_resources(&sctx->const_and_shader_buffers[i],
- si_const_and_shader_buffer_descriptors(sctx, i),
+ desc = si_const_and_shader_buffer_descriptors(sctx, i);
+ si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc,
num_buffer_slots,
gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
@@ -2547,8 +2563,9 @@ void si_init_all_descriptors(struct si_context *sctx)
RADEON_USAGE_READ,
RADEON_PRIO_SHADER_RW_BUFFER,
RADEON_PRIO_CONST_BUFFER);
+ desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
- struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, i);
+ desc = si_sampler_and_image_descriptors(sctx, i);
si_init_descriptors(desc,
gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :