diff options
author | Marek Olšák <[email protected]> | 2017-10-08 03:44:07 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2017-10-17 22:03:03 +0200 |
commit | 2f4705afde707e8eb41b9414c25df91aa1ea2fb3 (patch) | |
tree | 1d33d30de07cd513b46438ccc1ca8924f0205027 /src/gallium/drivers/radeonsi/si_descriptors.c | |
parent | 854593b8eb2cf27d1666fd0ca1931eee80750019 (diff) |
radeonsi: if there's just const buffer 0, set it in place of CONST/SSBO pointer
SI_SGPR_CONST_AND_SHADER_BUFFERS now contains the pointer to const buffer 0
if there is no other buffer there.
Benefits:
- no constbuf descriptor upload and no descriptor load in the shader
It's assumed that all constant addresses are within bounds. Non-constant
addresses are clamped against the last declared CONST variable.
This only works if the state tracker ensures the bound constant buffer
matches what the shader needs.
Once we get 32-bit pointers, we can only do this for user constant buffers
where the driver is in charge of the upload so that it can guarantee a 32-bit
address.
The real performance benefit might not be measurable.
These apps get 100% theoretical benefit in all shaders (except where noted):
- antichamber
- batman arkham origins
- borderlands 2
- borderlands pre-sequel
- brutal legend
- civilization BE
- CS:GO
- deadcore
- dota 2 -- most shaders
- europa universalis
- grid autosport -- most shaders
- left 4 dead 2
- legend of grimrock
- life is strange
- payday 2
- portal
- rocket league
- serious sam 3 bfe
- talos principle
- team fortress 2
- thea
- unigine heaven
- unigine valley -- also sanctuary and tropics
- wasteland 2
- xcom: enemy unknown & enemy within
- tesseract
- unity (engine)
Changed stats only:
SGPRS: 2059998 -> 2086238 (1.27 %)
VGPRS: 1626888 -> 1626904 (0.00 %)
Spilled SGPRs: 7902 -> 7865 (-0.47 %)
Code Size: 60924520 -> 60982660 (0.10 %) bytes
Max Waves: 374539 -> 374526 (-0.00 %)
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_descriptors.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 23 |
1 file changed, 20 insertions, 3 deletions
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 0c1fca87181..da6efa83947 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -126,6 +126,7 @@ static void si_init_descriptors(struct si_descriptors *desc, desc->element_dw_size = element_dw_size; desc->num_elements = num_elements; desc->shader_userdata_offset = shader_userdata_index * 4; + desc->slot_index_to_bind_directly = -1; } static void si_release_descriptors(struct si_descriptors *desc) @@ -148,6 +149,20 @@ static bool si_upload_descriptors(struct si_context *sctx, if (!upload_size) return true; + /* If there is just one active descriptor, bind it directly. */ + if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly && + desc->num_active_slots == 1) { + uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly * + desc->element_dw_size]; + + /* The buffer is already in the buffer list. */ + r600_resource_reference(&desc->buffer, NULL); + desc->gpu_list = NULL; + desc->gpu_address = si_desc_extract_buffer_address(descriptor); + si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom); + return true; + } + uint32_t *ptr; int buffer_offset; u_upload_alloc(sctx->b.b.const_uploader, 0, upload_size, @@ -2531,14 +2546,15 @@ void si_init_all_descriptors(struct si_context *sctx) bool gfx9_gs = false; unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS; unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS; + struct si_descriptors *desc; if (sctx->b.chip_class >= GFX9) { gfx9_tcs = i == PIPE_SHADER_TESS_CTRL; gfx9_gs = i == PIPE_SHADER_GEOMETRY; } - si_init_buffer_resources(&sctx->const_and_shader_buffers[i], - si_const_and_shader_buffer_descriptors(sctx, i), + desc = si_const_and_shader_buffer_descriptors(sctx, i); + si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc, num_buffer_slots, gfx9_tcs ? 
GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS : gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS : @@ -2547,8 +2563,9 @@ void si_init_all_descriptors(struct si_context *sctx) RADEON_USAGE_READ, RADEON_PRIO_SHADER_RW_BUFFER, RADEON_PRIO_CONST_BUFFER); + desc->slot_index_to_bind_directly = si_get_constbuf_slot(0); - struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, i); + desc = si_sampler_and_image_descriptors(sctx, i); si_init_descriptors(desc, gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES : gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES : |