diff options
author | Marek Olšák <[email protected]> | 2017-05-29 00:40:39 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2017-06-07 20:14:15 +0200 |
commit | 2b8b9a56efc24cc0f27469bf1532c288cdca2076 (patch) | |
tree | 338aae3dc064c1ff78befc7c8d26ff3d21bf780c /src/gallium | |
parent | 2c4ec3f93fcab3fddcbe132200b210e7def1facc (diff) |
radeonsi: move PSIZE and CLIPDIST unique IO indices after GENERIC
Heaven LDS usage for LS+HS is below. The masks are "outputs_written"
for LS and HS. Note that 32K is the maximum size.
Before:
heaven_x64: ls=1f1 tcs=1f1, lds=32K
heaven_x64: ls=31 tcs=31, lds=24K
heaven_x64: ls=71 tcs=71, lds=28K
After:
heaven_x64: ls=3f tcs=3f, lds=24K
heaven_x64: ls=7 tcs=7, lds=13K
heaven_x64: ls=f tcs=f, lds=17K
All other apps have a similar decrease in LDS usage, because
the "outputs_written" masks are similar. Also, most apps don't write
POSITION in these shader stages, so there is room for improvement.
(Tight per-component input/output packing might help even more.)
It's unknown whether this improves performance.
Tested-by: Edmondo Tommasina <[email protected]>
Tested-by: Dieter Nützel <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 4 |
2 files changed, 14 insertions, 8 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 0ca07ab4ee5..5c7deeb250e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -136,18 +136,22 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index) switch (semantic_name) { case TGSI_SEMANTIC_POSITION: return 0; - case TGSI_SEMANTIC_PSIZE: - return 1; - case TGSI_SEMANTIC_CLIPDIST: - assert(index <= 1); - return 2 + index; case TGSI_SEMANTIC_GENERIC: + /* Since some shader stages use the highest used IO index + * to determine the size to allocate for inputs/outputs + * (in LDS, tess and GS rings), GENERIC should be placed right + * after POSITION to make that size as small as possible. + */ if (index < SI_MAX_IO_GENERIC) - return 4 + index; + return 1 + index; assert(!"invalid generic index"); return 0; - + case TGSI_SEMANTIC_PSIZE: + return SI_MAX_IO_GENERIC + 1; + case TGSI_SEMANTIC_CLIPDIST: + assert(index <= 1); + return SI_MAX_IO_GENERIC + 2 + index; case TGSI_SEMANTIC_FOG: return SI_MAX_IO_GENERIC + 4; case TGSI_SEMANTIC_LAYER: diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 08d647b5a54..41f8bdf9a54 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1233,7 +1233,9 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx, uint64_t outputs_written = vs->outputs_written; uint64_t inputs_read = 0; - outputs_written &= ~0x3; /* ignore POSITION, PSIZE */ + /* ignore POSITION, PSIZE */ + outputs_written &= ~((1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0) | + (1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE, 0)))); if (!ps_disabled) { inputs_read = ps->inputs_read; |