aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2019-07-05 22:12:36 -0400
committer Marek Olšák <[email protected]> 2019-07-19 20:16:19 -0400
commit 37db9d28650c21d2091a654b7c6a636927ef584d (patch)
tree 9f6112ce9fbc29edee90f4ec0b9f07fd61d4a89c
parent 985a59e0d1d7dac696ebd2018a950bfe1b3f3580 (diff)
radeonsi/gfx10: fix unnecessary LDS overallocation for NGG GS
Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
-rw-r--r-- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c 8
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 2
2 files changed, 2 insertions, 8 deletions
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index e69bc810b63..de0299740f8 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -1279,17 +1279,11 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
/* We can't allow using the whole LDS, because GS waves compete with
* other shader stages for LDS space.
*
- * Streamout can increase the ESGS buffer size later on, so be more
- * conservative with streamout and use 4K dwords. This may be suboptimal.
- *
- * Otherwise, use the limit of 7K dwords. The reason is that we need
- * to leave some headroom for the max_esverts increase at the end.
- *
* TODO: We should really take the shader's internal LDS use into
* account. The linker will fail if the size is greater than
* 8K dwords.
*/
- const unsigned max_lds_size = (gs_sel->so.num_outputs ? 4 : 7) * 1024 - 128;
+ const unsigned max_lds_size = 8 * 1024 - 768;
const unsigned target_lds_size = max_lds_size;
unsigned esvert_lds_size = 0;
unsigned gsprim_lds_size = 0;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f217abd1501..8f392d640fe 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5242,7 +5242,7 @@ static bool si_shader_binary_open(struct si_screen *screen,
}
if (sel && shader->key.as_ngg) {
- if (sel->so.num_outputs) {
+ if (sel->type != PIPE_SHADER_GEOMETRY && sel->so.num_outputs) {
unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1);
esgs_ring_size = MAX2(esgs_ring_size,
shader->ngg.max_out_verts * esgs_vertex_bytes);