radeonsi: move PSIZE and CLIPDIST unique IO indices after GENERIC

Heaven LDS usage for LS+HS is below. The masks are "outputs_written"
for LS and HS. Note that 32K is the maximum size.

Before:
  heaven_x64: ls=1f1 tcs=1f1, lds=32K
  heaven_x64: ls=31 tcs=31, lds=24K
  heaven_x64: ls=71 tcs=71, lds=28K

After:
  heaven_x64: ls=3f tcs=3f, lds=24K
  heaven_x64: ls=7 tcs=7, lds=13K
  heaven_x64: ls=f tcs=f, lds=17K

All other apps have a similar decrease in LDS usage, because
the "outputs_written" masks are similar. Also, most apps don't write
POSITION in these shader stages, so there is room for improvement.
(tight per-component input/output packing might help even more)

It's unknown whether this improves performance.

Tested-by: Edmondo Tommasina <[email protected]>
Tested-by: Dieter Nützel <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
author: Marek Olšák <[email protected]> 2017-05-29 00:40:39 +0200
committer: Marek Olšák <[email protected]> 2017-06-07 20:14:15 +0200
commit: 2b8b9a56efc24cc0f27469bf1532c288cdca2076 (patch)
tree: 338aae3dc064c1ff78befc7c8d26ff3d21bf780c /src
parent: 2c4ec3f93fcab3fddcbe132200b210e7def1facc (diff)
2 files changed, 14 insertions, 8 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0ca07ab4ee5..5c7deeb250e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -136,18 +136,22 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
 	switch (semantic_name) {
 	case TGSI_SEMANTIC_POSITION:
 		return 0;
-	case TGSI_SEMANTIC_PSIZE:
-		return 1;
-	case TGSI_SEMANTIC_CLIPDIST:
-		assert(index <= 1);
-		return 2 + index;
 	case TGSI_SEMANTIC_GENERIC:
+		/* Some shader stages use the highest used IO index
+		 * to determine the size to allocate for inputs/outputs
+		 * (in LDS, tess and GS rings), so GENERIC should be placed right
+		 * after POSITION to make that size as small as possible.
+		 */
 		if (index < SI_MAX_IO_GENERIC)
-			return 4 + index;
+			return 1 + index;
 
 		assert(!"invalid generic index");
 		return 0;
-
+	case TGSI_SEMANTIC_PSIZE:
+		return SI_MAX_IO_GENERIC + 1;
+	case TGSI_SEMANTIC_CLIPDIST:
+		assert(index <= 1);
+		return SI_MAX_IO_GENERIC + 2 + index;
 	case TGSI_SEMANTIC_FOG:
 		return SI_MAX_IO_GENERIC + 4;
 	case TGSI_SEMANTIC_LAYER:
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 08d647b5a54..41f8bdf9a54 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1233,7 +1233,9 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
 	uint64_t outputs_written = vs->outputs_written;
 	uint64_t inputs_read = 0;
 
-	outputs_written &= ~0x3; /* ignore POSITION, PSIZE */
+	/* ignore POSITION, PSIZE */
+	outputs_written &= ~((1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0) |
+			     (1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE, 0))));
 
 	if (!ps_disabled) {
 		inputs_read = ps->inputs_read;
author	Marek Olšák <[email protected]>	2017-05-29 00:40:39 +0200
committer	Marek Olšák <[email protected]>	2017-06-07 20:14:15 +0200
commit	2b8b9a56efc24cc0f27469bf1532c288cdca2076 (patch)
tree	338aae3dc064c1ff78befc7c8d26ff3d21bf780c /src
parent	2c4ec3f93fcab3fddcbe132200b210e7def1facc (diff)