radeonsi: disable the patch ID workaround on SI when the patch ID isn't used (v2)

The workaround causes a massive performance decrease on 1-SE parts. (Cape Verde, Hainan, Oland) The performance regression is already part of 17.0 and 17.1. v2: check tess_uses_prim_id Cc: 17.0 17.1 <[email protected]> Reviewed-by: Nicolai Hähnle <[email protected]>
author: Marek Olšák <[email protected]> 2017-06-06 15:23:42 +0200
committer: Marek Olšák <[email protected]> 2017-06-08 23:29:07 +0200
commit: 391673af7ad1565a5f6ac8fc2f8c9fcdd1fe9908 (patch)
tree: 968f54761e23772b15aeebb14094a5a626a57d46 /src
parent: 4b8d0c2b1d25c2cf1b3ce57c9c1d997bff1b9408 (diff)
2 files changed, 21 insertions, 15 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 108929c10c6..55599464843 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -371,6 +371,7 @@ struct si_context {
 	struct si_shader_selector *last_tcs;
 	int			last_num_tcs_input_cp;
 	int			last_tes_sh_base;
+	bool			last_tess_uses_primid;
 	unsigned		last_num_patches;
 
 	/* Debug state. */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index cd069e31a64..8508259ad87 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -102,6 +102,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	 * It would be wrong to think that TCS = TES. */
 	struct si_shader_selector *tcs =
 		sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
+	unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id;
+	bool has_primid_instancing_bug = sctx->b.chip_class == SI &&
+					 sctx->b.screen->info.max_se == 1;
 	unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
 	unsigned num_tcs_input_cp = info->vertices_per_patch;
 	unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
@@ -128,7 +131,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	if (sctx->last_ls == ls_current &&
 	    sctx->last_tcs == tcs &&
 	    sctx->last_tes_sh_base == tes_sh_base &&
-	    sctx->last_num_tcs_input_cp == num_tcs_input_cp) {
+	    sctx->last_num_tcs_input_cp == num_tcs_input_cp &&
+	    (!has_primid_instancing_bug ||
+	     (sctx->last_tess_uses_primid == tess_uses_primid))) {
 		*num_patches = sctx->last_num_patches;
 		return;
 	}
@@ -137,6 +142,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	sctx->last_tcs = tcs;
 	sctx->last_tes_sh_base = tes_sh_base;
 	sctx->last_num_tcs_input_cp = num_tcs_input_cp;
+	sctx->last_tess_uses_primid = tess_uses_primid;
 
 	/* This calculates how shader inputs and outputs among VS, TCS, and TES
 	 * are laid out in LDS. */
@@ -194,22 +200,21 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 		 */
 		unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
 		*num_patches = MIN2(*num_patches, one_wave);
-
-		if (sctx->screen->b.info.max_se == 1) {
-			/* The VGT HS block increments the patch ID unconditionally
-			 * within a single threadgroup. This results in incorrect
-			 * patch IDs when instanced draws are used.
-			 *
-			 * The intended solution is to restrict threadgroups to
-			 * a single instance by setting SWITCH_ON_EOI, which
-			 * should cause IA to split instances up. However, this
-			 * doesn't work correctly on SI when there is no other
-			 * SE to switch to.
-			 */
-			*num_patches = 1;
-		}
 	}
 
+	/* The VGT HS block increments the patch ID unconditionally
+	 * within a single threadgroup. This results in incorrect
+	 * patch IDs when instanced draws are used.
+	 *
+	 * The intended solution is to restrict threadgroups to
+	 * a single instance by setting SWITCH_ON_EOI, which
+	 * should cause IA to split instances up. However, this
+	 * doesn't work correctly on SI when there is no other
+	 * SE to switch to.
+	 */
+	if (has_primid_instancing_bug)
+		*num_patches = 1;
+
 	sctx->last_num_patches = *num_patches;
 
 	output_patch0_offset = input_patch_size * *num_patches;
author	Marek Olšák <[email protected]>	2017-06-06 15:23:42 +0200
committer	Marek Olšák <[email protected]>	2017-06-08 23:29:07 +0200
commit	391673af7ad1565a5f6ac8fc2f8c9fcdd1fe9908 (patch)
tree	968f54761e23772b15aeebb14094a5a626a57d46 /src
parent	4b8d0c2b1d25c2cf1b3ce57c9c1d997bff1b9408 (diff)