freedreno/ir3: move binning-pass fixup for a6xx+

Move this to after ir3_cp (which can add lowered immediates to the const state) for a6xx+, to ensure the uniform state matches between binning and vertex shaders. This way we can emit just a single VS_CONST state- group when we re-use single cmdstream for both binning and draw passes. Signed-off-by: Rob Clark <[email protected]>
author: Rob Clark <[email protected]> 2018-10-12 16:01:22 -0400
committer: Rob Clark <[email protected]> 2018-10-17 12:44:48 -0400
commit: d9dbc9c21f17e4c86f8e366fbe225df39e3b7b59 (patch)
tree: 8aac369382a6b6bdcaf229526f896ae0dd243cad
parent: 1a51c4a87ea9202af90ccb28bd697f0df753f587 (diff)
1 files changed, 37 insertions, 20 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 9474f75a9f7..197196383b0 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -3562,6 +3562,32 @@ fixup_astc_srgb(struct ir3_context *ctx)
 	}
 }
 
+static void
+fixup_binning_pass(struct ir3_context *ctx)
+{
+	struct ir3_shader_variant *so = ctx->so;
+	struct ir3 *ir = ctx->ir;
+	unsigned i, j;
+
+	for (i = 0, j = 0; i < so->outputs_count; i++) {
+		unsigned slot = so->outputs[i].slot;
+
+		/* throw away everything but first position/psize */
+		if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) {
+			if (i != j) {
+				so->outputs[j] = so->outputs[i];
+				ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0];
+				ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1];
+				ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2];
+				ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3];
+			}
+			j++;
+		}
+	}
+	so->outputs_count = j;
+	ir->noutputs = j * 4;
+}
+
 int
 ir3_compile_shader_nir(struct ir3_compiler *compiler,
 		struct ir3_shader_variant *so)
@@ -3569,7 +3595,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 	struct ir3_context *ctx;
 	struct ir3 *ir;
 	struct ir3_instruction **inputs;
-	unsigned i, j, actual_in, inloc;
+	unsigned i, actual_in, inloc;
 	int ret = 0, max_bary;
 
 	assert(!so->ir);
@@ -3599,25 +3625,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 		fixup_frag_inputs(ctx);
 
 	/* at this point, for binning pass, throw away unneeded outputs: */
-	if (so->binning_pass) {
-		for (i = 0, j = 0; i < so->outputs_count; i++) {
-			unsigned slot = so->outputs[i].slot;
-
-			/* throw away everything but first position/psize */
-			if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) {
-				if (i != j) {
-					so->outputs[j] = so->outputs[i];
-					ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0];
-					ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1];
-					ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2];
-					ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3];
-				}
-				j++;
-			}
-		}
-		so->outputs_count = j;
-		ir->noutputs = j * 4;
-	}
+	if (so->binning_pass && (ctx->compiler->gpu_id < 600))
+		fixup_binning_pass(ctx);
 
 	/* if we want half-precision outputs, mark the output registers
 	 * as half:
@@ -3656,6 +3665,14 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 
 	ir3_cp(ir, so);
 
+	/* at this point, for binning pass, throw away unneeded outputs:
+	 * Note that for a6xx and later, we do this after ir3_cp to ensure
+	 * that the uniform/constant layout for BS and VS matches, so that
+	 * we can re-use same VS_CONST state group.
+	 */
+	if (so->binning_pass && (ctx->compiler->gpu_id >= 600))
+		fixup_binning_pass(ctx);
+
 	/* Insert mov if there's same instruction for each output.
 	 * eg. dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow
 	 */
author	Rob Clark <[email protected]>	2018-10-12 16:01:22 -0400
committer	Rob Clark <[email protected]>	2018-10-17 12:44:48 -0400
commit	d9dbc9c21f17e4c86f8e366fbe225df39e3b7b59 (patch)
tree	8aac369382a6b6bdcaf229526f896ae0dd243cad
parent	1a51c4a87ea9202af90ccb28bd697f0df753f587 (diff)