freedreno/ir3: Setup ir3 inputs and outputs for GS

Inputs are the GS header, which contains vertex ID, local primitive ID and thread ID as well as primitive ID. The setup is a little different from other sysvals, since we always have to receive them in the VS so that it can pass them on into the GS. The vertex flag outputs from GS is set up as a proper nir output in the lowering pass and doesn't need special handling here. Signed-off-by: Kristian H. Kristensen <[email protected]>
author: Kristian H. Kristensen <[email protected]> 2019-10-11 12:24:12 -0700
committer: Kristian H. Kristensen <[email protected]> 2019-10-17 13:43:53 -0700
commit: ce08fddbbe5888a5d573edb127c623e7be7d3169 (patch)
tree: 4f8725222abda212bbaa576a2a897fdd141ee5b3
parent: 0293d14719201fc5802aa2573e03d293f06d94fa (diff)
1 files changed, 64 insertions, 3 deletions
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 04c44b90dfe..1d51bb0aaec 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1359,6 +1359,14 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 		dst[0] = create_uniform(b, primitive_map + idx);
 		break;
 
+	case nir_intrinsic_load_gs_header_ir3:
+		dst[0] = ctx->gs_header;
+		break;
+
+	case nir_intrinsic_load_primitive_id:
+		dst[0] = ctx->primitive_id;
+		break;
+
 	case nir_intrinsic_load_ubo:
 		emit_intrinsic_load_ubo(ctx, intr, dst);
 		break;
@@ -2488,6 +2496,12 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
 	unsigned frac = in->data.location_frac;
 	unsigned slot = in->data.location;
 
+	/* Inputs are loaded using ldlw or ldg for these stages. */
+	if (ctx->so->type == MESA_SHADER_TESS_CTRL ||
+			ctx->so->type == MESA_SHADER_TESS_EVAL ||
+			ctx->so->type == MESA_SHADER_GEOMETRY)
+		return;
+
 	/* skip unread inputs, we could end up with (for example), unsplit
 	 * matrix/etc inputs in the case they are not read, so just silently
 	 * skip these.
@@ -2685,7 +2699,8 @@ setup_output(struct ir3_context *ctx, nir_variable *out)
 			ir3_context_error(ctx, "unknown FS output name: %s\n",
 					gl_frag_result_name(slot));
 		}
-	} else if (ctx->so->type == MESA_SHADER_VERTEX) {
+	} else if (ctx->so->type == MESA_SHADER_VERTEX ||
+			ctx->so->type == MESA_SHADER_GEOMETRY) {
 		switch (slot) {
 		case VARYING_SLOT_POS:
 			so->writes_pos = true;
@@ -2693,6 +2708,11 @@ setup_output(struct ir3_context *ctx, nir_variable *out)
 		case VARYING_SLOT_PSIZ:
 			so->writes_psize = true;
 			break;
+		case VARYING_SLOT_PRIMITIVE_ID:
+		case VARYING_SLOT_LAYER:
+		case VARYING_SLOT_GS_VERTEX_FLAGS_IR3:
+			debug_assert(ctx->so->type == MESA_SHADER_GEOMETRY);
+			/* fall through */
 		case VARYING_SLOT_COL0:
 		case VARYING_SLOT_COL1:
 		case VARYING_SLOT_BFC0:
@@ -2707,7 +2727,8 @@ setup_output(struct ir3_context *ctx, nir_variable *out)
 				break;
 			if ((VARYING_SLOT_TEX0 <= slot) && (slot <= VARYING_SLOT_TEX7))
 				break;
-			ir3_context_error(ctx, "unknown VS output name: %s\n",
+			ir3_context_error(ctx, "unknown %s shader output name: %s\n",
+					_mesa_shader_stage_to_string(ctx->so->type),
 					gl_varying_slot_name(slot));
 		}
 	} else {
@@ -2753,8 +2774,9 @@ max_drvloc(struct exec_list *vars)
 }
 
 static const unsigned max_sysvals[] = {
-	[MESA_SHADER_FRAGMENT] = 24,  // TODO
 	[MESA_SHADER_VERTEX]  = 16,
+	[MESA_SHADER_GEOMETRY] = 16,
+	[MESA_SHADER_FRAGMENT] = 24,  // TODO
 	[MESA_SHADER_COMPUTE] = 16, // TODO how many do we actually need?
 	[MESA_SHADER_KERNEL]  = 16, // TODO how many do we actually need?
 };
@@ -2771,6 +2793,8 @@ emit_instructions(struct ir3_context *ctx)
 	/* we need to leave room for sysvals:
 	 */
 	ninputs += max_sysvals[ctx->so->type];
+	if (ctx->so->type == MESA_SHADER_VERTEX)
+		noutputs += 8; /* gs or tess header + primitive_id */
 
 	ctx->ir = ir3_create(ctx->compiler, ctx->so->type, ninputs, noutputs);
 
@@ -2781,6 +2805,14 @@ emit_instructions(struct ir3_context *ctx)
 
 	ninputs -= max_sysvals[ctx->so->type];
 
+	if (ctx->so->key.has_gs) {
+		if (ctx->so->type == MESA_SHADER_VERTEX ||
+			ctx->so->type == MESA_SHADER_GEOMETRY) {
+			ctx->gs_header = create_input(ctx, 0);
+			ctx->primitive_id = create_input(ctx, 0);
+		}
+	}
+
 	/* for fragment shader, the vcoord input register is used as the
 	 * base for bary.f varying fetch instrs:
 	 *
@@ -2813,11 +2845,40 @@ emit_instructions(struct ir3_context *ctx)
 				0x3, vcoord);
 	}
 
+	if (ctx->primitive_id)
+		add_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, ctx->primitive_id);
+	if (ctx->gs_header)
+		add_sysval_input(ctx, SYSTEM_VALUE_GS_HEADER_IR3, ctx->gs_header);
+
 	/* Setup outputs: */
 	nir_foreach_variable(var, &ctx->s->outputs) {
 		setup_output(ctx, var);
 	}
 
+	/* Set up the gs header as an output for the vertex shader so it won't
+	 * clobber it for the tess ctrl shader. */
+	if (ctx->so->type == MESA_SHADER_VERTEX) {
+		struct ir3_shader_variant *so = ctx->so;
+		if (ctx->primitive_id) {
+			unsigned n = so->outputs_count++;
+			so->outputs[n].slot = VARYING_SLOT_PRIMITIVE_ID;
+			so->outputs[n].regid = regid(n, 0);
+			ctx->ir->outputs[n * 4] = ctx->primitive_id;
+
+			compile_assert(ctx, n * 4 < ctx->ir->noutputs);
+		}
+
+		if (ctx->gs_header) {
+			unsigned n = so->outputs_count++;
+			so->outputs[n].slot = VARYING_SLOT_GS_HEADER_IR3;
+			so->outputs[n].regid = regid(n, 0);
+			ctx->ir->outputs[n * 4] = ctx->gs_header;
+
+			compile_assert(ctx, n * 4 < ctx->ir->noutputs);
+		}
+
+	}
+
 	/* Find # of samplers: */
 	nir_foreach_variable(var, &ctx->s->uniforms) {
 		ctx->so->num_samp += glsl_type_get_sampler_count(var->type);
author	Kristian H. Kristensen <[email protected]>	2019-10-11 12:24:12 -0700
committer	Kristian H. Kristensen <[email protected]>	2019-10-17 13:43:53 -0700
commit	ce08fddbbe5888a5d573edb127c623e7be7d3169 (patch)
tree	4f8725222abda212bbaa576a2a897fdd141ee5b3
parent	0293d14719201fc5802aa2573e03d293f06d94fa (diff)