freedreno/ir3: Implement lowering passes for VS and GS

This introduces two new lowering passes: one to lower VS to explicit outputs using STLW, and one to lower GS to load its inputs using LDLW and to implement the GS-specific functionality.

Signed-off-by: Kristian H. Kristensen <[email protected]>
author: Kristian H. Kristensen <[email protected]> 2019-10-10 17:17:10 -0700
committer: Kristian H. Kristensen <[email protected]> 2019-10-17 13:43:53 -0700
commit: 8e16fb152813d13121d13710f72ecce009c1885e (patch)
tree: e85c93fd326b3b0f724011607d85b8f4a77d0f77
parent: 8f39985b01b34cbb6a84b4614f5cb732748954d1 (diff)
13 files changed, 517 insertions, 2 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9b94c9edf23..5c98aeefc66 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1524,6 +1524,9 @@ typedef enum {
    NIR_INTRINSIC_SRC_ACCESS,
    NIR_INTRINSIC_DST_ACCESS,
 
+   /* Driver location for nir_load_primitive_location_ir3 */
+   NIR_INTRINSIC_DRIVER_LOCATION,
+
    NIR_INTRINSIC_NUM_INDEX_FLAGS,
 
 } nir_intrinsic_index_flag;
@@ -1632,6 +1635,7 @@ INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned)
 INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned)
 INTRINSIC_IDX_ACCESSORS(type, TYPE, nir_alu_type)
 INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned)
+INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned)
 
 static inline void
 nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index ae62a85d39b..637576c092a 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -124,6 +124,8 @@ DESC_TYPE = "NIR_INTRINSIC_DESC_TYPE"
 TYPE = "NIR_INTRINSIC_TYPE"
 # The swizzle mask for quad_swizzle_amd & masked_swizzle_amd
 SWIZZLE_MASK = "NIR_INTRINSIC_SWIZZLE_MASK"
+# Driver location of attribute
+DRIVER_LOCATION = "NIR_INTRINSIC_DRIVER_LOCATION"
 
 #
 # Possible flags:
@@ -771,6 +773,12 @@ intrinsic("ssbo_atomic_xor_ir3",        src_comp=[1, 1, 1, 1],    dest_comp=1)
 intrinsic("ssbo_atomic_exchange_ir3",   src_comp=[1, 1, 1, 1],    dest_comp=1)
 intrinsic("ssbo_atomic_comp_swap_ir3",  src_comp=[1, 1, 1, 1, 1], dest_comp=1)
 
+# System values for freedreno geometry shaders.
+system_value("vs_primitive_stride_ir3", 1)
+system_value("vs_vertex_stride_ir3", 1)
+system_value("gs_header_ir3", 1)
+system_value("primitive_location_ir3", 1, indices=[DRIVER_LOCATION])
+
 # IR3-specific load/store intrinsics. These access a buffer used to pass data
 # between geometry stages - perhaps it's explicit access to the vertex cache.
 
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 48844b7ed79..496f9279676 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -800,6 +800,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
       [NIR_INTRINSIC_DESC_TYPE] = "desc_type",
       [NIR_INTRINSIC_TYPE] = "type",
       [NIR_INTRINSIC_SWIZZLE_MASK] = "swizzle_mask",
+      [NIR_INTRINSIC_DRIVER_LOCATION] = "driver_location",
    };
    for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
       if (!info->index_map[idx])
diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
index 71796687afa..afaad50adf6 100644
--- a/src/compiler/shader_enums.c
+++ b/src/compiler/shader_enums.c
@@ -254,6 +254,7 @@ gl_system_value_name(gl_system_value sysval)
      ENUM(SYSTEM_VALUE_BARYCENTRIC_SAMPLE),
      ENUM(SYSTEM_VALUE_BARYCENTRIC_CENTROID),
      ENUM(SYSTEM_VALUE_BARYCENTRIC_SIZE),
+     ENUM(SYSTEM_VALUE_GS_HEADER_IR3),
    };
    STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
    return NAME(sysval);
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index 0704719c229..f9b2b8c1d73 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -641,6 +641,13 @@ typedef enum
    SYSTEM_VALUE_BARYCENTRIC_CENTROID,
    SYSTEM_VALUE_BARYCENTRIC_SIZE,
 
+   /**
+    * IR3 specific geometry shader system value that packs invocation id,
+    * thread id and vertex id.  Having this as a nir level system value lets
+    * us do the unpacking in nir.
+    */
+   SYSTEM_VALUE_GS_HEADER_IR3,
+
    SYSTEM_VALUE_MAX             /**< Number of values */
 } gl_system_value;
 
diff --git a/src/freedreno/Makefile.sources b/src/freedreno/Makefile.sources
index cf3ac7bdba4..bb56869e1cc 100644
--- a/src/freedreno/Makefile.sources
+++ b/src/freedreno/Makefile.sources
@@ -38,6 +38,7 @@ ir3_SOURCES := \
 	ir3/ir3_nir_lower_load_barycentric_at_sample.c \
 	ir3/ir3_nir_lower_load_barycentric_at_offset.c \
 	ir3/ir3_nir_lower_io_offsets.c \
+	ir3/ir3_nir_lower_tess.c \
 	ir3/ir3_nir_lower_tg4_to_tex.c \
 	ir3/ir3_nir_move_varying_inputs.c \
 	ir3/ir3_print.c \
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index b0d3e98d00a..2a1f9071118 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -76,6 +76,10 @@ struct ir3_context {
 	/* For fragment shaders: */
 	struct ir3_instruction *samp_id, *samp_mask_in;
 
+	/* For geometry shaders: */
+	struct ir3_instruction *primitive_id;
+	struct ir3_instruction *gs_header;
+
 	/* Compute shader inputs: */
 	struct ir3_instruction *local_invocation_id, *work_group_id;
 
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 2f95b249c26..103821cd6b3 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -101,7 +101,8 @@ ir3_key_lowers_nir(const struct ir3_shader_key *key)
 	return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r |
 			key->vsaturate_s | key->vsaturate_t | key->vsaturate_r |
 			key->ucp_enables | key->color_two_side |
-			key->fclamp_color | key->vclamp_color;
+			key->fclamp_color | key->vclamp_color |
+			key->has_gs;
 }
 
 #define OPT(nir, pass, ...) ({                             \
@@ -186,6 +187,19 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 			.lower_tg4_offsets = true,
 	};
 
+	if (key && key->has_gs) {
+		switch (shader->type) {
+		case MESA_SHADER_VERTEX:
+			NIR_PASS_V(s, ir3_nir_lower_vs_to_explicit_io, shader);
+			break;
+		case MESA_SHADER_GEOMETRY:
+			NIR_PASS_V(s, ir3_nir_lower_gs, shader);
+			break;
+		default:
+			break;
+		}
+	}
+
 	if (key) {
 		switch (shader->type) {
 		case MESA_SHADER_FRAGMENT:
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index a9b39e235b5..a602f40858b 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -41,6 +41,9 @@ bool ir3_nir_lower_load_barycentric_at_sample(nir_shader *shader);
 bool ir3_nir_lower_load_barycentric_at_offset(nir_shader *shader);
 bool ir3_nir_move_varying_inputs(nir_shader *shader);
 
+void ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s);
+void ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s);
+
 const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
 bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
 void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c
new file mode 100644
index 00000000000..b4982503f0d
--- /dev/null
+++ b/src/freedreno/ir3/ir3_nir_lower_tess.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright © 2019 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir3_nir.h"
+#include "ir3_compiler.h"
+#include "compiler/nir/nir_builder.h"
+
+struct state { /* Working state shared by the VS and GS lowering passes in this file. */
+	struct primitive_map { /* Filled in by build_primitive_map(); indexed by driver_location. */
+		unsigned loc[32]; /* running start position assigned to each driver_location */
+		unsigned size[32]; /* attribute slots * 4 while building; rewritten per entry at the end of build_primitive_map() */
+		unsigned stride; /* sum of all sizes accumulated while assigning loc[] */
+	} map;
+
+	nir_ssa_def *header; /* nir_load_gs_header_ir3() emitted at the top of the entrypoint */
+
+	nir_variable *vertex_count_var; /* GS local: incremented after every emit_vertex */
+	nir_variable *emitted_vertex_var; /* GS local: incremented only inside the "count == local thread id" branch */
+	nir_variable *vertex_flags_var; /* GS local: set to 4 at entry and on end_primitive, reset to 0 after emit_vertex */
+	nir_variable *vertex_flags_out; /* GS: ends up pointing at the local temporary of the added "vertex_flags" output */
+
+	nir_variable *output_vars[32]; /* GS: local temporary per shader output, indexed by driver_location */
+};
+
+static nir_ssa_def * /* Emits NIR computing (v >> start) & mask and returns the result. */
+bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
+{
+	return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)), /* unsigned shift right by the immediate `start` */
+			nir_imm_int(b, mask)); /* `mask` is applied after the shift, so it is relative to bit 0 */
+}
+
+static nir_ssa_def * /* Emits NIR extracting the invocation id field from state->header. */
+build_invocation_id(nir_builder *b, struct state *state)
+{
+	return bitfield_extract(b, state->header, 11, 31); /* (header >> 11) & 0x1f */
+}
+
+static nir_ssa_def * /* Emits NIR extracting the vertex id field from state->header. */
+build_vertex_id(nir_builder *b, struct state *state)
+{
+	return bitfield_extract(b, state->header, 6, 31); /* (header >> 6) & 0x1f */
+}
+
+static nir_ssa_def * /* Emits NIR extracting the local primitive id field from state->header. */
+build_local_primitive_id(nir_builder *b, struct state *state)
+{
+	return bitfield_extract(b, state->header, 0, 63); /* header & 0x3f */
+}
+
+static nir_variable * /* Returns the first variable in `list` with the given driver_location, or NULL. */
+get_var(struct exec_list *list, int driver_location)
+{
+	nir_foreach_variable(v, list) { /* linear scan in list order */
+		if (v->data.driver_location == driver_location) {
+			return v; /* first match wins if several variables share the location */
+		}
+	}
+
+	return NULL; /* no variable uses this driver_location */
+}
+
+static nir_ssa_def * /* Emits NIR for primitive_offset + vertex_offset + attr_offset + offset and returns the sum. */
+build_local_offset(nir_builder *b, struct state *state,
+		nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
+{
+	nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
+	nir_ssa_def *primitive_offset =
+		nir_imul(b, build_local_primitive_id(b, state), primitive_stride); /* local primitive id * primitive stride */
+	nir_ssa_def *attr_offset;
+	nir_ssa_def *vertex_stride;
+
+	if (b->shader->info.stage == MESA_SHADER_VERTEX) { /* VS: both values are immediates taken from state->map */
+		vertex_stride = nir_imm_int(b, state->map.stride * 4);
+		attr_offset = nir_imm_int(b, state->map.loc[base] * 4); /* `base` indexes the map directly (no bounds check here) */
+	} else if (b->shader->info.stage == MESA_SHADER_GEOMETRY) { /* GS: both values are loaded through system-value intrinsics */
+		vertex_stride = nir_load_vs_vertex_stride_ir3(b);
+		attr_offset = nir_load_primitive_location_ir3(b, base); /* `base` is passed as the intrinsic's index */
+	} else {
+		unreachable("bad shader stage"); /* only VS and GS callers are handled */
+	}
+
+	nir_ssa_def *vertex_offset = nir_imul(b, vertex, vertex_stride); /* vertex index * per-vertex stride */
+
+	return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
+			nir_iadd(b, attr_offset, offset)); /* caller-supplied `offset` is added unscaled */
+}
+
+static nir_intrinsic_instr * /* Replaces `intr` with a new intrinsic `op` built from src0..src2; returns the new instruction. */
+replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
+		nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
+{
+	nir_intrinsic_instr *new_intr =
+		nir_intrinsic_instr_create(b->shader, op);
+
+	new_intr->src[0] = nir_src_for_ssa(src0); /* src0 is mandatory; src1/src2 may be NULL */
+	if (src1)
+		new_intr->src[1] = nir_src_for_ssa(src1);
+	if (src2)
+		new_intr->src[2] = nir_src_for_ssa(src2);
+
+	new_intr->num_components = intr->num_components; /* component count is inherited from the replaced instruction */
+
+	if (nir_intrinsic_infos[op].has_dest)
+		nir_ssa_dest_init(&new_intr->instr, &new_intr->dest,
+						  intr->num_components, 32, NULL); /* destination bit size is hard-coded to 32 */
+
+	nir_builder_instr_insert(b, &new_intr->instr); /* inserted at the builder's current cursor */
+
+	if (nir_intrinsic_infos[op].has_dest)
+		nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(&new_intr->dest.ssa)); /* redirect users of the old result */
+
+	nir_instr_remove(&intr->instr); /* the old instruction is always removed */
+
+	return new_intr;
+}
+
+static void /* Fills map->size[], map->loc[] and map->stride from the variables in `list`. */
+build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
+{
+	nir_foreach_variable(var, list) { /* pass 1: record the largest size seen per driver_location */
+		switch (var->data.location) {
+		case VARYING_SLOT_TESS_LEVEL_OUTER:
+		case VARYING_SLOT_TESS_LEVEL_INNER:
+			continue; /* tess levels are left out of the map */
+		}
+
+		unsigned size = glsl_count_attribute_slots(var->type, false) * 4; /* attribute slots * 4 */
+
+		assert(var->data.driver_location < ARRAY_SIZE(map->size));
+		map->size[var->data.driver_location] =
+			MAX2(map->size[var->data.driver_location], size); /* reads the old value: callers in this file pass a zero-initialised map */
+	}
+
+	unsigned loc = 0;
+	for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) { /* pass 2: assign packed positions in driver_location order */
+		if (map->size[i] == 0)
+				continue; /* unused driver_location */
+		nir_variable *var = get_var(list, i); /* non-NULL here: pass 1 only sets size[] for locations taken from `list` */
+		map->loc[i] = loc;
+		loc += map->size[i];
+
+		if (var->data.patch)
+			map->size[i] = 0; /* patch variables end up with size 0 */
+		else
+			map->size[i] = map->size[i] / glsl_get_length(var->type); /* NOTE(review): divisor is whatever glsl_get_length() returns for var->type - confirm it cannot be 0 for non-array outputs */
+	}
+
+	map->stride = loc; /* total of all sizes as they were before the per-entry rewrite above */
+}
+
+static void /* For each store_output in `block`, emits a store_shared_ir3 of the same value at a computed offset. */
+lower_vs_block(nir_block *block, nir_builder *b, struct state *state)
+{
+	nir_foreach_instr_safe(instr, block) {
+		if (instr->type != nir_instr_type_intrinsic)
+			continue;
+
+		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+		switch (intr->intrinsic) {
+		case nir_intrinsic_store_output: {
+			// src[] = { value, offset }.
+
+			b->cursor = nir_before_instr(&intr->instr); /* new code goes in front of the original store */
+
+			nir_ssa_def *vertex_id = build_vertex_id(b, state);
+			nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
+					intr->src[1].ssa); /* vertex index comes from the header; attribute is selected by the store's base */
+			nir_intrinsic_instr *store =
+				nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
+
+			nir_intrinsic_set_write_mask(store, MASK(intr->num_components)); /* write mask derived from the component count, not from intr's own mask */
+			store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
+			store->src[1] = nir_src_for_ssa(offset);
+
+			store->num_components = intr->num_components;
+
+			nir_builder_instr_insert(b, &store->instr); /* NOTE(review): the original store_output is not removed here - confirm this is intended */
+			break;
+		}
+
+		default:
+			break; /* all other intrinsics are left untouched */
+		}
+	}
+}
+
+static nir_ssa_def * /* Emits NIR extracting the local thread id field from a freshly loaded GS header. */
+local_thread_id(nir_builder *b)
+{
+	return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023); /* (header >> 16) & 0x3ff; emits a new header load on each call instead of reusing state->header */
+}
+
+void /* VS pass: builds the output map, records it in `s`, and runs lower_vs_block() over every block. */
+ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s)
+{
+	struct state state = { }; /* zero-initialised; build_primitive_map() relies on map.size[] starting at 0 */
+
+	build_primitive_map(shader, &state.map, &shader->outputs);
+	memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc)); /* export the per-driver_location positions to the ir3_shader */
+
+	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+	assert(impl);
+
+	nir_builder b;
+	nir_builder_init(&b, impl);
+	b.cursor = nir_before_cf_list(&impl->body); /* the header load is emitted at the very top of the function */
+
+	state.header = nir_load_gs_header_ir3(&b);
+
+	nir_foreach_block_safe(block, impl)
+		lower_vs_block(block, &b, &state);
+
+	nir_metadata_preserve(impl, nir_metadata_block_index |
+			nir_metadata_dominance); /* only instructions were added, so block index and dominance are declared preserved */
+
+	s->output_size = state.map.stride; /* total computed by build_primitive_map() */
+}
+
+static void /* GS pass over one block: rewrites output stores, emit_vertex/end_primitive, per-vertex input loads and invocation id. */
+lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
+{
+	nir_intrinsic_instr *outputs[32] = {}; /* latest store_output per base seen since the previous emit_vertex; NOTE(review): scope is this block only - confirm stores never precede emit_vertex across blocks */
+
+	nir_foreach_instr_safe(instr, block) {
+		if (instr->type != nir_instr_type_intrinsic)
+			continue;
+
+		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+		switch (intr->intrinsic) {
+		case nir_intrinsic_store_output: {
+			// src[] = { value, offset }.
+
+			uint32_t loc = nir_intrinsic_base(intr);
+			outputs[loc] = intr; /* remembered, not lowered yet; NOTE(review): loc is not range-checked against the 32-entry array */
+			break;
+		}
+
+		case nir_intrinsic_end_primitive: {
+			b->cursor = nir_before_instr(&intr->instr);
+			nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 4), 0x1); /* end_primitive becomes "flags = 4" */
+			nir_instr_remove(&intr->instr);
+			break;
+		}
+
+		case nir_intrinsic_emit_vertex: {
+
+			/* Load the vertex count */
+			b->cursor = nir_before_instr(&intr->instr);
+			nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+			nir_push_if(b, nir_ieq(b, count, local_thread_id(b))); /* only the emit whose index equals the local thread id takes this branch */
+
+			for (uint32_t i = 0; i < ARRAY_SIZE(outputs); i++) {
+				if (outputs[i]) {
+					nir_store_var(b, state->output_vars[i],
+							outputs[i]->src[0].ssa,
+							(1 << outputs[i]->num_components) - 1); /* copy the pending value into the per-output temporary */
+
+					nir_instr_remove(&outputs[i]->instr); /* the original store_output is dropped */
+				}
+				outputs[i] = NULL; /* pending list is cleared for the next vertex */
+			}
+
+			nir_instr_remove(&intr->instr); /* emit_vertex itself is removed */
+
+			nir_store_var(b, state->emitted_vertex_var,
+					nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1); /* count vertices captured by this thread */
+
+			nir_store_var(b, state->vertex_flags_out,
+					nir_load_var(b, state->vertex_flags_var), 0x1); /* snapshot the current flags into the flags output temporary */
+
+			nir_pop_if(b, NULL);
+
+			/* Increment the vertex count by 1 */
+			nir_store_var(b, state->vertex_count_var,
+					nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
+			nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 0), 0x1); /* flags are cleared after every emit_vertex, outside the if */
+
+			break;
+		}
+
+		case nir_intrinsic_load_per_vertex_input: {
+			// src[] = { vertex, offset }.
+
+			b->cursor = nir_before_instr(&intr->instr);
+
+			nir_ssa_def *offset = build_local_offset(b, state,
+					intr->src[0].ssa, // this is typically gl_InvocationID
+					nir_intrinsic_base(intr),
+					intr->src[1].ssa);
+
+			replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL); /* the load becomes load_shared_ir3 at the computed offset */
+			break;
+		}
+
+		case nir_intrinsic_load_invocation_id: {
+			b->cursor = nir_before_instr(&intr->instr);
+
+			nir_ssa_def *iid = build_invocation_id(b, state); /* invocation id is unpacked from the header */
+			nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
+			nir_instr_remove(&intr->instr);
+			break;
+		}
+
+		default:
+			break; /* all other intrinsics are left untouched */
+		}
+	}
+}
+
+static void /* Emits one store_output per populated entry of state->output_vars at the builder cursor. */
+emit_store_outputs(nir_builder *b, struct state *state)
+{
+	/* This also stores the internally added vertex_flags output. */
+
+	for (uint32_t i = 0; i < ARRAY_SIZE(state->output_vars); i++) {
+		if (!state->output_vars[i])
+			continue; /* no output uses this driver_location */
+
+		nir_intrinsic_instr *store =
+			nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+
+		nir_intrinsic_set_base(store, i); /* array index doubles as the store's base */
+		store->src[0] = nir_src_for_ssa(nir_load_var(b, state->output_vars[i])); /* value is read back from the local temporary */
+		store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); /* offset source is constant 0 */
+		store->num_components = store->src[0].ssa->num_components;
+
+		nir_builder_instr_insert(b, &store->instr);
+	}
+}
+
+static void /* Removes from `list` each variable covering only part of the components used at its driver_location. */
+clean_up_split_vars(nir_shader *shader, struct exec_list *list)
+{
+	uint32_t components[32] = {}; /* union of component masks per driver_location (index is not range-checked) */
+
+	nir_foreach_variable(var, list) { /* pass 1: accumulate the union */
+		uint32_t mask =
+			((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac; /* component bits shifted by location_frac */
+		components[var->data.driver_location] |= mask;
+	}
+
+	nir_foreach_variable_safe(var, list) { /* pass 2: _safe iteration because nodes are removed */
+		uint32_t mask =
+			((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
+		bool subset =
+			(components[var->data.driver_location] | mask) != mask; /* true when the union has bits this variable lacks */
+		if (subset)
+			exec_node_remove(&var->node); /* only variables whose mask equals the full union stay in the list */
+	}
+}
+
+void /* GS pass: adds a vertex_flags output, shadows outputs with local temporaries, lowers every block, then stores outputs at each exit. */
+ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s)
+{
+	struct state state = { }; /* zero-initialised; output_vars[] and map.size[] start empty */
+
+	if (shader_debug_enabled(shader->info.stage)) {
+		fprintf(stderr, "NIR (before gs lowering):\n");
+		nir_print_shader(shader, stderr);
+	}
+
+	clean_up_split_vars(shader, &shader->inputs); /* drop partial-component variables before mapping */
+	clean_up_split_vars(shader, &shader->outputs);
+
+	build_primitive_map(shader, &state.map, &shader->inputs); /* GS maps its inputs, unlike the VS pass which maps outputs */
+
+	uint32_t loc = 0;
+	nir_foreach_variable(var, &shader->outputs) { /* find the first driver_location past all existing outputs */
+		uint32_t end = var->data.driver_location + glsl_count_attribute_slots(var->type, false);
+		loc = MAX2(loc, end);
+	}
+
+	state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
+			glsl_uint_type(), "vertex_flags"); /* extra uint shader output placed at that free location */
+	state.vertex_flags_out->data.driver_location = loc;
+	state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
+
+	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+	assert(impl);
+
+	nir_builder b;
+	nir_builder_init(&b, impl);
+	b.cursor = nir_before_cf_list(&impl->body);
+
+	state.header = nir_load_gs_header_ir3(&b); /* header load sits at the top of the function */
+
+	nir_foreach_variable(var, &shader->outputs) { /* one function-local temporary per output variable */
+		state.output_vars[var->data.driver_location] = 
+			nir_local_variable_create(impl, var->type,
+					ralloc_asprintf(var, "%s:gs-temp", var->name));
+	}
+
+	state.vertex_count_var =
+		nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
+	state.emitted_vertex_var =
+		nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");
+	state.vertex_flags_var =
+		nir_local_variable_create(impl, glsl_uint_type(), "vertex_flags");
+	state.vertex_flags_out = state.output_vars[state.vertex_flags_out->data.driver_location]; /* from here on this refers to the local temporary, not the shader output */
+
+	/* initialize to 0 */
+	b.cursor = nir_before_cf_list(&impl->body);
+	nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
+	nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
+	nir_store_var(&b, state.vertex_flags_var, nir_imm_int(&b, 4), 0x1); /* flags start at 4 rather than 0 - the same value end_primitive stores */
+
+	nir_foreach_block_safe(block, impl)
+		lower_gs_block(block, &b, &state);
+
+	set_foreach(impl->end_block->predecessors, block_entry) { /* every block that leads to the function's end block */
+		struct nir_block *block = (void *)block_entry->key;
+		b.cursor = nir_after_block_before_jump(block);
+
+		nir_intrinsic_instr *discard_if =
+			nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);
+
+		nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0)); /* true when this thread captured no vertex */
+
+		discard_if->src[0] = nir_src_for_ssa(cond);
+
+		nir_builder_instr_insert(&b, &discard_if->instr);
+
+		emit_store_outputs(&b, &state); /* real store_output instructions are emitted only here, after the discard_if */
+	}
+
+	nir_metadata_preserve(impl, 0); /* passes 0: no metadata flags are declared preserved */
+
+	if (shader_debug_enabled(shader->info.stage)) {
+		fprintf(stderr, "NIR (after gs lowering):\n");
+		nir_print_shader(shader, stderr);
+	}
+}
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index aae7baeb2e0..10980bd38be 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -350,7 +350,14 @@ output_name(struct ir3_shader_variant *so, int i)
 	if (so->type == MESA_SHADER_FRAGMENT) {
 		return gl_frag_result_name(so->outputs[i].slot);
 	} else {
-		return gl_varying_slot_name(so->outputs[i].slot);
+		switch (so->outputs[i].slot) {
+		case VARYING_SLOT_GS_HEADER_IR3:
+			return "GS_HEADER";
+		case VARYING_SLOT_GS_VERTEX_FLAGS_IR3:
+			return "GS_VERTEX_FLAGS";
+		default:
+			return gl_varying_slot_name(so->outputs[i].slot);
+		}
 	}
 }
 
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index fa6d5b7d387..ce258865658 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -554,6 +554,11 @@ struct ir3_shader {
 
 	struct ir3_shader_variant *variants;
 	mtx_t variants_lock;
+
+	uint32_t output_size; /* Size in dwords of all outputs for VS, size of entire patch for HS. */
+
+	/* Map from driver_location to byte offset in per-primitive storage */
+	unsigned output_loc[32];
 };
 
 void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
@@ -693,6 +698,10 @@ ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot)
 	return regid(63, 0);
 }
 
+#define VARYING_SLOT_GS_HEADER_IR3			(VARYING_SLOT_MAX + 0)
+#define VARYING_SLOT_GS_VERTEX_FLAGS_IR3	(VARYING_SLOT_MAX + 1)
+
+
 static inline uint32_t
 ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot)
 {
diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build
index be03ffb88c3..6e1434057e7 100644
--- a/src/freedreno/ir3/meson.build
+++ b/src/freedreno/ir3/meson.build
@@ -66,6 +66,7 @@ libfreedreno_ir3_files = files(
   'ir3_nir_lower_load_barycentric_at_sample.c',
   'ir3_nir_lower_load_barycentric_at_offset.c',
   'ir3_nir_lower_io_offsets.c',
+  'ir3_nir_lower_tess.c',
   'ir3_nir_lower_tg4_to_tex.c',
   'ir3_nir_move_varying_inputs.c',
   'ir3_print.c',
author	Kristian H. Kristensen <[email protected]>	2019-10-10 17:17:10 -0700
committer	Kristian H. Kristensen <[email protected]>	2019-10-17 13:43:53 -0700
commit	8e16fb152813d13121d13710f72ecce009c1885e (patch)
tree	e85c93fd326b3b0f724011607d85b8f4a77d0f77
parent	8f39985b01b34cbb6a84b4614f5cb732748954d1 (diff)