summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/compiler/nir/nir.h4
-rw-r--r--src/compiler/nir/nir_intrinsics.py8
-rw-r--r--src/compiler/nir/nir_print.c1
-rw-r--r--src/compiler/shader_enums.c1
-rw-r--r--src/compiler/shader_enums.h7
-rw-r--r--src/freedreno/Makefile.sources1
-rw-r--r--src/freedreno/ir3/ir3_context.h4
-rw-r--r--src/freedreno/ir3/ir3_nir.c16
-rw-r--r--src/freedreno/ir3/ir3_nir.h3
-rw-r--r--src/freedreno/ir3/ir3_nir_lower_tess.c455
-rw-r--r--src/freedreno/ir3/ir3_shader.c9
-rw-r--r--src/freedreno/ir3/ir3_shader.h9
-rw-r--r--src/freedreno/ir3/meson.build1
13 files changed, 517 insertions, 2 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9b94c9edf23..5c98aeefc66 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1524,6 +1524,9 @@ typedef enum {
NIR_INTRINSIC_SRC_ACCESS,
NIR_INTRINSIC_DST_ACCESS,
+ /* Driver location for nir_load_primitive_location_ir3 */
+ NIR_INTRINSIC_DRIVER_LOCATION,
+
NIR_INTRINSIC_NUM_INDEX_FLAGS,
} nir_intrinsic_index_flag;
@@ -1632,6 +1635,7 @@ INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned)
INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned)
INTRINSIC_IDX_ACCESSORS(type, TYPE, nir_alu_type)
INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned)
+INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned)
static inline void
nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index ae62a85d39b..637576c092a 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -124,6 +124,8 @@ DESC_TYPE = "NIR_INTRINSIC_DESC_TYPE"
TYPE = "NIR_INTRINSIC_TYPE"
# The swizzle mask for quad_swizzle_amd & masked_swizzle_amd
SWIZZLE_MASK = "NIR_INTRINSIC_SWIZZLE_MASK"
+# Driver location of attribute
+DRIVER_LOCATION = "NIR_INTRINSIC_DRIVER_LOCATION"
#
# Possible flags:
@@ -771,6 +773,12 @@ intrinsic("ssbo_atomic_xor_ir3", src_comp=[1, 1, 1, 1], dest_comp=1)
intrinsic("ssbo_atomic_exchange_ir3", src_comp=[1, 1, 1, 1], dest_comp=1)
intrinsic("ssbo_atomic_comp_swap_ir3", src_comp=[1, 1, 1, 1, 1], dest_comp=1)
+# System values for freedreno geometry shaders.
+system_value("vs_primitive_stride_ir3", 1)
+system_value("vs_vertex_stride_ir3", 1)
+system_value("gs_header_ir3", 1)
+system_value("primitive_location_ir3", 1, indices=[DRIVER_LOCATION])
+
# IR3-specific load/store intrinsics. These access a buffer used to pass data
# between geometry stages - perhaps it's explicit access to the vertex cache.
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 48844b7ed79..496f9279676 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -800,6 +800,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
[NIR_INTRINSIC_DESC_TYPE] = "desc_type",
[NIR_INTRINSIC_TYPE] = "type",
[NIR_INTRINSIC_SWIZZLE_MASK] = "swizzle_mask",
+ [NIR_INTRINSIC_DRIVER_LOCATION] = "driver_location",
};
for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
if (!info->index_map[idx])
diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
index 71796687afa..afaad50adf6 100644
--- a/src/compiler/shader_enums.c
+++ b/src/compiler/shader_enums.c
@@ -254,6 +254,7 @@ gl_system_value_name(gl_system_value sysval)
ENUM(SYSTEM_VALUE_BARYCENTRIC_SAMPLE),
ENUM(SYSTEM_VALUE_BARYCENTRIC_CENTROID),
ENUM(SYSTEM_VALUE_BARYCENTRIC_SIZE),
+ ENUM(SYSTEM_VALUE_GS_HEADER_IR3),
};
STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
return NAME(sysval);
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index 0704719c229..f9b2b8c1d73 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -641,6 +641,13 @@ typedef enum
SYSTEM_VALUE_BARYCENTRIC_CENTROID,
SYSTEM_VALUE_BARYCENTRIC_SIZE,
+ /**
+ * IR3 specific geometry shader system value that packs invocation id,
+ * thread id and vertex id. Having this as a nir level system value lets
+ * us do the unpacking in nir.
+ */
+ SYSTEM_VALUE_GS_HEADER_IR3,
+
SYSTEM_VALUE_MAX /**< Number of values */
} gl_system_value;
diff --git a/src/freedreno/Makefile.sources b/src/freedreno/Makefile.sources
index cf3ac7bdba4..bb56869e1cc 100644
--- a/src/freedreno/Makefile.sources
+++ b/src/freedreno/Makefile.sources
@@ -38,6 +38,7 @@ ir3_SOURCES := \
ir3/ir3_nir_lower_load_barycentric_at_sample.c \
ir3/ir3_nir_lower_load_barycentric_at_offset.c \
ir3/ir3_nir_lower_io_offsets.c \
+ ir3/ir3_nir_lower_tess.c \
ir3/ir3_nir_lower_tg4_to_tex.c \
ir3/ir3_nir_move_varying_inputs.c \
ir3/ir3_print.c \
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index b0d3e98d00a..2a1f9071118 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -76,6 +76,10 @@ struct ir3_context {
/* For fragment shaders: */
struct ir3_instruction *samp_id, *samp_mask_in;
+ /* For geometry shaders: */
+ struct ir3_instruction *primitive_id;
+ struct ir3_instruction *gs_header;
+
/* Compute shader inputs: */
struct ir3_instruction *local_invocation_id, *work_group_id;
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 2f95b249c26..103821cd6b3 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -101,7 +101,8 @@ ir3_key_lowers_nir(const struct ir3_shader_key *key)
return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r |
key->vsaturate_s | key->vsaturate_t | key->vsaturate_r |
key->ucp_enables | key->color_two_side |
- key->fclamp_color | key->vclamp_color;
+ key->fclamp_color | key->vclamp_color |
+ key->has_gs;
}
#define OPT(nir, pass, ...) ({ \
@@ -186,6 +187,19 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
.lower_tg4_offsets = true,
};
+ if (key && key->has_gs) {
+ switch (shader->type) {
+ case MESA_SHADER_VERTEX:
+ NIR_PASS_V(s, ir3_nir_lower_vs_to_explicit_io, shader);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ NIR_PASS_V(s, ir3_nir_lower_gs, shader);
+ break;
+ default:
+ break;
+ }
+ }
+
if (key) {
switch (shader->type) {
case MESA_SHADER_FRAGMENT:
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index a9b39e235b5..a602f40858b 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -41,6 +41,9 @@ bool ir3_nir_lower_load_barycentric_at_sample(nir_shader *shader);
bool ir3_nir_lower_load_barycentric_at_offset(nir_shader *shader);
bool ir3_nir_move_varying_inputs(nir_shader *shader);
+void ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s);
+void ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s);
+
const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c
new file mode 100644
index 00000000000..b4982503f0d
--- /dev/null
+++ b/src/freedreno/ir3/ir3_nir_lower_tess.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright © 2019 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir3_nir.h"
+#include "ir3_compiler.h"
+#include "compiler/nir/nir_builder.h"
+
+/* State shared by the VS and GS lowering helpers in this file. */
+struct state {
+ /* Attribute layout indexed by driver_location, filled in by
+ * build_primitive_map(). In the vertex shader path loc[] and stride
+ * are multiplied by 4 when build_local_offset() turns them into offsets.
+ */
+ struct primitive_map {
+ unsigned loc[32];
+ unsigned size[32];
+ unsigned stride;
+ } map;
+
+ /* Value of nir_load_gs_header_ir3(), emitted at the top of the entrypoint. */
+ nir_ssa_def *header;
+
+ /* Function-local temporaries created by ir3_nir_lower_gs(). */
+ nir_variable *vertex_count_var;
+ nir_variable *emitted_vertex_var;
+ nir_variable *vertex_flags_var;
+ /* After setup in ir3_nir_lower_gs() this points at the local temporary
+ * that backs the internally added "vertex_flags" output.
+ */
+ nir_variable *vertex_flags_out;
+
+ /* Local temporaries shadowing the shader outputs, indexed by driver_location. */
+ nir_variable *output_vars[32];
+};
+
+/* Emit NIR that computes (v >> start) & mask and return the result. */
+static nir_ssa_def *
+bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
+{
+	nir_ssa_def *shifted = nir_ushr(b, v, nir_imm_int(b, start));
+	nir_ssa_def *field_mask = nir_imm_int(b, mask);
+
+	return nir_iand(b, shifted, field_mask);
+}
+
+/* Extract the invocation id from the packed header: 5 bits starting at bit 11. */
+static nir_ssa_def *
+build_invocation_id(nir_builder *b, struct state *state)
+{
+	const uint32_t shift = 11;
+	const uint32_t mask = 31;
+
+	return bitfield_extract(b, state->header, shift, mask);
+}
+
+/* Extract the vertex id from the packed header: 5 bits starting at bit 6. */
+static nir_ssa_def *
+build_vertex_id(nir_builder *b, struct state *state)
+{
+	const uint32_t shift = 6;
+	const uint32_t mask = 31;
+
+	return bitfield_extract(b, state->header, shift, mask);
+}
+
+/* Extract the local primitive id from the packed header: the low 6 bits. */
+static nir_ssa_def *
+build_local_primitive_id(nir_builder *b, struct state *state)
+{
+	const uint32_t shift = 0;
+	const uint32_t mask = 63;
+
+	return bitfield_extract(b, state->header, shift, mask);
+}
+
+/* Return the first variable in 'list' whose driver_location matches
+ * 'driver_location', or NULL when the list holds no such variable.
+ */
+static nir_variable *
+get_var(struct exec_list *list, int driver_location)
+{
+	nir_variable *found = NULL;
+
+	nir_foreach_variable(var, list) {
+		if (var->data.driver_location != driver_location)
+			continue;
+
+		found = var;
+		break;
+	}
+
+	return found;
+}
+
+/* Emit NIR computing the offset of an attribute access:
+ *
+ *   local_primitive_id * primitive_stride + vertex * vertex_stride +
+ *   attribute_offset + offset
+ *
+ * In a vertex shader the vertex stride and attribute offset are immediates
+ * taken from state->map (scaled by 4); in a geometry shader they are loaded
+ * through the vs_vertex_stride_ir3 / primitive_location_ir3 intrinsics.
+ * Any other stage is a programming error.
+ */
+static nir_ssa_def *
+build_local_offset(nir_builder *b, struct state *state,
+		nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
+{
+	nir_ssa_def *prim_stride = nir_load_vs_primitive_stride_ir3(b);
+	nir_ssa_def *prim_id = build_local_primitive_id(b, state);
+	nir_ssa_def *prim_offset = nir_imul(b, prim_id, prim_stride);
+	nir_ssa_def *vtx_stride;
+	nir_ssa_def *attr_base;
+
+	switch (b->shader->info.stage) {
+	case MESA_SHADER_VERTEX:
+		vtx_stride = nir_imm_int(b, state->map.stride * 4);
+		attr_base = nir_imm_int(b, state->map.loc[base] * 4);
+		break;
+	case MESA_SHADER_GEOMETRY:
+		vtx_stride = nir_load_vs_vertex_stride_ir3(b);
+		attr_base = nir_load_primitive_location_ir3(b, base);
+		break;
+	default:
+		unreachable("bad shader stage");
+	}
+
+	nir_ssa_def *vtx_offset = nir_imul(b, vertex, vtx_stride);
+	nir_ssa_def *vertex_base = nir_iadd(b, prim_offset, vtx_offset);
+	nir_ssa_def *attr_total = nir_iadd(b, attr_base, offset);
+
+	return nir_iadd(b, vertex_base, attr_total);
+}
+
+/* Replace 'intr' with a newly created intrinsic 'op' at the builder cursor.
+ *
+ * src0 is always used; src1 and src2 are only set when non-NULL. The new
+ * instruction copies num_components from 'intr'. When 'op' has a destination
+ * it is initialised as a 32-bit SSA def and every use of the old destination
+ * is rewritten to it. 'intr' is removed and the new instruction is returned.
+ */
+static nir_intrinsic_instr *
+replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
+		nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
+{
+	nir_intrinsic_instr *repl = nir_intrinsic_instr_create(b->shader, op);
+	nir_ssa_def *optional_srcs[] = { src1, src2 };
+
+	repl->src[0] = nir_src_for_ssa(src0);
+	for (unsigned i = 0; i < ARRAY_SIZE(optional_srcs); i++) {
+		if (optional_srcs[i])
+			repl->src[i + 1] = nir_src_for_ssa(optional_srcs[i]);
+	}
+
+	repl->num_components = intr->num_components;
+
+	if (nir_intrinsic_infos[op].has_dest) {
+		nir_ssa_dest_init(&repl->instr, &repl->dest,
+				intr->num_components, 32, NULL);
+	}
+
+	nir_builder_instr_insert(b, &repl->instr);
+
+	if (nir_intrinsic_infos[op].has_dest) {
+		nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+				nir_src_for_ssa(&repl->dest.ssa));
+	}
+
+	nir_instr_remove(&intr->instr);
+
+	return repl;
+}
+
+/* Build the attribute layout for the variables in 'list'.
+ *
+ * First pass: for every variable except the tessellation level slots, record
+ * the largest size (attribute slots * 4) seen per driver_location in
+ * map->size[].
+ *
+ * Second pass: assign each used driver_location a running offset in
+ * map->loc[], then rewrite map->size[] to 0 for patch variables or to the
+ * size of one element of the variable's type otherwise. map->stride ends up
+ * as the total of all recorded sizes.
+ *
+ * 'shader' is unused here. 'map' is expected to be zero-initialised by the
+ * caller, since sizes are accumulated with MAX2().
+ */
+static void
+build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
+{
+	nir_foreach_variable(var, list) {
+		switch (var->data.location) {
+		case VARYING_SLOT_TESS_LEVEL_OUTER:
+		case VARYING_SLOT_TESS_LEVEL_INNER:
+			continue;
+		}
+
+		unsigned size = glsl_count_attribute_slots(var->type, false) * 4;
+
+		assert(var->data.driver_location < ARRAY_SIZE(map->size));
+		map->size[var->data.driver_location] =
+			MAX2(map->size[var->data.driver_location], size);
+	}
+
+	unsigned loc = 0;
+	for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
+		if (map->size[i] == 0)
+			continue;
+
+		/* A non-zero size was recorded from a variable at this location,
+		 * so the lookup is expected to succeed.
+		 */
+		nir_variable *var = get_var(list, i);
+		assert(var);
+
+		map->loc[i] = loc;
+		loc += map->size[i];
+
+		if (var->data.patch) {
+			map->size[i] = 0;
+		} else {
+			/* NOTE(review): glsl_get_length() is expected to return 0 for
+			 * types that are not arrays, matrices or structs (confirm
+			 * against glsl_types). Dividing by that would be undefined
+			 * behaviour, so treat such a variable as a single element and
+			 * keep its full size.
+			 */
+			unsigned length = glsl_get_length(var->type);
+			if (length > 0)
+				map->size[i] /= length;
+		}
+	}
+
+	map->stride = loc;
+}
+
+/* For every store_output in 'block', emit a store_shared_ir3 of the same
+ * value just before it, at the offset computed by build_local_offset() for
+ * this thread's vertex id. The original store_output is left in place.
+ */
+static void
+lower_vs_block(nir_block *block, nir_builder *b, struct state *state)
+{
+	nir_foreach_instr_safe(instr, block) {
+		if (instr->type != nir_instr_type_intrinsic)
+			continue;
+
+		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+		if (intr->intrinsic != nir_intrinsic_store_output)
+			continue;
+
+		/* store_output: src[] = { value, offset }. */
+		b->cursor = nir_before_instr(&intr->instr);
+
+		nir_ssa_def *vtx = build_vertex_id(b, state);
+		nir_ssa_def *addr = build_local_offset(b, state, vtx,
+				nir_intrinsic_base(intr), intr->src[1].ssa);
+
+		nir_intrinsic_instr *shared_store =
+			nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
+
+		nir_intrinsic_set_write_mask(shared_store, MASK(intr->num_components));
+		shared_store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
+		shared_store->src[1] = nir_src_for_ssa(addr);
+		shared_store->num_components = intr->num_components;
+
+		nir_builder_instr_insert(b, &shared_store->instr);
+	}
+}
+
+/* Emit a fresh gs_header_ir3 load and extract the local thread id from it:
+ * 10 bits starting at bit 16.
+ */
+static nir_ssa_def *
+local_thread_id(nir_builder *b)
+{
+	nir_ssa_def *header = nir_load_gs_header_ir3(b);
+
+	return bitfield_extract(b, header, 16, 1023);
+}
+
+/* Lower vertex shader outputs to explicit shared stores.
+ *
+ * Builds the output layout from shader->outputs and publishes it to 's'
+ * (output_loc[] and output_size), loads the GS header at the top of the
+ * entrypoint, then runs lower_vs_block() over every block.
+ */
+void
+ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s)
+{
+	struct state state = { };
+
+	build_primitive_map(shader, &state.map, &shader->outputs);
+
+	/* Hand the computed layout to the ir3 shader. */
+	memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
+	s->output_size = state.map.stride;
+
+	nir_function_impl *entry = nir_shader_get_entrypoint(shader);
+	assert(entry);
+
+	nir_builder b;
+	nir_builder_init(&b, entry);
+	b.cursor = nir_before_cf_list(&entry->body);
+
+	state.header = nir_load_gs_header_ir3(&b);
+
+	nir_foreach_block_safe(block, entry) {
+		lower_vs_block(block, &b, &state);
+	}
+
+	nir_metadata_preserve(entry,
+			nir_metadata_block_index | nir_metadata_dominance);
+}
+
+/* Lower the geometry shader intrinsics found in one block.
+ *
+ * store_output instructions are remembered per base in outputs[] and only
+ * consumed when an emit_vertex is reached in the same block; the table is
+ * local to this call, so pending stores are not carried across blocks.
+ */
+static void
+lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
+{
+ /* Most recent store_output seen for each base since the last emit_vertex. */
+ nir_intrinsic_instr *outputs[32] = {};
+
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_store_output: {
+ // src[] = { value, offset }.
+
+ /* NOTE(review): the base is used as an index without a bounds check,
+ * and an earlier pending store to the same base is overwritten here
+ * without being removed.
+ */
+ uint32_t loc = nir_intrinsic_base(intr);
+ outputs[loc] = intr;
+ break;
+ }
+
+ case nir_intrinsic_end_primitive: {
+ /* Replace end_primitive with a store of 4 to the vertex flags temporary. */
+ b->cursor = nir_before_instr(&intr->instr);
+ nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 4), 0x1);
+ nir_instr_remove(&intr->instr);
+ break;
+ }
+
+ case nir_intrinsic_emit_vertex: {
+
+ /* Load the vertex count */
+ b->cursor = nir_before_instr(&intr->instr);
+ nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+ /* Only when the running vertex count equals this thread's local id
+ * are the pending outputs copied into the temporaries.
+ */
+ nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(outputs); i++) {
+ if (outputs[i]) {
+ nir_store_var(b, state->output_vars[i],
+ outputs[i]->src[0].ssa,
+ (1 << outputs[i]->num_components) - 1);
+
+ nir_instr_remove(&outputs[i]->instr);
+ }
+ outputs[i] = NULL;
+ }
+
+ nir_instr_remove(&intr->instr);
+
+ /* Count the vertex as emitted by this thread. */
+ nir_store_var(b, state->emitted_vertex_var,
+ nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);
+
+ /* Copy the current vertex flags into the vertex_flags output temporary. */
+ nir_store_var(b, state->vertex_flags_out,
+ nir_load_var(b, state->vertex_flags_var), 0x1);
+
+ nir_pop_if(b, NULL);
+
+ /* Increment the vertex count by 1 */
+ nir_store_var(b, state->vertex_count_var,
+ nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
+ /* Reset the flags to 0 after each emitted vertex. */
+ nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 0), 0x1);
+
+ break;
+ }
+
+ case nir_intrinsic_load_per_vertex_input: {
+ // src[] = { vertex, offset }.
+
+ /* Replace the per-vertex input load with a load_shared_ir3 at the
+ * offset computed by build_local_offset().
+ */
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *offset = build_local_offset(b, state,
+ intr->src[0].ssa, // this is typically gl_InvocationID
+ nir_intrinsic_base(intr),
+ intr->src[1].ssa);
+
+ replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
+ break;
+ }
+
+ case nir_intrinsic_load_invocation_id: {
+ /* The invocation id is unpacked from the GS header instead. */
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *iid = build_invocation_id(b, state);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
+ nir_instr_remove(&intr->instr);
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+}
+
+/* Emit one store_output (offset 0) at the builder cursor for every populated
+ * entry of state->output_vars[], using the array index as the intrinsic base.
+ * The internally added vertex_flags output is stored this way as well.
+ */
+static void
+emit_store_outputs(nir_builder *b, struct state *state)
+{
+	for (uint32_t slot = 0; slot < ARRAY_SIZE(state->output_vars); slot++) {
+		nir_variable *temp = state->output_vars[slot];
+
+		if (temp) {
+			nir_intrinsic_instr *out =
+				nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+
+			nir_intrinsic_set_base(out, slot);
+
+			nir_ssa_def *value = nir_load_var(b, temp);
+			out->src[0] = nir_src_for_ssa(value);
+			out->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+			out->num_components = out->src[0].ssa->num_components;
+
+			nir_builder_instr_insert(b, &out->instr);
+		}
+	}
+}
+
+/* Component mask covered by 'var' within its slot: one bit per component of
+ * the (array-stripped) type, shifted up by location_frac.
+ */
+static uint32_t
+split_var_component_mask(nir_variable *var)
+{
+	uint32_t mask =
+		((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
+
+	return mask;
+}
+
+/* Remove from 'list' every variable that covers only part of the components
+ * used at its driver_location.
+ *
+ * The first pass ORs together the component masks of all variables sharing a
+ * driver_location; the second pass unlinks each variable whose own mask does
+ * not account for every bit collected for its location. 'shader' is unused.
+ */
+static void
+clean_up_split_vars(nir_shader *shader, struct exec_list *list)
+{
+	uint32_t used[32] = {};
+
+	nir_foreach_variable(var, list) {
+		used[var->data.driver_location] |= split_var_component_mask(var);
+	}
+
+	nir_foreach_variable_safe(var, list) {
+		uint32_t own = split_var_component_mask(var);
+
+		/* Bits used at this location that this variable does not cover. */
+		uint32_t uncovered = used[var->data.driver_location] & ~own;
+
+		if (uncovered != 0)
+			exec_node_remove(&var->node);
+	}
+}
+
+/* Lower a geometry shader to the ir3 model used by this file.
+ *
+ * Steps, in order: drop split variables, build the input layout, add an
+ * internal "vertex_flags" output after the last existing output, create a
+ * local temporary for every output plus the bookkeeping counters, lower
+ * each block with lower_gs_block(), and finally, in every predecessor of
+ * the end block, emit a discard_if for threads that emitted no vertex
+ * followed by the real store_output instructions.
+ *
+ * 's' is not referenced in this function.
+ */
+void
+ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s)
+{
+ struct state state = { };
+
+ if (shader_debug_enabled(shader->info.stage)) {
+ fprintf(stderr, "NIR (before gs lowering):\n");
+ nir_print_shader(shader, stderr);
+ }
+
+ clean_up_split_vars(shader, &shader->inputs);
+ clean_up_split_vars(shader, &shader->outputs);
+
+ build_primitive_map(shader, &state.map, &shader->inputs);
+
+ /* Find the first driver_location past all existing outputs. */
+ uint32_t loc = 0;
+ nir_foreach_variable(var, &shader->outputs) {
+ uint32_t end = var->data.driver_location + glsl_count_attribute_slots(var->type, false);
+ loc = MAX2(loc, end);
+ }
+
+ /* Add the internal vertex_flags output at that location. */
+ state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
+ glsl_uint_type(), "vertex_flags");
+ state.vertex_flags_out->data.driver_location = loc;
+ state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
+
+ nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+ assert(impl);
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ b.cursor = nir_before_cf_list(&impl->body);
+
+ state.header = nir_load_gs_header_ir3(&b);
+
+ /* One local temporary per output, indexed by driver_location.
+ * NOTE(review): driver_location is not checked against the size of
+ * output_vars[] here.
+ */
+ nir_foreach_variable(var, &shader->outputs) {
+ state.output_vars[var->data.driver_location] =
+ nir_local_variable_create(impl, var->type,
+ ralloc_asprintf(var, "%s:gs-temp", var->name));
+ }
+
+ state.vertex_count_var =
+ nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
+ state.emitted_vertex_var =
+ nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");
+ state.vertex_flags_var =
+ nir_local_variable_create(impl, glsl_uint_type(), "vertex_flags");
+ /* From here on vertex_flags_out refers to the local temporary created
+ * above for the vertex_flags output, not to the output variable itself.
+ */
+ state.vertex_flags_out = state.output_vars[state.vertex_flags_out->data.driver_location];
+
+ /* initialize to 0 */
+ /* (the counters start at 0; vertex_flags_var starts at 4) */
+ b.cursor = nir_before_cf_list(&impl->body);
+ nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
+ nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
+ nir_store_var(&b, state.vertex_flags_var, nir_imm_int(&b, 4), 0x1);
+
+ nir_foreach_block_safe(block, impl)
+ lower_gs_block(block, &b, &state);
+
+ /* At every exit of the function: discard when this thread emitted no
+ * vertex, then write the temporaries to the real outputs.
+ */
+ set_foreach(impl->end_block->predecessors, block_entry) {
+ struct nir_block *block = (void *)block_entry->key;
+ b.cursor = nir_after_block_before_jump(block);
+
+ nir_intrinsic_instr *discard_if =
+ nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);
+
+ nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0));
+
+ discard_if->src[0] = nir_src_for_ssa(cond);
+
+ nir_builder_instr_insert(&b, &discard_if->instr);
+
+ emit_store_outputs(&b, &state);
+ }
+
+ /* Control flow was added, so no metadata is preserved. */
+ nir_metadata_preserve(impl, 0);
+
+ if (shader_debug_enabled(shader->info.stage)) {
+ fprintf(stderr, "NIR (after gs lowering):\n");
+ nir_print_shader(shader, stderr);
+ }
+}
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index aae7baeb2e0..10980bd38be 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -350,7 +350,14 @@ output_name(struct ir3_shader_variant *so, int i)
if (so->type == MESA_SHADER_FRAGMENT) {
return gl_frag_result_name(so->outputs[i].slot);
} else {
- return gl_varying_slot_name(so->outputs[i].slot);
+ switch (so->outputs[i].slot) {
+ case VARYING_SLOT_GS_HEADER_IR3:
+ return "GS_HEADER";
+ case VARYING_SLOT_GS_VERTEX_FLAGS_IR3:
+ return "GS_VERTEX_FLAGS";
+ default:
+ return gl_varying_slot_name(so->outputs[i].slot);
+ }
}
}
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index fa6d5b7d387..ce258865658 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -554,6 +554,11 @@ struct ir3_shader {
struct ir3_shader_variant *variants;
mtx_t variants_lock;
+
+ uint32_t output_size; /* Size in dwords of all outputs for VS, size of entire patch for HS. */
+
+ /* Map from driver_location to dword offset in per-primitive storage */
+ unsigned output_loc[32];
};
void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
@@ -693,6 +698,10 @@ ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot)
return regid(63, 0);
}
+#define VARYING_SLOT_GS_HEADER_IR3 (VARYING_SLOT_MAX + 0)
+#define VARYING_SLOT_GS_VERTEX_FLAGS_IR3 (VARYING_SLOT_MAX + 1)
+
+
static inline uint32_t
ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot)
{
diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build
index be03ffb88c3..6e1434057e7 100644
--- a/src/freedreno/ir3/meson.build
+++ b/src/freedreno/ir3/meson.build
@@ -66,6 +66,7 @@ libfreedreno_ir3_files = files(
'ir3_nir_lower_load_barycentric_at_sample.c',
'ir3_nir_lower_load_barycentric_at_offset.c',
'ir3_nir_lower_io_offsets.c',
+ 'ir3_nir_lower_tess.c',
'ir3_nir_lower_tg4_to_tex.c',
'ir3_nir_move_varying_inputs.c',
'ir3_print.c',