-rw-r--r--   src/compiler/nir/nir.h                   |   4
-rw-r--r--   src/compiler/nir/nir_intrinsics.py       |   8
-rw-r--r--   src/compiler/nir/nir_print.c             |   1
-rw-r--r--   src/compiler/shader_enums.c              |   1
-rw-r--r--   src/compiler/shader_enums.h              |   7
-rw-r--r--   src/freedreno/Makefile.sources           |   1
-rw-r--r--   src/freedreno/ir3/ir3_context.h          |   4
-rw-r--r--   src/freedreno/ir3/ir3_nir.c              |  16
-rw-r--r--   src/freedreno/ir3/ir3_nir.h              |   3
-rw-r--r--   src/freedreno/ir3/ir3_nir_lower_tess.c   | 455
-rw-r--r--   src/freedreno/ir3/ir3_shader.c           |   9
-rw-r--r--   src/freedreno/ir3/ir3_shader.h           |   9
-rw-r--r--   src/freedreno/ir3/meson.build            |   1
13 files changed, 517 insertions, 2 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9b94c9edf23..5c98aeefc66 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1524,6 +1524,9 @@ typedef enum {
    NIR_INTRINSIC_SRC_ACCESS,
    NIR_INTRINSIC_DST_ACCESS,
 
+   /* Driver location for nir_load_patch_location_ir3 */
+   NIR_INTRINSIC_DRIVER_LOCATION,
+
    NIR_INTRINSIC_NUM_INDEX_FLAGS,
 
 } nir_intrinsic_index_flag;
@@ -1632,6 +1635,7 @@ INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned)
 INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned)
 INTRINSIC_IDX_ACCESSORS(type, TYPE, nir_alu_type)
 INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned)
+INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned)
 
 static inline void
 nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index ae62a85d39b..637576c092a 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -124,6 +124,8 @@ DESC_TYPE = "NIR_INTRINSIC_DESC_TYPE"
 TYPE = "NIR_INTRINSIC_TYPE"
 # The swizzle mask for quad_swizzle_amd & masked_swizzle_amd
 SWIZZLE_MASK = "NIR_INTRINSIC_SWIZZLE_MASK"
+# Driver location of attribute
+DRIVER_LOCATION = "NIR_INTRINSIC_DRIVER_LOCATION"
 
 #
 # Possible flags:
@@ -771,6 +773,12 @@ intrinsic("ssbo_atomic_xor_ir3", src_comp=[1, 1, 1, 1], dest_comp=1)
 intrinsic("ssbo_atomic_exchange_ir3", src_comp=[1, 1, 1, 1], dest_comp=1)
 intrinsic("ssbo_atomic_comp_swap_ir3", src_comp=[1, 1, 1, 1, 1], dest_comp=1)
 
+# System values for freedreno geometry shaders.
+system_value("vs_primitive_stride_ir3", 1)
+system_value("vs_vertex_stride_ir3", 1)
+system_value("gs_header_ir3", 1)
+system_value("primitive_location_ir3", 1, indices=[DRIVER_LOCATION])
+
 # IR3-specific load/store intrinsics. These access a buffer used to pass data
 # between geometry stages - perhaps it's explicit access to the vertex cache.
 
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 48844b7ed79..496f9279676 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -800,6 +800,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
       [NIR_INTRINSIC_DESC_TYPE] = "desc_type",
       [NIR_INTRINSIC_TYPE] = "type",
       [NIR_INTRINSIC_SWIZZLE_MASK] = "swizzle_mask",
+      [NIR_INTRINSIC_DRIVER_LOCATION] = "driver_location",
    };
    for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
       if (!info->index_map[idx])
diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
index 71796687afa..afaad50adf6 100644
--- a/src/compiler/shader_enums.c
+++ b/src/compiler/shader_enums.c
@@ -254,6 +254,7 @@ gl_system_value_name(gl_system_value sysval)
     ENUM(SYSTEM_VALUE_BARYCENTRIC_SAMPLE),
     ENUM(SYSTEM_VALUE_BARYCENTRIC_CENTROID),
     ENUM(SYSTEM_VALUE_BARYCENTRIC_SIZE),
+    ENUM(SYSTEM_VALUE_GS_HEADER_IR3),
   };
   STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
   return NAME(sysval);
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index 0704719c229..f9b2b8c1d73 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -641,6 +641,13 @@ typedef enum
    SYSTEM_VALUE_BARYCENTRIC_CENTROID,
    SYSTEM_VALUE_BARYCENTRIC_SIZE,
 
+   /**
+    * IR3 specific geometry shader system value that packs invocation id,
+    * thread id and vertex id.  Having this as a nir level system value
+    * lets us do the unpacking in nir.
+    */
+   SYSTEM_VALUE_GS_HEADER_IR3,
+
    SYSTEM_VALUE_MAX   /**< Number of values */
 } gl_system_value;
 
diff --git a/src/freedreno/Makefile.sources b/src/freedreno/Makefile.sources
index cf3ac7bdba4..bb56869e1cc 100644
--- a/src/freedreno/Makefile.sources
+++ b/src/freedreno/Makefile.sources
@@ -38,6 +38,7 @@ ir3_SOURCES := \
 	ir3/ir3_nir_lower_load_barycentric_at_sample.c \
 	ir3/ir3_nir_lower_load_barycentric_at_offset.c \
 	ir3/ir3_nir_lower_io_offsets.c \
+	ir3/ir3_nir_lower_tess.c \
 	ir3/ir3_nir_lower_tg4_to_tex.c \
 	ir3/ir3_nir_move_varying_inputs.c \
 	ir3/ir3_print.c \
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index b0d3e98d00a..2a1f9071118 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -76,6 +76,10 @@ struct ir3_context {
    /* For fragment shaders: */
    struct ir3_instruction *samp_id, *samp_mask_in;
 
+   /* For geometry shaders: */
+   struct ir3_instruction *primitive_id;
+   struct ir3_instruction *gs_header;
+
    /* Compute shader inputs: */
    struct ir3_instruction *local_invocation_id, *work_group_id;
 
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 2f95b249c26..103821cd6b3 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -101,7 +101,8 @@ ir3_key_lowers_nir(const struct ir3_shader_key *key)
    return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r |
          key->vsaturate_s | key->vsaturate_t | key->vsaturate_r |
          key->ucp_enables | key->color_two_side |
-         key->fclamp_color | key->vclamp_color;
+         key->fclamp_color | key->vclamp_color |
+         key->has_gs;
 }
 
 #define OPT(nir, pass, ...) ({ \
@@ -186,6 +187,19 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
       .lower_tg4_offsets = true,
    };
 
+   if (key && key->has_gs) {
+      switch (shader->type) {
+      case MESA_SHADER_VERTEX:
+         NIR_PASS_V(s, ir3_nir_lower_vs_to_explicit_io, shader);
+         break;
+      case MESA_SHADER_GEOMETRY:
+         NIR_PASS_V(s, ir3_nir_lower_gs, shader);
+         break;
+      default:
+         break;
+      }
+   }
+
    if (key) {
       switch (shader->type) {
       case MESA_SHADER_FRAGMENT:
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index a9b39e235b5..a602f40858b 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -41,6 +41,9 @@ bool ir3_nir_lower_load_barycentric_at_sample(nir_shader *shader);
 bool ir3_nir_lower_load_barycentric_at_offset(nir_shader *shader);
 bool ir3_nir_move_varying_inputs(nir_shader *shader);
 
+void ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s);
+void ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s);
+
 const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
 bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
 void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c
new file mode 100644
index 00000000000..b4982503f0d
--- /dev/null
+++ b/src/freedreno/ir3/ir3_nir_lower_tess.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright © 2019 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir3_nir.h"
+#include "ir3_compiler.h"
+#include "compiler/nir/nir_builder.h"
+
+struct state {
+   struct primitive_map {
+      unsigned loc[32];
+      unsigned size[32];
+      unsigned stride;
+   } map;
+
+   nir_ssa_def *header;
+
+   nir_variable *vertex_count_var;
+   nir_variable *emitted_vertex_var;
+   nir_variable *vertex_flags_var;
+   nir_variable *vertex_flags_out;
+
+   nir_variable *output_vars[32];
+};
+
+static nir_ssa_def *
+bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
+{
+   return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
+         nir_imm_int(b, mask));
+}
+
+static nir_ssa_def *
+build_invocation_id(nir_builder *b, struct state *state)
+{
+   return bitfield_extract(b, state->header, 11, 31);
+}
+
+static nir_ssa_def *
+build_vertex_id(nir_builder *b, struct state *state)
+{
+   return bitfield_extract(b, state->header, 6, 31);
+}
+
+static nir_ssa_def *
+build_local_primitive_id(nir_builder *b, struct state *state)
+{
+   return bitfield_extract(b, state->header, 0, 63);
+}
+
+static nir_variable *
+get_var(struct exec_list *list, int driver_location)
+{
+   nir_foreach_variable(v, list) {
+      if (v->data.driver_location == driver_location) {
+         return v;
+      }
+   }
+
+   return NULL;
+}
+
+static nir_ssa_def *
+build_local_offset(nir_builder *b, struct state *state,
+      nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
+{
+   nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
+   nir_ssa_def *primitive_offset =
+      nir_imul(b, build_local_primitive_id(b, state), primitive_stride);
+   nir_ssa_def *attr_offset;
+   nir_ssa_def *vertex_stride;
+
+   if (b->shader->info.stage == MESA_SHADER_VERTEX) {
+      vertex_stride = nir_imm_int(b, state->map.stride * 4);
+      attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
+   } else if (b->shader->info.stage == MESA_SHADER_GEOMETRY) {
+      vertex_stride = nir_load_vs_vertex_stride_ir3(b);
+      attr_offset = nir_load_primitive_location_ir3(b, base);
+   } else {
+      unreachable("bad shader stage");
+   }
+
+   nir_ssa_def *vertex_offset = nir_imul(b, vertex, vertex_stride);
+
+   return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
+         nir_iadd(b, attr_offset, offset));
+}
+
+static nir_intrinsic_instr *
+replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
+      nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
+{
+   nir_intrinsic_instr *new_intr =
+      nir_intrinsic_instr_create(b->shader, op);
+
+   new_intr->src[0] = nir_src_for_ssa(src0);
+   if (src1)
+      new_intr->src[1] = nir_src_for_ssa(src1);
+   if (src2)
+      new_intr->src[2] = nir_src_for_ssa(src2);
+
+   new_intr->num_components = intr->num_components;
+
+   if (nir_intrinsic_infos[op].has_dest)
+      nir_ssa_dest_init(&new_intr->instr, &new_intr->dest,
+            intr->num_components, 32, NULL);
+
+   nir_builder_instr_insert(b, &new_intr->instr);
+
+   if (nir_intrinsic_infos[op].has_dest)
+      nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(&new_intr->dest.ssa));
+
+   nir_instr_remove(&intr->instr);
+
+   return new_intr;
+}
+
+static void
+build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
+{
+   nir_foreach_variable(var, list) {
+      switch (var->data.location) {
+      case VARYING_SLOT_TESS_LEVEL_OUTER:
+      case VARYING_SLOT_TESS_LEVEL_INNER:
+         continue;
+      }
+
+      unsigned size = glsl_count_attribute_slots(var->type, false) * 4;
+
+      assert(var->data.driver_location < ARRAY_SIZE(map->size));
+      map->size[var->data.driver_location] =
+         MAX2(map->size[var->data.driver_location], size);
+   }
+
+   unsigned loc = 0;
+   for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
+      if (map->size[i] == 0)
+         continue;
+      nir_variable *var = get_var(list, i);
+      map->loc[i] = loc;
+      loc += map->size[i];
+
+      if (var->data.patch)
+         map->size[i] = 0;
+      else
+         map->size[i] = map->size[i] / glsl_get_length(var->type);
+   }
+
+   map->stride = loc;
+}
+
+static void
+lower_vs_block(nir_block *block, nir_builder *b, struct state *state)
+{
+   nir_foreach_instr_safe(instr, block) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+      switch (intr->intrinsic) {
+      case nir_intrinsic_store_output: {
+         // src[] = { value, offset }.
+
+         b->cursor = nir_before_instr(&intr->instr);
+
+         nir_ssa_def *vertex_id = build_vertex_id(b, state);
+         nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
+               intr->src[1].ssa);
+         nir_intrinsic_instr *store =
+            nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
+
+         nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
+         store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
+         store->src[1] = nir_src_for_ssa(offset);
+
+         store->num_components = intr->num_components;
+
+         nir_builder_instr_insert(b, &store->instr);
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+}
+
+static nir_ssa_def *
+local_thread_id(nir_builder *b)
+{
+   return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
+}
+
+void
+ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s)
+{
+   struct state state = { };
+
+   build_primitive_map(shader, &state.map, &shader->outputs);
+   memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+   assert(impl);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+   b.cursor = nir_before_cf_list(&impl->body);
+
+   state.header = nir_load_gs_header_ir3(&b);
+
+   nir_foreach_block_safe(block, impl)
+      lower_vs_block(block, &b, &state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+         nir_metadata_dominance);
+
+   s->output_size = state.map.stride;
+}
+
+static void
+lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
+{
+   nir_intrinsic_instr *outputs[32] = {};
+
+   nir_foreach_instr_safe(instr, block) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+      switch (intr->intrinsic) {
+      case nir_intrinsic_store_output: {
+         // src[] = { value, offset }.
+
+         uint32_t loc = nir_intrinsic_base(intr);
+         outputs[loc] = intr;
+         break;
+      }
+
+      case nir_intrinsic_end_primitive: {
+         b->cursor = nir_before_instr(&intr->instr);
+         nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 4), 0x1);
+         nir_instr_remove(&intr->instr);
+         break;
+      }
+
+      case nir_intrinsic_emit_vertex: {
+
+         /* Load the vertex count */
+         b->cursor = nir_before_instr(&intr->instr);
+         nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+         nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));
+
+         for (uint32_t i = 0; i < ARRAY_SIZE(outputs); i++) {
+            if (outputs[i]) {
+               nir_store_var(b, state->output_vars[i],
+                     outputs[i]->src[0].ssa,
+                     (1 << outputs[i]->num_components) - 1);
+
+               nir_instr_remove(&outputs[i]->instr);
+            }
+            outputs[i] = NULL;
+         }
+
+         nir_instr_remove(&intr->instr);
+
+         nir_store_var(b, state->emitted_vertex_var,
+               nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);
+
+         nir_store_var(b, state->vertex_flags_out,
+               nir_load_var(b, state->vertex_flags_var), 0x1);
+
+         nir_pop_if(b, NULL);
+
+         /* Increment the vertex count by 1 */
+         nir_store_var(b, state->vertex_count_var,
+               nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
+         nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 0), 0x1);
+
+         break;
+      }
+
+      case nir_intrinsic_load_per_vertex_input: {
+         // src[] = { vertex, offset }.
+
+         b->cursor = nir_before_instr(&intr->instr);
+
+         nir_ssa_def *offset = build_local_offset(b, state,
+               intr->src[0].ssa, // this is typically gl_InvocationID
+               nir_intrinsic_base(intr),
+               intr->src[1].ssa);
+
+         replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
+         break;
+      }
+
+      case nir_intrinsic_load_invocation_id: {
+         b->cursor = nir_before_instr(&intr->instr);
+
+         nir_ssa_def *iid = build_invocation_id(b, state);
+         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
+         nir_instr_remove(&intr->instr);
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+}
+
+static void
+emit_store_outputs(nir_builder *b, struct state *state)
+{
+   /* This also stores the internally added vertex_flags output. */
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(state->output_vars); i++) {
+      if (!state->output_vars[i])
+         continue;
+
+      nir_intrinsic_instr *store =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+
+      nir_intrinsic_set_base(store, i);
+      store->src[0] = nir_src_for_ssa(nir_load_var(b, state->output_vars[i]));
+      store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+      store->num_components = store->src[0].ssa->num_components;
+
+      nir_builder_instr_insert(b, &store->instr);
+   }
+}
+
+static void
+clean_up_split_vars(nir_shader *shader, struct exec_list *list)
+{
+   uint32_t components[32] = {};
+
+   nir_foreach_variable(var, list) {
+      uint32_t mask =
+         ((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
+      components[var->data.driver_location] |= mask;
+   }
+
+   nir_foreach_variable_safe(var, list) {
+      uint32_t mask =
+         ((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
+      bool subset =
+         (components[var->data.driver_location] | mask) != mask;
+      if (subset)
+         exec_node_remove(&var->node);
+   }
+}
+
+void
+ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s)
+{
+   struct state state = { };
+
+   if (shader_debug_enabled(shader->info.stage)) {
+      fprintf(stderr, "NIR (before gs lowering):\n");
+      nir_print_shader(shader, stderr);
+   }
+
+   clean_up_split_vars(shader, &shader->inputs);
+   clean_up_split_vars(shader, &shader->outputs);
+
+   build_primitive_map(shader, &state.map, &shader->inputs);
+
+   uint32_t loc = 0;
+   nir_foreach_variable(var, &shader->outputs) {
+      uint32_t end = var->data.driver_location + glsl_count_attribute_slots(var->type, false);
+      loc = MAX2(loc, end);
+   }
+
+   state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
+         glsl_uint_type(), "vertex_flags");
+   state.vertex_flags_out->data.driver_location = loc;
+   state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+   assert(impl);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+   b.cursor = nir_before_cf_list(&impl->body);
+
+   state.header = nir_load_gs_header_ir3(&b);
+
+   nir_foreach_variable(var, &shader->outputs) {
+      state.output_vars[var->data.driver_location] =
+         nir_local_variable_create(impl, var->type,
+               ralloc_asprintf(var, "%s:gs-temp", var->name));
+   }
+
+   state.vertex_count_var =
+      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
+   state.emitted_vertex_var =
+      nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");
+   state.vertex_flags_var =
+      nir_local_variable_create(impl, glsl_uint_type(), "vertex_flags");
+   state.vertex_flags_out = state.output_vars[state.vertex_flags_out->data.driver_location];
+
+   /* initialize to 0 */
+   b.cursor = nir_before_cf_list(&impl->body);
+   nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
+   nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
+   nir_store_var(&b, state.vertex_flags_var, nir_imm_int(&b, 4), 0x1);
+
+   nir_foreach_block_safe(block, impl)
+      lower_gs_block(block, &b, &state);
+
+   set_foreach(impl->end_block->predecessors, block_entry) {
+      struct nir_block *block = (void *)block_entry->key;
+      b.cursor = nir_after_block_before_jump(block);
+
+      nir_intrinsic_instr *discard_if =
+         nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);
+
+      nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0));
+
+      discard_if->src[0] = nir_src_for_ssa(cond);
+
+      nir_builder_instr_insert(&b, &discard_if->instr);
+
+      emit_store_outputs(&b, &state);
+   }
+
+   nir_metadata_preserve(impl, 0);
+
+   if (shader_debug_enabled(shader->info.stage)) {
+      fprintf(stderr, "NIR (after gs lowering):\n");
+      nir_print_shader(shader, stderr);
+   }
+}
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index aae7baeb2e0..10980bd38be 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -350,7 +350,14 @@ output_name(struct ir3_shader_variant *so, int i)
    if (so->type == MESA_SHADER_FRAGMENT) {
       return gl_frag_result_name(so->outputs[i].slot);
    } else {
-      return gl_varying_slot_name(so->outputs[i].slot);
+      switch (so->outputs[i].slot) {
+      case VARYING_SLOT_GS_HEADER_IR3:
+         return "GS_HEADER";
+      case VARYING_SLOT_GS_VERTEX_FLAGS_IR3:
+         return "GS_VERTEX_FLAGS";
+      default:
+         return gl_varying_slot_name(so->outputs[i].slot);
+      }
    }
 }
 
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index fa6d5b7d387..ce258865658 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -554,6 +554,11 @@ struct ir3_shader {
 
    struct ir3_shader_variant *variants;
    mtx_t variants_lock;
+
+   uint32_t output_size; /* Size in dwords of all outputs for VS, size of entire patch for HS. */
+
+   /* Map from driver_location to byte offset in per-primitive storage */
+   unsigned output_loc[32];
 };
 
 void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
@@ -693,6 +698,10 @@ ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot)
    return regid(63, 0);
 }
 
+#define VARYING_SLOT_GS_HEADER_IR3 (VARYING_SLOT_MAX + 0)
+#define VARYING_SLOT_GS_VERTEX_FLAGS_IR3 (VARYING_SLOT_MAX + 1)
+
+
 static inline uint32_t
 ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot)
 {
diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build
index be03ffb88c3..6e1434057e7 100644
--- a/src/freedreno/ir3/meson.build
+++ b/src/freedreno/ir3/meson.build
@@ -66,6 +66,7 @@ libfreedreno_ir3_files = files(
   'ir3_nir_lower_load_barycentric_at_sample.c',
   'ir3_nir_lower_load_barycentric_at_offset.c',
   'ir3_nir_lower_io_offsets.c',
+  'ir3_nir_lower_tess.c',
   'ir3_nir_lower_tg4_to_tex.c',
   'ir3_nir_move_varying_inputs.c',
   'ir3_print.c',
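Reading aid (not part of the patch): the sketch below models in plain C how the packed GS header introduced above (SYSTEM_VALUE_GS_HEADER_IR3 / nir_load_gs_header_ir3) is unpacked, using the same shift/mask pairs that the new pass feeds to bitfield_extract(), and how build_local_offset() composes the shared-buffer offset. The helper names (gs_header_field, gs_local_offset and the gs_* accessors) are invented for this illustration and do not exist in the tree.

#include <stdint.h>

/* Same shift-and-mask operation the pass emits via bitfield_extract(). */
static inline uint32_t
gs_header_field(uint32_t header, uint32_t start, uint32_t mask)
{
   return (header >> start) & mask;
}

/* Field positions/masks as used by the lowering pass:
 *   build_local_primitive_id(): start 0,  mask 63   -> bits [5:0]
 *   build_vertex_id():          start 6,  mask 31   -> bits [10:6]
 *   build_invocation_id():      start 11, mask 31   -> bits [15:11]
 *   local_thread_id():          start 16, mask 1023 -> bits [25:16]
 */
static inline uint32_t gs_local_primitive_id(uint32_t h) { return gs_header_field(h, 0, 63); }
static inline uint32_t gs_vertex_id(uint32_t h)          { return gs_header_field(h, 6, 31); }
static inline uint32_t gs_invocation_id(uint32_t h)      { return gs_header_field(h, 11, 31); }
static inline uint32_t gs_local_thread_id(uint32_t h)    { return gs_header_field(h, 16, 1023); }

/* The offset build_local_offset() computes for an attribute: a
 * primitive-sized slab selected by the local primitive id, a vertex-sized
 * slot inside it, then the attribute's offset plus the intrinsic's own
 * offset source.
 */
static inline uint32_t
gs_local_offset(uint32_t local_prim_id, uint32_t primitive_stride,
                uint32_t vertex, uint32_t vertex_stride,
                uint32_t attr_offset, uint32_t offset)
{
   return local_prim_id * primitive_stride +
          vertex * vertex_stride +
          attr_offset + offset;
}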