diff options
author | Timothy Arceri <[email protected]> | 2018-12-18 12:05:31 +1100 |
---|---|---|
committer | Timothy Arceri <[email protected]> | 2019-01-02 10:01:24 +1100 |
commit | 2832bc972bfb512b314a3180a1258be984807760 (patch) | |
tree | be44f4ba55e5985ecc0c652edd065131cc51917a | |
parent | 2817a4ec0bebfda5fe1a4a1d72a8902b9a39e4dd (diff) |
ac/nir_to_llvm: add ac_are_tessfactors_def_in_all_invocs()
The following patch will use this with the radeonsi NIR backend
but I've added it to ac so we can use it with RADV in future.
This is a NIR implementation of the tgsi function
tgsi_scan_tess_ctrl().
Tested-by: Dieter Nützel <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 161 | ||||
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.h | 2 |
2 files changed, 163 insertions, 0 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 6d97212b805..84dbe174575 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -4067,3 +4067,164 @@ ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) nir_lower_indirect_derefs(nir, indirect_mask); } + +static unsigned +get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin) +{ + if (intrin->intrinsic != nir_intrinsic_store_deref) + return 0; + + nir_variable *var = + nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0])); + + if (var->data.mode != nir_var_shader_out) + return 0; + + unsigned writemask = 0; + const int location = var->data.location; + unsigned first_component = var->data.location_frac; + unsigned num_comps = intrin->dest.ssa.num_components; + + if (location == VARYING_SLOT_TESS_LEVEL_INNER) + writemask = ((1 << num_comps + 1) - 1) << first_component; + else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) + writemask = (((1 << num_comps + 1) - 1) << first_component) << 4; + + return writemask; +} + +static void +scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask, + unsigned *cond_block_tf_writemask, + bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf) +{ + switch (cf_node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(cf_node); + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_barrier) { + + /* If we find a barrier in nested control flow put this in the + * too hard basket. In GLSL this is not possible but it is in + * SPIR-V. + */ + if (is_nested_cf) { + *tessfactors_are_def_in_all_invocs = false; + return; + } + + /* The following case must be prevented: + * gl_TessLevelInner = ...; + * barrier(); + * if (gl_InvocationID == 1) + * gl_TessLevelInner = ...; + * + * If you consider disjoint code segments separated by barriers, each + * such segment that writes tess factor channels should write the same + * channels in all codepaths within that segment. + */ + if (upper_block_tf_writemask || cond_block_tf_writemask) { + /* Accumulate the result: */ + *tessfactors_are_def_in_all_invocs &= + !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask)); + + /* Analyze the next code segment from scratch. */ + *upper_block_tf_writemask = 0; + *cond_block_tf_writemask = 0; + } + } else + *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin); + } + + break; + } + case nir_cf_node_if: { + unsigned then_tessfactor_writemask = 0; + unsigned else_tessfactor_writemask = 0; + + nir_if *if_stmt = nir_cf_node_as_if(cf_node); + foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) { + scan_tess_ctrl(nested_node, &then_tessfactor_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) { + scan_tess_ctrl(nested_node, &else_tessfactor_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + if (then_tessfactor_writemask || else_tessfactor_writemask) { + /* If both statements write the same tess factor channels, + * we can say that the upper block writes them too. + */ + *upper_block_tf_writemask |= then_tessfactor_writemask & + else_tessfactor_writemask; + *cond_block_tf_writemask |= then_tessfactor_writemask | + else_tessfactor_writemask; + } + + break; + } + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(cf_node); + foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) { + scan_tess_ctrl(nested_node, cond_block_tf_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + break; + } + default: + unreachable("unknown cf node type"); + } +} + +bool +ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir) +{ + assert(nir->info.stage == MESA_SHADER_TESS_CTRL); + + /* The pass works as follows: + * If all codepaths write tess factors, we can say that all + * invocations define tess factors. + * + * Each tess factor channel is tracked separately. + */ + unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */ + unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */ + + /* Initial value = true. Here the pass will accumulate results from + * multiple segments surrounded by barriers. If tess factors aren't + * written at all, it's a shader bug and we don't care if this will be + * true. + */ + bool tessfactors_are_def_in_all_invocs = true; + + nir_foreach_function(function, nir) { + if (function->impl) { + foreach_list_typed(nir_cf_node, node, node, &function->impl->body) { + scan_tess_ctrl(node, &main_block_tf_writemask, + &cond_block_tf_writemask, + &tessfactors_are_def_in_all_invocs, + false); + } + } + } + + /* Accumulate the result for the last code segment separated by a + * barrier. + */ + if (main_block_tf_writemask || cond_block_tf_writemask) { + tessfactors_are_def_in_all_invocs &= + !(cond_block_tf_writemask & ~main_block_tf_writemask); + } + + return tessfactors_are_def_in_all_invocs; +} diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h index 778bd7544fc..4782d9fc9d6 100644 --- a/src/amd/common/ac_nir_to_llvm.h +++ b/src/amd/common/ac_nir_to_llvm.h @@ -47,6 +47,8 @@ static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan) void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class); +bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir); + void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, struct nir_shader *nir); |