diff options
-rw-r--r-- | src/amd/vulkan/radv_shader.c | 2 | ||||
-rw-r--r-- | src/broadcom/compiler/nir_to_vir.c | 2 | ||||
-rw-r--r-- | src/compiler/nir/nir.h | 3 | ||||
-rw-r--r-- | src/compiler/nir/nir_opt_peephole_select.c | 37 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_nir.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_nir.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_nir.c | 13 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 |
9 files changed, 47 insertions, 18 deletions
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 1ce6baebff0..f778e85b8d9 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -159,7 +159,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, NIR_PASS(progress, shader, nir_opt_if); NIR_PASS(progress, shader, nir_opt_dead_cf); NIR_PASS(progress, shader, nir_opt_cse); - NIR_PASS(progress, shader, nir_opt_peephole_select, 8); + NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true); NIR_PASS(progress, shader, nir_opt_algebraic); NIR_PASS(progress, shader, nir_opt_constant_folding); NIR_PASS(progress, shader, nir_opt_undef); diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 167f00750b4..9b1066467cc 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1241,7 +1241,7 @@ v3d_optimize_nir(struct nir_shader *s) NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); - NIR_PASS(progress, s, nir_opt_peephole_select, 8); + NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index d99cc6b2d38..2bbfb3c6b15 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3197,7 +3197,8 @@ bool nir_opt_move_comparisons(nir_shader *shader); bool nir_opt_move_load_ubo(nir_shader *shader); -bool nir_opt_peephole_select(nir_shader *shader, unsigned limit); +bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, + bool indirect_load_ok); bool nir_opt_remove_phis_impl(nir_function_impl *impl); bool nir_opt_remove_phis(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c index ad9d0abec03..6808d3eda6c 100644 --- a/src/compiler/nir/nir_opt_peephole_select.c +++ b/src/compiler/nir/nir_opt_peephole_select.c @@ -58,7 +58,8 @@ */ static bool -block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok) +block_check_for_allowed_instrs(nir_block *block, unsigned *count, + bool alu_ok, bool indirect_load_ok) { nir_foreach_instr(instr, block) { switch (instr->type) { @@ -66,16 +67,26 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok) nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); switch (intrin->intrinsic) { - case nir_intrinsic_load_deref: - switch (nir_src_as_deref(intrin->src[0])->mode) { + case nir_intrinsic_load_deref: { + nir_deref_instr *const deref = nir_src_as_deref(intrin->src[0]); + + switch (deref->mode) { case nir_var_shader_in: case nir_var_uniform: + /* Don't try to remove flow control around an indirect load + * because that flow control may be trying to avoid invalid + * loads. + */ + if (!indirect_load_ok && nir_deref_instr_has_indirect(deref)) + return false; + break; default: return false; } break; + } case nir_intrinsic_load_uniform: if (!alu_ok) @@ -149,7 +160,7 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok) static bool nir_opt_peephole_select_block(nir_block *block, nir_shader *shader, - unsigned limit) + unsigned limit, bool indirect_load_ok) { if (nir_cf_node_is_first(&block->cf_node)) return false; @@ -169,8 +180,10 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader, /* ... and those blocks must only contain "allowed" instructions. */ unsigned count = 0; - if (!block_check_for_allowed_instrs(then_block, &count, limit != 0) || - !block_check_for_allowed_instrs(else_block, &count, limit != 0)) + if (!block_check_for_allowed_instrs(then_block, &count, limit != 0, + indirect_load_ok) || + !block_check_for_allowed_instrs(else_block, &count, limit != 0, + indirect_load_ok)) return false; if (count > limit) @@ -236,13 +249,15 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader, } static bool -nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit) +nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit, + bool indirect_load_ok) { nir_shader *shader = impl->function->shader; bool progress = false; nir_foreach_block_safe(block, impl) { - progress |= nir_opt_peephole_select_block(block, shader, limit); + progress |= nir_opt_peephole_select_block(block, shader, limit, + indirect_load_ok); } if (progress) @@ -252,13 +267,15 @@ nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit) } bool -nir_opt_peephole_select(nir_shader *shader, unsigned limit) +nir_opt_peephole_select(nir_shader *shader, unsigned limit, + bool indirect_load_ok) { bool progress = false; nir_foreach_function(function, shader) { if (function->impl) - progress |= nir_opt_peephole_select_impl(function->impl, limit); + progress |= nir_opt_peephole_select_impl(function->impl, limit, + indirect_load_ok); } return progress; diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 70c01ee0593..112c0928529 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -97,7 +97,7 @@ ir3_optimize_loop(nir_shader *s) progress |= OPT(s, nir_opt_gcm, true); else if (gcm == 2) progress |= OPT(s, nir_opt_gcm, false); - progress |= OPT(s, nir_opt_peephole_select, 16); + progress |= OPT(s, nir_opt_peephole_select, 16, true); progress |= OPT(s, nir_opt_intrinsics); progress |= OPT(s, nir_opt_algebraic); progress |= OPT(s, nir_opt_constant_folding); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 01559866274..89acaab248a 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -841,7 +841,7 @@ si_lower_nir(struct si_shader_selector* sel) NIR_PASS(progress, sel->nir, nir_opt_if); NIR_PASS(progress, sel->nir, nir_opt_dead_cf); NIR_PASS(progress, sel->nir, nir_opt_cse); - NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8); + NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true); /* Needed for algebraic lowering */ NIR_PASS(progress, sel->nir, nir_opt_algebraic); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 7053c66723b..48d83061f9a 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1591,7 +1591,7 @@ vc4_optimize_nir(struct nir_shader *s) NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); - NIR_PASS(progress, s, nir_opt_peephole_select, 8); + NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 594edde5413..e0aa927f2f4 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -568,7 +568,18 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_copy_prop); OPT(nir_opt_dce); OPT(nir_opt_cse); - OPT(nir_opt_peephole_select, 0); + + /* For indirect loads of uniforms (push constants), we assume that array + * indices will nearly always be in bounds and the cost of the load is + * low. Therefore there shouldn't be a performance benefit to avoid it. + * However, in vec4 tessellation shaders, these loads operate by + * actually pulling from memory. + */ + const bool is_vec4_tessellation = !is_scalar && + (nir->info.stage == MESA_SHADER_TESS_CTRL || + nir->info.stage == MESA_SHADER_TESS_EVAL); + OPT(nir_opt_peephole_select, 0, !is_vec4_tessellation); + OPT(nir_opt_intrinsics); OPT(nir_opt_idiv_const, 32); OPT(nir_opt_algebraic); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 7406e26e2f8..e6d5c86bfbc 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -328,7 +328,7 @@ st_nir_opts(nir_shader *nir, bool scalar) NIR_PASS(progress, nir, nir_opt_if); NIR_PASS(progress, nir, nir_opt_dead_cf); NIR_PASS(progress, nir, nir_opt_cse); - NIR_PASS(progress, nir, nir_opt_peephole_select, 8); + NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true); NIR_PASS(progress, nir, nir_opt_algebraic); NIR_PASS(progress, nir, nir_opt_constant_folding); |