diff options
-rw-r--r-- | src/compiler/nir/nir.h | 2 | ||||
-rw-r--r-- | src/compiler/nir/nir_lower_alu_to_scalar.c | 42 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_nir.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir.c | 2 |
5 files changed, 30 insertions, 20 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 6f85bdb6bb6..3b3d6ae8393 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2410,7 +2410,7 @@ bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes); void nir_move_vec_src_uses_to_dest(nir_shader *shader); bool nir_lower_vec_to_movs(nir_shader *shader); -void nir_lower_alu_to_scalar(nir_shader *shader); +bool nir_lower_alu_to_scalar(nir_shader *shader); void nir_lower_load_const_to_scalar(nir_shader *shader); void nir_lower_phis_to_scalar(nir_shader *shader); diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index a84fbdfd32b..fa18debd850 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -73,7 +73,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op, nir_instr_remove(&instr->instr); } -static void +static bool lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) { unsigned num_src = nir_op_infos[instr->op].num_inputs; @@ -90,7 +90,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) case name##3: \ case name##4: \ lower_reduction(instr, chan, merge, b); \ - return; + return true; switch (instr->op) { case nir_op_vec4: @@ -99,11 +99,11 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) /* We don't need to scalarize these ops, they're the ones generated to * group up outputs into a value that can be SSAed. */ - return; + return false; case nir_op_pack_half_2x16: if (!b->shader->options->lower_pack_half_2x16) - return; + return false; nir_ssa_def *val = nir_pack_half_2x16_split(b, nir_channel(b, instr->src[0].src.ssa, @@ -113,7 +113,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); nir_instr_remove(&instr->instr); - return; + return true; case nir_op_unpack_unorm_4x8: case nir_op_unpack_snorm_4x8: @@ -122,11 +122,11 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) /* There is no scalar version of these ops, unless we were to break it * down to bitshifts and math (which is definitely not intended). */ - return; + return false; case nir_op_unpack_half_2x16: { if (!b->shader->options->lower_unpack_half_2x16) - return; + return false; nir_ssa_def *comps[2]; comps[0] = nir_unpack_half_2x16_split_x(b, instr->src[0].src.ssa); @@ -135,7 +135,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec)); nir_instr_remove(&instr->instr); - return; + return true; } case nir_op_pack_uvec2_to_uint: { @@ -185,11 +185,11 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); nir_instr_remove(&instr->instr); - return; + return true; } case nir_op_unpack_double_2x32: - return; + return false; LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd); LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand); @@ -204,7 +204,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) } if (instr->dest.dest.ssa.num_components == 1) - return; + return false; unsigned num_components = instr->dest.dest.ssa.num_components; nir_ssa_def *comps[] = { NULL, NULL, NULL, NULL }; @@ -240,30 +240,40 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec)); nir_instr_remove(&instr->instr); + return true; } -static void +static bool nir_lower_alu_to_scalar_impl(nir_function_impl *impl) { nir_builder builder; nir_builder_init(&builder, impl); + bool progress = false; nir_foreach_block(block, impl) { nir_foreach_instr_safe(instr, block) { - if (instr->type == nir_instr_type_alu) - lower_alu_instr_scalar(nir_instr_as_alu(instr), &builder); + if (instr->type == nir_instr_type_alu) { + progress = lower_alu_instr_scalar(nir_instr_as_alu(instr), + &builder) || progress; + } } } nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); + + return progress; } -void +bool nir_lower_alu_to_scalar(nir_shader *shader) { + bool progress = false; + nir_foreach_function(function, shader) { if (function->impl) - nir_lower_alu_to_scalar_impl(function->impl); + progress = nir_lower_alu_to_scalar_impl(function->impl) || progress; } + + return progress; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c index 023da3be292..25262224f98 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c @@ -90,7 +90,7 @@ ir3_optimize_loop(nir_shader *s) progress = false; OPT_V(s, nir_lower_vars_to_ssa); - OPT_V(s, nir_lower_alu_to_scalar); + progress |= OPT(s, nir_lower_alu_to_scalar); OPT_V(s, nir_lower_phis_to_scalar); progress |= OPT(s, nir_copy_prop); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 52b938d066e..24e4699b92e 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1423,7 +1423,7 @@ vc4_optimize_nir(struct nir_shader *s) progress = false; NIR_PASS_V(s, nir_lower_vars_to_ssa); - NIR_PASS_V(s, nir_lower_alu_to_scalar); + NIR_PASS(progress, s, nir_lower_alu_to_scalar); NIR_PASS_V(s, nir_lower_phis_to_scalar); NIR_PASS(progress, s, nir_copy_prop); diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index e8dafaebc91..27be201db6f 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -375,7 +375,7 @@ nir_optimize(nir_shader *nir, bool is_scalar) OPT_V(nir_lower_vars_to_ssa); if (is_scalar) { - OPT_V(nir_lower_alu_to_scalar); + OPT(nir_lower_alu_to_scalar); } OPT(nir_copy_prop); |