diff options
author | Timothy Arceri <[email protected]> | 2018-12-06 16:00:40 +1100 |
---|---|---|
committer | Timothy Arceri <[email protected]> | 2019-03-12 00:52:30 +0000 |
commit | 3235a942c16b61849bc16a710c53f0a7a5566f0d (patch) | |
tree | 6bc2519f7239cc39c0ad1f2d5f4c0f0b0ad82409 /src/compiler/nir/nir_loop_analyze.c | |
parent | 67c3478482f55a0e86397c0f1af65ccef84f089a (diff) |
nir: find induction/limit vars in iand instructions
This will be used to help find the trip count of loops that look
like the following:
    while (a < x && i < 8) {
       ...
       i++;
    }
Where the NIR will end up looking something like this:
    vec1 32 ssa_1 = load_const (0x00000004 /* 0.000000 */)
    loop {
       ...
       vec1 1 ssa_12 = ilt ssa_225, ssa_11
       vec1 1 ssa_17 = ilt ssa_226, ssa_1
       vec1 1 ssa_18 = iand ssa_12, ssa_17
       vec1 1 ssa_19 = inot ssa_18
       if ssa_19 {
          ...
          break
       } else {
          ...
       }
    }
On RADV this unrolls a bunch of loops in F1-2017 shaders.
Totals from affected shaders:
SGPRS: 4112 -> 4136 (0.58 %)
VGPRS: 4132 -> 4052 (-1.94 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 515444 -> 587720 (14.02 %) bytes
LDS: 2 -> 2 (0.00 %) blocks
Max Waves: 194 -> 196 (1.03 %)
Wait states: 0 -> 0 (0.00 %)
It also unrolls a couple of loops in shader-db on radeonsi.
Totals from affected shaders:
SGPRS: 128 -> 128 (0.00 %)
VGPRS: 64 -> 64 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 6880 -> 9504 (38.14 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 16 -> 16 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
Reviewed-by: Ian Romanick <[email protected]>
Diffstat (limited to 'src/compiler/nir/nir_loop_analyze.c')
-rw-r--r-- | src/compiler/nir/nir_loop_analyze.c | 99 |
1 files changed, 91 insertions, 8 deletions
diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index 6d5de558595..bc116f4d1d7 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -792,6 +792,68 @@ get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind, return limit_rhs; } +static void +try_find_trip_count_vars_in_iand(nir_alu_instr **alu, + nir_loop_variable **ind, + nir_loop_variable **limit, + bool *limit_rhs, + loop_info_state *state) +{ + assert((*alu)->op == nir_op_ieq || (*alu)->op == nir_op_inot); + + nir_ssa_def *iand_def = (*alu)->src[0].src.ssa; + + if ((*alu)->op == nir_op_ieq) { + nir_ssa_def *zero_def = (*alu)->src[1].src.ssa; + + if (iand_def->parent_instr->type != nir_instr_type_alu || + zero_def->parent_instr->type != nir_instr_type_load_const) { + + /* Maybe we had it the wrong way, flip things around */ + iand_def = (*alu)->src[1].src.ssa; + zero_def = (*alu)->src[0].src.ssa; + + /* If we still didn't find what we need then return */ + if (zero_def->parent_instr->type != nir_instr_type_load_const) + return; + } + + /* If the loop is not breaking on (x && y) == 0 then return */ + nir_const_value zero = + nir_instr_as_load_const(zero_def->parent_instr)->value; + if (zero.i32[0] != 0) + return; + } + + if (iand_def->parent_instr->type != nir_instr_type_alu) + return; + + nir_alu_instr *iand = nir_instr_as_alu(iand_def->parent_instr); + if (iand->op != nir_op_iand) + return; + + /* Check if iand src is a terminator condition and try get induction var + * and trip limit var. 
+ */ + nir_ssa_def *src = iand->src[0].src.ssa; + if (src->parent_instr->type == nir_instr_type_alu) { + *alu = nir_instr_as_alu(src->parent_instr); + if (is_supported_terminator_condition(*alu)) + *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state); + } + + /* Try the other iand src if needed */ + if (*ind == NULL || *ind && (*ind)->type != basic_induction || + !is_var_constant(*limit)) { + src = iand->src[1].src.ssa; + if (src->parent_instr->type == nir_instr_type_alu) { + *alu = nir_instr_as_alu(src->parent_instr); + if (is_supported_terminator_condition(*alu)) + *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state); + } + } +} + /* Run through each of the terminators of the loop and try to infer a possible * trip-count. We need to check them all, and set the lowest trip-count as the * trip-count of our loop. If one of the terminators has an undecidable @@ -821,16 +883,35 @@ find_trip_count(loop_info_state *state) nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr); nir_op alu_op = alu->op; - if (!is_supported_terminator_condition(alu)) { - trip_count_known = false; - continue; + bool limit_rhs; + nir_loop_variable *basic_ind = NULL; + nir_loop_variable *limit; + if (alu->op == nir_op_inot || alu->op == nir_op_ieq) { + nir_alu_instr *new_alu = alu; + try_find_trip_count_vars_in_iand(&new_alu, &basic_ind, &limit, + &limit_rhs, state); + + /* The loop is exiting on (x && y) == 0 so we need to get the + * inverse of x or y (i.e. which ever contained the induction var) in + * order to compute the trip count. 
+ */ + if (basic_ind && basic_ind->type == basic_induction) { + alu = new_alu; + alu_op = inverse_comparison(alu); + trip_count_known = false; + terminator->exact_trip_count_unknown = true; + } } - nir_loop_variable *basic_ind; - nir_loop_variable *limit; - bool limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit, - state); - terminator->induction_rhs = !limit_rhs; + if (!basic_ind) { + if (!is_supported_terminator_condition(alu)) { + trip_count_known = false; + continue; + } + + limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit, + state); + } /* The comparison has to have a basic induction variable for us to be * able to find trip counts. @@ -840,6 +921,8 @@ find_trip_count(loop_info_state *state) continue; } + terminator->induction_rhs = !limit_rhs; + /* Attempt to find a constant limit for the loop */ nir_const_value limit_val; if (is_var_constant(limit)) { |