diff options
-rw-r--r-- | src/compiler/nir/nir_opt_loop_unroll.c | 207 |
1 files changed, 199 insertions, 8 deletions
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
index 0dacf0546ae..c700e6704da 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -553,18 +553,200 @@ wrapper_unroll(nir_loop *loop)
 }
 
+/* Returns true if the deref chain holds an array deref that is indexed by the
+ * terminator's induction variable source and whose parent array (or matrix)
+ * has a length no greater than trip_count.
+ */
 static bool
+is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
+                        unsigned trip_count)
+{
+   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+      if (d->deref_type != nir_deref_type_array)
+         continue;
+
+      nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
+      nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
+      if (!nir_srcs_equal(d->arr.index, src))
+         continue;
+
+      nir_deref_instr *parent = nir_deref_instr_parent(d);
+      assert(glsl_type_is_array(parent->type) ||
+             glsl_type_is_matrix(parent->type));
+
+      /* We have already unrolled the loop and the new one will be embedded in
+       * the innermost continue branch. So unless the array is greater than
+       * the trip count any iteration over the loop will be an out of bounds
+       * access of the array.
+       */
+      return glsl_get_length(parent->type) <= trip_count;
+   }
+
+   return false;
+}
+
+/* If we know an array access is going to be out of bounds remove or replace
+ * the access with an undef. This can later result in the entire loop being
+ * removed by nir_opt_dead_cf().
+ */
+static void
+remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
+                                   nir_loop_terminator *term,
+                                   nir_cf_list *lp_header,
+                                   nir_cf_list *lp_body,
+                                   unsigned trip_count)
+{
+   if (!loop->info->guessed_trip_count)
+      return;
+
+   /* Temporarily recreate the original loop so we can alter it */
+   nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
+   nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
+
+   nir_builder b;
+   nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node));
+
+   nir_foreach_block_in_cf_node(block, &loop->cf_node) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         /* Check for arrays variably-indexed by a loop induction variable.
+          * If this access is out of bounds remove the instruction or replace
+          * its use with an undefined instruction.
+          * If the loop is no longer useful we leave it for the appropriate
+          * pass to clean it up for us.
+          */
+         if (intrin->intrinsic == nir_intrinsic_load_deref ||
+             intrin->intrinsic == nir_intrinsic_store_deref ||
+             intrin->intrinsic == nir_intrinsic_copy_deref) {
+
+            if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]),
+                                        trip_count)) {
+               if (intrin->intrinsic == nir_intrinsic_load_deref) {
+                  /* The undef replaces the loaded value, so it has to take
+                   * its size from the load's dest, not the deref in src[0] */
+                  nir_ssa_def *undef =
+                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
+                                   intrin->dest.ssa.bit_size);
+                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                           nir_src_for_ssa(undef));
+               } else {
+                  nir_instr_remove(instr);
+                  continue;
+               }
+            }
+
+            if (intrin->intrinsic == nir_intrinsic_copy_deref &&
+                is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]),
+                                        trip_count)) {
+               /* The source of the copy is out of bounds, so whatever would
+                * be written is undefined. Drop the copy instead of building
+                * a replacement store.
+                */
+               nir_instr_remove(instr);
+            }
+         }
+      }
+   }
+
+   /* Now that we are done extract the loop header and body again */
+   nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)),
+                  nir_before_cf_node(&term->nif->cf_node));
+   nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
+                  nir_after_block(nir_loop_last_block(loop)));
+}
+
+/* Partially unrolls loops that don't have a known trip count.
+ */
+static void
+partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
+{
+   assert(list_length(&loop->info->loop_terminator_list) == 1);
+
+   nir_loop_terminator *terminator =
+      list_first_entry(&loop->info->loop_terminator_list,
+                        nir_loop_terminator, loop_terminator_link);
+
+   assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block));
+
+   loop_prepare_for_unroll(loop);
+
+   /* Pluck out the loop header */
+   nir_cf_list lp_header;
+   nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
+                  nir_before_cf_node(&terminator->nif->cf_node));
+
+   struct hash_table *remap_table =
+      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                              _mesa_key_pointer_equal);
+
+   nir_cf_list lp_body;
+   nir_cf_node *unroll_loc =
+      complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
+                               remap_table, trip_count);
+
+   /* Attempt to remove out of bounds array access */
+   remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header,
+                                      &lp_body, trip_count);
+
+   nir_cursor cursor =
+      get_complex_unroll_insert_location(unroll_loc,
+                                         terminator->continue_from_then);
+
+   /* Reinsert the loop in the innermost nested continue branch of the unrolled
+    * loop.
+    */
+   nir_loop *new_loop = nir_loop_create(shader);
+   nir_cf_node_insert(cursor, &new_loop->cf_node);
+   new_loop->partially_unrolled = true;
+
+   /* Clone loop header and insert into new loop */
+   nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+                                  nir_after_cf_list(&new_loop->body),
+                                  remap_table);
+
+   /* Clone loop body and insert into new loop */
+   nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent,
+                                  nir_after_cf_list(&new_loop->body),
+                                  remap_table);
+
+   /* Insert break back into terminator */
+   nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break);
+   nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop));
+   if (terminator->continue_from_then) {
+      nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr);
+   } else {
+      nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr);
+   }
+
+   /* Delete the original loop header and body */
+   nir_cf_delete(&lp_header);
+   nir_cf_delete(&lp_body);
+
+   /* The original loop has been replaced so remove it. */
+   nir_cf_node_remove(&loop->cf_node);
+
+   _mesa_hash_table_destroy(remap_table, NULL);
+}
+
+static bool
 is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
 {
    unsigned max_iter = shader->options->max_unroll_iterations;
 
-   if (li->max_trip_count > max_iter)
+   unsigned trip_count =
+      li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
+
+   if (trip_count > max_iter)
       return false;
 
-   if (li->force_unroll)
+   if (li->force_unroll && !li->guessed_trip_count)
       return true;
 
    bool loop_not_too_large =
-      li->instr_cost * li->max_trip_count <= max_iter * LOOP_UNROLL_LIMIT;
+      li->instr_cost * trip_count <= max_iter * LOOP_UNROLL_LIMIT;
 
    return loop_not_too_large;
 }
@@ -616,15 +798,24 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
           !loop->info->complex_loop) {
 
          nir_block *last_loop_blk = nir_loop_last_block(loop);
-         if (!nir_block_ends_in_break(last_loop_blk))
+         if (nir_block_ends_in_break(last_loop_blk)) {
+            progress = wrapper_unroll(loop);
             goto exit;
+         }
 
-         progress = wrapper_unroll(loop);
-
-         goto exit;
+         /* If we were able to guess the loop iteration based on array access
+          * then do a partial unroll.
+          */
+         unsigned num_lt = list_length(&loop->info->loop_terminator_list);
+         if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled &&
+             loop->info->guessed_trip_count &&
+             is_loop_small_enough_to_unroll(sh, loop->info)) {
+            partial_unroll(sh, loop, loop->info->guessed_trip_count);
+            progress = true;
+         }
       }
 
-      if (has_nested_loop || loop->info->limiting_terminator == NULL)
+      if (has_nested_loop || !loop->info->limiting_terminator)
          goto exit;
 
       if (!is_loop_small_enough_to_unroll(sh, loop->info))