 src/compiler/nir/nir_opt_copy_prop_vars.c | 423 +++++++++++++++++++++++------
 src/compiler/nir/tests/vars_tests.cpp     |  10 +-
 2 files changed, 351 insertions(+), 82 deletions(-)
diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c
index b3f4e86bab1..b664ef27d35 100644
--- a/src/compiler/nir/nir_opt_copy_prop_vars.c
+++ b/src/compiler/nir/nir_opt_copy_prop_vars.c
@@ -26,6 +26,7 @@
 #include "nir_deref.h"
 
 #include "util/bitscan.h"
+#include "util/u_dynarray.h"
 
 /**
  * Variable-based copy propagation
@@ -42,16 +43,21 @@
  * to do this because it isn't aware of variable writes that may alias the
  * value and make the former load invalid.
  *
- * Unfortunately, properly handling all of those cases makes this path rather
- * complex. In order to avoid additional complexity, this pass is entirely
- * block-local. If we tried to make it global, the data-flow analysis would
- * rapidly get out of hand. Fortunately, for anything that is only ever
- * accessed directly, we get SSA based copy-propagation which is extremely
- * powerful so this isn't that great a loss.
+ * This pass uses an intermediate solution between being local / "per-block"
+ * and a complete data-flow analysis. It follows the control flow graph, and
+ * propagates the available copy information forward, invalidating data at
+ * each cf_node.
  *
  * Removal of dead writes to variables is handled by another pass.
  */
 
+struct vars_written {
+   nir_variable_mode modes;
+
+   /* Key is the deref and value is the uintptr_t with the write mask. */
+   struct hash_table *derefs;
+};
+
 struct value {
    bool is_ssa;
    union {
@@ -61,61 +67,191 @@ struct value {
 };
 
 struct copy_entry {
-   struct list_head link;
-
    struct value src;
 
    nir_deref_instr *dst;
 };
 
 struct copy_prop_var_state {
-   nir_shader *shader;
+   nir_function_impl *impl;
 
    void *mem_ctx;
+   void *lin_ctx;
 
-   struct list_head copies;
-
-   /* We're going to be allocating and deleting a lot of copy entries so we'll
-    * keep a free list to avoid thrashing malloc too badly.
+   /* Maps nodes to vars_written. Used to invalidate copy entries when
+    * visiting each node.
     */
-   struct list_head copy_free_list;
+   struct hash_table *vars_written_map;
 
    bool progress;
 };
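The `derefs` map above packs each deref's accumulated component write mask directly into the hash-table value pointer, so recording a second write to the same deref is a single bitwise OR, with no per-entry allocation. A minimal sketch of that pattern in isolation, using the same `util/hash_table.h` calls the patch uses (the helper name `add_write_mask` is hypothetical, not part of the patch):

    #include "util/hash_table.h"

    /* Accumulate a component write mask for a pointer key. The mask lives
     * in the value pointer itself (cast to/from uintptr_t), so repeated
     * writes to the same key simply OR their masks together. */
    static void
    add_write_mask(struct hash_table *derefs, const void *key, uintptr_t mask)
    {
       struct hash_entry *he = _mesa_hash_table_search(derefs, key);
       if (he)
          he->data = (void *)((uintptr_t)he->data | mask);
       else
          _mesa_hash_table_insert(derefs, key, (void *)mask);
    }

This is the same accumulation that `gather_vars_written()` below performs for `store_deref` and `copy_deref` destinations.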
 
-static struct copy_entry *
-copy_entry_create(struct copy_prop_var_state *state,
-                  nir_deref_instr *dst_deref)
+static struct vars_written *
+create_vars_written(struct copy_prop_var_state *state)
 {
-   struct copy_entry *entry;
-   if (!list_empty(&state->copy_free_list)) {
-      struct list_head *item = state->copy_free_list.next;
-      list_del(item);
-      entry = LIST_ENTRY(struct copy_entry, item, link);
-      memset(entry, 0, sizeof(*entry));
-   } else {
-      entry = rzalloc(state->mem_ctx, struct copy_entry);
+   struct vars_written *written =
+      linear_zalloc_child(state->lin_ctx, sizeof(struct vars_written));
+   written->derefs = _mesa_hash_table_create(state->mem_ctx, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+   return written;
+}
+
+static void
+gather_vars_written(struct copy_prop_var_state *state,
+                    struct vars_written *written,
+                    nir_cf_node *cf_node)
+{
+   struct vars_written *new_written = NULL;
+
+   switch (cf_node->type) {
+   case nir_cf_node_function: {
+      nir_function_impl *impl = nir_cf_node_as_function(cf_node);
+      foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
+         gather_vars_written(state, NULL, cf_node);
+      break;
    }
-   entry->dst = dst_deref;
-   list_add(&entry->link, &state->copies);
+   case nir_cf_node_block: {
+      if (!written)
+         break;
 
-   return entry;
+      nir_block *block = nir_cf_node_as_block(cf_node);
+      nir_foreach_instr(instr, block) {
+         if (instr->type == nir_instr_type_call) {
+            written->modes |= nir_var_shader_out |
+                              nir_var_global |
+                              nir_var_local |
+                              nir_var_shader_storage |
+                              nir_var_shared;
+            continue;
+         }
+
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_barrier:
+         case nir_intrinsic_memory_barrier:
+            written->modes |= nir_var_shader_out |
+                              nir_var_shader_storage |
+                              nir_var_shared;
+            break;
+
+         case nir_intrinsic_emit_vertex:
+         case nir_intrinsic_emit_vertex_with_counter:
+            written->modes = nir_var_shader_out;
+            break;
+
+         case nir_intrinsic_store_deref:
+         case nir_intrinsic_copy_deref: {
+            /* Destination in _both_ store_deref and copy_deref is src[0]. */
+            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+
+            uintptr_t mask = intrin->intrinsic == nir_intrinsic_store_deref ?
+               nir_intrinsic_write_mask(intrin) : (1 << glsl_get_vector_elements(dst->type)) - 1;
+
+            struct hash_entry *ht_entry = _mesa_hash_table_search(written->derefs, dst);
+            if (ht_entry)
+               ht_entry->data = (void *)(mask | (uintptr_t)ht_entry->data);
+            else
+               _mesa_hash_table_insert(written->derefs, dst, (void *)mask);
+
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+
+      break;
+   }
+
+   case nir_cf_node_if: {
+      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
+
+      new_written = create_vars_written(state);
+
+      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
+         gather_vars_written(state, new_written, cf_node);
+
+      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
+         gather_vars_written(state, new_written, cf_node);
+
+      break;
+   }
+
+   case nir_cf_node_loop: {
+      nir_loop *loop = nir_cf_node_as_loop(cf_node);
+
+      new_written = create_vars_written(state);
+
+      foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
+         gather_vars_written(state, new_written, cf_node);
+
+      break;
+   }
+
+   default:
+      unreachable("Invalid CF node type");
+   }
+
+   if (new_written) {
+      /* Merge the new information into the parent control flow node. */
+      if (written) {
+         written->modes |= new_written->modes;
+
+         struct hash_entry *new_entry;
+         hash_table_foreach(new_written->derefs, new_entry) {
+            struct hash_entry *old_entry =
+               _mesa_hash_table_search_pre_hashed(written->derefs, new_entry->hash,
+                                                  new_entry->key);
+            if (old_entry) {
+               nir_component_mask_t merged = (uintptr_t)new_entry->data |
+                                             (uintptr_t)old_entry->data;
+               old_entry->data = (void *)((uintptr_t)merged);
+            } else {
+               _mesa_hash_table_insert_pre_hashed(written->derefs, new_entry->hash,
+                                                  new_entry->key, new_entry->data);
+            }
+         }
+      }
+
+      _mesa_hash_table_insert(state->vars_written_map, cf_node, new_written);
+   }
+}
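`gather_vars_written()` runs once, bottom-up: every if and loop node ends up with the union of all modes and derefs that any path through it might write, because each child folds its summary into its parent's before returning. A standalone toy showing that aggregation shape (the `node` type here is invented for illustration; the real pass walks `nir_cf_node` lists and records results in `vars_written_map`):

    #include <stdio.h>

    /* Toy stand-in for a CF tree node: a bitmask of writes performed
     * directly in this node, plus up to two children. */
    struct node {
       unsigned self_writes;
       struct node *children[2];
    };

    /* Union of all writes in the subtree, mirroring how child summaries
     * are ORed into the parent's vars_written. */
    static unsigned
    gather(const struct node *n)
    {
       unsigned written = n->self_writes;
       for (int i = 0; i < 2; i++) {
          if (n->children[i])
             written |= gather(n->children[i]);
       }
       return written;
    }

    int main(void)
    {
       struct node then_blk = { .self_writes = 0x1 };
       struct node else_blk = { .self_writes = 0x4 };
       struct node if_node  = { .children = { &then_blk, &else_blk } };
       printf("if-node writes: 0x%x\n", gather(&if_node)); /* prints 0x5 */
       return 0;
    }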
+
+static struct copy_entry *
+copy_entry_create(struct util_dynarray *copies,
+                  nir_deref_instr *dst_deref)
+{
+   struct copy_entry new_entry = {
+      .dst = dst_deref,
+   };
+   util_dynarray_append(copies, struct copy_entry, new_entry);
+   return util_dynarray_top_ptr(copies, struct copy_entry);
 }
 
+/* Remove a copy entry by swapping it with the last element and reducing the
+ * size. If used inside an iteration on copies, it must be a reverse
+ * (backwards) iteration. It is safe to use in those cases because the swap
+ * will not affect the rest of the iteration.
+ */
 static void
-copy_entry_remove(struct copy_prop_var_state *state, struct copy_entry *entry)
+copy_entry_remove(struct util_dynarray *copies,
+                  struct copy_entry *entry)
 {
-   list_del(&entry->link);
-   list_add(&entry->link, &state->copy_free_list);
+   /* This also works when removing the last element, since pop doesn't
+    * shrink the memory used by the array, so the swap is useless but not
+    * invalid.
+    */
+   *entry = util_dynarray_pop(copies, struct copy_entry);
 }
 
 static struct copy_entry *
-lookup_entry_for_deref(struct copy_prop_var_state *state,
+lookup_entry_for_deref(struct util_dynarray *copies,
                        nir_deref_instr *deref,
                        nir_deref_compare_result allowed_comparisons)
 {
-   list_for_each_entry(struct copy_entry, iter, &state->copies, link) {
+   util_dynarray_foreach(copies, struct copy_entry, iter) {
       if (nir_compare_derefs(iter->dst, deref) & allowed_comparisons)
          return iter;
    }
@@ -124,16 +260,18 @@ lookup_entry_for_deref(struct copy_prop_var_state *state,
 }
 
 static struct copy_entry *
-get_entry_and_kill_aliases(struct copy_prop_var_state *state,
-                           nir_deref_instr *deref,
-                           unsigned write_mask)
+lookup_entry_and_kill_aliases(struct util_dynarray *copies,
+                              nir_deref_instr *deref,
+                              unsigned write_mask)
 {
+   /* TODO: Take into account the write_mask. */
+
    struct copy_entry *entry = NULL;
-   list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link) {
+   util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
       if (!iter->src.is_ssa) {
         /* If this write aliases the source of some entry, get rid of it */
         if (nir_compare_derefs(iter->src.deref, deref) & nir_derefs_may_alias_bit) {
-            copy_entry_remove(state, iter);
+            copy_entry_remove(copies, iter);
            continue;
         }
      }
@@ -144,28 +282,54 @@ get_entry_and_kill_aliases(struct copy_prop_var_state *state,
         assert(entry == NULL);
         entry = iter;
      } else if (comp & nir_derefs_may_alias_bit) {
-         copy_entry_remove(state, iter);
+         copy_entry_remove(copies, iter);
      }
   }
 
+   return entry;
+}
+
+static void
+kill_aliases(struct util_dynarray *copies,
+             nir_deref_instr *deref,
+             unsigned write_mask)
+{
+   /* TODO: Take into account the write_mask. */
+
+   struct copy_entry *entry =
+      lookup_entry_and_kill_aliases(copies, deref, write_mask);
+   if (entry)
+      copy_entry_remove(copies, entry);
+}
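The comment on `copy_entry_remove()` is worth unpacking: swap-with-last removal is O(1) but moves the last element into the vacated slot, so a forward iteration would skip that moved element. Iterating backwards means everything the scan still has to visit sits at a lower index than the current one, which the swap never disturbs. A self-contained demonstration of the pattern (plain C, unrelated to the patch's types):

    #include <stdio.h>

    /* Unordered O(1) removal: overwrite slot i with the last element and
     * shrink. Safe during a backwards scan because only slot i and the
     * tail are disturbed, and the scan has already visited both. */
    static void
    remove_at(int *arr, int *count, int i)
    {
       arr[i] = arr[--*count];
    }

    int main(void)
    {
       int arr[] = { 1, 2, 3, 4, 5 };
       int count = 5;

       /* Drop all even values while iterating in reverse. */
       for (int i = count - 1; i >= 0; i--) {
          if (arr[i] % 2 == 0)
             remove_at(arr, &count, i);
       }

       for (int i = 0; i < count; i++)
          printf("%d ", arr[i]);   /* prints "1 5 3" -- order not kept */
       printf("\n");
       return 0;
    }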
+
+static struct copy_entry *
+get_entry_and_kill_aliases(struct util_dynarray *copies,
+                           nir_deref_instr *deref,
+                           unsigned write_mask)
+{
+   /* TODO: Take into account the write_mask. */
+
+   struct copy_entry *entry =
+      lookup_entry_and_kill_aliases(copies, deref, write_mask);
+
    if (entry == NULL)
-      entry = copy_entry_create(state, deref);
+      entry = copy_entry_create(copies, deref);
 
    return entry;
 }
 
 static void
-apply_barrier_for_modes(struct copy_prop_var_state *state,
+apply_barrier_for_modes(struct util_dynarray *copies,
                         nir_variable_mode modes)
 {
-   list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link) {
+   util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
       nir_variable *dst_var = nir_deref_instr_get_variable(iter->dst);
       nir_variable *src_var = iter->src.is_ssa ? NULL :
          nir_deref_instr_get_variable(iter->src.deref);
 
       if ((dst_var->data.mode & modes) ||
           (src_var && (src_var->data.mode & modes)))
-         copy_entry_remove(state, iter);
+         copy_entry_remove(copies, iter);
    }
 }
 
@@ -396,13 +560,33 @@ try_load_from_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
 }
 
 static void
-copy_prop_vars_block(struct copy_prop_var_state *state,
-                     nir_builder *b, nir_block *block)
+invalidate_copies_for_cf_node(struct copy_prop_var_state *state,
+                              struct util_dynarray *copies,
+                              nir_cf_node *cf_node)
 {
-   /* Start each block with a blank slate */
-   list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link)
-      copy_entry_remove(state, iter);
+   struct hash_entry *ht_entry = _mesa_hash_table_search(state->vars_written_map, cf_node);
+   assert(ht_entry);
+
+   struct vars_written *written = ht_entry->data;
+   if (written->modes) {
+      util_dynarray_foreach_reverse(copies, struct copy_entry, entry) {
+         if (entry->dst->mode & written->modes)
+            copy_entry_remove(copies, entry);
+      }
+   }
+
+   struct hash_entry *entry;
+   hash_table_foreach (written->derefs, entry) {
+      nir_deref_instr *deref_written = (nir_deref_instr *)entry->key;
+      kill_aliases(copies, deref_written, (uintptr_t)entry->data);
+   }
+}
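The deref-comparison bits passed around here decide which entries can serve or must kill which accesses. Two illustrative helpers (the helper names are hypothetical, but they use `nir_compare_derefs()` from nir_deref.h exactly as the patch does) showing the two relationships the pass relies on:

    #include "nir_deref.h"

    /* A load of `b` can be served by an entry whose destination `a` covers
     * it; e.g. an entry for the whole of `v` can serve a load of `v[3]` --
     * hence the nir_derefs_a_contains_b_bit lookups in the load_deref and
     * copy_deref handling below. */
    static bool
    entry_can_serve_load(nir_deref_instr *entry_dst, nir_deref_instr *load_src)
    {
       return nir_compare_derefs(entry_dst, load_src) &
              nir_derefs_a_contains_b_bit;
    }

    /* Any possible overlap with a write is enough to invalidate an entry --
     * hence the nir_derefs_may_alias_bit checks in
     * lookup_entry_and_kill_aliases() above. */
    static bool
    write_must_kill_entry(nir_deref_instr *entry_dst, nir_deref_instr *write_dst)
    {
       return nir_compare_derefs(entry_dst, write_dst) &
              nir_derefs_may_alias_bit;
    }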
+
+static void
+copy_prop_vars_block(struct copy_prop_var_state *state,
+                     nir_builder *b, nir_block *block,
+                     struct util_dynarray *copies)
+{
    nir_foreach_instr_safe(instr, block) {
       if (instr->type == nir_instr_type_call) {
          apply_barrier_for_modes(copies, nir_var_shader_out |
@@ -427,14 +611,14 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
 
       case nir_intrinsic_emit_vertex:
       case nir_intrinsic_emit_vertex_with_counter:
-         apply_barrier_for_modes(state, nir_var_shader_out);
+         apply_barrier_for_modes(copies, nir_var_shader_out);
          break;
 
       case nir_intrinsic_load_deref: {
          nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
 
         struct copy_entry *src_entry =
-            lookup_entry_for_deref(state, src, nir_derefs_a_contains_b_bit);
+            lookup_entry_for_deref(copies, src, nir_derefs_a_contains_b_bit);
         struct value value;
         if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
            if (value.is_ssa) {
@@ -479,9 +663,9 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
          * contains what we're looking for.
          */
         struct copy_entry *store_entry =
-            lookup_entry_for_deref(state, src, nir_derefs_equal_bit);
+            lookup_entry_for_deref(copies, src, nir_derefs_equal_bit);
         if (!store_entry)
-            store_entry = copy_entry_create(state, src);
+            store_entry = copy_entry_create(copies, src);
 
         /* Set up a store to this entry with the value of the load. This way
          * we can potentially remove subsequent loads. However, we use a
@@ -504,7 +688,7 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
         unsigned wrmask = nir_intrinsic_write_mask(intrin);
         struct copy_entry *entry =
-            get_entry_and_kill_aliases(state, dst, wrmask);
+            get_entry_and_kill_aliases(copies, dst, wrmask);
         store_to_entry(state, entry, &value, wrmask);
         break;
      }
@@ -520,7 +704,7 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
         }
 
         struct copy_entry *src_entry =
-            lookup_entry_for_deref(state, src, nir_derefs_a_contains_b_bit);
+            lookup_entry_for_deref(copies, src, nir_derefs_a_contains_b_bit);
         struct value value;
         if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
            if (value.is_ssa) {
@@ -547,7 +731,7 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
         }
 
         struct copy_entry *dst_entry =
-            get_entry_and_kill_aliases(state, dst, 0xf);
+            get_entry_and_kill_aliases(copies, dst, 0xf);
         store_to_entry(state, dst_entry, &value, 0xf);
         break;
      }
@@ -558,36 +742,121 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
    }
 }
 
-bool
-nir_opt_copy_prop_vars(nir_shader *shader)
+static void
+copy_prop_vars_cf_node(struct copy_prop_var_state *state,
+                       struct util_dynarray *copies,
+                       nir_cf_node *cf_node)
 {
-   struct copy_prop_var_state state;
+   switch (cf_node->type) {
+   case nir_cf_node_function: {
+      nir_function_impl *impl = nir_cf_node_as_function(cf_node);
 
-   state.shader = shader;
-   state.mem_ctx = ralloc_context(NULL);
-   list_inithead(&state.copies);
-   list_inithead(&state.copy_free_list);
+      struct util_dynarray impl_copies;
+      util_dynarray_init(&impl_copies, state->mem_ctx);
 
-   bool global_progress = false;
-   nir_foreach_function(function, shader) {
-      if (!function->impl)
-         continue;
+      foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
+         copy_prop_vars_cf_node(state, &impl_copies, cf_node);
+      break;
+   }
+
+   case nir_cf_node_block: {
+      nir_block *block = nir_cf_node_as_block(cf_node);
 
       nir_builder b;
-      nir_builder_init(&b, function->impl);
+      nir_builder_init(&b, state->impl);
+      copy_prop_vars_block(state, &b, block, copies);
+      break;
+   }
 
-      state.progress = false;
-      nir_foreach_block(block, function->impl)
-         copy_prop_vars_block(&state, &b, block);
+   case nir_cf_node_if: {
+      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
 
-      if (state.progress) {
-         nir_metadata_preserve(function->impl, nir_metadata_block_index |
-                                               nir_metadata_dominance);
-         global_progress = true;
-      }
+      /* Clone the copies for each branch of the if statement. The idea is
+       * that they both see the same state of available copies, but do not
+       * interfere with each other.
+       */
+
+      struct util_dynarray then_copies;
+      util_dynarray_clone(&then_copies, state->mem_ctx, copies);
+
+      struct util_dynarray else_copies;
+      util_dynarray_clone(&else_copies, state->mem_ctx, copies);
+
+      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
+         copy_prop_vars_cf_node(state, &then_copies, cf_node);
+
+      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
+         copy_prop_vars_cf_node(state, &else_copies, cf_node);
+
+      /* Both branches' copies can be ignored, since the effect of running
+       * both branches was captured in the first pass that collects
+       * vars_written.
+       */
+
+      invalidate_copies_for_cf_node(state, copies, cf_node);
+
+      break;
+   }
+
+   case nir_cf_node_loop: {
+      nir_loop *loop = nir_cf_node_as_loop(cf_node);
+
+      /* Invalidate before cloning the copies for the loop, since the loop
+       * body can be executed more than once.
+       */
+
+      invalidate_copies_for_cf_node(state, copies, cf_node);
+
+      struct util_dynarray loop_copies;
+      util_dynarray_clone(&loop_copies, state->mem_ctx, copies);
+
+      foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
+         copy_prop_vars_cf_node(state, &loop_copies, cf_node);
+
+      break;
+   }
+
+   default:
+      unreachable("Invalid CF node type");
+   }
+}
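The if and loop cases deliberately order the clone and the invalidation differently. For an if, both arms start from the same snapshot, refine it independently, and are then thrown away; the parent set is repaired from `vars_written` rather than merged from the clones. For a loop, invalidation must happen *before* the body is visited: a copy such as `v0 = v1` made before the loop cannot be trusted inside it if any iteration writes `v1`, because the body may run again after that write. A condensed sketch of the two orderings (mirroring the cases above, not separate patch code):

    /* if: clone first, invalidate after -- the arms must not interfere. */
    struct util_dynarray then_copies, else_copies;
    util_dynarray_clone(&then_copies, mem_ctx, copies);
    util_dynarray_clone(&else_copies, mem_ctx, copies);
    /* ...visit then_list / else_list, each with its own clone... */
    invalidate_copies_for_cf_node(state, copies, cf_node);

    /* loop: invalidate first, then clone -- iteration N can observe the
     * writes of iteration N-1, so loop-written entries are never trusted
     * even on the first pass through the body. */
    invalidate_copies_for_cf_node(state, copies, cf_node);
    struct util_dynarray loop_copies;
    util_dynarray_clone(&loop_copies, mem_ctx, copies);
    /* ...visit the loop body with the clone... */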
+
+static bool
+nir_copy_prop_vars_impl(nir_function_impl *impl)
+{
+   void *mem_ctx = ralloc_context(NULL);
+
+   struct copy_prop_var_state state = {
+      .impl = impl,
+      .mem_ctx = mem_ctx,
+      .lin_ctx = linear_zalloc_parent(mem_ctx, 0),
+
+      .vars_written_map = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                                  _mesa_key_pointer_equal),
+   };
+
+   gather_vars_written(&state, NULL, &impl->cf_node);
+
+   copy_prop_vars_cf_node(&state, NULL, &impl->cf_node);
+
+   if (state.progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
    }
 
-   ralloc_free(state.mem_ctx);
+   ralloc_free(mem_ctx);
 
+   return state.progress;
+}
+
+bool
+nir_opt_copy_prop_vars(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+      progress |= nir_copy_prop_vars_impl(function->impl);
   }
 
-   return global_progress;
+   return progress;
 }
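The rewritten entry point still reports progress per function impl, so callers can keep driving the pass to a fixed point. A typical (hypothetical) driver loop; `nir_opt_dce` is only an example companion pass, not something this patch touches:

    bool progress;
    do {
       progress = false;
       progress |= nir_opt_copy_prop_vars(shader);
       progress |= nir_opt_dce(shader);
    } while (progress);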
diff --git a/src/compiler/nir/tests/vars_tests.cpp b/src/compiler/nir/tests/vars_tests.cpp
index d06f41d02e9..2896d9fce66 100644
--- a/src/compiler/nir/tests/vars_tests.cpp
+++ b/src/compiler/nir/tests/vars_tests.cpp
@@ -159,7 +159,7 @@ TEST_F(nir_redundant_load_vars_test, duplicated_load)
    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
 }
 
-TEST_F(nir_redundant_load_vars_test, DISABLED_duplicated_load_in_two_blocks)
+TEST_F(nir_redundant_load_vars_test, duplicated_load_in_two_blocks)
 {
    /* Load a variable twice in different blocks. One should be removed. */
 
@@ -185,7 +185,7 @@ TEST_F(nir_redundant_load_vars_test, DISABLED_duplicated_load_in_two_blocks)
    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
 }
 
-TEST_F(nir_redundant_load_vars_test, DISABLED_invalidate_inside_if_block)
+TEST_F(nir_redundant_load_vars_test, invalidate_inside_if_block)
 {
    /* Load variables, then write to some of them in different branches of the
     * if statement. They should be invalidated accordingly.
@@ -383,7 +383,7 @@ TEST_F(nir_copy_prop_vars_test, store_store_load_different_components)
    EXPECT_TRUE(store_to_v1);
 }
 
-TEST_F(nir_copy_prop_vars_test, DISABLED_store_store_load_different_components_in_many_blocks)
+TEST_F(nir_copy_prop_vars_test, store_store_load_different_components_in_many_blocks)
 {
    nir_variable **v = create_many_ivec2(nir_var_local, "v", 2);
 
@@ -433,7 +433,7 @@ TEST_F(nir_copy_prop_vars_test, DISABLED_store_store_load_different_components_i
    EXPECT_TRUE(store_to_v1);
 }
 
-TEST_F(nir_copy_prop_vars_test, DISABLED_memory_barrier_in_two_blocks)
+TEST_F(nir_copy_prop_vars_test, memory_barrier_in_two_blocks)
 {
    nir_variable **v = create_many_int(nir_var_shader_storage, "v", 4);
 
@@ -459,7 +459,7 @@ TEST_F(nir_copy_prop_vars_test, DISABLED_memory_barrier_in_two_blocks)
    ASSERT_EQ(nir_intrinsic_get_var(load, 0), v[1]);
 }
 
-TEST_F(nir_copy_prop_vars_test, DISABLED_simple_store_load_in_two_blocks)
+TEST_F(nir_copy_prop_vars_test, simple_store_load_in_two_blocks)
 {
    nir_variable **v = create_many_ivec2(nir_var_local, "v", 2);
    unsigned mask = 1 | 2;
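The re-enabled tests all exercise the new cross-block behavior. Their common shape, sketched with nir_builder (a sketch only: `create_int` is a hypothetical helper standing in for the harness's `create_many_int`/`create_many_ivec2`): a store in one block, control flow that writes nothing in between, and a load in a later block that the pass should now satisfy from the stored value.

    nir_variable *v = create_int(nir_var_local, "v");   /* hypothetical */

    /* Block 1: store a known value. */
    nir_store_var(b, v, nir_imm_int(b, 42), 0x1);

    /* An empty if/else just to split the shader into several blocks. */
    nir_push_if(b, nir_imm_int(b, 0));
    nir_pop_if(b, NULL);

    /* Block after the if: the block-local pass forgot the copy at the
     * block boundary; now vars_written shows the if wrote nothing, so
     * this load is replaced by the 42 stored above. */
    nir_ssa_def *val = nir_load_var(b, v);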