Diffstat (limited to 'src/compiler')
-rw-r--r--   src/compiler/Makefile.sources             |   1
-rw-r--r--   src/compiler/nir/meson.build              |   1
-rw-r--r--   src/compiler/nir/nir.h                    |   2
-rw-r--r--   src/compiler/nir/nir_opt_combine_stores.c | 401
-rw-r--r--   src/compiler/nir/tests/vars_tests.cpp     | 174
5 files changed, 579 insertions, 0 deletions
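
For context: the new pass tracks consecutive partial store_deref intrinsics that hit the same destination within a block and folds them into the final store of the sequence. Drivers reach it through the nir_opt_combine_stores() prototype added to nir.h below. A rough sketch of how it might be wired into a typical NIR optimization loop follows; the surrounding passes and the mode mask are illustrative only and not part of this commit:

/* Hypothetical driver hook, not part of this commit. */
#include "nir.h"

static void
example_optimize_nir(nir_shader *s)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_dce);
      /* Fold back-to-back partial stores to outputs, temporaries and SSBOs. */
      NIR_PASS(progress, s, nir_opt_combine_stores,
               nir_var_shader_out | nir_var_function_temp | nir_var_mem_ssbo);
   } while (progress);
}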
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 7154fd1bc7d..5436584abdd 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -273,6 +273,7 @@ NIR_FILES = \
 	nir/nir_move_load_const.c \
 	nir/nir_move_vec_src_uses_to_dest.c \
 	nir/nir_normalize_cubemap_coords.c \
+	nir/nir_opt_combine_stores.c \
 	nir/nir_opt_conditional_discard.c \
 	nir/nir_opt_constant_folding.c \
 	nir/nir_opt_copy_prop_vars.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 514f5ceaa56..b46bf8f2ebf 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -156,6 +156,7 @@ files_libnir = files(
   'nir_move_load_const.c',
   'nir_move_vec_src_uses_to_dest.c',
   'nir_normalize_cubemap_coords.c',
+  'nir_opt_combine_stores.c',
   'nir_opt_conditional_discard.c',
   'nir_opt_constant_folding.c',
   'nir_opt_copy_prop_vars.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 22831cf3cc1..98adf1b5331 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3329,6 +3329,8 @@ bool nir_opt_algebraic_before_ffma(nir_shader *shader);
 bool nir_opt_algebraic_late(nir_shader *shader);
 bool nir_opt_constant_folding(nir_shader *shader);
 
+bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);
+
 bool nir_opt_global_to_local(nir_shader *shader);
 
 bool nir_copy_prop(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_combine_stores.c b/src/compiler/nir/nir_opt_combine_stores.c
new file mode 100644
index 00000000000..48b9cfa1501
--- /dev/null
+++ b/src/compiler/nir/nir_opt_combine_stores.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
+
+#include "util/bitscan.h"
+#include "util/list.h"
+#include "util/u_math.h"
+
+/* Combine stores of vectors to the same deref into a single store.
+ *
+ * This per-block pass keeps track of stores of vectors to the same
+ * destination and combines them into the last store of the sequence.  Dead
+ * stores (or parts of a store) found during the process are removed.
+ *
+ * A pending combination becomes an actual combination in various situations:
+ * at the end of the block, when another instruction uses the memory, or due
+ * to barriers.
+ *
+ * Besides vectors, the pass also looks at array derefs of vectors.
+ * For direct array derefs, it works like a write mask access to the given
+ * component.  For indirect access there's no way to know beforehand which
+ * component it will overlap with, so the combination is finished -- the
+ * indirect store remains unmodified.
+ */
+
+/* Keep track of a group of stores that can be combined.  All stores share
+ * the same destination.
+ */
+struct combined_store {
+   struct list_head link;
+
+   nir_component_mask_t write_mask;
+   nir_deref_instr *dst;
+
+   /* Latest store added.  It is reused when combining. */
+   nir_intrinsic_instr *latest;
+
+   /* Original store for each component.  The number of times a store appears
+    * in this array is kept in the store's pass_flags.
+    */
+   nir_intrinsic_instr *stores[NIR_MAX_VEC_COMPONENTS];
+};
+
+struct combine_stores_state {
+   nir_variable_mode modes;
+
+   /* Pending store combinations. */
+   struct list_head pending;
+
+   /* Per function impl state. */
+   nir_builder b;
+   bool progress;
+
+   /* Allocator and freelist to reuse structs between functions. */
+   void *lin_ctx;
+   struct list_head freelist;
+};
+
+static struct combined_store *
+alloc_combined_store(struct combine_stores_state *state)
+{
+   struct combined_store *result;
+   if (list_empty(&state->freelist)) {
+      result = linear_zalloc_child(state->lin_ctx, sizeof(*result));
+   } else {
+      result = list_first_entry(&state->freelist,
+                                struct combined_store,
+                                link);
+      list_del(&result->link);
+      memset(result, 0, sizeof(*result));
+   }
+   return result;
+}
+
+static void
+free_combined_store(struct combine_stores_state *state,
+                    struct combined_store *combo)
+{
+   list_del(&combo->link);
+   combo->write_mask = 0;
+   list_add(&combo->link, &state->freelist);
+}
+
+static void
+combine_stores(struct combine_stores_state *state,
+               struct combined_store *combo)
+{
+   assert(combo->latest);
+   assert(combo->latest->intrinsic == nir_intrinsic_store_deref);
+
+   /* If the combined writemask is the same as the latest store, we know there
+    * is only one store in the combination, so nothing to combine.
+    */
+   if ((combo->write_mask & nir_intrinsic_write_mask(combo->latest)) ==
+       combo->write_mask)
+      return;
+
+   state->b.cursor = nir_before_instr(&combo->latest->instr);
+
+   /* Build a new vec, to be used as source for the combined store.  As it
+    * gets built, remove previous stores that are not needed anymore.
+    */
+   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = {0};
+   unsigned num_components = glsl_get_vector_elements(combo->dst->type);
+   unsigned bit_size = combo->latest->src[1].ssa->bit_size;
+   for (unsigned i = 0; i < num_components; i++) {
+      nir_intrinsic_instr *store = combo->stores[i];
+      if (combo->write_mask & (1 << i)) {
+         assert(store);
+         assert(store->src[1].is_ssa);
+
+         /* If store->num_components == 1 then we are in the deref-of-vec case
+          * and store->src[1] is a scalar.  Otherwise, this is a regular vector
+          * store and we have to pick off a component.
+          */
+         comps[i] = store->num_components == 1 ?
+                       store->src[1].ssa :
+                       nir_channel(&state->b, store->src[1].ssa, i);
+
+         assert(store->instr.pass_flags > 0);
+         if (--store->instr.pass_flags == 0 && store != combo->latest)
+            nir_instr_remove(&store->instr);
+      } else {
+         comps[i] = nir_ssa_undef(&state->b, 1, bit_size);
+      }
+   }
+   assert(combo->latest->instr.pass_flags == 0);
+   nir_ssa_def *vec = nir_vec(&state->b, comps, num_components);
+
+   /* Fix the latest store with the combined information. */
+   nir_intrinsic_instr *store = combo->latest;
+
+   /* In this case, our store is an array deref of a vector, so we need to
+    * rewrite it to use a deref to the whole vector.
+    */
+   if (store->num_components == 1) {
+      store->num_components = num_components;
+      nir_instr_rewrite_src(&store->instr, &store->src[0],
+                            nir_src_for_ssa(&combo->dst->dest.ssa));
+   }
+
+   assert(store->num_components == num_components);
+   nir_intrinsic_set_write_mask(store, combo->write_mask);
+   nir_instr_rewrite_src(&store->instr, &store->src[1],
+                         nir_src_for_ssa(vec));
+   state->progress = true;
+}
+
+static void
+combine_stores_with_deref(struct combine_stores_state *state,
+                          nir_deref_instr *deref)
+{
+   if ((state->modes & deref->mode) == 0)
+      return;
+
+   list_for_each_entry_safe(struct combined_store, combo, &state->pending, link) {
+      if (nir_compare_derefs(combo->dst, deref) & nir_derefs_may_alias_bit) {
+         combine_stores(state, combo);
+         free_combined_store(state, combo);
+      }
+   }
+}
+
+static void
+combine_stores_with_modes(struct combine_stores_state *state,
+                          nir_variable_mode modes)
+{
+   if ((state->modes & modes) == 0)
+      return;
+
+   list_for_each_entry_safe(struct combined_store, combo, &state->pending, link) {
+      if (combo->dst->mode & modes) {
+         combine_stores(state, combo);
+         free_combined_store(state, combo);
+      }
+   }
+}
+
+static struct combined_store *
+find_matching_combined_store(struct combine_stores_state *state,
+                             nir_deref_instr *deref)
+{
+   list_for_each_entry(struct combined_store, combo, &state->pending, link) {
+      if (nir_compare_derefs(combo->dst, deref) & nir_derefs_equal_bit)
+         return combo;
+   }
+   return NULL;
+}
+
+static void
+update_combined_store(struct combine_stores_state *state,
+                      nir_intrinsic_instr *intrin)
+{
+   nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+   if ((dst->mode & state->modes) == 0)
+      return;
+
+   unsigned vec_mask;
+   nir_deref_instr *vec_dst;
+
+   if (glsl_type_is_vector(dst->type)) {
+      vec_mask = nir_intrinsic_write_mask(intrin);
+      vec_dst = dst;
+   } else {
+      /* Besides vectors, only direct array derefs of vectors are handled. */
+      if (dst->deref_type != nir_deref_type_array ||
+          !nir_src_is_const(dst->arr.index) ||
+          !glsl_type_is_vector(nir_deref_instr_parent(dst)->type)) {
+         combine_stores_with_deref(state, dst);
+         return;
+      }
+
+      uint64_t index = nir_src_as_uint(dst->arr.index);
+      vec_dst = nir_deref_instr_parent(dst);
+
+      if (index >= glsl_get_vector_elements(vec_dst->type)) {
+         /* Storing to an invalid index is a no-op. */
+         nir_instr_remove(&intrin->instr);
+         state->progress = true;
+         return;
+      }
+
+      vec_mask = 1 << index;
+   }
+
+   struct combined_store *combo = find_matching_combined_store(state, vec_dst);
+   if (!combo) {
+      combo = alloc_combined_store(state);
+      combo->dst = vec_dst;
+      list_add(&combo->link, &state->pending);
+   }
+
+   /* Use pass_flags to reference count the store based on how many
+    * components are still used by the combination.
+    */
+   intrin->instr.pass_flags = util_bitcount(vec_mask);
+   combo->latest = intrin;
+
+   /* Update the combined_store, clearing up older overlapping references. */
+   combo->write_mask |= vec_mask;
+   while (vec_mask) {
+      unsigned i = u_bit_scan(&vec_mask);
+      nir_intrinsic_instr *prev_store = combo->stores[i];
+
+      if (prev_store) {
+         if (--prev_store->instr.pass_flags == 0) {
+            nir_instr_remove(&prev_store->instr);
+         } else {
+            assert(glsl_type_is_vector(
+                      nir_src_as_deref(prev_store->src[0])->type));
+            nir_component_mask_t prev_mask = nir_intrinsic_write_mask(prev_store);
+            nir_intrinsic_set_write_mask(prev_store, prev_mask & ~(1 << i));
+         }
+         state->progress = true;
+      }
+      combo->stores[i] = combo->latest;
+   }
+}
+
+static void
+combine_stores_block(struct combine_stores_state *state, nir_block *block)
+{
+   nir_foreach_instr_safe(instr, block) {
+      if (instr->type == nir_instr_type_call) {
+         combine_stores_with_modes(state, nir_var_shader_out |
+                                          nir_var_shader_temp |
+                                          nir_var_function_temp |
+                                          nir_var_mem_ssbo |
+                                          nir_var_mem_shared);
+         continue;
+      }
+
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_store_deref:
+         update_combined_store(state, intrin);
+         break;
+
+      case nir_intrinsic_barrier:
+      case nir_intrinsic_group_memory_barrier:
+      case nir_intrinsic_memory_barrier:
+      case nir_intrinsic_memory_barrier_atomic_counter:
+      case nir_intrinsic_memory_barrier_buffer:
+      case nir_intrinsic_memory_barrier_image:
+      case nir_intrinsic_memory_barrier_shared:
+         /* TODO: Be more granular depending on the barrier. */
+         combine_stores_with_modes(state, nir_var_shader_out |
+                                          nir_var_mem_ssbo |
+                                          nir_var_mem_shared);
+         break;
+
+      case nir_intrinsic_emit_vertex:
+      case nir_intrinsic_emit_vertex_with_counter:
+         combine_stores_with_modes(state, nir_var_shader_out);
+         break;
+
+      case nir_intrinsic_load_deref: {
+         nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
+         combine_stores_with_deref(state, src);
+         break;
+      }
+
+      case nir_intrinsic_copy_deref: {
+         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+         nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
+         combine_stores_with_deref(state, dst);
+         combine_stores_with_deref(state, src);
+         break;
+      }
+
+      case nir_intrinsic_deref_atomic_add:
+      case nir_intrinsic_deref_atomic_imin:
+      case nir_intrinsic_deref_atomic_umin:
+      case nir_intrinsic_deref_atomic_imax:
+      case nir_intrinsic_deref_atomic_umax:
+      case nir_intrinsic_deref_atomic_and:
+      case nir_intrinsic_deref_atomic_or:
+      case nir_intrinsic_deref_atomic_xor:
+      case nir_intrinsic_deref_atomic_exchange:
+      case nir_intrinsic_deref_atomic_comp_swap: {
+         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+         combine_stores_with_deref(state, dst);
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+
+   /* At the end of the block, try all the remaining combinations. */
+   combine_stores_with_modes(state, state->modes);
+}
+
+static bool
+combine_stores_impl(struct combine_stores_state *state, nir_function_impl *impl)
+{
+   state->progress = false;
+   nir_builder_init(&state->b, impl);
+
+   nir_foreach_block(block, impl)
+      combine_stores_block(state, block);
+
+   if (state->progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
+   return state->progress;
+}
+
+bool
+nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes)
+{
+   void *mem_ctx = ralloc_context(NULL);
+   struct combine_stores_state state = {
+      .modes = modes,
+      .lin_ctx = linear_zalloc_parent(mem_ctx, 0),
+   };
+
+   list_inithead(&state.pending);
+   list_inithead(&state.freelist);
+
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+      progress |= combine_stores_impl(&state, function->impl);
+   }
+
+   ralloc_free(mem_ctx);
+   return progress;
+}
diff --git a/src/compiler/nir/tests/vars_tests.cpp b/src/compiler/nir/tests/vars_tests.cpp
index f72b6b99f62..9e0b5d2c0df 100644
--- a/src/compiler/nir/tests/vars_tests.cpp
+++ b/src/compiler/nir/tests/vars_tests.cpp
@@ -49,6 +49,10 @@ protected:
       return create_var(mode, glsl_vector_type(GLSL_TYPE_INT, 2), name);
    }
 
+   nir_variable *create_ivec4(nir_variable_mode mode, const char *name) {
+      return create_var(mode, glsl_vector_type(GLSL_TYPE_INT, 4), name);
+   }
+
    nir_variable **create_many_int(nir_variable_mode mode, const char *prefix, unsigned count) {
       nir_variable **result = (nir_variable **)linear_alloc_child(lin_ctx, sizeof(nir_variable *) * count);
       for (unsigned i = 0; i < count; i++)
@@ -63,6 +67,13 @@ protected:
       return result;
    }
 
+   nir_variable **create_many_ivec4(nir_variable_mode mode, const char *prefix, unsigned count) {
+      nir_variable **result = (nir_variable **)linear_alloc_child(lin_ctx, sizeof(nir_variable *) * count);
+      for (unsigned i = 0; i < count; i++)
+         result[i] = create_ivec4(mode, linear_asprintf(lin_ctx, "%s%u", prefix, i));
+      return result;
+   }
+
    unsigned count_intrinsics(nir_intrinsic_op intrinsic);
 
    nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
@@ -132,6 +143,7 @@ nir_vars_test::get_intrinsic(nir_intrinsic_op intrinsic,
 class nir_redundant_load_vars_test : public nir_vars_test {};
 class nir_copy_prop_vars_test : public nir_vars_test {};
 class nir_dead_write_vars_test : public nir_vars_test {};
+class nir_combine_stores_test : public nir_vars_test {};
 
 } // namespace
 
@@ -953,3 +965,165 @@ TEST_F(nir_dead_write_vars_test, DISABLED_unrelated_barrier_in_two_blocks)
    nir_intrinsic_instr *third_store = get_intrinsic(nir_intrinsic_store_deref, 2);
    EXPECT_EQ(nir_intrinsic_get_var(third_store, 0), v[0]);
 }
+
+TEST_F(nir_combine_stores_test, non_overlapping_stores)
+{
+   nir_variable **v = create_many_ivec4(nir_var_mem_ssbo, "v", 4);
+   nir_variable *out = create_ivec4(nir_var_shader_out, "out");
+
+   for (int i = 0; i < 4; i++)
+      nir_store_var(b, out, nir_load_var(b, v[i]), 1 << i);
+
+   nir_validate_shader(b->shader, NULL);
+
+   bool progress = nir_opt_combine_stores(b->shader, nir_var_shader_out);
+   ASSERT_TRUE(progress);
+
+   nir_validate_shader(b->shader, NULL);
+
+   /* Clean up to verify from where the values in the combined store are coming. */
+   nir_copy_prop(b->shader);
+   nir_opt_dce(b->shader);
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+   nir_intrinsic_instr *combined = get_intrinsic(nir_intrinsic_store_deref, 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(combined), 0xf);
+   ASSERT_EQ(nir_intrinsic_get_var(combined, 0), out);
+
+   nir_alu_instr *vec = nir_src_as_alu_instr(&combined->src[1]);
+   ASSERT_TRUE(vec);
+   for (int i = 0; i < 4; i++) {
+      nir_intrinsic_instr *load =
+         nir_instr_as_intrinsic(nir_src_instr(&vec->src[i].src));
+      ASSERT_EQ(load->intrinsic, nir_intrinsic_load_deref);
+      ASSERT_EQ(nir_intrinsic_get_var(load, 0), v[i])
+         << "Source value for component " << i << " of store is wrong";
+      ASSERT_EQ(vec->src[i].swizzle[0], i)
+         << "Source component for component " << i << " of store is wrong";
+   }
+}
+
+TEST_F(nir_combine_stores_test, overlapping_stores)
+{
+   nir_variable **v = create_many_ivec4(nir_var_mem_ssbo, "v", 3);
+   nir_variable *out = create_ivec4(nir_var_shader_out, "out");
+
+   /* Make stores with xy, yz and zw masks. */
+   for (int i = 0; i < 3; i++) {
+      nir_component_mask_t mask = (1 << i) | (1 << (i + 1));
+      nir_store_var(b, out, nir_load_var(b, v[i]), mask);
+   }
+
+   nir_validate_shader(b->shader, NULL);
+
+   bool progress = nir_opt_combine_stores(b->shader, nir_var_shader_out);
+   ASSERT_TRUE(progress);
+
+   nir_validate_shader(b->shader, NULL);
+
+   /* Clean up to verify from where the values in the combined store are coming. */
+   nir_copy_prop(b->shader);
+   nir_opt_dce(b->shader);
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+   nir_intrinsic_instr *combined = get_intrinsic(nir_intrinsic_store_deref, 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(combined), 0xf);
+   ASSERT_EQ(nir_intrinsic_get_var(combined, 0), out);
+
+   nir_alu_instr *vec = nir_src_as_alu_instr(&combined->src[1]);
+   ASSERT_TRUE(vec);
+
+   /* Component x comes from v[0]. */
+   nir_intrinsic_instr *load_for_x =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[0].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_x, 0), v[0]);
+   ASSERT_EQ(vec->src[0].swizzle[0], 0);
+
+   /* Component y comes from v[1]. */
+   nir_intrinsic_instr *load_for_y =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[1].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_y, 0), v[1]);
+   ASSERT_EQ(vec->src[1].swizzle[0], 1);
+
+   /* Components z and w come from v[2]. */
+   nir_intrinsic_instr *load_for_z =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[2].src));
+   nir_intrinsic_instr *load_for_w =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[3].src));
+   ASSERT_EQ(load_for_z, load_for_w);
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_z, 0), v[2]);
+   ASSERT_EQ(vec->src[2].swizzle[0], 2);
+   ASSERT_EQ(vec->src[3].swizzle[0], 3);
+}
+
+TEST_F(nir_combine_stores_test, direct_array_derefs)
+{
+   nir_variable **v = create_many_ivec4(nir_var_mem_ssbo, "vec", 2);
+   nir_variable **s = create_many_int(nir_var_mem_ssbo, "scalar", 2);
+   nir_variable *out = create_ivec4(nir_var_mem_ssbo, "out");
+
+   nir_deref_instr *out_deref = nir_build_deref_var(b, out);
+
+   /* Store to vector with mask x. */
+   nir_store_deref(b, out_deref, nir_load_var(b, v[0]),
+                   1 << 0);
+
+   /* Store to vector with mask yz. */
+   nir_store_deref(b, out_deref, nir_load_var(b, v[1]),
+                   (1 << 2) | (1 << 1));
+
+   /* Store to vector[2], overlapping with previous store. */
+   nir_store_deref(b,
+                   nir_build_deref_array_imm(b, out_deref, 2),
+                   nir_load_var(b, s[0]),
+                   1 << 0);
+
+   /* Store to vector[3], no overlap. */
+   nir_store_deref(b,
+                   nir_build_deref_array_imm(b, out_deref, 3),
+                   nir_load_var(b, s[1]),
+                   1 << 0);
+
+   nir_validate_shader(b->shader, NULL);
+
+   bool progress = nir_opt_combine_stores(b->shader, nir_var_mem_ssbo);
+   ASSERT_TRUE(progress);
+
+   nir_validate_shader(b->shader, NULL);
+
+   /* Clean up to verify from where the values in the combined store are coming. */
+   nir_copy_prop(b->shader);
+   nir_opt_dce(b->shader);
+
+   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
+   nir_intrinsic_instr *combined = get_intrinsic(nir_intrinsic_store_deref, 0);
+   ASSERT_EQ(nir_intrinsic_write_mask(combined), 0xf);
+   ASSERT_EQ(nir_intrinsic_get_var(combined, 0), out);
+
+   nir_alu_instr *vec = nir_src_as_alu_instr(&combined->src[1]);
+   ASSERT_TRUE(vec);
+
+   /* Component x comes from v[0]. */
+   nir_intrinsic_instr *load_for_x =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[0].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_x, 0), v[0]);
+   ASSERT_EQ(vec->src[0].swizzle[0], 0);
+
+   /* Component y comes from v[1]. */
+   nir_intrinsic_instr *load_for_y =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[1].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_y, 0), v[1]);
+   ASSERT_EQ(vec->src[1].swizzle[0], 1);
+
+   /* Component z comes from s[0]. */
+   nir_intrinsic_instr *load_for_z =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[2].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_z, 0), s[0]);
+   ASSERT_EQ(vec->src[2].swizzle[0], 0);
+
+   /* Component w comes from s[1]. */
+   nir_intrinsic_instr *load_for_w =
+      nir_instr_as_intrinsic(nir_src_instr(&vec->src[3].src));
+   ASSERT_EQ(nir_intrinsic_get_var(load_for_w, 0), s[1]);
+   ASSERT_EQ(vec->src[3].swizzle[0], 0);
+}
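
One case the new tests leave uncovered is an indirect component index. Per the pass's header comment, such a store has unknown overlap, so it flushes the pending combination and is itself left untouched. A hypothetical follow-up test in the same style, not part of this patch (the expected store count here is derived from that flush-on-indirect behaviour), might look like:

TEST_F(nir_combine_stores_test, indirect_array_deref_blocks_combining)
{
   nir_variable **v = create_many_ivec4(nir_var_mem_ssbo, "v", 2);
   nir_variable **s = create_many_int(nir_var_mem_ssbo, "s", 2);
   nir_variable *out = create_ivec4(nir_var_mem_ssbo, "out");

   nir_deref_instr *out_deref = nir_build_deref_var(b, out);

   /* Two partial stores to the vector that the pass can combine... */
   nir_store_deref(b, out_deref, nir_load_var(b, v[0]), 1 << 0);
   nir_store_deref(b, out_deref, nir_load_var(b, v[1]), 1 << 1);

   /* ...followed by a store through a non-constant component index, which
    * must survive as a separate store. */
   nir_deref_instr *elem =
      nir_build_deref_array(b, out_deref, nir_load_var(b, s[1]));
   nir_store_deref(b, elem, nir_load_var(b, s[0]), 1);

   nir_validate_shader(b->shader, NULL);

   bool progress = nir_opt_combine_stores(b->shader, nir_var_mem_ssbo);
   ASSERT_TRUE(progress);

   nir_validate_shader(b->shader, NULL);

   /* The two partial stores are combined into one; the indirect store stays. */
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
}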