Diffstat (limited to 'src/intel/vulkan')
-rw-r--r--   src/intel/vulkan/anv_nir_apply_pipeline_layout.c   214
1 file changed, 212 insertions(+), 2 deletions(-)
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index ea02ed1be78..7abc27be103 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -25,6 +25,7 @@
 #include "program/prog_parameter.h"
 #include "nir/nir_builder.h"
 #include "compiler/brw_nir.h"
+#include "util/set.h"
 
 /* Sampler tables don't actually have a maximum size but we pick one just so
  * that we don't end up emitting too much state on-the-fly.
@@ -41,6 +42,9 @@ struct apply_pipeline_layout_state {
    struct anv_pipeline_layout *layout;
    bool add_bounds_checks;
 
+   /* Place to flag lowered instructions so we don't lower them twice */
+   struct set *lowered_instrs;
+
    bool uses_constants;
    uint8_t constants_offset;
    struct {
@@ -143,6 +147,176 @@ get_used_bindings_block(nir_block *block,
    }
 }
 
+static bool
+find_descriptor_for_index_src(nir_src src,
+                              struct apply_pipeline_layout_state *state)
+{
+   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
+
+   while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
+      intrin = nir_src_as_intrinsic(intrin->src[0]);
+
+   if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
+      return false;
+
+   return true;
+}
+
+static bool
+nir_deref_find_descriptor(nir_deref_instr *deref,
+                          struct apply_pipeline_layout_state *state)
+{
+   while (1) {
+      /* Nothing we will use this on has a variable */
+      assert(deref->deref_type != nir_deref_type_var);
+
+      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
+      if (!parent)
+         break;
+
+      deref = parent;
+   }
+   assert(deref->deref_type == nir_deref_type_cast);
+
+   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
+   if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
+      return false;
+
+   return find_descriptor_for_index_src(intrin->src[0], state);
+}
+
+static nir_ssa_def *
+build_index_for_res_reindex(nir_intrinsic_instr *intrin,
+                            struct apply_pipeline_layout_state *state)
+{
+   nir_builder *b = &state->builder;
+
+   if (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
+      nir_ssa_def *bti =
+         build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]), state);
+
+      b->cursor = nir_before_instr(&intrin->instr);
+      return nir_iadd(b, bti, nir_ssa_for_src(b, intrin->src[1], 1));
+   }
+
+   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+
+   uint32_t set = nir_intrinsic_desc_set(intrin);
+   uint32_t binding = nir_intrinsic_binding(intrin);
+
+   const struct anv_descriptor_set_binding_layout *bind_layout =
+      &state->layout->set[set].layout->binding[binding];
+
+   uint32_t surface_index = state->set[set].surface_offsets[binding];
+   uint32_t array_size = bind_layout->array_size;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
+   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
+      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));
+
+   return nir_iadd_imm(b, array_index, surface_index);
+}
+
+static nir_ssa_def *
+build_index_offset_for_deref(nir_deref_instr *deref,
+                             struct apply_pipeline_layout_state *state)
+{
+   nir_builder *b = &state->builder;
+
+   nir_deref_instr *parent = nir_deref_instr_parent(deref);
+   if (parent) {
+      nir_ssa_def *addr = build_index_offset_for_deref(parent, state);
+
+      b->cursor = nir_before_instr(&deref->instr);
+      return nir_explicit_io_address_from_deref(b, deref, addr,
+                                                nir_address_format_32bit_index_offset);
+   }
+
+   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
+   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
+
+   nir_ssa_def *index =
+      build_index_for_res_reindex(nir_src_as_intrinsic(load_desc->src[0]), state);
+
+   /* Return a 0 offset which will get picked up by the recursion */
+   b->cursor = nir_before_instr(&deref->instr);
+   return nir_vec2(b, index, nir_imm_int(b, 0));
+}
+
+static bool
+try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin,
+                                  struct apply_pipeline_layout_state *state)
+{
+   nir_builder *b = &state->builder;
+
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   if (deref->mode != nir_var_mem_ssbo)
+      return false;
+
+   if (!nir_deref_find_descriptor(deref, state))
+      return false;
+
+   nir_ssa_def *addr = build_index_offset_for_deref(deref, state);
+
+   b->cursor = nir_before_instr(&intrin->instr);
+   nir_lower_explicit_io_instr(b, intrin, addr,
+                               nir_address_format_32bit_index_offset);
+   return true;
+}
+
+static void
+lower_direct_buffer_access(nir_function_impl *impl,
+                           struct apply_pipeline_layout_state *state)
+{
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_load_deref:
+         case nir_intrinsic_store_deref:
+         case nir_intrinsic_deref_atomic_add:
+         case nir_intrinsic_deref_atomic_imin:
+         case nir_intrinsic_deref_atomic_umin:
+         case nir_intrinsic_deref_atomic_imax:
+         case nir_intrinsic_deref_atomic_umax:
+         case nir_intrinsic_deref_atomic_and:
+         case nir_intrinsic_deref_atomic_or:
+         case nir_intrinsic_deref_atomic_xor:
+         case nir_intrinsic_deref_atomic_exchange:
+         case nir_intrinsic_deref_atomic_comp_swap:
+         case nir_intrinsic_deref_atomic_fmin:
+         case nir_intrinsic_deref_atomic_fmax:
+         case nir_intrinsic_deref_atomic_fcomp_swap:
+            try_lower_direct_buffer_intrinsic(intrin, state);
+            break;
+
+         case nir_intrinsic_get_buffer_size: {
+            /* The get_buffer_size intrinsic always just takes a
+             * index/reindex intrinsic.
+             */
+            if (!find_descriptor_for_index_src(intrin->src[0], state))
+               break;
+
+            nir_ssa_def *index =
+               build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]),
+                                           state);
+            nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+                                  nir_src_for_ssa(index));
+            _mesa_set_add(state->lowered_instrs, intrin);
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+}
+
 static void
 lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
                           struct apply_pipeline_layout_state *state)
@@ -228,6 +402,9 @@ static void
 lower_get_buffer_size(nir_intrinsic_instr *intrin,
                       struct apply_pipeline_layout_state *state)
 {
+   if (_mesa_set_search(state->lowered_instrs, intrin))
+      return;
+
    nir_builder *b = &state->builder;
 
    b->cursor = nir_before_instr(&intrin->instr);
@@ -539,15 +716,16 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
                               struct brw_stage_prog_data *prog_data,
                               struct anv_pipeline_bind_map *map)
 {
+   void *mem_ctx = ralloc_context(NULL);
+
    struct apply_pipeline_layout_state state = {
       .pdevice = pdevice,
       .shader = shader,
       .layout = layout,
       .add_bounds_checks = robust_buffer_access,
+      .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
    };
 
-   void *mem_ctx = ralloc_context(NULL);
-
    for (unsigned s = 0; s < layout->num_sets; s++) {
       const unsigned count = layout->set[s].layout->binding_count;
       state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
@@ -735,6 +913,38 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
       if (!function->impl)
          continue;
 
+      /* Before we do the normal lowering, we look for any SSBO operations
+       * that we can lower to the BTI model and lower them up-front.  The BTI
+       * model can perform better than the A64 model for a couple reasons:
+       *
+       *  1. 48-bit address calculations are potentially expensive and using
+       *     the BTI model lets us simply compute 32-bit offsets and the
+       *     hardware adds the 64-bit surface base address.
+       *
+       *  2. The BTI messages, because they use surface states, do bounds
+       *     checking for us.  With the A64 model, we have to do our own
+       *     bounds checking and this means wider pointers and extra
+       *     calculations and branching in the shader.
+       *
+       * The solution to both of these is to convert things to the BTI model
+       * opportunistically.  The reason why we need to do this as a pre-pass
+       * is for two reasons:
+       *
+       *  1. The BTI model requires nir_address_format_32bit_index_offset
+       *     pointers which are not the same type as the pointers needed for
+       *     the A64 model.  Because all our derefs are set up for the A64
+       *     model (in case we have variable pointers), we have to crawl all
+       *     the way back to the vulkan_resource_index intrinsic and build a
+       *     completely fresh index+offset calculation.
+       *
+       *  2. Because the variable-pointers-capable lowering that we do as part
+       *     of apply_pipeline_layout_block is destructive (It really has to
+       *     be to handle variable pointers properly), we've lost the deref
+       *     information by the time we get to the load/store/atomic
+       *     intrinsics in that pass.
+       */
+      lower_direct_buffer_access(function->impl, &state);
+
       nir_builder_init(&state.builder, function->impl);
       nir_foreach_block(block, function->impl)
         apply_pipeline_layout_block(block, &state);
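A minimal sketch, not part of the commit above, of the pointer shape the patch converts to: a nir_address_format_32bit_index_offset address is a vec2 of (binding-table index, byte offset), which is what build_index_offset_for_deref produces at the root of the deref chain. The helper name build_bti_address_sketch and its parameters are invented for illustration; only the NIR builder calls mirror the patch.

```c
#include "nir/nir_builder.h"

/* Hypothetical helper (illustration only): build a 32bit_index_offset
 * address from a surface base, a dynamic array index, and a byte offset.
 */
static nir_ssa_def *
build_bti_address_sketch(nir_builder *b, uint32_t surface_index,
                         nir_ssa_def *dynamic_array_index,
                         nir_ssa_def *byte_offset)
{
   /* Binding-table index = surface base + dynamic array index, as in
    * build_index_for_res_reindex above.
    */
   nir_ssa_def *index = nir_iadd_imm(b, dynamic_array_index, surface_index);

   /* The address itself is just the (index, offset) pair; the hardware adds
    * the 64-bit surface base and does the bounds checking.
    */
   return nir_vec2(b, index, byte_offset);
}
```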