author     Jason Ekstrand <[email protected]>   2018-08-16 16:23:10 -0500
committer  Jason Ekstrand <[email protected]>   2018-08-29 14:04:03 -0500
commit     09f1de97a76a4990fd7ce909760f3c8933263b05
tree       778444bc1e61c92feb54c7d4bc968fda3edf6476 /src/intel/vulkan
parent     0de003be0363df74a18f463d0291bc8000d4c1dd
anv,i965: Lower away image derefs in the driver
Previously, the back-end compiler turned image access into magic uniform
reads, and there was a complex contract between the back-end compiler and
the driver about setting up and filling out those params. As of this
commit, both drivers lower image_deref_load_param_intel intrinsics to
load_uniform intrinsics controlled by the driver, and lower the other
image_deref_* intrinsics to image_* intrinsics which take an actual
binding table index. There are still "magic" uniforms, but they are now
added and controlled entirely by the driver, and that contract no longer
spans components.
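For readers tracing the new contract, the arithmetic behind the
image_deref_load_param_intel -> load_uniform lowering can be stated on its
own. Below is a minimal standalone sketch of that offset math, not code
from this patch; the constant 24 standing in for BRW_IMAGE_PARAM_SIZE and
all names here are illustrative assumptions:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for BRW_IMAGE_PARAM_SIZE: the number of uniform
 * dwords of "magic" image metadata pushed per image (six vec4s here). */
#define IMAGE_PARAM_SIZE 24

/* Byte offset fed to the load_uniform that replaces
 * image_deref_load_param_intel: `param` selects a vec4 within the block
 * belonging to array element `elem` of an image whose params begin
 * `image_offset` images past `first_image_uniform`. All names are
 * hypothetical; only the shape of the math mirrors the patch. */
static uint32_t
image_param_uniform_offset(uint32_t first_image_uniform,
                           uint32_t image_offset,
                           uint32_t elem, uint32_t param)
{
   /* Compile-time part: becomes nir_intrinsic_set_base() in the patch. */
   uint32_t base = first_image_uniform +
                   image_offset * IMAGE_PARAM_SIZE * 4;
   /* Runtime part: elem * IMAGE_PARAM_SIZE * 4 + param * 16 is the
    * offset source built with nir_imul/nir_iadd in the patch. */
   return base + elem * IMAGE_PARAM_SIZE * 4 + param * 16;
}

int main(void)
{
   /* e.g. the third param vec4 of element 1, images starting at byte 64 */
   printf("%u\n", image_param_uniform_offset(64, 0, 1, 2));
   return 0;
}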
This also has the side effect of making most image accesses use
compile-time binding table indices. Previously, all image access pulled
the binding table index from a uniform. Part of the reason for this was
that the magic uniforms made it difficult to decouple binding table
indices from the uniforms and, since the two are indexed completely
differently (especially in Vulkan), it was hard to pull them apart. Now
that the driver handles both, it is trivial to decouple the two and
provide actual binding table indices.
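As a rough sketch of the decoupling this enables — every type, field, and
number below is hypothetical, not anv's real layout — the driver can now
resolve a Vulkan (set, binding) pair to a flat binding-table slot with a
plain table lookup, which constant-folds whenever the array index is
constant:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical per-pipeline mapping from Vulkan (set, binding) to a flat
 * hardware binding-table slot; anv keeps an analogous surface_offsets
 * table per descriptor set. */
struct toy_pipeline_layout {
   uint8_t surface_offsets[4][8]; /* [set][binding] -> first slot */
};

static uint32_t
binding_table_index(const struct toy_pipeline_layout *layout,
                    uint32_t set, uint32_t binding, uint32_t array_elem)
{
   /* Non-arrayed images have array_elem == 0, so the result is known at
    * compile time and no uniform read is needed. */
   return layout->surface_offsets[set][binding] + array_elem;
}

int main(void)
{
   struct toy_pipeline_layout layout = { .surface_offsets = {{ 3 }} };
   printf("slot = %u\n", binding_table_index(&layout, 0, 0, 0));
   return 0;
}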
Shader-db results on Kaby Lake:
total instructions in shared programs: 15166872 -> 15164293 (-0.02%)
instructions in affected programs: 115834 -> 113255 (-2.23%)
helped: 191
HURT: 0
total cycles in shared programs: 571311495 -> 571196465 (-0.02%)
cycles in affected programs: 4757115 -> 4642085 (-2.42%)
helped: 73
HURT: 67
total spills in shared programs: 10951 -> 10926 (-0.23%)
spills in affected programs: 742 -> 717 (-3.37%)
helped: 7
HURT: 0
total fills in shared programs: 22226 -> 22201 (-0.11%)
fills in affected programs: 1146 -> 1121 (-2.18%)
helped: 7
HURT: 0
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/intel/vulkan')
-rw-r--r--  src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 159
-rw-r--r--  src/intel/vulkan/anv_pipeline.c                  |   4
2 files changed, 115 insertions, 48 deletions
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 84a664826e8..583b5a17cc6 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -24,6 +24,7 @@
 #include "anv_nir.h"
 #include "program/prog_parameter.h"
 #include "nir/nir_builder.h"
+#include "compiler/brw_nir.h"
 
 struct apply_pipeline_layout_state {
    nir_shader *shader;
@@ -32,6 +33,8 @@ struct apply_pipeline_layout_state {
    struct anv_pipeline_layout *layout;
    bool add_bounds_checks;
 
+   unsigned first_image_uniform;
+
    bool uses_constants;
    uint8_t constants_offset;
    struct {
@@ -99,6 +102,9 @@ get_used_bindings_block(nir_block *block,
       case nir_intrinsic_image_deref_atomic_comp_swap:
       case nir_intrinsic_image_deref_size:
       case nir_intrinsic_image_deref_samples:
+      case nir_intrinsic_image_deref_load_param_intel:
+      case nir_intrinsic_image_deref_load_raw_intel:
+      case nir_intrinsic_image_deref_store_raw_intel:
         add_deref_src_binding(state, intrin->src[0]);
         break;
 
@@ -179,6 +185,63 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
 }
 
 static void
+lower_image_intrinsic(nir_intrinsic_instr *intrin,
+                      struct apply_pipeline_layout_state *state)
+{
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   unsigned set = var->data.descriptor_set;
+   unsigned binding = var->data.binding;
+   unsigned array_size =
+      state->layout->set[set].layout->binding[binding].array_size;
+
+   nir_builder *b = &state->builder;
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_ssa_def *index = NULL;
+   if (deref->deref_type != nir_deref_type_var) {
+      assert(deref->deref_type == nir_deref_type_array);
+      index = nir_ssa_for_src(b, deref->arr.index, 1);
+      if (state->add_bounds_checks)
+         index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
+   } else {
+      index = nir_imm_int(b, 0);
+   }
+
+   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
+      b->cursor = nir_instr_remove(&intrin->instr);
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
+
+      nir_intrinsic_set_base(load, state->first_image_uniform +
+                                   state->set[set].image_offsets[binding] *
+                                   BRW_IMAGE_PARAM_SIZE * 4);
+      nir_intrinsic_set_range(load, array_size * BRW_IMAGE_PARAM_SIZE * 4);
+
+      const unsigned param = nir_intrinsic_base(intrin);
+      nir_ssa_def *offset =
+         nir_imul(b, index, nir_imm_int(b, BRW_IMAGE_PARAM_SIZE * 4));
+      offset = nir_iadd(b, offset, nir_imm_int(b, param * 16));
+      load->src[0] = nir_src_for_ssa(offset);
+
+      load->num_components = intrin->dest.ssa.num_components;
+      nir_ssa_dest_init(&load->instr, &load->dest,
+                        intrin->dest.ssa.num_components,
+                        intrin->dest.ssa.bit_size, NULL);
+      nir_builder_instr_insert(b, &load->instr);
+
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                               nir_src_for_ssa(&load->dest.ssa));
+   } else {
+      unsigned binding_offset = state->set[set].surface_offsets[binding];
+      index = nir_iadd(b, index, nir_imm_int(b, binding_offset));
+      brw_nir_rewrite_image_intrinsic(intrin, index);
+   }
+}
+
+static void
 lower_load_constant(nir_intrinsic_instr *intrin,
                     struct apply_pipeline_layout_state *state)
 {
@@ -318,6 +381,23 @@ apply_pipeline_layout_block(nir_block *block,
       case nir_intrinsic_vulkan_resource_reindex:
         lower_res_reindex_intrinsic(intrin, state);
         break;
+      case nir_intrinsic_image_deref_load:
+      case nir_intrinsic_image_deref_store:
+      case nir_intrinsic_image_deref_atomic_add:
+      case nir_intrinsic_image_deref_atomic_min:
+      case nir_intrinsic_image_deref_atomic_max:
+      case nir_intrinsic_image_deref_atomic_and:
+      case nir_intrinsic_image_deref_atomic_or:
+      case nir_intrinsic_image_deref_atomic_xor:
+      case nir_intrinsic_image_deref_atomic_exchange:
+      case nir_intrinsic_image_deref_atomic_comp_swap:
+      case nir_intrinsic_image_deref_size:
+      case nir_intrinsic_image_deref_samples:
+      case nir_intrinsic_image_deref_load_param_intel:
+      case nir_intrinsic_image_deref_load_raw_intel:
+      case nir_intrinsic_image_deref_store_raw_intel:
+        lower_image_intrinsic(intrin, state);
+        break;
       case nir_intrinsic_load_constant:
         lower_load_constant(intrin, state);
         break;
@@ -436,6 +516,39 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
       }
    }
 
+   unsigned image_uniform;
+   if (map->image_count > 0) {
+      assert(map->image_count <= MAX_IMAGES);
+      assert(shader->num_uniforms == prog_data->nr_params * 4);
+      state.first_image_uniform = shader->num_uniforms;
+      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
+                                                       map->image_count *
+                                                       BRW_IMAGE_PARAM_SIZE);
+      struct anv_push_constants *null_data = NULL;
+      const struct brw_image_param *image_param = null_data->images;
+      for (uint32_t i = 0; i < map->image_count; i++) {
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+                                  (uintptr_t)&image_param->surface_idx, 1);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+                                  (uintptr_t)image_param->offset, 2);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+                                  (uintptr_t)image_param->size, 3);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+                                  (uintptr_t)image_param->stride, 4);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+                                  (uintptr_t)image_param->tiling, 3);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+                                  (uintptr_t)image_param->swizzling, 2);
+
+         param += BRW_IMAGE_PARAM_SIZE;
+         image_param ++;
+      }
+      assert(param == prog_data->param + prog_data->nr_params);
+
+      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
+      assert(shader->num_uniforms == prog_data->nr_params * 4);
+   }
+
    nir_foreach_variable(var, &shader->uniforms) {
       const struct glsl_type *glsl_type = glsl_without_array(var->type);
 
@@ -479,51 +592,5 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
                               nir_metadata_dominance);
    }
 
-   if (map->image_count > 0) {
-      assert(map->image_count <= MAX_IMAGES);
-      nir_foreach_variable(var, &shader->uniforms) {
-         if (glsl_type_is_image(var->type) ||
-             (glsl_type_is_array(var->type) &&
-              glsl_type_is_image(glsl_get_array_element(var->type)))) {
-            /* Images are represented as uniform push constants and the actual
-             * information required for reading/writing to/from the image is
-             * storred in the uniform.
-             */
-            unsigned set = var->data.descriptor_set;
-            unsigned binding = var->data.binding;
-            unsigned image_index = state.set[set].image_offsets[binding];
-
-            var->data.driver_location = shader->num_uniforms +
-                                        image_index * BRW_IMAGE_PARAM_SIZE * 4;
-         }
-      }
-
-      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
-                                                       map->image_count *
-                                                       BRW_IMAGE_PARAM_SIZE);
-      struct anv_push_constants *null_data = NULL;
-      const struct brw_image_param *image_param = null_data->images;
-      for (uint32_t i = 0; i < map->image_count; i++) {
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
-                                  (uintptr_t)&image_param->surface_idx, 1);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
-                                  (uintptr_t)image_param->offset, 2);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
-                                  (uintptr_t)image_param->size, 3);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
-                                  (uintptr_t)image_param->stride, 4);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
-                                  (uintptr_t)image_param->tiling, 3);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
-                                  (uintptr_t)image_param->swizzling, 2);
-
-         param += BRW_IMAGE_PARAM_SIZE;
-         image_param ++;
-      }
-      assert(param == prog_data->param + prog_data->nr_params);
-
-      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
-   }
-
    ralloc_free(mem_ctx);
 }
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 19d59b7fbac..a3eb68769a2 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -523,6 +523,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
       pipeline->needs_data_cache = true;
 
+   NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
+
    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
    if (layout) {
       anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data,
@@ -532,8 +534,6 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    if (nir->info.stage != MESA_SHADER_COMPUTE)
       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
 
-   NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
-
    assert(nir->num_uniforms == prog_data->nr_params * 4);
 
    stage->nir = nir;
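One idiom in the diff worth calling out: `struct anv_push_constants
*null_data = NULL;` followed by taking member addresses computes each image
param's byte offset within the push-constant block. The sketch below
reproduces the trick with simplified stand-in types (not anv's real
structs); strictly speaking the null-pointer arithmetic is undefined
behavior in C, which is what offsetof() encapsulates:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for brw_image_param / anv_push_constants. */
struct toy_image_param {
   uint32_t surface_idx;
   uint32_t offset[2];
};

struct toy_push_constants {
   uint32_t client_data[16];
   struct toy_image_param images[8];
};

int main(void)
{
   /* The diff's idiom: addresses relative to a NULL base are offsets. */
   struct toy_push_constants *null_data = NULL;
   const struct toy_image_param *image_param = null_data->images;
   printf("%zu\n", (size_t)(uintptr_t)&image_param->surface_idx);

   /* The well-defined spelling of the same offset. */
   printf("%zu\n", offsetof(struct toy_push_constants, images) +
                   offsetof(struct toy_image_param, surface_idx));
   return 0;
}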