diff options
author | Jason Ekstrand <[email protected]> | 2018-08-16 16:23:10 -0500 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2018-08-29 14:04:03 -0500 |
commit | 09f1de97a76a4990fd7ce909760f3c8933263b05 (patch) | |
tree | 778444bc1e61c92feb54c7d4bc968fda3edf6476 /src/mesa | |
parent | 0de003be0363df74a18f463d0291bc8000d4c1dd (diff) |
anv,i965: Lower away image derefs in the driver
Previously, the back-end compiler turn image access into magic uniform
reads and there was a complex contract between back-end compiler and
driver about setting up and filling out those params. As of this
commit, both drivers now lower image_deref_load_param_intel intrinsics
to load_uniform intrinsics controlled by the driver and lower the other
image_deref_* intrinsics to image_* intrinsics which take an actual
binding table index. There are still "magic" uniforms but they are now
added and controlled entirely by the driver and that contract no longer
spans components.
This also has the side-effect of making most image use compile-time
binding table indices. Previously, all image access pulled the binding
table index from a uniform. Part of the reason for this was that the
magic uniforms made it difficult to decouple binding table indices from
the uniforms and, since they are indexed completely differently
(especially in Vulkan), it was hard to pull them apart. Now that the
driver is handling both, it's trivial to decouple the two and provide
actual binding table indices.
Shader-db results on Kaby Lake:
total instructions in shared programs: 15166872 -> 15164293 (-0.02%)
instructions in affected programs: 115834 -> 113255 (-2.23%)
helped: 191
HURT: 0
total cycles in shared programs: 571311495 -> 571196465 (-0.02%)
cycles in affected programs: 4757115 -> 4642085 (-2.42%)
helped: 73
HURT: 67
total spills in shared programs: 10951 -> 10926 (-0.23%)
spills in affected programs: 742 -> 717 (-3.37%)
helped: 7
HURT: 0
total fills in shared programs: 22226 -> 22201 (-0.11%)
fills in affected programs: 1146 -> 1121 (-2.18%)
helped: 7
HURT: 0
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 130 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 1 |
2 files changed, 131 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index 54f9f9b1a6b..8a560d9bac1 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -23,6 +23,7 @@ #include "compiler/brw_nir.h" #include "compiler/glsl/ir_uniform.h" +#include "compiler/nir/nir_builder.h" #include "brw_program.h" static void @@ -267,3 +268,132 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader, stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO; } } + +static nir_ssa_def * +get_aoa_deref_offset(nir_builder *b, + nir_deref_instr *deref, + unsigned elem_size) +{ + unsigned array_size = elem_size; + nir_ssa_def *offset = nir_imm_int(b, 0); + + while (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + + /* This level's element size is the previous level's array size */ + nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); + assert(deref->arr.index.ssa); + offset = nir_iadd(b, offset, + nir_imul(b, index, nir_imm_int(b, array_size))); + + deref = nir_deref_instr_parent(deref); + assert(glsl_type_is_array(deref->type)); + array_size *= glsl_get_length(deref->type); + } + + /* Accessing an invalid surface index with the dataport can result in a + * hang. According to the spec "if the index used to select an individual + * element is negative or greater than or equal to the size of the array, + * the results of the operation are undefined but may not lead to + * termination" -- which is one of the possible outcomes of the hang. + * Clamp the index to prevent access outside of the array bounds. + */ + return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size)); +} + +void +brw_nir_lower_glsl_images(nir_shader *shader, + const struct gl_program *prog) +{ + /* We put image uniforms at the end */ + nir_foreach_variable(var, &shader->uniforms) { + if (!var->type->contains_image()) + continue; + + /* GL Only allows arrays of arrays of images */ + assert(var->type->without_array()->is_image()); + const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size()); + + var->data.driver_location = shader->num_uniforms; + shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4; + } + + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_load_raw_intel: + case nir_intrinsic_image_deref_store_raw_intel: { + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + const unsigned num_images = + MAX2(1, var->type->arrays_of_arrays_size()); + + struct gl_uniform_storage *storage = + &prog->sh.data->UniformStorage[var->data.location]; + const unsigned image_var_idx = + storage->opaque[shader->info.stage].index; + + b.cursor = nir_before_instr(&intrin->instr); + nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx), + get_aoa_deref_offset(&b, deref, 1)); + brw_nir_rewrite_image_intrinsic(intrin, index); + break; + } + + case nir_intrinsic_image_deref_load_param_intel: { + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + const unsigned num_images = + MAX2(1, var->type->arrays_of_arrays_size()); + + b.cursor = nir_instr_remove(&intrin->instr); + + const unsigned param = nir_intrinsic_base(intrin); + nir_ssa_def *offset = + get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4); + offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16)); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b.shader, + nir_intrinsic_load_uniform); + nir_intrinsic_set_base(load, var->data.driver_location); + nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4); + load->src[0] = nir_src_for_ssa(offset); + load->num_components = intrin->dest.ssa.num_components; + nir_ssa_dest_init(&load->instr, &load->dest, + intrin->dest.ssa.num_components, + intrin->dest.ssa.bit_size, NULL); + nir_builder_instr_insert(&b, &load->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&load->dest.ssa)); + break; + } + + default: + break; + } + } + } +} diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index f5ebd3c3b05..041395ec4c0 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -140,6 +140,7 @@ brw_create_nir(struct brw_context *brw, } NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar); + NIR_PASS_V(nir, brw_nir_lower_glsl_images, prog); return nir; } |