summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2018-08-16 16:23:10 -0500
committerJason Ekstrand <[email protected]>2018-08-29 14:04:03 -0500
commit09f1de97a76a4990fd7ce909760f3c8933263b05 (patch)
tree778444bc1e61c92feb54c7d4bc968fda3edf6476 /src/mesa
parent0de003be0363df74a18f463d0291bc8000d4c1dd (diff)
anv,i965: Lower away image derefs in the driver
Previously, the back-end compiler turn image access into magic uniform reads and there was a complex contract between back-end compiler and driver about setting up and filling out those params. As of this commit, both drivers now lower image_deref_load_param_intel intrinsics to load_uniform intrinsics controlled by the driver and lower the other image_deref_* intrinsics to image_* intrinsics which take an actual binding table index. There are still "magic" uniforms but they are now added and controlled entirely by the driver and that contract no longer spans components. This also has the side-effect of making most image use compile-time binding table indices. Previously, all image access pulled the binding table index from a uniform. Part of the reason for this was that the magic uniforms made it difficult to decouple binding table indices from the uniforms and, since they are indexed completely differently (especially in Vulkan), it was hard to pull them apart. Now that the driver is handling both, it's trivial to decouple the two and provide actual binding table indices. Shader-db results on Kaby Lake: total instructions in shared programs: 15166872 -> 15164293 (-0.02%) instructions in affected programs: 115834 -> 113255 (-2.23%) helped: 191 HURT: 0 total cycles in shared programs: 571311495 -> 571196465 (-0.02%) cycles in affected programs: 4757115 -> 4642085 (-2.42%) helped: 73 HURT: 67 total spills in shared programs: 10951 -> 10926 (-0.23%) spills in affected programs: 742 -> 717 (-3.37%) helped: 7 HURT: 0 total fills in shared programs: 22226 -> 22201 (-0.11%) fills in affected programs: 1146 -> 1121 (-2.18%) helped: 7 HURT: 0 Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp130
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c1
2 files changed, 131 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index 54f9f9b1a6b..8a560d9bac1 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -23,6 +23,7 @@
#include "compiler/brw_nir.h"
#include "compiler/glsl/ir_uniform.h"
+#include "compiler/nir/nir_builder.h"
#include "brw_program.h"
static void
@@ -267,3 +268,132 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO;
}
}
+
+static nir_ssa_def *
+get_aoa_deref_offset(nir_builder *b,
+ nir_deref_instr *deref,
+ unsigned elem_size)
+{
+ unsigned array_size = elem_size;
+ nir_ssa_def *offset = nir_imm_int(b, 0);
+
+ while (deref->deref_type != nir_deref_type_var) {
+ assert(deref->deref_type == nir_deref_type_array);
+
+ /* This level's element size is the previous level's array size */
+ nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
+ assert(deref->arr.index.ssa);
+ offset = nir_iadd(b, offset,
+ nir_imul(b, index, nir_imm_int(b, array_size)));
+
+ deref = nir_deref_instr_parent(deref);
+ assert(glsl_type_is_array(deref->type));
+ array_size *= glsl_get_length(deref->type);
+ }
+
+ /* Accessing an invalid surface index with the dataport can result in a
+ * hang. According to the spec "if the index used to select an individual
+ * element is negative or greater than or equal to the size of the array,
+ * the results of the operation are undefined but may not lead to
+ * termination" -- which is one of the possible outcomes of the hang.
+ * Clamp the index to prevent access outside of the array bounds.
+ */
+ return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
+}
+
+void
+brw_nir_lower_glsl_images(nir_shader *shader,
+ const struct gl_program *prog)
+{
+ /* We put image uniforms at the end */
+ nir_foreach_variable(var, &shader->uniforms) {
+ if (!var->type->contains_image())
+ continue;
+
+ /* GL Only allows arrays of arrays of images */
+ assert(var->type->without_array()->is_image());
+ const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());
+
+ var->data.driver_location = shader->num_uniforms;
+ shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4;
+ }
+
+ nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_min:
+ case nir_intrinsic_image_deref_atomic_max:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_size:
+ case nir_intrinsic_image_deref_samples:
+ case nir_intrinsic_image_deref_load_raw_intel:
+ case nir_intrinsic_image_deref_store_raw_intel: {
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ const unsigned num_images =
+ MAX2(1, var->type->arrays_of_arrays_size());
+
+ struct gl_uniform_storage *storage =
+ &prog->sh.data->UniformStorage[var->data.location];
+ const unsigned image_var_idx =
+ storage->opaque[shader->info.stage].index;
+
+ b.cursor = nir_before_instr(&intrin->instr);
+ nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx),
+ get_aoa_deref_offset(&b, deref, 1));
+ brw_nir_rewrite_image_intrinsic(intrin, index);
+ break;
+ }
+
+ case nir_intrinsic_image_deref_load_param_intel: {
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ const unsigned num_images =
+ MAX2(1, var->type->arrays_of_arrays_size());
+
+ b.cursor = nir_instr_remove(&intrin->instr);
+
+ const unsigned param = nir_intrinsic_base(intrin);
+ nir_ssa_def *offset =
+ get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4);
+ offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16));
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b.shader,
+ nir_intrinsic_load_uniform);
+ nir_intrinsic_set_base(load, var->data.driver_location);
+ nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4);
+ load->src[0] = nir_src_for_ssa(offset);
+ load->num_components = intrin->dest.ssa.num_components;
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ intrin->dest.ssa.num_components,
+ intrin->dest.ssa.bit_size, NULL);
+ nir_builder_instr_insert(&b, &load->instr);
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&load->dest.ssa));
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index f5ebd3c3b05..041395ec4c0 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -140,6 +140,7 @@ brw_create_nir(struct brw_context *brw,
}
NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
+ NIR_PASS_V(nir, brw_nir_lower_glsl_images, prog);
return nir;
}