author    Jason Ekstrand <[email protected]>  2018-08-16 16:23:10 -0500
committer Jason Ekstrand <[email protected]>  2018-08-29 14:04:03 -0500
commit    09f1de97a76a4990fd7ce909760f3c8933263b05 (patch)
tree      778444bc1e61c92feb54c7d4bc968fda3edf6476 /src/intel/vulkan
parent    0de003be0363df74a18f463d0291bc8000d4c1dd (diff)
anv,i965: Lower away image derefs in the driver
Previously, the back-end compiler turned image access into magic uniform reads and there was a complex contract between the back-end compiler and the driver about setting up and filling out those params. As of this commit, both drivers now lower image_deref_load_param_intel intrinsics to load_uniform intrinsics controlled by the driver and lower the other image_deref_* intrinsics to image_* intrinsics which take an actual binding table index. There are still "magic" uniforms but they are now added and controlled entirely by the driver and that contract no longer spans the two components.

This also has the side-effect of making most image use compile-time binding table indices. Previously, all image access pulled the binding table index from a uniform. Part of the reason for this was that the magic uniforms made it difficult to decouple binding table indices from the uniforms and, since they are indexed completely differently (especially in Vulkan), it was hard to pull them apart. Now that the driver is handling both, it's trivial to decouple the two and provide actual binding table indices.

Shader-db results on Kaby Lake:

   total instructions in shared programs: 15166872 -> 15164293 (-0.02%)
   instructions in affected programs: 115834 -> 113255 (-2.23%)
   helped: 191
   HURT: 0

   total cycles in shared programs: 571311495 -> 571196465 (-0.02%)
   cycles in affected programs: 4757115 -> 4642085 (-2.42%)
   helped: 73
   HURT: 67

   total spills in shared programs: 10951 -> 10926 (-0.23%)
   spills in affected programs: 742 -> 717 (-3.37%)
   helped: 7
   HURT: 0

   total fills in shared programs: 22226 -> 22201 (-0.11%)
   fills in affected programs: 1146 -> 1121 (-2.18%)
   helped: 7
   HURT: 0

Reviewed-by: Kenneth Graunke <[email protected]>
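To make the new driver-side contract concrete, here is a minimal sketch, in plain C, of the byte offset the lowered load_uniform ends up reading for one image param. The BRW_IMAGE_PARAM_SIZE value comes from brw_compiler.h; compute_image_param_offset() and its argument names are hypothetical, chosen to mirror lower_image_intrinsic() in the diff below.

    #include <stdint.h>
    #include <stdio.h>

    /* From brw_compiler.h: each image consumes BRW_IMAGE_PARAM_SIZE dwords of
     * push-constant space, laid out as one vec4 (16 bytes) per param slot:
     * surface_idx, offset, size, stride, tiling, swizzling. */
    #define BRW_IMAGE_PARAM_SIZE 24

    /* Hypothetical helper mirroring the base/offset math the pass builds with
     * nir_builder; all values are byte offsets into the push constants. */
    static uint32_t
    compute_image_param_offset(uint32_t first_image_uniform, /* state->first_image_uniform */
                               uint32_t image_offset, /* state->set[set].image_offsets[binding] */
                               uint32_t array_index,  /* 0 for a non-arrayed image variable */
                               uint32_t param)        /* nir_intrinsic_base(intrin) */
    {
       /* nir_intrinsic_set_base(load, ...): start of this binding's params */
       uint32_t base = first_image_uniform + image_offset * BRW_IMAGE_PARAM_SIZE * 4;
       /* the imul/iadd chain on the array index, plus one vec4 per param slot */
       return base + array_index * BRW_IMAGE_PARAM_SIZE * 4 + param * 16;
    }

    int main(void)
    {
       /* e.g. param slot 2 (the size vec4) of the first image of a binding */
       printf("%u\n", compute_image_param_offset(0, 0, 0, 2)); /* prints 32 */
       return 0;
    }

For a non-arrayed image, array_index is the constant 0 and the whole offset folds to a compile-time constant; the non-param image_deref_* intrinsics take the other branch in lower_image_intrinsic() and get an actual binding table index via brw_nir_rewrite_image_intrinsic().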
Diffstat (limited to 'src/intel/vulkan')
-rw-r--r--  src/intel/vulkan/anv_nir_apply_pipeline_layout.c  159
-rw-r--r--  src/intel/vulkan/anv_pipeline.c                      4
2 files changed, 115 insertions, 48 deletions
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 84a664826e8..583b5a17cc6 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -24,6 +24,7 @@
#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"
+#include "compiler/brw_nir.h"
struct apply_pipeline_layout_state {
nir_shader *shader;
@@ -32,6 +33,8 @@ struct apply_pipeline_layout_state {
struct anv_pipeline_layout *layout;
bool add_bounds_checks;
+ unsigned first_image_uniform;
+
bool uses_constants;
uint8_t constants_offset;
struct {
@@ -99,6 +102,9 @@ get_used_bindings_block(nir_block *block,
case nir_intrinsic_image_deref_atomic_comp_swap:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_image_deref_samples:
+ case nir_intrinsic_image_deref_load_param_intel:
+ case nir_intrinsic_image_deref_load_raw_intel:
+ case nir_intrinsic_image_deref_store_raw_intel:
add_deref_src_binding(state, intrin->src[0]);
break;
@@ -179,6 +185,63 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
}
static void
+lower_image_intrinsic(nir_intrinsic_instr *intrin,
+ struct apply_pipeline_layout_state *state)
+{
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ unsigned set = var->data.descriptor_set;
+ unsigned binding = var->data.binding;
+ unsigned array_size =
+ state->layout->set[set].layout->binding[binding].array_size;
+
+ nir_builder *b = &state->builder;
+ b->cursor = nir_before_instr(&intrin->instr);
+
+ nir_ssa_def *index = NULL;
+ if (deref->deref_type != nir_deref_type_var) {
+ assert(deref->deref_type == nir_deref_type_array);
+ index = nir_ssa_for_src(b, deref->arr.index, 1);
+ if (state->add_bounds_checks)
+ index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
+ } else {
+ index = nir_imm_int(b, 0);
+ }
+
+ if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
+ b->cursor = nir_instr_remove(&intrin->instr);
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
+
+ nir_intrinsic_set_base(load, state->first_image_uniform +
+ state->set[set].image_offsets[binding] *
+ BRW_IMAGE_PARAM_SIZE * 4);
+ nir_intrinsic_set_range(load, array_size * BRW_IMAGE_PARAM_SIZE * 4);
+
+ const unsigned param = nir_intrinsic_base(intrin);
+ nir_ssa_def *offset =
+ nir_imul(b, index, nir_imm_int(b, BRW_IMAGE_PARAM_SIZE * 4));
+ offset = nir_iadd(b, offset, nir_imm_int(b, param * 16));
+ load->src[0] = nir_src_for_ssa(offset);
+
+ load->num_components = intrin->dest.ssa.num_components;
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ intrin->dest.ssa.num_components,
+ intrin->dest.ssa.bit_size, NULL);
+ nir_builder_instr_insert(b, &load->instr);
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&load->dest.ssa));
+ } else {
+ unsigned binding_offset = state->set[set].surface_offsets[binding];
+ index = nir_iadd(b, index, nir_imm_int(b, binding_offset));
+ brw_nir_rewrite_image_intrinsic(intrin, index);
+ }
+}
+
+static void
lower_load_constant(nir_intrinsic_instr *intrin,
struct apply_pipeline_layout_state *state)
{
@@ -318,6 +381,23 @@ apply_pipeline_layout_block(nir_block *block,
case nir_intrinsic_vulkan_resource_reindex:
lower_res_reindex_intrinsic(intrin, state);
break;
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_min:
+ case nir_intrinsic_image_deref_atomic_max:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_size:
+ case nir_intrinsic_image_deref_samples:
+ case nir_intrinsic_image_deref_load_param_intel:
+ case nir_intrinsic_image_deref_load_raw_intel:
+ case nir_intrinsic_image_deref_store_raw_intel:
+ lower_image_intrinsic(intrin, state);
+ break;
case nir_intrinsic_load_constant:
lower_load_constant(intrin, state);
break;
@@ -436,6 +516,39 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
}
}
+ unsigned image_uniform;
+ if (map->image_count > 0) {
+ assert(map->image_count <= MAX_IMAGES);
+ assert(shader->num_uniforms == prog_data->nr_params * 4);
+ state.first_image_uniform = shader->num_uniforms;
+ uint32_t *param = brw_stage_prog_data_add_params(prog_data,
+ map->image_count *
+ BRW_IMAGE_PARAM_SIZE);
+ struct anv_push_constants *null_data = NULL;
+ const struct brw_image_param *image_param = null_data->images;
+ for (uint32_t i = 0; i < map->image_count; i++) {
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+ (uintptr_t)&image_param->surface_idx, 1);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+ (uintptr_t)image_param->offset, 2);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+ (uintptr_t)image_param->size, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+ (uintptr_t)image_param->stride, 4);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+ (uintptr_t)image_param->tiling, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+ (uintptr_t)image_param->swizzling, 2);
+
+ param += BRW_IMAGE_PARAM_SIZE;
+ image_param ++;
+ }
+ assert(param == prog_data->param + prog_data->nr_params);
+
+ shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
+ assert(shader->num_uniforms == prog_data->nr_params * 4);
+ }
+
nir_foreach_variable(var, &shader->uniforms) {
const struct glsl_type *glsl_type = glsl_without_array(var->type);
@@ -479,51 +592,5 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
nir_metadata_dominance);
}
- if (map->image_count > 0) {
- assert(map->image_count <= MAX_IMAGES);
- nir_foreach_variable(var, &shader->uniforms) {
- if (glsl_type_is_image(var->type) ||
- (glsl_type_is_array(var->type) &&
- glsl_type_is_image(glsl_get_array_element(var->type)))) {
- /* Images are represented as uniform push constants and the actual
- * information required for reading/writing to/from the image is
- * storred in the uniform.
- */
- unsigned set = var->data.descriptor_set;
- unsigned binding = var->data.binding;
- unsigned image_index = state.set[set].image_offsets[binding];
-
- var->data.driver_location = shader->num_uniforms +
- image_index * BRW_IMAGE_PARAM_SIZE * 4;
- }
- }
-
- uint32_t *param = brw_stage_prog_data_add_params(prog_data,
- map->image_count *
- BRW_IMAGE_PARAM_SIZE);
- struct anv_push_constants *null_data = NULL;
- const struct brw_image_param *image_param = null_data->images;
- for (uint32_t i = 0; i < map->image_count; i++) {
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
- (uintptr_t)&image_param->surface_idx, 1);
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
- (uintptr_t)image_param->offset, 2);
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
- (uintptr_t)image_param->size, 3);
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
- (uintptr_t)image_param->stride, 4);
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
- (uintptr_t)image_param->tiling, 3);
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
- (uintptr_t)image_param->swizzling, 2);
-
- param += BRW_IMAGE_PARAM_SIZE;
- image_param ++;
- }
- assert(param == prog_data->param + prog_data->nr_params);
-
- shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
- }
-
ralloc_free(mem_ctx);
}
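One idiom in the uniform-setup block above worth spelling out: the `struct anv_push_constants *null_data = NULL` pointer is never dereferenced. Taking the address of a member through the NULL base yields that member's byte offset within anv_push_constants, which is stored as the uniform param and later resolved by the driver against the real per-stage push constant data. Below is a self-contained sketch of the trick with hypothetical stand-in structs; formally the NULL-base form is undefined behavior in ISO C, and offsetof() is the well-defined equivalent that compilers treat identically in practice.

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the real anv/brw structs. */
    struct image_param    { uint32_t surface_idx; uint32_t offset[2]; };
    struct push_constants { uint32_t pad[4]; struct image_param images[8]; };

    int main(void)
    {
       struct push_constants *null_data = NULL;

       /* Address arithmetic through a NULL base: the pointer value that
        * falls out is just the member's byte offset within the struct. */
       uintptr_t via_null = (uintptr_t)&null_data->images[0].surface_idx;

       /* The well-defined spelling of the same computation. */
       size_t via_offsetof = offsetof(struct push_constants, images[0].surface_idx);

       printf("%zu == %zu\n", (size_t)via_null, via_offsetof); /* 16 == 16 */
       return 0;
    }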
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 19d59b7fbac..a3eb68769a2 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -523,6 +523,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
pipeline->needs_data_cache = true;
+ NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
+
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
if (layout) {
anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data,
@@ -532,8 +534,6 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
if (nir->info.stage != MESA_SHADER_COMPUTE)
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
- NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
-
assert(nir->num_uniforms == prog_data->nr_params * 4);
stage->nir = nir;
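A note on the anv_pipeline.c hunk: it only moves brw_nir_lower_image_load_store earlier, from after brw_nir_analyze_ubo_ranges to before anv_nir_apply_pipeline_layout. That ordering is now required, since the lowering pass emits the image_deref_load_param_intel and image_deref_load/store_raw_intel intrinsics that the layout pass now consumes. The assert that nir->num_uniforms == prog_data->nr_params * 4 still holds afterwards because the layout pass grows num_uniforms itself, in step with the image params it adds.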