-rw-r--r--  src/compiler/nir/nir_intrinsics.py                   |   6
-rw-r--r--  src/intel/compiler/brw_fs.cpp                        |  10
-rw-r--r--  src/intel/compiler/brw_fs.h                          |   3
-rw-r--r--  src/intel/compiler/brw_fs_nir.cpp                    | 181
-rw-r--r--  src/intel/compiler/brw_nir.h                         |   5
-rw-r--r--  src/intel/compiler/brw_nir_lower_image_load_store.c  |  41
-rw-r--r--  src/intel/vulkan/anv_nir_apply_pipeline_layout.c     | 159
-rw-r--r--  src/intel/vulkan/anv_pipeline.c                      |   4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp       | 130
-rw-r--r--  src/mesa/drivers/dri/i965/brw_program.c              |   1
10 files changed, 371 insertions(+), 169 deletions(-)
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index d7184dadbbc..b06b38fc2ce 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -331,9 +331,9 @@ image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
# variable. The const index specifies which of the six parameters to load.
intrinsic("image_deref_load_param_intel", src_comp=[1], dest_comp=0,
indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
-intrinsic("image_deref_load_raw_intel", src_comp=[1, 1], dest_comp=0,
- flags=[CAN_ELIMINATE])
-intrinsic("image_deref_store_raw_intel", src_comp=[1, 1, 0])
+image("load_raw_intel", src_comp=[1], dest_comp=0,
+ flags=[CAN_ELIMINATE])
+image("store_raw_intel", src_comp=[1, 0])
# Vulkan descriptor set intrinsics
#
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 58736503f9a..02a7a33c4d7 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -494,16 +494,14 @@ type_size_scalar(const struct glsl_type *type)
}
return size;
case GLSL_TYPE_SAMPLER:
- /* Samplers take up no register space, since they're baked in at
- * link time.
- */
- return 0;
case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_IMAGE:
+ /* Samplers, atomics, and images take up no register space, since
+ * they're baked in at link time.
+ */
return 0;
case GLSL_TYPE_SUBROUTINE:
return 1;
- case GLSL_TYPE_IMAGE:
- return BRW_IMAGE_PARAM_SIZE;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 52220db2dc0..aba19d5ab2c 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -216,6 +216,8 @@ public:
nir_intrinsic_instr *instr);
void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
+ fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr);
void nir_emit_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
@@ -235,7 +237,6 @@ public:
fs_reg get_nir_src(const nir_src &src);
fs_reg get_nir_src_imm(const nir_src &src);
fs_reg get_nir_dest(const nir_dest &dest);
- fs_reg get_nir_image_deref(nir_deref_instr *deref);
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
unsigned wr_mask);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index b2be91f9117..aaba0e2a693 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1694,70 +1694,6 @@ fs_visitor::get_nir_dest(const nir_dest &dest)
}
}
-fs_reg
-fs_visitor::get_nir_image_deref(nir_deref_instr *deref)
-{
- fs_reg arr_offset = brw_imm_ud(0);
- unsigned array_size = BRW_IMAGE_PARAM_SIZE * 4;
- nir_deref_instr *head = deref;
- while (head->deref_type != nir_deref_type_var) {
- assert(head->deref_type == nir_deref_type_array);
-
- /* This level's element size is the previous level's array size */
- const unsigned elem_size = array_size;
-
- fs_reg index = retype(get_nir_src_imm(head->arr.index),
- BRW_REGISTER_TYPE_UD);
- if (arr_offset.file == BRW_IMMEDIATE_VALUE &&
- index.file == BRW_IMMEDIATE_VALUE) {
- arr_offset.ud += index.ud * elem_size;
- } else if (index.file == BRW_IMMEDIATE_VALUE) {
- bld.ADD(arr_offset, arr_offset, brw_imm_ud(index.ud * elem_size));
- } else {
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
- bld.MUL(tmp, index, brw_imm_ud(elem_size));
- bld.ADD(tmp, tmp, arr_offset);
- arr_offset = tmp;
- }
-
- head = nir_deref_instr_parent(head);
- assert(glsl_type_is_array(head->type));
- array_size = elem_size * glsl_get_length(head->type);
- }
-
- assert(head->deref_type == nir_deref_type_var);
- const unsigned max_arr_offset = array_size - (BRW_IMAGE_PARAM_SIZE * 4);
- fs_reg image(UNIFORM, head->var->data.driver_location / 4,
- BRW_REGISTER_TYPE_UD);
-
- if (arr_offset.file == BRW_IMMEDIATE_VALUE) {
- /* The offset is in bytes but we want it in dwords */
- return offset(image, bld, MIN2(arr_offset.ud, max_arr_offset) / 4);
- } else {
- /* Accessing an invalid surface index with the dataport can result
- * in a hang. According to the spec "if the index used to
- * select an individual element is negative or greater than or
- * equal to the size of the array, the results of the operation
- * are undefined but may not lead to termination" -- which is one
- * of the possible outcomes of the hang. Clamp the index to
- * prevent access outside of the array bounds.
- */
- bld.emit_minmax(arr_offset, arr_offset, brw_imm_ud(max_arr_offset),
- BRW_CONDITIONAL_L);
-
- /* Emit a pile of MOVs to load the uniform into a temporary. The
- * dead-code elimination pass will get rid of what we don't use.
- */
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE);
- for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) {
- bld.emit(SHADER_OPCODE_MOV_INDIRECT,
- offset(tmp, bld, j), offset(image, bld, j),
- arr_offset, brw_imm_ud(max_arr_offset + 4));
- }
- return tmp;
- }
-}
-
void
fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
unsigned wr_mask)
@@ -3847,6 +3783,43 @@ brw_cond_mod_for_nir_reduction_op(nir_op op)
}
}
+fs_reg
+fs_visitor::get_nir_image_intrinsic_image(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr)
+{
+ fs_reg image = retype(get_nir_src_imm(instr->src[0]), BRW_REGISTER_TYPE_UD);
+
+ if (stage_prog_data->binding_table.image_start > 0) {
+ if (image.file == BRW_IMMEDIATE_VALUE) {
+ image.d += stage_prog_data->binding_table.image_start;
+ } else {
+ bld.ADD(image, image,
+ brw_imm_d(stage_prog_data->binding_table.image_start));
+ }
+ }
+
+ return bld.emit_uniformize(image);
+}
+
+static unsigned
+image_intrinsic_coord_components(nir_intrinsic_instr *instr)
+{
+ switch (nir_intrinsic_image_dim(instr)) {
+ case GLSL_SAMPLER_DIM_1D:
+ return 1 + nir_intrinsic_image_array(instr);
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_RECT:
+ return 2 + nir_intrinsic_image_array(instr);
+ case GLSL_SAMPLER_DIM_3D:
+ case GLSL_SAMPLER_DIM_CUBE:
+ return 3;
+ case GLSL_SAMPLER_DIM_BUF:
+ return 1;
+ default:
+ unreachable("Invalid image dimension");
+ }
+}
+
void
fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
{
@@ -3855,40 +3828,37 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
dest = get_nir_dest(instr->dest);
switch (instr->intrinsic) {
- case nir_intrinsic_image_deref_load:
- case nir_intrinsic_image_deref_store:
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_min:
- case nir_intrinsic_image_deref_atomic_max:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap: {
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap: {
if (stage == MESA_SHADER_FRAGMENT &&
- instr->intrinsic != nir_intrinsic_image_deref_load)
+ instr->intrinsic != nir_intrinsic_image_load)
brw_wm_prog_data(prog_data)->has_side_effects = true;
- /* Get the referenced image variable and type. */
- nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
- const glsl_type *type = deref->type;
-
/* Get some metadata from the image intrinsic. */
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
- const unsigned dims = type->coordinate_components();
+ const unsigned dims = image_intrinsic_coord_components(instr);
+ const GLenum format = nir_intrinsic_format(instr);
const unsigned dest_components = nir_intrinsic_dest_components(instr);
/* Get the arguments of the image intrinsic. */
- const fs_reg image = get_nir_image_deref(deref);
+ const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
const fs_reg coords = retype(get_nir_src(instr->src[1]),
BRW_REGISTER_TYPE_UD);
fs_reg tmp;
/* Emit an image load, store or atomic op. */
- if (instr->intrinsic == nir_intrinsic_image_deref_load) {
+ if (instr->intrinsic == nir_intrinsic_image_load) {
tmp = emit_typed_read(bld, image, coords, dims,
instr->num_components);
- } else if (instr->intrinsic == nir_intrinsic_image_deref_store) {
+ } else if (instr->intrinsic == nir_intrinsic_image_store) {
const fs_reg src0 = get_nir_src(instr->src[3]);
emit_typed_write(bld, image, coords, src0, dims,
instr->num_components);
@@ -3897,7 +3867,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unsigned num_srcs = info->num_srcs;
switch (instr->intrinsic) {
- case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_atomic_add:
assert(num_srcs == 4);
op = get_op_for_atomic_add(instr, 3);
@@ -3905,27 +3875,27 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
if (op != BRW_AOP_ADD)
num_srcs = 3;
break;
- case nir_intrinsic_image_deref_atomic_min:
- op = (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
- BRW_AOP_IMIN : BRW_AOP_UMIN);
+ case nir_intrinsic_image_atomic_min:
+ assert(format == GL_R32UI || format == GL_R32I);
+ op = (format == GL_R32I) ? BRW_AOP_IMIN : BRW_AOP_UMIN;
break;
- case nir_intrinsic_image_deref_atomic_max:
- op = (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
- BRW_AOP_IMAX : BRW_AOP_UMAX);
+ case nir_intrinsic_image_atomic_max:
+ assert(format == GL_R32UI || format == GL_R32I);
+ op = (format == GL_R32I) ? BRW_AOP_IMAX : BRW_AOP_UMAX;
break;
- case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_atomic_and:
op = BRW_AOP_AND;
break;
- case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_atomic_or:
op = BRW_AOP_OR;
break;
- case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_atomic_xor:
op = BRW_AOP_XOR;
break;
- case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_atomic_exchange:
op = BRW_AOP_MOV;
break;
- case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_atomic_comp_swap:
op = BRW_AOP_CMPWR;
break;
default:
@@ -3948,19 +3918,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_image_deref_load_param_intel: {
- nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
- const fs_reg image = get_nir_image_deref(deref);
- const fs_reg param = offset(image, bld, nir_intrinsic_base(instr) * 4);
- for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) {
- bld.MOV(offset(retype(dest, param.type), bld, c),
- offset(param, bld, c));
- }
- break;
- }
-
- case nir_intrinsic_image_deref_load_raw_intel: {
- const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0]));
+ case nir_intrinsic_image_load_raw_intel: {
+ const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
const fs_reg addr = retype(get_nir_src(instr->src[1]),
BRW_REGISTER_TYPE_UD);
@@ -3974,8 +3933,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_image_deref_store_raw_intel: {
- const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0]));
+ case nir_intrinsic_image_store_raw_intel: {
+ const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
const fs_reg addr = retype(get_nir_src(instr->src[1]),
BRW_REGISTER_TYPE_UD);
const fs_reg data = retype(get_nir_src(instr->src[2]),
@@ -4010,7 +3969,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_image_deref_samples:
+ case nir_intrinsic_image_samples:
/* The driver does not support multi-sampled images. */
bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1));
break;
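Note on the helpers added above: image_intrinsic_coord_components() now derives the coordinate count purely from the IMAGE_DIM and IMAGE_ARRAY indices stamped on the intrinsic by the lowering passes, rather than from a GLSL image type. A minimal standalone sketch of the same mapping (illustrative only, not part of the patch):

    /* Sketch of the dim/array -> coordinate-count mapping used above. */
    static unsigned
    coord_count(enum glsl_sampler_dim dim, bool is_array)
    {
       switch (dim) {
       case GLSL_SAMPLER_DIM_BUF:  return 1;            /* buffer texel index */
       case GLSL_SAMPLER_DIM_1D:   return 1 + is_array; /* x (+ layer) */
       case GLSL_SAMPLER_DIM_2D:
       case GLSL_SAMPLER_DIM_RECT: return 2 + is_array; /* x, y (+ layer) */
       case GLSL_SAMPLER_DIM_3D:
       case GLSL_SAMPLER_DIM_CUBE: return 3;            /* x, y, layer (cubes as 2D arrays) */
       default:                    return 0;            /* not an image dim */
       }
    }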
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 72a6ee8884a..50073265539 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -116,6 +116,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir);
bool brw_nir_lower_image_load_store(nir_shader *nir,
const struct gen_device_info *devinfo);
+void brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+ nir_ssa_def *index);
nir_shader *brw_postprocess_nir(nir_shader *nir,
const struct brw_compiler *compiler,
@@ -147,6 +149,9 @@ void brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
struct gl_program *prog,
struct brw_stage_prog_data *stage_prog_data);
+void brw_nir_lower_glsl_images(nir_shader *shader,
+ const struct gl_program *prog);
+
void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
nir_shader *nir,
const struct brw_vs_prog_key *vs_key,
diff --git a/src/intel/compiler/brw_nir_lower_image_load_store.c b/src/intel/compiler/brw_nir_lower_image_load_store.c
index 819fb440f2c..5eba9ddabd3 100644
--- a/src/intel/compiler/brw_nir_lower_image_load_store.c
+++ b/src/intel/compiler/brw_nir_lower_image_load_store.c
@@ -811,3 +811,44 @@ brw_nir_lower_image_load_store(nir_shader *shader,
return progress;
}
+
+void
+brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+ nir_ssa_def *index)
+{
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ switch (intrin->intrinsic) {
+#define CASE(op) \
+ case nir_intrinsic_image_deref_##op: \
+ intrin->intrinsic = nir_intrinsic_image_##op; \
+ break;
+ CASE(load)
+ CASE(store)
+ CASE(atomic_add)
+ CASE(atomic_min)
+ CASE(atomic_max)
+ CASE(atomic_and)
+ CASE(atomic_or)
+ CASE(atomic_xor)
+ CASE(atomic_exchange)
+ CASE(atomic_comp_swap)
+ CASE(atomic_fadd)
+ CASE(size)
+ CASE(samples)
+ CASE(load_raw_intel)
+ CASE(store_raw_intel)
+#undef CASE
+ default:
+ unreachable("Unhandled image intrinsic");
+ }
+
+ nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
+ nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
+ nir_intrinsic_set_access(intrin, var->data.image.access);
+ nir_intrinsic_set_format(intrin, var->data.image.format);
+
+ nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+ nir_src_for_ssa(index));
+}
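brw_nir_rewrite_image_intrinsic() is the helper both drivers call once they have computed a flat surface index: it switches the *_deref_* intrinsic to its index-based form, copies dim/array/access/format from the image variable onto the intrinsic, and replaces src[0] with the index. A minimal call-site sketch, assuming a nir_builder b positioned at the intrinsic and driver-computed values surface_base and array_index (hypothetical names):

    /* Sketch only: surface_base and array_index are assumed driver values. */
    b->cursor = nir_before_instr(&intrin->instr);
    nir_ssa_def *index = nir_iadd(b, nir_imm_int(b, surface_base),
                                     nir_imm_int(b, array_index));
    brw_nir_rewrite_image_intrinsic(intrin, index);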
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 84a664826e8..583b5a17cc6 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -24,6 +24,7 @@
#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"
+#include "compiler/brw_nir.h"
struct apply_pipeline_layout_state {
nir_shader *shader;
@@ -32,6 +33,8 @@ struct apply_pipeline_layout_state {
struct anv_pipeline_layout *layout;
bool add_bounds_checks;
+ unsigned first_image_uniform;
+
bool uses_constants;
uint8_t constants_offset;
struct {
@@ -99,6 +102,9 @@ get_used_bindings_block(nir_block *block,
case nir_intrinsic_image_deref_atomic_comp_swap:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_image_deref_samples:
+ case nir_intrinsic_image_deref_load_param_intel:
+ case nir_intrinsic_image_deref_load_raw_intel:
+ case nir_intrinsic_image_deref_store_raw_intel:
add_deref_src_binding(state, intrin->src[0]);
break;
@@ -179,6 +185,63 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
}
static void
+lower_image_intrinsic(nir_intrinsic_instr *intrin,
+ struct apply_pipeline_layout_state *state)
+{
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ unsigned set = var->data.descriptor_set;
+ unsigned binding = var->data.binding;
+ unsigned array_size =
+ state->layout->set[set].layout->binding[binding].array_size;
+
+ nir_builder *b = &state->builder;
+ b->cursor = nir_before_instr(&intrin->instr);
+
+ nir_ssa_def *index = NULL;
+ if (deref->deref_type != nir_deref_type_var) {
+ assert(deref->deref_type == nir_deref_type_array);
+ index = nir_ssa_for_src(b, deref->arr.index, 1);
+ if (state->add_bounds_checks)
+ index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
+ } else {
+ index = nir_imm_int(b, 0);
+ }
+
+ if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
+ b->cursor = nir_instr_remove(&intrin->instr);
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
+
+ nir_intrinsic_set_base(load, state->first_image_uniform +
+ state->set[set].image_offsets[binding] *
+ BRW_IMAGE_PARAM_SIZE * 4);
+ nir_intrinsic_set_range(load, array_size * BRW_IMAGE_PARAM_SIZE * 4);
+
+ const unsigned param = nir_intrinsic_base(intrin);
+ nir_ssa_def *offset =
+ nir_imul(b, index, nir_imm_int(b, BRW_IMAGE_PARAM_SIZE * 4));
+ offset = nir_iadd(b, offset, nir_imm_int(b, param * 16));
+ load->src[0] = nir_src_for_ssa(offset);
+
+ load->num_components = intrin->dest.ssa.num_components;
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ intrin->dest.ssa.num_components,
+ intrin->dest.ssa.bit_size, NULL);
+ nir_builder_instr_insert(b, &load->instr);
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&load->dest.ssa));
+ } else {
+ unsigned binding_offset = state->set[set].surface_offsets[binding];
+ index = nir_iadd(b, index, nir_imm_int(b, binding_offset));
+ brw_nir_rewrite_image_intrinsic(intrin, index);
+ }
+}
+
+static void
lower_load_constant(nir_intrinsic_instr *intrin,
struct apply_pipeline_layout_state *state)
{
@@ -318,6 +381,23 @@ apply_pipeline_layout_block(nir_block *block,
case nir_intrinsic_vulkan_resource_reindex:
lower_res_reindex_intrinsic(intrin, state);
break;
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_min:
+ case nir_intrinsic_image_deref_atomic_max:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_size:
+ case nir_intrinsic_image_deref_samples:
+ case nir_intrinsic_image_deref_load_param_intel:
+ case nir_intrinsic_image_deref_load_raw_intel:
+ case nir_intrinsic_image_deref_store_raw_intel:
+ lower_image_intrinsic(intrin, state);
+ break;
case nir_intrinsic_load_constant:
lower_load_constant(intrin, state);
break;
@@ -436,6 +516,39 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
}
}
+ unsigned image_uniform;
+ if (map->image_count > 0) {
+ assert(map->image_count <= MAX_IMAGES);
+ assert(shader->num_uniforms == prog_data->nr_params * 4);
+ state.first_image_uniform = shader->num_uniforms;
+ uint32_t *param = brw_stage_prog_data_add_params(prog_data,
+ map->image_count *
+ BRW_IMAGE_PARAM_SIZE);
+ struct anv_push_constants *null_data = NULL;
+ const struct brw_image_param *image_param = null_data->images;
+ for (uint32_t i = 0; i < map->image_count; i++) {
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+ (uintptr_t)&image_param->surface_idx, 1);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+ (uintptr_t)image_param->offset, 2);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+ (uintptr_t)image_param->size, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+ (uintptr_t)image_param->stride, 4);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+ (uintptr_t)image_param->tiling, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+ (uintptr_t)image_param->swizzling, 2);
+
+ param += BRW_IMAGE_PARAM_SIZE;
+ image_param ++;
+ }
+ assert(param == prog_data->param + prog_data->nr_params);
+
+ shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
+ assert(shader->num_uniforms == prog_data->nr_params * 4);
+ }
+
nir_foreach_variable(var, &shader->uniforms) {
const struct glsl_type *glsl_type = glsl_without_array(var->type);
@@ -479,51 +592,5 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
nir_metadata_dominance);
}
- if (map->image_count > 0) {
- assert(map->image_count <= MAX_IMAGES);
- nir_foreach_variable(var, &shader->uniforms) {
- if (glsl_type_is_image(var->type) ||
- (glsl_type_is_array(var->type) &&
- glsl_type_is_image(glsl_get_array_element(var->type)))) {
- /* Images are represented as uniform push constants and the actual
- * information required for reading/writing to/from the image is
- * storred in the uniform.
- */
- unsigned set = var->data.descriptor_set;
- unsigned binding = var->data.binding;
- unsigned image_index = state.set[set].image_offsets[binding];
-
- var->data.driver_location = shader->num_uniforms +
- image_index * BRW_IMAGE_PARAM_SIZE * 4;
- }
- }
-
- uint32_t *param = brw_stage_prog_data_add_params(prog_data,
- map->image_count *
- BRW_IMAGE_PARAM_SIZE);
- struct anv_push_constants *null_data = NULL;
- const struct brw_image_param *image_param = null_data->images;
- for (uint32_t i = 0; i < map->image_count; i++) {
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
- (uintptr_t)&image_param->surface_idx, 1);
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
- (uintptr_t)image_param->offset, 2);
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
- (uintptr_t)image_param->size, 3);
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
- (uintptr_t)image_param->stride, 4);
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
- (uintptr_t)image_param->tiling, 3);
- setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
- (uintptr_t)image_param->swizzling, 2);
-
- param += BRW_IMAGE_PARAM_SIZE;
- image_param ++;
- }
- assert(param == prog_data->param + prog_data->nr_params);
-
- shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
- }
-
ralloc_free(mem_ctx);
}
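With this change the anv pass appends the image params to the push-constant block up front and turns image_deref_load_param_intel into a plain load_uniform; the byte address of a given param is the block start plus the image's slot plus the vec4-aligned param field. A sketch of that addressing, matching the layout set up above (hypothetical helper, not in the patch):

    /* Sketch: byte offset of the 'param'-th image-param vec4 for element
     * 'array_index' of a binding whose params begin 'image_offset' images
     * into the block that starts at 'first_image_uniform'. */
    static unsigned
    image_param_byte_offset(unsigned first_image_uniform, unsigned image_offset,
                            unsigned array_index, unsigned param)
    {
       return first_image_uniform +
              (image_offset + array_index) * BRW_IMAGE_PARAM_SIZE * 4 +
              param * 16;   /* each param field starts on a vec4 boundary */
    }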
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 19d59b7fbac..a3eb68769a2 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -523,6 +523,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
pipeline->needs_data_cache = true;
+ NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
+
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
if (layout) {
anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data,
@@ -532,8 +534,6 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
if (nir->info.stage != MESA_SHADER_COMPUTE)
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
- NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
-
assert(nir->num_uniforms == prog_data->nr_params * 4);
stage->nir = nir;
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index 54f9f9b1a6b..8a560d9bac1 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -23,6 +23,7 @@
#include "compiler/brw_nir.h"
#include "compiler/glsl/ir_uniform.h"
+#include "compiler/nir/nir_builder.h"
#include "brw_program.h"
static void
@@ -267,3 +268,132 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO;
}
}
+
+static nir_ssa_def *
+get_aoa_deref_offset(nir_builder *b,
+ nir_deref_instr *deref,
+ unsigned elem_size)
+{
+ unsigned array_size = elem_size;
+ nir_ssa_def *offset = nir_imm_int(b, 0);
+
+ while (deref->deref_type != nir_deref_type_var) {
+ assert(deref->deref_type == nir_deref_type_array);
+
+ /* This level's element size is the previous level's array size */
+ nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
+ assert(deref->arr.index.ssa);
+ offset = nir_iadd(b, offset,
+ nir_imul(b, index, nir_imm_int(b, array_size)));
+
+ deref = nir_deref_instr_parent(deref);
+ assert(glsl_type_is_array(deref->type));
+ array_size *= glsl_get_length(deref->type);
+ }
+
+ /* Accessing an invalid surface index with the dataport can result in a
+ * hang. According to the spec "if the index used to select an individual
+ * element is negative or greater than or equal to the size of the array,
+ * the results of the operation are undefined but may not lead to
+ * termination" -- which is one of the possible outcomes of the hang.
+ * Clamp the index to prevent access outside of the array bounds.
+ */
+ return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
+}
+
+void
+brw_nir_lower_glsl_images(nir_shader *shader,
+ const struct gl_program *prog)
+{
+ /* We put image uniforms at the end */
+ nir_foreach_variable(var, &shader->uniforms) {
+ if (!var->type->contains_image())
+ continue;
+
+ /* GL only allows arrays of arrays of images */
+ assert(var->type->without_array()->is_image());
+ const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());
+
+ var->data.driver_location = shader->num_uniforms;
+ shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4;
+ }
+
+ nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_min:
+ case nir_intrinsic_image_deref_atomic_max:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_size:
+ case nir_intrinsic_image_deref_samples:
+ case nir_intrinsic_image_deref_load_raw_intel:
+ case nir_intrinsic_image_deref_store_raw_intel: {
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ const unsigned num_images =
+ MAX2(1, var->type->arrays_of_arrays_size());
+
+ struct gl_uniform_storage *storage =
+ &prog->sh.data->UniformStorage[var->data.location];
+ const unsigned image_var_idx =
+ storage->opaque[shader->info.stage].index;
+
+ b.cursor = nir_before_instr(&intrin->instr);
+ nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx),
+ get_aoa_deref_offset(&b, deref, 1));
+ brw_nir_rewrite_image_intrinsic(intrin, index);
+ break;
+ }
+
+ case nir_intrinsic_image_deref_load_param_intel: {
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ const unsigned num_images =
+ MAX2(1, var->type->arrays_of_arrays_size());
+
+ b.cursor = nir_instr_remove(&intrin->instr);
+
+ const unsigned param = nir_intrinsic_base(intrin);
+ nir_ssa_def *offset =
+ get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4);
+ offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16));
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b.shader,
+ nir_intrinsic_load_uniform);
+ nir_intrinsic_set_base(load, var->data.driver_location);
+ nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4);
+ load->src[0] = nir_src_for_ssa(offset);
+ load->num_components = intrin->dest.ssa.num_components;
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ intrin->dest.ssa.num_components,
+ intrin->dest.ssa.bit_size, NULL);
+ nir_builder_instr_insert(&b, &load->instr);
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&load->dest.ssa));
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+ }
+}
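get_aoa_deref_offset() walks an array-of-arrays deref from the innermost index outward, scaling each level by the size of the level below it, then clamps to the array bounds to avoid the dataport hang described in the comment. A worked sketch for a hypothetical image2D img[3][2] accessed as img[i][j]:

    /* Sketch of the offset math above for img[3][2]; elem_size is 1 for
     * surface indices or BRW_IMAGE_PARAM_SIZE * 4 for param bytes. */
    static unsigned
    aoa_offset_example(unsigned i, unsigned j, unsigned elem_size)
    {
       unsigned offset = j * elem_size;   /* innermost level */
       offset += i * 2 * elem_size;       /* outer level: 2 inner elements each */
       /* clamp to the last valid element, index 3 * 2 - 1 = 5 */
       return offset < 5 * elem_size ? offset : 5 * elem_size;
    }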
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index f5ebd3c3b05..041395ec4c0 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -140,6 +140,7 @@ brw_create_nir(struct brw_context *brw,
}
NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
+ NIR_PASS_V(nir, brw_nir_lower_glsl_images, prog);
return nir;
}