summary | refs | log | tree | commit | diff | stats
path: root/src/intel/compiler
diff options
context:
space:
mode:
author: Jason Ekstrand <[email protected]> 2018-08-16 16:23:10 -0500
committer: Jason Ekstrand <[email protected]> 2018-08-29 14:04:03 -0500
commit: 09f1de97a76a4990fd7ce909760f3c8933263b05 (patch)
tree: 778444bc1e61c92feb54c7d4bc968fda3edf6476 /src/intel/compiler
parent: 0de003be0363df74a18f463d0291bc8000d4c1dd (diff)
anv,i965: Lower away image derefs in the driver
Previously, the back-end compiler turned image access into magic uniform reads and there was a complex contract between the back-end compiler and the driver about setting up and filling out those params. As of this commit, both drivers now lower image_deref_load_param_intel intrinsics to load_uniform intrinsics controlled by the driver and lower the other image_deref_* intrinsics to image_* intrinsics which take an actual binding table index. There are still "magic" uniforms but they are now added and controlled entirely by the driver and that contract no longer spans components.

This also has the side-effect of making most image use compile-time binding table indices. Previously, all image access pulled the binding table index from a uniform. Part of the reason for this was that the magic uniforms made it difficult to decouple binding table indices from the uniforms and, since they are indexed completely differently (especially in Vulkan), it was hard to pull them apart. Now that the driver is handling both, it's trivial to decouple the two and provide actual binding table indices.

Shader-db results on Kaby Lake:

    total instructions in shared programs: 15166872 -> 15164293 (-0.02%)
    instructions in affected programs: 115834 -> 113255 (-2.23%)
    helped: 191
    HURT: 0

    total cycles in shared programs: 571311495 -> 571196465 (-0.02%)
    cycles in affected programs: 4757115 -> 4642085 (-2.42%)
    helped: 73
    HURT: 67

    total spills in shared programs: 10951 -> 10926 (-0.23%)
    spills in affected programs: 742 -> 717 (-3.37%)
    helped: 7
    HURT: 0

    total fills in shared programs: 22226 -> 22201 (-0.11%)
    fills in affected programs: 1146 -> 1121 (-2.18%)
    helped: 7
    HURT: 0

Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/intel/compiler')
-rw-r--r--  src/intel/compiler/brw_fs.cpp                       10
-rw-r--r--  src/intel/compiler/brw_fs.h                          3
-rw-r--r--  src/intel/compiler/brw_fs_nir.cpp                  181
-rw-r--r--  src/intel/compiler/brw_nir.h                         5
-rw-r--r--  src/intel/compiler/brw_nir_lower_image_load_store.c  41
5 files changed, 122 insertions, 118 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 58736503f9a..02a7a33c4d7 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -494,16 +494,14 @@ type_size_scalar(const struct glsl_type *type)
}
return size;
case GLSL_TYPE_SAMPLER:
- /* Samplers take up no register space, since they're baked in at
- * link time.
- */
- return 0;
case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_IMAGE:
+ /* Samplers, atomics, and images take up no register space, since
+ * they're baked in at link time.
+ */
return 0;
case GLSL_TYPE_SUBROUTINE:
return 1;
- case GLSL_TYPE_IMAGE:
- return BRW_IMAGE_PARAM_SIZE;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 52220db2dc0..aba19d5ab2c 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -216,6 +216,8 @@ public:
nir_intrinsic_instr *instr);
void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
+ fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr);
void nir_emit_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
@@ -235,7 +237,6 @@ public:
fs_reg get_nir_src(const nir_src &src);
fs_reg get_nir_src_imm(const nir_src &src);
fs_reg get_nir_dest(const nir_dest &dest);
- fs_reg get_nir_image_deref(nir_deref_instr *deref);
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
unsigned wr_mask);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index b2be91f9117..aaba0e2a693 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1694,70 +1694,6 @@ fs_visitor::get_nir_dest(const nir_dest &dest)
}
}
-fs_reg
-fs_visitor::get_nir_image_deref(nir_deref_instr *deref)
-{
- fs_reg arr_offset = brw_imm_ud(0);
- unsigned array_size = BRW_IMAGE_PARAM_SIZE * 4;
- nir_deref_instr *head = deref;
- while (head->deref_type != nir_deref_type_var) {
- assert(head->deref_type == nir_deref_type_array);
-
- /* This level's element size is the previous level's array size */
- const unsigned elem_size = array_size;
-
- fs_reg index = retype(get_nir_src_imm(head->arr.index),
- BRW_REGISTER_TYPE_UD);
- if (arr_offset.file == BRW_IMMEDIATE_VALUE &&
- index.file == BRW_IMMEDIATE_VALUE) {
- arr_offset.ud += index.ud * elem_size;
- } else if (index.file == BRW_IMMEDIATE_VALUE) {
- bld.ADD(arr_offset, arr_offset, brw_imm_ud(index.ud * elem_size));
- } else {
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
- bld.MUL(tmp, index, brw_imm_ud(elem_size));
- bld.ADD(tmp, tmp, arr_offset);
- arr_offset = tmp;
- }
-
- head = nir_deref_instr_parent(head);
- assert(glsl_type_is_array(head->type));
- array_size = elem_size * glsl_get_length(head->type);
- }
-
- assert(head->deref_type == nir_deref_type_var);
- const unsigned max_arr_offset = array_size - (BRW_IMAGE_PARAM_SIZE * 4);
- fs_reg image(UNIFORM, head->var->data.driver_location / 4,
- BRW_REGISTER_TYPE_UD);
-
- if (arr_offset.file == BRW_IMMEDIATE_VALUE) {
- /* The offset is in bytes but we want it in dwords */
- return offset(image, bld, MIN2(arr_offset.ud, max_arr_offset) / 4);
- } else {
- /* Accessing an invalid surface index with the dataport can result
- * in a hang. According to the spec "if the index used to
- * select an individual element is negative or greater than or
- * equal to the size of the array, the results of the operation
- * are undefined but may not lead to termination" -- which is one
- * of the possible outcomes of the hang. Clamp the index to
- * prevent access outside of the array bounds.
- */
- bld.emit_minmax(arr_offset, arr_offset, brw_imm_ud(max_arr_offset),
- BRW_CONDITIONAL_L);
-
- /* Emit a pile of MOVs to load the uniform into a temporary. The
- * dead-code elimination pass will get rid of what we don't use.
- */
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE);
- for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) {
- bld.emit(SHADER_OPCODE_MOV_INDIRECT,
- offset(tmp, bld, j), offset(image, bld, j),
- arr_offset, brw_imm_ud(max_arr_offset + 4));
- }
- return tmp;
- }
-}
-
void
fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
unsigned wr_mask)
@@ -3847,6 +3783,43 @@ brw_cond_mod_for_nir_reduction_op(nir_op op)
}
}
+fs_reg
+fs_visitor::get_nir_image_intrinsic_image(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr)
+{
+ fs_reg image = retype(get_nir_src_imm(instr->src[0]), BRW_REGISTER_TYPE_UD);
+
+ if (stage_prog_data->binding_table.image_start > 0) {
+ if (image.file == BRW_IMMEDIATE_VALUE) {
+ image.d += stage_prog_data->binding_table.image_start;
+ } else {
+ bld.ADD(image, image,
+ brw_imm_d(stage_prog_data->binding_table.image_start));
+ }
+ }
+
+ return bld.emit_uniformize(image);
+}
+
+static unsigned
+image_intrinsic_coord_components(nir_intrinsic_instr *instr)
+{
+ switch (nir_intrinsic_image_dim(instr)) {
+ case GLSL_SAMPLER_DIM_1D:
+ return 1 + nir_intrinsic_image_array(instr);
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_RECT:
+ return 2 + nir_intrinsic_image_array(instr);
+ case GLSL_SAMPLER_DIM_3D:
+ case GLSL_SAMPLER_DIM_CUBE:
+ return 3;
+ case GLSL_SAMPLER_DIM_BUF:
+ return 1;
+ default:
+ unreachable("Invalid image dimension");
+ }
+}
+
void
fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
{
@@ -3855,40 +3828,37 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
dest = get_nir_dest(instr->dest);
switch (instr->intrinsic) {
- case nir_intrinsic_image_deref_load:
- case nir_intrinsic_image_deref_store:
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_min:
- case nir_intrinsic_image_deref_atomic_max:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap: {
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap: {
if (stage == MESA_SHADER_FRAGMENT &&
- instr->intrinsic != nir_intrinsic_image_deref_load)
+ instr->intrinsic != nir_intrinsic_image_load)
brw_wm_prog_data(prog_data)->has_side_effects = true;
- /* Get the referenced image variable and type. */
- nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
- const glsl_type *type = deref->type;
-
/* Get some metadata from the image intrinsic. */
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
- const unsigned dims = type->coordinate_components();
+ const unsigned dims = image_intrinsic_coord_components(instr);
+ const GLenum format = nir_intrinsic_format(instr);
const unsigned dest_components = nir_intrinsic_dest_components(instr);
/* Get the arguments of the image intrinsic. */
- const fs_reg image = get_nir_image_deref(deref);
+ const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
const fs_reg coords = retype(get_nir_src(instr->src[1]),
BRW_REGISTER_TYPE_UD);
fs_reg tmp;
/* Emit an image load, store or atomic op. */
- if (instr->intrinsic == nir_intrinsic_image_deref_load) {
+ if (instr->intrinsic == nir_intrinsic_image_load) {
tmp = emit_typed_read(bld, image, coords, dims,
instr->num_components);
- } else if (instr->intrinsic == nir_intrinsic_image_deref_store) {
+ } else if (instr->intrinsic == nir_intrinsic_image_store) {
const fs_reg src0 = get_nir_src(instr->src[3]);
emit_typed_write(bld, image, coords, src0, dims,
instr->num_components);
@@ -3897,7 +3867,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unsigned num_srcs = info->num_srcs;
switch (instr->intrinsic) {
- case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_atomic_add:
assert(num_srcs == 4);
op = get_op_for_atomic_add(instr, 3);
@@ -3905,27 +3875,27 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
if (op != BRW_AOP_ADD)
num_srcs = 3;
break;
- case nir_intrinsic_image_deref_atomic_min:
- op = (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
- BRW_AOP_IMIN : BRW_AOP_UMIN);
+ case nir_intrinsic_image_atomic_min:
+ assert(format == GL_R32UI || format == GL_R32I);
+ op = (format == GL_R32I) ? BRW_AOP_IMIN : BRW_AOP_UMIN;
break;
- case nir_intrinsic_image_deref_atomic_max:
- op = (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
- BRW_AOP_IMAX : BRW_AOP_UMAX);
+ case nir_intrinsic_image_atomic_max:
+ assert(format == GL_R32UI || format == GL_R32I);
+ op = (format == GL_R32I) ? BRW_AOP_IMAX : BRW_AOP_UMAX;
break;
- case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_atomic_and:
op = BRW_AOP_AND;
break;
- case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_atomic_or:
op = BRW_AOP_OR;
break;
- case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_atomic_xor:
op = BRW_AOP_XOR;
break;
- case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_atomic_exchange:
op = BRW_AOP_MOV;
break;
- case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_atomic_comp_swap:
op = BRW_AOP_CMPWR;
break;
default:
@@ -3948,19 +3918,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_image_deref_load_param_intel: {
- nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
- const fs_reg image = get_nir_image_deref(deref);
- const fs_reg param = offset(image, bld, nir_intrinsic_base(instr) * 4);
- for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) {
- bld.MOV(offset(retype(dest, param.type), bld, c),
- offset(param, bld, c));
- }
- break;
- }
-
- case nir_intrinsic_image_deref_load_raw_intel: {
- const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0]));
+ case nir_intrinsic_image_load_raw_intel: {
+ const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
const fs_reg addr = retype(get_nir_src(instr->src[1]),
BRW_REGISTER_TYPE_UD);
@@ -3974,8 +3933,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_image_deref_store_raw_intel: {
- const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0]));
+ case nir_intrinsic_image_store_raw_intel: {
+ const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
const fs_reg addr = retype(get_nir_src(instr->src[1]),
BRW_REGISTER_TYPE_UD);
const fs_reg data = retype(get_nir_src(instr->src[2]),
@@ -4010,7 +3969,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_image_deref_samples:
+ case nir_intrinsic_image_samples:
/* The driver does not support multi-sampled images. */
bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1));
break;
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 72a6ee8884a..50073265539 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -116,6 +116,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir);
bool brw_nir_lower_image_load_store(nir_shader *nir,
const struct gen_device_info *devinfo);
+void brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+ nir_ssa_def *index);
nir_shader *brw_postprocess_nir(nir_shader *nir,
const struct brw_compiler *compiler,
@@ -147,6 +149,9 @@ void brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
struct gl_program *prog,
struct brw_stage_prog_data *stage_prog_data);
+void brw_nir_lower_glsl_images(nir_shader *shader,
+ const struct gl_program *prog);
+
void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
nir_shader *nir,
const struct brw_vs_prog_key *vs_key,
diff --git a/src/intel/compiler/brw_nir_lower_image_load_store.c b/src/intel/compiler/brw_nir_lower_image_load_store.c
index 819fb440f2c..5eba9ddabd3 100644
--- a/src/intel/compiler/brw_nir_lower_image_load_store.c
+++ b/src/intel/compiler/brw_nir_lower_image_load_store.c
@@ -811,3 +811,44 @@ brw_nir_lower_image_load_store(nir_shader *shader,
return progress;
}
+
+void
+brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+ nir_ssa_def *index)
+{
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ switch (intrin->intrinsic) {
+#define CASE(op) \
+ case nir_intrinsic_image_deref_##op: \
+ intrin->intrinsic = nir_intrinsic_image_##op; \
+ break;
+ CASE(load)
+ CASE(store)
+ CASE(atomic_add)
+ CASE(atomic_min)
+ CASE(atomic_max)
+ CASE(atomic_and)
+ CASE(atomic_or)
+ CASE(atomic_xor)
+ CASE(atomic_exchange)
+ CASE(atomic_comp_swap)
+ CASE(atomic_fadd)
+ CASE(size)
+ CASE(samples)
+ CASE(load_raw_intel)
+ CASE(store_raw_intel)
+#undef CASE
+ default:
+ unreachable("Unhanded image intrinsic");
+ }
+
+ nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
+ nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
+ nir_intrinsic_set_access(intrin, var->data.image.access);
+ nir_intrinsic_set_format(intrin, var->data.image.format);
+
+ nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+ nir_src_for_ssa(index));
+}