author     Jason Ekstrand <[email protected]>   2015-11-25 14:14:05 -0800
committer  Jason Ekstrand <[email protected]>   2015-12-10 12:25:16 -0800
commit     78b81be627734ea7fa50ea246c07b0d4a3a1638a (patch)
tree       10b0b098de5b3a111d076e9d8c5fca440fad45ad
parent     f3970fad9e5b04e04de366a65fed5a30da618f9d (diff)
nir: Get rid of *_indirect variants of input/output load/store intrinsics
There is some special-casing needed in a competent back-end. However,
back-ends can do their special-casing easily enough based on whether or
not the offset is a constant. In the meantime, having the *_indirect
variants adds special cases in a number of places where they don't need
to be and, in general, only complicates things. To complicate matters
further, NIR had no way to convert an indirect load/store to a direct
one in the case that the indirect offset was a constant, so we would
still not really get what the back-ends wanted. The best solution seems
to be to get rid of the *_indirect variants entirely.

This commit is a bunch of different changes squashed together:

 - nir: Get rid of *_indirect variants of input/output load/store intrinsics
 - nir/glsl: Stop handling UBO/SSBO load/stores differently depending on indirect
 - nir/lower_io: Get rid of load/store_foo_indirect
 - i965/fs: Get rid of load/store_foo_indirect
 - i965/vec4: Get rid of load/store_foo_indirect
 - tgsi_to_nir: Get rid of load/store_foo_indirect
 - ir3/nir: Use the new unified io intrinsics
 - vc4: Do all uniform loads with byte offsets
 - vc4/nir: Use the new unified io intrinsics
 - vc4: Fix load_user_clip_plane crash
 - vc4: add missing src for store outputs
 - vc4: Fix state uniforms
 - nir/lower_clip: Update to the new load/store intrinsics
 - nir/lower_two_sided_color: Update to the new load intrinsic

NIR and i965 changes are
Reviewed-by: Kenneth Graunke <[email protected]>

NIR indirect declarations and vc4 changes are
Reviewed-by: Eric Anholt <[email protected]>

ir3 changes are
Reviewed-by: Rob Clark <[email protected]>

NIR changes are
Acked-by: Rob Clark <[email protected]>
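The back-end pattern this enables, as a minimal sketch assuming the
post-patch source layout (base in const_index[0], offset as src[0] for
load_uniform); handle_load_uniform(), backend_ctx, and the emit_*
helpers are hypothetical:

    static void
    handle_load_uniform(struct backend_ctx *ctx, nir_intrinsic_instr *intr)
    {
       unsigned base = intr->const_index[0];
       nir_const_value *const_offset = nir_src_as_const_value(intr->src[0]);

       if (const_offset) {
          /* Direct access: fold the constant offset into the base. */
          emit_uniform_load(ctx, base + const_offset->u[0]);
       } else {
          /* Indirect access: add the SSA offset at run time. */
          emit_uniform_load_indirect(ctx, base, &intr->src[0]);
       }
    }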
-rw-r--r--  src/gallium/auxiliary/nir/tgsi_to_nir.c              |  52
-rw-r--r--  src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c |  79
-rw-r--r--  src/gallium/drivers/vc4/vc4_nir_lower_blend.c        |   2
-rw-r--r--  src/gallium/drivers/vc4/vc4_nir_lower_io.c           |  54
-rw-r--r--  src/gallium/drivers/vc4/vc4_program.c                |  47
-rw-r--r--  src/gallium/drivers/vc4/vc4_qir.h                    |   2
-rw-r--r--  src/glsl/nir/glsl_to_nir.cpp                         |  74
-rw-r--r--  src/glsl/nir/nir.h                                   |   2
-rw-r--r--  src/glsl/nir/nir_intrinsics.h                        |  88
-rw-r--r--  src/glsl/nir/nir_lower_clip.c                        |   3
-rw-r--r--  src/glsl/nir/nir_lower_io.c                          | 113
-rw-r--r--  src/glsl/nir/nir_lower_phis_to_scalar.c              |   4
-rw-r--r--  src/glsl/nir/nir_lower_two_sided_color.c             |   2
-rw-r--r--  src/glsl/nir/nir_print.c                             |   6
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs.h                   |   2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_nir.cpp             | 128
-rw-r--r--  src/mesa/drivers/dri/i965/brw_nir.c                  |  45
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp        |   7
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp           |  95
19 files changed, 391 insertions, 414 deletions
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 5fef5423f82..5def6d3f32a 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -468,7 +468,7 @@ ttn_emit_immediate(struct ttn_compile *c)
nir_builder_instr_insert(b, &load_const->instr);
}
-static nir_src
+static nir_ssa_def *
ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect);
/* generate either a constant or indirect deref chain for accessing an
@@ -487,7 +487,7 @@ ttn_array_deref(struct ttn_compile *c, nir_intrinsic_instr *instr,
if (indirect) {
arr->deref_array_type = nir_deref_array_type_indirect;
- arr->indirect = ttn_src_for_indirect(c, indirect);
+ arr->indirect = nir_src_for_ssa(ttn_src_for_indirect(c, indirect));
} else {
arr->deref_array_type = nir_deref_array_type_direct;
}
@@ -586,19 +586,14 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
switch (file) {
case TGSI_FILE_INPUT:
- op = indirect ? nir_intrinsic_load_input_indirect :
- nir_intrinsic_load_input;
+ op = nir_intrinsic_load_input;
assert(!dim);
break;
case TGSI_FILE_CONSTANT:
if (dim) {
- op = indirect ? nir_intrinsic_load_ubo_indirect :
- nir_intrinsic_load_ubo;
- /* convert index from vec4 to byte: */
- index *= 16;
+ op = nir_intrinsic_load_ubo;
} else {
- op = indirect ? nir_intrinsic_load_uniform_indirect :
- nir_intrinsic_load_uniform;
+ op = nir_intrinsic_load_uniform;
}
break;
default:
@@ -609,7 +604,6 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
load = nir_intrinsic_instr_create(b->shader, op);
load->num_components = 4;
- load->const_index[0] = index;
if (dim) {
if (dimind) {
load->src[srcn] =
@@ -622,17 +616,26 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
}
srcn++;
}
- if (indirect) {
- load->src[srcn] = ttn_src_for_indirect(c, indirect);
- if (dim) {
- assert(load->src[srcn].is_ssa);
- /* we also need to covert vec4 to byte here too: */
- load->src[srcn] =
- nir_src_for_ssa(nir_ishl(b, load->src[srcn].ssa,
- nir_imm_int(b, 4)));
+
+ nir_ssa_def *offset;
+ if (dim) {
+ /* UBO loads don't have a const_index[0] base offset. */
+ offset = nir_imm_int(b, index);
+ if (indirect) {
+ offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
+ }
+ /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
+ offset = nir_ishl(b, offset, nir_imm_int(b, 4));
+ } else {
+ load->const_index[0] = index;
+ if (indirect) {
+ offset = ttn_src_for_indirect(c, indirect);
+ } else {
+ offset = nir_imm_int(b, 0);
}
- srcn++;
}
+ load->src[srcn++] = nir_src_for_ssa(offset);
+
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
nir_builder_instr_insert(b, &load->instr);
@@ -648,7 +651,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
return src;
}
-static nir_src
+static nir_ssa_def *
ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect)
{
nir_builder *b = &c->build;
@@ -660,7 +663,7 @@ ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect)
indirect->File,
indirect->Index,
NULL, NULL, NULL);
- return nir_src_for_ssa(nir_imov_alu(b, src, 1));
+ return nir_imov_alu(b, src, 1);
}
static nir_alu_dest
@@ -729,7 +732,7 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) {
nir_src *indirect = ralloc(c->build.shader, nir_src);
- *indirect = ttn_src_for_indirect(c, &tgsi_fdst->Indirect);
+ *indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect));
dest.dest.reg.indirect = indirect;
}
@@ -1927,9 +1930,10 @@ ttn_add_output_stores(struct ttn_compile *c)
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
unsigned loc = var->data.driver_location + i;
store->num_components = 4;
- store->const_index[0] = loc;
store->src[0].reg.reg = c->output_regs[loc].reg;
store->src[0].reg.base_offset = c->output_regs[loc].offset;
+ store->const_index[0] = loc;
+ store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_builder_instr_insert(b, &store->instr);
}
}
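The vec4-to-byte conversion in the hunk above, isolated as a sketch (b
is the nir_builder; index and indirect_def are stand-ins for the TGSI
register index and its optional indirect part):

    nir_ssa_def *offset = nir_imm_int(b, index);    /* base, in vec4 slots */
    if (indirect_def)
       offset = nir_iadd(b, offset, indirect_def);  /* plus indirect slots */
    /* UBO offsets are in bytes; one vec4 is 16 bytes, hence the shift. */
    offset = nir_ishl(b, offset, nir_imm_int(b, 4));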
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 2723959cb5f..eea5c5e28db 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1218,6 +1218,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
{
struct ir3_block *b = ctx->block;
struct ir3_instruction *addr, *src0, *src1;
+ nir_const_value *const_offset;
/* UBO addresses are the first driver params: */
unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0);
unsigned off = intr->const_index[0];
@@ -1231,7 +1232,10 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
addr = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0));
}
- if (intr->intrinsic == nir_intrinsic_load_ubo_indirect) {
+ const_offset = nir_src_as_const_value(intr->src[1]);
+ if (const_offset) {
+ off += const_offset->u[0];
+ } else {
/* For load_ubo_indirect, second src is indirect offset: */
src1 = get_src(ctx, &intr->src[1])[0];
@@ -1394,6 +1398,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction **dst, **src;
struct ir3_block *b = ctx->block;
unsigned idx = intr->const_index[0];
+ nir_const_value *const_offset;
if (info->has_dest) {
dst = get_dst(ctx, &intr->dest, intr->num_components);
@@ -1403,43 +1408,49 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_load_uniform:
- for (int i = 0; i < intr->num_components; i++) {
- unsigned n = idx * 4 + i;
- dst[i] = create_uniform(ctx, n);
- }
- break;
- case nir_intrinsic_load_uniform_indirect:
- src = get_src(ctx, &intr->src[0]);
- for (int i = 0; i < intr->num_components; i++) {
- unsigned n = idx * 4 + i;
- dst[i] = create_uniform_indirect(ctx, n,
- get_addr(ctx, src[0]));
+ const_offset = nir_src_as_const_value(intr->src[0]);
+ if (const_offset) {
+ idx += const_offset->u[0];
+ for (int i = 0; i < intr->num_components; i++) {
+ unsigned n = idx * 4 + i;
+ dst[i] = create_uniform(ctx, n);
+ }
+ } else {
+ src = get_src(ctx, &intr->src[0]);
+ for (int i = 0; i < intr->num_components; i++) {
+ unsigned n = idx * 4 + i;
+ dst[i] = create_uniform_indirect(ctx, n,
+ get_addr(ctx, src[0]));
+ }
+ /* NOTE: if relative addressing is used, we set
+ * constlen in the compiler (to worst-case value)
+ * since we don't know in the assembler what the max
+ * addr reg value can be:
+ */
+ ctx->so->constlen = ctx->s->num_uniforms;
}
- /* NOTE: if relative addressing is used, we set constlen in
- * the compiler (to worst-case value) since we don't know in
- * the assembler what the max addr reg value can be:
- */
- ctx->so->constlen = ctx->s->num_uniforms;
break;
case nir_intrinsic_load_ubo:
- case nir_intrinsic_load_ubo_indirect:
emit_intrinsic_load_ubo(ctx, intr, dst);
break;
case nir_intrinsic_load_input:
- for (int i = 0; i < intr->num_components; i++) {
- unsigned n = idx * 4 + i;
- dst[i] = ctx->ir->inputs[n];
- }
- break;
- case nir_intrinsic_load_input_indirect:
- src = get_src(ctx, &intr->src[0]);
- struct ir3_instruction *collect =
- create_collect(b, ctx->ir->inputs, ctx->ir->ninputs);
- struct ir3_instruction *addr = get_addr(ctx, src[0]);
- for (int i = 0; i < intr->num_components; i++) {
- unsigned n = idx * 4 + i;
- dst[i] = create_indirect_load(ctx, ctx->ir->ninputs,
- n, addr, collect);
+ const_offset = nir_src_as_const_value(intr->src[0]);
+ if (const_offset) {
+ idx += const_offset->u[0];
+ for (int i = 0; i < intr->num_components; i++) {
+ unsigned n = idx * 4 + i;
+ dst[i] = ctx->ir->inputs[n];
+ }
+ } else {
+ src = get_src(ctx, &intr->src[0]);
+ struct ir3_instruction *collect =
+ create_collect(b, ctx->ir->inputs, ctx->ir->ninputs);
+ struct ir3_instruction *addr = get_addr(ctx, src[0]);
+ for (int i = 0; i < intr->num_components; i++) {
+ unsigned n = idx * 4 + i;
+ dst[i] = create_indirect_load(ctx, ctx->ir->ninputs,
+ n, addr, collect);
+ }
}
break;
case nir_intrinsic_load_var:
@@ -1449,6 +1460,10 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
emit_intrinisic_store_var(ctx, intr);
break;
case nir_intrinsic_store_output:
+ const_offset = nir_src_as_const_value(intr->src[1]);
+ compile_assert(ctx, const_offset != NULL);
+ idx += const_offset->u[0];
+
src = get_src(ctx, &intr->src[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 38676cff6b7..4b10cb7fe56 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -61,6 +61,7 @@ vc4_nir_get_dst_color(nir_builder *b, int sample)
nir_intrinsic_load_input);
load->num_components = 1;
load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT + sample;
+ load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
nir_builder_instr_insert(b, &load->instr);
return &load->dest.ssa;
@@ -612,6 +613,7 @@ vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
intr->const_index[0] = sample_mask->data.location;
intr->src[0] = nir_src_for_ssa(val);
+ intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_builder_instr_insert(b, &intr->instr);
}
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index 72a514756fd..a46af77f370 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -179,6 +179,12 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
/* All TGSI-to-NIR inputs are vec4. */
assert(intr->num_components == 4);
+ /* We only accept direct inputs and TGSI only ever gives them to us
+ * with an offset value of 0.
+ */
+ assert(nir_src_as_const_value(intr->src[0]) &&
+ nir_src_as_const_value(intr->src[0])->u[0] == 0);
+
/* Generate dword loads for the VPM values (Since these intrinsics may
* be reordered, the actual reads will be generated at the top of the
* shader by ntq_setup_inputs().
@@ -190,6 +196,7 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_load_input);
intr_comp->num_components = 1;
intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
+ intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
nir_builder_instr_insert(b, &intr_comp->instr);
@@ -245,6 +252,12 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
/* All TGSI-to-NIR inputs are vec4. */
assert(intr->num_components == 4);
+ /* We only accept direct inputs and TGSI only ever gives them to us
+ * with an offset value of 0.
+ */
+ assert(nir_src_as_const_value(intr->src[0]) &&
+ nir_src_as_const_value(intr->src[0])->u[0] == 0);
+
/* Generate scalar loads equivalent to the original VEC4. */
nir_ssa_def *dests[4];
for (unsigned i = 0; i < intr->num_components; i++) {
@@ -252,6 +265,8 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input);
intr_comp->num_components = 1;
intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
+ intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+
nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
nir_builder_instr_insert(b, &intr_comp->instr);
@@ -319,6 +334,12 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
/* All TGSI-to-NIR outputs are VEC4. */
assert(intr->num_components == 4);
+ /* We only accept direct outputs and TGSI only ever gives them to us
+ * with an offset value of 0.
+ */
+ assert(nir_src_as_const_value(intr->src[1]) &&
+ nir_src_as_const_value(intr->src[1])->u[0] == 0);
+
b->cursor = nir_before_instr(&intr->instr);
for (unsigned i = 0; i < intr->num_components; i++) {
@@ -330,6 +351,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
assert(intr->src[0].is_ssa);
intr_comp->src[0] =
nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i));
+ intr_comp->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_builder_instr_insert(b, &intr_comp->instr);
}
@@ -340,8 +362,8 @@ static void
vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
- /* All TGSI-to-NIR uniform loads are vec4, but we may create dword
- * loads in our lowering passes.
+ /* All TGSI-to-NIR uniform loads are vec4, but we need byte offsets
+ * in the backend.
*/
if (intr->num_components == 1)
return;
@@ -357,24 +379,23 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
intr_comp->num_components = 1;
nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
- if (intr->intrinsic == nir_intrinsic_load_uniform_indirect) {
- /* Convert the variable TGSI register index to a byte
- * offset.
+ /* Convert the uniform (not user_clip_plane) offset to bytes.
+ * If it happens to be a constant, constant-folding will clean
+ * up the shift for us.
+ */
+ if (intr->intrinsic == nir_intrinsic_load_uniform) {
+ /* Convert the base offset to bytes and add the
+ * component
*/
+ intr_comp->const_index[0] = (intr->const_index[0] * 16 + i * 4);
+
intr_comp->src[0] =
- nir_src_for_ssa(nir_ishl(b,
- intr->src[0].ssa,
+ nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
nir_imm_int(b, 4)));
-
- /* Convert the offset to be a byte index, too. */
- intr_comp->const_index[0] = (intr->const_index[0] * 16 +
- i * 4);
} else {
- /* We want a dword index for non-indirect uniform
- * loads.
- */
- intr_comp->const_index[0] = (intr->const_index[0] * 4 +
- i);
+ assert(intr->intrinsic ==
+ nir_intrinsic_load_user_clip_plane);
+ intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
}
dests[i] = &intr_comp->dest.ssa;
@@ -406,7 +427,6 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
break;
case nir_intrinsic_load_uniform:
- case nir_intrinsic_load_uniform_indirect:
case nir_intrinsic_load_user_clip_plane:
vc4_nir_lower_uniform(c, b, intr);
break;
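Putting the two halves of the vc4 change together (a sketch; the names
mirror this hunk and the vc4_program.c hunk below): the lowering pass
produces byte offsets, and QIR emission turns constant ones back into
dword indices:

    /* vc4_nir_lower_io.c: base in bytes = vec4 slot * 16 + component * 4;
     * the variable part is shifted left by 4 (vec4 slots -> bytes). */
    unsigned byte_base = intr->const_index[0] * 16 + i * 4;

    /* vc4_program.c: constant byte offsets are dword-aligned by
     * construction, so they can be converted back to dword indices. */
    unsigned offset = instr->const_index[0] + const_offset->u[0];
    assert(offset % 4 == 0);
    offset /= 4;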
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 31968bb5db9..caad05cb9f7 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -115,8 +115,9 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
nir_intrinsic_instr *intr =
nir_intrinsic_instr_create(b->shader,
nir_intrinsic_load_uniform);
- intr->const_index[0] = VC4_NIR_STATE_UNIFORM_OFFSET + contents;
+ intr->const_index[0] = (VC4_NIR_STATE_UNIFORM_OFFSET + contents) * 4;
intr->num_components = 1;
+ intr->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL);
nir_builder_instr_insert(b, &intr->instr);
return &intr->dest.ssa;
@@ -1516,6 +1517,8 @@ static void
ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
{
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+ nir_const_value *const_offset;
+ unsigned offset;
struct qreg *dest = NULL;
if (info->has_dest) {
@@ -1525,21 +1528,25 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
switch (instr->intrinsic) {
case nir_intrinsic_load_uniform:
assert(instr->num_components == 1);
- if (instr->const_index[0] < VC4_NIR_STATE_UNIFORM_OFFSET) {
- *dest = qir_uniform(c, QUNIFORM_UNIFORM,
- instr->const_index[0]);
+ const_offset = nir_src_as_const_value(instr->src[0]);
+ if (const_offset) {
+ offset = instr->const_index[0] + const_offset->u[0];
+ assert(offset % 4 == 0);
+ /* We need dwords */
+ offset = offset / 4;
+ if (offset < VC4_NIR_STATE_UNIFORM_OFFSET) {
+ *dest = qir_uniform(c, QUNIFORM_UNIFORM,
+ offset);
+ } else {
+ *dest = qir_uniform(c, offset -
+ VC4_NIR_STATE_UNIFORM_OFFSET,
+ 0);
+ }
} else {
- *dest = qir_uniform(c, instr->const_index[0] -
- VC4_NIR_STATE_UNIFORM_OFFSET,
- 0);
+ *dest = indirect_uniform_load(c, instr);
}
break;
- case nir_intrinsic_load_uniform_indirect:
- *dest = indirect_uniform_load(c, instr);
-
- break;
-
case nir_intrinsic_load_user_clip_plane:
*dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
instr->const_index[0]);
@@ -1551,7 +1558,10 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_load_input:
assert(instr->num_components == 1);
+ const_offset = nir_src_as_const_value(instr->src[0]);
+ assert(const_offset && "vc4 doesn't support indirect inputs");
if (instr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT) {
+ assert(const_offset->u[0] == 0);
/* Reads of the per-sample color need to be done in
* order.
*/
@@ -1565,17 +1575,22 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
}
*dest = c->color_reads[sample_index];
} else {
- *dest = c->inputs[instr->const_index[0]];
+ offset = instr->const_index[0] + const_offset->u[0];
+ *dest = c->inputs[offset];
}
break;
case nir_intrinsic_store_output:
+ const_offset = nir_src_as_const_value(instr->src[1]);
+ assert(const_offset && "vc4 doesn't support indirect outputs");
+ offset = instr->const_index[0] + const_offset->u[0];
+
/* MSAA color outputs are the only case where we have an
* output that's not lowered to being a store of a single 32
* bit value.
*/
if (c->stage == QSTAGE_FRAG && instr->num_components == 4) {
- assert(instr->const_index[0] == c->output_color_index);
+ assert(offset == c->output_color_index);
for (int i = 0; i < 4; i++) {
c->sample_colors[i] =
qir_MOV(c, ntq_get_src(c, instr->src[0],
@@ -1583,9 +1598,9 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
}
} else {
assert(instr->num_components == 1);
- c->outputs[instr->const_index[0]] =
+ c->outputs[offset] =
qir_MOV(c, ntq_get_src(c, instr->src[0], 0));
- c->num_outputs = MAX2(c->num_outputs, instr->const_index[0] + 1);
+ c->num_outputs = MAX2(c->num_outputs, offset + 1);
}
break;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index d53095ed222..b875760a2ca 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -444,7 +444,7 @@ struct vc4_compile {
/* Special offset for nir_load_uniform values to get a QUNIFORM_*
* state-dependent value.
*/
-#define VC4_NIR_STATE_UNIFORM_OFFSET 2000000000
+#define VC4_NIR_STATE_UNIFORM_OFFSET 1000000000
struct vc4_compile *qir_compile_init(void);
void qir_compile_destroy(struct vc4_compile *c);
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index fc0f4049941..db8b0cae814 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -885,24 +885,12 @@ nir_visitor::visit(ir_call *ir)
ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
assert(write_mask);
- /* Check if we need the indirect version */
- ir_constant *const_offset = offset->as_constant();
- if (!const_offset) {
- op = nir_intrinsic_store_ssbo_indirect;
- ralloc_free(instr);
- instr = nir_intrinsic_instr_create(shader, op);
- instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
- instr->const_index[0] = 0;
- } else {
- instr->const_index[0] = const_offset->value.u[0];
- }
-
- instr->const_index[1] = write_mask->value.u[0];
-
instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
+ instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
+ instr->const_index[0] = write_mask->value.u[0];
instr->num_components = val->type->vector_elements;
- instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
nir_builder_instr_insert(&b, &instr->instr);
break;
}
@@ -913,20 +901,8 @@ nir_visitor::visit(ir_call *ir)
param = param->get_next();
ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
- /* Check if we need the indirect version */
- ir_constant *const_offset = offset->as_constant();
- if (!const_offset) {
- op = nir_intrinsic_load_ssbo_indirect;
- ralloc_free(instr);
- instr = nir_intrinsic_instr_create(shader, op);
- instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
- instr->const_index[0] = 0;
- dest = &instr->dest;
- } else {
- instr->const_index[0] = const_offset->value.u[0];
- }
-
instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block));
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
const glsl_type *type = ir->return_deref->var->type;
instr->num_components = type->vector_elements;
@@ -1010,18 +986,8 @@ nir_visitor::visit(ir_call *ir)
exec_node *param = ir->actual_parameters.get_head();
ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
- /* Check if we need the indirect version */
- ir_constant *const_offset = offset->as_constant();
- if (!const_offset) {
- op = nir_intrinsic_load_shared_indirect;
- ralloc_free(instr);
- instr = nir_intrinsic_instr_create(shader, op);
- instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));
- instr->const_index[0] = 0;
- dest = &instr->dest;
- } else {
- instr->const_index[0] = const_offset->value.u[0];
- }
+ instr->const_index[0] = 0;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));
const glsl_type *type = ir->return_deref->var->type;
instr->num_components = type->vector_elements;
@@ -1044,17 +1010,8 @@ nir_visitor::visit(ir_call *ir)
ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
assert(write_mask);
- /* Check if we need the indirect version */
- ir_constant *const_offset = offset->as_constant();
- if (!const_offset) {
- op = nir_intrinsic_store_shared_indirect;
- ralloc_free(instr);
- instr = nir_intrinsic_instr_create(shader, op);
- instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
- instr->const_index[0] = 0;
- } else {
- instr->const_index[0] = const_offset->value.u[0];
- }
+ instr->const_index[0] = 0;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
instr->const_index[1] = write_mask->value.u[0];
@@ -1303,20 +1260,11 @@ nir_visitor::visit(ir_expression *ir)
/* Some special cases */
switch (ir->operation) {
case ir_binop_ubo_load: {
- ir_constant *const_index = ir->operands[1]->as_constant();
-
- nir_intrinsic_op op;
- if (const_index) {
- op = nir_intrinsic_load_ubo;
- } else {
- op = nir_intrinsic_load_ubo_indirect;
- }
- nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo);
load->num_components = ir->type->vector_elements;
- load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
- if (!const_index)
- load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
+ load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
add_instr(&load->instr, ir->type->vector_elements);
/*
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index e161b70fa18..2e72e66699c 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1969,7 +1969,7 @@ void nir_assign_var_locations(struct exec_list *var_list,
void nir_lower_io(nir_shader *shader,
nir_variable_mode mode,
int (*type_size)(const struct glsl_type *));
-nir_src *nir_get_io_indirect_src(nir_intrinsic_instr *instr);
+nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
void nir_lower_vars_to_ssa(nir_shader *shader);
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 6b6cb32096b..9811fb391de 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -255,56 +255,60 @@ SYSTEM_VALUE(num_work_groups, 3, 0)
SYSTEM_VALUE(helper_invocation, 1, 0)
/*
- * The format of the indices depends on the type of the load. For uniforms,
- * the first index is the base address and the second index is an offset that
- * should be added to the base address. (This way you can determine in the
- * back-end which variable is being accessed even in an array.) For inputs,
- * the one and only index corresponds to the attribute slot. UBO loads also
- * have a single index which is the base address to load from.
+ * Load operations pull data from some piece of GPU memory. All load
+ * operations operate in terms of offsets into some piece of theoretical
+ * memory. Loads from externally visible memory (UBO and SSBO) simply take a
+ * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.)
+ * take a base+offset pair where the base (const_index[0]) gives the location
+ * of the start of the variable being loaded and the offset source is an
+ * offset into that variable.
*
- * UBO loads have a (possibly constant) source which is the UBO buffer index.
- * For each type of load, the _indirect variant has one additional source
- * (the second in the case of UBO's) that is the is an indirect to be added to
- * the constant address or base offset to compute the final offset.
+ * Some load operations such as UBO/SSBO load and per_vertex loads take an
+ * additional source to specify which UBO/SSBO/vertex to load from.
*
- * For vector backends, the address is in terms of one vec4, and so each array
- * element is +4 scalar components from the previous array element. For scalar
- * backends, the address is in terms of a single 4-byte float/int and arrays
- * elements begin immediately after the previous array element.
+ * The exact address type depends on the lowering pass that generates the
+ * load/store intrinsics. Typically, this is vec4 units for things such as
+ * varying slots and float units for fragment shader inputs. UBO and SSBO
+ * offsets are always in bytes.
*/
-#define LOAD(name, extra_srcs, indices, flags) \
- INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, indices, flags) \
- INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
- true, 0, 0, indices, flags)
+#define LOAD(name, srcs, indices, flags) \
+ INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags)
-LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
-LOAD(output, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE)
-LOAD(per_vertex_output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
-LOAD(shared, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { buffer_index, offset }. No const_index */
+LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { vertex, offset }. const_index[] = { base } */
+LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { buffer_index, offset }. No const_index */
+LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { vertex, offset }. const_index[] = { base } */
+LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
/*
- * Stores work the same way as loads, except now the first register input is
- * the value or array to store and the optional second input is the indirect
- * offset. SSBO stores are similar, but they accept an extra source for the
- * block index and an extra index with the writemask to use.
+ * Stores work the same way as loads, except now the first source is the value
+ * to store and the second (and possibly third) source specifies where to
+ * store the value.  SSBO and shared memory stores also have a write mask:
+ * const_index[0] for SSBO and const_index[1] for shared memory.
*/
-#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \
- INTRINSIC(store_##name, 1 + extra_srcs, \
- ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \
- false, 0, 0, 1 + extra_indices, flags) \
- INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \
- ARR(0, 1, extra_srcs_size, extra_srcs_size), \
- false, 0, 0, 1 + extra_indices, flags)
+#define STORE(name, srcs, indices, flags) \
+ INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)
-STORE(output, 0, 0, 0, 0)
-STORE(per_vertex_output, 1, 1, 0, 0)
-STORE(ssbo, 1, 1, 1, 0)
-STORE(shared, 0, 0, 1, 0)
+/* src[] = { value, offset }. const_index[] = { base } */
+STORE(output, 2, 1, 0)
+/* src[] = { value, vertex, offset }. const_index[] = { base } */
+STORE(per_vertex_output, 3, 1, 0)
+/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
+STORE(ssbo, 3, 1, 0)
+/* src[] = { value, offset }. const_index[] = { base, write_mask } */
+STORE(shared, 2, 2, 0)
-LAST_INTRINSIC(store_shared_indirect)
+LAST_INTRINSIC(store_shared)
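Under the new convention, building a UBO load and an output store looks
like the following sketch (b is a nir_builder; block, byte_offset,
value, and base are assumptions):

    /* load_ubo: src[] = { buffer_index, offset }, offset in bytes. */
    nir_intrinsic_instr *load =
       nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
    load->num_components = 4;
    load->src[0] = nir_src_for_ssa(block);
    load->src[1] = nir_src_for_ssa(nir_imm_int(b, byte_offset));
    nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
    nir_builder_instr_insert(b, &load->instr);

    /* store_output: src[] = { value, offset }, base in const_index[0]. */
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
    store->num_components = 4;
    store->const_index[0] = base;
    store->src[0] = nir_src_for_ssa(value);
    store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
    nir_builder_instr_insert(b, &store->instr);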
diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c
index c58c7785b3f..e2a2bb689a8 100644
--- a/src/glsl/nir/nir_lower_clip.c
+++ b/src/glsl/nir/nir_lower_clip.c
@@ -74,6 +74,7 @@ store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val)
store->const_index[0] = out->data.driver_location;
store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]);
store->src[0].is_ssa = true;
+ store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_builder_instr_insert(b, &store->instr);
}
@@ -85,6 +86,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
load->num_components = 4;
load->const_index[0] = in->data.driver_location;
+ load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
nir_builder_instr_insert(b, &load->instr);
@@ -112,6 +114,7 @@ find_output_in_block(nir_block *block, void *void_state)
intr->const_index[0] == state->drvloc) {
assert(state->def == NULL);
assert(intr->src[0].is_ssa);
+ assert(nir_src_as_const_value(intr->src[1]));
state->def = intr->src[0].ssa;
#if !defined(DEBUG)
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index f64ac696fa2..3d646eb14b4 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -86,10 +86,9 @@ is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
stage == MESA_SHADER_TESS_CTRL;
}
-static unsigned
+static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_var *deref,
nir_ssa_def **vertex_index,
- nir_ssa_def **out_indirect,
int (*type_size)(const struct glsl_type *))
{
nir_deref *tail = &deref->deref;
@@ -109,8 +108,8 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
*vertex_index = vtx;
}
- nir_ssa_def *indirect = NULL;
- unsigned base_offset = 0;
+ /* Just emit code and let constant-folding go to town */
+ nir_ssa_def *offset = nir_imm_int(b, 0);
while (tail->child != NULL) {
const struct glsl_type *parent_type = tail->type;
@@ -120,55 +119,46 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
nir_deref_array *deref_array = nir_deref_as_array(tail);
unsigned size = type_size(tail->type);
- base_offset += size * deref_array->base_offset;
+ offset = nir_iadd(b, offset,
+ nir_imm_int(b, size * deref_array->base_offset));
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
nir_ssa_def *mul =
nir_imul(b, nir_imm_int(b, size),
nir_ssa_for_src(b, deref_array->indirect, 1));
- indirect = indirect ? nir_iadd(b, indirect, mul) : mul;
+ offset = nir_iadd(b, offset, mul);
}
} else if (tail->deref_type == nir_deref_type_struct) {
nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+ unsigned field_offset = 0;
for (unsigned i = 0; i < deref_struct->index; i++) {
- base_offset += type_size(glsl_get_struct_field(parent_type, i));
+ field_offset += type_size(glsl_get_struct_field(parent_type, i));
}
+ offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
}
}
- *out_indirect = indirect;
- return base_offset;
+ return offset;
}
static nir_intrinsic_op
load_op(struct lower_io_state *state,
- nir_variable_mode mode, bool per_vertex, bool has_indirect)
+ nir_variable_mode mode, bool per_vertex)
{
nir_intrinsic_op op;
switch (mode) {
case nir_var_shader_in:
- if (per_vertex) {
- op = has_indirect ? nir_intrinsic_load_per_vertex_input_indirect :
- nir_intrinsic_load_per_vertex_input;
- } else {
- op = has_indirect ? nir_intrinsic_load_input_indirect :
- nir_intrinsic_load_input;
- }
+ op = per_vertex ? nir_intrinsic_load_per_vertex_input :
+ nir_intrinsic_load_input;
break;
case nir_var_shader_out:
- if (per_vertex) {
- op = has_indirect ? nir_intrinsic_load_per_vertex_output_indirect :
- nir_intrinsic_load_per_vertex_output;
- } else {
- op = has_indirect ? nir_intrinsic_load_output_indirect :
- nir_intrinsic_load_output;
- }
+ op = per_vertex ? nir_intrinsic_load_per_vertex_output :
+ nir_intrinsic_load_output;
break;
case nir_var_uniform:
- op = has_indirect ? nir_intrinsic_load_uniform_indirect :
- nir_intrinsic_load_uniform;
+ op = nir_intrinsic_load_uniform;
break;
default:
unreachable("Unknown variable mode");
@@ -211,32 +201,25 @@ nir_lower_io_block(nir_block *block, void *void_state)
is_per_vertex_input(state, intrin->variables[0]->var) ||
is_per_vertex_output(state, intrin->variables[0]->var);
- nir_ssa_def *indirect;
+ nir_ssa_def *offset;
nir_ssa_def *vertex_index;
- unsigned offset = get_io_offset(b, intrin->variables[0],
- per_vertex ? &vertex_index : NULL,
- &indirect, state->type_size);
+ offset = get_io_offset(b, intrin->variables[0],
+ per_vertex ? &vertex_index : NULL,
+ state->type_size);
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(state->mem_ctx,
- load_op(state, mode, per_vertex,
- indirect));
+ load_op(state, mode, per_vertex));
load->num_components = intrin->num_components;
- unsigned location = intrin->variables[0]->var->data.driver_location;
- if (mode == nir_var_uniform) {
- load->const_index[0] = location;
- load->const_index[1] = offset;
- } else {
- load->const_index[0] = location + offset;
- }
+ load->const_index[0] =
+ intrin->variables[0]->var->data.driver_location;
if (per_vertex)
load->src[0] = nir_src_for_ssa(vertex_index);
- if (indirect)
- load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(indirect);
+ load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(offset);
if (intrin->dest.is_ssa) {
nir_ssa_dest_init(&load->instr, &load->dest,
@@ -255,38 +238,33 @@ nir_lower_io_block(nir_block *block, void *void_state)
case nir_intrinsic_store_var: {
assert(mode == nir_var_shader_out);
- nir_ssa_def *indirect;
+ nir_ssa_def *offset;
nir_ssa_def *vertex_index;
bool per_vertex =
is_per_vertex_output(state, intrin->variables[0]->var);
- unsigned offset = get_io_offset(b, intrin->variables[0],
- per_vertex ? &vertex_index : NULL,
- &indirect, state->type_size);
- offset += intrin->variables[0]->var->data.driver_location;
+ offset = get_io_offset(b, intrin->variables[0],
+ per_vertex ? &vertex_index : NULL,
+ state->type_size);
- nir_intrinsic_op store_op;
- if (per_vertex) {
- store_op = indirect ? nir_intrinsic_store_per_vertex_output_indirect
- : nir_intrinsic_store_per_vertex_output;
- } else {
- store_op = indirect ? nir_intrinsic_store_output_indirect
- : nir_intrinsic_store_output;
- }
+ nir_intrinsic_op store_op =
+ per_vertex ? nir_intrinsic_store_per_vertex_output :
+ nir_intrinsic_store_output;
nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
store_op);
store->num_components = intrin->num_components;
- store->const_index[0] = offset;
nir_src_copy(&store->src[0], &intrin->src[0], store);
+ store->const_index[0] =
+ intrin->variables[0]->var->data.driver_location;
+
if (per_vertex)
store->src[1] = nir_src_for_ssa(vertex_index);
- if (indirect)
- store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(indirect);
+ store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset);
nir_instr_insert_before(&intrin->instr, &store->instr);
nir_instr_remove(&intrin->instr);
@@ -330,21 +308,21 @@ nir_lower_io(nir_shader *shader, nir_variable_mode mode,
}
/**
- * Return the indirect source for a load/store indirect intrinsic.
+ * Return the offset source for a load/store intrinsic.
*/
nir_src *
-nir_get_io_indirect_src(nir_intrinsic_instr *instr)
+nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
switch (instr->intrinsic) {
- case nir_intrinsic_load_input_indirect:
- case nir_intrinsic_load_output_indirect:
- case nir_intrinsic_load_uniform_indirect:
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_uniform:
return &instr->src[0];
- case nir_intrinsic_load_per_vertex_input_indirect:
- case nir_intrinsic_load_per_vertex_output_indirect:
- case nir_intrinsic_store_output_indirect:
+ case nir_intrinsic_load_per_vertex_input:
+ case nir_intrinsic_load_per_vertex_output:
+ case nir_intrinsic_store_output:
return &instr->src[1];
- case nir_intrinsic_store_per_vertex_output_indirect:
+ case nir_intrinsic_store_per_vertex_output:
return &instr->src[2];
default:
return NULL;
@@ -360,11 +338,8 @@ nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
switch (instr->intrinsic) {
case nir_intrinsic_load_per_vertex_input:
case nir_intrinsic_load_per_vertex_output:
- case nir_intrinsic_load_per_vertex_input_indirect:
- case nir_intrinsic_load_per_vertex_output_indirect:
return &instr->src[0];
case nir_intrinsic_store_per_vertex_output:
- case nir_intrinsic_store_per_vertex_output_indirect:
return &instr->src[1];
default:
return NULL;
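A usage sketch for the renamed helper (base and handle_indirect() are
hypothetical): every I/O load/store now carries an offset source, so
callers branch on whether it is constant rather than on an _indirect
opcode:

    nir_src *offset_src = nir_get_io_offset_src(intrin);
    if (offset_src) {
       nir_const_value *c = nir_src_as_const_value(*offset_src);
       if (c)
          base += c->u[0];             /* constant: fold into the base */
       else
          handle_indirect(offset_src); /* non-constant: true indirect */
    }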
diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c
index aa124d9e6cc..2f5927f6406 100644
--- a/src/glsl/nir/nir_lower_phis_to_scalar.c
+++ b/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -91,13 +91,9 @@ is_phi_src_scalarizable(nir_phi_src *src,
case nir_intrinsic_interp_var_at_sample:
case nir_intrinsic_interp_var_at_offset:
case nir_intrinsic_load_uniform:
- case nir_intrinsic_load_uniform_indirect:
case nir_intrinsic_load_ubo:
- case nir_intrinsic_load_ubo_indirect:
case nir_intrinsic_load_ssbo:
- case nir_intrinsic_load_ssbo_indirect:
case nir_intrinsic_load_input:
- case nir_intrinsic_load_input_indirect:
return true;
default:
break;
diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c
index 6995b9d6bc1..7df12e070f1 100644
--- a/src/glsl/nir/nir_lower_two_sided_color.c
+++ b/src/glsl/nir/nir_lower_two_sided_color.c
@@ -73,6 +73,7 @@ load_input(nir_builder *b, nir_variable *in)
load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
load->num_components = 4;
load->const_index[0] = in->data.driver_location;
+ load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
nir_builder_instr_insert(b, &load->instr);
@@ -151,6 +152,7 @@ nir_lower_two_sided_color_block(nir_block *block, void *void_state)
unsigned drvloc =
state->colors[idx].front->data.driver_location;
if (intr->const_index[0] == drvloc) {
+ assert(nir_src_as_const_value(intr->src[0]));
break;
}
}
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index c98a0476ef9..1a4cc695d5a 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -439,21 +439,15 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
switch (instr->intrinsic) {
case nir_intrinsic_load_uniform:
- case nir_intrinsic_load_uniform_indirect:
var_list = &state->shader->uniforms;
break;
case nir_intrinsic_load_input:
- case nir_intrinsic_load_input_indirect:
case nir_intrinsic_load_per_vertex_input:
- case nir_intrinsic_load_per_vertex_input_indirect:
var_list = &state->shader->inputs;
break;
case nir_intrinsic_load_output:
- case nir_intrinsic_load_output_indirect:
case nir_intrinsic_store_output:
- case nir_intrinsic_store_output_indirect:
case nir_intrinsic_store_per_vertex_output:
- case nir_intrinsic_store_per_vertex_output_indirect:
var_list = &state->shader->outputs;
break;
default:
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index cead99155f4..f2e384129cb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -280,7 +280,7 @@ public:
unsigned stream_id);
void emit_gs_thread_end();
void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
- const fs_reg &indirect_offset, unsigned imm_offset,
+ unsigned base_offset, const nir_src &offset_src,
unsigned num_components);
void emit_cs_terminate();
fs_reg *emit_cs_local_invocation_id_setup();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 13059999e7d..db38f619272 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1603,28 +1603,30 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src,
void
fs_visitor::emit_gs_input_load(const fs_reg &dst,
const nir_src &vertex_src,
- const fs_reg &indirect_offset,
- unsigned imm_offset,
+ unsigned base_offset,
+ const nir_src &offset_src,
unsigned num_components)
{
struct brw_gs_prog_data *gs_prog_data = (struct brw_gs_prog_data *) prog_data;
+ nir_const_value *vertex_const = nir_src_as_const_value(vertex_src);
+ nir_const_value *offset_const = nir_src_as_const_value(offset_src);
+ const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8;
+
/* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y],
* VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w]. Only
* gl_PointSize is available as a GS input, however, so it must be that.
*/
- const bool is_point_size =
- indirect_offset.file == BAD_FILE && imm_offset == 0;
-
- nir_const_value *vertex_const = nir_src_as_const_value(vertex_src);
- const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8;
+ const bool is_point_size = (base_offset == 0);
- if (indirect_offset.file == BAD_FILE && vertex_const != NULL &&
- 4 * imm_offset < push_reg_count) {
- imm_offset = 4 * imm_offset + vertex_const->u[0] * push_reg_count;
+ if (offset_const != NULL && vertex_const != NULL &&
+ 4 * (base_offset + offset_const->u[0]) < push_reg_count) {
+ int imm_offset = (base_offset + offset_const->u[0]) * 4 +
+ vertex_const->u[0] * push_reg_count;
/* This input was pushed into registers. */
if (is_point_size) {
/* gl_PointSize comes in .w */
+ assert(imm_offset == 0);
bld.MOV(dst, fs_reg(ATTR, imm_offset + 3, dst.type));
} else {
for (unsigned i = 0; i < num_components; i++) {
@@ -1683,21 +1685,21 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
}
fs_inst *inst;
- if (indirect_offset.file == BAD_FILE) {
+ if (offset_const) {
/* Constant indexing - use global offset. */
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle);
- inst->offset = imm_offset;
+ inst->offset = base_offset + offset_const->u[0];
inst->base_mrf = -1;
inst->mlen = 1;
inst->regs_written = num_components;
} else {
/* Indirect indexing - use per-slot offsets as well. */
- const fs_reg srcs[] = { icp_handle, indirect_offset };
+ const fs_reg srcs[] = { icp_handle, get_nir_src(offset_src) };
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload);
- inst->offset = imm_offset;
+ inst->offset = base_offset;
inst->base_mrf = -1;
inst->mlen = 2;
inst->regs_written = num_components;
@@ -1763,17 +1765,12 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
break;
- case nir_intrinsic_load_input_indirect:
case nir_intrinsic_load_input:
unreachable("load_input intrinsics are invalid for the GS stage");
- case nir_intrinsic_load_per_vertex_input_indirect:
- indirect_offset = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_D);
- /* fallthrough */
case nir_intrinsic_load_per_vertex_input:
- emit_gs_input_load(dest, instr->src[0],
- indirect_offset, instr->const_index[0],
- instr->num_components);
+ emit_gs_input_load(dest, instr->src[0], instr->const_index[0],
+ instr->src[1], instr->num_components);
break;
case nir_intrinsic_emit_vertex_with_counter:
@@ -2137,8 +2134,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
dest = get_nir_dest(instr->dest);
- bool has_indirect = false;
-
switch (instr->intrinsic) {
case nir_intrinsic_atomic_counter_inc:
case nir_intrinsic_atomic_counter_dec:
@@ -2327,19 +2322,20 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1));
break;
- case nir_intrinsic_load_uniform_indirect:
- has_indirect = true;
- /* fallthrough */
case nir_intrinsic_load_uniform: {
/* Offsets are in bytes but they should always be multiples of 4 */
assert(instr->const_index[0] % 4 == 0);
- assert(instr->const_index[1] % 4 == 0);
fs_reg src(UNIFORM, instr->const_index[0] / 4, dest.type);
- src.reg_offset = instr->const_index[1] / 4;
- if (has_indirect)
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ if (const_offset) {
+ /* Offsets are in bytes but they should always be multiples of 4 */
+ assert(const_offset->u[0] % 4 == 0);
+ src.reg_offset = const_offset->u[0] / 4;
+ } else {
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
+ }
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(dest, bld, j), offset(src, bld, j));
@@ -2347,9 +2343,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_load_ubo_indirect:
- has_indirect = true;
- /* fallthrough */
case nir_intrinsic_load_ubo: {
nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
fs_reg surf_index;
@@ -2377,24 +2370,24 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir->info.num_ubos - 1);
}
- if (has_indirect) {
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ if (const_offset == NULL) {
fs_reg base_offset = retype(get_nir_src(instr->src[1]),
BRW_REGISTER_TYPE_D);
- unsigned vec4_offset = instr->const_index[0];
for (int i = 0; i < instr->num_components; i++)
VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
- base_offset, vec4_offset + i * 4);
+ base_offset, i * 4);
} else {
fs_reg packed_consts = vgrf(glsl_type::float_type);
packed_consts.type = dest.type;
- struct brw_reg const_offset_reg = brw_imm_ud(instr->const_index[0] & ~15);
+ struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u[0] & ~15);
bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
surf_index, const_offset_reg);
for (unsigned i = 0; i < instr->num_components; i++) {
- packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i);
+ packed_consts.set_smear(const_offset->u[0] % 16 / 4 + i);
/* The std140 packing rules don't allow vectors to cross 16-byte
* boundaries, and a reg is 32 bytes.
@@ -2408,9 +2401,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_load_ssbo_indirect:
- has_indirect = true;
- /* fallthrough */
case nir_intrinsic_load_ssbo: {
assert(devinfo->gen >= 7);
@@ -2436,12 +2426,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir->info.num_ssbos - 1);
}
- /* Get the offset to read from */
fs_reg offset_reg;
- if (has_indirect) {
- offset_reg = get_nir_src(instr->src[1]);
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ if (const_offset) {
+ offset_reg = brw_imm_ud(const_offset->u[0]);
} else {
- offset_reg = brw_imm_ud(instr->const_index[0]);
+ offset_reg = get_nir_src(instr->src[1]);
}
/* Read the vector */
@@ -2456,9 +2446,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_load_shared_indirect:
- has_indirect = true;
- /* fallthrough */
case nir_intrinsic_load_shared: {
assert(devinfo->gen >= 7);
@@ -2466,10 +2453,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
/* Get the offset to read from */
fs_reg offset_reg;
- if (has_indirect) {
- offset_reg = get_nir_src(instr->src[0]);
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ if (const_offset) {
+ offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]);
} else {
- offset_reg = brw_imm_ud(instr->const_index[0]);
+ offset_reg = vgrf(glsl_type::uint_type);
+ bld.ADD(offset_reg,
+ retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(instr->const_index[0]));
}
/* Read the vector */
@@ -2484,9 +2475,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_store_shared_indirect:
- has_indirect = true;
- /* fallthrough */
case nir_intrinsic_store_shared: {
assert(devinfo->gen >= 7);
@@ -2509,13 +2497,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unsigned length = ffs(~(writemask >> first_component)) - 1;
fs_reg offset_reg;
- if (!has_indirect) {
- offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component);
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ if (const_offset) {
+ offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] +
+ 4 * first_component);
} else {
offset_reg = vgrf(glsl_type::uint_type);
bld.ADD(offset_reg,
retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(4 * first_component));
+ brw_imm_ud(instr->const_index[0] + 4 * first_component));
}
emit_untyped_write(bld, surf_index, offset_reg,
@@ -2532,9 +2522,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_load_input_indirect:
- unreachable("Not allowed");
- /* fallthrough */
case nir_intrinsic_load_input: {
fs_reg src;
if (stage == MESA_SHADER_VERTEX) {
@@ -2544,15 +2531,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
instr->const_index[0]);
}
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ assert(const_offset && "Indirect input loads not allowed");
+ src = offset(src, bld, const_offset->u[0]);
+
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(dest, bld, j), offset(src, bld, j));
}
break;
}
- case nir_intrinsic_store_ssbo_indirect:
- has_indirect = true;
- /* fallthrough */
case nir_intrinsic_store_ssbo: {
assert(devinfo->gen >= 7);
@@ -2579,7 +2567,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg val_reg = get_nir_src(instr->src[0]);
/* Writemask */
- unsigned writemask = instr->const_index[1];
+ unsigned writemask = instr->const_index[0];
/* Combine groups of consecutive enabled channels in one write
* message. We use ffs to find the first enabled channel and then ffs on
@@ -2589,10 +2577,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
while (writemask) {
unsigned first_component = ffs(writemask) - 1;
unsigned length = ffs(~(writemask >> first_component)) - 1;
- fs_reg offset_reg;
- if (!has_indirect) {
- offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component);
+ fs_reg offset_reg;
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]);
+ if (const_offset) {
+ offset_reg = brw_imm_ud(const_offset->u[0] + 4 * first_component);
} else {
offset_reg = vgrf(glsl_type::uint_type);
bld.ADD(offset_reg,
@@ -2613,14 +2602,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_store_output_indirect:
- unreachable("Not allowed");
- /* fallthrough */
case nir_intrinsic_store_output: {
fs_reg src = get_nir_src(instr->src[0]);
fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
instr->const_index[0]);
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ assert(const_offset && "Indirect output stores not allowed");
+ new_dest = offset(new_dest, bld, const_offset->u[0]);
+
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
}
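The offset-selection idiom repeated throughout this file, isolated as a
sketch (shown for the store_shared layout, where the offset is src[1];
for SSBO stores it is src[2]):

    fs_reg offset_reg;
    nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
    if (const_offset) {
       /* Constant offset: fold base + offset into one immediate. */
       offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]);
    } else {
       /* Indirect offset: add the base to the SSA value at run time. */
       offset_reg = vgrf(glsl_type::uint_type);
       bld.ADD(offset_reg,
               retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
               brw_imm_ud(instr->const_index[0]));
    }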
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index d62470379ee..14ad172a2c3 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -24,31 +24,49 @@
#include "brw_nir.h"
#include "brw_shader.h"
#include "glsl/nir/glsl_to_nir.h"
+#include "glsl/nir/nir_builder.h"
#include "program/prog_to_nir.h"
+struct remap_vs_attrs_state {
+ nir_builder b;
+ uint64_t inputs_read;
+};
+
static bool
-remap_vs_attrs(nir_block *block, void *closure)
+remap_vs_attrs(nir_block *block, void *void_state)
{
- GLbitfield64 inputs_read = *((GLbitfield64 *) closure);
+ struct remap_vs_attrs_state *state = void_state;
- nir_foreach_instr(block, instr) {
+ nir_foreach_instr_safe(block, instr) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- /* We set EmitNoIndirect for VS inputs, so there are no indirects. */
- assert(intrin->intrinsic != nir_intrinsic_load_input_indirect);
-
if (intrin->intrinsic == nir_intrinsic_load_input) {
/* Attributes come in a contiguous block, ordered by their
* gl_vert_attrib value. That means we can compute the slot
* number for an attribute by masking out the enabled attributes
* before it and counting the bits.
*/
- int attr = intrin->const_index[0];
- int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));
+ nir_const_value *const_offset = nir_src_as_const_value(intrin->src[0]);
+
+ /* We set EmitNoIndirect for VS inputs, so there are no indirects. */
+ assert(const_offset);
+
+ int attr = intrin->const_index[0] + const_offset->u[0];
+ int slot = _mesa_bitcount_64(state->inputs_read &
+ BITFIELD64_MASK(attr));
+
+ /* The NIR -> FS pass will just add the base and offset together, so
+ * there's no reason to keep them separate. Just put it all in
+ * const_index[0] and set the offset src[0] to load_const(0).
+ */
intrin->const_index[0] = 4 * slot;
+
+ state->b.cursor = nir_before_instr(&intrin->instr);
+ nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+ nir_src_for_ssa(nir_imm_int(&state->b, 0)));
}
}
return true;
@@ -79,10 +97,17 @@ brw_nir_lower_inputs(nir_shader *nir,
* key->inputs_read since the two are identical aside from Gen4-5
* edge flag differences.
*/
- GLbitfield64 inputs_read = nir->info.inputs_read;
+ struct remap_vs_attrs_state remap_state = {
+ .inputs_read = nir->info.inputs_read,
+ };
+
+ /* This pass needs actual constants */
+ nir_opt_constant_folding(nir);
+
nir_foreach_overload(nir, overload) {
if (overload->impl) {
- nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);
+ nir_builder_init(&remap_state.b, overload->impl);
+ nir_foreach_block(overload->impl, remap_vs_attrs, &remap_state);
}
}
}
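
The slot computation in remap_vs_attrs boils down to a popcount over the enabled attributes below the one being remapped. A worked example, with __builtin_popcountll standing in for _mesa_bitcount_64 and mask_below() standing in for BITFIELD64_MASK (both stand-ins are illustrative, not the Mesa definitions):

#include <stdint.h>
#include <stdio.h>

static uint64_t mask_below(unsigned bit)
{
   return (bit >= 64) ? ~0ull : (1ull << bit) - 1;
}

int main(void)
{
   /* Say the VS reads attributes 0, 3 and 7. */
   uint64_t inputs_read = (1ull << 0) | (1ull << 3) | (1ull << 7);

   /* Attribute 7 is preceded by two enabled attributes (0 and 3), so it
    * is packed into slot 2, and const_index[0] becomes 4 * 2 = 8. */
   unsigned attr = 7;
   unsigned slot = __builtin_popcountll(inputs_read & mask_below(attr));
   printf("attr %u -> slot %u\n", attr, slot);
   return 0;
}
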
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
index e51ef4b37d5..6f66978f8e1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
@@ -60,19 +60,19 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
src_reg src;
switch (instr->intrinsic) {
- case nir_intrinsic_load_per_vertex_input_indirect:
- assert(!"EmitNoIndirectInput should prevent this.");
case nir_intrinsic_load_per_vertex_input: {
/* The EmitNoIndirectInput flag guarantees our vertex index will
* be constant. We should handle indirects someday.
*/
nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
+ nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
/* Make up a type...we have no way of knowing... */
const glsl_type *const type = glsl_type::ivec(instr->num_components);
src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u[0] +
- instr->const_index[0], type);
+ instr->const_index[0] + offset->u[0],
+ type);
dest = get_nir_dest(instr->dest, src.type);
dest.writemask = brw_writemask_for_size(instr->num_components);
emit(MOV(dest, src));
@@ -80,7 +80,6 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
}
case nir_intrinsic_load_input:
- case nir_intrinsic_load_input_indirect:
unreachable("nir_lower_io should have produced per_vertex intrinsics");
case nir_intrinsic_emit_vertex_with_counter: {
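
The per-vertex addressing above is vertex-major: each vertex gets a full block of BRW_VARYING_SLOT_COUNT slots, and the now-constant offset source simply folds into the slot within that block. A toy computation (the slot count of 34 is a made-up value for illustration, not the real BRW_VARYING_SLOT_COUNT):

#include <stdio.h>

static unsigned attr_reg(unsigned slots_per_vertex, unsigned vertex,
                         unsigned base_slot, unsigned const_offset)
{
   /* All slots for vertex 0, then all slots for vertex 1, ... */
   return slots_per_vertex * vertex + base_slot + const_offset;
}

int main(void)
{
   /* vertex 2, base slot 3, constant offset 1 -> ATTR 34 * 2 + 3 + 1 = 72 */
   printf("ATTR %u\n", attr_reg(34, 2, 3, 1));
   return 0;
}
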
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 50570cd7703..f965b39360f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -369,22 +369,17 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
dst_reg dest;
src_reg src;
- bool has_indirect = false;
-
switch (instr->intrinsic) {
- case nir_intrinsic_load_input_indirect:
- has_indirect = true;
- /* fallthrough */
case nir_intrinsic_load_input: {
- int offset = instr->const_index[0];
- src = src_reg(ATTR, offset, glsl_type::uvec4_type);
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+
+ /* We set EmitNoIndirectInput for VS */
+ assert(const_offset);
+
+ src = src_reg(ATTR, instr->const_index[0] + const_offset->u[0],
+ glsl_type::uvec4_type);
- if (has_indirect) {
- dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[0],
- BRW_REGISTER_TYPE_D,
- 1));
- }
dest = get_nir_dest(instr->dest, src.type);
dest.writemask = brw_writemask_for_size(instr->num_components);
@@ -392,11 +387,11 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_store_output_indirect:
- unreachable("nir_lower_outputs_to_temporaries should prevent this");
-
case nir_intrinsic_store_output: {
- int varying = instr->const_index[0];
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ assert(const_offset);
+
+ int varying = instr->const_index[0] + const_offset->u[0];
src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
instr->num_components);
@@ -431,9 +426,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_store_ssbo_indirect:
- has_indirect = true;
- /* fallthrough */
case nir_intrinsic_store_ssbo: {
assert(devinfo->gen >= 7);
@@ -458,20 +450,19 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
}
/* Offset */
- src_reg offset_reg = src_reg(this, glsl_type::uint_type);
- unsigned const_offset_bytes = 0;
- if (has_indirect) {
- emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
+ src_reg offset_reg;
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]);
+ if (const_offset) {
+ offset_reg = brw_imm_ud(const_offset->u[0]);
} else {
- const_offset_bytes = instr->const_index[0];
- emit(MOV(dst_reg(offset_reg), brw_imm_ud(const_offset_bytes)));
+ offset_reg = get_nir_src(instr->src[2], 1);
}
/* Value */
src_reg val_reg = get_nir_src(instr->src[0], 4);
/* Writemask */
- unsigned write_mask = instr->const_index[1];
+ unsigned write_mask = instr->const_index[0];
/* IvyBridge does not have a native SIMD4x2 untyped write message so untyped
* writes will use SIMD8 mode. In order to hide this and keep symmetry across
@@ -537,9 +528,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
* write at to skip the channels we skipped, if any.
*/
if (skipped_channels > 0) {
- if (!has_indirect) {
- const_offset_bytes += 4 * skipped_channels;
- offset_reg = brw_imm_ud(const_offset_bytes);
+ if (offset_reg.file == IMM) {
+ offset_reg.ud += 4 * skipped_channels;
} else {
emit(ADD(dst_reg(offset_reg), offset_reg,
brw_imm_ud(4 * skipped_channels)));
@@ -574,9 +564,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_load_ssbo_indirect:
- has_indirect = true;
- /* fallthrough */
case nir_intrinsic_load_ssbo: {
assert(devinfo->gen >= 7);
@@ -604,13 +591,12 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir->info.num_ssbos - 1);
}
- src_reg offset_reg = src_reg(this, glsl_type::uint_type);
- unsigned const_offset_bytes = 0;
- if (has_indirect) {
- emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1)));
+ src_reg offset_reg;
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ if (const_offset) {
+ offset_reg = brw_imm_ud(const_offset->u[0]);
} else {
- const_offset_bytes = instr->const_index[0];
- emit(MOV(dst_reg(offset_reg), brw_imm_ud((const_offset_bytes))));
+ offset_reg = get_nir_src(instr->src[1], 1);
}
/* Read the vector */
@@ -673,20 +659,21 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_load_uniform_indirect:
- has_indirect = true;
- /* fallthrough */
case nir_intrinsic_load_uniform: {
/* Offsets are in bytes but they should always be multiples of 16 */
assert(instr->const_index[0] % 16 == 0);
- assert(instr->const_index[1] % 16 == 0);
dest = get_nir_dest(instr->dest);
src = src_reg(dst_reg(UNIFORM, instr->const_index[0] / 16));
- src.reg_offset = instr->const_index[1] / 16;
+ src.type = dest.type;
- if (has_indirect) {
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ if (const_offset) {
+ /* Offsets are in bytes but they should always be multiples of 16 */
+ assert(const_offset->u[0] % 16 == 0);
+ src.reg_offset = const_offset->u[0] / 16;
+ } else {
src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
src.reladdr = new(mem_ctx) src_reg(tmp);
}
@@ -724,9 +711,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_load_ubo_indirect:
- has_indirect = true;
- /* fallthrough */
case nir_intrinsic_load_ubo: {
nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]);
src_reg surf_index;
@@ -760,11 +744,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir->info.num_ubos - 1);
}
- unsigned const_offset = instr->const_index[0];
src_reg offset;
-
- if (!has_indirect) {
- offset = brw_imm_ud(const_offset & ~15);
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ if (const_offset) {
+ offset = brw_imm_ud(const_offset->u[0] & ~15);
} else {
offset = get_nir_src(instr->src[1], nir_type_int, 1);
}
@@ -778,10 +761,12 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
NULL, NULL /* before_block/inst */);
packed_consts.swizzle = brw_swizzle_for_size(instr->num_components);
- packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
- const_offset % 16 / 4,
- const_offset % 16 / 4,
- const_offset % 16 / 4);
+ if (const_offset) {
+ packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u[0] % 16 / 4,
+ const_offset->u[0] % 16 / 4,
+ const_offset->u[0] % 16 / 4,
+ const_offset->u[0] % 16 / 4);
+ }
emit(MOV(dest, packed_consts));
break;
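
The pull-constant read above always fetches a 16-byte-aligned vec4 (const_offset->u[0] & ~15), so for a constant offset the swizzle must select the component inside that vec4. A quick standalone check of the arithmetic:

#include <stdio.h>

int main(void)
{
   /* A constant UBO offset of 20 bytes aligns down to a read at byte 16;
    * (20 % 16) / 4 = 1 selects the .y channel of the fetched vec4. */
   unsigned const_offset = 20;
   printf("read at byte %u, component %u\n",
          const_offset & ~15u, const_offset % 16 / 4);
   return 0;
}
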