summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2015-07-29 17:16:26 -0700
committer Eric Anholt <[email protected]> 2015-07-30 15:47:12 -0700
commit 27f728cdc5d90f63839fbeb1942e6f27339b102a (patch)
tree 581dfede354ff473f64543a7837ccc274205afa8 /src
parent b85f6ae4b24ee50948f14a9effa982eb0b9b3681 (diff)
vc4: Lower NIR inputs to scalar as well.
For now this is just scalarizing, but it also means we'll get to dump a bunch of QIR-based lowering in a moment.
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/vc4/vc4_nir_lower_io.c 44
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 4
2 files changed, 44 insertions, 4 deletions
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index 43376888248..9882b6b8a35 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -29,11 +29,47 @@
* Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
* something amenable to the VC4 architecture.
*
- * Currently, it split outputs into scalars, and drops any non-position values
- * in coordinate shaders.
+ * Currently, it splits inputs and outputs into scalars, and drops any
+ * non-position outputs in coordinate shaders.
*/
static void
+vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ /* All TGSI-to-NIR inputs are vec4. */
+ assert(intr->num_components == 4);
+
+ nir_builder_insert_before_instr(b, &intr->instr);
+
+ /* Generate scalar loads equivalent to the original VEC4. */
+ nir_ssa_def *dests[4];
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input);
+ intr_comp->num_components = 1;
+ intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_builder_instr_insert(b, &intr_comp->instr);
+
+ dests[i] = &intr_comp->dest.ssa;
+ }
+
+ /* Batch things back together into a vec4. This will get split by the
+ * later ALU scalarization pass.
+ */
+ nir_ssa_def *vec_instr = nir_vec4(b, dests[0], dests[1],
+ dests[2], dests[3]);
+
+ /* Replace the old intrinsic with a reference to our reconstructed
+ * vec4.
+ */
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec_instr),
+ ralloc_parent(b->impl));
+ nir_instr_remove(&intr->instr);
+}
+
+static void
vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
@@ -84,6 +120,10 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
+ case nir_intrinsic_load_input:
+ vc4_nir_lower_input(c, b, intr);
+ break;
+
case nir_intrinsic_store_output:
vc4_nir_lower_output(c, b, intr);
break;
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 85bb1c48780..dfc3815c5c1 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1889,8 +1889,8 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_load_input:
- for (int i = 0; i < instr->num_components; i++)
- dest[i] = c->inputs[instr->const_index[0] * 4 + i];
+ assert(instr->num_components == 1);
+ *dest = c->inputs[instr->const_index[0]];
break;