summary refs log tree commit diff stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Rob Clark <[email protected]> 2015-01-12 23:32:25 -0500
committer: Rob Clark <[email protected]> 2015-01-13 08:17:18 -0500
commit: 876550ff97b9c97df02f9bf0e29198be963d8e89 (patch)
tree: 371864120b212c817e295342987fbd2ad53908b5 /src/gallium
parent: b6819cd55491c4b5ad6353102e7596a572152314 (diff)
freedreno/ir3: handle "holes" in inputs
If, for example, only the x/y/w components of in.xyzw are actually used, we still need to have a group of four registers and assign all four components. The hardware can't write in.xy and in.w to discontiguous registers. To handle this, pad with a dummy NOP instruction, to keep the neighbor chain contiguous.

This fixes a problem noticed with firefox OMTC.

Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_group.c 32
1 files changed, 31 insertions, 1 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c
index f215c1c15d2..da2142e69a8 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_group.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c
@@ -187,6 +187,36 @@ static void instr_find_neighbors(struct ir3_instruction *instr)
}
}
+/* a bit of sadness.. we can't have "holes" in inputs from PoV of register
+ * assignment, they still need to be grouped together.  So for each NULL slot
+ * in input[0..n-1] that sits below a used slot, insert a dummy/padding NOP
+ * for group_n() to see, then set those slots back to NULL before returning.
+ */
+static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
+{
+ int i, mask = 0; /* mask: bit i set => input[i] is padding inserted here */
+ struct ir3_block *block = NULL; /* block of the nearest used slot above i */
+
+ for (i = n - 1; i >= 0; i--) { /* walk high to low: holes above the last used slot stay NULL */
+ struct ir3_instruction *instr = input[i];
+ if (instr) {
+ block = instr->block;
+ } else if (block) { /* hole below a used component: pad it */
+ instr = ir3_instr_create(block, 0, OPC_NOP);
+ ir3_reg_create(instr, 0, IR3_REG_SSA); /* dst */
+ input[i] = instr;
+ mask |= (1 << i);
+ }
+ }
+
+ group_n(&arr_ops_in, input, n); /* group with the interior holes filled in */
+
+ for (i = 0; i < n; i++) { /* undo the padding: restore the original NULLs */
+ if (mask & (1 << i))
+ input[i] = NULL;
+ }
+}
+
static void block_find_neighbors(struct ir3_block *block)
{
unsigned i;
@@ -214,7 +244,7 @@ static void block_find_neighbors(struct ir3_block *block)
* on vec4 boundaries
*/
for (i = 0; i < block->ninputs; i += 4)
- group_n(&arr_ops_in, &block->inputs[i], 4);
+ pad_and_group_input(&block->inputs[i], 4);
for (i = 0; i < block->noutputs; i += 4)
group_n(&arr_ops_out, &block->outputs[i], 4);