diff options
author | Rob Clark <[email protected]> | 2015-01-12 23:32:25 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2015-01-13 08:17:18 -0500 |
commit | 876550ff97b9c97df02f9bf0e29198be963d8e89 (patch) | |
tree | 371864120b212c817e295342987fbd2ad53908b5 /src/gallium/drivers | |
parent | b6819cd55491c4b5ad6353102e7596a572152314 (diff) |
freedreno/ir3: handle "holes" in inputs
If, for example, only the x/y/w components of in.xyzw are actually used,
we still need to have a group of four registers and assign all four
components. The hardware can't write in.xy and in.w to discontiguous
registers. To handle this, pad with a dummy NOP instruction, to keep
the neighbor chain contiguous.
This fixes a problem noticed with firefox OMTC.
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_group.c | 32 |
1 files changed, 31 insertions, 1 deletions
```diff
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c
index f215c1c15d2..da2142e69a8 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_group.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c
@@ -187,6 +187,36 @@ static void instr_find_neighbors(struct ir3_instruction *instr)
 	}
 }
 
+/* a bit of sadness.. we can't have "holes" in inputs from PoV of
+ * register assignment, they still need to be grouped together. So
+ * we need to insert dummy/padding instruction for grouping, and
+ * then take it back out again before anyone notices.
+ */
+static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
+{
+	int i, mask = 0;
+	struct ir3_block *block = NULL;
+
+	for (i = n - 1; i >= 0; i--) {
+		struct ir3_instruction *instr = input[i];
+		if (instr) {
+			block = instr->block;
+		} else if (block) {
+			instr = ir3_instr_create(block, 0, OPC_NOP);
+			ir3_reg_create(instr, 0, IR3_REG_SSA); /* dst */
+			input[i] = instr;
+			mask |= (1 << i);
+		}
+	}
+
+	group_n(&arr_ops_in, input, n);
+
+	for (i = 0; i < n; i++) {
+		if (mask & (1 << i))
+			input[i] = NULL;
+	}
+}
+
 static void block_find_neighbors(struct ir3_block *block)
 {
 	unsigned i;
@@ -214,7 +244,7 @@ static void block_find_neighbors(struct ir3_block *block)
 	 * on vec4 boundaries
 	 */
 	for (i = 0; i < block->ninputs; i += 4)
-		group_n(&arr_ops_in, &block->inputs[i], 4);
+		pad_and_group_input(&block->inputs[i], 4);
 	for (i = 0; i < block->noutputs; i += 4)
 		group_n(&arr_ops_out, &block->outputs[i], 4);
 
```