diff options
author | Rob Clark <[email protected]> | 2014-02-19 11:55:25 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2014-02-19 12:01:26 -0500 |
commit | 5993723471a81003bd82d189836ccdd8d085a7b5 (patch) | |
tree | a33b41ee2bfffdfa29b944addc3b8be06bc369a3 | |
parent | bbf8239f92ecd79431dfa41402e1c85318e7267f (diff) |
freedreno/a3xx/compiler: scheduling/legalize fixes
It seems the write-after-read hazard that applies to texture fetch
instructions also applies to sfu instructions.
Also, cat5/cat6 instructions do not have an (ss) bit, so in these
cases we need to insert a dummy nop instruction with the (ss) bit set.
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/ir3_dump.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/ir3_ra.c | 25 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/ir3_sched.c | 2 |
3 files changed, 30 insertions, 2 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c b/src/gallium/drivers/freedreno/a3xx/ir3_dump.c index 0afd04861a3..3984cd60e6e 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_dump.c @@ -48,6 +48,11 @@ static void dump_instr_name(struct ir3_dump_ctx *ctx, fprintf(ctx->f, "%03u: ", instr->depth); } + if (instr->flags & IR3_INSTR_SY) + fprintf(ctx->f, "(sy)"); + if (instr->flags & IR3_INSTR_SS) + fprintf(ctx->f, "(ss)"); + if (is_meta(instr)) { switch(instr->opc) { case OPC_META_PHI: diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c index f4834a3778c..5df57e776f9 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c @@ -526,18 +526,41 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) } } + /* cat5+ does not have an (ss) bit, if needed we need to + * insert a nop to carry the sync flag. Would be kinda + * clever if we were aware of this during scheduling, but + * this should be a pretty rare case: + */ + if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) { + struct ir3_instruction *nop; + nop = ir3_instr_create(block, 0, OPC_NOP); + nop->flags |= IR3_INSTR_SS; + n->flags &= ~IR3_INSTR_SS; + } + + /* need to be able to set (ss) on first instruction: */ + if ((shader->instrs_count == 0) && (n->category >= 5)) + ir3_instr_create(block, 0, OPC_NOP); + shader->instrs[shader->instrs_count++] = n; if (is_sfu(n)) regmask_set(&needs_ss, n->regs[0]); - if (is_tex(n)) { + + if (is_tex(n)) regmask_set(&needs_sy, n->regs[0]); + + /* both tex/sfu appear to not always immediately consume + * their src register(s): + */ + if (is_tex(n) || is_sfu(n)) { for (i = 1; i < n->regs_count; i++) { reg = n->regs[i]; if (reg_gpr(reg)) regmask_set(&needs_ss_war, reg); } } + if (is_input(n)) last_input = n; } diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c 
b/src/gallium/drivers/freedreno/a3xx/ir3_sched.c index 5ac33abc548..5e585271f92 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_sched.c @@ -77,7 +77,7 @@ static unsigned distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *n = ctx->scheduled; unsigned d = 0; while (n && (n != instr) && (d < maxd)) { - if (!is_meta(n)) + if (is_alu(n) || is_flow(n)) d++; n = n->next; } |