summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Rob Clark <[email protected]> 2014-02-19 11:55:25 -0500
committer: Rob Clark <[email protected]> 2014-02-19 12:01:26 -0500
commit: 5993723471a81003bd82d189836ccdd8d085a7b5 (patch)
tree: a33b41ee2bfffdfa29b944addc3b8be06bc369a3
parent: bbf8239f92ecd79431dfa41402e1c85318e7267f (diff)
freedreno/a3xx/compiler: scheduling/legalize fixes
It seems the write-after-read hazard that applies to texture fetch instructions also applies to sfu instructions.

Also, cat5/cat6 instructions do not have an (ss) bit, so in these cases we need to insert a dummy nop instruction with the (ss) bit set.

Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/ir3_dump.c | 5
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/ir3_ra.c | 25
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/ir3_sched.c | 2
3 files changed, 30 insertions, 2 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c b/src/gallium/drivers/freedreno/a3xx/ir3_dump.c
index 0afd04861a3..3984cd60e6e 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c
+++ b/src/gallium/drivers/freedreno/a3xx/ir3_dump.c
@@ -48,6 +48,11 @@ static void dump_instr_name(struct ir3_dump_ctx *ctx,
fprintf(ctx->f, "%03u: ", instr->depth);
}
+ if (instr->flags & IR3_INSTR_SY)
+ fprintf(ctx->f, "(sy)");
+ if (instr->flags & IR3_INSTR_SS)
+ fprintf(ctx->f, "(ss)");
+
if (is_meta(instr)) {
switch(instr->opc) {
case OPC_META_PHI:
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
index f4834a3778c..5df57e776f9 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c
@@ -526,18 +526,41 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
}
}
+ /* cat5+ does not have an (ss) bit, if needed we need to
+ * insert a nop to carry the sync flag. Would be kinda
+ * clever if we were aware of this during scheduling, but
+ * this should be a pretty rare case:
+ */
+ if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
+ struct ir3_instruction *nop;
+ nop = ir3_instr_create(block, 0, OPC_NOP);
+ nop->flags |= IR3_INSTR_SS;
+ n->flags &= ~IR3_INSTR_SS;
+ }
+
+ /* need to be able to set (ss) on first instruction: */
+ if ((shader->instrs_count == 0) && (n->category >= 5))
+ ir3_instr_create(block, 0, OPC_NOP);
+
shader->instrs[shader->instrs_count++] = n;
if (is_sfu(n))
regmask_set(&needs_ss, n->regs[0]);
- if (is_tex(n)) {
+
+ if (is_tex(n))
regmask_set(&needs_sy, n->regs[0]);
+
+ /* both tex/sfu appear to not always immediately consume
+ * their src register(s):
+ */
+ if (is_tex(n) || is_sfu(n)) {
for (i = 1; i < n->regs_count; i++) {
reg = n->regs[i];
if (reg_gpr(reg))
regmask_set(&needs_ss_war, reg);
}
}
+
if (is_input(n))
last_input = n;
}
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c b/src/gallium/drivers/freedreno/a3xx/ir3_sched.c
index 5ac33abc548..5e585271f92 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c
+++ b/src/gallium/drivers/freedreno/a3xx/ir3_sched.c
@@ -77,7 +77,7 @@ static unsigned distance(struct ir3_sched_ctx *ctx,
struct ir3_instruction *n = ctx->scheduled;
unsigned d = 0;
while (n && (n != instr) && (d < maxd)) {
- if (!is_meta(n))
+ if (is_alu(n) || is_flow(n))
d++;
n = n->next;
}