diff options
author | Eric Anholt <[email protected]> | 2014-08-20 14:51:08 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2014-08-22 10:16:58 -0700 |
commit | ae83955b1da238ccf180cba568f4269f01bb21fa (patch) | |
tree | 5a63d710929c31375c3a543c54a1c38452e50afb | |
parent | c3c922289b2fb080ec184d9bd7e71a8870ced18d (diff) |
vc4: Emit the scoreboard wait just when it's needed.
This should improve performance on real hardware by allowing more shader
instances to run in parallel. It also fixes assertion failures in tests
that don't emit a fragment color: those shaders were too short to hold the
scoreboard signals, which were previously placed at fixed instruction slots.
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qpu_emit.c | 27 |
1 file changed, 25 insertions, 2 deletions
```diff
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 43491019855..477929cc199 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -97,6 +97,7 @@ static void
 serialize_insts(struct qcompile *c)
 {
         int last_sfu_write = -10;
+        bool scoreboard_wait_emitted = false;
 
         while (!is_empty_list(&c->qpu_inst_list)) {
                 struct queued_qpu_inst *q =
@@ -173,6 +174,30 @@ serialize_insts(struct qcompile *c)
                         last_sfu_write = c->qpu_inst_count;
                 }
 
+                /* "A scoreboard wait must not occur in the first two
+                 * instructions of a fragment shader. This is either the
+                 * explicit Wait for Scoreboard signal or an implicit wait
+                 * with the first tile-buffer read or write instruction."
+                 */
+                if (!scoreboard_wait_emitted &&
+                    (waddr_a == QPU_W_TLB_Z || waddr_m == QPU_W_TLB_Z ||
+                     waddr_a == QPU_W_TLB_COLOR_MS ||
+                     waddr_m == QPU_W_TLB_COLOR_MS ||
+                     waddr_a == QPU_W_TLB_COLOR_ALL ||
+                     waddr_m == QPU_W_TLB_COLOR_ALL ||
+                     QPU_GET_FIELD(q->inst, QPU_SIG) == QPU_SIG_COLOR_LOAD)) {
+                        while (c->qpu_inst_count < 3 ||
+                               QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
+                                             QPU_SIG) != QPU_SIG_NONE) {
+                                serialize_one_inst(c, qpu_inst(qpu_a_NOP(),
+                                                               qpu_m_NOP()));
+                        }
+                        c->qpu_insts[c->qpu_inst_count - 1] =
+                                qpu_set_sig(c->qpu_insts[c->qpu_inst_count - 1],
+                                            QPU_SIG_WAIT_FOR_SCOREBOARD);
+                        scoreboard_wait_emitted = true;
+                }
+
                 serialize_one_inst(c, q->inst);
 
                 remove_from_list(&q->link);
@@ -613,8 +638,6 @@ vc4_generate_code(struct qcompile *c)
         case QSTAGE_COORD:
                 break;
         case QSTAGE_FRAG:
-                c->qpu_insts[2] = qpu_set_sig(c->qpu_insts[2],
-                                              QPU_SIG_WAIT_FOR_SCOREBOARD);
                 c->qpu_insts[c->qpu_inst_count - 1] =
                         qpu_set_sig(c->qpu_insts[c->qpu_inst_count - 1],
                                     QPU_SIG_SCOREBOARD_UNLOCK);
```