summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2016-11-30 14:00:28 -0800
committerEric Anholt <[email protected]>2016-11-30 19:58:09 -0800
commit51244859e368d9270a1f2f84aec6e97fbc1f5fac (patch)
tree973693687a2e72d648f409145a810d64a1c89cfa /src/gallium/drivers/vc4
parent6c34084d8eafff3a764cd1ad49afacd211470f7b (diff)
vc4: Avoid false scheduling dependencies for LOAD_IMMs.
Noticed in shaders with branching, where we ended up scheduling delay slots near the start of a block for the uniforms reset setup. total instructions in shared programs: 93970 -> 93951 (-0.02%) instructions in affected programs: 3117 -> 3098 (-0.61%) 3DMMES performance +0.423087% +/- 0.133521% (n=9,10)
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_schedule.c5
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_validate.c4
2 files changed, 9 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 878cdf95d96..4b2cb9dbd37 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -477,6 +477,11 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard, uint64_t inst)
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+ /* Full immediate loads don't read any registers. */
+ if (sig == QPU_SIG_LOAD_IMM)
+ return false;
+
uint32_t src_muxes[] = {
QPU_GET_FIELD(inst, QPU_ADD_A),
QPU_GET_FIELD(inst, QPU_ADD_B),
diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c b/src/gallium/drivers/vc4/vc4_qpu_validate.c
index f18ba0364d0..08dd6e5df2e 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_validate.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c
@@ -58,6 +58,10 @@ _reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_BRANCH)
return false;
+ /* Load immediates don't read any registers. */
+ if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_LOAD_IMM)
+ return false;
+
for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
if (!ignore_a &&
src_regs[i].mux == QPU_MUX_A &&