aboutsummaryrefslogtreecommitdiffstats
path: root/src/broadcom/compiler/qpu_schedule.c
diff options
context:
space:
mode:
authorJose Maria Casanova Crespo <[email protected]>2019-07-02 18:31:09 +0200
committerJose Maria Casanova Crespo <[email protected]>2019-07-22 03:00:50 +0200
commitc341ab7ffbac822d3d3cbb3d3ae9d2a19ea3cc9a (patch)
tree70eba195aec80c8ba4d6f0f721fd957b386e933f /src/broadcom/compiler/qpu_schedule.c
parentf7224014df0d366453739356b9968ca94ad43979 (diff)
v3d: add shader-db stat to count SFU stalls
SFU operations have a latency of 2 cycles, so if their result is used in the cycle immediately following an SFU instruction, the GPU stalls for an extra cycle until the result is available. This adds the number of stalls, and the sum of instructions + stalls, to the shader-db debug output, so that optimizations that schedule instructions to avoid generating SFU stalls can be evaluated. v2: Rename v3d_qpu_generates_sfu_stalls to v3d_qpu_instr_is_sfu (Eric) Reviewed-by: Eric Anholt <[email protected]>
Diffstat (limited to 'src/broadcom/compiler/qpu_schedule.c')
-rw-r--r--src/broadcom/compiler/qpu_schedule.c45
1 files changed, 45 insertions, 0 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index b8e04f6ea13..370881b00ad 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -440,6 +440,8 @@ struct choose_scoreboard {
struct dag *dag;
int tick;
int last_magic_sfu_write_tick;
+ int last_stallable_sfu_reg;
+ int last_stallable_sfu_tick;
int last_ldvary_tick;
int last_uniforms_reset_tick;
int last_thrsw_tick;
@@ -531,6 +533,33 @@ pixel_scoreboard_too_soon(struct choose_scoreboard *scoreboard,
return (scoreboard->tick == 0 && qpu_inst_is_tlb(inst));
}
+/**
+ * Returns true when inst is an ALU instruction that reads address waddr
+ * through one of its raddr fields.
+ *
+ * raddr_a is compared only when v3d_qpu_uses_mux() reports MUX_A in use.
+ * raddr_b is compared only when MUX_B is in use and the small_imm signal
+ * is clear (presumably because raddr_b then carries the immediate instead
+ * of a register address -- confirm against the QPU instruction
+ * definition).
+ *
+ * Non-ALU instructions always yield false.
+ */
+static bool
+qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst,
+                        uint32_t waddr)
+{
+        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        const bool reads_via_a = v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) &&
+                                 inst->raddr_a == waddr;
+        if (reads_via_a)
+                return true;
+
+        /* Only reached when the A path did not match. */
+        return v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
+               !inst->sig.small_imm &&
+               inst->raddr_b == waddr;
+}
+
+/**
+ * Returns true when inst would be issued on the tick right after the last
+ * recorded stallable SFU instruction (last_stallable_sfu_tick + 1) and it
+ * reads the address stored in last_stallable_sfu_reg, as determined by
+ * qpu_instruction_uses_rf().
+ */
+static bool
+mux_read_stalls(struct choose_scoreboard *scoreboard,
+                const struct v3d_qpu_instr *inst)
+{
+        const int stall_tick = scoreboard->last_stallable_sfu_tick + 1;
+
+        /* Any other tick is outside the one-cycle window being checked. */
+        if (scoreboard->tick != stall_tick)
+                return false;
+
+        return qpu_instruction_uses_rf(inst,
+                                       scoreboard->last_stallable_sfu_reg);
+}
+
static int
get_instruction_priority(const struct v3d_qpu_instr *inst)
{
@@ -852,6 +881,16 @@ update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard,
}
+/**
+ * When inst is an SFU instruction (per v3d_qpu_instr_is_sfu()), records
+ * its add-ALU write address and the current tick in the scoreboard.
+ * Leaves the scoreboard untouched for any other instruction.
+ */
static void
+update_scoreboard_for_sfu_stall_waddr(struct choose_scoreboard *scoreboard,
+                                      const struct v3d_qpu_instr *inst)
+{
+        if (!v3d_qpu_instr_is_sfu(inst))
+                return;
+
+        scoreboard->last_stallable_sfu_tick = scoreboard->tick;
+        scoreboard->last_stallable_sfu_reg = inst->alu.add.waddr;
+}
+
+static void
update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
const struct v3d_qpu_instr *inst)
{
@@ -864,6 +903,9 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
if (inst->alu.add.magic_write) {
update_scoreboard_for_magic_waddr(scoreboard,
inst->alu.add.waddr);
+ } else {
+ update_scoreboard_for_sfu_stall_waddr(scoreboard,
+ inst);
}
}
@@ -1298,6 +1340,8 @@ schedule_instructions(struct v3d_compile *c,
fprintf(stderr, "\n");
}
}
+ if (mux_read_stalls(scoreboard, inst))
+ c->qpu_inst_stalled_count++;
}
/* Update the uniform index for the rewritten location --
@@ -1481,6 +1525,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c)
scoreboard.last_magic_sfu_write_tick = -10;
scoreboard.last_uniforms_reset_tick = -10;
scoreboard.last_thrsw_tick = -10;
+ scoreboard.last_stallable_sfu_tick = -10;
if (debug) {
fprintf(stderr, "Pre-schedule instructions\n");