diff options
author | Jose Maria Casanova Crespo <[email protected]> | 2019-07-02 18:31:09 +0200 |
---|---|---|
committer | Jose Maria Casanova Crespo <[email protected]> | 2019-07-22 03:00:50 +0200 |
commit | c341ab7ffbac822d3d3cbb3d3ae9d2a19ea3cc9a (patch) | |
tree | 70eba195aec80c8ba4d6f0f721fd957b386e933f /src/broadcom/compiler/qpu_schedule.c | |
parent | f7224014df0d366453739356b9968ca94ad43979 (diff) |
v3d: add shader-db stat to count SFU stalls
SFU operations have a latency of 2 cycles, so if their results
are used in the cycle immediately following an SFU instruction, the GPU
stalls for an extra cycle until the result is available.
This adds the number of stalls to the shader-db debug mode, along with
the sum of instructions + stalls, so that scheduling optimizations
that avoid generating SFU stalls can be evaluated.
v2: Rename v3d_qpu_generates_sfu_stalls to v3d_qpu_instr_is_sfu (Eric)
Reviewed-by: Eric Anholt <[email protected]>
Diffstat (limited to 'src/broadcom/compiler/qpu_schedule.c')
-rw-r--r-- | src/broadcom/compiler/qpu_schedule.c | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index b8e04f6ea13..370881b00ad 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -440,6 +440,8 @@ struct choose_scoreboard { struct dag *dag; int tick; int last_magic_sfu_write_tick; + int last_stallable_sfu_reg; + int last_stallable_sfu_tick; int last_ldvary_tick; int last_uniforms_reset_tick; int last_thrsw_tick; @@ -531,6 +533,33 @@ pixel_scoreboard_too_soon(struct choose_scoreboard *scoreboard, return (scoreboard->tick == 0 && qpu_inst_is_tlb(inst)); } +static bool +qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst, + uint32_t waddr) { + + if (inst->type != V3D_QPU_INSTR_TYPE_ALU) + return false; + + if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) && + inst->raddr_a == waddr) + return true; + + if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) && + !inst->sig.small_imm && (inst->raddr_b == waddr)) + return true; + + return false; +} + +static bool +mux_read_stalls(struct choose_scoreboard *scoreboard, + const struct v3d_qpu_instr *inst) +{ + return scoreboard->tick == scoreboard->last_stallable_sfu_tick + 1 && + qpu_instruction_uses_rf(inst, + scoreboard->last_stallable_sfu_reg); +} + static int get_instruction_priority(const struct v3d_qpu_instr *inst) { @@ -852,6 +881,16 @@ update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard, } static void +update_scoreboard_for_sfu_stall_waddr(struct choose_scoreboard *scoreboard, + const struct v3d_qpu_instr *inst) +{ + if (v3d_qpu_instr_is_sfu(inst)) { + scoreboard->last_stallable_sfu_reg = inst->alu.add.waddr; + scoreboard->last_stallable_sfu_tick = scoreboard->tick; + } +} + +static void update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard, const struct v3d_qpu_instr *inst) { @@ -864,6 +903,9 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard, if (inst->alu.add.magic_write) { update_scoreboard_for_magic_waddr(scoreboard, inst->alu.add.waddr); + } 
else { + update_scoreboard_for_sfu_stall_waddr(scoreboard, + inst); } } @@ -1298,6 +1340,8 @@ schedule_instructions(struct v3d_compile *c, fprintf(stderr, "\n"); } } + if (mux_read_stalls(scoreboard, inst)) + c->qpu_inst_stalled_count++; } /* Update the uniform index for the rewritten location -- @@ -1481,6 +1525,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c) scoreboard.last_magic_sfu_write_tick = -10; scoreboard.last_uniforms_reset_tick = -10; scoreboard.last_thrsw_tick = -10; + scoreboard.last_stallable_sfu_tick = -10; if (debug) { fprintf(stderr, "Pre-schedule instructions\n"); |