aboutsummaryrefslogtreecommitdiffstats
path: root/src/broadcom/compiler/qpu_schedule.c
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2018-07-20 13:06:50 -0700
committer: Eric Anholt <[email protected]> 2018-07-23 10:21:43 -0700
commit: e7ae9003415cdb52c345bc1a9bd5fa71f0240dda (patch)
tree: af5eab545d75d9da0ca3118bebd49a36216db03a /src/broadcom/compiler/qpu_schedule.c
parent: 58c1d3860fefc16878670f1d25dc8187a81cb01b (diff)
v3d: Switch to using the new SFU instructions on V3D 4.x.
These instructions let us write directly to the phys regfile, instead of just R4. That lets us avoid moving out of R4 to avoid conflicting with other SFU results, and to avoid conflicting with thread switches.

There is still an extra instruction of latency, which is not represented in the scheduler at the moment. If you use the result before it's ready, the QPU will just stall, unlike the magic R4 mode where you'd read the previous value. That means that the following shader-db results aren't quite representative (since we now cause some stalls instead of emitting nops), but they're impressive enough that I'm happy with the change.

total instructions in shared programs: 95669 -> 91275 (-4.59%)
instructions in affected programs: 82590 -> 78196 (-5.32%)
Diffstat (limited to 'src/broadcom/compiler/qpu_schedule.c')
-rw-r--r-- src/broadcom/compiler/qpu_schedule.c | 12
1 files changed, 7 insertions, 5 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index 2a035c5521e..af0b9b86b1c 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -459,7 +459,7 @@ calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list)
struct choose_scoreboard {
int tick;
- int last_sfu_write_tick;
+ int last_magic_sfu_write_tick;
int last_ldvary_tick;
int last_uniforms_reset_tick;
bool tlb_locked;
@@ -471,7 +471,7 @@ mux_reads_too_soon(struct choose_scoreboard *scoreboard,
{
switch (mux) {
case V3D_QPU_MUX_R4:
- if (scoreboard->tick - scoreboard->last_sfu_write_tick <= 2)
+ if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick <= 2)
return true;
break;
@@ -536,7 +536,7 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo,
* This would normally be prevented by dependency tracking, but might
* occur if a dead SFU computation makes it to scheduling.
*/
- if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 &&
+ if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick < 2 &&
v3d_qpu_writes_r4(devinfo, inst))
return true;
@@ -595,6 +595,8 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
{
if (v3d_qpu_uses_vpm(inst))
return true;
+ if (v3d_qpu_uses_sfu(inst))
+ return true;
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
if (inst->alu.add.op != V3D_QPU_A_NOP &&
@@ -825,7 +827,7 @@ update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard,
enum v3d_qpu_waddr waddr)
{
if (v3d_qpu_magic_waddr_is_sfu(waddr))
- scoreboard->last_sfu_write_tick = scoreboard->tick;
+ scoreboard->last_magic_sfu_write_tick = scoreboard->tick;
}
static void
@@ -1467,7 +1469,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c)
struct choose_scoreboard scoreboard;
memset(&scoreboard, 0, sizeof(scoreboard));
scoreboard.last_ldvary_tick = -10;
- scoreboard.last_sfu_write_tick = -10;
+ scoreboard.last_magic_sfu_write_tick = -10;
scoreboard.last_uniforms_reset_tick = -10;
if (debug) {