summaryrefslogtreecommitdiffstats
path: root/src/broadcom/qpu
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2018-07-20 13:06:50 -0700
committerEric Anholt <[email protected]>2018-07-23 10:21:43 -0700
commite7ae9003415cdb52c345bc1a9bd5fa71f0240dda (patch)
treeaf5eab545d75d9da0ca3118bebd49a36216db03a /src/broadcom/qpu
parent58c1d3860fefc16878670f1d25dc8187a81cb01b (diff)
v3d: Switch to using the new SFU instructions on V3D 4.x.
These instructions let us write directly to the phys regfile, instead of just R4. That lets us avoid moving out of R4 to avoid conflicting with other SFU results, and to avoid conflicting with thread switches. There is still an extra instruction of latency, which is not represented in the scheduler at the moment. If you use the result before it's ready, the QPU will just stall, unlike the magic R4 mode where you'd read the previous value. That means that the following shader-db results aren't quite representative (since we now cause some stalls instead of emitting nops), but they're impressive enough that I'm happy with the change. total instructions in shared programs: 95669 -> 91275 (-4.59%) instructions in affected programs: 82590 -> 78196 (-5.32%)
Diffstat (limited to 'src/broadcom/qpu')
-rw-r--r--src/broadcom/qpu/qpu_instr.c30
-rw-r--r--src/broadcom/qpu/qpu_instr.h1
2 files changed, 31 insertions, 0 deletions
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
index deaa533c8ae..a7fb4186e1a 100644
--- a/src/broadcom/qpu/qpu_instr.c
+++ b/src/broadcom/qpu/qpu_instr.c
@@ -603,6 +603,36 @@ v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
}
bool
+v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
+{
+ if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
+ switch (inst->alu.add.op) {
+ case V3D_QPU_A_RECIP:
+ case V3D_QPU_A_RSQRT:
+ case V3D_QPU_A_EXP:
+ case V3D_QPU_A_LOG:
+ case V3D_QPU_A_SIN:
+ case V3D_QPU_A_RSQRT2:
+ return true;
+ default:
+ break;
+ }
+
+ if (inst->alu.add.magic_write &&
+ v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
+ return true;
+ }
+
+ if (inst->alu.mul.magic_write &&
+ v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool
v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst)
{
return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index 09dbf3eb4fa..c37abac3cf8 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -444,6 +444,7 @@ bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
+bool v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;