summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2019-02-04 10:35:40 -0800
committer: Eric Anholt <[email protected]> 2019-02-05 15:46:04 -0800
commit: 940501a446ea954a4437c49d0683e5e7f9681bad (patch)
tree: 541efd77308688b71d075619ded667d564267ae7
parent: e5c6938590e5e495ce4f3f07dbdd53ca67297462 (diff)
v3d: Fix copy-propagation of input unpacks.
I had a single function for "does this do float input unpacking" with two major flaws: it was missing the most common thing to try to copy propagate an f32 input unpack to (the VFPACK to an FP16 render target) along with several other ALU ops, and it would also try to propagate an f32 unpack into a VFMUL, which only does f16 unpacks.

instructions in affected programs: 659232 -> 655895 (-0.51%)
uniforms in affected programs: 132613 -> 135336 (2.05%)

and a couple of programs increase their thread counts. The uniforms hit appears to be a pattern in generated code of doing (-a >= a) comparisons, which when a is abs(b) can result in the abs instruction being copy propagated once but not fully DCEed.
-rw-r--r-- src/broadcom/compiler/v3d_compiler.h 1
-rw-r--r-- src/broadcom/compiler/vir.c 32
-rw-r--r-- src/broadcom/compiler/vir_opt_copy_propagate.c 27
-rw-r--r-- src/broadcom/qpu/qpu_instr.c 67
-rw-r--r-- src/broadcom/qpu/qpu_instr.h 2
5 files changed, 94 insertions, 35 deletions
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index cea26484a8f..1b6d2e7c2dc 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -787,7 +787,6 @@ bool vir_is_raw_mov(struct qinst *inst);
bool vir_is_tex(struct qinst *inst);
bool vir_is_add(struct qinst *inst);
bool vir_is_mul(struct qinst *inst);
-bool vir_is_float_input(struct qinst *inst);
bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 10105fbd861..077f9c1ecc9 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -133,38 +133,6 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
}
bool
-vir_is_float_input(struct qinst *inst)
-{
- /* XXX: More instrs */
- switch (inst->qpu.type) {
- case V3D_QPU_INSTR_TYPE_BRANCH:
- return false;
- case V3D_QPU_INSTR_TYPE_ALU:
- switch (inst->qpu.alu.add.op) {
- case V3D_QPU_A_FADD:
- case V3D_QPU_A_FSUB:
- case V3D_QPU_A_FMIN:
- case V3D_QPU_A_FMAX:
- case V3D_QPU_A_FTOIN:
- return true;
- default:
- break;
- }
-
- switch (inst->qpu.alu.mul.op) {
- case V3D_QPU_M_FMOV:
- case V3D_QPU_M_VFMUL:
- case V3D_QPU_M_FMUL:
- return true;
- default:
- break;
- }
- }
-
- return false;
-}
-
-bool
vir_is_raw_mov(struct qinst *inst)
{
if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
diff --git a/src/broadcom/compiler/vir_opt_copy_propagate.c b/src/broadcom/compiler/vir_opt_copy_propagate.c
index 2a22a1b5521..dc35701e3db 100644
--- a/src/broadcom/compiler/vir_opt_copy_propagate.c
+++ b/src/broadcom/compiler/vir_opt_copy_propagate.c
@@ -151,13 +151,36 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
* would be the same between the two
* instructions.
*/
- if (vir_is_float_input(inst) !=
- vir_is_float_input(mov)) {
+ if (v3d_qpu_unpacks_f32(&inst->qpu) !=
+ v3d_qpu_unpacks_f32(&mov->qpu) ||
+ v3d_qpu_unpacks_f16(&inst->qpu) !=
+ v3d_qpu_unpacks_f16(&mov->qpu)) {
continue;
}
+
/* No composing the unpacks. */
if (vir_has_unpack(inst, i))
continue;
+
+ /* these ops can't represent abs. */
+ if (mov->qpu.alu.mul.a_unpack == V3D_QPU_UNPACK_ABS) {
+ switch (inst->qpu.alu.add.op) {
+ case V3D_QPU_A_VFPACK:
+ case V3D_QPU_A_FROUND:
+ case V3D_QPU_A_FTRUNC:
+ case V3D_QPU_A_FFLOOR:
+ case V3D_QPU_A_FCEIL:
+ case V3D_QPU_A_FDX:
+ case V3D_QPU_A_FDY:
+ case V3D_QPU_A_FTOIN:
+ case V3D_QPU_A_FTOIZ:
+ case V3D_QPU_A_FTOUZ:
+ case V3D_QPU_A_FTOC:
+ continue;
+ default:
+ break;
+ }
+ }
}
if (debug) {
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
index add2d2a23c8..12a9c32c831 100644
--- a/src/broadcom/qpu/qpu_instr.c
+++ b/src/broadcom/qpu/qpu_instr.c
@@ -867,3 +867,70 @@ v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
return false;
}
+
+bool
+v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
+{
+ if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+ return false;
+
+ switch (inst->alu.add.op) {
+ case V3D_QPU_A_FADD:
+ case V3D_QPU_A_FADDNF:
+ case V3D_QPU_A_FSUB:
+ case V3D_QPU_A_FMIN:
+ case V3D_QPU_A_FMAX:
+ case V3D_QPU_A_FCMP:
+ case V3D_QPU_A_FROUND:
+ case V3D_QPU_A_FTRUNC:
+ case V3D_QPU_A_FFLOOR:
+ case V3D_QPU_A_FCEIL:
+ case V3D_QPU_A_FDX:
+ case V3D_QPU_A_FDY:
+ case V3D_QPU_A_FTOIN:
+ case V3D_QPU_A_FTOIZ:
+ case V3D_QPU_A_FTOUZ:
+ case V3D_QPU_A_FTOC:
+ case V3D_QPU_A_VFPACK:
+ return true;
+ break;
+ default:
+ break;
+ }
+
+ switch (inst->alu.mul.op) {
+ case V3D_QPU_M_FMOV:
+ case V3D_QPU_M_FMUL:
+ return true;
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+bool
+v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
+{
+ if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+ return false;
+
+ switch (inst->alu.add.op) {
+ case V3D_QPU_A_VFMIN:
+ case V3D_QPU_A_VFMAX:
+ return true;
+ break;
+ default:
+ break;
+ }
+
+ switch (inst->alu.mul.op) {
+ case V3D_QPU_M_VFMUL:
+ return true;
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index 1e2dcb78af6..a77430ff882 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -464,5 +464,7 @@ bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
+bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
+bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
#endif