diff options
author | Eric Anholt <[email protected]> | 2019-02-04 10:35:40 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2019-02-05 15:46:04 -0800 |
commit | 940501a446ea954a4437c49d0683e5e7f9681bad (patch) | |
tree | 541efd77308688b71d075619ded667d564267ae7 | |
parent | e5c6938590e5e495ce4f3f07dbdd53ca67297462 (diff) |
v3d: Fix copy-propagation of input unpacks.
I had a single function for "does this do float input unpacking" with two
major flaws: It was missing the most common thing to try to copy propagate
an f32 input unpack to (the VFPACK to an FP16 render target) along with
several other ALU ops, and also would try to propagate an f32 unpack into
a VFMUL which only does f16 unpacks.
instructions in affected programs: 659232 -> 655895 (-0.51%)
uniforms in affected programs: 132613 -> 135336 (2.05%)
and a couple of programs increase their thread counts.
The uniforms hit appears to be a pattern in generated code of doing (-a >=
a) comparisons, which when a is abs(b) can result in the abs instruction
being copy propagated once but not fully removed by dead-code elimination (DCE).
-rw-r--r-- | src/broadcom/compiler/v3d_compiler.h | 1 | ||||
-rw-r--r-- | src/broadcom/compiler/vir.c | 32 | ||||
-rw-r--r-- | src/broadcom/compiler/vir_opt_copy_propagate.c | 27 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_instr.c | 67 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_instr.h | 2 |
5 files changed, 94 insertions, 35 deletions
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index cea26484a8f..1b6d2e7c2dc 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -787,7 +787,6 @@ bool vir_is_raw_mov(struct qinst *inst); bool vir_is_tex(struct qinst *inst); bool vir_is_add(struct qinst *inst); bool vir_is_mul(struct qinst *inst); -bool vir_is_float_input(struct qinst *inst); bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst); bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst); struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 10105fbd861..077f9c1ecc9 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -133,38 +133,6 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst) } bool -vir_is_float_input(struct qinst *inst) -{ - /* XXX: More instrs */ - switch (inst->qpu.type) { - case V3D_QPU_INSTR_TYPE_BRANCH: - return false; - case V3D_QPU_INSTR_TYPE_ALU: - switch (inst->qpu.alu.add.op) { - case V3D_QPU_A_FADD: - case V3D_QPU_A_FSUB: - case V3D_QPU_A_FMIN: - case V3D_QPU_A_FMAX: - case V3D_QPU_A_FTOIN: - return true; - default: - break; - } - - switch (inst->qpu.alu.mul.op) { - case V3D_QPU_M_FMOV: - case V3D_QPU_M_VFMUL: - case V3D_QPU_M_FMUL: - return true; - default: - break; - } - } - - return false; -} - -bool vir_is_raw_mov(struct qinst *inst) { if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || diff --git a/src/broadcom/compiler/vir_opt_copy_propagate.c b/src/broadcom/compiler/vir_opt_copy_propagate.c index 2a22a1b5521..dc35701e3db 100644 --- a/src/broadcom/compiler/vir_opt_copy_propagate.c +++ b/src/broadcom/compiler/vir_opt_copy_propagate.c @@ -151,13 +151,36 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs) * would be the same between the two * instructions. 
*/ - if (vir_is_float_input(inst) != - vir_is_float_input(mov)) { + if (v3d_qpu_unpacks_f32(&inst->qpu) != + v3d_qpu_unpacks_f32(&mov->qpu) || + v3d_qpu_unpacks_f16(&inst->qpu) != + v3d_qpu_unpacks_f16(&mov->qpu)) { continue; } + /* No composing the unpacks. */ if (vir_has_unpack(inst, i)) continue; + + /* these ops can't represent abs. */ + if (mov->qpu.alu.mul.a_unpack == V3D_QPU_UNPACK_ABS) { + switch (inst->qpu.alu.add.op) { + case V3D_QPU_A_VFPACK: + case V3D_QPU_A_FROUND: + case V3D_QPU_A_FTRUNC: + case V3D_QPU_A_FFLOOR: + case V3D_QPU_A_FCEIL: + case V3D_QPU_A_FDX: + case V3D_QPU_A_FDY: + case V3D_QPU_A_FTOIN: + case V3D_QPU_A_FTOIZ: + case V3D_QPU_A_FTOUZ: + case V3D_QPU_A_FTOC: + continue; + default: + break; + } + } } if (debug) { diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index add2d2a23c8..12a9c32c831 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -867,3 +867,70 @@ v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) return false; } + +bool +v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) +{ + if (inst->type != V3D_QPU_INSTR_TYPE_ALU) + return false; + + switch (inst->alu.add.op) { + case V3D_QPU_A_FADD: + case V3D_QPU_A_FADDNF: + case V3D_QPU_A_FSUB: + case V3D_QPU_A_FMIN: + case V3D_QPU_A_FMAX: + case V3D_QPU_A_FCMP: + case V3D_QPU_A_FROUND: + case V3D_QPU_A_FTRUNC: + case V3D_QPU_A_FFLOOR: + case V3D_QPU_A_FCEIL: + case V3D_QPU_A_FDX: + case V3D_QPU_A_FDY: + case V3D_QPU_A_FTOIN: + case V3D_QPU_A_FTOIZ: + case V3D_QPU_A_FTOUZ: + case V3D_QPU_A_FTOC: + case V3D_QPU_A_VFPACK: + return true; + break; + default: + break; + } + + switch (inst->alu.mul.op) { + case V3D_QPU_M_FMOV: + case V3D_QPU_M_FMUL: + return true; + break; + default: + break; + } + + return false; +} +bool +v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) +{ + if (inst->type != V3D_QPU_INSTR_TYPE_ALU) + return false; + + switch (inst->alu.add.op) { + case V3D_QPU_A_VFMIN: + case V3D_QPU_A_VFMAX: + return true; 
+ break; + default: + break; + } + + switch (inst->alu.mul.op) { + case V3D_QPU_M_VFMUL: + return true; + break; + default: + break; + } + + return false; +} diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index 1e2dcb78af6..a77430ff882 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -464,5 +464,7 @@ bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST; +bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; +bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; #endif |