From 940501a446ea954a4437c49d0683e5e7f9681bad Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 4 Feb 2019 10:35:40 -0800 Subject: v3d: Fix copy-propagation of input unpacks. I had a single function for "does this do float input unpacking" with two major flaws: It was missing the most common thing to try to copy propagate a f32 input unpack to (the VFPACK to an FP16 render target) along with several other ALU ops, and also would try to propagate an f32 unpack into a VFMUL which only does f16 unpacks. instructions in affected programs: 659232 -> 655895 (-0.51%) uniforms in affected programs: 132613 -> 135336 (2.05%) and a couple of programs increase their thread counts. The uniforms hit appears to be a pattern in generated code of doing (-a >= a) comparisons, which when a is abs(b) can result in the abs instruction being copy propagated once but not fully DCEed. --- src/broadcom/qpu/qpu_instr.c | 67 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'src/broadcom/qpu/qpu_instr.c') diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index add2d2a23c8..12a9c32c831 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -867,3 +867,70 @@ v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) return false; } + +bool +v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) +{ + if (inst->type != V3D_QPU_INSTR_TYPE_ALU) + return false; + + switch (inst->alu.add.op) { + case V3D_QPU_A_FADD: + case V3D_QPU_A_FADDNF: + case V3D_QPU_A_FSUB: + case V3D_QPU_A_FMIN: + case V3D_QPU_A_FMAX: + case V3D_QPU_A_FCMP: + case V3D_QPU_A_FROUND: + case V3D_QPU_A_FTRUNC: + case V3D_QPU_A_FFLOOR: + case V3D_QPU_A_FCEIL: + case V3D_QPU_A_FDX: + case V3D_QPU_A_FDY: + case V3D_QPU_A_FTOIN: + case V3D_QPU_A_FTOIZ: + case V3D_QPU_A_FTOUZ: + case V3D_QPU_A_FTOC: + case V3D_QPU_A_VFPACK: + return true; + break; + default: + break; + } + + switch (inst->alu.mul.op) { + case V3D_QPU_M_FMOV: + case V3D_QPU_M_FMUL: 
+ return true; + break; + default: + break; + } + + return false; +} +/* v3d_qpu_unpacks_f16: returns true only when inst is an ALU instruction whose add op is VFMIN or VFMAX, or whose mul op is VFMUL (the ops this table treats as taking f16 input unpacks); returns false for non-ALU instructions and for every other op. The add op is checked before the mul op. */ bool +v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) +{ + if (inst->type != V3D_QPU_INSTR_TYPE_ALU) + return false; + + switch (inst->alu.add.op) { + case V3D_QPU_A_VFMIN: + case V3D_QPU_A_VFMAX: + return true; + break; + default: + break; + } + + switch (inst->alu.mul.op) { + case V3D_QPU_M_VFMUL: + return true; + break; + default: + break; + } + + return false; +} -- cgit v1.2.3