diff options
author | Eric Anholt <[email protected]> | 2018-01-04 15:35:28 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2018-01-12 21:54:33 -0800 |
commit | 22a02f3e344d6bc47e3e30949a36d00a9eae84a9 (patch) | |
tree | 632ed61f4bee7c932baf6383d256c4ec304994cd /src/broadcom/qpu | |
parent | 55f8a01acae7c4171ccfef301e48dc3cc67f5836 (diff) |
broadcom/vc5: Use the new LDVPM/STVPM opcodes on V3D 4.1.
Now, instead of a magic write register for VPM stores, we have an
instruction to do them (which means no packing of other ALU ops into it).
Because the offset is baked into the instruction, the VPM stores can
also be reordered.
VPM loads also gain the ability to be reordered by packing the row into
the A argument. They also no longer write to the r3 accumulator, and
instead must be stored to a physical register.
Diffstat (limited to 'src/broadcom/qpu')
-rw-r--r-- | src/broadcom/qpu/qpu_instr.c | 61 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_instr.h | 10 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_pack.c | 47 | ||||
-rw-r--r-- | src/broadcom/qpu/tests/qpu_disasm.c | 4 |
4 files changed, 110 insertions, 12 deletions
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 682fee740a0..85d6cf75d81 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -113,10 +113,13 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op) [V3D_QPU_A_TMUWT] = "tmuwt", [V3D_QPU_A_VPMSETUP] = "vpmsetup", [V3D_QPU_A_VPMWT] = "vpmwt", - [V3D_QPU_A_LDVPMV] = "ldvpmv", - [V3D_QPU_A_LDVPMD] = "ldvpmd", + [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in", + [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out", + [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in", + [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out", [V3D_QPU_A_LDVPMP] = "ldvpmp", - [V3D_QPU_A_LDVPMG] = "ldvpmg", + [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in", + [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out", [V3D_QPU_A_FCMP] = "fcmp", [V3D_QPU_A_VFMAX] = "vfmax", [V3D_QPU_A_FROUND] = "fround", @@ -376,10 +379,13 @@ static const uint8_t add_op_args[] = { [V3D_QPU_A_VPMSETUP] = D | A, - [V3D_QPU_A_LDVPMV] = D | A, - [V3D_QPU_A_LDVPMD] = D | A, + [V3D_QPU_A_LDVPMV_IN] = D | A, + [V3D_QPU_A_LDVPMV_OUT] = D | A, + [V3D_QPU_A_LDVPMD_IN] = D | A, + [V3D_QPU_A_LDVPMD_OUT] = D | A, [V3D_QPU_A_LDVPMP] = D | A, - [V3D_QPU_A_LDVPMG] = D | A | B, + [V3D_QPU_A_LDVPMG_IN] = D | A | B, + [V3D_QPU_A_LDVPMG_OUT] = D | A | B, /* FIXME: MOVABSNEG */ @@ -516,6 +522,49 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) waddr == V3D_QPU_WADDR_SYNCU); } +static bool +v3d_qpu_add_op_uses_vpm(enum v3d_qpu_add_op op) +{ + switch (op) { + case V3D_QPU_A_VPMSETUP: + case V3D_QPU_A_VPMWT: + case V3D_QPU_A_LDVPMV_IN: + case V3D_QPU_A_LDVPMV_OUT: + case V3D_QPU_A_LDVPMD_IN: + case V3D_QPU_A_LDVPMD_OUT: + case V3D_QPU_A_LDVPMP: + case V3D_QPU_A_LDVPMG_IN: + case V3D_QPU_A_LDVPMG_OUT: + case V3D_QPU_A_STVPMV: + case V3D_QPU_A_STVPMD: + case V3D_QPU_A_STVPMP: + return true; + default: + return false; + } +} + +bool +v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) +{ + if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { + if (v3d_qpu_add_op_uses_vpm(inst->alu.add.op)) + return true; + + if 
(inst->alu.add.magic_write && + v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) { + return true; + } + + if (inst->alu.mul.magic_write && + v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) { + return true; + } + } + + return false; +} + bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *inst) diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index 468fe89facd..0bd79ca68da 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -173,10 +173,13 @@ enum v3d_qpu_add_op { V3D_QPU_A_TMUWT, V3D_QPU_A_VPMSETUP, V3D_QPU_A_VPMWT, - V3D_QPU_A_LDVPMV, - V3D_QPU_A_LDVPMD, + V3D_QPU_A_LDVPMV_IN, + V3D_QPU_A_LDVPMV_OUT, + V3D_QPU_A_LDVPMD_IN, + V3D_QPU_A_LDVPMD_OUT, V3D_QPU_A_LDVPMP, - V3D_QPU_A_LDVPMG, + V3D_QPU_A_LDVPMG_IN, + V3D_QPU_A_LDVPMG_OUT, V3D_QPU_A_FCMP, V3D_QPU_A_VFMAX, V3D_QPU_A_FROUND, @@ -425,6 +428,7 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux); +bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst); bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST; diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c index 161e24f0300..ffabc9a969d 100644 --- a/src/broadcom/qpu/qpu_pack.c +++ b/src/broadcom/qpu/qpu_pack.c @@ -515,7 +515,11 @@ static const struct opcode_desc add_ops[] = { { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT }, { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT }, - { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP }, + { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 }, + { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 }, + { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 }, + { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 }, + { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 
40 }, /* FIXME: MORE COMPLICATED */ /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */ @@ -823,7 +827,24 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, instr->alu.add.a = mux_a; instr->alu.add.b = mux_b; instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); - instr->alu.add.magic_write = packed_inst & VC5_QPU_MA; + + instr->alu.add.magic_write = false; + if (packed_inst & VC5_QPU_MA) { + switch (instr->alu.add.op) { + case V3D_QPU_A_LDVPMV_IN: + instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT; + break; + case V3D_QPU_A_LDVPMD_IN: + instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT; + break; + case V3D_QPU_A_LDVPMG_IN: + instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT; + break; + default: + instr->alu.add.magic_write = true; + break; + } + } return true; } @@ -930,16 +951,36 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, if (nsrc < 1) mux_a = ffs(desc->mux_a_mask) - 1; + bool no_magic_write = false; + switch (instr->alu.add.op) { case V3D_QPU_A_STVPMV: waddr = 0; + no_magic_write = true; break; case V3D_QPU_A_STVPMD: waddr = 1; + no_magic_write = true; break; case V3D_QPU_A_STVPMP: waddr = 2; + no_magic_write = true; + break; + + case V3D_QPU_A_LDVPMV_IN: + case V3D_QPU_A_LDVPMD_IN: + case V3D_QPU_A_LDVPMP: + case V3D_QPU_A_LDVPMG_IN: + assert(!instr->alu.add.magic_write); break; + + case V3D_QPU_A_LDVPMV_OUT: + case V3D_QPU_A_LDVPMD_OUT: + case V3D_QPU_A_LDVPMG_OUT: + assert(!instr->alu.add.magic_write); + *packed_instr |= VC5_QPU_MA; + break; + default: break; } @@ -1065,7 +1106,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B); *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD); *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A); - if (instr->alu.add.magic_write) + if (instr->alu.add.magic_write && !no_magic_write) *packed_instr |= VC5_QPU_MA; return true; diff --git a/src/broadcom/qpu/tests/qpu_disasm.c b/src/broadcom/qpu/tests/qpu_disasm.c 
index 27dc184f76e..8d77740edce 100644 --- a/src/broadcom/qpu/tests/qpu_disasm.c +++ b/src/broadcom/qpu/tests/qpu_disasm.c @@ -76,6 +76,10 @@ static const struct { { 41, 0xdb3048eb9d533780ull, "fmax rf43.l, r3.h, rf30; fmul rf35.h, r4, r2.l; ldunifarf.r1" }, { 41, 0x733620471e6ce700ull, "faddnf rf7.l, rf28.h, r1.l; fmul r1, r3.h, r3.abs; ldunifarf.rsqrt2" }, { 41, 0x9c094adef634b000ull, "ffloor.ifb rf30.l, r3; fmul.pushz rf43.l, r5, r1.h" }, + + /* v4.1 opcodes */ + { 41, 0x3de020c7bdfd200dull, "ldvpmg_in rf7, r2, r2; mov r3, 13" }, + { 41, 0x3de02040f8ff7201ull, "stvpmv 1, rf8 ; mov r1, 1" }, }; static void |