diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/broadcom/compiler/nir_to_vir.c | 82 | ||||
-rw-r--r-- | src/broadcom/compiler/qpu_schedule.c | 6 | ||||
-rw-r--r-- | src/broadcom/compiler/v3d_compiler.h | 9 | ||||
-rw-r--r-- | src/broadcom/compiler/vir.c | 7 | ||||
-rw-r--r-- | src/broadcom/compiler/vir_register_allocate.c | 22 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_instr.c | 61 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_instr.h | 10 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_pack.c | 47 | ||||
-rw-r--r-- | src/broadcom/qpu/tests/qpu_disasm.c | 4 |
9 files changed, 197 insertions, 51 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 208ee1b86a8..87ce06a49cc 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -29,6 +29,7 @@ #include "util/hash_table.h" #include "compiler/nir/nir.h" #include "compiler/nir/nir_builder.h" +#include "common/v3d_device_info.h" #include "v3d_compiler.h" /* We don't do any address packing. */ @@ -1224,7 +1225,21 @@ emit_frag_end(struct v3d_compile *c) } static void -emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w) +vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t *vpm_index) +{ + if (c->devinfo->ver >= 40) { + vir_STVPMV(c, vir_uniform_ui(c, *vpm_index), val); + *vpm_index = *vpm_index + 1; + } else { + vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val); + } + + c->num_vpm_writes++; +} + +static void +emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w, + uint32_t *vpm_index) { for (int i = 0; i < 2; i++) { struct qreg coord = c->outputs[c->output_position_index + i]; @@ -1232,34 +1247,32 @@ emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w) vir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0)); coord = vir_FMUL(c, coord, rcp_w); - vir_FTOIN_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), - coord); + vir_VPM_WRITE(c, vir_FTOIN(c, coord), vpm_index); } } static void -emit_zs_write(struct v3d_compile *c, struct qreg rcp_w) +emit_zs_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index) { struct qreg zscale = vir_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0); struct qreg zoffset = vir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0); - vir_FADD_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), - vir_FMUL(c, vir_FMUL(c, - c->outputs[c->output_position_index + 2], - zscale), - rcp_w), - zoffset); + struct qreg z = c->outputs[c->output_position_index + 2]; + z = vir_FMUL(c, z, zscale); + z = vir_FMUL(c, z, rcp_w); + z = vir_FADD(c, z, zoffset); + vir_VPM_WRITE(c, z, vpm_index); } static void -emit_rcp_wc_write(struct v3d_compile *c, struct qreg rcp_w) +emit_rcp_wc_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index) { - vir_VPM_WRITE(c, rcp_w); + vir_VPM_WRITE(c, rcp_w, vpm_index); } static void -emit_point_size_write(struct v3d_compile *c) +emit_point_size_write(struct v3d_compile *c, uint32_t *vpm_index) { struct qreg point_size; @@ -1273,12 +1286,15 @@ emit_point_size_write(struct v3d_compile *c) */ point_size = vir_FMAX(c, point_size, vir_uniform_f(c, .125)); - vir_VPM_WRITE(c, point_size); + vir_VPM_WRITE(c, point_size, vpm_index); } static void emit_vpm_write_setup(struct v3d_compile *c) { + if (c->devinfo->ver >= 40) + return; + uint32_t packed; struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP unpacked = { V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_header, @@ -1300,6 +1316,7 @@ emit_vpm_write_setup(struct v3d_compile *c) static void emit_vert_end(struct v3d_compile *c) { + uint32_t vpm_index = 0; struct qreg rcp_w = vir_SFU(c, V3D_QPU_WADDR_RECIP, c->outputs[c->output_position_index + 3]); @@ -1307,21 +1324,22 @@ emit_vert_end(struct v3d_compile *c) if (c->vs_key->is_coord) { for (int i = 0; i < 4; i++) - vir_VPM_WRITE(c, c->outputs[c->output_position_index + i]); - emit_scaled_viewport_write(c, rcp_w); + vir_VPM_WRITE(c, c->outputs[c->output_position_index + i], + &vpm_index); + emit_scaled_viewport_write(c, rcp_w, &vpm_index); if (c->vs_key->per_vertex_point_size) { - emit_point_size_write(c); + emit_point_size_write(c, &vpm_index); /* emit_rcp_wc_write(c, rcp_w); */ } /* XXX: Z-only rendering */ if (0) - emit_zs_write(c, rcp_w); + emit_zs_write(c, rcp_w, &vpm_index); } else { - emit_scaled_viewport_write(c, rcp_w); - emit_zs_write(c, rcp_w); - emit_rcp_wc_write(c, rcp_w); + emit_scaled_viewport_write(c, rcp_w, &vpm_index); + emit_zs_write(c, rcp_w, &vpm_index); + emit_rcp_wc_write(c, rcp_w, &vpm_index); if (c->vs_key->per_vertex_point_size) - emit_point_size_write(c); + emit_point_size_write(c, &vpm_index); } for (int i = 0; i < c->vs_key->num_fs_inputs; i++) { @@ -1332,7 +1350,8 @@ emit_vert_end(struct v3d_compile *c) struct v3d_varying_slot output = c->output_slots[j]; if (!memcmp(&input, &output, sizeof(input))) { - vir_VPM_WRITE(c, c->outputs[j]); + vir_VPM_WRITE(c, c->outputs[j], + &vpm_index); break; } } @@ -1340,7 +1359,8 @@ emit_vert_end(struct v3d_compile *c) * this FS input. */ if (j == c->num_outputs) - vir_VPM_WRITE(c, vir_uniform_f(c, 0.0)); + vir_VPM_WRITE(c, vir_uniform_f(c, 0.0), + &vpm_index); } } @@ -1384,6 +1404,12 @@ ntq_emit_vpm_read(struct v3d_compile *c, { struct qreg vpm = vir_reg(QFILE_VPM, vpm_index); + if (c->devinfo->ver >= 40 ) { + return vir_LDVPMV_IN(c, + vir_uniform_ui(c, + (*num_components_queued)++)); + } + if (*num_components_queued != 0) { (*num_components_queued)--; c->num_inputs++; @@ -1501,8 +1527,12 @@ ntq_setup_inputs(struct v3d_compile *c) } if (c->s->info.stage == MESA_SHADER_VERTEX) { - assert(vpm_components_queued == 0); - assert(num_components == 0); + if (c->devinfo->ver >= 40) { + assert(vpm_components_queued == num_components); + } else { + assert(vpm_components_queued == 0); + assert(num_components == 0); + } } } diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 7fe46202636..cab117b523e 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -594,6 +594,9 @@ qpu_magic_waddr_is_periph(enum v3d_qpu_waddr waddr) static bool qpu_accesses_peripheral(const struct v3d_qpu_instr *inst) { + if (v3d_qpu_uses_vpm(inst)) + return true; + if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { if (inst->alu.add.op != V3D_QPU_A_NOP && inst->alu.add.magic_write && @@ -601,9 +604,6 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst) return true; } - if (inst->alu.add.op == V3D_QPU_A_VPMSETUP) - return true; - if (inst->alu.mul.op != V3D_QPU_M_NOP && inst->alu.mul.magic_write && qpu_magic_waddr_is_periph(inst->alu.mul.waddr)) { diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 4ced588fbbe..0a6638b95c5 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -791,6 +791,7 @@ VIR_A_ALU2(OR) VIR_A_ALU2(XOR) VIR_A_ALU2(VADD) VIR_A_ALU2(VSUB) +VIR_A_ALU2(STVPMV) VIR_A_ALU1(NOT) VIR_A_ALU1(NEG) VIR_A_ALU1(FLAPUSH) @@ -800,6 +801,8 @@ VIR_A_ALU1(SETMSF) VIR_A_ALU1(SETREVF) VIR_A_ALU1(TIDX) VIR_A_ALU1(EIDX) +VIR_A_ALU1(LDVPMV_IN) +VIR_A_ALU1(LDVPMV_OUT) VIR_A_ALU0(FXCD) VIR_A_ALU0(XCD) @@ -854,12 +857,6 @@ vir_SEL(struct v3d_compile *c, enum v3d_qpu_cond cond, return t; } -static inline void -vir_VPM_WRITE(struct v3d_compile *c, struct qreg val) -{ - vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val); -} - static inline struct qinst * vir_NOP(struct v3d_compile *c) { diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index c129bb047e6..80d11aaf7f6 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -92,6 +92,9 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst) case V3D_QPU_A_SETREVF: case V3D_QPU_A_SETMSF: case V3D_QPU_A_VPMSETUP: + case V3D_QPU_A_STVPMV: + case V3D_QPU_A_STVPMD: + case V3D_QPU_A_STVPMP: return true; default: break; @@ -412,10 +415,6 @@ static void vir_emit(struct v3d_compile *c, struct qinst *inst) { list_addtail(&inst->link, &c->cur_block->instructions); - - if (inst->dst.file == QFILE_MAGIC && - inst->dst.index == V3D_QPU_WADDR_VPM) - c->num_vpm_writes++; } /* Updates inst to write to a new temporary, emits it, and notes the def. */ diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index f39f0c2829b..0889937f424 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -160,6 +160,28 @@ v3d_register_allocate(struct v3d_compile *c) } } + if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { + switch (inst->qpu.alu.add.op) { + case V3D_QPU_A_LDVPMV_IN: + case V3D_QPU_A_LDVPMV_OUT: + case V3D_QPU_A_LDVPMD_IN: + case V3D_QPU_A_LDVPMD_OUT: + case V3D_QPU_A_LDVPMP: + case V3D_QPU_A_LDVPMG_IN: + case V3D_QPU_A_LDVPMG_OUT: + /* LDVPMs only store to temps (the MA flag + * decides whether the LDVPM is in or out) + */ + assert(inst->dst.file == QFILE_TEMP); + class_bits[temp_to_node[inst->dst.index]] &= + CLASS_BIT_PHYS; + break; + + default: + break; + } + } + if (inst->src[0].file == QFILE_REG) { switch (inst->src[0].index) { case 0: diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 682fee740a0..85d6cf75d81 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -113,10 +113,13 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op) [V3D_QPU_A_TMUWT] = "tmuwt", [V3D_QPU_A_VPMSETUP] = "vpmsetup", [V3D_QPU_A_VPMWT] = "vpmwt", - [V3D_QPU_A_LDVPMV] = "ldvpmv", - [V3D_QPU_A_LDVPMD] = "ldvpmd", + [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in", + [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out", + [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in", + [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out", [V3D_QPU_A_LDVPMP] = "ldvpmp", - [V3D_QPU_A_LDVPMG] = "ldvpmg", + [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in", + [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out", [V3D_QPU_A_FCMP] = "fcmp", [V3D_QPU_A_VFMAX] = "vfmax", [V3D_QPU_A_FROUND] = "fround", @@ -376,10 +379,13 @@ static const uint8_t add_op_args[] = { [V3D_QPU_A_VPMSETUP] = D | A, - [V3D_QPU_A_LDVPMV] = D | A, - [V3D_QPU_A_LDVPMD] = D | A, + [V3D_QPU_A_LDVPMV_IN] = D | A, + [V3D_QPU_A_LDVPMV_OUT] = D | A, + [V3D_QPU_A_LDVPMD_IN] = D | A, + [V3D_QPU_A_LDVPMD_OUT] = D | A, [V3D_QPU_A_LDVPMP] = D | A, - [V3D_QPU_A_LDVPMG] = D | A | B, + [V3D_QPU_A_LDVPMG_IN] = D | A | B, + [V3D_QPU_A_LDVPMG_OUT] = D | A | B, /* FIXME: MOVABSNEG */ @@ -516,6 +522,49 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) waddr == V3D_QPU_WADDR_SYNCU); } +static bool +v3d_qpu_add_op_uses_vpm(enum v3d_qpu_add_op op) +{ + switch (op) { + case V3D_QPU_A_VPMSETUP: + case V3D_QPU_A_VPMWT: + case V3D_QPU_A_LDVPMV_IN: + case V3D_QPU_A_LDVPMV_OUT: + case V3D_QPU_A_LDVPMD_IN: + case V3D_QPU_A_LDVPMD_OUT: + case V3D_QPU_A_LDVPMP: + case V3D_QPU_A_LDVPMG_IN: + case V3D_QPU_A_LDVPMG_OUT: + case V3D_QPU_A_STVPMV: + case V3D_QPU_A_STVPMD: + case V3D_QPU_A_STVPMP: + return true; + default: + return false; + } +} + +bool +v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) +{ + if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { + if (v3d_qpu_add_op_uses_vpm(inst->alu.add.op)) + return true; + + if (inst->alu.add.magic_write && + v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) { + return true; + } + + if (inst->alu.mul.magic_write && + v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) { + return true; + } + } + + return false; +} + bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *inst) diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index 468fe89facd..0bd79ca68da 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -173,10 +173,13 @@ enum v3d_qpu_add_op { V3D_QPU_A_TMUWT, V3D_QPU_A_VPMSETUP, V3D_QPU_A_VPMWT, - V3D_QPU_A_LDVPMV, - V3D_QPU_A_LDVPMD, + V3D_QPU_A_LDVPMV_IN, + V3D_QPU_A_LDVPMV_OUT, + V3D_QPU_A_LDVPMD_IN, + V3D_QPU_A_LDVPMD_OUT, V3D_QPU_A_LDVPMP, - V3D_QPU_A_LDVPMG, + V3D_QPU_A_LDVPMG_IN, + V3D_QPU_A_LDVPMG_OUT, V3D_QPU_A_FCMP, V3D_QPU_A_VFMAX, V3D_QPU_A_FROUND, @@ -425,6 +428,7 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux); +bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst); bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST; diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c index 161e24f0300..ffabc9a969d 100644 --- a/src/broadcom/qpu/qpu_pack.c +++ b/src/broadcom/qpu/qpu_pack.c @@ -515,7 +515,11 @@ static const struct opcode_desc add_ops[] = { { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT }, { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT }, - { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP }, + { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 }, + { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 }, + { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 }, + { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 }, + { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 }, /* FIXME: MORE COMPLICATED */ /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */ @@ -823,7 +827,24 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, instr->alu.add.a = mux_a; instr->alu.add.b = mux_b; instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); - instr->alu.add.magic_write = packed_inst & VC5_QPU_MA; + + instr->alu.add.magic_write = false; + if (packed_inst & VC5_QPU_MA) { + switch (instr->alu.add.op) { + case V3D_QPU_A_LDVPMV_IN: + instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT; + break; + case V3D_QPU_A_LDVPMD_IN: + instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT; + break; + case V3D_QPU_A_LDVPMG_IN: + instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT; + break; + default: + instr->alu.add.magic_write = true; + break; + } + } return true; } @@ -930,16 +951,36 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, if (nsrc < 1) mux_a = ffs(desc->mux_a_mask) - 1; + bool no_magic_write = false; + switch (instr->alu.add.op) { case V3D_QPU_A_STVPMV: waddr = 0; + no_magic_write = true; break; case V3D_QPU_A_STVPMD: waddr = 1; + no_magic_write = true; break; case V3D_QPU_A_STVPMP: waddr = 2; + no_magic_write = true; + break; + + case V3D_QPU_A_LDVPMV_IN: + case V3D_QPU_A_LDVPMD_IN: + case V3D_QPU_A_LDVPMP: + case V3D_QPU_A_LDVPMG_IN: + assert(!instr->alu.add.magic_write); break; + + case V3D_QPU_A_LDVPMV_OUT: + case V3D_QPU_A_LDVPMD_OUT: + case V3D_QPU_A_LDVPMG_OUT: + assert(!instr->alu.add.magic_write); + *packed_instr |= VC5_QPU_MA; + break; + default: break; } @@ -1065,7 +1106,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B); *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD); *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A); - if (instr->alu.add.magic_write) + if (instr->alu.add.magic_write && !no_magic_write) *packed_instr |= VC5_QPU_MA; return true; diff --git a/src/broadcom/qpu/tests/qpu_disasm.c b/src/broadcom/qpu/tests/qpu_disasm.c index 27dc184f76e..8d77740edce 100644 --- a/src/broadcom/qpu/tests/qpu_disasm.c +++ b/src/broadcom/qpu/tests/qpu_disasm.c @@ -76,6 +76,10 @@ static const struct { { 41, 0xdb3048eb9d533780ull, "fmax rf43.l, r3.h, rf30; fmul rf35.h, r4, r2.l; ldunifarf.r1" }, { 41, 0x733620471e6ce700ull, "faddnf rf7.l, rf28.h, r1.l; fmul r1, r3.h, r3.abs; ldunifarf.rsqrt2" }, { 41, 0x9c094adef634b000ull, "ffloor.ifb rf30.l, r3; fmul.pushz rf43.l, r5, r1.h" }, + + /* v4.1 opcodes */ + { 41, 0x3de020c7bdfd200dull, "ldvpmg_in rf7, r2, r2; mov r3, 13" }, + { 41, 0x3de02040f8ff7201ull, "stvpmv 1, rf8 ; mov r1, 1" }, }; static void |