diff options
-rw-r--r-- | src/broadcom/compiler/qpu_schedule.c | 41 | ||||
-rw-r--r-- | src/broadcom/compiler/qpu_validate.c | 6 | ||||
-rw-r--r-- | src/broadcom/compiler/v3d_compiler.h | 4 | ||||
-rw-r--r-- | src/broadcom/compiler/vir.c | 14 | ||||
-rw-r--r-- | src/broadcom/compiler/vir_dump.c | 45 | ||||
-rw-r--r-- | src/broadcom/compiler/vir_register_allocate.c | 4 | ||||
-rw-r--r-- | src/broadcom/compiler/vir_to_qpu.c | 9 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_disasm.c | 53 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_instr.c | 42 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_instr.h | 16 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_pack.c | 126 | ||||
-rw-r--r-- | src/broadcom/qpu/tests/qpu_disasm.c | 7 |
12 files changed, 322 insertions, 45 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 799da805906..7fe46202636 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -78,6 +78,7 @@ struct schedule_node_child { enum direction { F, R }; struct schedule_state { + const struct v3d_device_info *devinfo; struct schedule_node *last_r[6]; struct schedule_node *last_rf[64]; struct schedule_node *last_sf; @@ -265,6 +266,7 @@ process_uf_deps(struct schedule_state *state, struct schedule_node *n, static void calculate_deps(struct schedule_state *state, struct schedule_node *n) { + const struct v3d_device_info *devinfo = state->devinfo; struct qinst *qinst = n->inst; struct v3d_qpu_instr *inst = &qinst->qpu; @@ -356,12 +358,16 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) process_waddr_deps(state, n, inst->alu.mul.waddr, inst->alu.mul.magic_write); } + if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) { + process_waddr_deps(state, n, inst->sig_addr, + inst->sig_magic); + } - if (v3d_qpu_writes_r3(inst)) + if (v3d_qpu_writes_r3(devinfo, inst)) add_write_dep(state, &state->last_r[3], n); - if (v3d_qpu_writes_r4(inst)) + if (v3d_qpu_writes_r4(devinfo, inst)) add_write_dep(state, &state->last_r[4], n); - if (v3d_qpu_writes_r5(inst)) + if (v3d_qpu_writes_r5(devinfo, inst)) add_write_dep(state, &state->last_r[5], n); if (inst->sig.thrsw) { @@ -410,6 +416,7 @@ calculate_forward_deps(struct v3d_compile *c, struct list_head *schedule_list) struct schedule_state state; memset(&state, 0, sizeof(state)); + state.devinfo = c->devinfo; state.dir = F; list_for_each_entry(struct schedule_node, node, schedule_list, link) @@ -423,6 +430,7 @@ calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list) struct schedule_state state; memset(&state, 0, sizeof(state)); + state.devinfo = c->devinfo; state.dir = R; for (node = schedule_list->prev; schedule_list != node; node = node->prev) { @@ -514,7 +522,8 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard, } static bool -writes_too_soon_after_write(struct choose_scoreboard *scoreboard, +writes_too_soon_after_write(const struct v3d_device_info *devinfo, + struct choose_scoreboard *scoreboard, struct qinst *qinst) { const struct v3d_qpu_instr *inst = &qinst->qpu; @@ -524,7 +533,7 @@ writes_too_soon_after_write(struct choose_scoreboard *scoreboard, * occur if a dead SFU computation makes it to scheduling. */ if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 && - v3d_qpu_writes_r4(inst)) + v3d_qpu_writes_r4(devinfo, inst)) return true; return false; @@ -605,7 +614,8 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst) return (inst->sig.ldvpm || inst->sig.ldtmu || inst->sig.ldtlb || - inst->sig.ldtlbu); + inst->sig.ldtlbu || + inst->sig.wrtmuc); } static bool @@ -619,7 +629,11 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, return false; } - /* Can't do more than one peripheral access in an instruction. */ + /* Can't do more than one peripheral access in an instruction. + * + * XXX: V3D 4.1 allows TMU read along with a VPM read or write, and + * WRTMUC with a TMU magic register write (other than tmuc). + */ if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b)) return false; @@ -663,6 +677,9 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, merge.sig.thrsw |= b->sig.thrsw; merge.sig.ldunif |= b->sig.ldunif; + merge.sig.ldunifrf |= b->sig.ldunifrf; + merge.sig.ldunifa |= b->sig.ldunifa; + merge.sig.ldunifarf |= b->sig.ldunifarf; merge.sig.ldtmu |= b->sig.ldtmu; merge.sig.ldvary |= b->sig.ldvary; merge.sig.ldvpm |= b->sig.ldvpm; @@ -673,6 +690,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, merge.sig.rotate |= b->sig.rotate; merge.sig.wrtmuc |= b->sig.wrtmuc; + if (v3d_qpu_sig_writes_address(devinfo, &a->sig) && + v3d_qpu_sig_writes_address(devinfo, &b->sig)) + return false; + merge.sig_addr |= b->sig_addr; + merge.sig_magic |= b->sig_magic; + uint64_t packed; bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed); @@ -719,7 +742,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo, if (reads_too_soon_after_write(scoreboard, n->inst)) continue; - if (writes_too_soon_after_write(scoreboard, n->inst)) + if (writes_too_soon_after_write(devinfo, scoreboard, n->inst)) continue; /* "A scoreboard wait must not occur in the first two @@ -735,7 +758,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo, * otherwise get scheduled so ldunif and ldvary try to update * r5 in the same tick. */ - if (inst->sig.ldunif && + if ((inst->sig.ldunif || inst->sig.ldunifa) && scoreboard->tick == scoreboard->last_ldvary_tick + 1) { continue; } diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c index d99d76a8beb..3b2c10eabc6 100644 --- a/src/broadcom/compiler/qpu_validate.c +++ b/src/broadcom/compiler/qpu_validate.c @@ -85,6 +85,7 @@ qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst, static void qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) { + const struct v3d_device_info *devinfo = state->c->devinfo; const struct v3d_qpu_instr *inst = &qinst->qpu; if (inst->type != V3D_QPU_INSTR_TYPE_ALU) @@ -94,7 +95,8 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) * r5 one instruction later, which is illegal to have * together. */ - if (state->last && state->last->sig.ldvary && inst->sig.ldunif) { + if (state->last && state->last->sig.ldvary && + (inst->sig.ldunif || inst->sig.ldunifa)) { fail_instr(state, "LDUNIF after a LDVARY"); } @@ -143,7 +145,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4)) fail_instr(state, "R4 read too soon after SFU"); - if (v3d_qpu_writes_r4(inst)) + if (v3d_qpu_writes_r4(devinfo, inst)) fail_instr(state, "R4 write too soon after SFU"); if (sfu_writes) diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 85def2cb02c..4ced588fbbe 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -650,8 +650,8 @@ bool vir_is_add(struct qinst *inst); bool vir_is_mul(struct qinst *inst); bool vir_is_float_input(struct qinst *inst); bool vir_depends_on_flags(struct qinst *inst); -bool vir_writes_r3(struct qinst *inst); -bool vir_writes_r4(struct qinst *inst); +bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst); +bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst); struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); uint8_t vir_channels_written(struct qinst *inst); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 4e78a477bd7..c129bb047e6 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -21,6 +21,7 @@ * IN THE SOFTWARE. */ +#include "broadcom/common/v3d_device_info.h" #include "v3d_compiler.h" int @@ -198,7 +199,7 @@ vir_depends_on_flags(struct qinst *inst) } bool -vir_writes_r3(struct qinst *inst) +vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst) { for (int i = 0; i < vir_get_nsrc(inst); i++) { switch (inst->src[i].file) { @@ -210,11 +211,18 @@ vir_writes_r3(struct qinst *inst) } } + if (devinfo->ver < 41 && (inst->qpu.sig.ldvary || + inst->qpu.sig.ldtlb || + inst->qpu.sig.ldtlbu || + inst->qpu.sig.ldvpm)) { + return true; + } + return false; } bool -vir_writes_r4(struct qinst *inst) +vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst) { switch (inst->dst.file) { case QFILE_MAGIC: @@ -231,7 +239,7 @@ vir_writes_r4(struct qinst *inst) break; } - if (inst->qpu.sig.ldtmu) + if (devinfo->ver < 41 && inst->qpu.sig.ldtmu) return true; return false; diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c index ad5c061a138..cdb1928ed00 100644 --- a/src/broadcom/compiler/vir_dump.c +++ b/src/broadcom/compiler/vir_dump.c @@ -21,6 +21,7 @@ * IN THE SOFTWARE. */ +#include "broadcom/common/v3d_device_info.h" #include "v3d_compiler.h" static void @@ -146,20 +147,60 @@ vir_print_reg(struct v3d_compile *c, struct qreg reg) } static void +vir_dump_sig_addr(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) +{ + if (devinfo->ver < 41) + return; + + if (!instr->sig_magic) + fprintf(stderr, ".rf%d", instr->sig_addr); + else { + const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr); + if (name) + fprintf(stderr, ".%s", name); + else + fprintf(stderr, ".UNKNOWN%d", instr->sig_addr); + } +} + +static void vir_dump_sig(struct v3d_compile *c, struct qinst *inst) { struct v3d_qpu_sig *sig = &inst->qpu.sig; if (sig->thrsw) fprintf(stderr, "; thrsw"); - if (sig->ldvary) + if (sig->ldvary) { fprintf(stderr, "; ldvary"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } if (sig->ldvpm) fprintf(stderr, "; ldvpm"); - if (sig->ldtmu) + if (sig->ldtmu) { fprintf(stderr, "; ldtmu"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } + if (sig->ldtlb) { + fprintf(stderr, "; ldtlb"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } + if (sig->ldtlbu) { + fprintf(stderr, "; ldtlbu"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } if (sig->ldunif) fprintf(stderr, "; ldunif"); + if (sig->ldunifrf) { + fprintf(stderr, "; ldunifrf"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } + if (sig->ldunifa) + fprintf(stderr, "; ldunifa"); + if (sig->ldunifarf) { + fprintf(stderr, "; ldunifarf"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } if (sig->wrtmuc) fprintf(stderr, "; wrtmuc"); } diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index 9ebf2cd69b4..f39f0c2829b 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -139,7 +139,7 @@ v3d_register_allocate(struct v3d_compile *c) * result to a temp), nothing else can be stored in r3/r4 across * it. */ - if (vir_writes_r3(inst)) { + if (vir_writes_r3(c->devinfo, inst)) { for (int i = 0; i < c->num_temps; i++) { if (c->temp_start[i] < ip && c->temp_end[i] > ip) { @@ -149,7 +149,7 @@ v3d_register_allocate(struct v3d_compile *c) } } } - if (vir_writes_r4(inst)) { + if (vir_writes_r4(c->devinfo, inst)) { for (int i = 0; i < c->num_temps; i++) { if (c->temp_start[i] < ip && c->temp_end[i] > ip) { diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c index eeb7b0bc291..525638df691 100644 --- a/src/broadcom/compiler/vir_to_qpu.c +++ b/src/broadcom/compiler/vir_to_qpu.c @@ -264,7 +264,14 @@ v3d_generate_code_block(struct v3d_compile *c, } if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { - if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) { + if (v3d_qpu_sig_writes_address(c->devinfo, + &qinst->qpu.sig)) { + assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP); + assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); + + qinst->qpu.sig_addr = dst.index; + qinst->qpu.sig_magic = dst.magic; + } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) { assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); if (nsrc >= 1) { set_src(&qinst->qpu, diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c index 5ee834852bd..73b43f8c3d6 100644 --- a/src/broadcom/qpu/qpu_disasm.c +++ b/src/broadcom/qpu/qpu_disasm.c @@ -91,7 +91,8 @@ v3d_qpu_disasm_add(struct disasm_state *disasm, int num_src = v3d_qpu_add_op_num_src(instr->alu.add.op); append(disasm, "%s", v3d_qpu_add_op_name(instr->alu.add.op)); - append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac)); + if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig)) + append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac)); append(disasm, "%s", v3d_qpu_pf_name(instr->flags.apf)); append(disasm, "%s", v3d_qpu_uf_name(instr->flags.auf)); @@ -130,7 +131,8 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm, append(disasm, "; "); append(disasm, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op)); - append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc)); + if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig)) + append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc)); append(disasm, "%s", v3d_qpu_pf_name(instr->flags.mpf)); append(disasm, "%s", v3d_qpu_uf_name(instr->flags.muf)); @@ -162,6 +164,24 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm, } static void +v3d_qpu_disasm_sig_addr(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + if (disasm->devinfo->ver < 41) + return; + + if (!instr->sig_magic) + append(disasm, ".rf%d", instr->sig_addr); + else { + const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr); + if (name) + append(disasm, ".%s", name); + else + append(disasm, ".UNKNOWN%d", instr->sig_addr); + } +} + +static void v3d_qpu_disasm_sig(struct disasm_state *disasm, const struct v3d_qpu_instr *instr) { @@ -172,6 +192,9 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm, !sig->ldvpm && !sig->ldtmu && !sig->ldunif && + !sig->ldunifrf && + !sig->ldunifa && + !sig->ldunifarf && !sig->wrtmuc) { return; } @@ -180,14 +203,36 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm, if (sig->thrsw) append(disasm, "; thrsw"); - if (sig->ldvary) + if (sig->ldvary) { append(disasm, "; ldvary"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } if (sig->ldvpm) append(disasm, "; ldvpm"); - if (sig->ldtmu) + if (sig->ldtmu) { append(disasm, "; ldtmu"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } + if (sig->ldtlb) { + append(disasm, "; ldtlb"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } + if (sig->ldtlbu) { + append(disasm, "; ldtlbu"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } if (sig->ldunif) append(disasm, "; ldunif"); + if (sig->ldunifrf) { + append(disasm, "; ldunifrf"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } + if (sig->ldunifa) + append(disasm, "; ldunifa"); + if (sig->ldunifarf) { + append(disasm, "; ldunifarf"); + v3d_qpu_disasm_sig_addr(disasm, instr); + } if (sig->wrtmuc) append(disasm, "; wrtmuc"); } diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 7695e0b9358..c07f3802fd4 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -23,6 +23,7 @@ #include <stdlib.h> #include "util/macros.h" +#include "broadcom/common/v3d_device_info.h" #include "qpu_instr.h" #ifndef QPU_MASK @@ -600,7 +601,8 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) } bool -v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst) +v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *inst) { if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { if (inst->alu.add.magic_write && @@ -614,11 +616,17 @@ v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst) } } + if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && + inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R3) { + return true; + } + return inst->sig.ldvary || inst->sig.ldvpm; } bool -v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst) +v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *inst) { if (inst->sig.ldtmu) return true; @@ -637,11 +645,17 @@ v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst) } } + if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && + inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4) { + return true; + } + return false; } bool -v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst) +v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *inst) { if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { if (inst->alu.add.magic_write && @@ -655,7 +669,12 @@ v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst) } } - return inst->sig.ldvary || inst->sig.ldunif; + if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && + inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R5) { + return true; + } + + return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa; } bool @@ -669,3 +688,18 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux) (mul_nsrc > 0 && inst->alu.mul.a == mux) || (mul_nsrc > 1 && inst->alu.mul.b == mux)); } + +bool +v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, + const struct v3d_qpu_sig *sig) +{ + if (devinfo->ver < 41) + return false; + + return (sig->ldunifrf || + sig->ldunifarf || + sig->ldvary || + sig->ldtmu || + sig->ldtlb || + sig->ldtlbu); +} diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index a425fae8b25..cab1885acc4 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -42,6 +42,9 @@ struct v3d_device_info; struct v3d_qpu_sig { bool thrsw:1; bool ldunif:1; + bool ldunifa:1; + bool ldunifrf:1; + bool ldunifarf:1; bool ldtmu:1; bool ldvary:1; bool ldvpm:1; @@ -347,6 +350,8 @@ struct v3d_qpu_instr { enum v3d_qpu_instr_type type; struct v3d_qpu_sig sig; + uint8_t sig_addr; + bool sig_magic; /* If the signal writes to a magic address */ uint8_t raddr_a; uint8_t raddr_b; struct v3d_qpu_flags flags; @@ -403,9 +408,14 @@ bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; -bool v3d_qpu_writes_r3(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; -bool v3d_qpu_writes_r4(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; -bool v3d_qpu_writes_r5(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux); +bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, + const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST; #endif diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c index 02aa1b86aa4..f9fb016f610 100644 --- a/src/broadcom/qpu/qpu_pack.c +++ b/src/broadcom/qpu/qpu_pack.c @@ -55,11 +55,7 @@ #define VC5_QPU_COND_SHIFT 46 #define VC5_QPU_COND_MASK QPU_MASK(52, 46) - -#define VC5_QPU_COND_IFA 0 -#define VC5_QPU_COND_IFB 1 -#define VC5_QPU_COND_IFNA 2 -#define VC5_QPU_COND_IFNB 3 +#define VC5_QPU_COND_SIG_MAGIC_ADDR (1 << 6) #define VC5_QPU_MM QPU_MASK(45, 45) #define VC5_QPU_MA QPU_MASK(44, 44) @@ -113,6 +109,9 @@ #define THRSW .thrsw = true #define LDUNIF .ldunif = true +#define LDUNIFRF .ldunifrf = true +#define LDUNIFA .ldunifa = true +#define LDUNIFARF .ldunifarf = true #define LDTMU .ldtmu = true #define LDVARY .ldvary = true #define LDVPM .ldvpm = true @@ -156,6 +155,67 @@ static const struct v3d_qpu_sig v33_sig_map[] = { [31] = { SMIMM, }, }; +static const struct v3d_qpu_sig v40_sig_map[] = { + /* MISC R3 R4 R5 */ + [0] = { }, + [1] = { THRSW, }, + [2] = { LDUNIF }, + [3] = { THRSW, LDUNIF }, + [4] = { LDTMU, }, + [5] = { THRSW, LDTMU, }, + [6] = { LDTMU, LDUNIF }, + [7] = { THRSW, LDTMU, LDUNIF }, + [8] = { LDVARY, }, + [9] = { THRSW, LDVARY, }, + [10] = { LDVARY, LDUNIF }, + [11] = { THRSW, LDVARY, LDUNIF }, + /* 12-13 reserved */ + [14] = { SMIMM, LDVARY, }, + [15] = { SMIMM, }, + [16] = { LDTLB, }, + [17] = { LDTLBU, }, + [18] = { WRTMUC }, + [19] = { THRSW, WRTMUC }, + [20] = { LDVARY, WRTMUC }, + [21] = { THRSW, LDVARY, WRTMUC }, + [22] = { UCB, }, + [23] = { ROT, }, + /* 24-30 reserved */ + [31] = { SMIMM, LDTMU, }, +}; + +static const struct v3d_qpu_sig v41_sig_map[] = { + /* MISC phys R5 */ + [0] = { }, + [1] = { THRSW, }, + [2] = { LDUNIF }, + [3] = { THRSW, LDUNIF }, + [4] = { LDTMU, }, + [5] = { THRSW, LDTMU, }, + [6] = { LDTMU, LDUNIF }, + [7] = { THRSW, LDTMU, LDUNIF }, + [8] = { LDVARY, }, + [9] = { THRSW, LDVARY, }, + [10] = { LDVARY, LDUNIF }, + [11] = { THRSW, LDVARY, LDUNIF }, + [12] = { LDUNIFRF }, + [13] = { THRSW, LDUNIFRF }, + [14] = { SMIMM, LDVARY, }, + [15] = { SMIMM, }, + [16] = { LDTLB, }, + [17] = { LDTLBU, }, + [18] = { WRTMUC }, + [19] = { THRSW, WRTMUC }, + [20] = { LDVARY, WRTMUC }, + [21] = { THRSW, LDVARY, WRTMUC }, + [22] = { UCB, }, + [23] = { ROT, }, + /* 24-30 reserved */ + [24] = { LDUNIFA}, + [25] = { LDUNIFARF }, + [31] = { SMIMM, LDTMU, }, +}; + bool v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, uint32_t packed_sig, @@ -164,7 +224,12 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, if (packed_sig >= ARRAY_SIZE(v33_sig_map)) return false; - *sig = v33_sig_map[packed_sig]; + if (devinfo->ver >= 41) + *sig = v41_sig_map[packed_sig]; + else if (devinfo->ver == 40) + *sig = v40_sig_map[packed_sig]; + else + *sig = v33_sig_map[packed_sig]; /* Signals with zeroed unpacked contents after element 0 are reserved. */ return (packed_sig == 0 || @@ -178,7 +243,12 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo, { static const struct v3d_qpu_sig *map; - map = v33_sig_map; + if (devinfo->ver >= 41) + map = v41_sig_map; + else if (devinfo->ver == 40) + map = v40_sig_map; + else + map = v33_sig_map; for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) { if (memcmp(&map[i], sig, sizeof(*sig)) == 0) { @@ -1063,10 +1133,21 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo, &instr->sig)) return false; - if (!v3d_qpu_flags_unpack(devinfo, - QPU_GET_FIELD(packed_instr, VC5_QPU_COND), - &instr->flags)) - return false; + uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND); + if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) { + instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR; + instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR; + + instr->flags.ac = V3D_QPU_COND_NONE; + instr->flags.mc = V3D_QPU_COND_NONE; + instr->flags.apf = V3D_QPU_PF_NONE; + instr->flags.mpf = V3D_QPU_PF_NONE; + instr->flags.auf = V3D_QPU_UF_NONE; + instr->flags.muf = V3D_QPU_UF_NONE; + } else { + if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags)) + return false; + } instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A); instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B); @@ -1164,9 +1245,28 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo, return false; uint32_t flags; - if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags)) - return false; + if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) { + if (instr->flags.ac != V3D_QPU_COND_NONE || + instr->flags.mc != V3D_QPU_COND_NONE || + instr->flags.apf != V3D_QPU_PF_NONE || + instr->flags.mpf != V3D_QPU_PF_NONE || + instr->flags.auf != V3D_QPU_UF_NONE || + instr->flags.muf != V3D_QPU_UF_NONE) { + return false; + } + + flags = instr->sig_addr; + if (instr->sig_magic) + flags |= VC5_QPU_COND_SIG_MAGIC_ADDR; + } else { + if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags)) + return false; + } + *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND); + } else { + if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) + return false; } return true; diff --git a/src/broadcom/qpu/tests/qpu_disasm.c b/src/broadcom/qpu/tests/qpu_disasm.c index 59668a86ecc..4f6ded73d48 100644 --- a/src/broadcom/qpu/tests/qpu_disasm.c +++ b/src/broadcom/qpu/tests/qpu_disasm.c @@ -63,6 +63,13 @@ static const struct { { 33, 0x041618d57c453000ull, "shl.andn exp, r3, r2; add.ifb rf35, r1, r2" }, { 33, 0x7048e5da49272800ull, "fsub.ifa rf26, r2.l, rf32; fmul.pushc sin, r1.h, r1.abs; ldunif" }, + /* v4.1 signals */ + { 41, 0x1f010520cf60a000ull, "fcmp.andz rf32, r2.h, r1.h; vfmul rf20, r0.hh, r3; ldunifa" }, + { 41, 0x932045e6c16ea000ull, "fcmp rf38, r2.abs, r5; fmul rf23.l, r3, r3.abs; ldunifarf.rf1" }, + { 41, 0xd72f0434e43ae5c0ull, "fcmp rf52.h, rf23, r5.abs; fmul rf16.h, rf23, r1; ldunifarf.rf60" }, + { 41, 0xdb3048eb9d533780ull, "fmax rf43.l, r3.h, rf30; fmul rf35.h, r4, r2.l; ldunifarf.r1" }, + { 41, 0x733620471e6ce700ull, "faddnf rf7.l, rf28.h, r1.l; fmul r1, r3.h, r3.abs; ldunifarf.rsqrt2" }, + { 41, 0x9c094adef634b000ull, "ffloor.ifb rf30.l, r3; fmul.pushz rf43.l, r5, r1.h" }, }; static void |