aboutsummaryrefslogtreecommitdiffstats
path: root/src/broadcom
diff options
context:
space:
mode:
Diffstat (limited to 'src/broadcom')
-rw-r--r-- src/broadcom/compiler/qpu_schedule.c 41
-rw-r--r-- src/broadcom/compiler/qpu_validate.c 6
-rw-r--r-- src/broadcom/compiler/v3d_compiler.h 4
-rw-r--r-- src/broadcom/compiler/vir.c 14
-rw-r--r-- src/broadcom/compiler/vir_dump.c 45
-rw-r--r-- src/broadcom/compiler/vir_register_allocate.c 4
-rw-r--r-- src/broadcom/compiler/vir_to_qpu.c 9
-rw-r--r-- src/broadcom/qpu/qpu_disasm.c 53
-rw-r--r-- src/broadcom/qpu/qpu_instr.c 42
-rw-r--r-- src/broadcom/qpu/qpu_instr.h 16
-rw-r--r-- src/broadcom/qpu/qpu_pack.c 126
-rw-r--r-- src/broadcom/qpu/tests/qpu_disasm.c 7
12 files changed, 322 insertions, 45 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index 799da805906..7fe46202636 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -78,6 +78,7 @@ struct schedule_node_child {
enum direction { F, R };
struct schedule_state {
+ const struct v3d_device_info *devinfo;
struct schedule_node *last_r[6];
struct schedule_node *last_rf[64];
struct schedule_node *last_sf;
@@ -265,6 +266,7 @@ process_uf_deps(struct schedule_state *state, struct schedule_node *n,
static void
calculate_deps(struct schedule_state *state, struct schedule_node *n)
{
+ const struct v3d_device_info *devinfo = state->devinfo;
struct qinst *qinst = n->inst;
struct v3d_qpu_instr *inst = &qinst->qpu;
@@ -356,12 +358,16 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
process_waddr_deps(state, n, inst->alu.mul.waddr,
inst->alu.mul.magic_write);
}
+ if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
+ process_waddr_deps(state, n, inst->sig_addr,
+ inst->sig_magic);
+ }
- if (v3d_qpu_writes_r3(inst))
+ if (v3d_qpu_writes_r3(devinfo, inst))
add_write_dep(state, &state->last_r[3], n);
- if (v3d_qpu_writes_r4(inst))
+ if (v3d_qpu_writes_r4(devinfo, inst))
add_write_dep(state, &state->last_r[4], n);
- if (v3d_qpu_writes_r5(inst))
+ if (v3d_qpu_writes_r5(devinfo, inst))
add_write_dep(state, &state->last_r[5], n);
if (inst->sig.thrsw) {
@@ -410,6 +416,7 @@ calculate_forward_deps(struct v3d_compile *c, struct list_head *schedule_list)
struct schedule_state state;
memset(&state, 0, sizeof(state));
+ state.devinfo = c->devinfo;
state.dir = F;
list_for_each_entry(struct schedule_node, node, schedule_list, link)
@@ -423,6 +430,7 @@ calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list)
struct schedule_state state;
memset(&state, 0, sizeof(state));
+ state.devinfo = c->devinfo;
state.dir = R;
for (node = schedule_list->prev; schedule_list != node; node = node->prev) {
@@ -514,7 +522,8 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
}
static bool
-writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
+writes_too_soon_after_write(const struct v3d_device_info *devinfo,
+ struct choose_scoreboard *scoreboard,
struct qinst *qinst)
{
const struct v3d_qpu_instr *inst = &qinst->qpu;
@@ -524,7 +533,7 @@ writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
* occur if a dead SFU computation makes it to scheduling.
*/
if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 &&
- v3d_qpu_writes_r4(inst))
+ v3d_qpu_writes_r4(devinfo, inst))
return true;
return false;
@@ -605,7 +614,8 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
return (inst->sig.ldvpm ||
inst->sig.ldtmu ||
inst->sig.ldtlb ||
- inst->sig.ldtlbu);
+ inst->sig.ldtlbu ||
+ inst->sig.wrtmuc);
}
static bool
@@ -619,7 +629,11 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
return false;
}
- /* Can't do more than one peripheral access in an instruction. */
+ /* Can't do more than one peripheral access in an instruction.
+ *
+ * XXX: V3D 4.1 allows TMU read along with a VPM read or write, and
+ * WRTMUC with a TMU magic register write (other than tmuc).
+ */
if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b))
return false;
@@ -663,6 +677,9 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
merge.sig.thrsw |= b->sig.thrsw;
merge.sig.ldunif |= b->sig.ldunif;
+ merge.sig.ldunifrf |= b->sig.ldunifrf;
+ merge.sig.ldunifa |= b->sig.ldunifa;
+ merge.sig.ldunifarf |= b->sig.ldunifarf;
merge.sig.ldtmu |= b->sig.ldtmu;
merge.sig.ldvary |= b->sig.ldvary;
merge.sig.ldvpm |= b->sig.ldvpm;
@@ -673,6 +690,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
merge.sig.rotate |= b->sig.rotate;
merge.sig.wrtmuc |= b->sig.wrtmuc;
+ if (v3d_qpu_sig_writes_address(devinfo, &a->sig) &&
+ v3d_qpu_sig_writes_address(devinfo, &b->sig))
+ return false;
+ merge.sig_addr |= b->sig_addr;
+ merge.sig_magic |= b->sig_magic;
+
uint64_t packed;
bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed);
@@ -719,7 +742,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
if (reads_too_soon_after_write(scoreboard, n->inst))
continue;
- if (writes_too_soon_after_write(scoreboard, n->inst))
+ if (writes_too_soon_after_write(devinfo, scoreboard, n->inst))
continue;
/* "A scoreboard wait must not occur in the first two
@@ -735,7 +758,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
* otherwise get scheduled so ldunif and ldvary try to update
* r5 in the same tick.
*/
- if (inst->sig.ldunif &&
+ if ((inst->sig.ldunif || inst->sig.ldunifa) &&
scoreboard->tick == scoreboard->last_ldvary_tick + 1) {
continue;
}
diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
index d99d76a8beb..3b2c10eabc6 100644
--- a/src/broadcom/compiler/qpu_validate.c
+++ b/src/broadcom/compiler/qpu_validate.c
@@ -85,6 +85,7 @@ qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
static void
qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
{
+ const struct v3d_device_info *devinfo = state->c->devinfo;
const struct v3d_qpu_instr *inst = &qinst->qpu;
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
@@ -94,7 +95,8 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
* r5 one instruction later, which is illegal to have
* together.
*/
- if (state->last && state->last->sig.ldvary && inst->sig.ldunif) {
+ if (state->last && state->last->sig.ldvary &&
+ (inst->sig.ldunif || inst->sig.ldunifa)) {
fail_instr(state, "LDUNIF after a LDVARY");
}
@@ -143,7 +145,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
fail_instr(state, "R4 read too soon after SFU");
- if (v3d_qpu_writes_r4(inst))
+ if (v3d_qpu_writes_r4(devinfo, inst))
fail_instr(state, "R4 write too soon after SFU");
if (sfu_writes)
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 85def2cb02c..4ced588fbbe 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -650,8 +650,8 @@ bool vir_is_add(struct qinst *inst);
bool vir_is_mul(struct qinst *inst);
bool vir_is_float_input(struct qinst *inst);
bool vir_depends_on_flags(struct qinst *inst);
-bool vir_writes_r3(struct qinst *inst);
-bool vir_writes_r4(struct qinst *inst);
+bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
+bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
uint8_t vir_channels_written(struct qinst *inst);
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 4e78a477bd7..c129bb047e6 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*/
+#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"
int
@@ -198,7 +199,7 @@ vir_depends_on_flags(struct qinst *inst)
}
bool
-vir_writes_r3(struct qinst *inst)
+vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
{
for (int i = 0; i < vir_get_nsrc(inst); i++) {
switch (inst->src[i].file) {
@@ -210,11 +211,18 @@ vir_writes_r3(struct qinst *inst)
}
}
+ if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
+ inst->qpu.sig.ldtlb ||
+ inst->qpu.sig.ldtlbu ||
+ inst->qpu.sig.ldvpm)) {
+ return true;
+ }
+
return false;
}
bool
-vir_writes_r4(struct qinst *inst)
+vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
{
switch (inst->dst.file) {
case QFILE_MAGIC:
@@ -231,7 +239,7 @@ vir_writes_r4(struct qinst *inst)
break;
}
- if (inst->qpu.sig.ldtmu)
+ if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
return true;
return false;
diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c
index ad5c061a138..cdb1928ed00 100644
--- a/src/broadcom/compiler/vir_dump.c
+++ b/src/broadcom/compiler/vir_dump.c
@@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*/
+#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"
static void
@@ -146,20 +147,60 @@ vir_print_reg(struct v3d_compile *c, struct qreg reg)
}
static void
+vir_dump_sig_addr(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr)
+{
+ if (devinfo->ver < 41)
+ return;
+
+ if (!instr->sig_magic)
+ fprintf(stderr, ".rf%d", instr->sig_addr);
+ else {
+ const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr);
+ if (name)
+ fprintf(stderr, ".%s", name);
+ else
+ fprintf(stderr, ".UNKNOWN%d", instr->sig_addr);
+ }
+}
+
+static void
vir_dump_sig(struct v3d_compile *c, struct qinst *inst)
{
struct v3d_qpu_sig *sig = &inst->qpu.sig;
if (sig->thrsw)
fprintf(stderr, "; thrsw");
- if (sig->ldvary)
+ if (sig->ldvary) {
fprintf(stderr, "; ldvary");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
if (sig->ldvpm)
fprintf(stderr, "; ldvpm");
- if (sig->ldtmu)
+ if (sig->ldtmu) {
fprintf(stderr, "; ldtmu");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
+ if (sig->ldtlb) {
+ fprintf(stderr, "; ldtlb");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
+ if (sig->ldtlbu) {
+ fprintf(stderr, "; ldtlbu");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
if (sig->ldunif)
fprintf(stderr, "; ldunif");
+ if (sig->ldunifrf) {
+ fprintf(stderr, "; ldunifrf");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
+ if (sig->ldunifa)
+ fprintf(stderr, "; ldunifa");
+ if (sig->ldunifarf) {
+ fprintf(stderr, "; ldunifarf");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
if (sig->wrtmuc)
fprintf(stderr, "; wrtmuc");
}
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
index 9ebf2cd69b4..f39f0c2829b 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -139,7 +139,7 @@ v3d_register_allocate(struct v3d_compile *c)
* result to a temp), nothing else can be stored in r3/r4 across
* it.
*/
- if (vir_writes_r3(inst)) {
+ if (vir_writes_r3(c->devinfo, inst)) {
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip &&
c->temp_end[i] > ip) {
@@ -149,7 +149,7 @@ v3d_register_allocate(struct v3d_compile *c)
}
}
}
- if (vir_writes_r4(inst)) {
+ if (vir_writes_r4(c->devinfo, inst)) {
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip &&
c->temp_end[i] > ip) {
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
index eeb7b0bc291..525638df691 100644
--- a/src/broadcom/compiler/vir_to_qpu.c
+++ b/src/broadcom/compiler/vir_to_qpu.c
@@ -264,7 +264,14 @@ v3d_generate_code_block(struct v3d_compile *c,
}
if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
- if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
+ if (v3d_qpu_sig_writes_address(c->devinfo,
+ &qinst->qpu.sig)) {
+ assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
+ assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
+
+ qinst->qpu.sig_addr = dst.index;
+ qinst->qpu.sig_magic = dst.magic;
+ } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
if (nsrc >= 1) {
set_src(&qinst->qpu,
diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c
index 5ee834852bd..73b43f8c3d6 100644
--- a/src/broadcom/qpu/qpu_disasm.c
+++ b/src/broadcom/qpu/qpu_disasm.c
@@ -91,7 +91,8 @@ v3d_qpu_disasm_add(struct disasm_state *disasm,
int num_src = v3d_qpu_add_op_num_src(instr->alu.add.op);
append(disasm, "%s", v3d_qpu_add_op_name(instr->alu.add.op));
- append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac));
+ if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig))
+ append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac));
append(disasm, "%s", v3d_qpu_pf_name(instr->flags.apf));
append(disasm, "%s", v3d_qpu_uf_name(instr->flags.auf));
@@ -130,7 +131,8 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
append(disasm, "; ");
append(disasm, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op));
- append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc));
+ if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig))
+ append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc));
append(disasm, "%s", v3d_qpu_pf_name(instr->flags.mpf));
append(disasm, "%s", v3d_qpu_uf_name(instr->flags.muf));
@@ -162,6 +164,24 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
}
static void
+v3d_qpu_disasm_sig_addr(struct disasm_state *disasm,
+ const struct v3d_qpu_instr *instr)
+{
+ if (disasm->devinfo->ver < 41)
+ return;
+
+ if (!instr->sig_magic)
+ append(disasm, ".rf%d", instr->sig_addr);
+ else {
+ const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr);
+ if (name)
+ append(disasm, ".%s", name);
+ else
+ append(disasm, ".UNKNOWN%d", instr->sig_addr);
+ }
+}
+
+static void
v3d_qpu_disasm_sig(struct disasm_state *disasm,
const struct v3d_qpu_instr *instr)
{
@@ -172,6 +192,9 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm,
!sig->ldvpm &&
!sig->ldtmu &&
!sig->ldunif &&
+ !sig->ldunifrf &&
+ !sig->ldunifa &&
+ !sig->ldunifarf &&
!sig->wrtmuc) {
return;
}
@@ -180,14 +203,36 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm,
if (sig->thrsw)
append(disasm, "; thrsw");
- if (sig->ldvary)
+ if (sig->ldvary) {
append(disasm, "; ldvary");
+ v3d_qpu_disasm_sig_addr(disasm, instr);
+ }
if (sig->ldvpm)
append(disasm, "; ldvpm");
- if (sig->ldtmu)
+ if (sig->ldtmu) {
append(disasm, "; ldtmu");
+ v3d_qpu_disasm_sig_addr(disasm, instr);
+ }
+ if (sig->ldtlb) {
+ append(disasm, "; ldtlb");
+ v3d_qpu_disasm_sig_addr(disasm, instr);
+ }
+ if (sig->ldtlbu) {
+ append(disasm, "; ldtlbu");
+ v3d_qpu_disasm_sig_addr(disasm, instr);
+ }
if (sig->ldunif)
append(disasm, "; ldunif");
+ if (sig->ldunifrf) {
+ append(disasm, "; ldunifrf");
+ v3d_qpu_disasm_sig_addr(disasm, instr);
+ }
+ if (sig->ldunifa)
+ append(disasm, "; ldunifa");
+ if (sig->ldunifarf) {
+ append(disasm, "; ldunifarf");
+ v3d_qpu_disasm_sig_addr(disasm, instr);
+ }
if (sig->wrtmuc)
append(disasm, "; wrtmuc");
}
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
index 7695e0b9358..c07f3802fd4 100644
--- a/src/broadcom/qpu/qpu_instr.c
+++ b/src/broadcom/qpu/qpu_instr.c
@@ -23,6 +23,7 @@
#include <stdlib.h>
#include "util/macros.h"
+#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"
#ifndef QPU_MASK
@@ -600,7 +601,8 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
}
bool
-v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst)
+v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *inst)
{
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
if (inst->alu.add.magic_write &&
@@ -614,11 +616,17 @@ v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst)
}
}
+ if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
+ inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R3) {
+ return true;
+ }
+
return inst->sig.ldvary || inst->sig.ldvpm;
}
bool
-v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst)
+v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *inst)
{
if (inst->sig.ldtmu)
return true;
@@ -637,11 +645,17 @@ v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst)
}
}
+ if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
+ inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4) {
+ return true;
+ }
+
return false;
}
bool
-v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst)
+v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *inst)
{
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
if (inst->alu.add.magic_write &&
@@ -655,7 +669,12 @@ v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst)
}
}
- return inst->sig.ldvary || inst->sig.ldunif;
+ if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
+ inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R5) {
+ return true;
+ }
+
+ return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
}
bool
@@ -669,3 +688,18 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
(mul_nsrc > 0 && inst->alu.mul.a == mux) ||
(mul_nsrc > 1 && inst->alu.mul.b == mux));
}
+
+bool
+v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_sig *sig)
+{
+ if (devinfo->ver < 41)
+ return false;
+
+ return (sig->ldunifrf ||
+ sig->ldunifarf ||
+ sig->ldvary ||
+ sig->ldtmu ||
+ sig->ldtlb ||
+ sig->ldtlbu);
+}
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index a425fae8b25..cab1885acc4 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -42,6 +42,9 @@ struct v3d_device_info;
struct v3d_qpu_sig {
bool thrsw:1;
bool ldunif:1;
+ bool ldunifa:1;
+ bool ldunifrf:1;
+ bool ldunifarf:1;
bool ldtmu:1;
bool ldvary:1;
bool ldvpm:1;
@@ -347,6 +350,8 @@ struct v3d_qpu_instr {
enum v3d_qpu_instr_type type;
struct v3d_qpu_sig sig;
+ uint8_t sig_addr;
+ bool sig_magic; /* True when sig_addr is a magic waddr; false when it is a regfile (rf) index */
uint8_t raddr_a;
uint8_t raddr_b;
struct v3d_qpu_flags flags;
@@ -403,9 +408,14 @@ bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
-bool v3d_qpu_writes_r3(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
-bool v3d_qpu_writes_r4(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
-bool v3d_qpu_writes_r5(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
+bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
+bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
+bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
+bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
#endif
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
index 02aa1b86aa4..f9fb016f610 100644
--- a/src/broadcom/qpu/qpu_pack.c
+++ b/src/broadcom/qpu/qpu_pack.c
@@ -55,11 +55,7 @@
#define VC5_QPU_COND_SHIFT 46
#define VC5_QPU_COND_MASK QPU_MASK(52, 46)
-
-#define VC5_QPU_COND_IFA 0
-#define VC5_QPU_COND_IFB 1
-#define VC5_QPU_COND_IFNA 2
-#define VC5_QPU_COND_IFNB 3
+#define VC5_QPU_COND_SIG_MAGIC_ADDR (1 << 6)
#define VC5_QPU_MM QPU_MASK(45, 45)
#define VC5_QPU_MA QPU_MASK(44, 44)
@@ -113,6 +109,9 @@
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
+#define LDUNIFRF .ldunifrf = true
+#define LDUNIFA .ldunifa = true
+#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
@@ -156,6 +155,67 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
[31] = { SMIMM, },
};
+static const struct v3d_qpu_sig v40_sig_map[] = {
+ /* MISC R3 R4 R5 */
+ [0] = { },
+ [1] = { THRSW, },
+ [2] = { LDUNIF },
+ [3] = { THRSW, LDUNIF },
+ [4] = { LDTMU, },
+ [5] = { THRSW, LDTMU, },
+ [6] = { LDTMU, LDUNIF },
+ [7] = { THRSW, LDTMU, LDUNIF },
+ [8] = { LDVARY, },
+ [9] = { THRSW, LDVARY, },
+ [10] = { LDVARY, LDUNIF },
+ [11] = { THRSW, LDVARY, LDUNIF },
+ /* 12-13 reserved */
+ [14] = { SMIMM, LDVARY, },
+ [15] = { SMIMM, },
+ [16] = { LDTLB, },
+ [17] = { LDTLBU, },
+ [18] = { WRTMUC },
+ [19] = { THRSW, WRTMUC },
+ [20] = { LDVARY, WRTMUC },
+ [21] = { THRSW, LDVARY, WRTMUC },
+ [22] = { UCB, },
+ [23] = { ROT, },
+ /* 24-30 reserved */
+ [31] = { SMIMM, LDTMU, },
+};
+
+static const struct v3d_qpu_sig v41_sig_map[] = {
+ /* MISC phys R5 */
+ [0] = { },
+ [1] = { THRSW, },
+ [2] = { LDUNIF },
+ [3] = { THRSW, LDUNIF },
+ [4] = { LDTMU, },
+ [5] = { THRSW, LDTMU, },
+ [6] = { LDTMU, LDUNIF },
+ [7] = { THRSW, LDTMU, LDUNIF },
+ [8] = { LDVARY, },
+ [9] = { THRSW, LDVARY, },
+ [10] = { LDVARY, LDUNIF },
+ [11] = { THRSW, LDVARY, LDUNIF },
+ [12] = { LDUNIFRF },
+ [13] = { THRSW, LDUNIFRF },
+ [14] = { SMIMM, LDVARY, },
+ [15] = { SMIMM, },
+ [16] = { LDTLB, },
+ [17] = { LDTLBU, },
+ [18] = { WRTMUC },
+ [19] = { THRSW, WRTMUC },
+ [20] = { LDVARY, WRTMUC },
+ [21] = { THRSW, LDVARY, WRTMUC },
+ [22] = { UCB, },
+ [23] = { ROT, },
+ /* 24-25 are the ldunifa signals below; 26-30 reserved */
+ [24] = { LDUNIFA},
+ [25] = { LDUNIFARF },
+ [31] = { SMIMM, LDTMU, },
+};
+
bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
uint32_t packed_sig,
@@ -164,7 +224,12 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
if (packed_sig >= ARRAY_SIZE(v33_sig_map))
return false;
- *sig = v33_sig_map[packed_sig];
+ if (devinfo->ver >= 41)
+ *sig = v41_sig_map[packed_sig];
+ else if (devinfo->ver == 40)
+ *sig = v40_sig_map[packed_sig];
+ else
+ *sig = v33_sig_map[packed_sig];
/* Signals with zeroed unpacked contents after element 0 are reserved. */
return (packed_sig == 0 ||
@@ -178,7 +243,12 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
{
static const struct v3d_qpu_sig *map;
- map = v33_sig_map;
+ if (devinfo->ver >= 41)
+ map = v41_sig_map;
+ else if (devinfo->ver == 40)
+ map = v40_sig_map;
+ else
+ map = v33_sig_map;
for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
@@ -1063,10 +1133,21 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
&instr->sig))
return false;
- if (!v3d_qpu_flags_unpack(devinfo,
- QPU_GET_FIELD(packed_instr, VC5_QPU_COND),
- &instr->flags))
- return false;
+ uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
+ if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
+ instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
+ instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;
+
+ instr->flags.ac = V3D_QPU_COND_NONE;
+ instr->flags.mc = V3D_QPU_COND_NONE;
+ instr->flags.apf = V3D_QPU_PF_NONE;
+ instr->flags.mpf = V3D_QPU_PF_NONE;
+ instr->flags.auf = V3D_QPU_UF_NONE;
+ instr->flags.muf = V3D_QPU_UF_NONE;
+ } else {
+ if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
+ return false;
+ }
instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
@@ -1164,9 +1245,28 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
return false;
uint32_t flags;
- if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
- return false;
+ if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
+ if (instr->flags.ac != V3D_QPU_COND_NONE ||
+ instr->flags.mc != V3D_QPU_COND_NONE ||
+ instr->flags.apf != V3D_QPU_PF_NONE ||
+ instr->flags.mpf != V3D_QPU_PF_NONE ||
+ instr->flags.auf != V3D_QPU_UF_NONE ||
+ instr->flags.muf != V3D_QPU_UF_NONE) {
+ return false;
+ }
+
+ flags = instr->sig_addr;
+ if (instr->sig_magic)
+ flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
+ } else {
+ if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
+ return false;
+ }
+
*packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
+ } else {
+ if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
+ return false;
}
return true;
diff --git a/src/broadcom/qpu/tests/qpu_disasm.c b/src/broadcom/qpu/tests/qpu_disasm.c
index 59668a86ecc..4f6ded73d48 100644
--- a/src/broadcom/qpu/tests/qpu_disasm.c
+++ b/src/broadcom/qpu/tests/qpu_disasm.c
@@ -63,6 +63,13 @@ static const struct {
{ 33, 0x041618d57c453000ull, "shl.andn exp, r3, r2; add.ifb rf35, r1, r2" },
{ 33, 0x7048e5da49272800ull, "fsub.ifa rf26, r2.l, rf32; fmul.pushc sin, r1.h, r1.abs; ldunif" },
+ /* v4.1 signals */
+ { 41, 0x1f010520cf60a000ull, "fcmp.andz rf32, r2.h, r1.h; vfmul rf20, r0.hh, r3; ldunifa" },
+ { 41, 0x932045e6c16ea000ull, "fcmp rf38, r2.abs, r5; fmul rf23.l, r3, r3.abs; ldunifarf.rf1" },
+ { 41, 0xd72f0434e43ae5c0ull, "fcmp rf52.h, rf23, r5.abs; fmul rf16.h, rf23, r1; ldunifarf.rf60" },
+ { 41, 0xdb3048eb9d533780ull, "fmax rf43.l, r3.h, rf30; fmul rf35.h, r4, r2.l; ldunifarf.r1" },
+ { 41, 0x733620471e6ce700ull, "faddnf rf7.l, rf28.h, r1.l; fmul r1, r3.h, r3.abs; ldunifarf.rsqrt2" },
+ { 41, 0x9c094adef634b000ull, "ffloor.ifb rf30.l, r3; fmul.pushz rf43.l, r5, r1.h" },
};
static void