about | summary | refs | log | tree | commit | diff | stats
path: root/src/broadcom/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/broadcom/compiler')
-rw-r--r-- src/broadcom/compiler/qpu_schedule.c 41
-rw-r--r-- src/broadcom/compiler/qpu_validate.c 6
-rw-r--r-- src/broadcom/compiler/v3d_compiler.h 4
-rw-r--r-- src/broadcom/compiler/vir.c 14
-rw-r--r-- src/broadcom/compiler/vir_dump.c 45
-rw-r--r-- src/broadcom/compiler/vir_register_allocate.c 4
-rw-r--r-- src/broadcom/compiler/vir_to_qpu.c 9
7 files changed, 102 insertions, 21 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index 799da805906..7fe46202636 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -78,6 +78,7 @@ struct schedule_node_child {
enum direction { F, R };
struct schedule_state {
+ const struct v3d_device_info *devinfo;
struct schedule_node *last_r[6];
struct schedule_node *last_rf[64];
struct schedule_node *last_sf;
@@ -265,6 +266,7 @@ process_uf_deps(struct schedule_state *state, struct schedule_node *n,
static void
calculate_deps(struct schedule_state *state, struct schedule_node *n)
{
+ const struct v3d_device_info *devinfo = state->devinfo;
struct qinst *qinst = n->inst;
struct v3d_qpu_instr *inst = &qinst->qpu;
@@ -356,12 +358,16 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
process_waddr_deps(state, n, inst->alu.mul.waddr,
inst->alu.mul.magic_write);
}
+ if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
+ process_waddr_deps(state, n, inst->sig_addr,
+ inst->sig_magic);
+ }
- if (v3d_qpu_writes_r3(inst))
+ if (v3d_qpu_writes_r3(devinfo, inst))
add_write_dep(state, &state->last_r[3], n);
- if (v3d_qpu_writes_r4(inst))
+ if (v3d_qpu_writes_r4(devinfo, inst))
add_write_dep(state, &state->last_r[4], n);
- if (v3d_qpu_writes_r5(inst))
+ if (v3d_qpu_writes_r5(devinfo, inst))
add_write_dep(state, &state->last_r[5], n);
if (inst->sig.thrsw) {
@@ -410,6 +416,7 @@ calculate_forward_deps(struct v3d_compile *c, struct list_head *schedule_list)
struct schedule_state state;
memset(&state, 0, sizeof(state));
+ state.devinfo = c->devinfo;
state.dir = F;
list_for_each_entry(struct schedule_node, node, schedule_list, link)
@@ -423,6 +430,7 @@ calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list)
struct schedule_state state;
memset(&state, 0, sizeof(state));
+ state.devinfo = c->devinfo;
state.dir = R;
for (node = schedule_list->prev; schedule_list != node; node = node->prev) {
@@ -514,7 +522,8 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
}
static bool
-writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
+writes_too_soon_after_write(const struct v3d_device_info *devinfo,
+ struct choose_scoreboard *scoreboard,
struct qinst *qinst)
{
const struct v3d_qpu_instr *inst = &qinst->qpu;
@@ -524,7 +533,7 @@ writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
* occur if a dead SFU computation makes it to scheduling.
*/
if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 &&
- v3d_qpu_writes_r4(inst))
+ v3d_qpu_writes_r4(devinfo, inst))
return true;
return false;
@@ -605,7 +614,8 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
return (inst->sig.ldvpm ||
inst->sig.ldtmu ||
inst->sig.ldtlb ||
- inst->sig.ldtlbu);
+ inst->sig.ldtlbu ||
+ inst->sig.wrtmuc);
}
static bool
@@ -619,7 +629,11 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
return false;
}
- /* Can't do more than one peripheral access in an instruction. */
+ /* Can't do more than one peripheral access in an instruction.
+ *
+ * XXX: V3D 4.1 allows TMU read along with a VPM read or write, and
+ * WRTMUC with a TMU magic register write (other than tmuc).
+ */
if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b))
return false;
@@ -663,6 +677,9 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
merge.sig.thrsw |= b->sig.thrsw;
merge.sig.ldunif |= b->sig.ldunif;
+ merge.sig.ldunifrf |= b->sig.ldunifrf;
+ merge.sig.ldunifa |= b->sig.ldunifa;
+ merge.sig.ldunifarf |= b->sig.ldunifarf;
merge.sig.ldtmu |= b->sig.ldtmu;
merge.sig.ldvary |= b->sig.ldvary;
merge.sig.ldvpm |= b->sig.ldvpm;
@@ -673,6 +690,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
merge.sig.rotate |= b->sig.rotate;
merge.sig.wrtmuc |= b->sig.wrtmuc;
+ if (v3d_qpu_sig_writes_address(devinfo, &a->sig) &&
+ v3d_qpu_sig_writes_address(devinfo, &b->sig))
+ return false;
+ merge.sig_addr |= b->sig_addr;
+ merge.sig_magic |= b->sig_magic;
+
uint64_t packed;
bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed);
@@ -719,7 +742,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
if (reads_too_soon_after_write(scoreboard, n->inst))
continue;
- if (writes_too_soon_after_write(scoreboard, n->inst))
+ if (writes_too_soon_after_write(devinfo, scoreboard, n->inst))
continue;
/* "A scoreboard wait must not occur in the first two
@@ -735,7 +758,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
* otherwise get scheduled so ldunif and ldvary try to update
* r5 in the same tick.
*/
- if (inst->sig.ldunif &&
+ if ((inst->sig.ldunif || inst->sig.ldunifa) &&
scoreboard->tick == scoreboard->last_ldvary_tick + 1) {
continue;
}
diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
index d99d76a8beb..3b2c10eabc6 100644
--- a/src/broadcom/compiler/qpu_validate.c
+++ b/src/broadcom/compiler/qpu_validate.c
@@ -85,6 +85,7 @@ qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
static void
qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
{
+ const struct v3d_device_info *devinfo = state->c->devinfo;
const struct v3d_qpu_instr *inst = &qinst->qpu;
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
@@ -94,7 +95,8 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
* r5 one instruction later, which is illegal to have
* together.
*/
- if (state->last && state->last->sig.ldvary && inst->sig.ldunif) {
+ if (state->last && state->last->sig.ldvary &&
+ (inst->sig.ldunif || inst->sig.ldunifa)) {
fail_instr(state, "LDUNIF after a LDVARY");
}
@@ -143,7 +145,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
fail_instr(state, "R4 read too soon after SFU");
- if (v3d_qpu_writes_r4(inst))
+ if (v3d_qpu_writes_r4(devinfo, inst))
fail_instr(state, "R4 write too soon after SFU");
if (sfu_writes)
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 85def2cb02c..4ced588fbbe 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -650,8 +650,8 @@ bool vir_is_add(struct qinst *inst);
bool vir_is_mul(struct qinst *inst);
bool vir_is_float_input(struct qinst *inst);
bool vir_depends_on_flags(struct qinst *inst);
-bool vir_writes_r3(struct qinst *inst);
-bool vir_writes_r4(struct qinst *inst);
+bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
+bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
uint8_t vir_channels_written(struct qinst *inst);
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 4e78a477bd7..c129bb047e6 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*/
+#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"
int
@@ -198,7 +199,7 @@ vir_depends_on_flags(struct qinst *inst)
}
bool
-vir_writes_r3(struct qinst *inst)
+vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
{
for (int i = 0; i < vir_get_nsrc(inst); i++) {
switch (inst->src[i].file) {
@@ -210,11 +211,18 @@ vir_writes_r3(struct qinst *inst)
}
}
+ if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
+ inst->qpu.sig.ldtlb ||
+ inst->qpu.sig.ldtlbu ||
+ inst->qpu.sig.ldvpm)) {
+ return true;
+ }
+
return false;
}
bool
-vir_writes_r4(struct qinst *inst)
+vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
{
switch (inst->dst.file) {
case QFILE_MAGIC:
@@ -231,7 +239,7 @@ vir_writes_r4(struct qinst *inst)
break;
}
- if (inst->qpu.sig.ldtmu)
+ if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
return true;
return false;
diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c
index ad5c061a138..cdb1928ed00 100644
--- a/src/broadcom/compiler/vir_dump.c
+++ b/src/broadcom/compiler/vir_dump.c
@@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*/
+#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"
static void
@@ -146,20 +147,60 @@ vir_print_reg(struct v3d_compile *c, struct qreg reg)
}
static void
+vir_dump_sig_addr(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr)
+{
+ if (devinfo->ver < 41)
+ return;
+
+ if (!instr->sig_magic)
+ fprintf(stderr, ".rf%d", instr->sig_addr);
+ else {
+ const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr);
+ if (name)
+ fprintf(stderr, ".%s", name);
+ else
+ fprintf(stderr, ".UNKNOWN%d", instr->sig_addr);
+ }
+}
+
+static void
vir_dump_sig(struct v3d_compile *c, struct qinst *inst)
{
struct v3d_qpu_sig *sig = &inst->qpu.sig;
if (sig->thrsw)
fprintf(stderr, "; thrsw");
- if (sig->ldvary)
+ if (sig->ldvary) {
fprintf(stderr, "; ldvary");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
if (sig->ldvpm)
fprintf(stderr, "; ldvpm");
- if (sig->ldtmu)
+ if (sig->ldtmu) {
fprintf(stderr, "; ldtmu");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
+ if (sig->ldtlb) {
+ fprintf(stderr, "; ldtlb");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
+ if (sig->ldtlbu) {
+ fprintf(stderr, "; ldtlbu");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
if (sig->ldunif)
fprintf(stderr, "; ldunif");
+ if (sig->ldunifrf) {
+ fprintf(stderr, "; ldunifrf");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
+ if (sig->ldunifa)
+ fprintf(stderr, "; ldunifa");
+ if (sig->ldunifarf) {
+ fprintf(stderr, "; ldunifarf");
+ vir_dump_sig_addr(c->devinfo, &inst->qpu);
+ }
if (sig->wrtmuc)
fprintf(stderr, "; wrtmuc");
}
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
index 9ebf2cd69b4..f39f0c2829b 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -139,7 +139,7 @@ v3d_register_allocate(struct v3d_compile *c)
* result to a temp), nothing else can be stored in r3/r4 across
* it.
*/
- if (vir_writes_r3(inst)) {
+ if (vir_writes_r3(c->devinfo, inst)) {
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip &&
c->temp_end[i] > ip) {
@@ -149,7 +149,7 @@ v3d_register_allocate(struct v3d_compile *c)
}
}
}
- if (vir_writes_r4(inst)) {
+ if (vir_writes_r4(c->devinfo, inst)) {
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip &&
c->temp_end[i] > ip) {
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
index eeb7b0bc291..525638df691 100644
--- a/src/broadcom/compiler/vir_to_qpu.c
+++ b/src/broadcom/compiler/vir_to_qpu.c
@@ -264,7 +264,14 @@ v3d_generate_code_block(struct v3d_compile *c,
}
if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
- if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
+ if (v3d_qpu_sig_writes_address(c->devinfo,
+ &qinst->qpu.sig)) {
+ assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
+ assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
+
+ qinst->qpu.sig_addr = dst.index;
+ qinst->qpu.sig_magic = dst.magic;
+ } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
if (nsrc >= 1) {
set_src(&qinst->qpu,