summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/broadcom/compiler/qpu_schedule.c9
-rw-r--r--src/broadcom/compiler/v3d_compiler.h1
-rw-r--r--src/broadcom/compiler/vir.c6
-rw-r--r--src/broadcom/qpu/qpu_instr.c8
-rw-r--r--src/broadcom/qpu/qpu_instr.h1
5 files changed, 22 insertions, 3 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index 441b6327825..fb5ecd6410c 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -402,7 +402,7 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
add_write_dep(state, &state->last_tmu_config, n);
}
- if (inst->sig.ldtmu) {
+ if (v3d_qpu_waits_on_tmu(inst)) {
/* TMU loads are coming from a FIFO, so ordering is important.
*/
add_write_dep(state, &state->last_tmu_write, n);
@@ -564,7 +564,7 @@ get_instruction_priority(const struct v3d_qpu_instr *inst)
next_score++;
/* Schedule texture read results collection late to hide latency. */
- if (inst->sig.ldtmu)
+ if (v3d_qpu_waits_on_tmu(inst))
return next_score;
next_score++;
@@ -605,6 +605,9 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
return true;
}
+ if (inst->alu.add.op == V3D_QPU_A_TMUWT)
+ return true;
+
if (inst->alu.mul.op != V3D_QPU_M_NOP &&
inst->alu.mul.magic_write &&
qpu_magic_waddr_is_periph(inst->alu.mul.waddr)) {
@@ -910,7 +913,7 @@ static uint32_t magic_waddr_latency(enum v3d_qpu_waddr waddr,
*
* because we associate the first load_tmu0 with the *second* tmu0_s.
*/
- if (v3d_qpu_magic_waddr_is_tmu(waddr) && after->sig.ldtmu)
+ if (v3d_qpu_magic_waddr_is_tmu(waddr) && v3d_qpu_waits_on_tmu(after))
return 100;
/* Assume that anything depending on us is consuming the SFU result. */
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index a02b5a6404a..33a9942734d 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -940,6 +940,7 @@ VIR_A_ALU0(TIDX)
VIR_A_ALU0(EIDX)
VIR_A_ALU1(LDVPMV_IN)
VIR_A_ALU1(LDVPMV_OUT)
+VIR_A_ALU0(TMUWT)
VIR_A_ALU0(FXCD)
VIR_A_ALU0(XCD)
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index b5539b6ef50..86379faa5bb 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -98,6 +98,7 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
case V3D_QPU_A_STVPMD:
case V3D_QPU_A_STVPMP:
case V3D_QPU_A_VPMWT:
+ case V3D_QPU_A_TMUWT:
return true;
default:
break;
@@ -194,6 +195,11 @@ vir_is_tex(struct qinst *inst)
if (inst->dst.file == QFILE_MAGIC)
return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
+ if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
+ inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
+ return true;
+ }
+
return false;
}
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
index a7fb4186e1a..0846cc86174 100644
--- a/src/broadcom/qpu/qpu_instr.c
+++ b/src/broadcom/qpu/qpu_instr.c
@@ -526,6 +526,14 @@ v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr)
}
bool
+v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
+{
+ return (inst->sig.ldtmu ||
+ (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
+ inst->alu.add.op == V3D_QPU_A_TMUWT));
+}
+
+bool
v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
{
return (waddr == V3D_QPU_WADDR_TLB ||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index c37abac3cf8..c2b4ebd1995 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -452,6 +452,7 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
+bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;