diff options
-rw-r--r-- | src/broadcom/compiler/qpu_schedule.c | 9 | ||||
-rw-r--r-- | src/broadcom/compiler/v3d_compiler.h | 1 | ||||
-rw-r--r-- | src/broadcom/compiler/vir.c | 6 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_instr.c | 8 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_instr.h | 1 |
5 files changed, 22 insertions, 3 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 441b6327825..fb5ecd6410c 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -402,7 +402,7 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) add_write_dep(state, &state->last_tmu_config, n); } - if (inst->sig.ldtmu) { + if (v3d_qpu_waits_on_tmu(inst)) { /* TMU loads are coming from a FIFO, so ordering is important. */ add_write_dep(state, &state->last_tmu_write, n); @@ -564,7 +564,7 @@ get_instruction_priority(const struct v3d_qpu_instr *inst) next_score++; /* Schedule texture read results collection late to hide latency. */ - if (inst->sig.ldtmu) + if (v3d_qpu_waits_on_tmu(inst)) return next_score; next_score++; @@ -605,6 +605,9 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst) return true; } + if (inst->alu.add.op == V3D_QPU_A_TMUWT) + return true; + if (inst->alu.mul.op != V3D_QPU_M_NOP && inst->alu.mul.magic_write && qpu_magic_waddr_is_periph(inst->alu.mul.waddr)) { @@ -910,7 +913,7 @@ static uint32_t magic_waddr_latency(enum v3d_qpu_waddr waddr, * * because we associate the first load_tmu0 with the *second* tmu0_s. */ - if (v3d_qpu_magic_waddr_is_tmu(waddr) && after->sig.ldtmu) + if (v3d_qpu_magic_waddr_is_tmu(waddr) && v3d_qpu_waits_on_tmu(after)) return 100; /* Assume that anything depending on us is consuming the SFU result. */ diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index a02b5a6404a..33a9942734d 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -940,6 +940,7 @@ VIR_A_ALU0(TIDX) VIR_A_ALU0(EIDX) VIR_A_ALU1(LDVPMV_IN) VIR_A_ALU1(LDVPMV_OUT) +VIR_A_ALU0(TMUWT) VIR_A_ALU0(FXCD) VIR_A_ALU0(XCD) diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index b5539b6ef50..86379faa5bb 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -98,6 +98,7 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst) case V3D_QPU_A_STVPMD: case V3D_QPU_A_STVPMP: case V3D_QPU_A_VPMWT: + case V3D_QPU_A_TMUWT: return true; default: break; @@ -194,6 +195,11 @@ vir_is_tex(struct qinst *inst) if (inst->dst.file == QFILE_MAGIC) return v3d_qpu_magic_waddr_is_tmu(inst->dst.index); + if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && + inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) { + return true; + } + return false; } diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index a7fb4186e1a..0846cc86174 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -526,6 +526,14 @@ v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) } bool +v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) +{ + return (inst->sig.ldtmu || + (inst->type == V3D_QPU_INSTR_TYPE_ALU && + inst->alu.add.op == V3D_QPU_A_TMUWT)); +} + +bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) { return (waddr == V3D_QPU_WADDR_TLB || diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index c37abac3cf8..c2b4ebd1995 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -452,6 +452,7 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux); bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; |