diff options
field | value | date |
---|---|---|
author | Eric Anholt <[email protected]> | 2018-01-03 21:42:33 -0800 |
committer | Eric Anholt <[email protected]> | 2018-01-12 21:53:45 -0800 |
commit | dfee62eed3cacbf77ca3168143be6577849c998d (patch) | |
tree | aa86205704ae035d857f49201b48829c5408c5f7 /src/broadcom/compiler | |
parent | 81ec2ba22975595b4f07c3e8307a8f0a4ec18773 (diff) | |
broadcom/vc5: Add support for V3Dv4 signal bits.

The WRTMUC signal replaces the implicit uniform loads in the first two
texture instructions. LDVPM disappears in favor of an ALU op. LDVARY, LDTMU,
LDTLB, and LDUNIFRF/LDUNIFARF now write to arbitrary registers, which
required passing the devinfo through to a few more functions.
Diffstat (limited to 'src/broadcom/compiler')
-rw-r--r-- | src/broadcom/compiler/qpu_schedule.c | 41 |
-rw-r--r-- | src/broadcom/compiler/qpu_validate.c | 6 |
-rw-r--r-- | src/broadcom/compiler/v3d_compiler.h | 4 |
-rw-r--r-- | src/broadcom/compiler/vir.c | 14 |
-rw-r--r-- | src/broadcom/compiler/vir_dump.c | 45 |
-rw-r--r-- | src/broadcom/compiler/vir_register_allocate.c | 4 |
-rw-r--r-- | src/broadcom/compiler/vir_to_qpu.c | 9 |
7 files changed, 102 insertions, 21 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 799da805906..7fe46202636 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -78,6 +78,7 @@ struct schedule_node_child { enum direction { F, R }; struct schedule_state { + const struct v3d_device_info *devinfo; struct schedule_node *last_r[6]; struct schedule_node *last_rf[64]; struct schedule_node *last_sf; @@ -265,6 +266,7 @@ process_uf_deps(struct schedule_state *state, struct schedule_node *n, static void calculate_deps(struct schedule_state *state, struct schedule_node *n) { + const struct v3d_device_info *devinfo = state->devinfo; struct qinst *qinst = n->inst; struct v3d_qpu_instr *inst = &qinst->qpu; @@ -356,12 +358,16 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) process_waddr_deps(state, n, inst->alu.mul.waddr, inst->alu.mul.magic_write); } + if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) { + process_waddr_deps(state, n, inst->sig_addr, + inst->sig_magic); + } - if (v3d_qpu_writes_r3(inst)) + if (v3d_qpu_writes_r3(devinfo, inst)) add_write_dep(state, &state->last_r[3], n); - if (v3d_qpu_writes_r4(inst)) + if (v3d_qpu_writes_r4(devinfo, inst)) add_write_dep(state, &state->last_r[4], n); - if (v3d_qpu_writes_r5(inst)) + if (v3d_qpu_writes_r5(devinfo, inst)) add_write_dep(state, &state->last_r[5], n); if (inst->sig.thrsw) { @@ -410,6 +416,7 @@ calculate_forward_deps(struct v3d_compile *c, struct list_head *schedule_list) struct schedule_state state; memset(&state, 0, sizeof(state)); + state.devinfo = c->devinfo; state.dir = F; list_for_each_entry(struct schedule_node, node, schedule_list, link) @@ -423,6 +430,7 @@ calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list) struct schedule_state state; memset(&state, 0, sizeof(state)); + state.devinfo = c->devinfo; state.dir = R; for (node = schedule_list->prev; schedule_list != node; node = node->prev) { @@ -514,7 +522,8 
@@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard, } static bool -writes_too_soon_after_write(struct choose_scoreboard *scoreboard, +writes_too_soon_after_write(const struct v3d_device_info *devinfo, + struct choose_scoreboard *scoreboard, struct qinst *qinst) { const struct v3d_qpu_instr *inst = &qinst->qpu; @@ -524,7 +533,7 @@ writes_too_soon_after_write(struct choose_scoreboard *scoreboard, * occur if a dead SFU computation makes it to scheduling. */ if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 && - v3d_qpu_writes_r4(inst)) + v3d_qpu_writes_r4(devinfo, inst)) return true; return false; @@ -605,7 +614,8 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst) return (inst->sig.ldvpm || inst->sig.ldtmu || inst->sig.ldtlb || - inst->sig.ldtlbu); + inst->sig.ldtlbu || + inst->sig.wrtmuc); } static bool @@ -619,7 +629,11 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, return false; } - /* Can't do more than one peripheral access in an instruction. */ + /* Can't do more than one peripheral access in an instruction. + * + * XXX: V3D 4.1 allows TMU read along with a VPM read or write, and + * WRTMUC with a TMU magic register write (other than tmuc). 
+ */ if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b)) return false; @@ -663,6 +677,9 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, merge.sig.thrsw |= b->sig.thrsw; merge.sig.ldunif |= b->sig.ldunif; + merge.sig.ldunifrf |= b->sig.ldunifrf; + merge.sig.ldunifa |= b->sig.ldunifa; + merge.sig.ldunifarf |= b->sig.ldunifarf; merge.sig.ldtmu |= b->sig.ldtmu; merge.sig.ldvary |= b->sig.ldvary; merge.sig.ldvpm |= b->sig.ldvpm; @@ -673,6 +690,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, merge.sig.rotate |= b->sig.rotate; merge.sig.wrtmuc |= b->sig.wrtmuc; + if (v3d_qpu_sig_writes_address(devinfo, &a->sig) && + v3d_qpu_sig_writes_address(devinfo, &b->sig)) + return false; + merge.sig_addr |= b->sig_addr; + merge.sig_magic |= b->sig_magic; + uint64_t packed; bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed); @@ -719,7 +742,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo, if (reads_too_soon_after_write(scoreboard, n->inst)) continue; - if (writes_too_soon_after_write(scoreboard, n->inst)) + if (writes_too_soon_after_write(devinfo, scoreboard, n->inst)) continue; /* "A scoreboard wait must not occur in the first two @@ -735,7 +758,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo, * otherwise get scheduled so ldunif and ldvary try to update * r5 in the same tick. 
*/ - if (inst->sig.ldunif && + if ((inst->sig.ldunif || inst->sig.ldunifa) && scoreboard->tick == scoreboard->last_ldvary_tick + 1) { continue; } diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c index d99d76a8beb..3b2c10eabc6 100644 --- a/src/broadcom/compiler/qpu_validate.c +++ b/src/broadcom/compiler/qpu_validate.c @@ -85,6 +85,7 @@ qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst, static void qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) { + const struct v3d_device_info *devinfo = state->c->devinfo; const struct v3d_qpu_instr *inst = &qinst->qpu; if (inst->type != V3D_QPU_INSTR_TYPE_ALU) @@ -94,7 +95,8 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) * r5 one instruction later, which is illegal to have * together. */ - if (state->last && state->last->sig.ldvary && inst->sig.ldunif) { + if (state->last && state->last->sig.ldvary && + (inst->sig.ldunif || inst->sig.ldunifa)) { fail_instr(state, "LDUNIF after a LDVARY"); } @@ -143,7 +145,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4)) fail_instr(state, "R4 read too soon after SFU"); - if (v3d_qpu_writes_r4(inst)) + if (v3d_qpu_writes_r4(devinfo, inst)) fail_instr(state, "R4 write too soon after SFU"); if (sfu_writes) diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 85def2cb02c..4ced588fbbe 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -650,8 +650,8 @@ bool vir_is_add(struct qinst *inst); bool vir_is_mul(struct qinst *inst); bool vir_is_float_input(struct qinst *inst); bool vir_depends_on_flags(struct qinst *inst); -bool vir_writes_r3(struct qinst *inst); -bool vir_writes_r4(struct qinst *inst); +bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst); +bool vir_writes_r4(const struct v3d_device_info *devinfo, 
struct qinst *inst); struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); uint8_t vir_channels_written(struct qinst *inst); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 4e78a477bd7..c129bb047e6 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -21,6 +21,7 @@ * IN THE SOFTWARE. */ +#include "broadcom/common/v3d_device_info.h" #include "v3d_compiler.h" int @@ -198,7 +199,7 @@ vir_depends_on_flags(struct qinst *inst) } bool -vir_writes_r3(struct qinst *inst) +vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst) { for (int i = 0; i < vir_get_nsrc(inst); i++) { switch (inst->src[i].file) { @@ -210,11 +211,18 @@ vir_writes_r3(struct qinst *inst) } } + if (devinfo->ver < 41 && (inst->qpu.sig.ldvary || + inst->qpu.sig.ldtlb || + inst->qpu.sig.ldtlbu || + inst->qpu.sig.ldvpm)) { + return true; + } + return false; } bool -vir_writes_r4(struct qinst *inst) +vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst) { switch (inst->dst.file) { case QFILE_MAGIC: @@ -231,7 +239,7 @@ vir_writes_r4(struct qinst *inst) break; } - if (inst->qpu.sig.ldtmu) + if (devinfo->ver < 41 && inst->qpu.sig.ldtmu) return true; return false; diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c index ad5c061a138..cdb1928ed00 100644 --- a/src/broadcom/compiler/vir_dump.c +++ b/src/broadcom/compiler/vir_dump.c @@ -21,6 +21,7 @@ * IN THE SOFTWARE. 
*/ +#include "broadcom/common/v3d_device_info.h" #include "v3d_compiler.h" static void @@ -146,20 +147,60 @@ vir_print_reg(struct v3d_compile *c, struct qreg reg) } static void +vir_dump_sig_addr(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) +{ + if (devinfo->ver < 41) + return; + + if (!instr->sig_magic) + fprintf(stderr, ".rf%d", instr->sig_addr); + else { + const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr); + if (name) + fprintf(stderr, ".%s", name); + else + fprintf(stderr, ".UNKNOWN%d", instr->sig_addr); + } +} + +static void vir_dump_sig(struct v3d_compile *c, struct qinst *inst) { struct v3d_qpu_sig *sig = &inst->qpu.sig; if (sig->thrsw) fprintf(stderr, "; thrsw"); - if (sig->ldvary) + if (sig->ldvary) { fprintf(stderr, "; ldvary"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } if (sig->ldvpm) fprintf(stderr, "; ldvpm"); - if (sig->ldtmu) + if (sig->ldtmu) { fprintf(stderr, "; ldtmu"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } + if (sig->ldtlb) { + fprintf(stderr, "; ldtlb"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } + if (sig->ldtlbu) { + fprintf(stderr, "; ldtlbu"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } if (sig->ldunif) fprintf(stderr, "; ldunif"); + if (sig->ldunifrf) { + fprintf(stderr, "; ldunifrf"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } + if (sig->ldunifa) + fprintf(stderr, "; ldunifa"); + if (sig->ldunifarf) { + fprintf(stderr, "; ldunifarf"); + vir_dump_sig_addr(c->devinfo, &inst->qpu); + } if (sig->wrtmuc) fprintf(stderr, "; wrtmuc"); } diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index 9ebf2cd69b4..f39f0c2829b 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -139,7 +139,7 @@ v3d_register_allocate(struct v3d_compile *c) * result to a temp), nothing else can be stored in r3/r4 across * it. 
*/ - if (vir_writes_r3(inst)) { + if (vir_writes_r3(c->devinfo, inst)) { for (int i = 0; i < c->num_temps; i++) { if (c->temp_start[i] < ip && c->temp_end[i] > ip) { @@ -149,7 +149,7 @@ v3d_register_allocate(struct v3d_compile *c) } } } - if (vir_writes_r4(inst)) { + if (vir_writes_r4(c->devinfo, inst)) { for (int i = 0; i < c->num_temps; i++) { if (c->temp_start[i] < ip && c->temp_end[i] > ip) { diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c index eeb7b0bc291..525638df691 100644 --- a/src/broadcom/compiler/vir_to_qpu.c +++ b/src/broadcom/compiler/vir_to_qpu.c @@ -264,7 +264,14 @@ v3d_generate_code_block(struct v3d_compile *c, } if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { - if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) { + if (v3d_qpu_sig_writes_address(c->devinfo, + &qinst->qpu.sig)) { + assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP); + assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); + + qinst->qpu.sig_addr = dst.index; + qinst->qpu.sig_magic = dst.magic; + } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) { assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP); if (nsrc >= 1) { set_src(&qinst->qpu, |