diff options
-rw-r--r-- | src/broadcom/compiler/nir_to_vir.c | 25 | ||||
-rw-r--r-- | src/broadcom/compiler/qpu_schedule.c | 12 | ||||
-rw-r--r-- | src/broadcom/compiler/v3d_compiler.h | 34 | ||||
-rw-r--r-- | src/broadcom/compiler/vir.c | 14 | ||||
-rw-r--r-- | src/broadcom/compiler/vir_opt_dead_code.c | 13 | ||||
-rw-r--r-- | src/broadcom/compiler/vir_register_allocate.c | 13 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_instr.c | 30 | ||||
-rw-r--r-- | src/broadcom/qpu/qpu_instr.h | 1 |
8 files changed, 118 insertions, 24 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 5c7acdf72ab..51cb8845cdb 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -74,13 +74,6 @@ vir_emit_thrsw(struct v3d_compile *c) } static struct qreg -vir_SFU(struct v3d_compile *c, int waddr, struct qreg src) -{ - vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, waddr), src); - return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); -} - -static struct qreg indirect_uniform_load(struct v3d_compile *c, nir_intrinsic_instr *intr) { struct qreg indirect_offset = ntq_get_src(c, intr->src[0], 0); @@ -330,8 +323,7 @@ ntq_fsincos(struct v3d_compile *c, struct qreg src, bool is_cos) input = vir_FADD(c, input, vir_uniform_f(c, 0.5)); struct qreg periods = vir_FROUND(c, input); - struct qreg sin_output = vir_SFU(c, V3D_QPU_WADDR_SIN, - vir_FSUB(c, input, periods)); + struct qreg sin_output = vir_SIN(c, vir_FSUB(c, input, periods)); return vir_XOR(c, sin_output, vir_SHL(c, vir_FTOIN(c, periods), vir_uniform_ui(c, -1))); @@ -369,8 +361,7 @@ emit_fragcoord_input(struct v3d_compile *c, int attr) c->inputs[attr * 4 + 0] = vir_FXCD(c); c->inputs[attr * 4 + 1] = vir_FYCD(c); c->inputs[attr * 4 + 2] = c->payload_z; - c->inputs[attr * 4 + 3] = vir_SFU(c, V3D_QPU_WADDR_RECIP, - c->payload_w); + c->inputs[attr * 4 + 3] = vir_RECIP(c, c->payload_w); } static struct qreg @@ -782,16 +773,16 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr) break; case nir_op_frcp: - result = vir_SFU(c, V3D_QPU_WADDR_RECIP, src[0]); + result = vir_RECIP(c, src[0]); break; case nir_op_frsq: - result = vir_SFU(c, V3D_QPU_WADDR_RSQRT, src[0]); + result = vir_RSQRT(c, src[0]); break; case nir_op_fexp2: - result = vir_SFU(c, V3D_QPU_WADDR_EXP, src[0]); + result = vir_EXP(c, src[0]); break; case nir_op_flog2: - result = vir_SFU(c, V3D_QPU_WADDR_LOG, src[0]); + result = vir_LOG(c, src[0]); break; case nir_op_fceil: @@ -1151,8 +1142,8 @@ emit_vert_end(struct v3d_compile *c) setup_default_position(c); uint32_t vpm_index = 0; - struct qreg rcp_w = vir_SFU(c, V3D_QPU_WADDR_RECIP, - c->outputs[c->output_position_index + 3]); + struct qreg rcp_w = vir_RECIP(c, + c->outputs[c->output_position_index + 3]); emit_vpm_write_setup(c); diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 2a035c5521e..af0b9b86b1c 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -459,7 +459,7 @@ calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list) struct choose_scoreboard { int tick; - int last_sfu_write_tick; + int last_magic_sfu_write_tick; int last_ldvary_tick; int last_uniforms_reset_tick; bool tlb_locked; @@ -471,7 +471,7 @@ mux_reads_too_soon(struct choose_scoreboard *scoreboard, { switch (mux) { case V3D_QPU_MUX_R4: - if (scoreboard->tick - scoreboard->last_sfu_write_tick <= 2) + if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick <= 2) return true; break; @@ -536,7 +536,7 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo, * This would normally be prevented by dependency tracking, but might * occur if a dead SFU computation makes it to scheduling. */ - if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 && + if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick < 2 && v3d_qpu_writes_r4(devinfo, inst)) return true; @@ -595,6 +595,8 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst) { if (v3d_qpu_uses_vpm(inst)) return true; + if (v3d_qpu_uses_sfu(inst)) + return true; if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { if (inst->alu.add.op != V3D_QPU_A_NOP && @@ -825,7 +827,7 @@ update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard, enum v3d_qpu_waddr waddr) { if (v3d_qpu_magic_waddr_is_sfu(waddr)) - scoreboard->last_sfu_write_tick = scoreboard->tick; + scoreboard->last_magic_sfu_write_tick = scoreboard->tick; } static void @@ -1467,7 +1469,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c) struct choose_scoreboard scoreboard; memset(&scoreboard, 0, sizeof(scoreboard)); scoreboard.last_ldvary_tick = -10; - scoreboard.last_sfu_write_tick = -10; + scoreboard.last_magic_sfu_write_tick = -10; scoreboard.last_uniforms_reset_tick = -10; if (debug) { diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 133c2e0b7d9..9dc19248aa6 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -867,6 +867,33 @@ vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ a, b)); \ } +#define VIR_SFU(name) \ +static inline struct qreg \ +vir_##name(struct v3d_compile *c, struct qreg a) \ +{ \ + if (c->devinfo->ver >= 41) { \ + return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \ + c->undef, \ + a, c->undef)); \ + } else { \ + vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ + return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ + } \ +} \ +static inline struct qinst * \ +vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ + struct qreg a) \ +{ \ + if (c->devinfo->ver >= 41) { \ + return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \ + dest, \ + a, c->undef)); \ + } else { \ + vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ + return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ + } \ +} + #define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name) #define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name) #define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name) @@ -948,6 +975,13 @@ VIR_M_NODST_2(MULTOP) VIR_M_ALU1(MOV) VIR_M_ALU1(FMOV) +VIR_SFU(RECIP) +VIR_SFU(RSQRT) +VIR_SFU(EXP) +VIR_SFU(LOG) +VIR_SFU(SIN) +VIR_SFU(RSQRT2) + static inline struct qinst * vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond, struct qreg dest, struct qreg src) diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index ee0f329040e..d804fe6089d 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -935,6 +935,17 @@ vir_uniform(struct v3d_compile *c, return vir_reg(QFILE_UNIF, uniform); } +static bool +vir_can_set_flags(struct v3d_compile *c, struct qinst *inst) +{ + if (c->devinfo->ver >= 40 && (v3d_qpu_reads_vpm(&inst->qpu) || + v3d_qpu_uses_sfu(&inst->qpu))) { + return false; + } + + return true; +} + void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf) { @@ -954,7 +965,8 @@ vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf) if (src.file != QFILE_TEMP || !c->defs[src.index] || - last_inst != c->defs[src.index]) { + last_inst != c->defs[src.index] || + !vir_can_set_flags(c, last_inst)) { /* XXX: Make the MOV be the appropriate type */ last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src); } diff --git a/src/broadcom/compiler/vir_opt_dead_code.c b/src/broadcom/compiler/vir_opt_dead_code.c index 7ce05fb5f51..362fc9e52a3 100644 --- a/src/broadcom/compiler/vir_opt_dead_code.c +++ b/src/broadcom/compiler/vir_opt_dead_code.c @@ -85,6 +85,16 @@ has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst) return false; } +static bool +can_write_to_null(struct v3d_compile *c, struct qinst *inst) +{ + /* The SFU instructions must write to a physical register. */ + if (c->devinfo->ver >= 41 && v3d_qpu_uses_sfu(&inst->qpu)) + return false; + + return true; +} + bool vir_opt_dead_code(struct v3d_compile *c) { @@ -122,7 +132,8 @@ vir_opt_dead_code(struct v3d_compile *c) * it's nicer to read the VIR code without * unused destination regs. */ - if (inst->dst.file == QFILE_TEMP) { + if (inst->dst.file == QFILE_TEMP && + can_write_to_null(c, inst)) { if (debug) { fprintf(stderr, "Removing dst from: "); diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index aa5e2139c1b..5a856acd7ed 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -445,6 +445,19 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled) class_bits[inst->dst.index] &= CLASS_BIT_PHYS; break; + case V3D_QPU_A_RECIP: + case V3D_QPU_A_RSQRT: + case V3D_QPU_A_EXP: + case V3D_QPU_A_LOG: + case V3D_QPU_A_SIN: + case V3D_QPU_A_RSQRT2: + /* The SFU instructions write directly to the + * phys regfile. + */ + assert(inst->dst.file == QFILE_TEMP); + class_bits[inst->dst.index] &= CLASS_BIT_PHYS; + break; + default: break; } diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index deaa533c8ae..a7fb4186e1a 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -603,6 +603,36 @@ v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) } bool +v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) +{ + if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { + switch (inst->alu.add.op) { + case V3D_QPU_A_RECIP: + case V3D_QPU_A_RSQRT: + case V3D_QPU_A_EXP: + case V3D_QPU_A_LOG: + case V3D_QPU_A_SIN: + case V3D_QPU_A_RSQRT2: + return true; + default: + break; + } + + if (inst->alu.add.magic_write && + v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) { + return true; + } + + if (inst->alu.mul.magic_write && + v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) { + return true; + } + } + + return false; +} + +bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) { return (inst->type == V3D_QPU_INSTR_TYPE_ALU && diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index 09dbf3eb4fa..c37abac3cf8 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -444,6 +444,7 @@ bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; +bool v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; |