summary | refs | log | tree | commit | diff | stats
path: root/src/broadcom/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/broadcom/compiler')
-rw-r--r-- src/broadcom/compiler/nir_to_vir.c 25
-rw-r--r-- src/broadcom/compiler/qpu_schedule.c 12
-rw-r--r-- src/broadcom/compiler/v3d_compiler.h 34
-rw-r--r-- src/broadcom/compiler/vir.c 14
-rw-r--r-- src/broadcom/compiler/vir_opt_dead_code.c 13
-rw-r--r-- src/broadcom/compiler/vir_register_allocate.c 13
6 files changed, 87 insertions, 24 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 5c7acdf72ab..51cb8845cdb 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -74,13 +74,6 @@ vir_emit_thrsw(struct v3d_compile *c)
}
static struct qreg
-vir_SFU(struct v3d_compile *c, int waddr, struct qreg src)
-{
- vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, waddr), src);
- return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4));
-}
-
-static struct qreg
indirect_uniform_load(struct v3d_compile *c, nir_intrinsic_instr *intr)
{
struct qreg indirect_offset = ntq_get_src(c, intr->src[0], 0);
@@ -330,8 +323,7 @@ ntq_fsincos(struct v3d_compile *c, struct qreg src, bool is_cos)
input = vir_FADD(c, input, vir_uniform_f(c, 0.5));
struct qreg periods = vir_FROUND(c, input);
- struct qreg sin_output = vir_SFU(c, V3D_QPU_WADDR_SIN,
- vir_FSUB(c, input, periods));
+ struct qreg sin_output = vir_SIN(c, vir_FSUB(c, input, periods));
return vir_XOR(c, sin_output, vir_SHL(c,
vir_FTOIN(c, periods),
vir_uniform_ui(c, -1)));
@@ -369,8 +361,7 @@ emit_fragcoord_input(struct v3d_compile *c, int attr)
c->inputs[attr * 4 + 0] = vir_FXCD(c);
c->inputs[attr * 4 + 1] = vir_FYCD(c);
c->inputs[attr * 4 + 2] = c->payload_z;
- c->inputs[attr * 4 + 3] = vir_SFU(c, V3D_QPU_WADDR_RECIP,
- c->payload_w);
+ c->inputs[attr * 4 + 3] = vir_RECIP(c, c->payload_w);
}
static struct qreg
@@ -782,16 +773,16 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
break;
case nir_op_frcp:
- result = vir_SFU(c, V3D_QPU_WADDR_RECIP, src[0]);
+ result = vir_RECIP(c, src[0]);
break;
case nir_op_frsq:
- result = vir_SFU(c, V3D_QPU_WADDR_RSQRT, src[0]);
+ result = vir_RSQRT(c, src[0]);
break;
case nir_op_fexp2:
- result = vir_SFU(c, V3D_QPU_WADDR_EXP, src[0]);
+ result = vir_EXP(c, src[0]);
break;
case nir_op_flog2:
- result = vir_SFU(c, V3D_QPU_WADDR_LOG, src[0]);
+ result = vir_LOG(c, src[0]);
break;
case nir_op_fceil:
@@ -1151,8 +1142,8 @@ emit_vert_end(struct v3d_compile *c)
setup_default_position(c);
uint32_t vpm_index = 0;
- struct qreg rcp_w = vir_SFU(c, V3D_QPU_WADDR_RECIP,
- c->outputs[c->output_position_index + 3]);
+ struct qreg rcp_w = vir_RECIP(c,
+ c->outputs[c->output_position_index + 3]);
emit_vpm_write_setup(c);
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index 2a035c5521e..af0b9b86b1c 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -459,7 +459,7 @@ calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list)
struct choose_scoreboard {
int tick;
- int last_sfu_write_tick;
+ int last_magic_sfu_write_tick;
int last_ldvary_tick;
int last_uniforms_reset_tick;
bool tlb_locked;
@@ -471,7 +471,7 @@ mux_reads_too_soon(struct choose_scoreboard *scoreboard,
{
switch (mux) {
case V3D_QPU_MUX_R4:
- if (scoreboard->tick - scoreboard->last_sfu_write_tick <= 2)
+ if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick <= 2)
return true;
break;
@@ -536,7 +536,7 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo,
* This would normally be prevented by dependency tracking, but might
* occur if a dead SFU computation makes it to scheduling.
*/
- if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 &&
+ if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick < 2 &&
v3d_qpu_writes_r4(devinfo, inst))
return true;
@@ -595,6 +595,8 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
{
if (v3d_qpu_uses_vpm(inst))
return true;
+ if (v3d_qpu_uses_sfu(inst))
+ return true;
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
if (inst->alu.add.op != V3D_QPU_A_NOP &&
@@ -825,7 +827,7 @@ update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard,
enum v3d_qpu_waddr waddr)
{
if (v3d_qpu_magic_waddr_is_sfu(waddr))
- scoreboard->last_sfu_write_tick = scoreboard->tick;
+ scoreboard->last_magic_sfu_write_tick = scoreboard->tick;
}
static void
@@ -1467,7 +1469,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c)
struct choose_scoreboard scoreboard;
memset(&scoreboard, 0, sizeof(scoreboard));
scoreboard.last_ldvary_tick = -10;
- scoreboard.last_sfu_write_tick = -10;
+ scoreboard.last_magic_sfu_write_tick = -10;
scoreboard.last_uniforms_reset_tick = -10;
if (debug) {
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 133c2e0b7d9..9dc19248aa6 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -867,6 +867,33 @@ vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \
a, b)); \
}
+#define VIR_SFU(name) \
+static inline struct qreg \
+vir_##name(struct v3d_compile *c, struct qreg a) \
+{ \
+ if (c->devinfo->ver >= 41) { \
+ return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \
+ c->undef, \
+ a, c->undef)); \
+ } else { \
+ vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
+ return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
+ } \
+} \
+static inline struct qinst * \
+vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \
+ struct qreg a) \
+{ \
+ if (c->devinfo->ver >= 41) { \
+ return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \
+ dest, \
+ a, c->undef)); \
+ } else { \
+ vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
+ return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
+ } \
+}
+
#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name)
@@ -948,6 +975,13 @@ VIR_M_NODST_2(MULTOP)
VIR_M_ALU1(MOV)
VIR_M_ALU1(FMOV)
+VIR_SFU(RECIP)
+VIR_SFU(RSQRT)
+VIR_SFU(EXP)
+VIR_SFU(LOG)
+VIR_SFU(SIN)
+VIR_SFU(RSQRT2)
+
static inline struct qinst *
vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond,
struct qreg dest, struct qreg src)
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index ee0f329040e..d804fe6089d 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -935,6 +935,17 @@ vir_uniform(struct v3d_compile *c,
return vir_reg(QFILE_UNIF, uniform);
}
+static bool
+vir_can_set_flags(struct v3d_compile *c, struct qinst *inst)
+{
+ if (c->devinfo->ver >= 40 && (v3d_qpu_reads_vpm(&inst->qpu) ||
+ v3d_qpu_uses_sfu(&inst->qpu))) {
+ return false;
+ }
+
+ return true;
+}
+
void
vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
{
@@ -954,7 +965,8 @@ vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
if (src.file != QFILE_TEMP ||
!c->defs[src.index] ||
- last_inst != c->defs[src.index]) {
+ last_inst != c->defs[src.index] ||
+ !vir_can_set_flags(c, last_inst)) {
/* XXX: Make the MOV be the appropriate type */
last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
}
diff --git a/src/broadcom/compiler/vir_opt_dead_code.c b/src/broadcom/compiler/vir_opt_dead_code.c
index 7ce05fb5f51..362fc9e52a3 100644
--- a/src/broadcom/compiler/vir_opt_dead_code.c
+++ b/src/broadcom/compiler/vir_opt_dead_code.c
@@ -85,6 +85,16 @@ has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst)
return false;
}
+static bool
+can_write_to_null(struct v3d_compile *c, struct qinst *inst)
+{
+ /* The SFU instructions must write to a physical register. */
+ if (c->devinfo->ver >= 41 && v3d_qpu_uses_sfu(&inst->qpu))
+ return false;
+
+ return true;
+}
+
bool
vir_opt_dead_code(struct v3d_compile *c)
{
@@ -122,7 +132,8 @@ vir_opt_dead_code(struct v3d_compile *c)
* it's nicer to read the VIR code without
* unused destination regs.
*/
- if (inst->dst.file == QFILE_TEMP) {
+ if (inst->dst.file == QFILE_TEMP &&
+ can_write_to_null(c, inst)) {
if (debug) {
fprintf(stderr,
"Removing dst from: ");
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
index aa5e2139c1b..5a856acd7ed 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -445,6 +445,19 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled)
class_bits[inst->dst.index] &= CLASS_BIT_PHYS;
break;
+ case V3D_QPU_A_RECIP:
+ case V3D_QPU_A_RSQRT:
+ case V3D_QPU_A_EXP:
+ case V3D_QPU_A_LOG:
+ case V3D_QPU_A_SIN:
+ case V3D_QPU_A_RSQRT2:
+ /* The SFU instructions write directly to the
+ * phys regfile.
+ */
+ assert(inst->dst.file == QFILE_TEMP);
+ class_bits[inst->dst.index] &= CLASS_BIT_PHYS;
+ break;
+
default:
break;
}