Diffstat (limited to 'src/broadcom/compiler/nir_to_vir.c')
 src/broadcom/compiler/nir_to_vir.c | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 53af9be9f74..45e34c70a49 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -115,6 +115,20 @@ vir_emit_thrsw(struct v3d_compile *c)
                c->lock_scoreboard_on_first_thrsw = true;
}

+static uint32_t
+v3d_get_op_for_atomic_add(nir_intrinsic_instr *instr, unsigned src)
+{
+        if (nir_src_is_const(instr->src[src])) {
+                int64_t add_val = nir_src_as_int(instr->src[src]);
+                if (add_val == 1)
+                        return V3D_TMU_OP_WRITE_AND_READ_INC;
+                else if (add_val == -1)
+                        return V3D_TMU_OP_WRITE_OR_READ_DEC;
+        }
+
+        return V3D_TMU_OP_WRITE_ADD_READ_PREFETCH;
+}
+
static uint32_t
v3d_general_tmu_op(nir_intrinsic_instr *instr)
{
@@ -129,8 +143,9 @@ v3d_general_tmu_op(nir_intrinsic_instr *instr)
        case nir_intrinsic_store_scratch:
                return V3D_TMU_OP_REGULAR;
        case nir_intrinsic_ssbo_atomic_add:
+                return v3d_get_op_for_atomic_add(instr, 2);
        case nir_intrinsic_shared_atomic_add:
-                return V3D_TMU_OP_WRITE_ADD_READ_PREFETCH;
+                return v3d_get_op_for_atomic_add(instr, 1);
        case nir_intrinsic_ssbo_atomic_imin:
        case nir_intrinsic_shared_atomic_imin:
                return V3D_TMU_OP_WRITE_SMIN;
@@ -171,11 +186,16 @@ static void
ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                     bool is_shared_or_scratch)
{
-        /* XXX perf: We should turn add/sub of 1 to inc/dec. Perhaps NIR
-         * wants to have support for inc/dec?
-         */
-
        uint32_t tmu_op = v3d_general_tmu_op(instr);
+
+        /* If we were able to replace the atomic_add with an inc/dec, then
+         * we need to do things slightly differently, like not loading the
+         * amount to add/sub, as that is implicit.
+         */
+        bool atomic_add_replaced = ((instr->intrinsic == nir_intrinsic_ssbo_atomic_add ||
+                                     instr->intrinsic == nir_intrinsic_shared_atomic_add) &&
+                                    (tmu_op == V3D_TMU_OP_WRITE_AND_READ_INC ||
+                                     tmu_op == V3D_TMU_OP_WRITE_OR_READ_DEC));
        bool is_store = (instr->intrinsic == nir_intrinsic_store_ssbo ||
                         instr->intrinsic == nir_intrinsic_store_scratch ||
                         instr->intrinsic == nir_intrinsic_store_shared);
@@ -188,7 +208,8 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
        } else if (instr->intrinsic == nir_intrinsic_load_ssbo ||
                   instr->intrinsic == nir_intrinsic_load_ubo ||
                   instr->intrinsic == nir_intrinsic_load_scratch ||
-                   instr->intrinsic == nir_intrinsic_load_shared) {
+                   instr->intrinsic == nir_intrinsic_load_shared ||
+                   atomic_add_replaced) {
                offset_src = 0 + has_index;
        } else if (is_store) {
                offset_src = 1 + has_index;
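
Editor's note: the selection logic introduced above can be read in isolation: an atomic
add whose operand is the compile-time constant 1 or -1 is mapped to the TMU's dedicated
increment/decrement write ops, and anything else falls back to the generic add. Below is
a minimal standalone sketch of that mapping. The enum, struct, and function names here
are simplified stand-ins, not the real Mesa API: they model the V3D_TMU_OP_* values and
the nir_src_is_const()/nir_src_as_int() queries used in the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the V3D_TMU_OP_* values used in the patch. */
enum tmu_op {
        TMU_OP_WRITE_AND_READ_INC,      /* atomic add of +1 */
        TMU_OP_WRITE_OR_READ_DEC,       /* atomic add of -1 */
        TMU_OP_WRITE_ADD_READ_PREFETCH  /* generic atomic add */
};

/* Stand-in for what nir_src_is_const()/nir_src_as_int() report
 * about an intrinsic source. */
struct atomic_src {
        bool is_const;
        int64_t const_val;
};

/* Same shape as v3d_get_op_for_atomic_add(): fold the add into an
 * inc/dec only when the operand is known at compile time to be +/-1. */
static enum tmu_op
op_for_atomic_add(const struct atomic_src *src)
{
        if (src->is_const) {
                if (src->const_val == 1)
                        return TMU_OP_WRITE_AND_READ_INC;
                else if (src->const_val == -1)
                        return TMU_OP_WRITE_OR_READ_DEC;
        }
        return TMU_OP_WRITE_ADD_READ_PREFETCH;
}

int
main(void)
{
        const struct atomic_src plus_one  = { .is_const = true,  .const_val = 1 };
        const struct atomic_src minus_one = { .is_const = true,  .const_val = -1 };
        const struct atomic_src dynamic   = { .is_const = false, .const_val = 0 };

        printf("add  1 -> %d (INC)\n", op_for_atomic_add(&plus_one));
        printf("add -1 -> %d (DEC)\n", op_for_atomic_add(&minus_one));
        printf("add  n -> %d (generic add)\n", op_for_atomic_add(&dynamic));
        return 0;
}

In a shader, this means something like atomicAdd(counter, 1) now emits the dedicated
increment op, while an atomicAdd(counter, n) with a dynamic n keeps the generic
add-with-prefetch path.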
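A note on the source indices passed to v3d_get_op_for_atomic_add(): in NIR,
nir_intrinsic_ssbo_atomic_add carries src[0] = SSBO index, src[1] = offset, and
src[2] = the value to add, while nir_intrinsic_shared_atomic_add carries
src[0] = offset and src[1] = the value to add, hence the 2 and 1 at the two call
sites. Once the add is folded into an inc/dec, the amount operand no longer has to
be loaded, which is why a replaced atomic takes the same offset_src = 0 + has_index
path as a plain load: the offset is then the first data source after the optional
buffer index.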