summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/freedreno
diff options
context:
space:
mode:
author Rob Clark <[email protected]> 2017-11-09 10:48:52 -0500
committer Rob Clark <[email protected]> 2017-11-12 12:28:59 -0500
commit dd75abc6f3909a2ed9e8e7c5a533be11c124a509 (patch)
tree 2df244c2d933a7678d638f2dac47070f0181bbb2 /src/gallium/drivers/freedreno
parent 2f8bdf2e2b8cc516b89cd042962d5dab81a695ec (diff)
freedreno/ir3: some SSBO cleanups/fixes
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno')
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.h 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 52
2 files changed, 39 insertions, 15 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index b85e5e38bda..e5b1a2dce09 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -893,6 +893,8 @@ static inline unsigned ir3_cat3_absneg(opc_t opc)
}
}
+#define MASK(n) ((1 << (n)) - 1)
+
/* iterator for an instructions's sources (reg), also returns src #: */
#define foreach_src_n(__srcreg, __n, __instr) \
if ((__instr)->regs_count) \
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index f464d487ca7..0d642772f9e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1231,7 +1231,7 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
}
static void
-mark_ssbo_read(struct ir3_context *ctx, struct ir3_instruction *instr)
+mark_read(struct ir3_context *ctx, struct ir3_instruction *instr)
{
instr->regs[0]->instr = ctx->last_write;
instr->regs[0]->flags |= IR3_REG_SSA;
@@ -1239,13 +1239,14 @@ mark_ssbo_read(struct ir3_context *ctx, struct ir3_instruction *instr)
}
static void
-mark_ssbo_write(struct ir3_context *ctx, struct ir3_instruction *instr)
+mark_write(struct ir3_context *ctx, struct ir3_instruction *instr)
{
instr->regs[0]->instr = ctx->last_access;
instr->regs[0]->flags |= IR3_REG_SSA;
ctx->last_write = ctx->last_access = instr;
}
+/* src[] = { buffer_index, offset }. No const_index */
static void
emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst)
@@ -1269,10 +1270,11 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0,
src0, 0, src1, 0);
- ldgb->regs[0]->wrmask = (1 << intr->num_components) - 1;
+ ldgb->regs[0]->wrmask = MASK(intr->num_components);
ldgb->cat6.iim_val = intr->num_components;
+ ldgb->cat6.d = 4;
ldgb->cat6.type = TYPE_U32;
- mark_ssbo_read(ctx, ldgb);
+ mark_read(ctx, ldgb);
split_dest(b, dst, ldgb, 0, intr->num_components);
}
@@ -1284,7 +1286,12 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_block *b = ctx->block;
struct ir3_instruction *stgb, *src0, *src1, *src2, *offset;
nir_const_value *const_offset;
- unsigned ncomp = ffs(~intr->const_index[0]) - 1;
+ /* TODO handle wrmask properly, see _store_shared().. but I think
+ * it is more a PITA than that, since blob ends up loading the
+ * masked components and writing them back out.
+ */
+ unsigned wrmask = intr->const_index[0];
+ unsigned ncomp = ffs(~wrmask) - 1;
/* can this be non-const buffer_index? how do we handle that? */
const_offset = nir_src_as_const_value(intr->src[1]);
@@ -1305,8 +1312,9 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0,
src0, 0, src1, 0, src2, 0);
stgb->cat6.iim_val = ncomp;
+ stgb->cat6.d = 4;
stgb->cat6.type = TYPE_U32;
- mark_ssbo_write(ctx, stgb);
+ mark_write(ctx, stgb);
array_insert(b, b->keeps, stgb);
}
@@ -1326,8 +1334,25 @@ emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
dst[0] = create_uniform(ctx, idx);
}
+/*
+ * SSBO atomic intrinsics
+ *
+ * All of the SSBO atomic memory operations read a value from memory,
+ * compute a new value using one of the operations below, write the new
+ * value to memory, and return the original value read.
+ *
+ * All operations take 3 sources except CompSwap that takes 4. These
+ * sources represent:
+ *
+ * 0: The SSBO buffer index.
+ * 1: The offset into the SSBO buffer of the variable that the atomic
+ * operation will operate on.
+ * 2: The data parameter to the atomic function (i.e. the value to add
+ * in ssbo_atomic_add, etc).
+ * 3: For CompSwap only: the second data parameter.
+ */
static struct ir3_instruction *
-emit_intrinsic_atomic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
+emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
struct ir3_block *b = ctx->block;
struct ir3_instruction *atomic, *ssbo, *src0, *src1, *src2, *offset;
@@ -1341,9 +1366,9 @@ emit_intrinsic_atomic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
offset = get_src(ctx, &intr->src[1])[0];
- /* src0 is data (or uvec2(data, compare)
+ /* src0 is data (or uvec2(data, compare))
* src1 is offset
- * src2 is uvec2(offset*4, 0)
+ * src2 is uvec2(offset*4, 0) (appears to be 64b byte offset)
*
* Note that nir already multiplies the offset by four
*/
@@ -1397,8 +1422,9 @@ emit_intrinsic_atomic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
}
atomic->cat6.iim_val = 1;
+ atomic->cat6.d = 4;
atomic->cat6.type = type;
- mark_ssbo_write(ctx, atomic);
+ mark_write(ctx, atomic);
/* even if nothing consume the result, we can't DCE the instruction: */
array_insert(b, b->keeps, atomic);
@@ -1558,11 +1584,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_ssbo_atomic_comp_swap:
- if (info->has_dest) {
- dst[0] = emit_intrinsic_atomic(ctx, intr);
- } else {
- emit_intrinsic_atomic(ctx, intr);
- }
+ dst[0] = emit_intrinsic_atomic_ssbo(ctx, intr);
break;
case nir_intrinsic_barrier:
case nir_intrinsic_memory_barrier: