summaryrefslogtreecommitdiffstats
path: root/src/freedreno
diff options
context:
space:
mode:
author: Rob Clark <[email protected]> 2019-02-20 10:31:15 -0500
committer: Rob Clark <[email protected]> 2019-02-20 18:50:08 -0500
commit: 50dd773a2d15570944d3955bb851b46ac345cdf4 (patch)
tree: add95452471f3be393567a0c6b157147347b85e8 /src/freedreno
parent: c543a2cf6f4ab86b78ef314f4be22d6dd2e4604a (diff)
freedreno/ir3/a6xx: use ldib for ssbo reads
... instead of isam. It seems like when using isam together with atomics, we can run into the problem of stale data being in the texture cache. Plus, this way we don't have to load one component at a time. Note that blob still seems to use isam in some cases. I suppose it might be preferable when loading a single component and atomics are not in the picture (or when the ssbo does not otherwise need to be coherent). Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/freedreno')
-rw-r--r-- src/freedreno/ir3/ir3_a6xx.c | 34
1 files changed, 10 insertions, 24 deletions
diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c
index 29b7ea8416c..1a10b63edb2 100644
--- a/src/freedreno/ir3/ir3_a6xx.c
+++ b/src/freedreno/ir3/ir3_a6xx.c
@@ -56,40 +56,26 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
{
struct ir3_block *b = ctx->block;
struct ir3_instruction *offset;
- struct ir3_instruction *sam;
+ struct ir3_instruction *ldib;
nir_const_value *buffer_index;
/* can this be non-const buffer_index? how do we handle that? */
buffer_index = nir_src_as_const_value(intr->src[0]);
compile_assert(ctx, buffer_index);
- int tex_idx = ir3_ssbo_to_tex(&ctx->so->image_mapping, buffer_index->u32[0]);
+ int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, buffer_index->u32[0]);
offset = ssbo_offset(b, ir3_get_src(ctx, &intr->src[1])[0]);
- /* Because texture state for SSBO read is setup as a single component
- * format (ie. R32_UINT, etc), we can't read more than the .x component
- * in one shot. Maybe there is some way we could mangle the state to
- * read more than one component at a shot, which would result is some-
- * what less register usage (given how we have to stick in the dummy
- * .y coord) and less alu instructions to calc offsets. But this is
- * also what blob does, so meh?
- */
- for (unsigned i = 0; i < intr->num_components; i++) {
- struct ir3_instruction *coords[2];
-
- coords[0] = (i == 0) ? offset :
- ir3_ADD_U(b, offset, 0, create_immed(b, i), 0);
- coords[1] = create_immed(b, 0);
-
- sam = ir3_SAM(b, OPC_ISAM, TYPE_U32, 0b1, 0,
- tex_idx, tex_idx, ir3_create_collect(ctx, coords, 2), NULL);
+ ldib = ir3_LDIB(b, create_immed(b, ibo_idx), 0, offset, 0);
+ ldib->regs[0]->wrmask = MASK(intr->num_components);
+ ldib->cat6.iim_val = intr->num_components;
+ ldib->cat6.d = 1;
+ ldib->cat6.type = TYPE_U32;
+ ldib->barrier_class = IR3_BARRIER_BUFFER_R;
+ ldib->barrier_conflict = IR3_BARRIER_BUFFER_W;
- sam->barrier_class = IR3_BARRIER_BUFFER_R;
- sam->barrier_conflict = IR3_BARRIER_BUFFER_W;
-
- dst[i] = sam;
- }
+ ir3_split_dest(b, dst, ldib, 0, intr->num_components);
}
/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */