freedreno/ir3/a6xx: use ldib for ssbo reads

... instead of isam. It seems like when using isam, plus atomics, we can have the problem of old data being in the texture cache. Plus this way we don't have to load a component at a time. Note that blob still seems to use isam in some cases. I suppose it might be preferable in the case of loading a single component, when atomics are not in the picture (or that the ssbo does not need to otherwise be coherent). Signed-off-by: Rob Clark <[email protected]>
author: Rob Clark <[email protected]> 2019-02-20 10:31:15 -0500
committer: Rob Clark <[email protected]> 2019-02-20 18:50:08 -0500
commit: 50dd773a2d15570944d3955bb851b46ac345cdf4 (patch)
tree: add95452471f3be393567a0c6b157147347b85e8
parent: c543a2cf6f4ab86b78ef314f4be22d6dd2e4604a (diff)
1 files changed, 10 insertions, 24 deletions
diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c
index 29b7ea8416c..1a10b63edb2 100644
--- a/src/freedreno/ir3/ir3_a6xx.c
+++ b/src/freedreno/ir3/ir3_a6xx.c
@@ -56,40 +56,26 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 {
 	struct ir3_block *b = ctx->block;
 	struct ir3_instruction *offset;
-	struct ir3_instruction *sam;
+	struct ir3_instruction *ldib;
 	nir_const_value *buffer_index;
 
 	/* can this be non-const buffer_index?  how do we handle that? */
 	buffer_index = nir_src_as_const_value(intr->src[0]);
 	compile_assert(ctx, buffer_index);
 
-	int tex_idx = ir3_ssbo_to_tex(&ctx->so->image_mapping, buffer_index->u32[0]);
+	int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, buffer_index->u32[0]);
 
 	offset = ssbo_offset(b, ir3_get_src(ctx, &intr->src[1])[0]);
 
-	/* Because texture state for SSBO read is setup as a single component
-	 * format (ie. R32_UINT, etc), we can't read more than the .x component
-	 * in one shot.  Maybe there is some way we could mangle the state to
-	 * read more than one component at a shot, which would result is some-
-	 * what less register usage (given how we have to stick in the dummy
-	 * .y coord) and less alu instructions to calc offsets.  But this is
-	 * also what blob does, so meh?
-	 */
-	for (unsigned i; i < intr->num_components; i++) {
-		struct ir3_instruction *coords[2];
-
-		coords[0] = (i == 0) ? offset :
-				ir3_ADD_U(b, offset, 0, create_immed(b, i), 0);
-		coords[1] = create_immed(b, 0);
-
-		sam = ir3_SAM(b, OPC_ISAM, TYPE_U32, 0b1, 0,
-				tex_idx, tex_idx, ir3_create_collect(ctx, coords, 2), NULL);
+	ldib = ir3_LDIB(b, create_immed(b, ibo_idx), 0, offset, 0);
+	ldib->regs[0]->wrmask = MASK(intr->num_components);
+	ldib->cat6.iim_val = intr->num_components;
+	ldib->cat6.d = 1;
+	ldib->cat6.type = TYPE_U32;
+	ldib->barrier_class = IR3_BARRIER_BUFFER_R;
+	ldib->barrier_conflict = IR3_BARRIER_BUFFER_W;
 
-		sam->barrier_class = IR3_BARRIER_BUFFER_R;
-		sam->barrier_conflict = IR3_BARRIER_BUFFER_W;
-
-		dst[i] = sam;
-	}
+	ir3_split_dest(b, dst, ldib, 0, intr->num_components);
 }
 
 /* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
author	Rob Clark <[email protected]>	2019-02-20 10:31:15 -0500
committer	Rob Clark <[email protected]>	2019-02-20 18:50:08 -0500
commit	50dd773a2d15570944d3955bb851b46ac345cdf4 (patch)
tree	add95452471f3be393567a0c6b157147347b85e8
parent	c543a2cf6f4ab86b78ef314f4be22d6dd2e4604a (diff)