From 2e0ea3f09c79c657116cbc9cbc68377e364dfe28 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Tue, 5 Feb 2019 15:33:57 -0500
Subject: freedreno/ir3: add image/ssbo <-> ibo/tex mapping

Images and SSBOs don't map directly to the hw.  They end up being part
texture and part something else.  Starting with a6xx, the hack used for
a5xx to smash the image tex state into hw texture state starting from
MAX counting down won't work, because we start using tex state also for
SSBO read.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 src/freedreno/ir3/ir3_a4xx.c                   | 22 +++++---
 src/freedreno/ir3/ir3_compiler_nir.c           |  6 +-
 src/freedreno/ir3/ir3_context.c                |  3 +
 src/freedreno/ir3/ir3_image.c                  | 76 +++++++++++++++++++++-----
 src/freedreno/ir3/ir3_image.h                  |  9 ++-
 src/freedreno/ir3/ir3_shader.h                 | 50 ++++++++++++++++-
 src/gallium/drivers/freedreno/a5xx/fd5_emit.c  | 67 +++++++++--------------
 src/gallium/drivers/freedreno/a5xx/fd5_image.c | 29 ++--------
 src/gallium/drivers/freedreno/a5xx/fd5_image.h |  3 +-
 9 files changed, 173 insertions(+), 92 deletions(-)

(limited to 'src')

diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c
index dfaeb5b3961..1f86cd5533c 100644
--- a/src/freedreno/ir3/ir3_a4xx.c
+++ b/src/freedreno/ir3/ir3_a4xx.c
@@ -47,6 +47,8 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 	const_offset = nir_src_as_const_value(intr->src[0]);
 	compile_assert(ctx, const_offset);
 
+	int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]);
+
 	offset = ir3_get_src(ctx, &intr->src[1])[0];
 
 	/* src0 is uvec2(offset*4, 0), src1 is offset.. nir already *= 4: */
@@ -56,7 +58,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 	}, 2);
 	src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
 
-	ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0,
+	ldgb = ir3_LDGB(b, create_immed(b, ibo_idx), 0,
 			src0, 0, src1, 0);
 	ldgb->regs[0]->wrmask = MASK(intr->num_components);
 	ldgb->cat6.iim_val = intr->num_components;
@@ -86,6 +88,8 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	const_offset = nir_src_as_const_value(intr->src[1]);
 	compile_assert(ctx, const_offset);
 
+	int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping,  const_offset->u32[0]);
+
 	offset = ir3_get_src(ctx, &intr->src[2])[0];
 
 	/* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0)..
@@ -98,8 +102,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 		create_immed(b, 0),
 	}, 2);
 
-	stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0,
-			src0, 0, src1, 0, src2, 0);
+	stgb = ir3_STGB(b, create_immed(b, ibo_idx), 0, src0, 0, src1, 0, src2, 0);
 	stgb->cat6.iim_val = ncomp;
 	stgb->cat6.d = 4;
 	stgb->cat6.type = TYPE_U32;
@@ -137,7 +140,9 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	/* can this be non-const buffer_index?  how do we handle that? */
 	const_offset = nir_src_as_const_value(intr->src[0]);
 	compile_assert(ctx, const_offset);
-	ssbo = create_immed(b, const_offset->u32[0]);
+
+	int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping,  const_offset->u32[0]);
+	ssbo = create_immed(b, ibo_idx);
 
 	offset = ir3_get_src(ctx, &intr->src[1])[0];
 
@@ -262,7 +267,8 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	struct ir3_instruction * const *value = ir3_get_src(ctx, &intr->src[3]);
 	struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
 	unsigned ncoords = ir3_get_image_coords(var, NULL);
-	unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+	unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+	unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot);
 	unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format);
 
 	/* src0 is value
@@ -277,7 +283,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	 * one over the other in various cases.
 	 */
 
-	stib = ir3_STIB(b, create_immed(b, tex_idx), 0,
+	stib = ir3_STIB(b, create_immed(b, ibo_idx), 0,
 			ir3_create_collect(ctx, value, ncomp), 0,
 			ir3_create_collect(ctx, coords, ncoords), 0,
 			offset, 0);
@@ -300,8 +306,10 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	struct ir3_instruction *atomic, *image, *src0, *src1, *src2;
 	struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
 	unsigned ncoords = ir3_get_image_coords(var, NULL);
+	unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+	unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot);
 
-	image = create_immed(b, ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0])));
+	image = create_immed(b, ibo_idx);
 
 	/* src0 is value (or uvec2(value, compare))
 	 * src1 is coords
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index f4745ec83bd..fbc1b63c746 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -896,7 +896,8 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 	struct ir3_instruction * const *src0 = ir3_get_src(ctx, &intr->src[1]);
 	struct ir3_instruction *coords[4];
 	unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
-	unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+	unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+	unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
 	type_t type = ir3_get_image_type(var);
 
 	/* hmm, this seems a bit odd, but it is what blob does and (at least
@@ -928,7 +929,8 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 {
 	struct ir3_block *b = ctx->block;
 	const nir_variable *var = nir_intrinsic_get_var(intr, 0);
-	unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+	unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+	unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
 	struct ir3_instruction *sam, *lod;
 	unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
 
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index d121559833b..94264043886 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -28,6 +28,7 @@
 
 #include "ir3_compiler.h"
 #include "ir3_context.h"
+#include "ir3_image.h"
 #include "ir3_shader.h"
 #include "ir3_nir.h"
 
@@ -103,6 +104,8 @@ ir3_context_init(struct ir3_compiler *compiler,
 	so->num_uniforms = ctx->s->num_uniforms;
 	so->num_ubos = ctx->s->info.num_ubos;
 
+	ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
+
 	/* Layout of constant registers, each section aligned to vec4.  Note
 	 * that pointer size (ubo, etc) changes depending on generation.
 	 *
diff --git a/src/freedreno/ir3/ir3_image.c b/src/freedreno/ir3/ir3_image.c
index bc564aac402..3ea2c0a3d3e 100644
--- a/src/freedreno/ir3/ir3_image.c
+++ b/src/freedreno/ir3/ir3_image.c
@@ -26,19 +26,69 @@
 
 #include "ir3_image.h"
 
-/* Images get mapped into SSBO/image state (for store/atomic) and texture
- * state block (for load).  To simplify things, invert the image id and
- * map it from end of state block, ie. image 0 becomes num-1, image 1
- * becomes num-2, etc.  This potentially avoids needing to re-emit texture
- * state when switching shaders.
- *
- * TODO is max # of samplers and SSBOs the same.  This shouldn't be hard-
- * coded.  Also, since all the gl shader stages (ie. everything but CS)
- * share the same SSBO/image state block, this might require some more
- * logic if we supported images in anything other than FS..
+
+/*
+ * SSBO/Image to/from IBO/tex hw mapping table:
+ */
+
+void
+ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures)
+{
+	/* byte-fill so every lookup entry starts out as IBO_INVALID: */
+	memset(mapping, IBO_INVALID, sizeof *mapping);
+	mapping->tex_base = num_textures;
+	mapping->num_ibo = mapping->num_tex = 0;
+}
+
+unsigned
+ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo)
+{
+	uint8_t *hw = &mapping->ssbo_to_ibo[ssbo];
+	if (*hw == IBO_INVALID) {   /* first use: hand out the next free IBO slot */
+		*hw = mapping->num_ibo++;
+		mapping->ibo_to_image[*hw] = IBO_SSBO | ssbo;
+	}
+	return *hw;
+}
+
+unsigned
+ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo)
+{
+	uint8_t *hw = &mapping->ssbo_to_tex[ssbo];
+	if (*hw == IBO_INVALID) {   /* first use: hand out the next free tex slot */
+		*hw = mapping->num_tex++;
+		mapping->tex_to_image[*hw] = IBO_SSBO | ssbo;
+	}
+	return *hw + mapping->tex_base;   /* stored slot is relative to tex_base */
+}
+
+unsigned
+ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image)
+{
+	uint8_t *hw = &mapping->image_to_ibo[image];
+	if (*hw == IBO_INVALID) {   /* first use: hand out the next free IBO slot */
+		*hw = mapping->num_ibo++;
+		mapping->ibo_to_image[*hw] = image;
+	}
+	return *hw;
+}
+
+unsigned
+ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image)
+{
+	uint8_t *hw = &mapping->image_to_tex[image];
+	if (*hw == IBO_INVALID) {   /* first use: hand out the next free tex slot */
+		*hw = mapping->num_tex++;
+		mapping->tex_to_image[*hw] = image;
+	}
+	return *hw + mapping->tex_base;   /* stored slot is relative to tex_base */
+}
+
+/* Helper to parse the deref for an image to get image slot.  This should be
+ * mapped to tex or ibo idx using ir3_image_to_tex() or ir3_image_to_ibo().
  */
 unsigned
-ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref)
+ir3_get_image_slot(nir_deref_instr *deref)
 {
 	unsigned int loc = 0;
 	unsigned inner_size = 1;
@@ -61,9 +111,7 @@ ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref)
 
 	loc += deref->var->data.driver_location;
 
-	/* TODO figure out real limit per generation, and don't hardcode: */
-	const unsigned max_samplers = 16;
-	return max_samplers - loc - 1;
+	return loc;
 }
 
 /* see tex_info() for equiv logic for texture instructions.. it would be
diff --git a/src/freedreno/ir3/ir3_image.h b/src/freedreno/ir3/ir3_image.h
index 16f3d6e8209..c89e581eef8 100644
--- a/src/freedreno/ir3/ir3_image.h
+++ b/src/freedreno/ir3/ir3_image.h
@@ -29,7 +29,14 @@
 
 #include "ir3_context.h"
 
-unsigned ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref);
+
+void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures);
+unsigned ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo);
+unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo);
+unsigned ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image);
+unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image);
+
+unsigned ir3_get_image_slot(nir_deref_instr *deref);
 unsigned ir3_get_image_coords(const nir_variable *var, unsigned *flagsp);
 type_t ir3_get_image_type(const nir_variable *var);
 unsigned ir3_get_num_components_for_glformat(GLuint format);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 418c77ae8b0..5fffaf9fb85 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -274,9 +274,54 @@ ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type)
 		/* TODO */
 		break;
 	}
-
 }
 
+/**
+ * On a4xx+a5xx, Images share state with textures and SSBOs:
+ *
+ *   + Uses texture (cat5) state/instruction (isam) to read
+ *   + Uses SSBO state and instructions (cat6) to write and for atomics
+ *
+ * Starting with a6xx, Images and SSBOs are basically the same thing,
+ * with texture state and isam also used for SSBO reads.
+ *
+ * On top of that, gallium makes the SSBO (shader_buffers) state semi
+ * sparse, with the first half of the state space used for atomic
+ * counters lowered to atomic buffers.  We could ignore this, but I
+ * don't think we could *really* handle the case of a single shader
+ * that used the max # of textures + images + SSBOs.  And once we are
+ * offsetting images by num_ssbos (or vice versa) to map them into
+ * the same hardware state, the hardware state has become coupled to
+ * the shader state, so at this point we might as well just use a
+ * mapping table to remap things from image/SSBO idx to hw idx.
+ *
+ * To make things less (more?) confusing, for the hw "SSBO" state
+ * (since it is really both SSBO and Image) I'll use the name "IBO"
+ */
+struct ir3_ibo_mapping {
+#define IBO_INVALID 0xff   /* table entry that has not been assigned a hw slot yet */
+	/* Maps logical SSBO state to hw state (tex entries are relative to tex_base): */
+	uint8_t ssbo_to_ibo[IR3_MAX_SHADER_BUFFERS];
+	uint8_t ssbo_to_tex[IR3_MAX_SHADER_BUFFERS];
+
+	/* Maps logical Image state to hw state (tex entries are relative to tex_base): */
+	uint8_t image_to_ibo[IR3_MAX_SHADER_IMAGES];
+	uint8_t image_to_tex[IR3_MAX_SHADER_IMAGES];
+
+	/* Maps hw state back to logical SSBO or Image state:
+	 *
+	 * note IBO_SSBO is ORed into values to indicate that the
+	 * hw slot is used for SSBO state vs Image state.
+	 */
+#define IBO_SSBO    0x80
+	uint8_t ibo_to_image[32];
+	uint8_t tex_to_image[32];
+
+	uint8_t num_ibo;    /* number of IBO slots handed out so far */
+	uint8_t num_tex;    /* image/ssbo tex slots handed out so far; starts at 0, tex_base is NOT included */
+	uint8_t tex_base;   /* the number of real textures, ie. image/ssbo start here */
+};
+
 struct ir3_shader_variant {
 	struct fd_bo *bo;
 
@@ -375,6 +420,9 @@ struct ir3_shader_variant {
 	 */
 	unsigned varying_in;
 
+	/* Remapping table to map Image and SSBO to hw state: */
+	struct ir3_ibo_mapping image_mapping;
+
 	/* number of samplers/textures (which are currently 1:1): */
 	int num_samp;
 
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
index ad942fd7f5a..515d120bb1c 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
@@ -396,37 +396,24 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 static void
 emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
-		enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so)
+		enum a4xx_state_block sb, struct fd_shaderbuf_stateobj *so,
+		const struct ir3_shader_variant *v)
 {
 	unsigned count = util_last_bit(so->enabled_mask);
+	const struct ir3_ibo_mapping *m = &v->image_mapping;
 
-	if (count == 0)
-		return;
-
-	OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (4 * count));
-	OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
-			CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
-			CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
-			CP_LOAD_STATE4_0_NUM_UNIT(count));
-	OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(0) |
-			CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
-	OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
 	for (unsigned i = 0; i < count; i++) {
-		OUT_RING(ring, 0x00000000);
-		OUT_RING(ring, 0x00000000);
-		OUT_RING(ring, 0x00000000);
-		OUT_RING(ring, 0x00000000);
-	}
+		unsigned slot = m->ssbo_to_ibo[i];
+
+		OUT_PKT7(ring, CP_LOAD_STATE4, 5);
+		OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+				CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+				CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+				CP_LOAD_STATE4_0_NUM_UNIT(1));
+		OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
+				CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+		OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
 
-	OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
-	OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
-			CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
-			CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
-			CP_LOAD_STATE4_0_NUM_UNIT(count));
-	OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(1) |
-			CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
-	OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
-	for (unsigned i = 0; i < count; i++) {
 		struct pipe_shader_buffer *buf = &so->sb[i];
 		unsigned sz = buf->buffer_size;
 
@@ -435,18 +422,16 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 		OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
 		OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
-	}
 
-	OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));
-	OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) |
-			CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
-			CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
-			CP_LOAD_STATE4_0_NUM_UNIT(count));
-	OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
-			CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
-	OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
-	for (unsigned i = 0; i < count; i++) {
-		struct pipe_shader_buffer *buf = &so->sb[i];
+		OUT_PKT7(ring, CP_LOAD_STATE4, 5);
+		OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) |
+				CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) |
+				CP_LOAD_STATE4_0_STATE_BLOCK(sb) |
+				CP_LOAD_STATE4_0_NUM_UNIT(1));
+		OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(2) |
+				CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
+		OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
+
 		if (buf->buffer) {
 			struct fd_resource *rsc = fd_resource(buf->buffer);
 			OUT_RELOCW(ring, rsc->bo, buf->buffer_offset, 0, 0);
@@ -821,10 +806,10 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		emit_border_color(ctx, ring);
 
 	if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO)
-		emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
+		emit_ssbos(ctx, ring, SB4_SSBO, &ctx->shaderbuf[PIPE_SHADER_FRAGMENT], fp);
 
 	if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE)
-		fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT);
+		fd5_emit_images(ctx, ring, PIPE_SHADER_FRAGMENT, fp);
 }
 
 void
@@ -862,10 +847,10 @@ fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			~0 : ctx->tex[PIPE_SHADER_COMPUTE].num_textures);
 
 	if (dirty & FD_DIRTY_SHADER_SSBO)
-		emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE]);
+		emit_ssbos(ctx, ring, SB4_CS_SSBO, &ctx->shaderbuf[PIPE_SHADER_COMPUTE], cp);
 
 	if (dirty & FD_DIRTY_SHADER_IMAGE)
-		fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE);
+		fd5_emit_images(ctx, ring, PIPE_SHADER_COMPUTE, cp);
 }
 
 /* emit setup at begin of new cmdstream buffer (don't rely on previous
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.c b/src/gallium/drivers/freedreno/a5xx/fd5_image.c
index 028497f3a10..c53b2e92245 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_image.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_image.c
@@ -189,46 +189,25 @@ static void emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
 	}
 }
 
-/* Note that to avoid conflicts with textures and non-image "SSBO"s, images
- * are placedd, in reverse order, at the end of the state block, so for
- * example the sampler state:
- *
- *   0:   first texture
- *   1:   second texture
- *   ....
- *   N-1: second image
- *   N:   first image
- */
-static unsigned
-get_image_slot(unsigned index)
-{
-	/* TODO figure out real limit per generation, and don't hardcode.
-	 * This needs to match get_image_slot() in ir3_compiler_nir.
-	 * Possibly should be factored out into shared helper?
-	 */
-	const unsigned max_samplers = 16;
-	return max_samplers - index - 1;
-}
-
 /* Emit required "SSBO" and sampler state.  The sampler state is used by the
  * hw for imageLoad(), and "SSBO" state for imageStore().  Returns max sampler
  * used.
  */
 void
 fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
-		enum pipe_shader_type shader)
+		enum pipe_shader_type shader, const struct ir3_shader_variant *v)
 {
 	struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
 	unsigned enabled_mask = so->enabled_mask;
+	const struct ir3_ibo_mapping *m = &v->image_mapping;
 
 	while (enabled_mask) {
 		unsigned index = u_bit_scan(&enabled_mask);
-		unsigned slot = get_image_slot(index);
 		struct fd5_image img;
 
 		translate_image(&img, &so->si[index]);
 
-		emit_image_tex(ring, slot, &img, shader);
-		emit_image_ssbo(ring, slot, &img, shader);
+		emit_image_tex(ring, m->image_to_tex[index] + m->tex_base, &img, shader);
+		emit_image_ssbo(ring, m->image_to_ibo[index], &img, shader);
 	}
 }
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_image.h b/src/gallium/drivers/freedreno/a5xx/fd5_image.h
index 98c7faf6154..f7567826ba6 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_image.h
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_image.h
@@ -29,7 +29,8 @@
 
 #include "freedreno_context.h"
 
+struct ir3_shader_variant;
 void fd5_emit_images(struct fd_context *ctx, struct fd_ringbuffer *ring,
-		enum pipe_shader_type shader);
+		enum pipe_shader_type shader, const struct ir3_shader_variant *v);
 
 #endif /* FD5_IMAGE_H_ */
-- 
cgit v1.2.3