aboutsummaryrefslogtreecommitdiffstats
path: root/src/freedreno/ir3
diff options
context:
space:
mode:
Diffstat (limited to 'src/freedreno/ir3')
-rw-r--r--src/freedreno/ir3/ir3_a4xx.c22
-rw-r--r--src/freedreno/ir3/ir3_compiler_nir.c6
-rw-r--r--src/freedreno/ir3/ir3_context.c3
-rw-r--r--src/freedreno/ir3/ir3_image.c76
-rw-r--r--src/freedreno/ir3/ir3_image.h9
-rw-r--r--src/freedreno/ir3/ir3_shader.h50
6 files changed, 141 insertions, 25 deletions
diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c
index dfaeb5b3961..1f86cd5533c 100644
--- a/src/freedreno/ir3/ir3_a4xx.c
+++ b/src/freedreno/ir3/ir3_a4xx.c
@@ -47,6 +47,8 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
const_offset = nir_src_as_const_value(intr->src[0]);
compile_assert(ctx, const_offset);
+ int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]);
+
offset = ir3_get_src(ctx, &intr->src[1])[0];
/* src0 is uvec2(offset*4, 0), src1 is offset.. nir already *= 4: */
@@ -56,7 +58,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
}, 2);
src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
- ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0,
+ ldgb = ir3_LDGB(b, create_immed(b, ibo_idx), 0,
src0, 0, src1, 0);
ldgb->regs[0]->wrmask = MASK(intr->num_components);
ldgb->cat6.iim_val = intr->num_components;
@@ -86,6 +88,8 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
const_offset = nir_src_as_const_value(intr->src[1]);
compile_assert(ctx, const_offset);
+ int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]);
+
offset = ir3_get_src(ctx, &intr->src[2])[0];
/* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0)..
@@ -98,8 +102,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
create_immed(b, 0),
}, 2);
- stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0,
- src0, 0, src1, 0, src2, 0);
+ stgb = ir3_STGB(b, create_immed(b, ibo_idx), 0, src0, 0, src1, 0, src2, 0);
stgb->cat6.iim_val = ncomp;
stgb->cat6.d = 4;
stgb->cat6.type = TYPE_U32;
@@ -137,7 +140,9 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
/* can this be non-const buffer_index? how do we handle that? */
const_offset = nir_src_as_const_value(intr->src[0]);
compile_assert(ctx, const_offset);
- ssbo = create_immed(b, const_offset->u32[0]);
+
+ int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]);
+ ssbo = create_immed(b, ibo_idx);
offset = ir3_get_src(ctx, &intr->src[1])[0];
@@ -262,7 +267,8 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction * const *value = ir3_get_src(ctx, &intr->src[3]);
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
unsigned ncoords = ir3_get_image_coords(var, NULL);
- unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot);
unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format);
/* src0 is value
@@ -277,7 +283,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
* one over the other in various cases.
*/
- stib = ir3_STIB(b, create_immed(b, tex_idx), 0,
+ stib = ir3_STIB(b, create_immed(b, ibo_idx), 0,
ir3_create_collect(ctx, value, ncomp), 0,
ir3_create_collect(ctx, coords, ncoords), 0,
offset, 0);
@@ -300,8 +306,10 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction *atomic, *image, *src0, *src1, *src2;
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
unsigned ncoords = ir3_get_image_coords(var, NULL);
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot);
- image = create_immed(b, ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0])));
+ image = create_immed(b, ibo_idx);
/* src0 is value (or uvec2(value, compare))
* src1 is coords
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index f4745ec83bd..fbc1b63c746 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -896,7 +896,8 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction * const *src0 = ir3_get_src(ctx, &intr->src[1]);
struct ir3_instruction *coords[4];
unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
- unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
type_t type = ir3_get_image_type(var);
/* hmm, this seems a bit odd, but it is what blob does and (at least
@@ -928,7 +929,8 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
{
struct ir3_block *b = ctx->block;
const nir_variable *var = nir_intrinsic_get_var(intr, 0);
- unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
struct ir3_instruction *sam, *lod;
unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index d121559833b..94264043886 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -28,6 +28,7 @@
#include "ir3_compiler.h"
#include "ir3_context.h"
+#include "ir3_image.h"
#include "ir3_shader.h"
#include "ir3_nir.h"
@@ -103,6 +104,8 @@ ir3_context_init(struct ir3_compiler *compiler,
so->num_uniforms = ctx->s->num_uniforms;
so->num_ubos = ctx->s->info.num_ubos;
+ ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
+
/* Layout of constant registers, each section aligned to vec4. Note
* that pointer size (ubo, etc) changes depending on generation.
*
diff --git a/src/freedreno/ir3/ir3_image.c b/src/freedreno/ir3/ir3_image.c
index bc564aac402..3ea2c0a3d3e 100644
--- a/src/freedreno/ir3/ir3_image.c
+++ b/src/freedreno/ir3/ir3_image.c
@@ -26,19 +26,69 @@
#include "ir3_image.h"
-/* Images get mapped into SSBO/image state (for store/atomic) and texture
- * state block (for load). To simplify things, invert the image id and
- * map it from end of state block, ie. image 0 becomes num-1, image 1
- * becomes num-2, etc. This potentially avoids needing to re-emit texture
- * state when switching shaders.
- *
- * TODO is max # of samplers and SSBOs the same. This shouldn't be hard-
- * coded. Also, since all the gl shader stages (ie. everything but CS)
- * share the same SSBO/image state block, this might require some more
- * logic if we supported images in anything other than FS..
+
+/*
+ * SSBO/Image to/from IBO/tex hw mapping table:
+ */
+
+void
+ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures)
+{ /* Resets the mapping so that no SSBO/Image has a hw slot assigned yet. */
+ memset(mapping, IBO_INVALID, sizeof(*mapping)); /* every table byte becomes IBO_INVALID (0xff) */
+ mapping->num_ibo = 0; /* the memset also clobbered the counters, so re-zero them */
+ mapping->num_tex = 0;
+ mapping->tex_base = num_textures; /* offset added to tex indices returned by the *_to_tex() helpers */
+}
+
+unsigned
+ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo)
+{ /* Returns the hw IBO index for logical SSBO 'ssbo', assigning the next free one on first use. */
+ if (mapping->ssbo_to_ibo[ssbo] == IBO_INVALID) { /* not mapped yet; NOTE(review): 'ssbo' is not range-checked here */
+ unsigned ibo = mapping->num_ibo++;
+ mapping->ssbo_to_ibo[ssbo] = ibo;
+ mapping->ibo_to_image[ibo] = IBO_SSBO | ssbo; /* reverse entry, flagged as SSBO rather than Image */
+ }
+ return mapping->ssbo_to_ibo[ssbo];
+}
+
+unsigned
+ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo)
+{ /* Returns the tex index for logical SSBO 'ssbo', assigning the next free one on first use. */
+ if (mapping->ssbo_to_tex[ssbo] == IBO_INVALID) { /* not mapped yet; NOTE(review): 'ssbo' is not range-checked here */
+ unsigned tex = mapping->num_tex++;
+ mapping->ssbo_to_tex[ssbo] = tex; /* stored without tex_base applied */
+ mapping->tex_to_image[tex] = IBO_SSBO | ssbo; /* reverse entry, flagged as SSBO rather than Image */
+ }
+ return mapping->ssbo_to_tex[ssbo] + mapping->tex_base; /* tex_base is added only on return */
+}
+
+unsigned
+ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image)
+{ /* Returns the hw IBO index for logical Image 'image', assigning the next free one on first use. */
+ if (mapping->image_to_ibo[image] == IBO_INVALID) { /* not mapped yet; NOTE(review): 'image' is not range-checked here */
+ unsigned ibo = mapping->num_ibo++; /* same counter as ir3_ssbo_to_ibo(), so SSBOs and Images share one index space */
+ mapping->image_to_ibo[image] = ibo;
+ mapping->ibo_to_image[ibo] = image; /* reverse entry; no IBO_SSBO flag, so it denotes an Image */
+ }
+ return mapping->image_to_ibo[image];
+}
+
+unsigned
+ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image)
+{ /* Returns the tex index for logical Image 'image', assigning the next free one on first use. */
+ if (mapping->image_to_tex[image] == IBO_INVALID) { /* not mapped yet; NOTE(review): 'image' is not range-checked here */
+ unsigned tex = mapping->num_tex++; /* same counter as ir3_ssbo_to_tex() */
+ mapping->image_to_tex[image] = tex; /* stored without tex_base applied */
+ mapping->tex_to_image[tex] = image; /* reverse entry; no IBO_SSBO flag, so it denotes an Image */
+ }
+ return mapping->image_to_tex[image] + mapping->tex_base; /* tex_base is added only on return */
+}
+
+/* Helper to parse the deref for an image to get image slot. This should be
+ * mapped to tex or ibo idx using ir3_image_to_tex() or ir3_image_to_ibo().
*/
unsigned
-ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref)
+ir3_get_image_slot(nir_deref_instr *deref)
{
unsigned int loc = 0;
unsigned inner_size = 1;
@@ -61,9 +111,7 @@ ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref)
loc += deref->var->data.driver_location;
- /* TODO figure out real limit per generation, and don't hardcode: */
- const unsigned max_samplers = 16;
- return max_samplers - loc - 1;
+ return loc;
}
/* see tex_info() for equiv logic for texture instructions.. it would be
diff --git a/src/freedreno/ir3/ir3_image.h b/src/freedreno/ir3/ir3_image.h
index 16f3d6e8209..c89e581eef8 100644
--- a/src/freedreno/ir3/ir3_image.h
+++ b/src/freedreno/ir3/ir3_image.h
@@ -29,7 +29,14 @@
#include "ir3_context.h"
-unsigned ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref);
+
+void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures);
+unsigned ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo);
+unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo);
+unsigned ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image);
+unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image);
+
+unsigned ir3_get_image_slot(nir_deref_instr *deref);
unsigned ir3_get_image_coords(const nir_variable *var, unsigned *flagsp);
type_t ir3_get_image_type(const nir_variable *var);
unsigned ir3_get_num_components_for_glformat(GLuint format);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 418c77ae8b0..5fffaf9fb85 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -274,9 +274,54 @@ ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type)
/* TODO */
break;
}
-
}
+/**
+ * On a4xx+a5xx, Images share state with textures and SSBOs:
+ *
+ * + Uses texture (cat5) state/instruction (isam) to read
+ * + Uses SSBO state and instructions (cat6) to write and for atomics
+ *
+ * Starting with a6xx, Images and SSBOs are basically the same thing,
+ * with texture state and isam also used for SSBO reads.
+ *
+ * On top of that, gallium makes the SSBO (shader_buffers) state semi
+ * sparse, with the first half of the state space used for atomic
+ * counters lowered to atomic buffers. We could ignore this, but I
+ * don't think we could *really* handle the case of a single shader
+ * that used the max # of textures + images + SSBOs. And once we are
+ * offsetting images by num_ssbos (or vice versa) to map them into
+ * the same hardware state, the hardware state has become coupled to
+ * the shader state, so at this point we might as well just use a
+ * mapping table to remap things from image/SSBO idx to hw idx.
+ *
+ * To make things less (more?) confusing, for the hw "SSBO" state
+ * (since it is really both SSBO and Image) I'll use the name "IBO"
+ */
+struct ir3_ibo_mapping {
+#define IBO_INVALID 0xff /* table entry that has not been assigned a hw slot */
+ /* Maps logical SSBO state to hw state: */
+ uint8_t ssbo_to_ibo[IR3_MAX_SHADER_BUFFERS];
+ uint8_t ssbo_to_tex[IR3_MAX_SHADER_BUFFERS];
+
+ /* Maps logical Image state to hw state: */
+ uint8_t image_to_ibo[IR3_MAX_SHADER_IMAGES];
+ uint8_t image_to_tex[IR3_MAX_SHADER_IMAGES];
+
+ /* Maps hw state back to logical SSBO or Image state:
+ *
+ * note IBO_SSBO is OR'd into values to indicate that the
+ * hw slot is used for SSBO state vs Image state.
+ */
+#define IBO_SSBO 0x80
+ uint8_t ibo_to_image[32]; /* indexed by hw IBO index */
+ uint8_t tex_to_image[32]; /* indexed by tex index before tex_base is added */
+
+ uint8_t num_ibo; /* number of hw IBO indices assigned so far */
+ uint8_t num_tex; /* including real textures -- NOTE(review): the ir3_*_to_tex() helpers count this from 0 and add tex_base on return; confirm */
+ uint8_t tex_base; /* the number of real textures, ie. image/ssbo start here */
+};
+
struct ir3_shader_variant {
struct fd_bo *bo;
@@ -375,6 +420,9 @@ struct ir3_shader_variant {
*/
unsigned varying_in;
+ /* Remapping table to map Image and SSBO to hw state: */
+ struct ir3_ibo_mapping image_mapping;
+
/* number of samplers/textures (which are currently 1:1): */
int num_samp;