aboutsummaryrefslogtreecommitdiffstats
path: root/src/freedreno/ir3
diff options
context:
space:
mode:
author Rob Clark <[email protected]> 2019-02-05 15:33:57 -0500
committer Rob Clark <[email protected]> 2019-02-16 16:27:59 -0500
commit 2e0ea3f09c79c657116cbc9cbc68377e364dfe28 (patch)
tree 5d6f4a3793b79332d6de26140fd1b3d21703b131 /src/freedreno/ir3
parent 75f3a5245e54d1be5b75db66ba50d7c2f5dee4fc (diff)
freedreno/ir3: add image/ssbo <-> ibo/tex mapping
Images and SSBOs don't map directly to the hw. They end up being part texture and part something else.

Starting with a6xx, the hack used for a5xx (smashing the image tex state into hw texture state, counting down from MAX) won't work, because we start using tex state for SSBO reads as well.

Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/freedreno/ir3')
-rw-r--r-- src/freedreno/ir3/ir3_a4xx.c 22
-rw-r--r-- src/freedreno/ir3/ir3_compiler_nir.c 6
-rw-r--r-- src/freedreno/ir3/ir3_context.c 3
-rw-r--r-- src/freedreno/ir3/ir3_image.c 76
-rw-r--r-- src/freedreno/ir3/ir3_image.h 9
-rw-r--r-- src/freedreno/ir3/ir3_shader.h 50
6 files changed, 141 insertions, 25 deletions
diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c
index dfaeb5b3961..1f86cd5533c 100644
--- a/src/freedreno/ir3/ir3_a4xx.c
+++ b/src/freedreno/ir3/ir3_a4xx.c
@@ -47,6 +47,8 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
const_offset = nir_src_as_const_value(intr->src[0]);
compile_assert(ctx, const_offset);
+ int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]);
+
offset = ir3_get_src(ctx, &intr->src[1])[0];
/* src0 is uvec2(offset*4, 0), src1 is offset.. nir already *= 4: */
@@ -56,7 +58,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
}, 2);
src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
- ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0,
+ ldgb = ir3_LDGB(b, create_immed(b, ibo_idx), 0,
src0, 0, src1, 0);
ldgb->regs[0]->wrmask = MASK(intr->num_components);
ldgb->cat6.iim_val = intr->num_components;
@@ -86,6 +88,8 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
const_offset = nir_src_as_const_value(intr->src[1]);
compile_assert(ctx, const_offset);
+ int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]);
+
offset = ir3_get_src(ctx, &intr->src[2])[0];
/* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0)..
@@ -98,8 +102,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
create_immed(b, 0),
}, 2);
- stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0,
- src0, 0, src1, 0, src2, 0);
+ stgb = ir3_STGB(b, create_immed(b, ibo_idx), 0, src0, 0, src1, 0, src2, 0);
stgb->cat6.iim_val = ncomp;
stgb->cat6.d = 4;
stgb->cat6.type = TYPE_U32;
@@ -137,7 +140,9 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
/* can this be non-const buffer_index? how do we handle that? */
const_offset = nir_src_as_const_value(intr->src[0]);
compile_assert(ctx, const_offset);
- ssbo = create_immed(b, const_offset->u32[0]);
+
+ int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]);
+ ssbo = create_immed(b, ibo_idx);
offset = ir3_get_src(ctx, &intr->src[1])[0];
@@ -262,7 +267,8 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction * const *value = ir3_get_src(ctx, &intr->src[3]);
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
unsigned ncoords = ir3_get_image_coords(var, NULL);
- unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot);
unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format);
/* src0 is value
@@ -277,7 +283,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
* one over the other in various cases.
*/
- stib = ir3_STIB(b, create_immed(b, tex_idx), 0,
+ stib = ir3_STIB(b, create_immed(b, ibo_idx), 0,
ir3_create_collect(ctx, value, ncomp), 0,
ir3_create_collect(ctx, coords, ncoords), 0,
offset, 0);
@@ -300,8 +306,10 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction *atomic, *image, *src0, *src1, *src2;
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
unsigned ncoords = ir3_get_image_coords(var, NULL);
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot);
- image = create_immed(b, ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0])));
+ image = create_immed(b, ibo_idx);
/* src0 is value (or uvec2(value, compare))
* src1 is coords
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index f4745ec83bd..fbc1b63c746 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -896,7 +896,8 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction * const *src0 = ir3_get_src(ctx, &intr->src[1]);
struct ir3_instruction *coords[4];
unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
- unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
type_t type = ir3_get_image_type(var);
/* hmm, this seems a bit odd, but it is what blob does and (at least
@@ -928,7 +929,8 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
{
struct ir3_block *b = ctx->block;
const nir_variable *var = nir_intrinsic_get_var(intr, 0);
- unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+ unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
+ unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
struct ir3_instruction *sam, *lod;
unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index d121559833b..94264043886 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -28,6 +28,7 @@
#include "ir3_compiler.h"
#include "ir3_context.h"
+#include "ir3_image.h"
#include "ir3_shader.h"
#include "ir3_nir.h"
@@ -103,6 +104,8 @@ ir3_context_init(struct ir3_compiler *compiler,
so->num_uniforms = ctx->s->num_uniforms;
so->num_ubos = ctx->s->info.num_ubos;
+ ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
+
/* Layout of constant registers, each section aligned to vec4. Note
* that pointer size (ubo, etc) changes depending on generation.
*
diff --git a/src/freedreno/ir3/ir3_image.c b/src/freedreno/ir3/ir3_image.c
index bc564aac402..3ea2c0a3d3e 100644
--- a/src/freedreno/ir3/ir3_image.c
+++ b/src/freedreno/ir3/ir3_image.c
@@ -26,19 +26,69 @@
#include "ir3_image.h"
-/* Images get mapped into SSBO/image state (for store/atomic) and texture
- * state block (for load). To simplify things, invert the image id and
- * map it from end of state block, ie. image 0 becomes num-1, image 1
- * becomes num-2, etc. This potentially avoids needing to re-emit texture
- * state when switching shaders.
- *
- * TODO is max # of samplers and SSBOs the same. This shouldn't be hard-
- * coded. Also, since all the gl shader stages (ie. everything but CS)
- * share the same SSBO/image state block, this might require some more
- * logic if we supported images in anything other than FS..
+
+/*
+ * SSBO/Image to/from IBO/tex hw mapping table:
+ */
+
+void
+ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures)
+{
+ memset(mapping, IBO_INVALID, sizeof(*mapping));
+ mapping->num_ibo = 0;
+ mapping->num_tex = 0;
+ mapping->tex_base = num_textures;
+}
+
+unsigned
+ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo)
+{
+ if (mapping->ssbo_to_ibo[ssbo] == IBO_INVALID) {
+ unsigned ibo = mapping->num_ibo++;
+ mapping->ssbo_to_ibo[ssbo] = ibo;
+ mapping->ibo_to_image[ibo] = IBO_SSBO | ssbo;
+ }
+ return mapping->ssbo_to_ibo[ssbo];
+}
+
+unsigned
+ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo)
+{
+ if (mapping->ssbo_to_tex[ssbo] == IBO_INVALID) {
+ unsigned tex = mapping->num_tex++;
+ mapping->ssbo_to_tex[ssbo] = tex;
+ mapping->tex_to_image[tex] = IBO_SSBO | ssbo;
+ }
+ return mapping->ssbo_to_tex[ssbo] + mapping->tex_base;
+}
+
+unsigned
+ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image)
+{
+ if (mapping->image_to_ibo[image] == IBO_INVALID) {
+ unsigned ibo = mapping->num_ibo++;
+ mapping->image_to_ibo[image] = ibo;
+ mapping->ibo_to_image[ibo] = image;
+ }
+ return mapping->image_to_ibo[image];
+}
+
+unsigned
+ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image)
+{
+ if (mapping->image_to_tex[image] == IBO_INVALID) {
+ unsigned tex = mapping->num_tex++;
+ mapping->image_to_tex[image] = tex;
+ mapping->tex_to_image[tex] = image;
+ }
+ return mapping->image_to_tex[image] + mapping->tex_base;
+}
+
+/* Helper to parse the deref for an image to get image slot. This should be
+ * mapped to tex or ibo idx using ir3_image_to_tex() or ir3_image_to_ibo().
*/
unsigned
-ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref)
+ir3_get_image_slot(nir_deref_instr *deref)
{
unsigned int loc = 0;
unsigned inner_size = 1;
@@ -61,9 +111,7 @@ ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref)
loc += deref->var->data.driver_location;
- /* TODO figure out real limit per generation, and don't hardcode: */
- const unsigned max_samplers = 16;
- return max_samplers - loc - 1;
+ return loc;
}
/* see tex_info() for equiv logic for texture instructions.. it would be
diff --git a/src/freedreno/ir3/ir3_image.h b/src/freedreno/ir3/ir3_image.h
index 16f3d6e8209..c89e581eef8 100644
--- a/src/freedreno/ir3/ir3_image.h
+++ b/src/freedreno/ir3/ir3_image.h
@@ -29,7 +29,14 @@
#include "ir3_context.h"
-unsigned ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref);
+
+void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures);
+unsigned ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo);
+unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo);
+unsigned ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image);
+unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image);
+
+unsigned ir3_get_image_slot(nir_deref_instr *deref);
unsigned ir3_get_image_coords(const nir_variable *var, unsigned *flagsp);
type_t ir3_get_image_type(const nir_variable *var);
unsigned ir3_get_num_components_for_glformat(GLuint format);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 418c77ae8b0..5fffaf9fb85 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -274,9 +274,54 @@ ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type)
/* TODO */
break;
}
-
}
+/**
+ * On a4xx+a5xx, Images share state with textures and SSBOs:
+ *
+ * + Uses texture (cat5) state/instruction (isam) to read
+ * + Uses SSBO state and instructions (cat6) to write and for atomics
+ *
+ * Starting with a6xx, Images and SSBOs are basically the same thing,
+ * with texture state and isam also used for SSBO reads.
+ *
+ * On top of that, gallium makes the SSBO (shader_buffers) state semi
+ * sparse, with the first half of the state space used for atomic
+ * counters lowered to atomic buffers. We could ignore this, but I
+ * don't think we could *really* handle the case of a single shader
+ * that used the max # of textures + images + SSBOs. And once we are
+ * offsetting images by num_ssbos (or vice versa) to map them into
+ * the same hardware state, the hardware state has become coupled to
+ * the shader state, so at this point we might as well just use a
+ * mapping table to remap things from image/SSBO idx to hw idx.
+ *
+ * To make things less (more?) confusing, for the hw "SSBO" state
+ * (since it is really both SSBO and Image) I'll use the name "IBO"
+ */
+struct ir3_ibo_mapping {
+#define IBO_INVALID 0xff
+ /* Maps logical SSBO state to hw state: */
+ uint8_t ssbo_to_ibo[IR3_MAX_SHADER_BUFFERS];
+ uint8_t ssbo_to_tex[IR3_MAX_SHADER_BUFFERS];
+
+ /* Maps logical Image state to hw state: */
+ uint8_t image_to_ibo[IR3_MAX_SHADER_IMAGES];
+ uint8_t image_to_tex[IR3_MAX_SHADER_IMAGES];
+
+ /* Maps hw state back to logical SSBO or Image state:
+ *
+ * note IBO_SSBO ORd into values to indicate that the
+ * hw slot is used for SSBO state vs Image state.
+ */
+#define IBO_SSBO 0x80
+ uint8_t ibo_to_image[32];
+ uint8_t tex_to_image[32];
+
+ uint8_t num_ibo;
+ uint8_t num_tex; /* image/SSBO tex slots allocated so far; starts at 0, real textures are counted by tex_base */
+ uint8_t tex_base; /* the number of real textures, ie. image/ssbo start here */
+};
+
struct ir3_shader_variant {
struct fd_bo *bo;
@@ -375,6 +420,9 @@ struct ir3_shader_variant {
*/
unsigned varying_in;
+ /* Remapping table to map Image and SSBO to hw state: */
+ struct ir3_ibo_mapping image_mapping;
+
/* number of samplers/textures (which are currently 1:1): */
int num_samp;