diff options
Diffstat (limited to 'src/freedreno/ir3')
-rw-r--r-- | src/freedreno/ir3/ir3_a4xx.c | 22 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_compiler_nir.c | 6 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_context.c | 3 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_image.c | 76 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_image.h | 9 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_shader.h | 50 |
6 files changed, 141 insertions, 25 deletions
diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index dfaeb5b3961..1f86cd5533c 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -47,6 +47,8 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, const_offset = nir_src_as_const_value(intr->src[0]); compile_assert(ctx, const_offset); + int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]); + offset = ir3_get_src(ctx, &intr->src[1])[0]; /* src0 is uvec2(offset*4, 0), src1 is offset.. nir already *= 4: */ @@ -56,7 +58,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, }, 2); src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); - ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0, + ldgb = ir3_LDGB(b, create_immed(b, ibo_idx), 0, src0, 0, src1, 0); ldgb->regs[0]->wrmask = MASK(intr->num_components); ldgb->cat6.iim_val = intr->num_components; @@ -86,6 +88,8 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) const_offset = nir_src_as_const_value(intr->src[1]); compile_assert(ctx, const_offset); + int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]); + offset = ir3_get_src(ctx, &intr->src[2])[0]; /* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0).. @@ -98,8 +102,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) create_immed(b, 0), }, 2); - stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0, - src0, 0, src1, 0, src2, 0); + stgb = ir3_STGB(b, create_immed(b, ibo_idx), 0, src0, 0, src1, 0, src2, 0); stgb->cat6.iim_val = ncomp; stgb->cat6.d = 4; stgb->cat6.type = TYPE_U32; @@ -137,7 +140,9 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) /* can this be non-const buffer_index? how do we handle that? 
*/ const_offset = nir_src_as_const_value(intr->src[0]); compile_assert(ctx, const_offset); - ssbo = create_immed(b, const_offset->u32[0]); + + int ibo_idx = ir3_ssbo_to_ibo(&ctx->so->image_mapping, const_offset->u32[0]); + ssbo = create_immed(b, ibo_idx); offset = ir3_get_src(ctx, &intr->src[1])[0]; @@ -262,7 +267,8 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction * const *value = ir3_get_src(ctx, &intr->src[3]); struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); unsigned ncoords = ir3_get_image_coords(var, NULL); - unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0])); + unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); + unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); unsigned ncomp = ir3_get_num_components_for_glformat(var->data.image.format); /* src0 is value @@ -277,7 +283,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) * one over the other in various cases. 
*/ - stib = ir3_STIB(b, create_immed(b, tex_idx), 0, + stib = ir3_STIB(b, create_immed(b, ibo_idx), 0, ir3_create_collect(ctx, value, ncomp), 0, ir3_create_collect(ctx, coords, ncoords), 0, offset, 0); @@ -300,8 +306,10 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction *atomic, *image, *src0, *src1, *src2; struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); unsigned ncoords = ir3_get_image_coords(var, NULL); + unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); + unsigned ibo_idx = ir3_image_to_ibo(&ctx->so->image_mapping, slot); - image = create_immed(b, ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0]))); + image = create_immed(b, ibo_idx); /* src0 is value (or uvec2(value, compare)) * src1 is coords diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index f4745ec83bd..fbc1b63c746 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -896,7 +896,8 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction * const *src0 = ir3_get_src(ctx, &intr->src[1]); struct ir3_instruction *coords[4]; unsigned flags, ncoords = ir3_get_image_coords(var, &flags); - unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0])); + unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); + unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot); type_t type = ir3_get_image_type(var); /* hmm, this seems a bit odd, but it is what blob does and (at least @@ -928,7 +929,8 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, { struct ir3_block *b = ctx->block; const nir_variable *var = nir_intrinsic_get_var(intr, 0); - unsigned tex_idx = ir3_get_image_slot(ctx, nir_src_as_deref(intr->src[0])); + unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0])); + unsigned tex_idx = 
ir3_image_to_tex(&ctx->so->image_mapping, slot); struct ir3_instruction *sam, *lod; unsigned flags, ncoords = ir3_get_image_coords(var, &flags); diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index d121559833b..94264043886 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -28,6 +28,7 @@ #include "ir3_compiler.h" #include "ir3_context.h" +#include "ir3_image.h" #include "ir3_shader.h" #include "ir3_nir.h" @@ -103,6 +104,8 @@ ir3_context_init(struct ir3_compiler *compiler, so->num_uniforms = ctx->s->num_uniforms; so->num_ubos = ctx->s->info.num_ubos; + ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures); + /* Layout of constant registers, each section aligned to vec4. Note * that pointer size (ubo, etc) changes depending on generation. * diff --git a/src/freedreno/ir3/ir3_image.c b/src/freedreno/ir3/ir3_image.c index bc564aac402..3ea2c0a3d3e 100644 --- a/src/freedreno/ir3/ir3_image.c +++ b/src/freedreno/ir3/ir3_image.c @@ -26,19 +26,69 @@ #include "ir3_image.h" -/* Images get mapped into SSBO/image state (for store/atomic) and texture - * state block (for load). To simplify things, invert the image id and - * map it from end of state block, ie. image 0 becomes num-1, image 1 - * becomes num-2, etc. This potentially avoids needing to re-emit texture - * state when switching shaders. - * - * TODO is max # of samplers and SSBOs the same. This shouldn't be hard- - * coded. Also, since all the gl shader stages (ie. everything but CS) - * share the same SSBO/image state block, this might require some more - * logic if we supported images in anything other than FS.. 
+ +/* + * SSBO/Image to/from IBO/tex hw mapping table: + */ + +void +ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures) +{ + memset(mapping, IBO_INVALID, sizeof(*mapping)); + mapping->num_ibo = 0; + mapping->num_tex = 0; + mapping->tex_base = num_textures; +} + +unsigned +ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo) +{ + if (mapping->ssbo_to_ibo[ssbo] == IBO_INVALID) { + unsigned ibo = mapping->num_ibo++; + mapping->ssbo_to_ibo[ssbo] = ibo; + mapping->ibo_to_image[ibo] = IBO_SSBO | ssbo; + } + return mapping->ssbo_to_ibo[ssbo]; +} + +unsigned +ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo) +{ + if (mapping->ssbo_to_tex[ssbo] == IBO_INVALID) { + unsigned tex = mapping->num_tex++; + mapping->ssbo_to_tex[ssbo] = tex; + mapping->tex_to_image[tex] = IBO_SSBO | ssbo; + } + return mapping->ssbo_to_tex[ssbo] + mapping->tex_base; +} + +unsigned +ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image) +{ + if (mapping->image_to_ibo[image] == IBO_INVALID) { + unsigned ibo = mapping->num_ibo++; + mapping->image_to_ibo[image] = ibo; + mapping->ibo_to_image[ibo] = image; + } + return mapping->image_to_ibo[image]; +} + +unsigned +ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image) +{ + if (mapping->image_to_tex[image] == IBO_INVALID) { + unsigned tex = mapping->num_tex++; + mapping->image_to_tex[image] = tex; + mapping->tex_to_image[tex] = image; + } + return mapping->image_to_tex[image] + mapping->tex_base; +} + +/* Helper to parse the deref for an image to get image slot. This should be + * mapped to tex or ibo idx using ir3_image_to_tex() or ir3_image_to_ibo(). 
*/ unsigned -ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref) +ir3_get_image_slot(nir_deref_instr *deref) { unsigned int loc = 0; unsigned inner_size = 1; @@ -61,9 +111,7 @@ ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref) loc += deref->var->data.driver_location; - /* TODO figure out real limit per generation, and don't hardcode: */ - const unsigned max_samplers = 16; - return max_samplers - loc - 1; + return loc; } /* see tex_info() for equiv logic for texture instructions.. it would be diff --git a/src/freedreno/ir3/ir3_image.h b/src/freedreno/ir3/ir3_image.h index 16f3d6e8209..c89e581eef8 100644 --- a/src/freedreno/ir3/ir3_image.h +++ b/src/freedreno/ir3/ir3_image.h @@ -29,7 +29,14 @@ #include "ir3_context.h" -unsigned ir3_get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref); + +void ir3_ibo_mapping_init(struct ir3_ibo_mapping *mapping, unsigned num_textures); +unsigned ir3_ssbo_to_ibo(struct ir3_ibo_mapping *mapping, unsigned ssbo); +unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo); +unsigned ir3_image_to_ibo(struct ir3_ibo_mapping *mapping, unsigned image); +unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image); + +unsigned ir3_get_image_slot(nir_deref_instr *deref); unsigned ir3_get_image_coords(const nir_variable *var, unsigned *flagsp); type_t ir3_get_image_type(const nir_variable *var); unsigned ir3_get_num_components_for_glformat(GLuint format); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 418c77ae8b0..5fffaf9fb85 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -274,9 +274,54 @@ ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type) /* TODO */ break; } - } +/** + * On a4xx+a5xx, Images share state with textures and SSBOs: + * + * + Uses texture (cat5) state/instruction (isam) to read + * + Uses SSBO state and instructions (cat6) to write and for atomics + * + * Starting with 
a6xx, Images and SSBOs are basically the same thing, + * with texture state and isam also used for SSBO reads. + * + * On top of that, gallium makes the SSBO (shader_buffers) state semi + * sparse, with the first half of the state space used for atomic + * counters lowered to atomic buffers. We could ignore this, but I + * don't think we could *really* handle the case of a single shader + * that used the max # of textures + images + SSBOs. And once we are + * offsetting images by num_ssbos (or vice versa) to map them into + * the same hardware state, the hardware state has become coupled to + * the shader state, so at this point we might as well just use a + * mapping table to remap things from image/SSBO idx to hw idx. + * + * To make things less (more?) confusing, for the hw "SSBO" state + * (since it is really both SSBO and Image) I'll use the name "IBO" + */ +struct ir3_ibo_mapping { +#define IBO_INVALID 0xff + /* Maps logical SSBO state to hw state: */ + uint8_t ssbo_to_ibo[IR3_MAX_SHADER_BUFFERS]; + uint8_t ssbo_to_tex[IR3_MAX_SHADER_BUFFERS]; + + /* Maps logical Image state to hw state: */ + uint8_t image_to_ibo[IR3_MAX_SHADER_IMAGES]; + uint8_t image_to_tex[IR3_MAX_SHADER_IMAGES]; + + /* Maps hw state back to logical SSBO or Image state: + * + * note IBO_SSBO ORd into values to indicate that the + * hw slot is used for SSBO state vs Image state. + */ +#define IBO_SSBO 0x80 + uint8_t ibo_to_image[32]; + uint8_t tex_to_image[32]; + + uint8_t num_ibo; + uint8_t num_tex; /* including real textures */ + uint8_t tex_base; /* the number of real textures, ie. image/ssbo start here */ +}; + struct ir3_shader_variant { struct fd_bo *bo; @@ -375,6 +420,9 @@ struct ir3_shader_variant { */ unsigned varying_in; + /* Remapping table to map Image and SSBO to hw state: */ + struct ir3_ibo_mapping image_mapping; + /* number of samplers/textures (which are currently 1:1): */ int num_samp; |