diff options
author | Eduardo Lima Mitev <[email protected]> | 2018-12-17 21:41:24 +0100 |
---|---|---|
committer | Eduardo Lima Mitev <[email protected]> | 2018-12-18 21:15:20 +0100 |
commit | 5820e63418ce2449b19d1bcba8184388e76355d7 (patch) | |
tree | 3f046d7823f5399b57f24556ee98c6a75dfed9d8 /src/freedreno/ir3 | |
parent | 5dad1abfdcfc772c63e7c883ab34ae1ad39461f3 (diff) |
freedreno/ir3: Make imageStore use num components from image format
emit_intrinsic_store_image() always uses 4 components when
collecting registers for the value. When the image has fewer than
4 components (e.g., r32f, rg32i, etc.) this results in extra mov
instructions.
This patch uses the actual number of components from the image format.
For example, in a shader like:
layout (r32f, binding=0) writeonly uniform imageBuffer u_image;
...
void main(void) {
...
imageStore (u_image, some_offset, vec4(1.0));
...
}
the instruction count is reduced by at least 3 instructions (note that
the image format is r32f, i.e. 1 component only).
This obviously reduces register pressure as well.
v2: - Added support for image formats from the NV_image_formats extension
(Ilia Mirkin).
- Return 4 components by default instead of asserting. (Rob Clark).
v3: Added more missing formats (Ilia Mirkin).
v4: Added a debug message for unknown image formats (Rob Clark).
Reviewed-by: Ilia Mirkin <[email protected]>
Reviewed-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/freedreno/ir3')
-rw-r--r-- | src/freedreno/ir3/ir3_compiler_nir.c | 72 |
1 files changed, 70 insertions, 2 deletions
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 85f14f354d2..19aef3eb27e 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1251,6 +1251,73 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 	ir3_split_dest(b, dst, sam, 0, 4);
 }
 
+/* Returns the number of components for the different image formats
+ * supported by the GLES 3.1 spec, plus those added by the
+ * GL_NV_image_formats extension.
+ */
+static unsigned
+get_num_components_for_glformat(GLuint format)
+{
+	switch (format) {
+	case GL_R32F:
+	case GL_R32I:
+	case GL_R32UI:
+	case GL_R16F:
+	case GL_R16I:
+	case GL_R16UI:
+	case GL_R16:
+	case GL_R16_SNORM:
+	case GL_R8I:
+	case GL_R8UI:
+	case GL_R8:
+	case GL_R8_SNORM:
+		return 1;
+
+	case GL_RG32F:
+	case GL_RG32I:
+	case GL_RG32UI:
+	case GL_RG16F:
+	case GL_RG16I:
+	case GL_RG16UI:
+	case GL_RG16:
+	case GL_RG16_SNORM:
+	case GL_RG8I:
+	case GL_RG8UI:
+	case GL_RG8:
+	case GL_RG8_SNORM:
+		return 2;
+
+	case GL_R11F_G11F_B10F:
+		return 3;
+
+	case GL_RGBA32F:
+	case GL_RGBA32I:
+	case GL_RGBA32UI:
+	case GL_RGBA16F:
+	case GL_RGBA16I:
+	case GL_RGBA16UI:
+	case GL_RGBA16:
+	case GL_RGBA16_SNORM:
+	case GL_RGBA8I:
+	case GL_RGBA8UI:
+	case GL_RGBA8:
+	case GL_RGBA8_SNORM:
+	case GL_RGB10_A2UI:
+	case GL_RGB10_A2:
+		return 4;
+
+	default:
+		/* Return 4 components also for all other formats we don't know
+		 * about. This is always safe. Also, the format should have been
+		 * validated already by the higher level API. Drop a debug message
+		 * just in case.
+		 */
+		debug_printf("Unhandled GL format %u while emitting imageStore()\n",
+					 format);
+		return 4;
+	}
+}
+
 /* src[] = { deref, coord, sample_index, value }. const_index[] = {} */
 static void
 emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
@@ -1262,6 +1329,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
 	unsigned ncoords = get_image_coords(var, NULL);
 	unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0]));
+	unsigned ncomp = get_num_components_for_glformat(var->data.image.format);
 
 	/* src0 is value
 	 * src1 is coords
@@ -1276,10 +1344,10 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	 */
 
 	stib = ir3_STIB(b, create_immed(b, tex_idx), 0,
-			ir3_create_collect(ctx, value, 4), 0,
+			ir3_create_collect(ctx, value, ncomp), 0,
 			ir3_create_collect(ctx, coords, ncoords), 0,
 			offset, 0);
-	stib->cat6.iim_val = 4;
+	stib->cat6.iim_val = ncomp;
 	stib->cat6.d = ncoords;
 	stib->cat6.type = get_image_type(var);
 	stib->cat6.typed = true;