diff options
author | Alyssa Rosenzweig <[email protected]> | 2019-11-06 21:48:33 -0500 |
---|---|---|
committer | Tomeu Vizoso <[email protected]> | 2019-11-11 15:23:44 +0000 |
commit | 5b31182665f0dfc89e9ae79cc73342d8b4d7536d (patch) | |
tree | 120662d72acd8484cdf76e64c6fdf911ee92124a /src | |
parent | 3295edaadf7515441980c5095acf056d0ae2d8dc (diff) |
panfrost: Select format-specific blending intrinsics
If we have an accelerated path for a particular framebuffer format,
let's use it to save a bunch of instructions in a blend shader.
[Tomeu: Only use the faster intrinsic on GPUs newer than T760]
Signed-off-by: Alyssa Rosenzweig <[email protected]>
Signed-off-by: Tomeu Vizoso <[email protected]>
Reviewed-by: Tomeu Vizoso <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/panfrost/nir/nir_lower_blend.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c | 45 | ||||
-rw-r--r-- | src/gallium/drivers/panfrost/pan_blend_shaders.c | 2 |
3 files changed, 41 insertions, 9 deletions
diff --git a/src/gallium/drivers/panfrost/nir/nir_lower_blend.h b/src/gallium/drivers/panfrost/nir/nir_lower_blend.h index ea99e2348fa..846aca27d74 100644 --- a/src/gallium/drivers/panfrost/nir/nir_lower_blend.h +++ b/src/gallium/drivers/panfrost/nir/nir_lower_blend.h @@ -55,6 +55,7 @@ typedef struct { void nir_lower_blend(nir_shader *shader, nir_lower_blend_options options); void -nir_lower_framebuffer(nir_shader *shader, enum pipe_format format); +nir_lower_framebuffer(nir_shader *shader, enum pipe_format format, + unsigned gpu_id); #endif diff --git a/src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c b/src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c index 9afbbe4dd01..60d02320fdf 100644 --- a/src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c +++ b/src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c @@ -43,6 +43,24 @@ #include "nir_lower_blend.h" #include "util/u_format.h" +/* Determines the best NIR intrinsic to load a tile buffer of a given type, + * using native format conversion where possible. RGBA8 UNORM has a fast path + * (on some chips). Otherwise, we default to raw reads. */ + +static nir_intrinsic_op +nir_best_load_for_format( + const struct util_format_description *desc, + unsigned *special_bitsize, + unsigned gpu_id) +{ + if (util_format_is_unorm8(desc) && gpu_id != 0x750) { + *special_bitsize = 16; + return nir_intrinsic_load_output_u8_as_fp16_pan; + } else + return nir_intrinsic_load_raw_output_pan; +} + + /* Converters for UNORM8 formats, e.g. 
R8G8B8A8_UNORM */ static nir_ssa_def * @@ -204,6 +222,7 @@ nir_shader_to_native(nir_builder *b, static nir_ssa_def * nir_native_to_shader(nir_builder *b, nir_ssa_def *c_native, + nir_intrinsic_op op, const struct util_format_description *desc, unsigned bits, bool homogenous_bits) @@ -212,6 +231,15 @@ nir_native_to_shader(nir_builder *b, util_format_is_float(desc->format) || util_format_is_pure_integer(desc->format); + /* Handle preconverted formats */ + if (op == nir_intrinsic_load_output_u8_as_fp16_pan) { + assert(util_format_is_unorm8(desc)); + return nir_f2f32(b, c_native); + } + + /* Otherwise, we're raw */ + assert(op == nir_intrinsic_load_raw_output_pan); + if (util_format_is_unorm8(desc)) return nir_unorm8_to_float(b, c_native); else if (homogenous_bits && float_or_pure_int) @@ -223,7 +251,8 @@ nir_native_to_shader(nir_builder *b, } void -nir_lower_framebuffer(nir_shader *shader, enum pipe_format format) +nir_lower_framebuffer(nir_shader *shader, enum pipe_format format, + unsigned gpu_id) { /* Blend shaders are represented as special fragment shaders */ assert(shader->info.stage == MESA_SHADER_FRAGMENT); @@ -314,20 +343,22 @@ nir_lower_framebuffer(nir_shader *shader, enum pipe_format format) /* For loads, add conversion after */ b.cursor = nir_after_instr(instr); - /* Rewrite to use a native load by creating a new intrinsic */ - - nir_intrinsic_instr *new = - nir_intrinsic_instr_create(shader, nir_intrinsic_load_raw_output_pan); + /* Determine the best op for the format/hardware */ + unsigned bitsize = raw_bitsize_in; + nir_intrinsic_op op = nir_best_load_for_format(format_desc, + &bitsize, + gpu_id); + /* Rewrite to use a native load by creating a new intrinsic */ + nir_intrinsic_instr *new = nir_intrinsic_instr_create(shader, op); new->num_components = 4; - unsigned bitsize = raw_bitsize_in; nir_ssa_dest_init(&new->instr, &new->dest, 4, bitsize, NULL); nir_builder_instr_insert(&b, &new->instr); /* Convert the raw value */ nir_ssa_def *raw = 
&new->dest.ssa; - nir_ssa_def *converted = nir_native_to_shader(&b, raw, format_desc, bits, homogenous_bits); + nir_ssa_def *converted = nir_native_to_shader(&b, raw, op, format_desc, bits, homogenous_bits); /* Rewrite to use the converted value */ nir_src rewritten = nir_src_for_ssa(converted); diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c index 223925c2ebb..b31b3ce14c8 100644 --- a/src/gallium/drivers/panfrost/pan_blend_shaders.c +++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c @@ -168,7 +168,7 @@ panfrost_compile_blend_shader( nir_make_options(cso, 1); NIR_PASS_V(shader, nir_lower_blend, options); - NIR_PASS_V(shader, nir_lower_framebuffer, format); + NIR_PASS_V(shader, nir_lower_framebuffer, format, screen->gpu_id); /* Compile the built shader */ |