3 files changed, 41 insertions, 9 deletions
diff --git a/src/gallium/drivers/panfrost/nir/nir_lower_blend.h b/src/gallium/drivers/panfrost/nir/nir_lower_blend.h
index ea99e2348fa..846aca27d74 100644
--- a/src/gallium/drivers/panfrost/nir/nir_lower_blend.h
+++ b/src/gallium/drivers/panfrost/nir/nir_lower_blend.h
@@ -55,6 +55,7 @@ typedef struct {
 void nir_lower_blend(nir_shader *shader, nir_lower_blend_options options);
 
 void
-nir_lower_framebuffer(nir_shader *shader, enum pipe_format format);
+nir_lower_framebuffer(nir_shader *shader, enum pipe_format format,
+                      unsigned gpu_id);
 
 #endif
diff --git a/src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c b/src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c
index 9afbbe4dd01..60d02320fdf 100644
--- a/src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c
+++ b/src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c
@@ -43,6 +43,24 @@
 #include "nir_lower_blend.h"
 #include "util/u_format.h"
 
+/* Picks the NIR intrinsic used to read back the tile buffer for a format.
+ * UNORM8 takes the u8-as-fp16 load (and sets *special_bitsize to 16), except
+ * when gpu_id is 0x750. Everything else is a raw load; bitsize is untouched. */
+
+static nir_intrinsic_op
+nir_best_load_for_format(
+      const struct util_format_description *desc,
+      unsigned *special_bitsize,
+      unsigned gpu_id)
+{
+   /* Guard: no preconverting load applies, so fall back to raw reads */
+   if (!util_format_is_unorm8(desc) || gpu_id == 0x750)
+      return nir_intrinsic_load_raw_output_pan;
+
+   *special_bitsize = 16;
+   return nir_intrinsic_load_output_u8_as_fp16_pan;
+}
+
+
 /* Converters for UNORM8 formats, e.g. R8G8B8A8_UNORM */
 
 static nir_ssa_def *
@@ -204,6 +222,7 @@ nir_shader_to_native(nir_builder *b,
 static nir_ssa_def *
 nir_native_to_shader(nir_builder *b,
                      nir_ssa_def *c_native,
+                     nir_intrinsic_op op,
                      const struct util_format_description *desc,
                      unsigned bits,
                      bool homogenous_bits)
@@ -212,6 +231,15 @@ nir_native_to_shader(nir_builder *b,
       util_format_is_float(desc->format) ||
       util_format_is_pure_integer(desc->format);
 
+   /* Handle preconverted formats */
+   if (op == nir_intrinsic_load_output_u8_as_fp16_pan) {
+      assert(util_format_is_unorm8(desc));
+      return nir_f2f32(b, c_native);
+   }
+
+   /* Otherwise, we're raw */
+   assert(op == nir_intrinsic_load_raw_output_pan);
+
    if (util_format_is_unorm8(desc))
       return nir_unorm8_to_float(b, c_native);
    else if (homogenous_bits && float_or_pure_int)
@@ -223,7 +251,8 @@ nir_native_to_shader(nir_builder *b,
 }
 
 void
-nir_lower_framebuffer(nir_shader *shader, enum pipe_format format)
+nir_lower_framebuffer(nir_shader *shader, enum pipe_format format,
+                      unsigned gpu_id)
 {
    /* Blend shaders are represented as special fragment shaders */
    assert(shader->info.stage == MESA_SHADER_FRAGMENT);
@@ -314,20 +343,22 @@ nir_lower_framebuffer(nir_shader *shader, enum pipe_format format)
                /* For loads, add conversion after */
                b.cursor = nir_after_instr(instr);
 
-               /* Rewrite to use a native load by creating a new intrinsic */
-
-               nir_intrinsic_instr *new =
-                  nir_intrinsic_instr_create(shader, nir_intrinsic_load_raw_output_pan);
+               /* Determine the best op for the format/hardware */
+               unsigned bitsize = raw_bitsize_in;
+               nir_intrinsic_op op = nir_best_load_for_format(format_desc,
+                                                              &bitsize,
+                                                              gpu_id);
 
+               /* Rewrite to use a native load by creating a new intrinsic */
+               nir_intrinsic_instr *new = nir_intrinsic_instr_create(shader, op);
                new->num_components = 4;
 
-               unsigned bitsize = raw_bitsize_in;
                nir_ssa_dest_init(&new->instr, &new->dest, 4, bitsize, NULL);
                nir_builder_instr_insert(&b, &new->instr);
 
                /* Convert the raw value */
                nir_ssa_def *raw = &new->dest.ssa;
-               nir_ssa_def *converted = nir_native_to_shader(&b, raw, format_desc, bits, homogenous_bits);
+               nir_ssa_def *converted = nir_native_to_shader(&b, raw, op, format_desc, bits, homogenous_bits);
 
                /* Rewrite to use the converted value */
                nir_src rewritten = nir_src_for_ssa(converted);
diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c
index 223925c2ebb..b31b3ce14c8 100644
--- a/src/gallium/drivers/panfrost/pan_blend_shaders.c
+++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c
@@ -168,7 +168,7 @@ panfrost_compile_blend_shader(
                 nir_make_options(cso, 1);
         NIR_PASS_V(shader, nir_lower_blend, options);
 
-        NIR_PASS_V(shader, nir_lower_framebuffer, format);
+        NIR_PASS_V(shader, nir_lower_framebuffer, format, screen->gpu_id);
 
         /* Compile the built shader */