llvmpipe: add EXT_packed_float render target format support

New conversion code to handle conversion from/to r11g11b10 AoS to/from SoA floats, and also add code for conversion from rgb9e5 AoS to float SoA (which works pretty much the same as r11g11b10 except for the packing). (This code should also be used for texture sampling instead of relying on u_format conversion but it's not yet, so rgb9e5 is unused.) Unfortunately a crazy amount of hacks is necessary to get the conversion code running in llvmpipe's generate_unswizzled_blend, which isn't well suited for formats where the storage representation has nothing to do with what's needed for blending (moreover, the conversion will convert from packed AoS values, which is the storage format, to float SoA values, because this is much more natural for the conversion, and likewise from SoA values to packed AoS values - but the "blend" (which includes trivial things like partial mask) works on AoS values, so incoming fs values will go SoA->AoS, values from destination will go packed AoS->SoA->AoS, then do blend, then AoS->SoA->packed AoS which probably isn't the most efficient way though the shuffles are probably bearable). Passes piglit fbo-blending-formats (with GL_EXT_packed_float parameter), still need to verify Inf/NaNs (where most of the complexity in the conversion comes from actually). v2: drop the (very bogus) rgb9e5 part, and do component extraction in the helper code for r11g11b10 to float conversion, making the code slightly more compact (suggested by Jose), now that there are no other callers left this works quite well. (Could do the same for the opposite way but it's less than ideal there, final part of packing needs to be done in caller anyway and there'd be another conditional.) v3: minor style and comment fixes. Also fix a potential issue with negative zero being potentially returned by max(src, zero) as we don't have well-defined min/max behavior (fortunately no additonal cost). Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
author: Roland Scheidegger <sroland@vmware.com> 2013-03-22 20:09:18 +0100
committer: Roland Scheidegger <sroland@vmware.com> 2013-03-22 20:10:53 +0100
commit: b101a094b5576337f5bb7aecf525eb1755757b9c (patch)
tree: d429450eb17043860c80edc726aa5df008f66474 /src/gallium/drivers/llvmpipe/lp_state_fs.c
parent: 31009b45213ae9efab6af9d8efcc061194919b12 (diff)
1 files changed, 126 insertions, 0 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index d8369b4d807..953a5c1aa44 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -972,6 +972,17 @@ lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
    unsigned i;
    unsigned chan;
 
+   if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      /* just make this a 32bit uint */
+      type->floating = false;
+      type->fixed = false;
+      type->sign = false;
+      type->norm = false;
+      type->width = 32;
+      type->length = 1;
+      return;
+   }
+
    for (i = 0; i < 4; i++)
       if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
          break;
@@ -1009,6 +1020,17 @@ lp_blend_type_from_format_desc(const struct util_format_description *format_desc
    unsigned i;
    unsigned chan;
 
+   if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      /* always use ordinary floats for blending */
+      type->floating = true;
+      type->fixed = false;
+      type->sign = true;
+      type->norm = false;
+      type->width = 32;
+      type->length = 4;
+      return;
+   }
+
    for (i = 0; i < 4; i++)
       if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
          break;
@@ -1122,6 +1144,48 @@ convert_to_blend_type(struct gallivm_state *gallivm,
    unsigned pixels = 16 / num_srcs;
    bool is_arith;
 
+   /*
+    * full custom path for packed floats - none of the later functions would do
+    * anything useful, and given the lp_type representation they can't be fixed.
+    */
+   if (src_fmt->format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      LLVMValueRef tmpsrc[4];
+      /*
+       * This is pretty suboptimal for this case blending in SoA would be much
+       * better, since conversion gets us SoA values so need to convert back.
+       */
+      assert(src_type.width == 32);
+      assert(dst_type.floating);
+      assert(dst_type.width = 32);
+      assert(dst_type.length % 4 == 0);
+      for (i = 0; i < 4; i++) {
+         tmpsrc[i] = src[i];
+      }
+      for (i = 0; i < num_srcs / 4; i++) {
+         LLVMValueRef tmpsoa[4];
+         LLVMValueRef tmps = tmpsrc[i];
+         if (num_srcs == 8) {
+            LLVMValueRef shuffles[8];
+            unsigned j;
+            /* fetch was 4 values but need 8-wide output values */
+            tmps = lp_build_concat(gallivm, &tmpsrc[i * 2], src_type, 2);
+            /*
+             * for 8-wide aos transpose would give us wrong order not matching
+             * incoming converted fs values and mask. ARGH.
+             */
+            for (j = 0; j < 4; j++) {
+               shuffles[j] = lp_build_const_int32(gallivm, j * 2);
+               shuffles[j + 4] = lp_build_const_int32(gallivm, j * 2 + 1);
+            }
+            tmps = LLVMBuildShuffleVector(builder, tmps, tmps,
+                                          LLVMConstVector(shuffles, 8), "");
+         }
+         lp_build_r11g11b10_to_float(gallivm, tmps, tmpsoa);
+         lp_build_transpose_aos(gallivm, dst_type, tmpsoa, &src[i * 4]);
+      }
+      return;
+   }
+
    lp_mem_type_from_format_desc(src_fmt, &mem_type);
    lp_blend_type_from_format_desc(src_fmt, &blend_type);
 
@@ -1225,6 +1289,47 @@ convert_from_blend_type(struct gallivm_state *gallivm,
    unsigned pixels = 16 / num_srcs;
    bool is_arith;
 
+   /*
+    * full custom path for packed floats - none of the later functions would do
+    * anything useful, and given the lp_type representation they can't be fixed.
+    */
+   if (src_fmt->format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      /*
+       * This is pretty suboptimal for this case blending in SoA would be much
+       * better - we need to transpose the AoS values back to SoA values for
+       * conversion/packing.
+       */
+      assert(src_type.floating);
+      assert(src_type.width = 32);
+      assert(src_type.length % 4 == 0);
+      assert(dst_type.width == 32);
+      for (i = 0; i < num_srcs / 4; i++) {
+         LLVMValueRef tmpsoa[4], tmpdst;
+         lp_build_transpose_aos(gallivm, src_type, &src[i * 4], tmpsoa);
+         tmpdst = lp_build_float_to_r11g11b10(gallivm, tmpsoa);
+         if (num_srcs == 8) {
+            LLVMValueRef tmpaos, shuffles[8];
+            unsigned j;
+            /*
+             * for 8-wide aos transpose has given us wrong order not matching
+             * output order. HMPF. Also need to split the output values manually.
+             */
+            for (j = 0; j < 4; j++) {
+               shuffles[j * 2] = lp_build_const_int32(gallivm, j);
+               shuffles[j * 2 + 1] = lp_build_const_int32(gallivm, j + 4);
+            }
+            tmpaos = LLVMBuildShuffleVector(builder, tmpdst, tmpdst,
+                                            LLVMConstVector(shuffles, 8), "");
+            src[i * 2] = lp_build_extract_range(gallivm, tmpaos, 0, 4);
+            src[i * 2 + 1] = lp_build_extract_range(gallivm, tmpaos, 4, 4);
+         }
+         else {
+            src[i] = tmpdst;
+         }
+      }
+      return;
+   }
+
    lp_mem_type_from_format_desc(src_fmt, &mem_type);
    lp_blend_type_from_format_desc(src_fmt, &blend_type);
 
@@ -1532,6 +1637,17 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
       }
    }
 
+   if (out_format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      /* the code above can't work for layout_other */
+      dst_channels = 4; /* HACK: this is fake 4 really but need it due to transpose stuff later */
+      has_alpha = true;
+      swizzle[0] = 0;
+      swizzle[1] = 1;
+      swizzle[2] = 2;
+      swizzle[3] = 3;
+      pad_inline = true; /* HACK: prevent rgbxrgbx->rgbrgbxx conversion later */
+   }
+
    /* If 3 channels then pad to include alpha for 4 element transpose */
    if (dst_channels == 3 && !has_alpha) {
       for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
@@ -1756,6 +1872,16 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 
    dst_type.length *= 16 / dst_count;
 
+   if (out_format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      /*
+       * we need multiple values at once for the conversion, so can as well
+       * load them vectorized here too instead of concatenating later.
+       * (Still need concatenation later for 8-wide vectors).
+       */
+      dst_count = block_height;
+      dst_type.length = block_width;
+   }
+
    load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
                          dst, dst_type, dst_count, dst_alignment);
author	Roland Scheidegger <sroland@vmware.com>	2013-03-22 20:09:18 +0100
committer	Roland Scheidegger <sroland@vmware.com>	2013-03-22 20:10:53 +0100
commit	b101a094b5576337f5bb7aecf525eb1755757b9c (patch)
tree	d429450eb17043860c80edc726aa5df008f66474 /src/gallium/drivers/llvmpipe/lp_state_fs.c
parent	31009b45213ae9efab6af9d8efcc061194919b12 (diff)