llvmpipe: add support for b5g6r5_srgb

The conversion code for srgb was tuned for n x 4x8bit AoS -> 4 x nxfloat SoA (and vice versa), fix this to handle also 16bit 565-style srgb formats. Still not really all that generic, things like r10g10b10a2_srgb or r4g4b4a4_srgb wouldn't work (the latter trivial to fix, the former would not require more work to not crash but near certainly need some higher precision calculation) but not needed right now. The code is not fully optimized for this (could use more direct calculation instead of expanding to 8-bit range first) but should be good enough. Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
author: Roland Scheidegger <sroland@vmware.com> 2014-03-20 16:43:36 +0100
committer: Roland Scheidegger <sroland@vmware.com> 2014-03-21 17:23:38 +0100
commit: 9477d8c862b206c35de0bc957a08524188abc898 (patch)
tree: 9fd13f6453dc28723b30b054451a41156b96a8e8
parent: 2aa77f2777ad35d7fe249939b0d828306f13eade (diff)
5 files changed, 61 insertions, 9 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index a7a4ba0a48c..1177fb224dd 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -167,6 +167,7 @@ lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
 LLVMValueRef
 lp_build_srgb_to_linear(struct gallivm_state *gallivm,
                         struct lp_type src_type,
+                        unsigned chan_bits,
                         LLVMValueRef src);
 
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 81cd2b0f7c6..ff2887ee8fa 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -165,13 +165,12 @@ lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
 
          if (type.floating) {
             if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
-               assert(width == 8);
                if (format_desc->swizzle[3] == chan) {
                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
                }
                else {
                   struct lp_type conv_type = lp_uint_type(type);
-                  input = lp_build_srgb_to_linear(gallivm, conv_type, input);
+                  input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
                }
             }
             else {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
index 6645151f514..e4849fe043f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
@@ -88,11 +88,12 @@
  *   (3rd order polynomial is required for crappy but just sufficient accuracy)
  *
  * @param src   integer (vector) value(s) to convert
- *              (8 bit values unpacked to 32 bit already).
+ *              (chan_bits bit values unpacked to 32 bit already).
  */
 LLVMValueRef
 lp_build_srgb_to_linear(struct gallivm_state *gallivm,
                         struct lp_type src_type,
+                        unsigned chan_bits,
                         LLVMValueRef src)
 {
    struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
@@ -105,6 +106,8 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm,
    };
 
    assert(src_type.width == 32);
+   /* Technically this would work with more bits too but would be inaccurate. */
+   assert(chan_bits <= 8);
 
    lp_build_context_init(&f32_bld, gallivm, f32_type);
 
@@ -124,6 +127,12 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm,
     */
    /* doing the 1/255 mul as part of the approximation */
    srcf = lp_build_int_to_float(&f32_bld, src);
+   if (chan_bits != 8) {
+      /* could adjust all the constants instead */
+      LLVMValueRef rescale_const = lp_build_const_vec(gallivm, f32_type,
+                                                      255.0f / ((1 << chan_bits) - 1));
+      srcf = lp_build_mul(&f32_bld, srcf, rescale_const);
+   }
    lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
    part_lin = lp_build_mul(&f32_bld, srcf, lin_const);
 
@@ -150,6 +159,7 @@ lp_build_srgb_to_linear(struct gallivm_state *gallivm,
 static LLVMValueRef
 lp_build_linear_to_srgb(struct gallivm_state *gallivm,
                         struct lp_type src_type,
+                        unsigned chan_bits,
                         LLVMValueRef src)
 {
    LLVMBuilderRef builder = gallivm->builder;
@@ -292,6 +302,13 @@ lp_build_linear_to_srgb(struct gallivm_state *gallivm,
    is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh);
    tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);
 
+   if (chan_bits != 8) {
+      /* could adjust all the constants instead */
+      LLVMValueRef rescale_const = lp_build_const_vec(gallivm, src_type,
+                                                      ((1 << chan_bits) - 1) / 255.0f);
+      tmp = lp_build_mul(&f32_bld, tmp, rescale_const);
+   }
+
    f32_bld.type.sign = 0;
    return lp_build_iround(&f32_bld, tmp);
 }
@@ -300,7 +317,9 @@ lp_build_linear_to_srgb(struct gallivm_state *gallivm,
 /**
  * Convert linear float soa values to packed srgb AoS values.
  * This only handles packed formats which are 4x8bit in size
- * (rgba and rgbx plus swizzles).
+ * (rgba and rgbx plus swizzles), and 16bit 565-style formats
+ * with no alpha. (In the latter case the return values won't be
+ * fully packed, it will look like r5g6b5x16r5g6b5x16...)
  *
  * @param src   float SoA (vector) values to convert.
  */
@@ -320,7 +339,8 @@ lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
 
    /* rgb is subject to linear->srgb conversion, alpha is not */
    for (chan = 0; chan < 3; chan++) {
-      tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, src[chan]);
+      unsigned chan_bits = dst_fmt->channel[dst_fmt->swizzle[chan]].size;
+      tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, chan_bits, src[chan]);
    }
    /*
     * can't use lp_build_conv since we want to keep values as 32bit
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index c8e95fe329e..fe06e34f675 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -342,6 +342,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
 
    if (bind & PIPE_BIND_RENDER_TARGET) {
       if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+         /* this is a lie actually other formats COULD exist where we would fail */
          if (format_desc->nr_channels < 3)
             return FALSE;
       }
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 2f9f907edd6..5e28f0ec4ea 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -868,12 +868,12 @@ lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
    unsigned chan;
 
    if (format_expands_to_float_soa(format_desc)) {
-      /* just make this a 32bit uint */
+      /* just make this a uint with width of block */
       type->floating = false;
       type->fixed = false;
       type->sign = false;
       type->norm = false;
-      type->width = 32;
+      type->width = format_desc->block.bits;
       type->length = 1;
       return;
    }
@@ -1137,12 +1137,24 @@ convert_to_blend_type(struct gallivm_state *gallivm,
        * This is pretty suboptimal for this case blending in SoA would be much
        * better, since conversion gets us SoA values so need to convert back.
        */
-      assert(src_type.width == 32);
+      assert(src_type.width == 32 || src_type.width == 16);
       assert(dst_type.floating);
       assert(dst_type.width == 32);
       assert(dst_type.length % 4 == 0);
       assert(num_srcs % 4 == 0);
 
+      if (src_type.width == 16) {
+         /* expand 4x16bit values to 4x32bit */
+         struct lp_type type32x4 = src_type;
+         LLVMTypeRef ltype32x4;
+         unsigned num_fetch = dst_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+         type32x4.width = 32;
+         ltype32x4 = lp_build_vec_type(gallivm, type32x4);
+         for (i = 0; i < num_fetch; i++) {
+            src[i] = LLVMBuildZExt(builder, src[i], ltype32x4, "");
+         }
+         src_type.width = 32;
+      }
       for (i = 0; i < 4; i++) {
          tmpsrc[i] = src[i];
       }
@@ -1298,7 +1310,7 @@ convert_from_blend_type(struct gallivm_state *gallivm,
       assert(src_type.floating);
       assert(src_type.width == 32);
       assert(src_type.length % 4 == 0);
-      assert(dst_type.width == 32);
+      assert(dst_type.width == 32 || dst_type.width == 16);
 
       for (i = 0; i < num_srcs / 4; i++) {
          LLVMValueRef tmpsoa[4], tmpdst;
@@ -1333,6 +1345,25 @@ convert_from_blend_type(struct gallivm_state *gallivm,
             src[i] = tmpdst;
          }
       }
+      if (dst_type.width == 16) {
+         struct lp_type type16x8 = dst_type;
+         struct lp_type type32x4 = dst_type;
+         LLVMTypeRef ltype16x4, ltypei64, ltypei128;
+         unsigned num_fetch = src_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+         type16x8.length = 8;
+         type32x4.width = 32;
+         ltypei128 = LLVMIntTypeInContext(gallivm->context, 128);
+         ltypei64 = LLVMIntTypeInContext(gallivm->context, 64);
+         ltype16x4 = lp_build_vec_type(gallivm, dst_type);
+         /* We could do vector truncation but it doesn't generate very good code */
+         for (i = 0; i < num_fetch; i++) {
+            src[i] = lp_build_pack2(gallivm, type32x4, type16x8,
+                                    src[i], lp_build_zero(gallivm, type32x4));
+            src[i] = LLVMBuildBitCast(builder, src[i], ltypei128, "");
+            src[i] = LLVMBuildTrunc(builder, src[i], ltypei64, "");
+            src[i] = LLVMBuildBitCast(builder, src[i], ltype16x4, "");
+         }
+      }
       return;
    }
author	Roland Scheidegger <sroland@vmware.com>	2014-03-20 16:43:36 +0100
committer	Roland Scheidegger <sroland@vmware.com>	2014-03-21 17:23:38 +0100
commit	9477d8c862b206c35de0bc957a08524188abc898 (patch)
tree	9fd13f6453dc28723b30b054451a41156b96a8e8
parent	2aa77f2777ad35d7fe249939b0d828306f13eade (diff)