3 files changed, 118 insertions, 3 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 053f4132080..2f39abc63d8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -397,6 +397,92 @@ lp_build_r11g11b10_to_float(struct gallivm_state *gallivm,
 }
 
 
+static LLVMValueRef
+lp_build_rgb9_to_float_helper(struct gallivm_state *gallivm,
+                              struct lp_type f32_type,
+                              LLVMValueRef src,
+                              LLVMValueRef scale,
+                              unsigned mantissa_start)
+{
+   LLVMValueRef shift, mask;
+
+   struct lp_type i32_type = lp_type_int_vec(32, 32 * f32_type.length);
+   struct lp_build_context i32_bld, f32_bld;
+
+   lp_build_context_init(&i32_bld, gallivm, i32_type);
+   lp_build_context_init(&f32_bld, gallivm, f32_type);
+
+   /*
+    * Extract the 9-bit mantissa located at bit mantissa_start of src and
+    * return (float)mantissa * scale. This is much easier than for other
+    * weirdo float formats, since there's no sign, no Inf/NaN, and nothing
+    * special is required for normals/denormals either (without the implied
+    * one for the mantissa, everything looks like a denormal).
+    */
+   shift = lp_build_const_int_vec(gallivm, i32_type, mantissa_start);
+   mask = lp_build_const_int_vec(gallivm, i32_type, 0x1ff);
+   src = lp_build_shr(&i32_bld, src, shift);
+   src = lp_build_and(&i32_bld, src, mask);
+   src = lp_build_int_to_float(&f32_bld, src);
+   return lp_build_mul(&f32_bld, src, scale);
+}
+
+
+/**
+ * Convert shared exponent format (rgb9e5) value(s) to rgba float SoA values.
+ * dst[0..2] are decoded from the mantissas at bits 0, 9, 18; dst[3] is one.
+ * @param src   packed AoS rgb9e5 values (as scalar or vector int32)
+ * @param dst   array receiving the four SoA result values (dst[0]..dst[3])
+ */
+void
+lp_build_rgb9e5_to_float(struct gallivm_state *gallivm,
+                         LLVMValueRef src,
+                         LLVMValueRef *dst)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef src_type = LLVMTypeOf(src);
+   LLVMValueRef shift, scale, bias, exp;
+   unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
+                            LLVMGetVectorSize(src_type) : 1;
+   struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
+   struct lp_type u32_type = lp_type_uint_vec(32, 32 * src_length);
+   struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length);
+   struct lp_build_context i32_bld, u32_bld, f32_bld;
+
+   lp_build_context_init(&i32_bld, gallivm, i32_type);
+   lp_build_context_init(&u32_bld, gallivm, u32_type);
+   lp_build_context_init(&f32_bld, gallivm, f32_type);
+
+   /* extract the shared exponent (the top 5 bits, starting at bit 27) */
+   shift = lp_build_const_int_vec(gallivm, i32_type, 27);
+   /* the shift must be unsigned, otherwise a mask would be needed */
+   exp = lp_build_shr(&u32_bld, src, shift);
+
+   /*
+    * scale factor is 2 ^ (exp - bias), additionally corrected for the 9
+    * mantissa bits: 2 ^ (exp - 15 - 9). Build it directly as a float by
+    * adding the float32 exponent bias (127) and shifting the sum into the
+    * exponent field (bit 23). Integer shifts of the mantissa are not used:
+    * a) don't have vector shift in a lot of cases
+    * b) shift direction changes, hence would need 2 shifts + conditional
+    *    (or a rotate instruction, which is even more rare (for instance XOP))
+    * This saves us the float conversion at the end too.
+    */
+   bias = lp_build_const_int_vec(gallivm, i32_type, 127 - (15 + 9));
+   scale = lp_build_add(&i32_bld, exp, bias);
+   shift = lp_build_const_int_vec(gallivm, i32_type, 23);
+   scale = lp_build_shl(&i32_bld, scale, shift);
+   scale = LLVMBuildBitCast(builder, scale, f32_bld.vec_type, "");
+
+   dst[0] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 0);
+   dst[1] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 9);
+   dst[2] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 18);
+
+   /* Just set alpha to one */
+   dst[3] = f32_bld.one;
+}
+
+
 /**
  * Converts int16 half-float to float32
  * Note this can be performed in 1 instruction if vcvtph2ps exists (sse5 i think?)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
index 5bd6f4f1d75..d8bc294bce0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
@@ -70,6 +70,11 @@ lp_build_r11g11b10_to_float(struct gallivm_state *gallivm,
                             LLVMValueRef src,
                             LLVMValueRef *dst);
 
+void
+lp_build_rgb9e5_to_float(struct gallivm_state *gallivm,
+                         LLVMValueRef src,
+                         LLVMValueRef *dst);
+
 LLVMValueRef
 lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm,
                                         struct lp_type src_type,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 9eb6ef5438d..54ca61a0c38 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -310,9 +310,10 @@ lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
  * \param type  the desired return type for 'rgba'.  The vector length
  *              is the number of texels to fetch
  *
- * \param base_ptr  points to start of the texture image block.  For non-
- *                  compressed formats, this simply points to the texel.
- *                  For compressed formats, it points to the start of the
+ * \param base_ptr  points to the base of the texture mip tree.
+ * \param offset    offset to start of the texture image block.  For non-
+ *                  compressed formats, this simply is an offset to the texel.
+ *                  For compressed formats, it is an offset to the start of the
  *                  compressed data block.
  *
  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
@@ -368,6 +369,29 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
       return;
    }
 
+   if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
+       format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
+      /*
+       * similar conceptually to above but requiring special
+       * AoS packed -> SoA float conversion code.
+       */
+      LLVMValueRef packed;
+
+      assert(type.floating);
+      assert(type.width == 32);
+
+      packed = lp_build_gather(gallivm, type.length,
+                               format_desc->block.bits,
+                               type.width, base_ptr, offset);
+      if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
+         lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
+      }
+      else {
+         lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
+      }
+      return;
+   }
+
    /*
     * Try calling lp_build_fetch_rgba_aos for all pixels.
     */