glsl: Add support for lowering 4x8 pack/unpack operations

Lower them to arithmetic and bit manipulation expressions. Reviewed-by: Chad Versace <[email protected]> Reviewed-by: Paul Berry <[email protected]>
author: Matt Turner <[email protected]> 2013-01-21 15:31:00 -0800
committer: Matt Turner <[email protected]> 2013-01-25 14:10:23 -0800
commit: 321555fb41d92ef61679ec3334e1cc24b5ab4c2d (patch)
tree: 7bccb46bc3443f9e48b6e3ee3a871d74ead21fd0 /src
parent: 1ef674f215b10820b94af131b6158d84b89f4e4b (diff)
2 files changed, 285 insertions, 0 deletions
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index ac90b875a60..8f33018404e 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -54,6 +54,12 @@ enum lower_packing_builtins_op {
 
    LOWER_PACK_HALF_2x16_TO_SPLIT        = 0x0040,
    LOWER_UNPACK_HALF_2x16_TO_SPLIT      = 0x0080,
+
+   LOWER_PACK_SNORM_4x8                 = 0x0100,
+   LOWER_UNPACK_SNORM_4x8               = 0x0200,
+
+   LOWER_PACK_UNORM_4x8                 = 0x0400,
+   LOWER_UNPACK_UNORM_4x8               = 0x0800,
 };
 
 bool do_common_optimization(exec_list *ir, bool linked,
diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp
index 136d4cdfb32..db73c7b0fc2 100644
--- a/src/glsl/lower_packing_builtins.cpp
+++ b/src/glsl/lower_packing_builtins.cpp
@@ -84,9 +84,15 @@ public:
       case LOWER_PACK_SNORM_2x16:
          *rvalue = lower_pack_snorm_2x16(op0);
          break;
+      case LOWER_PACK_SNORM_4x8:
+         *rvalue = lower_pack_snorm_4x8(op0);
+         break;
       case LOWER_PACK_UNORM_2x16:
          *rvalue = lower_pack_unorm_2x16(op0);
          break;
+      case LOWER_PACK_UNORM_4x8:
+         *rvalue = lower_pack_unorm_4x8(op0);
+         break;
       case LOWER_PACK_HALF_2x16:
          *rvalue = lower_pack_half_2x16(op0);
          break;
@@ -96,9 +102,15 @@ public:
       case LOWER_UNPACK_SNORM_2x16:
          *rvalue = lower_unpack_snorm_2x16(op0);
          break;
+      case LOWER_UNPACK_SNORM_4x8:
+         *rvalue = lower_unpack_snorm_4x8(op0);
+         break;
       case LOWER_UNPACK_UNORM_2x16:
          *rvalue = lower_unpack_unorm_2x16(op0);
          break;
+      case LOWER_UNPACK_UNORM_4x8:
+         *rvalue = lower_unpack_unorm_4x8(op0);
+         break;
       case LOWER_UNPACK_HALF_2x16:
          *rvalue = lower_unpack_half_2x16(op0);
          break;
@@ -137,18 +149,30 @@ private:
       case ir_unop_pack_snorm_2x16:
          result = op_mask & LOWER_PACK_SNORM_2x16;
          break;
+      case ir_unop_pack_snorm_4x8:
+         result = op_mask & LOWER_PACK_SNORM_4x8;
+         break;
       case ir_unop_pack_unorm_2x16:
          result = op_mask & LOWER_PACK_UNORM_2x16;
          break;
+      case ir_unop_pack_unorm_4x8:
+         result = op_mask & LOWER_PACK_UNORM_4x8;
+         break;
       case ir_unop_pack_half_2x16:
          result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT);
          break;
       case ir_unop_unpack_snorm_2x16:
          result = op_mask & LOWER_UNPACK_SNORM_2x16;
          break;
+      case ir_unop_unpack_snorm_4x8:
+         result = op_mask & LOWER_UNPACK_SNORM_4x8;
+         break;
       case ir_unop_unpack_unorm_2x16:
          result = op_mask & LOWER_UNPACK_UNORM_2x16;
          break;
+      case ir_unop_unpack_unorm_4x8:
+         result = op_mask & LOWER_UNPACK_UNORM_4x8;
+         break;
       case ir_unop_unpack_half_2x16:
          result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);
          break;
@@ -207,6 +231,30 @@ private:
    }
 
    /**
+    * \brief Pack four uint8's into a single uint32.
+    *
+    * Interpret the given uvec4 as a uint32 4-typle. Pack the 4-tuple into a
+    * uint32 where the least significant bits specify the first element of the
+    * 4-tuple. Return the uint32.
+    */
+   ir_rvalue*
+   pack_uvec4_to_uint(ir_rvalue *uvec4_rval)
+   {
+      assert(uvec4_rval->type == glsl_type::uvec4_type);
+
+      /* uvec4 u = UVEC4_RVAL; */
+      ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
+                                          "tmp_pack_uvec4_to_uint");
+      factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));
+
+      /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */
+      return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)),
+                           lshift(swizzle_z(u), constant(16u))),
+                    bit_or(lshift(swizzle_y(u), constant(8u)),
+                           swizzle_x(u)));
+   }
+
+   /**
     * \brief Unpack a uint32 into two uint16's.
     *
     * Interpret the given uint32 as a uint16 pair where the uint32's least
@@ -237,6 +285,44 @@ private:
    }
 
    /**
+    * \brief Unpack a uint32 into four uint8's.
+    *
+    * Interpret the given uint32 as a uint8 4-tuple where the uint32's least
+    * significant bits specify the 4-tuple's first element. Return the uint8
+    * 4-tuple as a uvec4.
+    */
+   ir_rvalue*
+   unpack_uint_to_uvec4(ir_rvalue *uint_rval)
+   {
+      assert(uint_rval->type == glsl_type::uint_type);
+
+      /* uint u = UINT_RVAL; */
+      ir_variable *u = factory.make_temp(glsl_type::uint_type,
+                                          "tmp_unpack_uint_to_uvec4_u");
+      factory.emit(assign(u, uint_rval));
+
+      /* uvec4 u4; */
+      ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type,
+                                           "tmp_unpack_uint_to_uvec4_u4");
+
+      /* u4.x = u & 0xffu; */
+      factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));
+
+      /* u4.y = (u >> 8u) & 0xffu; */
+      factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
+                                      constant(0xffu)), WRITEMASK_Y));
+
+      /* u4.z = (u >> 16u) & 0xffu; */
+      factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
+                                      constant(0xffu)), WRITEMASK_Z));
+
+      /* u4.w = (u >> 24u) */
+      factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));
+
+      return deref(u4).val;
+   }
+
+   /**
     * \brief Lower a packSnorm2x16 expression.
     *
     * \param vec2_rval is packSnorm2x16's input
@@ -286,6 +372,55 @@ private:
    }
 
    /**
+    * \brief Lower a packSnorm4x8 expression.
+    *
+    * \param vec4_rval is packSnorm4x8's input
+    * \return packSnorm4x8's output as a uint rvalue
+    */
+   ir_rvalue*
+   lower_pack_snorm_4x8(ir_rvalue *vec4_rval)
+   {
+      /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+       *
+       *    highp uint packSnorm4x8(vec4 v)
+       *    -------------------------------
+       *    First, converts each component of the normalized floating-point value
+       *    v into 8-bit integer values. Then, the results are packed into the
+       *    returned 32-bit unsigned integer.
+       *
+       *    The conversion for component c of v to fixed point is done as
+       *    follows:
+       *
+       *       packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
+       *
+       *    The first component of the vector will be written to the least
+       *    significant bits of the output; the last component will be written to
+       *    the most significant bits.
+       *
+       * This function generates IR that approximates the following pseudo-GLSL:
+       *
+       *     return pack_uvec4_to_uint(
+       *         uvec4(ivec4(
+       *           round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f))));
+       *
+       * It is necessary to first convert the vec4 to ivec4 rather than directly
+       * converting vec4 to uvec4 because the latter conversion is undefined.
+       * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to
+       * convert a negative floating point value to an uint".
+       */
+      assert(vec4_rval->type == glsl_type::vec4_type);
+
+      ir_rvalue *result = pack_uvec4_to_uint(
+            i2u(f2i(round_even(mul(clamp(vec4_rval,
+                                         constant(-1.0f),
+                                         constant(1.0f)),
+                                   constant(127.0f))))));
+
+      assert(result->type == glsl_type::uint_type);
+      return result;
+   }
+
+   /**
     * \brief Lower an unpackSnorm2x16 expression.
     *
     * \param uint_rval is unpackSnorm2x16's input
@@ -345,6 +480,65 @@ private:
    }
 
    /**
+    * \brief Lower an unpackSnorm4x8 expression.
+    *
+    * \param uint_rval is unpackSnorm4x8's input
+    * \return unpackSnorm4x8's output as a vec4 rvalue
+    */
+   ir_rvalue*
+   lower_unpack_snorm_4x8(ir_rvalue *uint_rval)
+   {
+      /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+       *
+       *    highp vec4 unpackSnorm4x8 (highp uint p)
+       *    ----------------------------------------
+       *    First, unpacks a single 32-bit unsigned integer p into four
+       *    8-bit unsigned integers. Then, each component is converted to
+       *    a normalized floating-point value to generate the returned
+       *    four-component vector.
+       *
+       *    The conversion for unpacked fixed-point value f to floating point is
+       *    done as follows:
+       *
+       *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
+       *
+       *    The first component of the returned vector will be extracted from the
+       *    least significant bits of the input; the last component will be
+       *    extracted from the most significant bits.
+       *
+       * This function generates IR that approximates the following pseudo-GLSL:
+       *
+       *    return clamp(
+       *       ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f,
+       *       -1.0f, 1.0f);
+       *
+       * The above IR may appear unnecessarily complex, but the intermediate
+       * conversion to ivec4 and the bit shifts are necessary to correctly unpack
+       * negative floats.
+       *
+       * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0,
+       * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we
+       * place that int8 into an int32, which results in the *positive* integer
+       * 0x000000ff.  The int8's sign bit becomes, in the int32, the rather
+       * unimportant bit 8. We must now extend the int8's sign bit into bits
+       * 9-32, which is accomplished by left-shifting then right-shifting.
+       */
+
+      assert(uint_rval->type == glsl_type::uint_type);
+
+      ir_rvalue *result =
+        clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
+                                    constant(24u)),
+                             constant(24u))),
+                  constant(127.0f)),
+              constant(-1.0f),
+              constant(1.0f));
+
+      assert(result->type == glsl_type::vec4_type);
+      return result;
+   }
+
+   /**
     * \brief Lower a packUnorm2x16 expression.
     *
     * \param vec2_rval is packUnorm2x16's input
@@ -389,6 +583,50 @@ private:
    }
 
    /**
+    * \brief Lower a packUnorm4x8 expression.
+    *
+    * \param vec4_rval is packUnorm4x8's input
+    * \return packUnorm4x8's output as a uint rvalue
+    */
+   ir_rvalue*
+   lower_pack_unorm_4x8(ir_rvalue *vec4_rval)
+   {
+      /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+       *
+       *    highp uint packUnorm4x8 (vec4 v)
+       *    --------------------------------
+       *    First, converts each component of the normalized floating-point value
+       *    v into 8-bit integer values. Then, the results are packed into the
+       *    returned 32-bit unsigned integer.
+       *
+       *    The conversion for component c of v to fixed point is done as
+       *    follows:
+       *
+       *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
+       *
+       *    The first component of the vector will be written to the least
+       *    significant bits of the output; the last component will be written to
+       *    the most significant bits.
+       *
+       * This function generates IR that approximates the following pseudo-GLSL:
+       *
+       *     return pack_uvec4_to_uint(uvec4(
+       *                round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 255.0f)));
+       *
+       * Here it is safe to directly convert the vec4 to uvec4 because the the
+       * vec4 has been clamped to a non-negative range.
+       */
+
+      assert(vec4_rval->type == glsl_type::vec4_type);
+
+      ir_rvalue *result = pack_uvec4_to_uint(
+         f2u(round_even(mul(saturate(vec4_rval), constant(255.0f)))));
+
+      assert(result->type == glsl_type::uint_type);
+      return result;
+   }
+
+   /**
     * \brief Lower an unpackUnorm2x16 expression.
     *
     * \param uint_rval is unpackUnorm2x16's input
@@ -430,6 +668,47 @@ private:
    }
 
    /**
+    * \brief Lower an unpackUnorm4x8 expression.
+    *
+    * \param uint_rval is unpackUnorm4x8's input
+    * \return unpackUnorm4x8's output as a vec4 rvalue
+    */
+   ir_rvalue*
+   lower_unpack_unorm_4x8(ir_rvalue *uint_rval)
+   {
+      /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+       *
+       *    highp vec4 unpackUnorm4x8 (highp uint p)
+       *    ----------------------------------------
+       *    First, unpacks a single 32-bit unsigned integer p into four
+       *    8-bit unsigned integers. Then, each component is converted to
+       *    a normalized floating-point value to generate the returned
+       *    two-component vector.
+       *
+       *    The conversion for unpacked fixed-point value f to floating point is
+       *    done as follows:
+       *
+       *       unpackUnorm4x8: f / 255.0
+       *
+       *    The first component of the returned vector will be extracted from the
+       *    least significant bits of the input; the last component will be
+       *    extracted from the most significant bits.
+       *
+       * This function generates IR that approximates the following pseudo-GLSL:
+       *
+       *     return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0;
+       */
+
+      assert(uint_rval->type == glsl_type::uint_type);
+
+      ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)),
+                              constant(255.0f));
+
+      assert(result->type == glsl_type::vec4_type);
+      return result;
+   }
+
+   /**
     * \brief Lower the component-wise calculation of packHalf2x16.
     *
     * \param f_rval is one component of packHafl2x16's input
author	Matt Turner <[email protected]>	2013-01-21 15:31:00 -0800
committer	Matt Turner <[email protected]>	2013-01-25 14:10:23 -0800
commit	321555fb41d92ef61679ec3334e1cc24b5ab4c2d (patch)
tree	7bccb46bc3443f9e48b6e3ee3a871d74ead21fd0 /src
parent	1ef674f215b10820b94af131b6158d84b89f4e4b (diff)