diff options
author | Ilia Mirkin <[email protected]> | 2015-08-20 21:55:52 -0400 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2015-08-28 18:28:04 -0400 |
commit | 275c5810ca7e38560b2a77281e7a0498c50126f8 (patch) | |
tree | 9096495089118805d0e4d042b7186b91f17f2842 /src/glsl/lower_packing_builtins.cpp | |
parent | 889a946a455c54a5a9bca144b2ea2fe66be39274 (diff) |
glsl: provide the option of using BFE for unpack builtin lowering
This greatly improves generated code, especially for the snorm variants,
since it is able to get rid of the lshift/rshift for sext, as well as
replacing each shift + mask with a single op.
Signed-off-by: Ilia Mirkin <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/glsl/lower_packing_builtins.cpp')
-rw-r--r-- | src/glsl/lower_packing_builtins.cpp | 103 |
1 files changed, 90 insertions, 13 deletions
diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp index 1d76ebf935f..c8bf68be829 100644 --- a/src/glsl/lower_packing_builtins.cpp +++ b/src/glsl/lower_packing_builtins.cpp @@ -119,6 +119,7 @@ public: break; case LOWER_PACK_UNPACK_NONE: case LOWER_PACK_USE_BFI: + case LOWER_PACK_USE_BFE: assert(!"not reached"); break; } @@ -306,6 +307,39 @@ private: } /** + * \brief Unpack a uint32 into two int16's. + * + * Specifically each 16-bit value is sign-extended to the full width of an + * int32 on return. + */ + ir_rvalue * + unpack_uint_to_ivec2(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), + constant(16u)), + constant(16u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec2_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec2 i2; */ + ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, + "tmp_unpack_uint_to_ivec2_i2"); + + factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), + WRITEMASK_X)); + factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), + WRITEMASK_Y)); + + return deref(i2).val; + } + + /** * \brief Unpack a uint32 into four uint8's. 
* * Interpret the given uint32 as a uint8 4-tuple where the uint32's least @@ -329,13 +363,23 @@ private: /* u4.x = u & 0xffu; */ factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); - /* u4.y = (u >> 8u) & 0xffu; */ - factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), - constant(0xffu)), WRITEMASK_Y)); - - /* u4.z = (u >> 16u) & 0xffu; */ - factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), - constant(0xffu)), WRITEMASK_Z)); + if (op_mask & LOWER_PACK_USE_BFE) { + /* u4.y = bitfield_extract(u, 8, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(8), constant(8)), + WRITEMASK_Y)); + + /* u4.z = bitfield_extract(u, 16, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(16), constant(8)), + WRITEMASK_Z)); + } else { + /* u4.y = (u >> 8u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), + constant(0xffu)), WRITEMASK_Y)); + + /* u4.z = (u >> 16u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), + constant(0xffu)), WRITEMASK_Z)); + } /* u4.w = (u >> 24u) */ factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); @@ -344,6 +388,43 @@ private: } /** + * \brief Unpack a uint32 into four int8's. + * + * Specifically each 8-bit value is sign-extended to the full width of an + * int32 on return. 
+ */ + ir_rvalue * + unpack_uint_to_ivec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), + constant(24u)), + constant(24u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec4_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec4 i4; */ + ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, + "tmp_unpack_uint_to_ivec4_i4"); + + factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), + WRITEMASK_X)); + factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), + WRITEMASK_Y)); + factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), + WRITEMASK_Z)); + factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), + WRITEMASK_W)); + + return deref(i4).val; + } + + /** * \brief Lower a packSnorm2x16 expression. * * \param vec2_rval is packSnorm2x16's input @@ -489,9 +570,7 @@ private: assert(uint_rval->type == glsl_type::uint_type); ir_rvalue *result = - clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), - constant(16)), - constant(16u))), + clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), constant(32767.0f)), constant(-1.0f), constant(1.0f)); @@ -548,9 +627,7 @@ private: assert(uint_rval->type == glsl_type::uint_type); ir_rvalue *result = - clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), - constant(24u)), - constant(24u))), + clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), constant(127.0f)), constant(-1.0f), constant(1.0f)); |