diff options
author | Ilia Mirkin <[email protected]> | 2015-08-20 21:55:52 -0400 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2015-08-28 18:28:04 -0400 |
commit | 275c5810ca7e38560b2a77281e7a0498c50126f8 (patch) | |
tree | 9096495089118805d0e4d042b7186b91f17f2842 | |
parent | 889a946a455c54a5a9bca144b2ea2fe66be39274 (diff) |
glsl: provide the option of using BFE for unpack builting lowering
This greatly improves generated code, especially for the snorm variants,
since it is able to get rid of the lshift/rshift for sext, as well as
replacing each shift + mask with a single op.
Signed-off-by: Ilia Mirkin <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- | src/glsl/ir_builder.cpp | 6 | ||||
-rw-r--r-- | src/glsl/ir_builder.h | 1 | ||||
-rw-r--r-- | src/glsl/ir_optimization.h | 1 | ||||
-rw-r--r-- | src/glsl/lower_packing_builtins.cpp | 103 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 |
5 files changed, 100 insertions, 14 deletions
diff --git a/src/glsl/ir_builder.cpp b/src/glsl/ir_builder.cpp index cd03859cac0..c9cf1240dfe 100644 --- a/src/glsl/ir_builder.cpp +++ b/src/glsl/ir_builder.cpp @@ -567,6 +567,12 @@ csel(operand a, operand b, operand c) } ir_expression * +bitfield_extract(operand a, operand b, operand c) +{ + return expr(ir_triop_bitfield_extract, a, b, c); +} + +ir_expression * bitfield_insert(operand a, operand b, operand c, operand d) { void *mem_ctx = ralloc_parent(a.val); diff --git a/src/glsl/ir_builder.h b/src/glsl/ir_builder.h index f76453ffcf0..b483ebf6269 100644 --- a/src/glsl/ir_builder.h +++ b/src/glsl/ir_builder.h @@ -200,6 +200,7 @@ ir_expression *interpolate_at_sample(operand a, operand b); ir_expression *fma(operand a, operand b, operand c); ir_expression *lrp(operand x, operand y, operand a); ir_expression *csel(operand a, operand b, operand c); +ir_expression *bitfield_extract(operand a, operand b, operand c); ir_expression *bitfield_insert(operand a, operand b, operand c, operand d); ir_swizzle *swizzle(operand a, int swizzle, int components); diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index b955874df84..265b2234cb6 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -69,6 +69,7 @@ enum lower_packing_builtins_op { LOWER_UNPACK_UNORM_4x8 = 0x0800, LOWER_PACK_USE_BFI = 0x1000, + LOWER_PACK_USE_BFE = 0x2000, }; bool do_common_optimization(exec_list *ir, bool linked, diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp index 1d76ebf935f..c8bf68be829 100644 --- a/src/glsl/lower_packing_builtins.cpp +++ b/src/glsl/lower_packing_builtins.cpp @@ -119,6 +119,7 @@ public: break; case LOWER_PACK_UNPACK_NONE: case LOWER_PACK_USE_BFI: + case LOWER_PACK_USE_BFE: assert(!"not reached"); break; } @@ -306,6 +307,39 @@ private: } /** + * \brief Unpack a uint32 into two int16's. + * + * Specifically each 16-bit value is sign-extended to the full width of an + * int32 on return. + */ + ir_rvalue * + unpack_uint_to_ivec2(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), + constant(16u)), + constant(16u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec2_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec2 i2; */ + ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, + "tmp_unpack_uint_to_ivec2_i2"); + + factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), + WRITEMASK_X)); + factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), + WRITEMASK_Y)); + + return deref(i2).val; + } + + /** * \brief Unpack a uint32 into four uint8's. * * Interpret the given uint32 as a uint8 4-tuple where the uint32's least @@ -329,13 +363,23 @@ private: /* u4.x = u & 0xffu; */ factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); - /* u4.y = (u >> 8u) & 0xffu; */ - factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), - constant(0xffu)), WRITEMASK_Y)); - - /* u4.z = (u >> 16u) & 0xffu; */ - factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), - constant(0xffu)), WRITEMASK_Z)); + if (op_mask & LOWER_PACK_USE_BFE) { + /* u4.y = bitfield_extract(u, 8, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(8), constant(8)), + WRITEMASK_Y)); + + /* u4.z = bitfield_extract(u, 16, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(16), constant(8)), + WRITEMASK_Z)); + } else { + /* u4.y = (u >> 8u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), + constant(0xffu)), WRITEMASK_Y)); + + /* u4.z = (u >> 16u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), + constant(0xffu)), WRITEMASK_Z)); + } /* u4.w = (u >> 24u) */ factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); @@ -344,6 +388,43 @@ private: } /** + * \brief Unpack a uint32 into four int8's. + * + * Specifically each 8-bit value is sign-extended to the full width of an + * int32 on return. + */ + ir_rvalue * + unpack_uint_to_ivec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), + constant(24u)), + constant(24u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec4_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec4 i4; */ + ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, + "tmp_unpack_uint_to_ivec4_i4"); + + factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), + WRITEMASK_X)); + factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), + WRITEMASK_Y)); + factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), + WRITEMASK_Z)); + factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), + WRITEMASK_W)); + + return deref(i4).val; + } + + /** * \brief Lower a packSnorm2x16 expression. * * \param vec2_rval is packSnorm2x16's input @@ -489,9 +570,7 @@ private: assert(uint_rval->type == glsl_type::uint_type); ir_rvalue *result = - clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), - constant(16)), - constant(16u))), + clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), constant(32767.0f)), constant(-1.0f), constant(1.0f)); @@ -548,9 +627,7 @@ private: assert(uint_rval->type == glsl_type::uint_type); ir_rvalue *result = - clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), - constant(24u)), - constant(24u))), + clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), constant(127.0f)), constant(-1.0f), constant(1.0f)); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 7a8c4e1b8fa..95a25c12fb4 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -6020,7 +6020,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) LOWER_UNPACK_HALF_2x16; if (ctx->Extensions.ARB_gpu_shader5) - lower_inst |= LOWER_PACK_USE_BFI; + lower_inst |= LOWER_PACK_USE_BFI | + LOWER_PACK_USE_BFE; lower_packing_builtins(ir, lower_inst); } |