diff options
author | Ilia Mirkin <[email protected]> | 2015-08-20 20:52:32 -0400 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2015-08-28 18:28:04 -0400 |
commit | 889a946a455c54a5a9bca144b2ea2fe66be39274 (patch) | |
tree | 4eddb51b9a62abb5be1217166f04d3c0f3cc8418 /src/glsl | |
parent | c676c432f30158190c260e7f3731ee6667ad4103 (diff) |
glsl: use bitfield_insert instead of and + shift + or for packing
It is fairly tricky to detect the proper conditions for using bitfield
insert, but easy to just use it up front. This removes a lot of
instructions on nvc0 when invoking the packing builtins.
Signed-off-by: Ilia Mirkin <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/glsl')
-rw-r--r-- | src/glsl/ir_optimization.h | 4 | ||||
-rw-r--r-- | src/glsl/lower_packing_builtins.cpp | 27 |
2 files changed, 27 insertions, 4 deletions
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index eef107e5249..b955874df84 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -66,7 +66,9 @@ enum lower_packing_builtins_op { LOWER_UNPACK_SNORM_4x8 = 0x0200, LOWER_PACK_UNORM_4x8 = 0x0400, - LOWER_UNPACK_UNORM_4x8 = 0x0800 + LOWER_UNPACK_UNORM_4x8 = 0x0800, + + LOWER_PACK_USE_BFI = 0x1000, }; bool do_common_optimization(exec_list *ir, bool linked, diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp index a6fb8a8837e..1d76ebf935f 100644 --- a/src/glsl/lower_packing_builtins.cpp +++ b/src/glsl/lower_packing_builtins.cpp @@ -118,6 +118,7 @@ public: *rvalue = split_unpack_half_2x16(op0); break; case LOWER_PACK_UNPACK_NONE: + case LOWER_PACK_USE_BFI: assert(!"not reached"); break; } @@ -222,9 +223,16 @@ private: /* uvec2 u = UVEC2_RVAL; */ ir_variable *u = factory.make_temp(glsl_type::uvec2_type, - "tmp_pack_uvec2_to_uint"); + "tmp_pack_uvec2_to_uint"); factory.emit(assign(u, uvec2_rval)); + if (op_mask & LOWER_PACK_USE_BFI) { + return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), + swizzle_y(u), + constant(16), + constant(16)); + } + /* return (u.y << 16) | (u.x & 0xffff); */ return bit_or(lshift(swizzle_y(u), constant(16u)), bit_and(swizzle_x(u), constant(0xffffu))); @@ -242,9 +250,22 @@ private: { assert(uvec4_rval->type == glsl_type::uvec4_type); - /* uvec4 u = UVEC4_RVAL; */ ir_variable *u = factory.make_temp(glsl_type::uvec4_type, - "tmp_pack_uvec4_to_uint"); + "tmp_pack_uvec4_to_uint"); + + if (op_mask & LOWER_PACK_USE_BFI) { + /* uvec4 u = UVEC4_RVAL; */ + factory.emit(assign(u, uvec4_rval)); + + return bitfield_insert(bitfield_insert( + bitfield_insert( + bit_and(swizzle_x(u), constant(0xffu)), + swizzle_y(u), constant(8), constant(8)), + swizzle_z(u), constant(16), constant(8)), + swizzle_w(u), constant(24), constant(8)); + } + + /* uvec4 u = UVEC4_RVAL & 0xff */ factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ |