diff options
author | Ilia Mirkin <[email protected]> | 2015-08-20 20:52:32 -0400 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2015-08-28 18:28:04 -0400 |
commit | 889a946a455c54a5a9bca144b2ea2fe66be39274 (patch) | |
tree | 4eddb51b9a62abb5be1217166f04d3c0f3cc8418 | |
parent | c676c432f30158190c260e7f3731ee6667ad4103 (diff) |
glsl: use bitfield_insert instead of and + shift + or for packing
Detecting after the fact that an and + shift + or sequence can be replaced
by bitfield_insert is fairly tricky, but it is easy to emit bitfield_insert
up front in the packing lowering pass. This removes a lot of
instructions on nvc0 when invoking the packing builtins.
Signed-off-by: Ilia Mirkin <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- | src/glsl/ir_optimization.h | 4 | ||||
-rw-r--r-- | src/glsl/lower_packing_builtins.cpp | 27 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 |
3 files changed, 30 insertions, 4 deletions
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index eef107e5249..b955874df84 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -66,7 +66,9 @@ enum lower_packing_builtins_op { LOWER_UNPACK_SNORM_4x8 = 0x0200, LOWER_PACK_UNORM_4x8 = 0x0400, - LOWER_UNPACK_UNORM_4x8 = 0x0800 + LOWER_UNPACK_UNORM_4x8 = 0x0800, + + LOWER_PACK_USE_BFI = 0x1000, }; bool do_common_optimization(exec_list *ir, bool linked, diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp index a6fb8a8837e..1d76ebf935f 100644 --- a/src/glsl/lower_packing_builtins.cpp +++ b/src/glsl/lower_packing_builtins.cpp @@ -118,6 +118,7 @@ public: *rvalue = split_unpack_half_2x16(op0); break; case LOWER_PACK_UNPACK_NONE: + case LOWER_PACK_USE_BFI: assert(!"not reached"); break; } @@ -222,9 +223,16 @@ private: /* uvec2 u = UVEC2_RVAL; */ ir_variable *u = factory.make_temp(glsl_type::uvec2_type, - "tmp_pack_uvec2_to_uint"); + "tmp_pack_uvec2_to_uint"); factory.emit(assign(u, uvec2_rval)); + if (op_mask & LOWER_PACK_USE_BFI) { + return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), + swizzle_y(u), + constant(16), + constant(16)); + } + /* return (u.y << 16) | (u.x & 0xffff); */ return bit_or(lshift(swizzle_y(u), constant(16u)), bit_and(swizzle_x(u), constant(0xffffu))); @@ -242,9 +250,22 @@ private: { assert(uvec4_rval->type == glsl_type::uvec4_type); - /* uvec4 u = UVEC4_RVAL; */ ir_variable *u = factory.make_temp(glsl_type::uvec4_type, - "tmp_pack_uvec4_to_uint"); + "tmp_pack_uvec4_to_uint"); + + if (op_mask & LOWER_PACK_USE_BFI) { + /* uvec4 u = UVEC4_RVAL; */ + factory.emit(assign(u, uvec4_rval)); + + return bitfield_insert(bitfield_insert( + bitfield_insert( + bit_and(swizzle_x(u), constant(0xffu)), + swizzle_y(u), constant(8), constant(8)), + swizzle_z(u), constant(16), constant(8)), + swizzle_w(u), constant(24), constant(8)); + } + + /* uvec4 u = UVEC4_RVAL & 0xff */ factory.emit(assign(u, bit_and(uvec4_rval, 
constant(0xffu)))); /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 695644117ac..7a8c4e1b8fa 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -6019,6 +6019,9 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16; + if (ctx->Extensions.ARB_gpu_shader5) + lower_inst |= LOWER_PACK_USE_BFI; + lower_packing_builtins(ir, lower_inst); } |