aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ilia Mirkin <[email protected]> 2015-08-20 20:52:32 -0400
committer Ilia Mirkin <[email protected]> 2015-08-28 18:28:04 -0400
commit 889a946a455c54a5a9bca144b2ea2fe66be39274 (patch)
tree 4eddb51b9a62abb5be1217166f04d3c0f3cc8418
parent c676c432f30158190c260e7f3731ee6667ad4103 (diff)
glsl: use bitfield_insert instead of and + shift + or for packing
It is fairly tricky to detect the proper conditions for using bitfield insert, but easy to just use it up front. This removes a lot of instructions on nvc0 when invoking the packing builtins.

Signed-off-by: Ilia Mirkin <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- src/glsl/ir_optimization.h 4
-rw-r--r-- src/glsl/lower_packing_builtins.cpp 27
-rw-r--r-- src/mesa/state_tracker/st_glsl_to_tgsi.cpp 3
3 files changed, 30 insertions, 4 deletions
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index eef107e5249..b955874df84 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -66,7 +66,9 @@ enum lower_packing_builtins_op {
LOWER_UNPACK_SNORM_4x8 = 0x0200,
LOWER_PACK_UNORM_4x8 = 0x0400,
- LOWER_UNPACK_UNORM_4x8 = 0x0800
+ LOWER_UNPACK_UNORM_4x8 = 0x0800,
+
+ LOWER_PACK_USE_BFI = 0x1000,
};
bool do_common_optimization(exec_list *ir, bool linked,
diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp
index a6fb8a8837e..1d76ebf935f 100644
--- a/src/glsl/lower_packing_builtins.cpp
+++ b/src/glsl/lower_packing_builtins.cpp
@@ -118,6 +118,7 @@ public:
*rvalue = split_unpack_half_2x16(op0);
break;
case LOWER_PACK_UNPACK_NONE:
+ case LOWER_PACK_USE_BFI:
assert(!"not reached");
break;
}
@@ -222,9 +223,16 @@ private:
/* uvec2 u = UVEC2_RVAL; */
ir_variable *u = factory.make_temp(glsl_type::uvec2_type,
- "tmp_pack_uvec2_to_uint");
+ "tmp_pack_uvec2_to_uint");
factory.emit(assign(u, uvec2_rval));
+ if (op_mask & LOWER_PACK_USE_BFI) {
+ return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)),
+ swizzle_y(u),
+ constant(16),
+ constant(16));
+ }
+
/* return (u.y << 16) | (u.x & 0xffff); */
return bit_or(lshift(swizzle_y(u), constant(16u)),
bit_and(swizzle_x(u), constant(0xffffu)));
@@ -242,9 +250,22 @@ private:
{
assert(uvec4_rval->type == glsl_type::uvec4_type);
- /* uvec4 u = UVEC4_RVAL; */
ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
- "tmp_pack_uvec4_to_uint");
+ "tmp_pack_uvec4_to_uint");
+
+ if (op_mask & LOWER_PACK_USE_BFI) {
+ /* uvec4 u = UVEC4_RVAL; */
+ factory.emit(assign(u, uvec4_rval));
+
+ return bitfield_insert(bitfield_insert(
+ bitfield_insert(
+ bit_and(swizzle_x(u), constant(0xffu)),
+ swizzle_y(u), constant(8), constant(8)),
+ swizzle_z(u), constant(16), constant(8)),
+ swizzle_w(u), constant(24), constant(8));
+ }
+
+ /* uvec4 u = UVEC4_RVAL & 0xff */
factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));
/* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 695644117ac..7a8c4e1b8fa 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -6019,6 +6019,9 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
LOWER_PACK_HALF_2x16 |
LOWER_UNPACK_HALF_2x16;
+ if (ctx->Extensions.ARB_gpu_shader5)
+ lower_inst |= LOWER_PACK_USE_BFI;
+
lower_packing_builtins(ir, lower_inst);
}