summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Ilia Mirkin <[email protected]> 2015-08-20 21:55:52 -0400
committer: Ilia Mirkin <[email protected]> 2015-08-28 18:28:04 -0400
commit: 275c5810ca7e38560b2a77281e7a0498c50126f8 (patch)
tree: 9096495089118805d0e4d042b7186b91f17f2842 /src
parent: 889a946a455c54a5a9bca144b2ea2fe66be39274 (diff)
glsl: provide the option of using BFE for unpack builtin lowering
This greatly improves generated code, especially for the snorm variants, since it is able to get rid of the lshift/rshift for sext, as well as replacing each shift + mask with a single op.

Signed-off-by: Ilia Mirkin <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/glsl/ir_builder.cpp 6
-rw-r--r-- src/glsl/ir_builder.h 1
-rw-r--r-- src/glsl/ir_optimization.h 1
-rw-r--r-- src/glsl/lower_packing_builtins.cpp 103
-rw-r--r-- src/mesa/state_tracker/st_glsl_to_tgsi.cpp 3
5 files changed, 100 insertions, 14 deletions
diff --git a/src/glsl/ir_builder.cpp b/src/glsl/ir_builder.cpp
index cd03859cac0..c9cf1240dfe 100644
--- a/src/glsl/ir_builder.cpp
+++ b/src/glsl/ir_builder.cpp
@@ -567,6 +567,12 @@ csel(operand a, operand b, operand c)
}
ir_expression *
+bitfield_extract(operand a, operand b, operand c) /* builder helper for ir_triop_bitfield_extract */
+{
+ return expr(ir_triop_bitfield_extract, a, b, c); /* operands forwarded unchanged; the unpack lowering calls this as (value, offset, bits) */
+}
+
+ir_expression *
bitfield_insert(operand a, operand b, operand c, operand d)
{
void *mem_ctx = ralloc_parent(a.val);
diff --git a/src/glsl/ir_builder.h b/src/glsl/ir_builder.h
index f76453ffcf0..b483ebf6269 100644
--- a/src/glsl/ir_builder.h
+++ b/src/glsl/ir_builder.h
@@ -200,6 +200,7 @@ ir_expression *interpolate_at_sample(operand a, operand b);
ir_expression *fma(operand a, operand b, operand c);
ir_expression *lrp(operand x, operand y, operand a);
ir_expression *csel(operand a, operand b, operand c);
+ir_expression *bitfield_extract(operand a, operand b, operand c);
ir_expression *bitfield_insert(operand a, operand b, operand c, operand d);
ir_swizzle *swizzle(operand a, int swizzle, int components);
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index b955874df84..265b2234cb6 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -69,6 +69,7 @@ enum lower_packing_builtins_op {
LOWER_UNPACK_UNORM_4x8 = 0x0800,
LOWER_PACK_USE_BFI = 0x1000,
+ LOWER_PACK_USE_BFE = 0x2000,
};
bool do_common_optimization(exec_list *ir, bool linked,
diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp
index 1d76ebf935f..c8bf68be829 100644
--- a/src/glsl/lower_packing_builtins.cpp
+++ b/src/glsl/lower_packing_builtins.cpp
@@ -119,6 +119,7 @@ public:
break;
case LOWER_PACK_UNPACK_NONE:
case LOWER_PACK_USE_BFI:
+ case LOWER_PACK_USE_BFE:
assert(!"not reached");
break;
}
@@ -306,6 +307,39 @@ private:
}
/**
+ * \brief Unpack a uint32 into two int16's, returned as an ivec2.
+ *
+ * Each 16-bit value is sign-extended to the full width of an int32. On the
+ * BFE path x is taken from bits 0..15 and y from bits 16..31 of \p uint_rval.
+ */
+ ir_rvalue *
+ unpack_uint_to_ivec2(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+ /* No BFE: shift each lane left then right by 16 to sign-extend it. */
+ if (!(op_mask & LOWER_PACK_USE_BFE)) {
+ return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)),
+ constant(16u)),
+ constant(16u));
+ }
+ /* int i = int(uint_rval); the extracts below then operate on an int. */
+ ir_variable *i = factory.make_temp(glsl_type::int_type,
+ "tmp_unpack_uint_to_ivec2_i");
+ factory.emit(assign(i, u2i(uint_rval)));
+
+ /* ivec2 i2; */
+ ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type,
+ "tmp_unpack_uint_to_ivec2_i2");
+ /* i2.x = bitfield_extract(i, 0, 16); i2.y = bitfield_extract(i, 16, 16); */
+ factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)),
+ WRITEMASK_X));
+ factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)),
+ WRITEMASK_Y));
+
+ return deref(i2).val;
+ }
+
+ /**
* \brief Unpack a uint32 into four uint8's.
*
* Interpret the given uint32 as a uint8 4-tuple where the uint32's least
@@ -329,13 +363,23 @@ private:
/* u4.x = u & 0xffu; */
factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));
- /* u4.y = (u >> 8u) & 0xffu; */
- factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
- constant(0xffu)), WRITEMASK_Y));
-
- /* u4.z = (u >> 16u) & 0xffu; */
- factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
- constant(0xffu)), WRITEMASK_Z));
+ if (op_mask & LOWER_PACK_USE_BFE) {
+ /* u4.y = bitfield_extract(u, 8, 8); */
+ factory.emit(assign(u4, bitfield_extract(u, constant(8), constant(8)),
+ WRITEMASK_Y));
+
+ /* u4.z = bitfield_extract(u, 16, 8); */
+ factory.emit(assign(u4, bitfield_extract(u, constant(16), constant(8)),
+ WRITEMASK_Z));
+ } else {
+ /* u4.y = (u >> 8u) & 0xffu; */
+ factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
+ constant(0xffu)), WRITEMASK_Y));
+
+ /* u4.z = (u >> 16u) & 0xffu; */
+ factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
+ constant(0xffu)), WRITEMASK_Z));
+ }
/* u4.w = (u >> 24u) */
factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));
@@ -344,6 +388,43 @@ private:
}
/**
+ * \brief Unpack a uint32 into four int8's, returned as an ivec4.
+ *
+ * Each 8-bit value is sign-extended to the full width of an int32. On the
+ * BFE path x, y, z, w are taken from bit offsets 0, 8, 16, 24 of \p uint_rval.
+ */
+ ir_rvalue *
+ unpack_uint_to_ivec4(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+ /* No BFE: shift each lane left then right by 24 to sign-extend it. */
+ if (!(op_mask & LOWER_PACK_USE_BFE)) {
+ return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
+ constant(24u)),
+ constant(24u));
+ }
+ /* int i = int(uint_rval); the extracts below then operate on an int. */
+ ir_variable *i = factory.make_temp(glsl_type::int_type,
+ "tmp_unpack_uint_to_ivec4_i");
+ factory.emit(assign(i, u2i(uint_rval)));
+
+ /* ivec4 i4; */
+ ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type,
+ "tmp_unpack_uint_to_ivec4_i4");
+ /* i4.{x,y,z,w} = bitfield_extract(i, {0,8,16,24}, 8); one write per lane. */
+ factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)),
+ WRITEMASK_X));
+ factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)),
+ WRITEMASK_Y));
+ factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)),
+ WRITEMASK_Z));
+ factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)),
+ WRITEMASK_W));
+
+ return deref(i4).val;
+ }
+
+ /**
* \brief Lower a packSnorm2x16 expression.
*
* \param vec2_rval is packSnorm2x16's input
@@ -489,9 +570,7 @@ private:
assert(uint_rval->type == glsl_type::uint_type);
ir_rvalue *result =
- clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)),
- constant(16)),
- constant(16u))),
+ clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)),
constant(32767.0f)),
constant(-1.0f),
constant(1.0f));
@@ -548,9 +627,7 @@ private:
assert(uint_rval->type == glsl_type::uint_type);
ir_rvalue *result =
- clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
- constant(24u)),
- constant(24u))),
+ clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)),
constant(127.0f)),
constant(-1.0f),
constant(1.0f));
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 7a8c4e1b8fa..95a25c12fb4 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -6020,7 +6020,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
LOWER_UNPACK_HALF_2x16;
if (ctx->Extensions.ARB_gpu_shader5)
- lower_inst |= LOWER_PACK_USE_BFI;
+ lower_inst |= LOWER_PACK_USE_BFI |
+ LOWER_PACK_USE_BFE;
lower_packing_builtins(ir, lower_inst);
}