summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ian Romanick <[email protected]> 2016-06-24 01:53:33 -0700
committer Ian Romanick <[email protected]> 2016-07-19 12:19:29 -0700
commit 1b5477668a7b1c448f64a69fbb488bcf47c3e029 (patch)
tree 5239a7b7631dffa326531c40a8499be759eae3a9
parent 2a381a3c73be2f2df06f3feee708bf928645cf63 (diff)
glsl: Add lowering pass for ir_unop_find_msb
Signed-off-by: Ian Romanick <[email protected]> Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- src/compiler/glsl/ir_optimization.h 1
-rw-r--r-- src/compiler/glsl/lower_instructions.cpp 106
2 files changed, 107 insertions, 0 deletions
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index abc326b2646..2c7e6155ecb 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -47,6 +47,7 @@
#define INSERT_TO_SHIFTS 0x08000
#define REVERSE_TO_SHIFTS 0x10000
#define FIND_LSB_TO_FLOAT_CAST 0x20000
+#define FIND_MSB_TO_FLOAT_CAST 0x40000
/**
* \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
index cfcc4100e98..4be60273e0d 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -164,6 +164,7 @@ private:
void insert_to_shifts(ir_expression *);
void reverse_to_shifts(ir_expression *ir);
void find_lsb_to_float_cast(ir_expression *ir);
+ void find_msb_to_float_cast(ir_expression *ir);
};
} /* anonymous namespace */
@@ -1311,6 +1312,106 @@ lower_instructions_visitor::find_lsb_to_float_cast(ir_expression *ir)
this->progress = true;
}
+void
+lower_instructions_visitor::find_msb_to_float_cast(ir_expression *ir)
+{
+ /* Lower ir_unop_find_msb by converting the operand to float and reading
+ * the position of the most significant set bit out of the float's
+ * exponent field. The helper temporaries and their assignments are
+ * inserted before base_ir, and ir itself is rewritten in place into an
+ * ir_triop_csel that selects between -1 and the computed bit index.
+ *
+ * For more details, see:
+ *
+ * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightFloatCast
+ *
+ * NOTE(review): judging by its anchor, the linked section covers the
+ * trailing-zero (find_lsb) variant of the float-cast trick; the exponent
+ * extraction idea is the same here. Confirm whether a more specific
+ * reference should be cited.
+ */
+ const unsigned elements = ir->operands[0]->type->vector_elements;
+ ir_constant *c0 = new(ir) ir_constant(int(0), elements);
+ ir_constant *cminus1 = new(ir) ir_constant(int(-1), elements);
+ ir_constant *c23 = new(ir) ir_constant(int(23), elements);
+ ir_constant *c7F = new(ir) ir_constant(int(0x7F), elements);
+ ir_constant *c000000FF = new(ir) ir_constant(0x000000FFu, elements);
+ ir_constant *cFFFFFF00 = new(ir) ir_constant(0xFFFFFF00u, elements);
+ ir_variable *temp =
+ new(ir) ir_variable(glsl_type::uvec(elements), "temp", ir_var_temporary);
+ ir_variable *as_float =
+ new(ir) ir_variable(glsl_type::vec(elements), "as_float", ir_var_temporary);
+ ir_variable *msb =
+ new(ir) ir_variable(glsl_type::ivec(elements), "msb", ir_var_temporary);
+
+ ir_instruction &i = *base_ir;
+
+ i.insert_before(temp);
+
+ if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
+ i.insert_before(assign(temp, ir->operands[0]));
+ } else {
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+
+ /* findMSB(uint(abs(some_int))) almost always does the right thing.
+ * There are two problem values:
+ *
+ * * 0x80000000. Since abs(0x80000000) == 0x80000000, findMSB returns
+ * 31. However, findMSB(int(0x80000000)) == 30.
+ *
+ * * 0xffffffff. Since abs(0xffffffff) == 1, findMSB returns
+ * 0. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
+ *
+ * For a value of zero or negative one, -1 will be returned.
+ *
+ * For all negative number cases, including 0x80000000 and 0xffffffff,
+ * the correct value is obtained from findMSB if instead of negating the
+ * (already negative) value the bitwise-not is used. A conditional
+ * bitwise-not can be achieved in two instructions:
+ *
+ * uint temp = uint(as_int ^ (as_int >> 31));
+ *
+ * NOTE(review): this assumes the right shift of a negative int fills
+ * with copies of the sign bit, so the XOR mask is all ones for negative
+ * inputs and zero otherwise.
+ */
+ ir_variable *as_int =
+ new(ir) ir_variable(glsl_type::ivec(elements), "as_int", ir_var_temporary);
+ ir_constant *c31 = new(ir) ir_constant(int(31), elements);
+
+ i.insert_before(as_int);
+ i.insert_before(assign(as_int, ir->operands[0]));
+ i.insert_before(assign(temp, i2u(expr(ir_binop_bit_xor,
+ as_int,
+ rshift(as_int, c31)))));
+ }
+
+ /* The uint-to-float conversion is lossless because bits are conditionally
+ * masked off the bottom of temp to ensure the value has at most 24 bits of
+ * data or is zero. We don't use the result in the zero case. The
+ * conversion is done on the unsigned value (u2f) so that 0x80000000 does
+ * not generate a negative value.
+ *
+ * float as_float = float(temp > 255 ? temp & ~255 : temp);
+ */
+ i.insert_before(as_float);
+ i.insert_before(assign(as_float, u2f(csel(greater(temp, c000000FF),
+ bit_and(temp, cFFFFFF00),
+ temp))));
+
+ /* This is basically an open-coded frexp. Implementations that have a
+ * native frexp instruction would be better served by that. This is
+ * optimized versus a full-featured open-coded implementation in two ways:
+ *
+ * - We don't care about a correct result from subnormal numbers (including
+ * 0.0), so the raw exponent can always be safely unbiased.
+ *
+ * - The value cannot be negative, so it does not need to be masked off to
+ * extract the exponent.
+ *
+ * int msb = (floatBitsToInt(as_float) >> 23) - 0x7f;
+ */
+ i.insert_before(msb);
+ i.insert_before(assign(msb, sub(rshift(bitcast_f2i(as_float), c23), c7F)));
+
+ /* Use msb in the comparison instead of temp so that the subtract can
+ * possibly generate the result without an explicit comparison.
+ *
+ * (msb < 0) ? -1 : msb;
+ *
+ * Since our input values are all integers, the unbiased exponent cannot
+ * be negative for any nonzero temp. It will only be negative (-0x7f, in
+ * fact) if temp is 0, which is exactly the case that must yield -1.
+ */
+ ir->operation = ir_triop_csel;
+ ir->operands[0] = less(msb, c0);
+ ir->operands[1] = cminus1;
+ ir->operands[2] = new(ir) ir_dereference_variable(msb);
+
+ this->progress = true;
+}
+
ir_visitor_status
lower_instructions_visitor::visit_leave(ir_expression *ir)
{
@@ -1438,6 +1539,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
find_lsb_to_float_cast(ir);
break;
+ case ir_unop_find_msb:
+ if (lowering(FIND_MSB_TO_FLOAT_CAST))
+ find_msb_to_float_cast(ir);
+ break;
+
default:
return visit_continue;
}