summaryrefslogtreecommitdiffstats
path: root/src/compiler
diff options
context:
space:
mode:
authorIan Romanick <[email protected]>2016-06-24 01:17:23 -0700
committerIan Romanick <[email protected]>2016-07-19 12:19:29 -0700
commit2a381a3c73be2f2df06f3feee708bf928645cf63 (patch)
tree3f1cc2c1ddd5d4d72c1e9bd483e843395916ec0a /src/compiler
parentad9acb19c39292de220b4d7dfdd2b5673129517a (diff)
glsl: Add lowering pass for ir_unop_find_lsb
Signed-off-by: Ian Romanick <[email protected]> Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/glsl/ir_optimization.h1
-rw-r--r--src/compiler/glsl/lower_instructions.cpp86
2 files changed, 87 insertions, 0 deletions
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index 77c1260e689..abc326b2646 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -46,6 +46,7 @@
#define EXTRACT_TO_SHIFTS 0x04000
#define INSERT_TO_SHIFTS 0x08000
#define REVERSE_TO_SHIFTS 0x10000
+#define FIND_LSB_TO_FLOAT_CAST 0x20000 /* lower ir_unop_find_lsb via find_lsb_to_float_cast() */
/**
* \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
index 86af49d00b0..cfcc4100e98 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -163,6 +163,7 @@ private:
void extract_to_shifts(ir_expression *);
void insert_to_shifts(ir_expression *);
void reverse_to_shifts(ir_expression *ir);
+ void find_lsb_to_float_cast(ir_expression *ir);
};
} /* anonymous namespace */
@@ -1230,6 +1231,86 @@ lower_instructions_visitor::reverse_to_shifts(ir_expression *ir)
this->progress = true;
}
+void
+lower_instructions_visitor::find_lsb_to_float_cast(ir_expression *ir)
+{
+ /* Lower a find-LSB expression by isolating the lowest set bit of the
+ * operand, converting that single bit to float, and reading the bit index
+ * back out of the float's exponent field.
+ *
+ * \param ir  Expression to lower. Its operand 0 must have base type
+ *            GLSL_TYPE_INT or GLSL_TYPE_UINT (asserted below); the
+ *            lowering is applied per component, using the operand's
+ *            vector_elements for every temporary and constant.
+ *
+ * The temporaries (temp, lsb_only, as_float, lsb) and their assignments
+ * are inserted before base_ir, and \c ir itself is rewritten in place
+ * into an ir_triop_csel that selects -1 when no bit is set and the
+ * computed index otherwise. this->progress is set on exit.
+ *
+ * A uint operand is first converted with u2i so that the negation below
+ * is always applied to a signed temporary.
+ *
+ * For more details, see:
+ *
+ * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightFloatCast
+ */
+ const unsigned elements = ir->operands[0]->type->vector_elements;
+ ir_constant *c0 = new(ir) ir_constant(unsigned(0), elements);
+ ir_constant *cminus1 = new(ir) ir_constant(int(-1), elements);
+ ir_constant *c23 = new(ir) ir_constant(int(23), elements);
+ ir_constant *c7F = new(ir) ir_constant(int(0x7F), elements);
+ ir_variable *temp =
+ new(ir) ir_variable(glsl_type::ivec(elements), "temp", ir_var_temporary);
+ ir_variable *lsb_only =
+ new(ir) ir_variable(glsl_type::uvec(elements), "lsb_only", ir_var_temporary);
+ ir_variable *as_float =
+ new(ir) ir_variable(glsl_type::vec(elements), "as_float", ir_var_temporary);
+ ir_variable *lsb =
+ new(ir) ir_variable(glsl_type::ivec(elements), "lsb", ir_var_temporary);
+
+ ir_instruction &i = *base_ir;
+
+ i.insert_before(temp);
+
+ if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) {
+ i.insert_before(assign(temp, ir->operands[0]));
+ } else {
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+ i.insert_before(assign(temp, u2i(ir->operands[0])));
+ }
+
+ /* The uint-to-float conversion (u2f below) is lossless because
+ * (value & -value) is either a power of two or zero. We don't use the
+ * result in the zero case. The uint() cast is necessary so that
+ * 0x80000000 does not generate a negative value.
+ *
+ * uint lsb_only = uint(value & -value);
+ * float as_float = float(lsb_only);
+ */
+ i.insert_before(lsb_only);
+ i.insert_before(assign(lsb_only, i2u(bit_and(temp, neg(temp)))));
+
+ i.insert_before(as_float);
+ i.insert_before(assign(as_float, u2f(lsb_only)));
+
+ /* This is basically an open-coded frexp. Implementations that have a
+ * native frexp instruction would be better served by that. This is
+ * optimized versus a full-featured open-coded implementation in two ways:
+ *
+ * - We don't care about a correct result from subnormal numbers (including
+ * 0.0), so the raw exponent can always be safely unbiased.
+ *
+ * - The value cannot be negative, so it does not need to be masked off to
+ * extract the exponent.
+ *
+ * The shift by 23 (c23) drops the mantissa bits of the bit-cast float and
+ * the subtraction of 0x7f (c7F) removes the exponent bias:
+ *
+ * int lsb = (floatBitsToInt(as_float) >> 23) - 0x7f;
+ */
+ i.insert_before(lsb);
+ i.insert_before(assign(lsb, sub(rshift(bitcast_f2i(as_float), c23), c7F)));
+
+ /* Use lsb_only in the comparison instead of temp so that the & (far above)
+ * can possibly generate the result without an explicit comparison.
+ *
+ * (lsb_only == 0) ? -1 : lsb;
+ *
+ * Since our input values are all integers, the unbiased exponent must not
+ * be negative. It will only be negative (-0x7f, in fact) if lsb_only is
+ * 0. Instead of using (lsb_only == 0), we could use (lsb >= 0). Which is
+ * better is likely GPU dependent. Either way, the difference should be
+ * small.
+ *
+ * The original expression node is reused rather than replaced: its
+ * operation becomes ir_triop_csel and its three operands are the
+ * condition, the -1 constant, and a dereference of lsb.
+ */
+ ir->operation = ir_triop_csel;
+ ir->operands[0] = equal(lsb_only, c0);
+ ir->operands[1] = cminus1;
+ ir->operands[2] = new(ir) ir_dereference_variable(lsb);
+
+ this->progress = true;
+}
+
ir_visitor_status
lower_instructions_visitor::visit_leave(ir_expression *ir)
{
@@ -1352,6 +1433,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
reverse_to_shifts(ir);
break;
+ case ir_unop_find_lsb:
+ if (lowering(FIND_LSB_TO_FLOAT_CAST))
+ find_lsb_to_float_cast(ir);
+ break;
+
default:
return visit_continue;
}