summary | refs | log | tree | commit | diff | stats
path: root/src/compiler
diff options
context:
space:
mode:
author Ian Romanick <[email protected]> 2016-06-24 00:11:26 -0700
committer Ian Romanick <[email protected]> 2016-07-19 12:19:28 -0700
commit ad9acb19c39292de220b4d7dfdd2b5673129517a (patch)
tree 65416426d798a670b842ce7454a6bddccba14069 /src/compiler
parent 3079dcb00c19aa4773d1a1133bd8c4d1a48e4375 (diff)
glsl: Add lowering pass for ir_unop_bitfield_reverse
Signed-off-by: Ian Romanick <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/compiler')
-rw-r--r-- src/compiler/glsl/ir_optimization.h 1
-rw-r--r-- src/compiler/glsl/lower_instructions.cpp 91
2 files changed, 92 insertions, 0 deletions
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index 6fda9f62b71..77c1260e689 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -45,6 +45,7 @@
#define BIT_COUNT_TO_MATH 0x02000
#define EXTRACT_TO_SHIFTS 0x04000
#define INSERT_TO_SHIFTS 0x08000
+#define REVERSE_TO_SHIFTS 0x10000 /* lower ir_unop_bitfield_reverse to shifts and masks */
/**
* \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
index 902294397ab..86af49d00b0 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -162,6 +162,7 @@ private:
void bit_count_to_math(ir_expression *);
void extract_to_shifts(ir_expression *);
void insert_to_shifts(ir_expression *);
+ void reverse_to_shifts(ir_expression *ir);
};
} /* anonymous namespace */
@@ -1143,6 +1144,92 @@ lower_instructions_visitor::insert_to_shifts(ir_expression *ir)
this->progress = true;
}
+void
+lower_instructions_visitor::reverse_to_shifts(ir_expression *ir)
+{
+ /* Lower ir_unop_bitfield_reverse into shifts and masks.
+ *
+ * The operand is copied into an unsigned temporary (through i2u when it is
+ * signed) and its bits are reversed by swapping progressively larger
+ * groups: single bits, pairs, nibbles, bytes and finally 16-bit halves.
+ * The first four swaps are emitted as assignments inserted before base_ir;
+ * the final swap is written into ir itself, either as a bit_or expression
+ * (unsigned operand) or as a u2i of that bit_or (signed operand).
+ *
+ * Every shift amount and mask below is created with the operand's
+ * vector_elements count. The first use of a constant takes the object
+ * itself and the second use takes a clone().
+ * NOTE(review): presumably because an IR node may only be referenced from
+ * one place in the tree -- confirm.
+ *
+ * For more details, see:
+ *
+ * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
+ */
+ ir_constant *c1 =
+ new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements);
+ ir_constant *c2 =
+ new(ir) ir_constant(2u, ir->operands[0]->type->vector_elements);
+ ir_constant *c4 =
+ new(ir) ir_constant(4u, ir->operands[0]->type->vector_elements);
+ ir_constant *c8 =
+ new(ir) ir_constant(8u, ir->operands[0]->type->vector_elements);
+ ir_constant *c16 =
+ new(ir) ir_constant(16u, ir->operands[0]->type->vector_elements);
+ ir_constant *c33333333 =
+ new(ir) ir_constant(0x33333333u, ir->operands[0]->type->vector_elements);
+ ir_constant *c55555555 =
+ new(ir) ir_constant(0x55555555u, ir->operands[0]->type->vector_elements);
+ ir_constant *c0F0F0F0F =
+ new(ir) ir_constant(0x0F0F0F0Fu, ir->operands[0]->type->vector_elements);
+ ir_constant *c00FF00FF =
+ new(ir) ir_constant(0x00FF00FFu, ir->operands[0]->type->vector_elements);
+ ir_variable *temp =
+ new(ir) ir_variable(glsl_type::uvec(ir->operands[0]->type->vector_elements),
+ "temp", ir_var_temporary);
+ ir_instruction &i = *base_ir;
+
+ i.insert_before(temp); /* the temporary goes in ahead of base_ir, before any assignment to it */
+
+ if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
+ i.insert_before(assign(temp, ir->operands[0]));
+ } else {
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+ i.insert_before(assign(temp, i2u(ir->operands[0]))); /* temp is a uvec, so convert the signed operand first */
+ }
+
+ /* Swap odd and even bits.
+ *
+ * temp = ((temp >> 1) & 0x55555555u) | ((temp & 0x55555555u) << 1);
+ */
+ i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c1), c55555555),
+ lshift(bit_and(temp, c55555555->clone(ir, NULL)),
+ c1->clone(ir, NULL)))));
+ /* Swap consecutive pairs.
+ *
+ * temp = ((temp >> 2) & 0x33333333u) | ((temp & 0x33333333u) << 2);
+ */
+ i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c2), c33333333),
+ lshift(bit_and(temp, c33333333->clone(ir, NULL)),
+ c2->clone(ir, NULL)))));
+
+ /* Swap nibbles.
+ *
+ * temp = ((temp >> 4) & 0x0F0F0F0Fu) | ((temp & 0x0F0F0F0Fu) << 4);
+ */
+ i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c4), c0F0F0F0F),
+ lshift(bit_and(temp, c0F0F0F0F->clone(ir, NULL)),
+ c4->clone(ir, NULL)))));
+
+ /* The last step is, basically, bswap. Swap the bytes, then swap the
+ * words. When this code is run through GCC on x86, it does generate a
+ * bswap instruction.
+ *
+ * temp = ((temp >> 8) & 0x00FF00FFu) | ((temp & 0x00FF00FFu) << 8);
+ * temp = ( temp >> 16 ) | ( temp << 16);
+ *
+ * Only the byte swap is emitted as an assignment to temp. The 16-bit
+ * swap is not assigned back to temp: it replaces the contents of ir
+ * itself in the if/else that follows, so ir evaluates to the reversed
+ * value.
+ */
+ i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c8), c00FF00FF),
+ lshift(bit_and(temp, c00FF00FF->clone(ir, NULL)),
+ c8->clone(ir, NULL)))));
+
+ if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
+ ir->operation = ir_binop_bit_or; /* ir becomes (temp >> 16) | (temp << 16) directly */
+ ir->operands[0] = rshift(temp, c16);
+ ir->operands[1] = lshift(temp, c16->clone(ir, NULL));
+ } else {
+ ir->operation = ir_unop_u2i; /* signed operand: convert the unsigned result back with u2i */
+ ir->operands[0] = bit_or(rshift(temp, c16),
+ lshift(temp, c16->clone(ir, NULL)));
+ }
+
+ this->progress = true;
+}
+
ir_visitor_status
lower_instructions_visitor::visit_leave(ir_expression *ir)
{
@@ -1260,6 +1347,10 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
insert_to_shifts(ir);
break;
+ case ir_unop_bitfield_reverse:
+ if (lowering(REVERSE_TO_SHIFTS))
+ reverse_to_shifts(ir);
+ break;
default:
return visit_continue;