glsl: Add lowering pass for ir_unop_bit_count

Signed-off-by: Ian Romanick <[email protected]> Reviewed-by: Matt Turner <[email protected]>
author: Ian Romanick <[email protected]> 2016-06-23 16:16:21 -0700
committer: Ian Romanick <[email protected]> 2016-07-19 12:19:28 -0700
commit: 7340be8a01ca2df2d8578fa93589dbe5dea37e18 (patch)
tree: f18be4ba2f09563efcc22b5ba2880ba4b2e5da1c
parent: 806add360ff2ffbf70c6a52c64115fcb7ef718bd (diff)
2 files changed, 54 insertions, 0 deletions
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index ba790762a62..c85600345d0 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -42,6 +42,7 @@
 #define SAT_TO_CLAMP       0x400
 #define DOPS_TO_DFRAC      0x800
 #define DFREXP_DLDEXP_TO_ARITH    0x1000
+#define BIT_COUNT_TO_MATH         0x02000
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
index 16d92d64cd9..fc87605c43f 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -159,6 +159,7 @@ private:
    void dround_even_to_dfrac(ir_expression *);
    void dtrunc_to_dfrac(ir_expression *);
    void dsign_to_csel(ir_expression *);
+   void bit_count_to_math(ir_expression *);
 };
 
 } /* anonymous namespace */
@@ -954,6 +955,52 @@ lower_instructions_visitor::dsign_to_csel(ir_expression *ir)
    this->progress = true;
 }
 
+void
+lower_instructions_visitor::bit_count_to_math(ir_expression *ir)
+{
+   /* For more details, see:
+    *
+    * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetPaallel
+    */
+   const unsigned elements = ir->operands[0]->type->vector_elements;
+   ir_variable *temp = new(ir) ir_variable(glsl_type::uvec(elements), "temp",
+                                           ir_var_temporary);
+   ir_constant *c55555555 = new(ir) ir_constant(0x55555555u);
+   ir_constant *c33333333 = new(ir) ir_constant(0x33333333u);
+   ir_constant *c0F0F0F0F = new(ir) ir_constant(0x0F0F0F0Fu);
+   ir_constant *c01010101 = new(ir) ir_constant(0x01010101u);
+   ir_constant *c1 = new(ir) ir_constant(1u);
+   ir_constant *c2 = new(ir) ir_constant(2u);
+   ir_constant *c4 = new(ir) ir_constant(4u);
+   ir_constant *c24 = new(ir) ir_constant(24u);
+
+   base_ir->insert_before(temp);
+
+   if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
+      base_ir->insert_before(assign(temp, ir->operands[0]));
+   } else {
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+      base_ir->insert_before(assign(temp, i2u(ir->operands[0])));
+   }
+
+   /* temp = temp - ((temp >> 1) & 0x55555555u); */
+   base_ir->insert_before(assign(temp, sub(temp, bit_and(rshift(temp, c1),
+                                                         c55555555))));
+
+   /* temp = (temp & 0x33333333u) + ((temp >> 2) & 0x33333333u); */
+   base_ir->insert_before(assign(temp, add(bit_and(temp, c33333333),
+                                           bit_and(rshift(temp, c2),
+                                                   c33333333->clone(ir, NULL)))));
+
+   /* int(((temp + (temp >> 4) & 0xF0F0F0Fu) * 0x1010101u) >> 24); */
+   ir->operation = ir_unop_u2i;
+   ir->operands[0] = rshift(mul(bit_and(add(temp, rshift(temp, c4)), c0F0F0F0F),
+                                c01010101),
+                            c24);
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -1055,6 +1102,12 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
          dsign_to_csel(ir);
       break;
+
+   case ir_unop_bit_count:
+      if (lowering(BIT_COUNT_TO_MATH))
+         bit_count_to_math(ir);
+      break;
+
    default:
       return visit_continue;
    }
author	Ian Romanick <[email protected]>	2016-06-23 16:16:21 -0700
committer	Ian Romanick <[email protected]>	2016-07-19 12:19:28 -0700
commit	7340be8a01ca2df2d8578fa93589dbe5dea37e18 (patch)
tree	f18be4ba2f09563efcc22b5ba2880ba4b2e5da1c
parent	806add360ff2ffbf70c6a52c64115fcb7ef718bd (diff)