diff options
Diffstat (limited to 'src/compiler')
-rw-r--r-- | src/compiler/glsl/ir_optimization.h | 1 | ||||
-rw-r--r-- | src/compiler/glsl/lower_instructions.cpp | 53 |
2 files changed, 54 insertions, 0 deletions
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index ba790762a62..c85600345d0 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -42,6 +42,7 @@ #define SAT_TO_CLAMP 0x400 #define DOPS_TO_DFRAC 0x800 #define DFREXP_DLDEXP_TO_ARITH 0x1000 +#define BIT_COUNT_TO_MATH 0x02000 /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp index 16d92d64cd9..fc87605c43f 100644 --- a/src/compiler/glsl/lower_instructions.cpp +++ b/src/compiler/glsl/lower_instructions.cpp @@ -159,6 +159,7 @@ private: void dround_even_to_dfrac(ir_expression *); void dtrunc_to_dfrac(ir_expression *); void dsign_to_csel(ir_expression *); + void bit_count_to_math(ir_expression *); }; } /* anonymous namespace */ @@ -954,6 +955,52 @@ lower_instructions_visitor::dsign_to_csel(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::bit_count_to_math(ir_expression *ir) +{ + /* For more details, see: + * + * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetPaallel + */ + const unsigned elements = ir->operands[0]->type->vector_elements; + ir_variable *temp = new(ir) ir_variable(glsl_type::uvec(elements), "temp", + ir_var_temporary); + ir_constant *c55555555 = new(ir) ir_constant(0x55555555u); + ir_constant *c33333333 = new(ir) ir_constant(0x33333333u); + ir_constant *c0F0F0F0F = new(ir) ir_constant(0x0F0F0F0Fu); + ir_constant *c01010101 = new(ir) ir_constant(0x01010101u); + ir_constant *c1 = new(ir) ir_constant(1u); + ir_constant *c2 = new(ir) ir_constant(2u); + ir_constant *c4 = new(ir) ir_constant(4u); + ir_constant *c24 = new(ir) ir_constant(24u); + + base_ir->insert_before(temp); + + if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { + base_ir->insert_before(assign(temp, ir->operands[0])); + } else { + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + base_ir->insert_before(assign(temp, i2u(ir->operands[0]))); + } + + /* temp = temp - ((temp >> 1) & 0x55555555u); */ + base_ir->insert_before(assign(temp, sub(temp, bit_and(rshift(temp, c1), + c55555555)))); + + /* temp = (temp & 0x33333333u) + ((temp >> 2) & 0x33333333u); */ + base_ir->insert_before(assign(temp, add(bit_and(temp, c33333333), + bit_and(rshift(temp, c2), + c33333333->clone(ir, NULL))))); + + /* int(((temp + (temp >> 4) & 0xF0F0F0Fu) * 0x1010101u) >> 24); */ + ir->operation = ir_unop_u2i; + ir->operands[0] = rshift(mul(bit_and(add(temp, rshift(temp, c4)), c0F0F0F0F), + c01010101), + c24); + + this->progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -1055,6 +1102,12 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) dsign_to_csel(ir); break; + + case ir_unop_bit_count: + if (lowering(BIT_COUNT_TO_MATH)) + bit_count_to_math(ir); + break; + default: return visit_continue; } |