diff options
author | Ian Romanick <[email protected]> | 2016-06-24 00:11:26 -0700 |
---|---|---|
committer | Ian Romanick <[email protected]> | 2016-07-19 12:19:28 -0700 |
commit | ad9acb19c39292de220b4d7dfdd2b5673129517a (patch) | |
tree | 65416426d798a670b842ce7454a6bddccba14069 /src/compiler/glsl | |
parent | 3079dcb00c19aa4773d1a1133bd8c4d1a48e4375 (diff) |
glsl: Add lowering pass for ir_unop_bitfield_reverse
Signed-off-by: Ian Romanick <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/compiler/glsl')
-rw-r--r-- | src/compiler/glsl/ir_optimization.h | 1 | ||||
-rw-r--r-- | src/compiler/glsl/lower_instructions.cpp | 91 |
2 files changed, 92 insertions, 0 deletions
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 6fda9f62b71..77c1260e689 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -45,6 +45,7 @@ #define BIT_COUNT_TO_MATH 0x02000 #define EXTRACT_TO_SHIFTS 0x04000 #define INSERT_TO_SHIFTS 0x08000 +#define REVERSE_TO_SHIFTS 0x10000 /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp index 902294397ab..86af49d00b0 100644 --- a/src/compiler/glsl/lower_instructions.cpp +++ b/src/compiler/glsl/lower_instructions.cpp @@ -162,6 +162,7 @@ private: void bit_count_to_math(ir_expression *); void extract_to_shifts(ir_expression *); void insert_to_shifts(ir_expression *); + void reverse_to_shifts(ir_expression *ir); }; } /* anonymous namespace */ @@ -1143,6 +1144,92 @@ lower_instructions_visitor::insert_to_shifts(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::reverse_to_shifts(ir_expression *ir) +{ + /* For more details, see: + * + * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel + */ + ir_constant *c1 = + new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements); + ir_constant *c2 = + new(ir) ir_constant(2u, ir->operands[0]->type->vector_elements); + ir_constant *c4 = + new(ir) ir_constant(4u, ir->operands[0]->type->vector_elements); + ir_constant *c8 = + new(ir) ir_constant(8u, ir->operands[0]->type->vector_elements); + ir_constant *c16 = + new(ir) ir_constant(16u, ir->operands[0]->type->vector_elements); + ir_constant *c33333333 = + new(ir) ir_constant(0x33333333u, ir->operands[0]->type->vector_elements); + ir_constant *c55555555 = + new(ir) ir_constant(0x55555555u, ir->operands[0]->type->vector_elements); + ir_constant *c0F0F0F0F = + new(ir) ir_constant(0x0F0F0F0Fu, ir->operands[0]->type->vector_elements); + ir_constant *c00FF00FF = + new(ir) ir_constant(0x00FF00FFu, ir->operands[0]->type->vector_elements); + ir_variable *temp = + new(ir) ir_variable(glsl_type::uvec(ir->operands[0]->type->vector_elements), + "temp", ir_var_temporary); + ir_instruction &i = *base_ir; + + i.insert_before(temp); + + if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { + i.insert_before(assign(temp, ir->operands[0])); + } else { + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + i.insert_before(assign(temp, i2u(ir->operands[0]))); + } + + /* Swap odd and even bits. + * + * temp = ((temp >> 1) & 0x55555555u) | ((temp & 0x55555555u) << 1); + */ + i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c1), c55555555), + lshift(bit_and(temp, c55555555->clone(ir, NULL)), + c1->clone(ir, NULL))))); + /* Swap consecutive pairs. + * + * temp = ((temp >> 2) & 0x33333333u) | ((temp & 0x33333333u) << 2); + */ + i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c2), c33333333), + lshift(bit_and(temp, c33333333->clone(ir, NULL)), + c2->clone(ir, NULL))))); + + /* Swap nibbles. + * + * temp = ((temp >> 4) & 0x0F0F0F0Fu) | ((temp & 0x0F0F0F0Fu) << 4); + */ + i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c4), c0F0F0F0F), + lshift(bit_and(temp, c0F0F0F0F->clone(ir, NULL)), + c4->clone(ir, NULL))))); + + /* The last step is, basically, bswap. Swap the bytes, then swap the + * words. When this code is run through GCC on x86, it does generate a + * bswap instruction. + * + * temp = ((temp >> 8) & 0x00FF00FFu) | ((temp & 0x00FF00FFu) << 8); + * temp = ( temp >> 16 ) | ( temp << 16); + */ + i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c8), c00FF00FF), + lshift(bit_and(temp, c00FF00FF->clone(ir, NULL)), + c8->clone(ir, NULL))))); + + if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) { + ir->operation = ir_binop_bit_or; + ir->operands[0] = rshift(temp, c16); + ir->operands[1] = lshift(temp, c16->clone(ir, NULL)); + } else { + ir->operation = ir_unop_u2i; + ir->operands[0] = bit_or(rshift(temp, c16), + lshift(temp, c16->clone(ir, NULL))); + } + + this->progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -1260,6 +1347,10 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) insert_to_shifts(ir); break; + case ir_unop_bitfield_reverse: + if (lowering(REVERSE_TO_SHIFTS)) + reverse_to_shifts(ir); + break; default: return visit_continue; |