diff options
author | Sagar Ghuge <[email protected]> | 2019-02-27 14:02:54 -0800 |
---|---|---|
committer | Sagar Ghuge <[email protected]> | 2019-03-04 15:50:25 -0800 |
commit | 1d8994a63b546a5b2dc4feb5bd98a84ee853d6af (patch) | |
tree | 6ff1fb8b5cca7261fb887e3bc9dfe773121e2352 /src/compiler/glsl/lower_instructions.cpp | |
parent | e551040c602d392019e68f54d9a3a310d2a937a3 (diff) |
glsl: [u/i]mulExtended optimization for GLSL
Optimize mulExtended to use 32x32->64 multiplication.
Drivers which are not based on NIR, they can set the
MUL64_TO_MUL_AND_MUL_HIGH lowering flag in order to have same old
behavior.
v2: Add missing condition check (Jason Ekstrand)
Signed-off-by: Sagar Ghuge <[email protected]>
Suggested-by: Matt Turner <Matt Turner <[email protected]>
Suggested-by: Jason Ekstrand <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/compiler/glsl/lower_instructions.cpp')
-rw-r--r-- | src/compiler/glsl/lower_instructions.cpp | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp index 91f71b37619..8e0c8744048 100644 --- a/src/compiler/glsl/lower_instructions.cpp +++ b/src/compiler/glsl/lower_instructions.cpp @@ -169,6 +169,7 @@ private: void find_msb_to_float_cast(ir_expression *ir); void imul_high_to_mul(ir_expression *ir); void sqrt_to_abs_sqrt(ir_expression *ir); + void mul64_to_mul_and_mul_high(ir_expression *ir); ir_expression *_carry(operand a, operand b); }; @@ -1666,6 +1667,66 @@ lower_instructions_visitor::sqrt_to_abs_sqrt(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::mul64_to_mul_and_mul_high(ir_expression *ir) +{ + /* Lower 32x32-> 64 to + * msb = imul_high(x_lo, y_lo) + * lsb = mul(x_lo, y_lo) + */ + const unsigned elements = ir->operands[0]->type->vector_elements; + + const ir_expression_operation operation = + ir->type->base_type == GLSL_TYPE_UINT64 ? ir_unop_pack_uint_2x32 + : ir_unop_pack_int_2x32; + + const glsl_type *var_type = ir->type->base_type == GLSL_TYPE_UINT64 + ? glsl_type::uvec(elements) + : glsl_type::ivec(elements); + + const glsl_type *ret_type = ir->type->base_type == GLSL_TYPE_UINT64 + ? glsl_type::uvec2_type + : glsl_type::ivec2_type; + + ir_instruction &i = *base_ir; + + ir_variable *msb = + new(ir) ir_variable(var_type, "msb", ir_var_temporary); + ir_variable *lsb = + new(ir) ir_variable(var_type, "lsb", ir_var_temporary); + ir_variable *x = + new(ir) ir_variable(var_type, "x", ir_var_temporary); + ir_variable *y = + new(ir) ir_variable(var_type, "y", ir_var_temporary); + + i.insert_before(x); + i.insert_before(assign(x, ir->operands[0])); + i.insert_before(y); + i.insert_before(assign(y, ir->operands[1])); + i.insert_before(msb); + i.insert_before(lsb); + + i.insert_before(assign(msb, imul_high(x, y))); + i.insert_before(assign(lsb, mul(x, y))); + + ir_rvalue *result[4] = {NULL}; + for (unsigned elem = 0; elem < elements; elem++) { + ir_rvalue *val = new(ir) ir_expression(ir_quadop_vector, ret_type, + swizzle(lsb, elem, 1), + swizzle(msb, elem, 1), NULL, NULL); + result[elem] = expr(operation, val); + } + + ir->operation = ir_quadop_vector; + ir->init_num_operands(); + ir->operands[0] = result[0]; + ir->operands[1] = result[1]; + ir->operands[2] = result[2]; + ir->operands[3] = result[3]; + + this->progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -1803,6 +1864,15 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) imul_high_to_mul(ir); break; + case ir_binop_mul: + if (lowering(MUL64_TO_MUL_AND_MUL_HIGH) && + (ir->type->base_type == GLSL_TYPE_INT64 || + ir->type->base_type == GLSL_TYPE_UINT64) && + (ir->operands[0]->type->base_type == GLSL_TYPE_INT || + ir->operands[1]->type->base_type == GLSL_TYPE_UINT)) + mul64_to_mul_and_mul_high(ir); + break; + case ir_unop_rsq: case ir_unop_sqrt: if (lowering(SQRT_TO_ABS_SQRT)) |