diff options
author | Rhys Perry <[email protected]> | 2019-12-03 13:37:49 +0000 |
---|---|---|
committer | Timur Kristóf <[email protected]> | 2019-12-21 12:38:42 +0100 |
commit | 6ff92f3d684747c4597f926766ad264db5875a3c (patch) | |
tree | d138b03383498917df3e14cbe7b67ebf86c67f98 /src | |
parent | 4dd08b710b749d097b5c68f6edd801e6da26903a (diff) |
aco/wave32: fix comparison optimizations
Previously, the comparison optimizations were not applied in wave32, because they only matched the 64-bit s_and_b64/s_or_b64 opcodes.
Totals from affected shaders:
SGPRS: 507726 -> 508006 (0.06 %)
VGPRS: 450340 -> 450268 (-0.02 %)
Spilled SGPRs: 298 -> 298 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 39689708 -> 39384488 (-0.77 %) bytes
Max Waves: 39631 -> 39636 (0.01 %)
Instructions: 7865919 -> 7793650 (-0.92 %)
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/amd/compiler/aco_optimizer.cpp | 34 |
1 file changed, 16 insertions, 18 deletions
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index ceecc0f34e4..612497928dd 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -1164,11 +1164,13 @@ Instruction *follow_operand(opt_ctx &ctx, Operand op, bool ignore_uses=false) * s_and_b64(eq(a, a), eq(b, b)) -> v_cmp_o_f32(a, b) */ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr) { - if (instr->opcode != aco_opcode::s_or_b64 && instr->opcode != aco_opcode::s_and_b64) + if (instr->definitions[0].regClass() != ctx.program->lane_mask) return false; if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()]) return false; + bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32; + bool neg[2] = {false, false}; bool abs[2] = {false, false}; bool opsel[2] = {false, false}; @@ -1180,8 +1182,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr) if (!op_instr[i]) return false; - aco_opcode expected_cmp = instr->opcode == aco_opcode::s_or_b64 ? - aco_opcode::v_cmp_neq_f32 : aco_opcode::v_cmp_eq_f32; + aco_opcode expected_cmp = is_or ? aco_opcode::v_cmp_neq_f32 : aco_opcode::v_cmp_eq_f32; if (op_instr[i]->opcode != expected_cmp) return false; @@ -1213,8 +1214,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr) decrease_uses(ctx, op_instr[0]); decrease_uses(ctx, op_instr[1]); - aco_opcode new_op = instr->opcode == aco_opcode::s_or_b64 ? - aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32; + aco_opcode new_op = is_or ? 
aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32; Instruction *new_instr; if (neg[0] || neg[1] || abs[0] || abs[1] || opsel[0] || opsel[1]) { VOP3A_instruction *vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1); @@ -1243,13 +1243,13 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr) * s_and_b64(v_cmp_o_f32(a, b), cmp(a, b)) -> get_ordered(cmp)(a, b) */ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr) { - if (instr->opcode != aco_opcode::s_or_b64 && instr->opcode != aco_opcode::s_and_b64) + if (instr->definitions[0].regClass() != ctx.program->lane_mask) return false; if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()]) return false; - aco_opcode expected_nan_test = instr->opcode == aco_opcode::s_or_b64 ? - aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32; + bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32; + aco_opcode expected_nan_test = is_or ? aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32; Instruction *nan_test = follow_operand(ctx, instr->operands[0], true); Instruction *cmp = follow_operand(ctx, instr->operands[1], true); @@ -1283,8 +1283,7 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr) decrease_uses(ctx, nan_test); decrease_uses(ctx, cmp); - aco_opcode new_op = instr->opcode == aco_opcode::s_or_b64 ? - get_unordered(cmp->opcode) : get_ordered(cmp->opcode); + aco_opcode new_op = is_or ? 
get_unordered(cmp->opcode) : get_ordered(cmp->opcode); Instruction *new_instr; if (cmp->isVOP3()) { VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1); @@ -1314,19 +1313,20 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr) * s_and_b64(v_cmp_eq_f32(a, a), cmp(a, #b)) and b is not NaN -> get_ordered(cmp)(a, b) */ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr) { - if (instr->opcode != aco_opcode::s_or_b64 && instr->opcode != aco_opcode::s_and_b64) + if (instr->definitions[0].regClass() != ctx.program->lane_mask) return false; if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()]) return false; + bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32; + Instruction *nan_test = follow_operand(ctx, instr->operands[0], true); Instruction *cmp = follow_operand(ctx, instr->operands[1], true); if (!nan_test || !cmp) return false; - aco_opcode expected_nan_test = instr->opcode == aco_opcode::s_or_b64 ? - aco_opcode::v_cmp_neq_f32 : aco_opcode::v_cmp_eq_f32; + aco_opcode expected_nan_test = is_or ? aco_opcode::v_cmp_neq_f32 : aco_opcode::v_cmp_eq_f32; if (cmp->opcode == expected_nan_test) std::swap(nan_test, cmp); else if (nan_test->opcode != expected_nan_test) @@ -1379,8 +1379,7 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in decrease_uses(ctx, nan_test); decrease_uses(ctx, cmp); - aco_opcode new_op = instr->opcode == aco_opcode::s_or_b64 ? - get_unordered(cmp->opcode) : get_ordered(cmp->opcode); + aco_opcode new_op = is_or ? 
get_unordered(cmp->opcode) : get_ordered(cmp->opcode); Instruction *new_instr; if (cmp->isVOP3()) { VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1); @@ -2233,9 +2232,8 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr } else if (instr->opcode == aco_opcode::s_not_b64) { if (combine_inverse_comparison(ctx, instr)) ; else combine_salu_not_bitwise(ctx, instr); - } else if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_or_b32) { - combine_salu_n2(ctx, instr); - } else if (instr->opcode == aco_opcode::s_and_b64 || instr->opcode == aco_opcode::s_or_b64) { + } else if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_or_b32 || + instr->opcode == aco_opcode::s_and_b64 || instr->opcode == aco_opcode::s_or_b64) { if (combine_ordering_test(ctx, instr)) ; else if (combine_comparison_ordering(ctx, instr)) ; else if (combine_constant_comparison_ordering(ctx, instr)) ; |