summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Rhys Perry <[email protected]> 2019-09-12 13:25:18 +0100
committer: Timur Kristóf <[email protected]> 2019-10-10 09:57:53 +0200
commit: 619f0a71ccdb079ee0ea77a130e92f2ac4d5a75f (patch)
tree: ae5e3f4763ab2182e21e83ccff8db1c577b42915
parent: 6a6bef59b0dd13d790ff0f94745bf02e06b5bb37 (diff)
aco: Use the VOP3-only add/sub GFX10 instructions if needed.
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-By: Timur Kristóf <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
-rw-r--r-- src/amd/compiler/aco_builder_h.py | 16
1 file changed, 15 insertions, 1 deletion
diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index 7be3a664c4a..f6fccfec2b2 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -287,6 +287,8 @@ public:
if (!carry_in.op.isUndefined())
return vop2(aco_opcode::v_addc_co_u32, Definition(dst), hint_vcc(def(s2)), a, b, carry_in);
+ else if (program->chip_class >= GFX10 && carry_out)
+ return vop3(aco_opcode::v_add_co_u32_e64, Definition(dst), def(s2), a, b);
else if (program->chip_class < GFX9 || carry_out)
return vop2(aco_opcode::v_add_co_u32, Definition(dst), hint_vcc(def(s2)), a, b);
else
@@ -314,10 +316,22 @@ public:
} else {
op = reverse ? aco_opcode::v_subrev_u32 : aco_opcode::v_sub_u32;
}
+ bool vop3 = false;
+ if (program->chip_class >= GFX10 && op == aco_opcode::v_subrev_co_u32) {
+ vop3 = true;
+ op = aco_opcode::v_subrev_co_u32_e64;
+ } else if (program->chip_class >= GFX10 && op == aco_opcode::v_sub_co_u32) {
+ vop3 = true;
+ op = aco_opcode::v_sub_co_u32_e64;
+ }
int num_ops = borrow.op.isUndefined() ? 2 : 3;
int num_defs = carry_out ? 2 : 1;
- aco_ptr<Instruction> sub{create_instruction<VOP2_instruction>(op, Format::VOP2, num_ops, num_defs)};
+ aco_ptr<Instruction> sub;
+ if (vop3)
+ sub.reset(create_instruction<VOP3A_instruction>(op, Format::VOP3B, num_ops, num_defs));
+ else
+ sub.reset(create_instruction<VOP2_instruction>(op, Format::VOP2, num_ops, num_defs));
sub->operands[0] = a.op;
sub->operands[1] = b.op;
if (!borrow.op.isUndefined())