diff options
author | Rhys Perry <[email protected]> | 2020-03-20 16:07:08 +0000 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-03-24 11:28:55 +0000 |
commit | 43918c9a7fc76b56a521d5eea6a8d2b3fb675a15 (patch) | |
tree | 607cdb80339978651b7e96ab0ffb10558945b769 /src | |
parent | 21ba2bc595402180fa52d793bb1e524663788338 (diff) |
aco: implement 64-bit VGPR constant copies in handle_operands()
64-bit VGPR constant copies can happen because of 64-bit constant copy
propagation. Since this optimization is beneficial and the resulting copies
are more annoying to deal with in the optimizer, I've implemented 64-bit
VGPR constant copies in handle_operands().
This also sets copy_operation::size correctly for 64-bit constant copies.
Cc: 20.0 <[email protected]>
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4260>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4260>
Diffstat (limited to 'src')
-rw-r--r-- | src/amd/compiler/aco_ir.h | 30 | ||||
-rw-r--r-- | src/amd/compiler/aco_lower_to_hw_instr.cpp | 13 |
2 files changed, 39 insertions, 4 deletions
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 1eae6c5d0cc..0be646d8b0f 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -472,6 +472,36 @@ public: return isConstant() && constantValue() == cmp; } + constexpr uint64_t constantValue64(bool signext=false) const noexcept + { + if (is64BitConst_) { + if (reg_.reg <= 192) + return reg_.reg - 128; + else if (reg_.reg <= 208) + return 0xFFFFFFFFFFFFFFFF - (reg_.reg - 193); + + switch (reg_.reg) { + case 240: + return 0x3FE0000000000000; + case 241: + return 0xBFE0000000000000; + case 242: + return 0x3FF0000000000000; + case 243: + return 0xBFF0000000000000; + case 244: + return 0x4000000000000000; + case 245: + return 0xC000000000000000; + case 246: + return 0x4010000000000000; + case 247: + return 0xC010000000000000; + } + } + return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i; + } + /* Indicates that the killed operand's live range intersects with the * instruction's definitions. Unlike isKill() and isFirstKill(), this is * not set by liveness analysis. 
*/ diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 240f6c3a373..f42484c502f 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -764,6 +764,11 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* preserve_scc = true; } else if (it->second.size == 2 && it->second.def.getTemp().type() == RegType::sgpr) { bld.sop1(aco_opcode::s_mov_b64, it->second.def, Operand(it->second.op.physReg(), s2)); + } else if (it->second.size == 2 && it->second.op.isConstant()) { + uint64_t val = it->second.op.constantValue64(); + bld.vop1(aco_opcode::v_mov_b32, it->second.def, Operand((uint32_t)val)); + bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{it->second.def.physReg() + 1}, v1), + Operand((uint32_t)(val >> 32))); } else { bld.copy(it->second.def, it->second.op); } @@ -905,7 +910,7 @@ void lower_to_hw_instr(Program* program) if (op.isConstant()) { const PhysReg reg = PhysReg{instr->definitions[0].physReg() + reg_idx}; const Definition def = Definition(reg, rc_def); - copy_operations[reg] = {op, def, 0, 1}; + copy_operations[reg] = {op, def, 0, op.size()}; reg_idx++; continue; } @@ -932,7 +937,7 @@ void lower_to_hw_instr(Program* program) for (unsigned j = 0; j < k; j++) { Operand op = Operand(PhysReg{instr->operands[0].physReg() + (i*k+j)}, rc_op); Definition def = Definition(PhysReg{instr->definitions[i].physReg() + j}, rc_def); - copy_operations[def.physReg()] = {op, def, 0, 1}; + copy_operations[def.physReg()] = {op, def, 0, op.size()}; } } handle_operands(copy_operations, &ctx, program->chip_class, pi); @@ -947,7 +952,7 @@ void lower_to_hw_instr(Program* program) Operand operand = instr->operands[i]; if (operand.isConstant() || operand.size() == 1) { assert(instr->definitions[i].size() == operand.size()); - copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, 1}; + 
copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, operand.size()}; } else { RegClass def_rc = RegClass(instr->definitions[i].regClass().type(), 1); RegClass op_rc = RegClass(operand.getTemp().type(), 1); @@ -1019,7 +1024,7 @@ void lower_to_hw_instr(Program* program) Operand operand = instr->operands[0]; if (operand.isConstant() || operand.size() == 1) { assert(instr->definitions[0].size() == 1); - copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, 1}; + copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, operand.size()}; } else { for (unsigned i = 0; i < operand.size(); i++) { |