summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Timur Kristóf <[email protected]> 2019-11-13 11:14:51 +0100
committer Timur Kristóf <[email protected]> 2020-01-14 21:21:06 +0100
commit d962bbd89528c540b86bd61d19677bda4d352ebe (patch)
tree 86e1c24fdc42a299d69e80257cb6602254b98cfa
parent 6bd9c4dc570c062e9160807fdd5ed888af22fced (diff)
aco: Implement 64-bit constant propagation.
Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
-rw-r--r-- src/amd/compiler/aco_ir.h 39
-rw-r--r-- src/amd/compiler/aco_lower_bool_phis.cpp 4
-rw-r--r-- src/amd/compiler/aco_lower_to_hw_instr.cpp 2
-rw-r--r-- src/amd/compiler/aco_optimizer.cpp 38
4 files changed, 58 insertions, 25 deletions
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 5426dc05fb1..4239e5ffaf8 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -300,10 +300,11 @@ public:
setFixed(PhysReg{128});
}
};
- explicit Operand(uint32_t v) noexcept
+ explicit Operand(uint32_t v, bool is64bit = false) noexcept
{
data_.i = v;
isConstant_ = true;
+ is64BitConst_ = is64bit;
if (v <= 64)
setFixed(PhysReg{128 + v});
else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
@@ -324,34 +325,46 @@ public:
setFixed(PhysReg{246});
else if (v == 0xc0800000) /* -4.0 */
setFixed(PhysReg{247});
- else /* Literal Constant */
+ else { /* Literal Constant */
+ assert(!is64bit && "attempt to create a 64-bit literal constant");
setFixed(PhysReg{255});
+ }
};
explicit Operand(uint64_t v) noexcept
{
isConstant_ = true;
is64BitConst_ = true;
- if (v <= 64)
+ if (v <= 64) {
+ data_.i = (uint32_t) v;
setFixed(PhysReg{128 + (uint32_t) v});
- else if (v >= 0xFFFFFFFFFFFFFFF0) /* [-16 .. -1] */
+ } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
+ data_.i = (uint32_t) v;
setFixed(PhysReg{192 - (uint32_t) v});
- else if (v == 0x3FE0000000000000) /* 0.5 */
+ } else if (v == 0x3FE0000000000000) { /* 0.5 */
+ data_.i = 0x3f000000;
setFixed(PhysReg{240});
- else if (v == 0xBFE0000000000000) /* -0.5 */
+ } else if (v == 0xBFE0000000000000) { /* -0.5 */
+ data_.i = 0xbf000000;
setFixed(PhysReg{241});
- else if (v == 0x3FF0000000000000) /* 1.0 */
+ } else if (v == 0x3FF0000000000000) { /* 1.0 */
+ data_.i = 0x3f800000;
setFixed(PhysReg{242});
- else if (v == 0xBFF0000000000000) /* -1.0 */
+ } else if (v == 0xBFF0000000000000) { /* -1.0 */
+ data_.i = 0xbf800000;
setFixed(PhysReg{243});
- else if (v == 0x4000000000000000) /* 2.0 */
+ } else if (v == 0x4000000000000000) { /* 2.0 */
+ data_.i = 0x40000000;
setFixed(PhysReg{244});
- else if (v == 0xC000000000000000) /* -2.0 */
+ } else if (v == 0xC000000000000000) { /* -2.0 */
+ data_.i = 0xc0000000;
setFixed(PhysReg{245});
- else if (v == 0x4010000000000000) /* 4.0 */
+ } else if (v == 0x4010000000000000) { /* 4.0 */
+ data_.i = 0x40800000;
setFixed(PhysReg{246});
- else if (v == 0xC010000000000000) /* -4.0 */
+ } else if (v == 0xC010000000000000) { /* -4.0 */
+ data_.i = 0xc0800000;
setFixed(PhysReg{247});
- else { /* Literal Constant: we don't know if it is a long or double.*/
+ } else { /* Literal Constant: we don't know if it is a long or double.*/
isConstant_ = 0;
assert(false && "attempt to create a 64-bit literal constant");
}
diff --git a/src/amd/compiler/aco_lower_bool_phis.cpp b/src/amd/compiler/aco_lower_bool_phis.cpp
index 988f753c82d..e5e27d70971 100644
--- a/src/amd/compiler/aco_lower_bool_phis.cpp
+++ b/src/amd/compiler/aco_lower_bool_phis.cpp
@@ -54,7 +54,7 @@ Operand get_ssa(Program *program, unsigned block_idx, ssa_state *state)
while (true) {
auto pos = state->latest.find(block_idx);
if (pos != state->latest.end())
- return Operand({pos->second, program->lane_mask});
+ return Operand(Temp(pos->second, program->lane_mask));
Block& block = program->blocks[block_idx];
size_t pred = block.linear_preds.size();
@@ -78,7 +78,7 @@ Operand get_ssa(Program *program, unsigned block_idx, ssa_state *state)
phi->definitions[0] = Definition(Temp{res, program->lane_mask});
block.instructions.emplace(block.instructions.begin(), std::move(phi));
- return Operand({res, program->lane_mask});
+ return Operand(Temp(res, program->lane_mask));
}
}
}
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 7c304aa7501..c555ccdfa85 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -962,7 +962,7 @@ void lower_to_hw_instr(Program* program)
{
Operand operand = instr->operands[i];
if (operand.isConstant() || operand.size() == 1) {
- assert(instr->definitions[i].size() == 1);
+ assert(instr->definitions[i].size() == operand.size());
copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, 1};
} else {
RegClass def_rc = RegClass(instr->definitions[i].regClass().type(), 1);
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 77cdf3b04a9..bf075b6ffab 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -82,11 +82,12 @@ enum Label {
label_minmax = 1 << 19,
label_fcmp = 1 << 20,
label_uniform_bool = 1 << 21,
+ label_constant_64bit = 1 << 22,
};
static constexpr uint32_t instr_labels = label_vec | label_mul | label_mad | label_omod_success | label_clamp_success | label_add_sub | label_bitwise | label_minmax | label_fcmp;
static constexpr uint32_t temp_labels = label_abs | label_neg | label_temp | label_vcc | label_b2f | label_uniform_bool | label_omod2 | label_omod4 | label_omod5 | label_clamp;
-static constexpr uint32_t val_labels = label_constant | label_literal | label_mad;
+static constexpr uint32_t val_labels = label_constant | label_constant_64bit | label_literal | label_mad;
struct ssa_info {
uint32_t val;
@@ -137,6 +138,17 @@ struct ssa_info {
return label & label_constant;
}
+ void set_constant_64bit(uint32_t constant)
+ {
+ add_label(label_constant_64bit);
+ val = constant;
+ }
+
+ bool is_constant_64bit()
+ {
+ return label & label_constant_64bit;
+ }
+
void set_abs(Temp abs_temp)
{
add_label(label_abs);
@@ -604,10 +616,10 @@ bool parse_base_offset(opt_ctx &ctx, Instruction* instr, unsigned op_index, Temp
return false;
}
-Operand get_constant_op(opt_ctx &ctx, uint32_t val)
+Operand get_constant_op(opt_ctx &ctx, uint32_t val, bool is64bit = false)
{
// TODO: this functions shouldn't be needed if we store Operand instead of value.
- Operand op(val);
+ Operand op(val, is64bit);
if (val == 0x3e22f983 && ctx.program->chip_class >= GFX8)
op.setFixed(PhysReg{248}); /* 1/2 PI can be an inline constant on GFX8+ */
return op;
@@ -661,8 +673,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
break;
}
}
- if ((info.is_constant() || (info.is_literal() && instr->format == Format::PSEUDO)) && !instr->operands[i].isFixed() && can_accept_constant(instr, i)) {
- instr->operands[i] = get_constant_op(ctx, info.val);
+ if ((info.is_constant() || info.is_constant_64bit() || (info.is_literal() && instr->format == Format::PSEUDO)) && !instr->operands[i].isFixed() && can_accept_constant(instr, i)) {
+ instr->operands[i] = get_constant_op(ctx, info.val, info.is_constant_64bit());
continue;
}
}
@@ -696,18 +708,19 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
static_cast<VOP3A_instruction*>(instr.get())->neg[i] = true;
continue;
}
- if (info.is_constant() && can_accept_constant(instr, i)) {
+ if ((info.is_constant() || info.is_constant_64bit()) && can_accept_constant(instr, i)) {
+ Operand op = get_constant_op(ctx, info.val, info.is_constant_64bit());
perfwarn(instr->opcode == aco_opcode::v_cndmask_b32 && i == 2, "v_cndmask_b32 with a constant selector", instr.get());
if (i == 0 || instr->opcode == aco_opcode::v_readlane_b32 || instr->opcode == aco_opcode::v_writelane_b32) {
- instr->operands[i] = get_constant_op(ctx, info.val);
+ instr->operands[i] = op;
continue;
} else if (!instr->isVOP3() && can_swap_operands(instr)) {
instr->operands[i] = instr->operands[0];
- instr->operands[0] = get_constant_op(ctx, info.val);
+ instr->operands[0] = op;
continue;
} else if (can_use_VOP3(ctx, instr)) {
to_VOP3(ctx, instr);
- instr->operands[i] = get_constant_op(ctx, info.val);
+ instr->operands[i] = op;
continue;
}
}
@@ -856,6 +869,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
ctx.info[instr->definitions[i].tempId()].set_literal(vec_op.constantValue());
else if (vec_op.size() == 1)
ctx.info[instr->definitions[i].tempId()].set_constant(vec_op.constantValue());
+ else if (vec_op.size() == 2)
+ ctx.info[instr->definitions[i].tempId()].set_constant_64bit(vec_op.constantValue());
} else {
assert(vec_op.isTemp());
ctx.info[instr->definitions[i].tempId()].set_temp(vec_op.getTemp());
@@ -886,6 +901,9 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
ctx.info[instr->definitions[0].tempId()].set_literal(vec_op.constantValue());
else if (vec_op.size() == 1)
ctx.info[instr->definitions[0].tempId()].set_constant(vec_op.constantValue());
+ else if (vec_op.size() == 2)
+ ctx.info[instr->definitions[0].tempId()].set_constant_64bit(vec_op.constantValue());
+
} else {
assert(vec_op.isTemp());
ctx.info[instr->definitions[0].tempId()].set_temp(vec_op.getTemp());
@@ -906,6 +924,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
ctx.info[instr->definitions[0].tempId()].set_literal(instr->operands[0].constantValue());
else if (instr->operands[0].size() == 1)
ctx.info[instr->definitions[0].tempId()].set_constant(instr->operands[0].constantValue());
+ else if (instr->operands[0].size() == 2)
+ ctx.info[instr->definitions[0].tempId()].set_constant_64bit(instr->operands[0].constantValue());
} else if (instr->operands[0].isTemp()) {
ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
} else {