diff options
Diffstat (limited to 'src/amd/compiler/aco_optimizer.cpp')
-rw-r--r-- | src/amd/compiler/aco_optimizer.cpp | 194 |
1 file changed, 113 insertions, 81 deletions
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 37564b7e993..58d22910150 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -61,7 +61,7 @@ struct mad_info { enum Label { label_vec = 1 << 0, - label_constant = 1 << 1, + label_constant_32bit = 1 << 1, /* label_{abs,neg,mul,omod2,omod4,omod5,clamp} are used for both 16 and * 32-bit operations but this shouldn't cause any issues because we don't * look through any conversions */ @@ -91,13 +91,14 @@ enum Label { label_vcc_hint = 1 << 25, label_scc_needed = 1 << 26, label_b2i = 1 << 27, + label_constant_16bit = 1 << 29, }; static constexpr uint32_t instr_labels = label_vec | label_mul | label_mad | label_omod_success | label_clamp_success | label_add_sub | label_bitwise | label_uniform_bitwise | label_minmax | label_fcmp; static constexpr uint32_t temp_labels = label_abs | label_neg | label_temp | label_vcc | label_b2f | label_uniform_bool | label_omod2 | label_omod4 | label_omod5 | label_clamp | label_scc_invert | label_b2i; -static constexpr uint32_t val_labels = label_constant | label_constant_64bit | label_literal | label_mad; +static constexpr uint32_t val_labels = label_constant_32bit | label_constant_64bit | label_constant_16bit | label_literal | label_mad; struct ssa_info { uint32_t val; @@ -122,7 +123,10 @@ struct ssa_info { label &= ~instr_labels; /* instr and temp alias */ } - if (new_label & val_labels) + uint32_t const_labels = label_literal | label_constant_32bit | label_constant_64bit | label_constant_16bit; + if (new_label & const_labels) + label &= ~val_labels | const_labels; + else if (new_label & val_labels) label &= ~val_labels; label |= new_label; @@ -139,26 +143,85 @@ struct ssa_info { return label & label_vec; } - void set_constant(uint32_t constant) + void set_constant(chip_class chip, uint64_t constant) { - add_label(label_constant); + Operand op16((uint16_t)constant); + Operand op32((uint32_t)constant); + 
add_label(label_literal); val = constant; - } - bool is_constant() + if (chip >= GFX8 && !op16.isLiteral()) + add_label(label_constant_16bit); + + if (!op32.isLiteral() || ((uint32_t)constant == 0x3e22f983 && chip >= GFX8)) + add_label(label_constant_32bit); + + if (constant <= 64) { + add_label(label_constant_64bit); + } else if (constant >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */ + add_label(label_constant_64bit); + } else if (constant == 0x3FE0000000000000) { /* 0.5 */ + add_label(label_constant_64bit); + } else if (constant == 0xBFE0000000000000) { /* -0.5 */ + add_label(label_constant_64bit); + } else if (constant == 0x3FF0000000000000) { /* 1.0 */ + add_label(label_constant_64bit); + } else if (constant == 0xBFF0000000000000) { /* -1.0 */ + add_label(label_constant_64bit); + } else if (constant == 0x4000000000000000) { /* 2.0 */ + add_label(label_constant_64bit); + } else if (constant == 0xC000000000000000) { /* -2.0 */ + add_label(label_constant_64bit); + } else if (constant == 0x4010000000000000) { /* 4.0 */ + add_label(label_constant_64bit); + } else if (constant == 0xC010000000000000) { /* -4.0 */ + add_label(label_constant_64bit); + } + + if (label & label_constant_64bit) { + val = Operand(constant).constantValue(); + if (val != constant) + label &= ~(label_literal | label_constant_16bit | label_constant_32bit); + } + } + + bool is_constant(unsigned bits) { - return label & label_constant; + switch (bits) { + case 8: + return label & label_literal; + case 16: + return label & label_constant_16bit; + case 32: + return label & label_constant_32bit; + case 64: + return label & label_constant_64bit; + } + return false; } - void set_constant_64bit(uint32_t constant) + bool is_literal(unsigned bits) { - add_label(label_constant_64bit); - val = constant; + bool is_lit = label & label_literal; + switch (bits) { + case 8: + return false; + case 16: + return is_lit && ~(label & label_constant_16bit); + case 32: + return is_lit && ~(label & label_constant_32bit); + 
case 64: + return false; + } + return false; } - bool is_constant_64bit() + bool is_constant_or_literal(unsigned bits) { - return label & label_constant_64bit; + if (bits == 64) + return label & label_constant_64bit; + else + return label & label_literal; } void set_abs(Temp abs_temp) @@ -211,17 +274,6 @@ struct ssa_info { return label & label_temp; } - void set_literal(uint32_t lit) - { - add_label(label_literal); - val = lit; - } - - bool is_literal() - { - return label & label_literal; - } - void set_mad(Instruction* mad, uint32_t mad_info_idx) { add_label(label_mad); @@ -321,11 +373,6 @@ struct ssa_info { return label & label_vcc; } - bool is_constant_or_literal() - { - return is_constant() || is_literal(); - } - void set_b2f(Temp val) { add_label(label_b2f); @@ -655,7 +702,7 @@ bool parse_base_offset(opt_ctx &ctx, Instruction* instr, unsigned op_index, Temp if (add_instr->operands[i].isConstant()) { *offset = add_instr->operands[i].constantValue(); } else if (add_instr->operands[i].isTemp() && - ctx.info[add_instr->operands[i].tempId()].is_constant_or_literal()) { + ctx.info[add_instr->operands[i].tempId()].is_constant_or_literal(32)) { *offset = ctx.info[add_instr->operands[i].tempId()].val; } else { continue; @@ -687,11 +734,15 @@ unsigned get_operand_size(aco_ptr<Instruction>& instr, unsigned index) return 0; } -Operand get_constant_op(opt_ctx &ctx, uint32_t val, bool is64bit = false) +Operand get_constant_op(opt_ctx &ctx, ssa_info info, uint32_t bits) { + if (bits == 8) + return Operand((uint8_t)info.val); + if (bits == 16) + return Operand((uint16_t)info.val); // TODO: this functions shouldn't be needed if we store Operand instead of value. 
- Operand op(val, is64bit); - if (val == 0x3e22f983 && ctx.program->chip_class >= GFX8) + Operand op(info.val, bits == 64); + if (info.is_literal(32) && info.val == 0x3e22f983 && ctx.program->chip_class >= GFX8) op.setFixed(PhysReg{248}); /* 1/2 PI can be an inline constant on GFX8+ */ return op; } @@ -706,7 +757,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) if (instr->isSALU() || instr->isVALU() || instr->format == Format::PSEUDO) { ASSERTED bool all_const = false; for (Operand& op : instr->operands) - all_const = all_const && (!op.isTemp() || ctx.info[op.tempId()].is_constant_or_literal()); + all_const = all_const && (!op.isTemp() || ctx.info[op.tempId()].is_constant_or_literal(32)); perfwarn(all_const, "All instruction operands are constant", instr.get()); } @@ -728,13 +779,13 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) /* SALU / PSEUDO: propagate inline constants */ if (instr->isSALU() || instr->format == Format::PSEUDO) { bool is_subdword = false; - // TODO: optimize SGPR and constant propagation for subdword pseudo instructions on gfx9+ + // TODO: optimize SGPR propagation for subdword pseudo instructions on gfx9+ if (instr->format == Format::PSEUDO) { is_subdword = std::any_of(instr->definitions.begin(), instr->definitions.end(), [] (const Definition& def) { return def.regClass().is_subdword();}); is_subdword = is_subdword || std::any_of(instr->operands.begin(), instr->operands.end(), [] (const Operand& op) { return op.hasRegClass() && op.regClass().is_subdword();}); - if (is_subdword) + if (is_subdword && ctx.program->chip_class < GFX9) continue; } @@ -760,9 +811,10 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) break; } } - if ((info.is_constant() || info.is_constant_64bit() || (info.is_literal() && instr->format == Format::PSEUDO)) && + unsigned bits = get_operand_size(instr, i); + if ((info.is_constant(bits) || (!is_subdword && 
info.is_literal(bits) && instr->format == Format::PSEUDO)) && !instr->operands[i].isFixed() && alu_can_accept_constant(instr->opcode, i)) { - instr->operands[i] = get_constant_op(ctx, info.val, info.is_constant_64bit()); + instr->operands[i] = get_constant_op(ctx, info, bits); continue; } } @@ -805,8 +857,9 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) static_cast<VOP3A_instruction*>(instr.get())->neg[i] = true; continue; } - if ((info.is_constant() || info.is_constant_64bit()) && alu_can_accept_constant(instr->opcode, i)) { - Operand op = get_constant_op(ctx, info.val, info.is_constant_64bit()); + unsigned bits = get_operand_size(instr, i); + if (info.is_constant(bits) && alu_can_accept_constant(instr->opcode, i)) { + Operand op = get_constant_op(ctx, info, bits); perfwarn(instr->opcode == aco_opcode::v_cndmask_b32 && i == 2, "v_cndmask_b32 with a constant selector", instr.get()); if (i == 0 || instr->opcode == aco_opcode::v_readlane_b32 || instr->opcode == aco_opcode::v_writelane_b32) { instr->operands[i] = op; @@ -831,13 +884,13 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) while (info.is_temp()) info = ctx.info[info.temp.id()]; - if (mubuf->offen && i == 1 && info.is_constant_or_literal() && mubuf->offset + info.val < 4096) { + if (mubuf->offen && i == 1 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) { assert(!mubuf->idxen); instr->operands[1] = Operand(v1); mubuf->offset += info.val; mubuf->offen = false; continue; - } else if (i == 2 && info.is_constant_or_literal() && mubuf->offset + info.val < 4096) { + } else if (i == 2 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) { instr->operands[2] = Operand((uint32_t) 0); mubuf->offset += info.val; continue; @@ -891,7 +944,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) SMEM_instruction *smem = static_cast<SMEM_instruction *>(instr.get()); Temp base; uint32_t 
offset; - if (i == 1 && info.is_constant_or_literal() && + if (i == 1 && info.is_constant_or_literal(32) && ((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) || (ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) || (ctx.program->chip_class >= GFX8 && info.val <= 0xFFFFF))) { @@ -900,7 +953,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) } else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) { bool soe = smem->operands.size() >= (!smem->definitions.empty() ? 3 : 4); if (soe && - (!ctx.info[smem->operands.back().tempId()].is_constant_or_literal() || + (!ctx.info[smem->operands.back().tempId()].is_constant_or_literal(32) || ctx.info[smem->operands.back().tempId()].val != 0)) { continue; } @@ -996,12 +1049,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) Operand vec_op = vec->operands[vec_index]; if (vec_op.isConstant()) { - if (vec_op.isLiteral()) - ctx.info[instr->definitions[i].tempId()].set_literal(vec_op.constantValue()); - else if (vec_op.size() == 1) - ctx.info[instr->definitions[i].tempId()].set_constant(vec_op.constantValue()); - else if (vec_op.size() == 2) - ctx.info[instr->definitions[i].tempId()].set_constant_64bit(vec_op.constantValue()); + ctx.info[instr->definitions[i].tempId()].set_constant(ctx.program->chip_class, vec_op.constantValue64()); } else if (vec_op.isUndefined()) { ctx.info[instr->definitions[i].tempId()].set_undefined(); } else { @@ -1035,12 +1083,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) instr->operands[0] = op; if (op.isConstant()) { - if (op.isLiteral()) - ctx.info[instr->definitions[0].tempId()].set_literal(op.constantValue()); - else if (op.size() == 1) - ctx.info[instr->definitions[0].tempId()].set_constant(op.constantValue()); - else if (op.size() == 2) - 
ctx.info[instr->definitions[0].tempId()].set_constant_64bit(op.constantValue()); + ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, op.constantValue64()); } else if (op.isUndefined()) { ctx.info[instr->definitions[0].tempId()].set_undefined(); } else { @@ -1060,12 +1103,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) } else if (instr->usesModifiers()) { // TODO } else if (instr->operands[0].isConstant()) { - if (instr->operands[0].isLiteral()) - ctx.info[instr->definitions[0].tempId()].set_literal(instr->operands[0].constantValue()); - else if (instr->operands[0].size() == 1) - ctx.info[instr->definitions[0].tempId()].set_constant(instr->operands[0].constantValue()); - else if (instr->operands[0].size() == 2) - ctx.info[instr->definitions[0].tempId()].set_constant_64bit(instr->operands[0].constantValue()); + ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, instr->operands[0].constantValue64()); } else if (instr->operands[0].isTemp()) { ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); } else { @@ -1074,25 +1112,19 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) break; case aco_opcode::p_is_helper: if (!ctx.program->needs_wqm) - ctx.info[instr->definitions[0].tempId()].set_constant(0u); + ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, 0u); break; case aco_opcode::s_movk_i32: { uint32_t v = static_cast<SOPK_instruction*>(instr.get())->imm; v = v & 0x8000 ? 
(v | 0xffff0000) : v; - if (v <= 64 || v >= 0xfffffff0) - ctx.info[instr->definitions[0].tempId()].set_constant(v); - else - ctx.info[instr->definitions[0].tempId()].set_literal(v); + ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, v); break; } case aco_opcode::v_bfrev_b32: case aco_opcode::s_brev_b32: { if (instr->operands[0].isConstant()) { uint32_t v = util_bitreverse(instr->operands[0].constantValue()); - if (v <= 64 || v >= 0xfffffff0) - ctx.info[instr->definitions[0].tempId()].set_constant(v); - else - ctx.info[instr->definitions[0].tempId()].set_literal(v); + ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, v); } break; } @@ -1101,10 +1133,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) unsigned size = instr->operands[0].constantValue() & 0x1f; unsigned start = instr->operands[1].constantValue() & 0x1f; uint32_t v = ((1u << size) - 1u) << start; - if (v <= 64 || v >= 0xfffffff0) - ctx.info[instr->definitions[0].tempId()].set_constant(v); - else - ctx.info[instr->definitions[0].tempId()].set_literal(v); + ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, v); } break; } @@ -1629,7 +1658,7 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in } else if (cmp->operands[constant_operand].isTemp()) { Temp tmp = cmp->operands[constant_operand].getTemp(); unsigned id = original_temp_id(ctx, tmp); - if (!ctx.info[id].is_constant() && !ctx.info[id].is_literal()) + if (!ctx.info[id].is_constant_or_literal(32)) return false; constant = ctx.info[id].val; } else { @@ -2115,7 +2144,7 @@ bool combine_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr, uint32_t val; if (operands[i].isConstant()) { val = operands[i].constantValue(); - } else if (operands[i].isTemp() && ctx.info[operands[i].tempId()].is_constant_or_literal()) { + } else if (operands[i].isTemp() && ctx.info[operands[i].tempId()].is_constant_or_literal(32)) { 
val = ctx.info[operands[i].tempId()].val; } else { continue; @@ -2791,9 +2820,10 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr) } if (!instr->operands[i].isTemp()) continue; + unsigned bits = get_operand_size(instr, i); /* if one of the operands is sgpr, we cannot add a literal somewhere else on pre-GFX10 or operands other than the 1st */ if (instr->operands[i].getTemp().type() == RegType::sgpr && (i > 0 || ctx.program->chip_class < GFX10)) { - if (!sgpr_used && ctx.info[instr->operands[i].tempId()].is_literal()) { + if (!sgpr_used && ctx.info[instr->operands[i].tempId()].is_literal(bits)) { literal_uses = ctx.uses[instr->operands[i].tempId()]; literal_idx = i; } else { @@ -2802,7 +2832,7 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr) sgpr_used = true; /* don't break because we still need to check constants */ } else if (!sgpr_used && - ctx.info[instr->operands[i].tempId()].is_literal() && + ctx.info[instr->operands[i].tempId()].is_literal(bits) && ctx.uses[instr->operands[i].tempId()] < literal_uses) { literal_uses = ctx.uses[instr->operands[i].tempId()]; literal_idx = i; @@ -2881,6 +2911,7 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr) /* choose a literal to apply */ for (unsigned i = 0; i < num_operands; i++) { Operand op = instr->operands[i]; + unsigned bits = get_operand_size(instr, i); if (instr->isVALU() && op.isTemp() && op.getTemp().type() == RegType::sgpr && op.tempId() != sgpr_ids[0]) @@ -2889,7 +2920,7 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr) if (op.isLiteral()) { current_literal = op; continue; - } else if (!op.isTemp() || !ctx.info[op.tempId()].is_literal()) { + } else if (!op.isTemp() || !ctx.info[op.tempId()].is_literal(bits)) { continue; } @@ -2974,7 +3005,8 @@ void apply_literals(opt_ctx &ctx, aco_ptr<Instruction>& instr) if (instr->isSALU() || instr->isVALU()) { for (unsigned i = 0; i < instr->operands.size(); i++) { Operand op = instr->operands[i]; 
- if (op.isTemp() && ctx.info[op.tempId()].is_literal() && ctx.uses[op.tempId()] == 0) { + unsigned bits = get_operand_size(instr, i); + if (op.isTemp() && ctx.info[op.tempId()].is_literal(bits) && ctx.uses[op.tempId()] == 0) { Operand literal(ctx.info[op.tempId()].val); if (instr->isVALU() && i > 0) to_VOP3(ctx, instr); |