diff options
author | Rhys Perry <[email protected]> | 2019-09-23 14:31:24 +0100 |
---|---|---|
committer | Rhys Perry <[email protected]> | 2019-09-26 13:19:51 +0100 |
commit | 06ea3325c32025a1adc36adb84276ba737e63acd (patch) | |
tree | 3db63cbcb3cfb1c2890fa1d3165cbfa998dc57ab /src/amd/compiler | |
parent | 86ecf92c23ccd4a5493a27104e9c85b8fa41f3a3 (diff) |
aco: CSE readlane/readfirstlane/permute/reduce with the same exec mask
v2: rename pass_temp to pass_flags
v2: also CSE reductions
v3: add ds_swizzle_b32 support
v3: check gds/offset0/offset1 fields
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Diffstat (limited to 'src/amd/compiler')
-rw-r--r-- | src/amd/compiler/aco_ir.h | 1 | ||||
-rw-r--r-- | src/amd/compiler/aco_opt_value_numbering.cpp | 45 |
2 files changed, 37 insertions, 9 deletions
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 663635e5b93..139dd1e371c 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -561,6 +561,7 @@ class Block; struct Instruction { aco_opcode opcode; Format format; + uint32_t pass_flags; aco::span<Operand> operands; aco::span<Definition> definitions; diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index 100537fd47a..856f7d7bf6c 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -83,9 +83,15 @@ struct InstrPred { if (a->operands.size() != b->operands.size() || a->definitions.size() != b->definitions.size()) return false; /* possible with pseudo-instructions */ /* We can't value number v_readlane_b32 across control flow or discards - * because of the possibility of live-range splits. */ - if (a->opcode == aco_opcode::v_readfirstlane_b32 || a->opcode == aco_opcode::v_readlane_b32) - return false; + * because of the possibility of live-range splits. + * We can't value number permutes for the same reason as + * v_readlane_b32 and because discards affect the result */ + if (a->opcode == aco_opcode::v_readfirstlane_b32 || a->opcode == aco_opcode::v_readlane_b32 || + a->opcode == aco_opcode::ds_bpermute_b32 || a->opcode == aco_opcode::ds_permute_b32 || + a->opcode == aco_opcode::ds_swizzle_b32 || a->format == Format::PSEUDO_REDUCTION) { + if (a->pass_flags != b->pass_flags) + return false; + } for (unsigned i = 0; i < a->operands.size(); i++) { if (a->operands[i].isConstant()) { if (!b->operands[i].isConstant()) @@ -176,8 +182,11 @@ struct InstrPred { return false; return true; } - case Format::PSEUDO_REDUCTION: - return false; + case Format::PSEUDO_REDUCTION: { + Pseudo_reduction_instruction *aR = static_cast<Pseudo_reduction_instruction*>(a); + Pseudo_reduction_instruction *bR = static_cast<Pseudo_reduction_instruction*>(b); + return aR->reduce_op == bR->reduce_op && aR->cluster_size == bR->cluster_size; + } case Format::MTBUF: { /* this is fine since they are only used for vertex input fetches */ MTBUF_instruction* aM = static_cast<MTBUF_instruction *>(a); @@ -197,8 +206,17 @@ struct InstrPred { case Format::FLAT: case Format::GLOBAL: case Format::SCRATCH: - case Format::DS: return false; + case Format::DS: { + /* we already handle potential issue with permute/swizzle above */ + DS_instruction* aD = static_cast<DS_instruction *>(a); + DS_instruction* bD = static_cast<DS_instruction *>(b); + if (a->opcode != aco_opcode::ds_bpermute_b32 && + a->opcode != aco_opcode::ds_permute_b32 && + a->opcode != aco_opcode::ds_swizzle_b32) + return false; + return aD->gds == bD->gds && aD->offset0 == bD->offset0 && aD->offset1 == bD->offset1; + } case Format::MIMG: { MIMG_instruction* aM = static_cast<MIMG_instruction*>(a); MIMG_instruction* bM = static_cast<MIMG_instruction*>(b); @@ -226,7 +244,8 @@ typedef std::unordered_set<Instruction*, InstrHash, InstrPred> expr_set; void process_block(Block& block, expr_set& expr_values, - std::map<uint32_t, Temp>& renames) + std::map<uint32_t, Temp>& renames, + uint32_t *exec_id) { bool run = false; std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin(); @@ -271,6 +290,12 @@ void process_block(Block& block, renames[instr->definitions[0].tempId()] = instr->operands[0].getTemp(); } + if (instr->opcode == aco_opcode::p_discard_if || + instr->opcode == aco_opcode::p_demote_to_helper) + (*exec_id)++; + + instr->pass_flags = *exec_id; + std::pair<expr_set::iterator, bool> res = expr_values.emplace(instr.get()); /* if there was already an expression with the same value number */ @@ -312,16 +337,18 @@ void value_numbering(Program* program) { std::vector<expr_set> expr_values(program->blocks.size()); std::map<uint32_t, Temp> renames; + uint32_t exec_id = 0; for (Block& block : program->blocks) { if (block.logical_idom != -1) { /* initialize expr_values from idom */ expr_values[block.index] = expr_values[block.logical_idom]; - process_block(block, expr_values[block.index], renames); + process_block(block, expr_values[block.index], renames, &exec_id); } else { expr_set empty; - process_block(block, empty, renames); + process_block(block, empty, renames, &exec_id); } + exec_id++; } for (Block& block : program->blocks) |