diff options
author | Daniel Schürmann <[email protected]> | 2020-04-10 11:52:13 +0100 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-04-13 16:35:40 +0000 |
commit | 28d36d26c2212276e1238fad8f0b12caab97fee8 (patch) | |
tree | 4c2fc9e294e82cda7d49c1c35dc175fcacfc4d6a /src/amd/compiler/aco_optimizer.cpp | |
parent | 0e4432bfbaef1bca65239848c373cd683f083ee0 (diff) |
aco: fix p_extract_vector optimization in presence of unequally sized vector operands
Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4506>
Diffstat (limited to 'src/amd/compiler/aco_optimizer.cpp')
-rw-r--r-- | src/amd/compiler/aco_optimizer.cpp | 49 |
1 files changed, 27 insertions, 22 deletions
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 5e80d9afe7f..2e83b4e9d74 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -954,33 +954,38 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) case aco_opcode::p_extract_vector: { /* mov */ if (!ctx.info[instr->operands[0].tempId()].is_vec()) break; + + /* check if we index directly into a vector element */ Instruction* vec = ctx.info[instr->operands[0].tempId()].instr; - if (vec->definitions[0].getTemp().size() == vec->operands.size() && /* TODO: what about 64bit or other combinations? */ - vec->operands[0].size() == instr->definitions[0].size()) { - - /* convert this extract into a mov instruction */ - Operand vec_op = vec->operands[instr->operands[1].constantValue()]; - bool is_vgpr = instr->definitions[0].getTemp().type() == RegType::vgpr; - aco_opcode opcode = is_vgpr ? aco_opcode::v_mov_b32 : aco_opcode::s_mov_b32; - Format format = is_vgpr ? Format::VOP1 : Format::SOP1; - instr->opcode = opcode; - instr->format = format; - while (instr->operands.size() > 1) - instr->operands.pop_back(); - instr->operands[0] = vec_op; + const unsigned index = instr->operands[1].constantValue(); + const unsigned dst_offset = index * instr->definitions[0].bytes(); + unsigned offset = 0; - if (vec_op.isConstant()) { - if (vec_op.isLiteral()) - ctx.info[instr->definitions[0].tempId()].set_literal(vec_op.constantValue()); - else if (vec_op.size() == 1) - ctx.info[instr->definitions[0].tempId()].set_constant(vec_op.constantValue()); - else if (vec_op.size() == 2) - ctx.info[instr->definitions[0].tempId()].set_constant_64bit(vec_op.constantValue()); + for (const Operand& op : vec->operands) { + if (offset < dst_offset) { + offset += op.bytes(); + continue; + } else if (offset != dst_offset || op.bytes() != instr->definitions[0].bytes()) { + break; + } + /* convert this extract into a copy instruction */ + instr->opcode = aco_opcode::p_parallelcopy; + instr->operands.pop_back(); + instr->operands[0] = op; + + if (op.isConstant()) { + if (op.isLiteral()) + ctx.info[instr->definitions[0].tempId()].set_literal(op.constantValue()); + else if (op.size() == 1) + ctx.info[instr->definitions[0].tempId()].set_constant(op.constantValue()); + else if (op.size() == 2) + ctx.info[instr->definitions[0].tempId()].set_constant_64bit(op.constantValue()); } else { - assert(vec_op.isTemp()); - ctx.info[instr->definitions[0].tempId()].set_temp(vec_op.getTemp()); + assert(op.isTemp()); + ctx.info[instr->definitions[0].tempId()].set_temp(op.getTemp()); } + break; } break; } |