about | summary | refs | log | tree | commit | diff | stats
path: root/src/amd
diff options
context:
space:
mode:
author Daniel Schürmann <[email protected]> 2020-04-10 11:52:13 +0100
committer Marge Bot <[email protected]> 2020-04-13 16:35:40 +0000
commit 28d36d26c2212276e1238fad8f0b12caab97fee8 (patch)
tree 4c2fc9e294e82cda7d49c1c35dc175fcacfc4d6a /src/amd
parent 0e4432bfbaef1bca65239848c373cd683f083ee0 (diff)
aco: fix p_extract_vector optimization in presence of unequally sized vector operands
Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4506>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/compiler/aco_optimizer.cpp 49
1 files changed, 27 insertions, 22 deletions
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 5e80d9afe7f..2e83b4e9d74 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -954,33 +954,38 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
case aco_opcode::p_extract_vector: { /* mov */
if (!ctx.info[instr->operands[0].tempId()].is_vec())
break;
+
+ /* check if we index directly into a vector element */
Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
- if (vec->definitions[0].getTemp().size() == vec->operands.size() && /* TODO: what about 64bit or other combinations? */
- vec->operands[0].size() == instr->definitions[0].size()) {
-
- /* convert this extract into a mov instruction */
- Operand vec_op = vec->operands[instr->operands[1].constantValue()];
- bool is_vgpr = instr->definitions[0].getTemp().type() == RegType::vgpr;
- aco_opcode opcode = is_vgpr ? aco_opcode::v_mov_b32 : aco_opcode::s_mov_b32;
- Format format = is_vgpr ? Format::VOP1 : Format::SOP1;
- instr->opcode = opcode;
- instr->format = format;
- while (instr->operands.size() > 1)
- instr->operands.pop_back();
- instr->operands[0] = vec_op;
+ const unsigned index = instr->operands[1].constantValue();
+ const unsigned dst_offset = index * instr->definitions[0].bytes();
+ unsigned offset = 0;
- if (vec_op.isConstant()) {
- if (vec_op.isLiteral())
- ctx.info[instr->definitions[0].tempId()].set_literal(vec_op.constantValue());
- else if (vec_op.size() == 1)
- ctx.info[instr->definitions[0].tempId()].set_constant(vec_op.constantValue());
- else if (vec_op.size() == 2)
- ctx.info[instr->definitions[0].tempId()].set_constant_64bit(vec_op.constantValue());
+ for (const Operand& op : vec->operands) {
+ if (offset < dst_offset) {
+ offset += op.bytes();
+ continue;
+ } else if (offset != dst_offset || op.bytes() != instr->definitions[0].bytes()) {
+ break;
+ }
+ /* convert this extract into a copy instruction */
+ instr->opcode = aco_opcode::p_parallelcopy;
+ instr->operands.pop_back();
+ instr->operands[0] = op;
+
+ if (op.isConstant()) {
+ if (op.isLiteral())
+ ctx.info[instr->definitions[0].tempId()].set_literal(op.constantValue());
+ else if (op.size() == 1)
+ ctx.info[instr->definitions[0].tempId()].set_constant(op.constantValue());
+ else if (op.size() == 2)
+ ctx.info[instr->definitions[0].tempId()].set_constant_64bit(op.constantValue());
} else {
- assert(vec_op.isTemp());
- ctx.info[instr->definitions[0].tempId()].set_temp(vec_op.getTemp());
+ assert(op.isTemp());
+ ctx.info[instr->definitions[0].tempId()].set_temp(op.getTemp());
}
+ break;
}
break;
}