diff options
author | Samuel Pitoiset <[email protected]> | 2020-01-17 15:14:41 +0100 |
---|---|---|
committer | Samuel Pitoiset <[email protected]> | 2020-01-23 14:40:27 +0100 |
commit | 22d8822683139a815603a969a54c382ec3378d9e (patch) | |
tree | 3217f9b893b343eeedba7671130ad8ee56bb2e66 /src/amd | |
parent | d6af7571c22816a0724737711941589e4cbe248a (diff) |
aco: implement nir_intrinsic_load_global on GFX6
GFX6 doesn't have FLAT instructions, use MUBUF instructions instead.
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3477>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/compiler/aco_instruction_selection.cpp | 125 |
1 files changed, 94 insertions, 31 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 9d29fae316c..235cac4e30e 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -4740,40 +4740,103 @@ void visit_load_global(isel_context *ctx, nir_intrinsic_instr *instr) aco_opcode op; if (dst.type() == RegType::vgpr || (glc && ctx->options->chip_class < GFX8)) { bool global = ctx->options->chip_class >= GFX9; - aco_opcode op; - switch (num_bytes) { - case 4: - op = global ? aco_opcode::global_load_dword : aco_opcode::flat_load_dword; - break; - case 8: - op = global ? aco_opcode::global_load_dwordx2 : aco_opcode::flat_load_dwordx2; - break; - case 12: - op = global ? aco_opcode::global_load_dwordx3 : aco_opcode::flat_load_dwordx3; - break; - case 16: - op = global ? aco_opcode::global_load_dwordx4 : aco_opcode::flat_load_dwordx4; - break; - default: - unreachable("load_global not implemented for this size."); - } - aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 2, 1)}; - flat->operands[0] = Operand(addr); - flat->operands[1] = Operand(s1); - flat->glc = glc; - flat->dlc = dlc; - flat->barrier = barrier_buffer; - if (dst.type() == RegType::sgpr) { - Temp vec = bld.tmp(RegType::vgpr, dst.size()); - flat->definitions[0] = Definition(vec); - ctx->block->instructions.emplace_back(std::move(flat)); - bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), vec); + if (ctx->options->chip_class >= GFX7) { + aco_opcode op; + switch (num_bytes) { + case 4: + op = global ? aco_opcode::global_load_dword : aco_opcode::flat_load_dword; + break; + case 8: + op = global ? aco_opcode::global_load_dwordx2 : aco_opcode::flat_load_dwordx2; + break; + case 12: + op = global ? aco_opcode::global_load_dwordx3 : aco_opcode::flat_load_dwordx3; + break; + case 16: + op = global ? aco_opcode::global_load_dwordx4 : aco_opcode::flat_load_dwordx4; + break; + default: + unreachable("load_global not implemented for this size."); + } + + aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 2, 1)}; + flat->operands[0] = Operand(addr); + flat->operands[1] = Operand(s1); + flat->glc = glc; + flat->dlc = dlc; + flat->barrier = barrier_buffer; + + if (dst.type() == RegType::sgpr) { + Temp vec = bld.tmp(RegType::vgpr, dst.size()); + flat->definitions[0] = Definition(vec); + ctx->block->instructions.emplace_back(std::move(flat)); + bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), vec); + } else { + flat->definitions[0] = Definition(dst); + ctx->block->instructions.emplace_back(std::move(flat)); + } + emit_split_vector(ctx, dst, num_components); } else { - flat->definitions[0] = Definition(dst); - ctx->block->instructions.emplace_back(std::move(flat)); + assert(ctx->options->chip_class == GFX6); + + /* GFX6 doesn't support loading vec3, expand to vec4. */ + num_bytes = num_bytes == 12 ? 16 : num_bytes; + + aco_opcode op; + switch (num_bytes) { + case 4: + op = aco_opcode::buffer_load_dword; + break; + case 8: + op = aco_opcode::buffer_load_dwordx2; + break; + case 16: + op = aco_opcode::buffer_load_dwordx4; + break; + default: + unreachable("load_global not implemented for this size."); + } + + Temp rsrc = get_gfx6_global_rsrc(bld, addr); + + aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)}; + mubuf->operands[0] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1); + mubuf->operands[1] = Operand(rsrc); + mubuf->operands[2] = Operand(0u); + mubuf->glc = glc; + mubuf->dlc = false; + mubuf->offset = 0; + mubuf->addr64 = addr.type() == RegType::vgpr; + mubuf->disable_wqm = false; + mubuf->barrier = barrier_buffer; + aco_ptr<Instruction> instr = std::move(mubuf); + + /* expand vector */ + if (dst.size() == 3) { + Temp vec = bld.tmp(v4); + instr->definitions[0] = Definition(vec); + bld.insert(std::move(instr)); + emit_split_vector(ctx, vec, 4); + + instr.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, 3, 1)); + instr->operands[0] = Operand(emit_extract_vector(ctx, vec, 0, v1)); + instr->operands[1] = Operand(emit_extract_vector(ctx, vec, 1, v1)); + instr->operands[2] = Operand(emit_extract_vector(ctx, vec, 2, v1)); + } + + if (dst.type() == RegType::sgpr) { + Temp vec = bld.tmp(RegType::vgpr, dst.size()); + instr->definitions[0] = Definition(vec); + bld.insert(std::move(instr)); + expand_vector(ctx, vec, dst, num_components, (1 << num_components) - 1); + bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), vec); + } else { + instr->definitions[0] = Definition(dst); + bld.insert(std::move(instr)); + emit_split_vector(ctx, dst, num_components); + } } - emit_split_vector(ctx, dst, num_components); } else { switch (num_bytes) { case 4: |