diff options
author | Rhys Perry <[email protected]> | 2020-04-16 19:27:13 +0100 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-04-24 18:52:54 +0000 |
commit | 936b70c8cfe37a51a568a211acbc49b5fe997e00 (patch) | |
tree | 79ce28c847746dd2c886d52ffb93b57f7eb46528 /src/amd/compiler | |
parent | 18817041f7a62806d53abee681d8eaaffcb87834 (diff) |
aco: refactor visit_store_scratch() to use new helpers
Should support 8/16-bit stores now
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4639>
Diffstat (limited to 'src/amd/compiler')
-rw-r--r-- | src/amd/compiler/aco_instruction_selection.cpp | 47 |
1 files changed, 15 insertions, 32 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 8993f2c1a10..47d3f015c2a 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6798,46 +6798,29 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) { } void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) { - assert(instr->src[0].ssa->bit_size == 32 || instr->src[0].ssa->bit_size == 64); Builder bld(ctx->program, ctx->block); Temp rsrc = get_scratch_resource(ctx); Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa)); Temp offset = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa)); unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8; - unsigned writemask = nir_intrinsic_write_mask(instr); - - while (writemask) { - int start, count; - u_bit_scan_consecutive_range(&writemask, &start, &count); - int num_bytes = count * elem_size_bytes; - - if (num_bytes > 16) { - assert(elem_size_bytes == 8); - writemask |= (((count - 2) << 1) - 1) << (start + 2); - count = 2; - num_bytes = 16; - } - - // TODO: check alignment of sub-dword stores - // TODO: split 3 bytes. there is no store instruction for that + unsigned writemask = widen_mask(nir_intrinsic_write_mask(instr), elem_size_bytes); - Temp write_data; - if (count != instr->num_components) { - aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, count, 1)}; - for (int i = 0; i < count; i++) { - Temp elem = emit_extract_vector(ctx, data, start + i, RegClass(RegType::vgpr, elem_size_bytes / 4)); - vec->operands[i] = Operand(elem); - } - write_data = bld.tmp(RegClass(RegType::vgpr, count * elem_size_bytes / 4)); - vec->definitions[0] = Definition(write_data); - ctx->block->instructions.emplace_back(std::move(vec)); - } else { - write_data = data; - } + unsigned write_count = 0; + Temp write_datas[32]; + unsigned offsets[32]; + split_buffer_store(ctx, instr, false, RegType::vgpr, data, writemask, + 16, &write_count, write_datas, offsets); + for (unsigned i = 0; i < write_count; i++) { aco_opcode op; - switch (num_bytes) { + switch (write_datas[i].bytes()) { + case 1: + op = aco_opcode::buffer_store_byte; + break; + case 2: + op = aco_opcode::buffer_store_short; + break; case 4: op = aco_opcode::buffer_store_dword; break; @@ -6854,7 +6837,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) { unreachable("Invalid data size for nir_intrinsic_store_scratch."); } - bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_data, start * elem_size_bytes, true); + bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_datas[i], offsets[i], true); } } |