aboutsummaryrefslogtreecommitdiffstats
path: root/src/amd/compiler
diff options
context:
space:
mode:
authorRhys Perry <[email protected]>2020-04-16 19:27:13 +0100
committerMarge Bot <[email protected]>2020-04-24 18:52:54 +0000
commit936b70c8cfe37a51a568a211acbc49b5fe997e00 (patch)
tree79ce28c847746dd2c886d52ffb93b57f7eb46528 /src/amd/compiler
parent18817041f7a62806d53abee681d8eaaffcb87834 (diff)
aco: refactor visit_store_scratch() to use new helpers
Should support 8/16-bit stores now Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4639>
Diffstat (limited to 'src/amd/compiler')
-rw-r--r--src/amd/compiler/aco_instruction_selection.cpp47
1 files changed, 15 insertions, 32 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 8993f2c1a10..47d3f015c2a 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6798,46 +6798,29 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
}
void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
- assert(instr->src[0].ssa->bit_size == 32 || instr->src[0].ssa->bit_size == 64);
Builder bld(ctx->program, ctx->block);
Temp rsrc = get_scratch_resource(ctx);
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
Temp offset = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8;
- unsigned writemask = nir_intrinsic_write_mask(instr);
-
- while (writemask) {
- int start, count;
- u_bit_scan_consecutive_range(&writemask, &start, &count);
- int num_bytes = count * elem_size_bytes;
-
- if (num_bytes > 16) {
- assert(elem_size_bytes == 8);
- writemask |= (((count - 2) << 1) - 1) << (start + 2);
- count = 2;
- num_bytes = 16;
- }
-
- // TODO: check alignment of sub-dword stores
- // TODO: split 3 bytes. there is no store instruction for that
+ unsigned writemask = widen_mask(nir_intrinsic_write_mask(instr), elem_size_bytes);
- Temp write_data;
- if (count != instr->num_components) {
- aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, count, 1)};
- for (int i = 0; i < count; i++) {
- Temp elem = emit_extract_vector(ctx, data, start + i, RegClass(RegType::vgpr, elem_size_bytes / 4));
- vec->operands[i] = Operand(elem);
- }
- write_data = bld.tmp(RegClass(RegType::vgpr, count * elem_size_bytes / 4));
- vec->definitions[0] = Definition(write_data);
- ctx->block->instructions.emplace_back(std::move(vec));
- } else {
- write_data = data;
- }
+ unsigned write_count = 0;
+ Temp write_datas[32];
+ unsigned offsets[32];
+ split_buffer_store(ctx, instr, false, RegType::vgpr, data, writemask,
+ 16, &write_count, write_datas, offsets);
+ for (unsigned i = 0; i < write_count; i++) {
aco_opcode op;
- switch (num_bytes) {
+ switch (write_datas[i].bytes()) {
+ case 1:
+ op = aco_opcode::buffer_store_byte;
+ break;
+ case 2:
+ op = aco_opcode::buffer_store_short;
+ break;
case 4:
op = aco_opcode::buffer_store_dword;
break;
@@ -6854,7 +6837,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
unreachable("Invalid data size for nir_intrinsic_store_scratch.");
}
- bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_data, start * elem_size_bytes, true);
+ bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_datas[i], offsets[i], true);
}
}