summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/amd/compiler/aco_instruction_selection.cpp50
1 files changed, 28 insertions, 22 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 33b7f319754..6d31cb7cf6b 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -4794,20 +4794,36 @@ void visit_shared_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
ctx->block->instructions.emplace_back(std::move(ds));
}
-void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
- assert(instr->dest.ssa.bit_size == 32 || instr->dest.ssa.bit_size == 64);
+Temp get_scratch_resource(isel_context *ctx)
+{
Builder bld(ctx->program, ctx->block);
Temp scratch_addr = ctx->private_segment_buffer;
- if (ctx->stage != MESA_SHADER_COMPUTE)
+ if (ctx->stage != compute_cs)
scratch_addr = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), ctx->private_segment_buffer, Operand(0u));
- uint32_t rsrc_conf;
- /* older generations need element size = 16 bytes */
- if (ctx->program->chip_class >= GFX9)
- rsrc_conf = 0x00E00000u;
- else
- rsrc_conf = 0x00F80000u;
- /* buffer res = addr + num_records = -1, index_stride = 64, add_tid_enable = true */
- Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), scratch_addr, Operand(-1u), Operand(rsrc_conf));
+
+ uint32_t rsrc_conf = S_008F0C_ADD_TID_ENABLE(1) |
+ S_008F0C_INDEX_STRIDE(ctx->options->wave_size == 64 ? 3 : 2);;
+
+ if (ctx->program->chip_class >= GFX10) {
+ rsrc_conf |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(3) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else if (ctx->program->chip_class <= GFX7) { /* dfmt modifies stride on GFX8/GFX9 when ADD_TID_EN=1 */
+ rsrc_conf |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ /* older generations need element size = 16 bytes. element size removed in GFX9 */
+ if (ctx->program->chip_class <= GFX8)
+ rsrc_conf |= S_008F0C_ELEMENT_SIZE(3);
+
+ return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), scratch_addr, Operand(-1u), Operand(rsrc_conf));
+}
+
+void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
+ assert(instr->dest.ssa.bit_size == 32 || instr->dest.ssa.bit_size == 64);
+ Builder bld(ctx->program, ctx->block);
+ Temp rsrc = get_scratch_resource(ctx);
Temp offset = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
@@ -4866,17 +4882,7 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
assert(instr->src[0].ssa->bit_size == 32 || instr->src[0].ssa->bit_size == 64);
Builder bld(ctx->program, ctx->block);
- Temp scratch_addr = ctx->private_segment_buffer;
- if (ctx->stage != MESA_SHADER_COMPUTE)
- scratch_addr = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), ctx->private_segment_buffer, Operand(0u));
- uint32_t rsrc_conf;
- /* older generations need element size = 16 bytes */
- if (ctx->program->chip_class >= GFX9)
- rsrc_conf = 0x00E00000u;
- else
- rsrc_conf = 0x00F80000u;
- /* buffer res = addr + num_records = -1, index_stride = 64, add_tid_enable = true */
- Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), scratch_addr, Operand(-1u), Operand(rsrc_conf));
+ Temp rsrc = get_scratch_resource(ctx);
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
Temp offset = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));