diff options
author | Rhys Perry <[email protected]> | 2020-05-25 14:47:25 +0100 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-06-24 10:52:27 +0000 |
commit | 35b5e1fc7c5c64ad93607ab8772239da3d56ceda (patch) | |
tree | c52c86704a3912be09a0a6e5a8a5a44b2386c350 /src/amd | |
parent | c702f8ed1572ab32cc3904a6d92cc5126c7c6080 (diff) |
aco: allow SMEM for some sub-dword accesses
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5207>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/compiler/aco_instruction_selection.cpp | 4 |
-rw-r--r-- | src/amd/compiler/aco_instruction_selection_setup.cpp | 24 |
2 files changed, 20 insertions, 8 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 83f45d6ed8d..6a35c3188c4 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5145,7 +5145,7 @@ void load_buffer(isel_context *ctx, unsigned num_components, unsigned component_
 {
    Builder bld(ctx->program, ctx->block);

-   bool use_smem = dst.type() != RegType::vgpr && ((ctx->options->chip_class >= GFX8 && component_size >= 4) || readonly) && allow_smem;
+   bool use_smem = dst.type() != RegType::vgpr && (ctx->options->chip_class >= GFX8 || readonly) && allow_smem;
    if (use_smem)
       offset = bld.as_uniform(offset);

@@ -6175,7 +6175,7 @@ void visit_store_ssbo(isel_context *ctx, nir_intrinsic_instr *instr)

    bool smem = !nir_src_is_divergent(instr->src[2]) &&
                ctx->options->chip_class >= GFX8 &&
-               elem_size_bytes >= 4 &&
+               (elem_size_bytes >= 4 || can_subdword_ssbo_store_use_smem(instr)) &&
                allow_smem;
    if (smem)
       offset = bld.as_uniform(offset);
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index f82cf78fbcc..dbba3d155b4 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -301,6 +301,20 @@ uint8_t get_all_buffer_resource_flags(isel_context *ctx, nir_ssa_def *def, unsig
    return res;
 }

+bool can_subdword_ssbo_store_use_smem(nir_intrinsic_instr *intrin)
+{
+   unsigned wrmask = nir_intrinsic_write_mask(intrin);
+   if (util_last_bit(wrmask) != util_bitcount(wrmask) ||
+       util_bitcount(wrmask) * intrin->src[0].ssa->bit_size % 32 ||
+       util_bitcount(wrmask) != intrin->src[0].ssa->num_components)
+      return false;
+
+   if (nir_intrinsic_align_mul(intrin) % 4 || nir_intrinsic_align_offset(intrin) % 4)
+      return false;
+
+   return true;
+}
+
 void fill_desc_set_info(isel_context *ctx, nir_function_impl *impl)
 {
    radv_pipeline_layout *pipeline_layout = ctx->options->layout;
@@ -334,8 +348,7 @@ void fill_desc_set_info(isel_context *ctx, nir_function_impl *impl)
          bool glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT | ACCESS_NON_READABLE);
          switch (intrin->intrinsic) {
          case nir_intrinsic_load_ssbo: {
-            unsigned elem_size = intrin->dest.ssa.bit_size;
-            if (nir_dest_is_divergent(intrin->dest) || ctx->program->chip_class < GFX8 || elem_size < 32)
+            if (nir_dest_is_divergent(intrin->dest) || ctx->program->chip_class < GFX8)
                flags |= glc ? has_glc_vmem_load : has_nonglc_vmem_load;
             res = intrin->src[0].ssa;
             break;
@@ -353,13 +366,12 @@ void fill_desc_set_info(isel_context *ctx, nir_function_impl *impl)
             flags |= has_glc_vmem_load | has_glc_vmem_store;
             res = intrin->src[0].ssa;
             break;
-         case nir_intrinsic_store_ssbo: {
-            unsigned elem_size = intrin->src[0].ssa->bit_size;
-            if (nir_src_is_divergent(intrin->src[2]) || ctx->program->chip_class < GFX8 || elem_size < 32)
+         case nir_intrinsic_store_ssbo:
+            if (nir_src_is_divergent(intrin->src[2]) || ctx->program->chip_class < GFX8 ||
+                (intrin->src[0].ssa->bit_size < 32 && !can_subdword_ssbo_store_use_smem(intrin)))
                flags |= glc ? has_glc_vmem_store : has_nonglc_vmem_store;
             res = intrin->src[1].ssa;
             break;
-         }
          case nir_intrinsic_load_global:
             if (!(access & ACCESS_NON_WRITEABLE))
                flags |= glc ? has_glc_vmem_load : has_nonglc_vmem_load;