aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Timur Kristóf <timur.kristof@gmail.com> 2020-03-09 12:44:03 +0100
committer Marge Bot <eric+marge@anholt.net> 2020-03-11 08:34:10 +0000
commit a952bf394609134ff96f4bebb41bd022c621bfa6 (patch)
tree 866da5f2aba478523bbc762116879101e91de54f
parent 57a7d58c5d7651ac10a41f08afd02f84064abbb3 (diff)
aco: Fix LS VGPR init bug on affected hardware.
Vega 10 and Raven have a HW bug: when the HS thread count is zero, the LS input arguments are loaded into the wrong registers. This commit works around this by reading the affected arguments from the registers where the data actually is.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3964>
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp 34
1 files changed, 34 insertions, 0 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 96b2f8c9c71..efd4058e254 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -9383,6 +9383,36 @@ static void emit_streamout(isel_context *ctx, unsigned stream)
} /* end namespace */
+/* HW bug workaround: with zero HS threads, read the LS inputs from the VGPRs they really landed in. */
+void fix_ls_vgpr_init_bug(isel_context *ctx, Pseudo_instruction *startpgm)
+{
+   assert(ctx->shader->info.stage == MESA_SHADER_VERTEX);
+   Builder bld(ctx->program, ctx->block);
+   /* Extract the 8-bit field at bit offset hs_idx * 8 of merged_wave_info; SCC (def 1) = (field != 0). */
+   constexpr unsigned hs_idx = 1u;
+   Builder::Result hs_thread_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
+                                              get_arg(ctx, ctx->args->merged_wave_info),
+                                              Operand((8u << 16) | (hs_idx * 8u)));
+   Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp());
+
+   /* If there are no HS threads, SPI mistakenly loads the LS VGPRs starting at VGPR 0.
+    * v_cndmask_b32 is a VALU select (hence vop2, not sop2) that yields src1 where the condition
+    * is set and src0 otherwise: src0 is therefore the register holding the data when the bug
+    * triggers, and src1 is the regular argument used when HS threads exist. */
+   Temp instance_id = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
+                               get_arg(ctx, ctx->args->rel_auto_id),
+                               get_arg(ctx, ctx->args->ac.instance_id), ls_has_nonzero_hs_threads);
+   Temp rel_auto_id = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
+                               get_arg(ctx, ctx->args->ac.tcs_rel_ids),
+                               get_arg(ctx, ctx->args->rel_auto_id), ls_has_nonzero_hs_threads);
+   Temp vertex_id = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
+                             get_arg(ctx, ctx->args->ac.tcs_patch_id),
+                             get_arg(ctx, ctx->args->ac.vertex_id), ls_has_nonzero_hs_threads);
+   ctx->arg_temps[ctx->args->ac.instance_id.arg_index] = instance_id;
+   ctx->arg_temps[ctx->args->rel_auto_id.arg_index] = rel_auto_id;
+   ctx->arg_temps[ctx->args->ac.vertex_id.arg_index] = vertex_id;
+}
+
void split_arguments(isel_context *ctx, Pseudo_instruction *startpgm)
{
/* Split all arguments except for the first (ring_offsets) and the last
@@ -9518,6 +9548,10 @@ void select_program(Program *program,
/* needs to be after init_context() for FS */
Pseudo_instruction *startpgm = add_startpgm(&ctx);
append_logical_start(ctx.block);
+
+ if (unlikely(args->options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs))
+ fix_ls_vgpr_init_bug(&ctx, startpgm);
+
split_arguments(&ctx, startpgm);
}