aboutsummaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
authorTimur Kristóf <[email protected]>2020-03-31 10:49:52 +0200
committerMarge Bot <[email protected]>2020-04-07 11:29:35 +0000
commitd345bfe1958db162b1ddde85eccd3248f884f231 (patch)
tree287b4fb35bc3248328ae6bb8a8715522f297b208 /src/amd
parent90b1047fdf84724a00be36f204e790246ad63a72 (diff)
aco: Extract merged_wave_info_to_mask to its own function.
Currently we only use this at the beginning of merged shader parts, but we are going to need to use it with some NGG code as well.

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3576>
Diffstat (limited to 'src/amd')
-rw-r--r--src/amd/compiler/aco_instruction_selection.cpp45
1 files changed, 26 insertions, 19 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 8bc0e58cf48..6128878c83e 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -10254,6 +10254,31 @@ void cleanup_cfg(Program *program)
}
}
+Temp merged_wave_info_to_mask(isel_context *ctx, unsigned i)
+{
+ Builder bld(ctx->program, ctx->block);
+
+ /* Bring the count for part i to the low bits by shifting merged_wave_info right by i * 8 (no shift when i == 0). s_bfm only cares about s0.u[5:0], so neither s_bfe nor s_and is needed to mask off the higher bits. */
+ Temp count = i == 0
+ ? get_arg(ctx, ctx->args->merged_wave_info)
+ : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
+ get_arg(ctx, ctx->args->merged_wave_info), Operand(i * 8u));
+
+ Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand(0u));
+ Temp cond;
+
+ if (ctx->program->wave_size == 64) {
+ /* Special case for 64 active invocations, because 64 doesn't work with s_bfm: test bit 6 of count and, when it is set, select an all-ones mask instead of the s_bfm result. */
+ Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, Operand(6u /* log2(64) */));
+ cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), mask, bld.scc(active_64));
+ } else {
+ /* s_bfm_b64 (not _b32) is used because it works with a count of 32; only the lower half of the 64-bit result is needed, so extract it as the lane mask. */
+ cond = emit_extract_vector(ctx, mask, 0, bld.lm);
+ }
+
+ return cond;
+}
+
void select_program(Program *program,
unsigned shader_count,
struct nir_shader *const *shaders,
@@ -10291,25 +10316,7 @@ void select_program(Program *program,
bool check_merged_wave_info = ctx.tcs_in_out_eq ? i == 0 : (shader_count >= 2 && !empty_shader);
bool endif_merged_wave_info = ctx.tcs_in_out_eq ? i == 1 : check_merged_wave_info;
if (check_merged_wave_info) {
- Builder bld(ctx.program, ctx.block);
-
- /* The s_bfm only cares about s0.u[5:0] so we don't need either s_bfe nor s_and here */
- Temp count = i == 0 ? get_arg(&ctx, args->merged_wave_info)
- : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
- get_arg(&ctx, args->merged_wave_info), Operand(i * 8u));
-
- Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand(0u));
- Temp cond;
-
- if (ctx.program->wave_size == 64) {
- /* Special case for 64 active invocations, because 64 doesn't work with s_bfm */
- Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, Operand(6u /* log2(64) */));
- cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), mask, bld.scc(active_64));
- } else {
- /* We use s_bfm_b64 (not _b32) which works with 32, but we need to extract the lower half of the register */
- cond = emit_extract_vector(&ctx, mask, 0, bld.lm);
- }
-
+ Temp cond = merged_wave_info_to_mask(&ctx, i);
begin_divergent_if_then(&ctx, &ic_merged_wave_info, cond);
}