diff options
author | Timur Kristóf <[email protected]> | 2020-03-31 10:49:52 +0200 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-04-07 11:29:35 +0000 |
commit | d345bfe1958db162b1ddde85eccd3248f884f231 (patch) | |
tree | 287b4fb35bc3248328ae6bb8a8715522f297b208 /src/amd | |
parent | 90b1047fdf84724a00be36f204e790246ad63a72 (diff) |
aco: Extract merged_wave_info_to_mask to its own function.
Currently we only use this at the beginning of merged shader parts,
but we are going to need to use it with some NGG code as well.
Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3576>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/compiler/aco_instruction_selection.cpp | 45 |
1 files changed, 26 insertions, 19 deletions
```diff
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 8bc0e58cf48..6128878c83e 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -10254,6 +10254,31 @@ void cleanup_cfg(Program *program)
    }
 }
 
+Temp merged_wave_info_to_mask(isel_context *ctx, unsigned i)
+{
+   Builder bld(ctx->program, ctx->block);
+
+   /* The s_bfm only cares about s0.u[5:0] so we don't need either s_bfe nor s_and here */
+   Temp count = i == 0
+                ? get_arg(ctx, ctx->args->merged_wave_info)
+                : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
+                           get_arg(ctx, ctx->args->merged_wave_info), Operand(i * 8u));
+
+   Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand(0u));
+   Temp cond;
+
+   if (ctx->program->wave_size == 64) {
+      /* Special case for 64 active invocations, because 64 doesn't work with s_bfm */
+      Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, Operand(6u /* log2(64) */));
+      cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), mask, bld.scc(active_64));
+   } else {
+      /* We use s_bfm_b64 (not _b32) which works with 32, but we need to extract the lower half of the register */
+      cond = emit_extract_vector(ctx, mask, 0, bld.lm);
+   }
+
+   return cond;
+}
+
 void select_program(Program *program,
                     unsigned shader_count,
                     struct nir_shader *const *shaders,
@@ -10291,25 +10316,7 @@ void select_program(Program *program,
       bool check_merged_wave_info = ctx.tcs_in_out_eq ? i == 0 : (shader_count >= 2 && !empty_shader);
       bool endif_merged_wave_info = ctx.tcs_in_out_eq ? i == 1 : check_merged_wave_info;
       if (check_merged_wave_info) {
-         Builder bld(ctx.program, ctx.block);
-
-         /* The s_bfm only cares about s0.u[5:0] so we don't need either s_bfe nor s_and here */
-         Temp count = i == 0 ? get_arg(&ctx, args->merged_wave_info)
-                             : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
-                                        get_arg(&ctx, args->merged_wave_info), Operand(i * 8u));
-
-         Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand(0u));
-         Temp cond;
-
-         if (ctx.program->wave_size == 64) {
-            /* Special case for 64 active invocations, because 64 doesn't work with s_bfm */
-            Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, Operand(6u /* log2(64) */));
-            cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), mask, bld.scc(active_64));
-         } else {
-            /* We use s_bfm_b64 (not _b32) which works with 32, but we need to extract the lower half of the register */
-            cond = emit_extract_vector(&ctx, mask, 0, bld.lm);
-         }
-
+         Temp cond = merged_wave_info_to_mask(&ctx, i);
          begin_divergent_if_then(&ctx, &ic_merged_wave_info, cond);
       }
 
```