summary | refs | log | tree | commit | diff | stats
path: root/src/amd
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd')
-rw-r--r--  src/amd/compiler/aco_instruction_selection.cpp        11
-rw-r--r--  src/amd/compiler/aco_instruction_selection_setup.cpp   1
2 files changed, 8 insertions, 4 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 70cee225670..84c88e4eaa5 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5709,18 +5709,23 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
Definition tmp = bld.def(dst.regClass());
+ Definition lanemask_tmp = dst.size() == bld.lm.size() ? tmp : bld.def(src.regClass());
if (instr->src[0].ssa->bit_size == 1) {
assert(src.regClass() == bld.lm);
- bld.sop2(Builder::s_and, tmp, bld.def(s1, scc), Operand(exec, bld.lm), src);
+ bld.sop2(Builder::s_and, lanemask_tmp, bld.def(s1, scc), Operand(exec, bld.lm), src);
} else if (instr->src[0].ssa->bit_size == 32 && src.regClass() == v1) {
- bld.vopc(aco_opcode::v_cmp_lg_u32, tmp, Operand(0u), src);
+ bld.vopc(aco_opcode::v_cmp_lg_u32, lanemask_tmp, Operand(0u), src);
} else if (instr->src[0].ssa->bit_size == 64 && src.regClass() == v2) {
- bld.vopc(aco_opcode::v_cmp_lg_u64, tmp, Operand(0u), src);
+ bld.vopc(aco_opcode::v_cmp_lg_u64, lanemask_tmp, Operand(0u), src);
} else {
fprintf(stderr, "Unimplemented NIR instr bit size: ");
nir_print_instr(&instr->instr, stderr);
fprintf(stderr, "\n");
}
+ if (dst.size() != bld.lm.size()) {
+ /* Wave32 with ballot size set to 64 */
+ bld.pseudo(aco_opcode::p_create_vector, Definition(tmp), lanemask_tmp.getTemp(), Operand(0u));
+ }
emit_wqm(ctx, tmp.getTemp(), dst);
break;
}
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index a7446c6c058..47f5778822f 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -294,7 +294,6 @@ void init_context(isel_context *ctx, nir_shader *shader)
break;
case nir_intrinsic_ballot:
type = RegType::sgpr;
- size = lane_mask_size;
break;
case nir_intrinsic_load_sample_id:
case nir_intrinsic_load_sample_mask_in: