summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Rhys Perry <[email protected]> 2019-11-12 15:55:05 +0000
committer: Rhys Perry <[email protected]> 2019-11-12 17:21:38 +0000
commit: 6914b0236f648fa8787dd35a2d4dc0be6d0e03f9 (patch)
tree: d84d57f0ca3dd70fb3f18baf60d560709d780e25 /src
parent: 2c98d79d114d3ed82a9e60519d666f51a1172cd3 (diff)
aco: combine read_invocation and shuffle implementations
They do mostly the same thing now.

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp 36
1 files changed, 7 insertions, 29 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index cdedb516542..7f4618b21e9 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5555,13 +5555,15 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
emit_wqm(ctx, tmp.getTemp(), get_ssa_temp(ctx, &instr->dest.ssa));
break;
}
- case nir_intrinsic_shuffle: {
+ case nir_intrinsic_shuffle:
+ case nir_intrinsic_read_invocation: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
- if (!ctx->divergent_vals[instr->dest.ssa.index] &&
- !ctx->divergent_vals[instr->src[0].ssa->index]) {
+ if (!ctx->divergent_vals[instr->src[0].ssa->index]) {
emit_uniform_subgroup(ctx, instr, src);
} else {
Temp tid = get_ssa_temp(ctx, instr->src[1].ssa);
+ if (instr->intrinsic == nir_intrinsic_read_invocation || !ctx->divergent_vals[instr->src[1].ssa->index])
+ tid = bld.as_uniform(tid);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
if (src.regClass() == v1) {
emit_wqm(ctx, emit_bpermute(ctx, bld, tid, src), dst);
@@ -5572,6 +5574,8 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
hi = emit_wqm(ctx, emit_bpermute(ctx, bld, tid, hi));
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
emit_split_vector(ctx, dst, 2);
+ } else if (instr->dest.ssa.bit_size == 1 && src.regClass() == s2 && tid.regClass() == s1) {
+ emit_wqm(ctx, bld.sopc(aco_opcode::s_bitcmp1_b64, bld.def(s1, scc), src, tid), dst);
} else if (instr->dest.ssa.bit_size == 1 && src.regClass() == s2) {
Temp tmp = bld.vop3(aco_opcode::v_lshrrev_b64, bld.def(v2), tid, src);
tmp = emit_extract_vector(ctx, tmp, 0, v1);
@@ -5624,32 +5628,6 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
}
break;
}
- case nir_intrinsic_read_invocation: {
- Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
- Temp lane = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
- Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
- if (src.regClass() == v1) {
- emit_wqm(ctx, bld.vop3(aco_opcode::v_readlane_b32, bld.def(s1), src, lane), dst);
- } else if (src.regClass() == v2) {
- Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
- bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
- lo = emit_wqm(ctx, bld.vop3(aco_opcode::v_readlane_b32, bld.def(s1), lo, lane));
- hi = emit_wqm(ctx, bld.vop3(aco_opcode::v_readlane_b32, bld.def(s1), hi, lane));
- bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
- emit_split_vector(ctx, dst, 2);
- } else if (instr->dest.ssa.bit_size == 1 && src.regClass() == s2) {
- emit_wqm(ctx, bld.sopc(aco_opcode::s_bitcmp1_b64, bld.def(s1, scc), src, lane), dst);
- } else if (src.regClass() == s1) {
- bld.sop1(aco_opcode::s_mov_b32, Definition(dst), src);
- } else if (src.regClass() == s2) {
- bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src);
- } else {
- fprintf(stderr, "Unimplemented NIR instr bit size: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- }
- break;
- }
case nir_intrinsic_vote_all: {
Temp src = as_divergent_bool(ctx, get_ssa_temp(ctx, instr->src[0].ssa), false);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);