author     Jason Ekstrand <[email protected]>   2018-10-03 12:14:20 -0500
committer  Jason Ekstrand <[email protected]>   2018-10-04 12:43:59 -0500
commit     dd553bc67f8ab1513fd196b6ffb7c4a76723adfd (patch)
tree       1e6675709cfe879b3c9ae26d465e5da71e1aa7d8
parent     5f0567a4f60c6671d4e2a942ab3f3248dbbd6997 (diff)
nir/alu_to_scalar: Use ssa_for_alu_src in hand-rolled expansions
The ssa_for_alu_src helper will correctly handle swizzles and other
source modifiers for you.  The expansions for unpack_half_2x16,
pack_uvec2_to_uint, and pack_uvec4_to_uint were all broken with respect
to swizzles.  The brokenness of unpack_half_2x16 was causing rendering
errors in Rise of the Tomb Raider on Intel ever since c11833ab24dcba26,
which added an extra copy propagation to the optimization pipeline and
caused us to start seeing swizzles where we hadn't seen any before.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107926
Fixes: 9ce901058f3d "nir: Add lowering of nir_op_unpack_half_2x16."
Fixes: 9b8786eba955 "nir: Add lowering support for packing opcodes."
Tested-by: Alex Smith <[email protected]>
Tested-by: Józef Kucia <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
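
For readers unfamiliar with NIR ALU sources, here is a minimal, standalone C
sketch of the failure mode the message describes.  The vec4/alu_src types and
the two helpers below are hypothetical stand-ins, not the real NIR API: they
only model the idea that each ALU source carries a per-channel swizzle on top
of its SSA value, that reading the underlying value directly drops that
swizzle, and that nir_ssa_for_alu_src() effectively resolves the swizzle first
so a plain channel index is then safe.

#include <assert.h>
#include <stdio.h>

/* Hypothetical stand-ins for a NIR-style vector value and an ALU source
 * that applies a per-channel swizzle to that value. */
typedef struct { float comp[4]; } vec4;
typedef struct { const vec4 *ssa; unsigned swizzle[4]; } alu_src;

/* Broken pattern: index the underlying value directly and ignore the
 * swizzle -- this is what the old hand-rolled expansions did when they
 * read instr->src[0].src.ssa. */
static float channel_ignoring_swizzle(const alu_src *src, unsigned i)
{
   return src->ssa->comp[i];
}

/* Correct pattern: resolve the swizzle into a fresh value first (roughly
 * what nir_ssa_for_alu_src() does by emitting a move with the swizzle and
 * modifiers applied), then index it with a plain channel number. */
static vec4 resolve_swizzle(const alu_src *src)
{
   vec4 out;
   for (unsigned i = 0; i < 4; i++)
      out.comp[i] = src->ssa->comp[src->swizzle[i]];
   return out;
}

int main(void)
{
   const vec4 value = { { 1.0f, 2.0f, 3.0f, 4.0f } };
   /* A source that reads the value as .yxzw, i.e. the shader asked for
    * component 1 first.  Copy propagation is what started producing such
    * non-identity swizzles in front of these lowerings. */
   const alu_src src = { &value, { 1, 0, 2, 3 } };

   vec4 resolved = resolve_swizzle(&src);

   /* The broken read returns component 0 (1.0) where the shader asked for
    * component 1 (2.0). */
   printf("ignoring swizzle: %.1f, resolving swizzle: %.1f\n",
          channel_ignoring_swizzle(&src, 0), resolved.comp[0]);
   assert(resolved.comp[0] == 2.0f);
   return 0;
}
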
-rw-r--r--   src/compiler/nir/nir_lower_alu_to_scalar.c   33
1 file changed, 18 insertions, 15 deletions
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 742c8d8ee66..0be3aba9456 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -107,11 +107,11 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
if (!b->shader->options->lower_pack_half_2x16)
return false;
+ nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, instr, 0);
+
nir_ssa_def *val =
- nir_pack_half_2x16_split(b, nir_channel(b, instr->src[0].src.ssa,
- instr->src[0].swizzle[0]),
- nir_channel(b, instr->src[0].src.ssa,
- instr->src[0].swizzle[1]));
+ nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0),
+ nir_channel(b, src_vec2, 1));
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
nir_instr_remove(&instr->instr);
@@ -130,9 +130,11 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
if (!b->shader->options->lower_unpack_half_2x16)
return false;
+ nir_ssa_def *packed = nir_ssa_for_alu_src(b, instr, 0);
+
nir_ssa_def *comps[2];
- comps[0] = nir_unpack_half_2x16_split_x(b, instr->src[0].src.ssa);
- comps[1] = nir_unpack_half_2x16_split_y(b, instr->src[0].src.ssa);
+ comps[0] = nir_unpack_half_2x16_split_x(b, packed);
+ comps[1] = nir_unpack_half_2x16_split_y(b, packed);
nir_ssa_def *vec = nir_vec(b, comps, 2);
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
@@ -144,8 +146,8 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
assert(b->shader->options->lower_pack_snorm_2x16 ||
b->shader->options->lower_pack_unorm_2x16);
- nir_ssa_def *word =
- nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+ nir_ssa_def *word = nir_extract_u16(b, nir_ssa_for_alu_src(b, instr, 0),
+ nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
nir_channel(b, word, 0));
@@ -159,8 +161,8 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
assert(b->shader->options->lower_pack_snorm_4x8 ||
b->shader->options->lower_pack_unorm_4x8);
- nir_ssa_def *byte =
- nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+ nir_ssa_def *byte = nir_extract_u8(b, nir_ssa_for_alu_src(b, instr, 0),
+ nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
@@ -173,14 +175,15 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
}
case nir_op_fdph: {
+ nir_ssa_def *src0_vec = nir_ssa_for_alu_src(b, instr, 0);
+ nir_ssa_def *src1_vec = nir_ssa_for_alu_src(b, instr, 1);
+
nir_ssa_def *sum[4];
for (unsigned i = 0; i < 3; i++) {
- sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa,
- instr->src[0].swizzle[i]),
- nir_channel(b, instr->src[1].src.ssa,
- instr->src[1].swizzle[i]));
+ sum[i] = nir_fmul(b, nir_channel(b, src0_vec, i),
+ nir_channel(b, src1_vec, i));
}
- sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]);
+ sum[3] = nir_channel(b, src1_vec, 3);
nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]),
nir_fadd(b, sum[2], sum[3]));