summaryrefslogtreecommitdiffstats
path: root/src/intel
diff options
context:
space:
mode:
author: Jason Ekstrand <[email protected]> 2018-10-17 11:34:32 -0500
committer: Jason Ekstrand <[email protected]> 2019-01-11 10:44:08 -0600
commit: 1ede463b6e66eb0a6df5250261810b6985c35eb9 (patch)
tree: 5ddf4e5e2cfc134d69816c195d3da2ce0695c376 /src/intel
parent: ae683ed3bce6ae7d22fcc8e5ef844b1e0f26b93b (diff)
intel/peephole_ffma: Fix swizzle propagation
The num_components value passed into get_mul_for_src is used to only compose the parts of the swizzle that we know will be used so we don't compose invalid swizzle components. However, we had a bug where we passed the number of components of the add all the way through. For the given source, we need the number of components read from that source. In the case where we have a narrow add, say 2 components, that is sourced from a chain of wider instructions, we may not compose all the swizzles. All we really need to do is pass through the right number of components at each level.

Fixes: 2231cf0ba3a "nir: Fix output swizzle in get_mul_for_src"
Reviewed-by: Ian Romanick <[email protected]>
Diffstat (limited to 'src/intel')
-rw-r--r-- src/intel/compiler/brw_nir_opt_peephole_ffma.c | 11
1 files changed, 7 insertions, 4 deletions
diff --git a/src/intel/compiler/brw_nir_opt_peephole_ffma.c b/src/intel/compiler/brw_nir_opt_peephole_ffma.c
index cc225e1847b..7271bdbca43 100644
--- a/src/intel/compiler/brw_nir_opt_peephole_ffma.c
+++ b/src/intel/compiler/brw_nir_opt_peephole_ffma.c
@@ -68,7 +68,7 @@ are_all_uses_fadd(nir_ssa_def *def)
}
static nir_alu_instr *
-get_mul_for_src(nir_alu_src *src, int num_components,
+get_mul_for_src(nir_alu_src *src, unsigned num_components,
uint8_t swizzle[4], bool *negate, bool *abs)
{
uint8_t swizzle_tmp[4];
@@ -93,16 +93,19 @@ get_mul_for_src(nir_alu_src *src, int num_components,
switch (alu->op) {
case nir_op_imov:
case nir_op_fmov:
- alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components,
+ swizzle, negate, abs);
break;
case nir_op_fneg:
- alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components,
+ swizzle, negate, abs);
*negate = !*negate;
break;
case nir_op_fabs:
- alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], alu->dest.dest.ssa.num_components,
+ swizzle, negate, abs);
*negate = false;
*abs = true;
break;