summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Iago Toral Quiroga <[email protected]> 2015-05-28 09:06:33 +0200
committer: Iago Toral Quiroga <[email protected]> 2015-05-28 18:25:37 +0200
commit: 2231cf0ba3a79d9abb08065e0f72811c5eea807f (patch)
tree: 79748a8f4a8964be0321dbba7199110ab78e4c99
parent: 09d6243aed016eed4518435c9885275dbb6d2aa9 (diff)
nir: Fix output swizzle in get_mul_for_src
When we compute the output swizzle, we want to consider the number of components in the add operation. So far we were using the write mask of the multiplication for this instead, which is not correct.

Reviewed-by: Jason Ekstrand <[email protected]>
-rw-r--r-- src/glsl/nir/nir_opt_peephole_ffma.c 19
1 files changed, 9 insertions, 10 deletions
diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c
index b430eac8eab..798506b7595 100644
--- a/src/glsl/nir/nir_opt_peephole_ffma.c
+++ b/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -73,7 +73,8 @@ are_all_uses_fadd(nir_ssa_def *def)
}
static nir_alu_instr *
-get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
+get_mul_for_src(nir_alu_src *src, int num_components,
+ uint8_t swizzle[4], bool *negate, bool *abs)
{
assert(src->src.is_ssa && !src->abs && !src->negate);
@@ -85,16 +86,16 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
switch (alu->op) {
case nir_op_imov:
case nir_op_fmov:
- alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
break;
case nir_op_fneg:
- alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
*negate = !*negate;
break;
case nir_op_fabs:
- alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
*negate = false;
*abs = true;
break;
@@ -115,12 +116,8 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
if (!alu)
return NULL;
- for (unsigned i = 0; i < 4; i++) {
- if (!(alu->dest.write_mask & (1 << i)))
- break;
-
+ for (unsigned i = 0; i < num_components; i++)
swizzle[i] = swizzle[src->swizzle[i]];
- }
return alu;
}
@@ -160,7 +157,9 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
negate = false;
abs = false;
- mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs);
+ mul = get_mul_for_src(&add->src[add_mul_src],
+ add->dest.dest.ssa.num_components,
+ swizzle, &negate, &abs);
if (mul != NULL)
break;