aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
author Alejandro Piñeiro <[email protected]> 2015-10-20 13:08:09 +0200
committer Alejandro Piñeiro <[email protected]> 2015-11-05 08:57:23 +0100
commit 56774e63028b2997a7d8c0abb5009a4c79f9a453 (patch)
tree b37457b83fa5b93db11a101fa5516421cf9ace40 /src/mesa/drivers/dri/i965
parent bb73fc4cb82c1abdf47aa373c78c2a85fe29b3ec (diff)
i965/vec4: select predicate based on writemask for sel emissions
Equivalent to commit 8ac3b525c but with sel operations. In this case we select the PredCtrl based on the writemask.

This patch helps in cases like this:

 1: cmp.l.f0.0 vgrf40.0.x:F, vgrf0.zzzz:F, vgrf7.xxxx:F
 2: cmp.nz.f0.0 null:D, vgrf40.xxxx:D, 0D
 3: (+f0.0) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD

In this case, cmod propagation can't optimize instruction #2, because instructions #1 and #2 have different writemasks, and we can't directly update the writemask of instruction #2 because our code thinks that the sel at instruction #3 reads all four channels of the flag, when it actually only reads .x.

So, with this patch, the previous case becomes this:

 1: cmp.l.f0.0 vgrf40.0.x:F, vgrf0.zzzz:F, vgrf7.xxxx:F
 2: cmp.nz.f0.0 null:D, vgrf40.xxxx:D, 0D
 3: (+f0.0.x) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD

Now only the x channel of the flag is used, allowing dead code elimination to update the writemask of the second instruction:

 1: cmp.l.f0.0 vgrf40.0.x:F, vgrf0.zzzz:F, vgrf7.xxxx:F
 2: cmp.nz.f0.0 null.x:D, vgrf40.xxxx:D, 0D
 3: (+f0.0.x) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD

So now cmod propagation can simplify out #2:

 1: cmp.l.f0.0 vgrf40.0.x:F, attr18.wwww:F, vgrf7.xxxx:F
 2: (+f0.0.x) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD

Shader-db numbers:
total instructions in shared programs: 6235835 -> 6228008 (-0.13%)
instructions in affected programs: 219850 -> 212023 (-3.56%)
total loops in shared programs: 1979 -> 1979 (0.00%)
helped: 1192
HURT: 0
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp 18
1 files changed, 17 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 8ca8ddb98fb..b848810ebc7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1407,7 +1407,23 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_bcsel:
emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]);
- inst->predicate = BRW_PREDICATE_NORMAL;
+ switch (dst.writemask) {
+ case WRITEMASK_X:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_X;
+ break;
+ case WRITEMASK_Y:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+ break;
+ case WRITEMASK_Z:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+ break;
+ case WRITEMASK_W:
+ inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_W;
+ break;
+ default:
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+ }
break;
case nir_op_fdot_replicated2: