From 995d9937103771d9318124b91adfd20d7c6d5fed Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 27 Jun 2018 17:25:34 -0700 Subject: i965/vec4: Don't cmod propagate from CMP to ADD if the writemask isn't compatible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we can incorrectly cmod propagate in situations like add(8) g10<1>.xD g2<0>.xD -16D ... cmp.ge.f0(8) null<1>D g2<0>.xD 16D ... (+f0) sel(8) g21<1>.xyUD g14<4>.xyyyUD g18<4>.xyyyUD Sadly, this change hurts quite a few shaders. v2: Refactor writemask compatibility check into a separate function. Suggested by Caio. Ivy Bridge and Haswell had similar results. (Haswell shown) total instructions in shared programs: 12968489 -> 12968738 (<.01%) instructions in affected programs: 60679 -> 60928 (0.41%) helped: 0 HURT: 249 HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.22% max: 0.81% x̄: 0.46% x̃: 0.44% 95% mean confidence interval for instructions value: 1.00 1.00 95% mean confidence interval for instructions %-change: 0.44% 0.48% Instructions are HURT. total cycles in shared programs: 409171965 -> 409172317 (<.01%) cycles in affected programs: 260056 -> 260408 (0.14%) helped: 0 HURT: 176 HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 HURT stats (rel) min: 0.04% max: 0.34% x̄: 0.17% x̃: 0.17% 95% mean confidence interval for cycles value: 2.00 2.00 95% mean confidence interval for cycles %-change: 0.16% 0.18% Cycles are HURT. Sandy Bridge total instructions in shared programs: 10423577 -> 10423753 (<.01%) instructions in affected programs: 40667 -> 40843 (0.43%) helped: 0 HURT: 176 HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.29% max: 0.79% x̄: 0.48% x̃: 0.42% 95% mean confidence interval for instructions value: 1.00 1.00 95% mean confidence interval for instructions %-change: 0.46% 0.51% Instructions are HURT. total cycles in shared programs: 146097503 -> 146097855 (<.01%) cycles in affected programs: 503990 -> 504342 (0.07%) helped: 0 HURT: 176 HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 HURT stats (rel) min: 0.02% max: 0.36% x̄: 0.12% x̃: 0.11% 95% mean confidence interval for cycles value: 2.00 2.00 95% mean confidence interval for cycles %-change: 0.11% 0.13% Cycles are HURT. No changes on any other platforms. Signed-off-by: Ian Romanick Fixes: cd635d149b2 i965/vec4: Propagate conditional modifiers from compares to adds Reviewed-by: Caio Marcelo de Oliveira Filho --- src/intel/compiler/brw_vec4_cmod_propagation.cpp | 20 +++++-- src/intel/compiler/test_vec4_cmod_propagation.cpp | 72 +++++++++++++++++++++++ 2 files changed, 87 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/intel/compiler/brw_vec4_cmod_propagation.cpp b/src/intel/compiler/brw_vec4_cmod_propagation.cpp index 5205da4983c..a1d46dc8dca 100644 --- a/src/intel/compiler/brw_vec4_cmod_propagation.cpp +++ b/src/intel/compiler/brw_vec4_cmod_propagation.cpp @@ -35,6 +35,17 @@ namespace brw { +static bool +writemasks_incompatible(const vec4_instruction *earlier, + const vec4_instruction *later) +{ + return (earlier->dst.writemask != WRITEMASK_X && + earlier->dst.writemask != WRITEMASK_XYZW) || + (earlier->dst.writemask == WRITEMASK_XYZW && + later->src[0].swizzle != BRW_SWIZZLE_XYZW) || + (later->dst.writemask & ~earlier->dst.writemask) != 0; +} + static bool opt_cmod_propagation_local(bblock_t *block) { @@ -82,6 +93,9 @@ opt_cmod_propagation_local(bblock_t *block) if (scan_inst->opcode != BRW_OPCODE_ADD) goto not_match; + if (writemasks_incompatible(scan_inst, inst)) + goto not_match; + /* A CMP is basically a subtraction. The result of the * subtraction must be the same as the result of the addition. * This means that one of the operands must be negated. So (a + @@ -132,11 +146,7 @@ opt_cmod_propagation_local(bblock_t *block) scan_inst->dst, scan_inst->size_written)) { if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) || scan_inst->dst.offset != inst->src[0].offset || - (scan_inst->dst.writemask != WRITEMASK_X && - scan_inst->dst.writemask != WRITEMASK_XYZW) || - (scan_inst->dst.writemask == WRITEMASK_XYZW && - inst->src[0].swizzle != BRW_SWIZZLE_XYZW) || - (inst->dst.writemask & ~scan_inst->dst.writemask) != 0 || + writemasks_incompatible(scan_inst, inst) || scan_inst->exec_size != inst->exec_size || scan_inst->group != inst->group) { break; diff --git a/src/intel/compiler/test_vec4_cmod_propagation.cpp b/src/intel/compiler/test_vec4_cmod_propagation.cpp index 7d9792b4a55..8430924de63 100644 --- a/src/intel/compiler/test_vec4_cmod_propagation.cpp +++ b/src/intel/compiler/test_vec4_cmod_propagation.cpp @@ -821,3 +821,75 @@ TEST_F(cmod_propagation_test, mul_cmp_different_channels_vec4) EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod); } + +TEST_F(cmod_propagation_test, add_cmp_same_dst_writemask) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + dst_reg dest_null = bld.null_reg_f(); + + bld.ADD(dest, src0, src1); + vec4_instruction *inst = bld.CMP(dest_null, src0, src1, BRW_CONDITIONAL_GE); + inst->src[1].negate = true; + + /* = Before = + * + * 0: add dest.xyzw src0 src1 + * 1: cmp.ge.f0 null.xyzw src0 -src1 + * + * = After = + * 0: add.ge.f0 dest.xyzw src0 src1 + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, add_cmp_different_dst_writemask) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + dst_reg dest_null = bld.null_reg_f(); + + bld.ADD(dest, src0, src1); + vec4_instruction *inst = bld.CMP(dest_null, src0, src1, BRW_CONDITIONAL_GE); + inst->src[1].negate = true; + + /* = Before = + * + * 0: add dest.x src0 src1 + * 1: cmp.ge.f0 null.xyzw src0 -src1 + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); +} -- cgit v1.2.3