diff options
author | Matt Turner <[email protected]> | 2015-02-10 16:25:47 -0800 |
---|---|---|
committer | Matt Turner <[email protected]> | 2015-02-19 21:16:43 -0800 |
commit | 7f8dd91d166e49d7da98f90d6428dc2705fb96d0 (patch) | |
tree | 41fda24f78f53c02194c90b5697726c6f9416fb3 /src/mesa/drivers | |
parent | 871ad3f08bc34e16fdd728e9a4821b9a83e509f0 (diff) |
i965/fs: Consider MOV.SAT to interfere if it has a source modifier.
The saturate propagation pass recognizes that the second instruction
below does not interfere with an attempt to propagate the saturate
modifier from instruction 3 to 1.
1: add(8) dst0 src0 src1
2: mov.sat(8) dst1 dst0
3: mov.sat(8) dst2 dst0
Unfortunately, we did not consider the case of instruction 2 having a
source modifier on dst0. Take for instance:
1: add(8) dst0 src0 src1
2: mov.sat(8) dst1 -dst0
3: mov.sat(8) dst2 dst0
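Propagating the saturate into instruction 1 here would make instruction 2
compute sat(-sat(x)) instead of sat(-x), so consider such an instruction
to interfere. This increases instruction counts in Anomaly 2, which could
be a bug fix depending on the values the first instruction produces.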
instructions in affected programs: 53228 -> 53934 (1.33%)
HURT: 360
Cc: <[email protected]>
Reviewed-by: Ian Romanick <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp | 12 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp | 44 |
2 files changed, 52 insertions, 4 deletions
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
index bc516618c3d..e406c2899e8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
@@ -81,12 +81,16 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
             break;
          }
          for (int i = 0; i < scan_inst->sources; i++) {
-            if ((scan_inst->opcode != BRW_OPCODE_MOV || !scan_inst->saturate) &&
-                scan_inst->src[i].file == GRF &&
+            if (scan_inst->src[i].file == GRF &&
                 scan_inst->src[i].reg == inst->src[0].reg &&
                 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
-               interfered = true;
-               break;
+               if (scan_inst->opcode != BRW_OPCODE_MOV ||
+                   !scan_inst->saturate ||
+                   scan_inst->src[0].abs ||
+                   scan_inst->src[0].negate) {
+                  interfered = true;
+                  break;
+               }
             }
          }
 
diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
index f897bddb7aa..6f762bcc6e0 100644
--- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
@@ -393,3 +393,47 @@ TEST_F(saturate_propagation_test, intervening_dest_write)
    EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode);
    EXPECT_TRUE(instruction(block0, 2)->saturate);
 }
+
+TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat)
+{
+   fs_reg dst0 = v->vgrf(glsl_type::float_type);
+   fs_reg dst1 = v->vgrf(glsl_type::float_type);
+   fs_reg dst2 = v->vgrf(glsl_type::float_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg src1 = v->vgrf(glsl_type::float_type);
+   v->emit(BRW_OPCODE_MUL, dst0, src0, src1);
+   dst0.negate = true;
+   v->emit(BRW_OPCODE_MOV, dst1, dst0)
+      ->saturate = true;
+   dst0.negate = false;
+   v->emit(BRW_OPCODE_MOV, dst2, dst0)
+      ->saturate = true;
+
+   /* = Before =
+    *
+    * 0: mul(8)        dst0  src0  src1
+    * 1: mov.sat(8)    dst1  -dst0
+    * 2: mov.sat(8)    dst2  dst0
+    *
+    * = After =
+    * (no changes)
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(2, block0->end_ip);
+
+   EXPECT_FALSE(saturate_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(2, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
+   EXPECT_FALSE(instruction(block0, 0)->saturate);
+   EXPECT_FALSE(instruction(block0, 0)->src[1].negate);
+   EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode);
+   EXPECT_TRUE(instruction(block0, 1)->saturate);
+   EXPECT_TRUE(instruction(block0, 1)->src[0].negate);
+   EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode);
+   EXPECT_TRUE(instruction(block0, 2)->saturate);
+}
```