From cd635d149b23f0522cb1a73d6a007851896883e3 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 21 Mar 2018 15:22:51 -0700 Subject: i965/vec4: Propagate conditional modifiers from compares to adds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No changes on Broadwell or later as those platforms do not use the vec4 backend. Ivy Bridge and Haswell had similar results. (Ivy Bridge shown) total instructions in shared programs: 11682119 -> 11681056 (<.01%) instructions in affected programs: 150403 -> 149340 (-0.71%) helped: 950 HURT: 0 helped stats (abs) min: 1 max: 16 x̄: 1.12 x̃: 1 helped stats (rel) min: 0.23% max: 2.78% x̄: 0.82% x̃: 0.71% 95% mean confidence interval for instructions value: -1.19 -1.04 95% mean confidence interval for instructions %-change: -0.84% -0.79% Instructions are helped. total cycles in shared programs: 257495842 -> 257495238 (<.01%) cycles in affected programs: 270302 -> 269698 (-0.22%) helped: 271 HURT: 13 helped stats (abs) min: 2 max: 14 x̄: 2.42 x̃: 2 helped stats (rel) min: 0.06% max: 1.13% x̄: 0.32% x̃: 0.28% HURT stats (abs) min: 2 max: 12 x̄: 4.00 x̃: 4 HURT stats (rel) min: 0.15% max: 1.18% x̄: 0.30% x̃: 0.26% 95% mean confidence interval for cycles value: -2.41 -1.84 95% mean confidence interval for cycles %-change: -0.31% -0.26% Cycles are helped. Sandy Bridge total instructions in shared programs: 10430493 -> 10429727 (<.01%) instructions in affected programs: 120860 -> 120094 (-0.63%) helped: 766 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 0.30% max: 2.70% x̄: 0.78% x̃: 0.73% 95% mean confidence interval for instructions value: -1.00 -1.00 95% mean confidence interval for instructions %-change: -0.80% -0.75% Instructions are helped. total cycles in shared programs: 146138718 -> 146138446 (<.01%) cycles in affected programs: 244114 -> 243842 (-0.11%) helped: 132 HURT: 0 helped stats (abs) min: 2 max: 4 x̄: 2.06 x̃: 2 helped stats (rel) min: 0.03% max: 0.43% x̄: 0.16% x̃: 0.19% 95% mean confidence interval for cycles value: -2.12 -2.00 95% mean confidence interval for cycles %-change: -0.18% -0.15% Cycles are helped. GM45 and Iron Lake had identical results. (Iron Lake shown) total instructions in shared programs: 7780251 -> 7780248 (<.01%) instructions in affected programs: 175 -> 172 (-1.71%) helped: 3 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 1.49% max: 2.44% x̄: 1.81% x̃: 1.49% total cycles in shared programs: 177851584 -> 177851578 (<.01%) cycles in affected programs: 9796 -> 9790 (-0.06%) helped: 3 HURT: 0 helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 helped stats (rel) min: 0.05% max: 0.08% x̄: 0.06% x̃: 0.05% Signed-off-by: Ian Romanick Reviewed-by: Alejandro Piñeiro Reviewed-by: Matt Turner --- src/intel/compiler/brw_vec4_cmod_propagation.cpp | 70 ++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 5 deletions(-) (limited to 'src/intel/compiler') diff --git a/src/intel/compiler/brw_vec4_cmod_propagation.cpp b/src/intel/compiler/brw_vec4_cmod_propagation.cpp index 7f1001b6d12..5205da4983c 100644 --- a/src/intel/compiler/brw_vec4_cmod_propagation.cpp +++ b/src/intel/compiler/brw_vec4_cmod_propagation.cpp @@ -50,8 +50,14 @@ opt_cmod_propagation_local(bblock_t *block) inst->predicate != BRW_PREDICATE_NONE || !inst->dst.is_null() || (inst->src[0].file != VGRF && inst->src[0].file != ATTR && - inst->src[0].file != UNIFORM) || - inst->src[0].abs) + inst->src[0].file != UNIFORM)) + continue; + + /* An ABS source modifier can only be handled when processing a compare + * with a value other than zero. + */ + if (inst->src[0].abs && + (inst->opcode != BRW_OPCODE_CMP || inst->src[1].is_zero())) continue; if (inst->opcode == BRW_OPCODE_AND && @@ -60,15 +66,68 @@ opt_cmod_propagation_local(bblock_t *block) !inst->src[0].negate)) continue; - if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) - continue; - if (inst->opcode == BRW_OPCODE_MOV && inst->conditional_mod != BRW_CONDITIONAL_NZ) continue; bool read_flag = false; foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, inst) { + /* A CMP with a second source of zero can match with anything. A CMP + * with a second source that is not zero can only match with an ADD + * instruction. + */ + if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) { + bool negate; + + if (scan_inst->opcode != BRW_OPCODE_ADD) + goto not_match; + + /* A CMP is basically a subtraction. The result of the + * subtraction must be the same as the result of the addition. + * This means that one of the operands must be negated. So (a + + * b) vs (a == -b) or (a + -b) vs (a == b). + */ + if ((inst->src[0].equals(scan_inst->src[0]) && + inst->src[1].negative_equals(scan_inst->src[1])) || + (inst->src[0].equals(scan_inst->src[1]) && + inst->src[1].negative_equals(scan_inst->src[0]))) { + negate = false; + } else if ((inst->src[0].negative_equals(scan_inst->src[0]) && + inst->src[1].equals(scan_inst->src[1])) || + (inst->src[0].negative_equals(scan_inst->src[1]) && + inst->src[1].equals(scan_inst->src[0]))) { + negate = true; + } else { + goto not_match; + } + + if (scan_inst->exec_size != inst->exec_size || + scan_inst->group != inst->group) + goto not_match; + + /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods": + * + * * Note that the [post condition signal] bits generated at + * the output of a compute are before the .sat. + * + * So we don't have to bail if scan_inst has saturate. + */ + + /* Otherwise, try propagating the conditional. */ + const enum brw_conditional_mod cond = + negate ? brw_swap_cmod(inst->conditional_mod) + : inst->conditional_mod; + + if (scan_inst->can_do_cmod() && + ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || + scan_inst->conditional_mod == cond)) { + scan_inst->conditional_mod = cond; + inst->remove(block); + progress = true; + } + break; + } + if (regions_overlap(inst->src[0], inst->size_read(0), scan_inst->dst, scan_inst->size_written)) { if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) || @@ -170,6 +229,7 @@ opt_cmod_propagation_local(bblock_t *block) break; } + not_match: if (scan_inst->writes_flag()) break; -- cgit v1.2.3