summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIan Romanick <[email protected]>2018-06-13 10:36:42 -0700
committerIan Romanick <[email protected]>2018-06-15 17:22:27 -0700
commit4467040cb658529cb0e41be463b94e15f48f17a2 (patch)
treed2fcb1e50e8d1d0663015f81e169382c297cecea
parentf2d8bb7a7b8806c91b1ab5a186f450e0de1f5b86 (diff)
i965/fs: Propagate conditional modifiers from not instructions
Skylake total instructions in shared programs: 14399081 -> 14399010 (<.01%) instructions in affected programs: 26961 -> 26890 (-0.26%) helped: 57 HURT: 0 helped stats (abs) min: 1 max: 6 x̄: 1.25 x̃: 1 helped stats (rel) min: 0.16% max: 0.80% x̄: 0.30% x̃: 0.18% 95% mean confidence interval for instructions value: -1.50 -0.99 95% mean confidence interval for instructions %-change: -0.35% -0.25% Instructions are helped. total cycles in shared programs: 532978307 -> 532976050 (<.01%) cycles in affected programs: 468629 -> 466372 (-0.48%) helped: 33 HURT: 20 helped stats (abs) min: 3 max: 360 x̄: 116.52 x̃: 98 helped stats (rel) min: 0.06% max: 3.63% x̄: 1.66% x̃: 1.27% HURT stats (abs) min: 2 max: 172 x̄: 79.40 x̃: 43 HURT stats (rel) min: 0.04% max: 3.02% x̄: 1.48% x̃: 0.44% 95% mean confidence interval for cycles value: -81.29 -3.88 95% mean confidence interval for cycles %-change: -1.07% 0.12% Inconclusive result (%-change mean confidence interval includes 0). All Gen6+ platforms, except Ivy Bridge, had similar results. (Haswell shown) total instructions in shared programs: 12973897 -> 12973838 (<.01%) instructions in affected programs: 25970 -> 25911 (-0.23%) helped: 55 HURT: 0 helped stats (abs) min: 1 max: 2 x̄: 1.07 x̃: 1 helped stats (rel) min: 0.16% max: 0.62% x̄: 0.28% x̃: 0.18% 95% mean confidence interval for instructions value: -1.14 -1.00 95% mean confidence interval for instructions %-change: -0.32% -0.24% Instructions are helped. total cycles in shared programs: 410355841 -> 410352067 (<.01%) cycles in affected programs: 578454 -> 574680 (-0.65%) helped: 47 HURT: 5 helped stats (abs) min: 3 max: 360 x̄: 85.74 x̃: 18 helped stats (rel) min: 0.05% max: 3.68% x̄: 1.18% x̃: 0.38% HURT stats (abs) min: 2 max: 242 x̄: 51.20 x̃: 4 HURT stats (rel) min: <.01% max: 0.45% x̄: 0.15% x̃: 0.11% 95% mean confidence interval for cycles value: -104.89 -40.27 95% mean confidence interval for cycles %-change: -1.45% -0.66% Cycles are helped. 
Ivy Bridge total instructions in shared programs: 11679351 -> 11679301 (<.01%) instructions in affected programs: 28208 -> 28158 (-0.18%) helped: 50 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 0.12% max: 0.54% x̄: 0.23% x̃: 0.16% 95% mean confidence interval for instructions value: -1.00 -1.00 95% mean confidence interval for instructions %-change: -0.27% -0.19% Instructions are helped. total cycles in shared programs: 257445362 -> 257444662 (<.01%) cycles in affected programs: 419338 -> 418638 (-0.17%) helped: 40 HURT: 3 helped stats (abs) min: 1 max: 170 x̄: 65.05 x̃: 24 helped stats (rel) min: 0.02% max: 3.51% x̄: 1.26% x̃: 0.41% HURT stats (abs) min: 2 max: 1588 x̄: 634.00 x̃: 312 HURT stats (rel) min: 0.05% max: 2.97% x̄: 1.21% x̃: 0.62% 95% mean confidence interval for cycles value: -97.96 65.41 95% mean confidence interval for cycles %-change: -1.56% -0.62% Inconclusive result (value mean confidence interval includes 0). No changes on Iron Lake or GM45. v2: Move 'if (cond != BRW_CONDITIONAL_Z && cond != BRW_CONDITIONAL_NZ)' check outside the loop. Suggested by Iago. Signed-off-by: Ian Romanick <[email protected]>
-rw-r--r--src/intel/compiler/brw_fs_cmod_propagation.cpp62
1 files changed, 61 insertions, 1 deletions
diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp
index b4f05613e98..5b74f267359 100644
--- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
+++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
@@ -111,6 +111,60 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block,
return false;
}
+/**
+ * Propagate conditional modifiers from NOT instructions
+ *
+ * Attempt to convert sequences like
+ *
+ * or(8) g78<8,8,1> g76<8,8,1>UD g77<8,8,1>UD
+ * ...
+ * not.nz.f0(8) null g78<8,8,1>UD
+ *
+ * into
+ *
+ * or.z.f0(8) g78<8,8,1> g76<8,8,1>UD g77<8,8,1>UD
+ *
+ * The modifier placed on the producing instruction is the negation
+ * (brw_negate_cmod) of the modifier on the NOT. Only the cases where that
+ * negated modifier is .z or .nz are handled; anything else is rejected
+ * before any scanning happens.
+ *
+ * Instructions are visited in reverse order starting from \p inst. The scan
+ * ends at the first instruction whose destination overlaps the NOT's source.
+ * That instruction receives the modifier only when all of the following
+ * hold:
+ *
+ * - its opcode is OR or AND,
+ * - it is not a partial write, its destination offset equals the offset of
+ *   the NOT's source, and both instructions have the same exec_size,
+ * - can_do_cmod() is true for it, and
+ * - it either has no conditional modifier yet and no instruction visited
+ *   before it in the scan read the flag, or it already carries exactly the
+ *   negated modifier.
+ *
+ * The scan also gives up as soon as a non-overlapping instruction writes
+ * the flag.
+ *
+ * NOTE(review): ~x is zero only when x is all ones, so exchanging .nz on the
+ * NOT for .z on the producer looks valid only if the producer's result is a
+ * 0 / ~0 boolean -- confirm that callers guarantee this.
+ *
+ * NOTE(review): only conditional_mod is copied to the producer; nothing here
+ * compares or copies which flag (sub)register each instruction uses --
+ * confirm that this cannot differ between the two instructions.
+ *
+ * \param devinfo  Device description; only passed on to flags_read().
+ * \param block    Basic block that \p inst is removed from on success.
+ * \param inst     Instruction to propagate from. The visible call site only
+ *                 passes instructions whose opcode is BRW_OPCODE_NOT.
+ *
+ * \return true if the modifier was moved onto an earlier instruction and
+ *         \p inst was removed from \p block, false if nothing was changed.
+ */
+static bool
+cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block,
+ fs_inst *inst)
+{
+ const enum brw_conditional_mod cond = brw_negate_cmod(inst->conditional_mod);
+ bool read_flag = false; /* becomes true once a scanned instruction reads the flag */
+
+ if (cond != BRW_CONDITIONAL_Z && cond != BRW_CONDITIONAL_NZ)
+ return false;
+
+ foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
+ if (regions_overlap(scan_inst->dst, scan_inst->size_written,
+ inst->src[0], inst->size_read(0))) {
+ if (scan_inst->opcode != BRW_OPCODE_OR &&
+ scan_inst->opcode != BRW_OPCODE_AND)
+ break; /* only OR and AND producers are handled */
+
+ if (scan_inst->is_partial_write() ||
+ scan_inst->dst.offset != inst->src[0].offset ||
+ scan_inst->exec_size != inst->exec_size)
+ break; /* producer does not write exactly what the NOT reads */
+
+ if (scan_inst->can_do_cmod() &&
+ ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
+ scan_inst->conditional_mod == cond)) {
+ scan_inst->conditional_mod = cond;
+ inst->remove(block); /* the producer now sets the flag itself */
+ return true;
+ }
+ break; /* first overlapping write is unusable; stop scanning */
+ }
+
+ if (scan_inst->flags_written())
+ break; /* an intervening flag write ends the scan */
+
+ read_flag = read_flag || scan_inst->flags_read(devinfo);
+ }
+
+ return false;
+}
+
static bool
opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
{
@@ -122,7 +176,8 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
if ((inst->opcode != BRW_OPCODE_AND &&
inst->opcode != BRW_OPCODE_CMP &&
- inst->opcode != BRW_OPCODE_MOV) ||
+ inst->opcode != BRW_OPCODE_MOV &&
+ inst->opcode != BRW_OPCODE_NOT) ||
inst->predicate != BRW_PREDICATE_NONE ||
!inst->dst.is_null() ||
(inst->src[0].file != VGRF && inst->src[0].file != ATTR &&
@@ -162,6 +217,11 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
continue;
}
+ if (inst->opcode == BRW_OPCODE_NOT) {
+ progress = cmod_propagate_not(devinfo, block, inst) || progress;
+ continue;
+ }
+
bool read_flag = false;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (regions_overlap(scan_inst->dst, scan_inst->size_written,