summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/intel/compiler/brw_fs.cpp106
-rw-r--r--src/intel/compiler/brw_fs.h1
2 files changed, 107 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 02a8ea0fd9d..422eedcf0af 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2844,6 +2844,106 @@ mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned ds)
}
bool
+fs_visitor::opt_peephole_csel()
+{
+ if (devinfo->gen < 8)
+ return false;
+
+ bool progress = false;
+
+ foreach_block_reverse(block, cfg) {
+ int ip = block->end_ip + 1;
+
+ foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
+ ip--;
+
+ if (inst->opcode != BRW_OPCODE_SEL ||
+ inst->predicate != BRW_PREDICATE_NORMAL ||
+ (inst->dst.type != BRW_REGISTER_TYPE_F &&
+ inst->dst.type != BRW_REGISTER_TYPE_D &&
+ inst->dst.type != BRW_REGISTER_TYPE_UD))
+ continue;
+
+ /* Because it is a 3-src instruction, CSEL cannot have an immediate
+ * value as a source, but we can sometimes handle zero.
+ */
+ if ((inst->src[0].file != VGRF && inst->src[0].file != ATTR &&
+ inst->src[0].file != UNIFORM) ||
+ (inst->src[1].file != VGRF && inst->src[1].file != ATTR &&
+ inst->src[1].file != UNIFORM && !inst->src[1].is_zero()))
+ continue;
+
+ foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
+ if (!scan_inst->flags_written())
+ continue;
+
+ if ((scan_inst->opcode != BRW_OPCODE_CMP &&
+ scan_inst->opcode != BRW_OPCODE_MOV) ||
+ scan_inst->predicate != BRW_PREDICATE_NONE ||
+ (scan_inst->src[0].file != VGRF &&
+ scan_inst->src[0].file != ATTR &&
+ scan_inst->src[0].file != UNIFORM) ||
+ scan_inst->src[0].type != BRW_REGISTER_TYPE_F)
+ break;
+
+ if (scan_inst->opcode == BRW_OPCODE_CMP && !scan_inst->src[1].is_zero())
+ break;
+
+ const brw::fs_builder ibld(this, block, inst);
+
+ const enum brw_conditional_mod cond =
+ inst->predicate_inverse
+ ? brw_negate_cmod(scan_inst->conditional_mod)
+ : scan_inst->conditional_mod;
+
+ fs_inst *csel_inst = NULL;
+
+ if (inst->src[1].file != IMM) {
+ csel_inst = ibld.CSEL(inst->dst,
+ inst->src[0],
+ inst->src[1],
+ scan_inst->src[0],
+ cond);
+ } else if (cond == BRW_CONDITIONAL_NZ) {
+ /* Consider the sequence
+ *
+ * cmp.nz.f0 null<1>F g3<8,8,1>F 0F
+ * (+f0) sel g124<1>UD g2<8,8,1>UD 0x00000000UD
+ *
+ * The sel will pick the immediate value 0 if r0 is ±0.0.
+ * Therefore, this sequence is equivalent:
+ *
+ * cmp.nz.f0 null<1>F g3<8,8,1>F 0F
+ * (+f0) sel g124<1>F g2<8,8,1>F (abs)g3<8,8,1>F
+ *
+ * The abs is ensures that the result is 0UD when g3 is -0.0F.
+ * By normal cmp-sel merging, this is also equivalent:
+ *
+ * csel.nz g124<1>F g2<4,4,1>F (abs)g3<4,4,1>F g3<4,4,1>F
+ */
+ csel_inst = ibld.CSEL(inst->dst,
+ inst->src[0],
+ scan_inst->src[0],
+ scan_inst->src[0],
+ cond);
+
+ csel_inst->src[1].abs = true;
+ }
+
+ if (csel_inst != NULL) {
+ progress = true;
+ inst->remove(block);
+ }
+
+ break;
+ }
+ }
+ }
+
+ return progress;
+}
+
+bool
fs_visitor::compute_to_mrf()
{
bool progress = false;
@@ -6078,6 +6178,12 @@ fs_visitor::optimize()
OPT(compact_virtual_grfs);
} while (progress);
+ /* Do this after cmod propagation has had every possible opportunity to
+ * propagate results into SEL instructions.
+ */
+ if (OPT(opt_peephole_csel))
+ OPT(dead_code_eliminate);
+
progress = false;
pass_num = 0;
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 1b7df844696..e384db809dc 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -191,6 +191,7 @@ public:
fs_reg resolve_source_modifiers(const fs_reg &src);
void emit_discard_jump();
bool opt_peephole_sel();
+ bool opt_peephole_csel();
bool opt_peephole_predicated_break();
bool opt_saturate_propagation();
bool opt_cmod_propagation();