summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Francisco Jerez <[email protected]> 2019-12-31 00:10:28 -0800
committer: Francisco Jerez <[email protected]> 2020-01-17 13:21:19 -0800
commit: ab0d1b3b3d17bab2444674aa06f0a8458f6f0821 (patch)
tree: 59c578be46105bae6489bb38e5e6d5b33a7fbf1a /src
parent: 1873202f44b7e4f7c8d1fee32b7faaa3bdd1d6a3 (diff)
intel/fs: Rework fs_inst::is_copy_payload() into multiple classification helpers.
This reworks the current fs_inst::is_copy_payload() method into a number of classification helpers with well-defined semantics. This will be useful later on in order to optimize LOAD_PAYLOAD instructions more aggressively in cases where we can determine it's safe to do so. The closest equivalent of the present fs_inst::is_copy_payload() method is the is_coalescing_payload() helper introduced here.

No functional nor shader-db changes.

Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/intel/compiler/brw_fs.cpp 28
-rw-r--r-- src/intel/compiler/brw_fs_cse.cpp 2
-rw-r--r-- src/intel/compiler/brw_fs_register_coalesce.cpp 2
-rw-r--r-- src/intel/compiler/brw_ir_fs.h 100
4 files changed, 101 insertions, 31 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index ef2cd177d66..c801216fc19 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -428,34 +428,6 @@ fs_inst::has_source_and_destination_hazard() const
}
bool
-fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
-{
- if (this->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
- return false;
-
- fs_reg reg = this->src[0];
- if (reg.file != VGRF || reg.offset != 0 || reg.stride != 1)
- return false;
-
- if (grf_alloc.sizes[reg.nr] * REG_SIZE != this->size_written)
- return false;
-
- for (int i = 0; i < this->sources; i++) {
- reg.type = this->src[i].type;
- if (!this->src[i].equals(reg))
- return false;
-
- if (i < this->header_size) {
- reg.offset += REG_SIZE;
- } else {
- reg = horiz_offset(reg, this->exec_size);
- }
- }
-
- return true;
-}
-
-bool
fs_inst::can_do_source_mods(const struct gen_device_info *devinfo) const
{
if (devinfo->gen == 6 && is_math())
diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp
index f348f915e78..b7c32f3907f 100644
--- a/src/intel/compiler/brw_fs_cse.cpp
+++ b/src/intel/compiler/brw_fs_cse.cpp
@@ -105,7 +105,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
case SHADER_OPCODE_COS:
return inst->mlen < 2;
case SHADER_OPCODE_LOAD_PAYLOAD:
- return !inst->is_copy_payload(v->alloc);
+ return !is_coalescing_payload(v->alloc, inst);
default:
return inst->is_send_from_grf() && !inst->has_side_effects() &&
!inst->is_volatile();
diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp
index 4fe6773da54..8127b29369c 100644
--- a/src/intel/compiler/brw_fs_register_coalesce.cpp
+++ b/src/intel/compiler/brw_fs_register_coalesce.cpp
@@ -86,7 +86,7 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
return false;
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
- if (!inst->is_copy_payload(v->alloc)) {
+ if (!is_coalescing_payload(v->alloc, inst)) {
return false;
}
}
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index 24e523c5270..973c9fb168d 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -350,7 +350,6 @@ public:
bool is_send_from_grf() const;
bool is_payload(unsigned arg) const;
bool is_partial_write() const;
- bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
unsigned components_read(unsigned i) const;
unsigned size_read(int arg) const;
bool can_do_source_mods(const struct gen_device_info *devinfo) const;
@@ -570,4 +569,103 @@ has_dst_aligned_region_restriction(const gen_device_info *devinfo,
return false;
}
+/**
+ * Return whether the LOAD_PAYLOAD instruction is a plain copy of bits from
+ * the specified register file into a VGRF.
+ *
+ * This implies identity register regions without any source-destination
+ * overlap, but otherwise has no implications on the location of sources and
+ * destination in the register file: Gathering any number of portions from
+ * multiple virtual registers in any order is allowed.
+ *
+ * \param file  Register file that every source of \p inst must belong to.
+ * \param inst  Instruction to classify; any opcode other than
+ *              SHADER_OPCODE_LOAD_PAYLOAD yields false.
+ * \return true if none of the checks below rejects \p inst.  An instruction
+ *         with zero sources only has to pass the checks made before the loop.
+ */
+inline bool
+is_copy_payload(brw_reg_file file, const fs_inst *inst)
+{
+ if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD ||
+ inst->is_partial_write() || inst->saturate || // partial writes and saturation rule out a plain copy
+ inst->dst.file != VGRF) // the destination must be a VGRF
+ return false;
+
+ for (unsigned i = 0; i < inst->sources; i++) {
+ if (inst->src[i].file != file || // source lives in a different register file
+ inst->src[i].abs || inst->src[i].negate) // abs/negate source modifiers are not allowed
+ return false;
+
+ if (!inst->src[i].is_contiguous()) // presumably the "identity register region" requirement -- confirm against is_contiguous()
+ return false;
+
+ if (regions_overlap(inst->dst, inst->size_written,
+ inst->src[i], inst->size_read(i))) // source i must not overlap the region written by the destination
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Like is_copy_payload(), but the instruction is required to copy a single
+ * contiguous block of registers from the given register file into the
+ * destination without any reordering.
+ *
+ * \param file  Register file passed through to is_copy_payload().
+ * \param inst  Instruction to classify.
+ * \return true if is_copy_payload(file, inst) holds and every source equals
+ *         the location expected for it, where that location starts at
+ *         src[0] and is advanced by size_read(i) after each source.
+ */
+inline bool
+is_identity_payload(brw_reg_file file, const fs_inst *inst) {
+ if (is_copy_payload(file, inst)) {
+ fs_reg reg = inst->src[0]; // expected location of the next source; NOTE(review): src[0] is read before the loop checks inst->sources -- presumably a LOAD_PAYLOAD always has at least one source, confirm
+
+ for (unsigned i = 0; i < inst->sources; i++) {
+ reg.type = inst->src[i].type; // take the type from the source so the comparison below cannot fail on type alone
+ if (!inst->src[i].equals(reg)) // source i is not where the running location says it should be
+ return false;
+
+ reg = byte_offset(reg, inst->size_read(i)); // presumably moves the expected location past the bytes read by source i
+ }
+
+ return true;
+ } else {
+ return false;
+ }
+}
+
+/**
+ * Like is_copy_payload(), but the instruction is required to source data from
+ * at least two disjoint VGRFs.
+ *
+ * This doesn't necessarily rule out the elimination of this instruction
+ * through register coalescing, but due to limitations of the register
+ * coalesce pass it might be impossible to do so directly until a later stage,
+ * when the LOAD_PAYLOAD instruction is unrolled into a sequence of MOV
+ * instructions.
+ *
+ * \param inst  Instruction to classify.
+ * \return true if is_copy_payload(VGRF, inst) holds and at least one source
+ *         has a register number (nr) different from that of src[0]; false
+ *         otherwise, including when \p inst is not a VGRF copy payload.
+ */
+inline bool
+is_multi_copy_payload(const fs_inst *inst) {
+ if (is_copy_payload(VGRF, inst)) {
+ for (unsigned i = 0; i < inst->sources; i++) {
+ if (inst->src[i].nr != inst->src[0].nr) // sources are told apart by register number only; offsets are not compared
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * Like is_identity_payload(), but the instruction is required to copy the
+ * whole contents of a single VGRF into the destination.
+ *
+ * This means that there is a good chance that the instruction will be
+ * eliminated through register coalescing, but it's neither a necessary nor a
+ * sufficient condition for that to happen -- E.g. consider the case where
+ * source and destination registers diverge due to other instructions in the
+ * program overwriting part of their contents, which isn't something we can
+ * predict up front based on a cheap strictly local test of the copy
+ * instruction.
+ *
+ * \param alloc  Allocator whose sizes[] entry for the source register number
+ *               is compared against the size written by \p inst.
+ * \param inst   Instruction to classify.
+ * \return true if \p inst is an identity payload from the VGRF file, its
+ *         first source has offset 0, and sizes[src[0].nr] * REG_SIZE equals
+ *         inst->size_written.
+ */
+inline bool
+is_coalescing_payload(const brw::simple_allocator &alloc, const fs_inst *inst)
+{
+ return is_identity_payload(VGRF, inst) && // evaluated first, so src[0] is only inspected for identity payloads
+ inst->src[0].offset == 0 && // the copy has to start at the beginning of the source register
+ alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written; // and has to cover the full recorded size of that register
+}
+
#endif