summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 65
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp 36
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 13
-rw-r--r-- src/mesa/drivers/dri/i965/brw_ir_fs.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_ir_vec4.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.cpp 29
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 13
7 files changed, 123 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 7904f4d2862..d2881b2d7a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -288,6 +288,71 @@ fs_inst::is_send_from_grf() const
}
}
+/**
+ * Reports whether the register allocator must keep this instruction's
+ * destination apart from its GRF sources.
+ *
+ * Normally the last read of a register and a write to it may share one
+ * instruction: a single hardware instruction fetches all of its sources
+ * before it starts writing the destination.  Two situations break that
+ * assumption:
+ *
+ *  - A virtual opcode that the code generator expands into several
+ *    instructions.  With src == dst, an early instruction of the expansion
+ *    can overwrite the register before a later one has read it.
+ *
+ *  - A SIMD16 compressed instruction whose source regioning makes the
+ *    second half re-read data the first half may already have overwritten
+ *    (worked example in the body).
+ *
+ * \return true when a source must not share a register with the destination
+ *         (the register allocator then adds interference), false otherwise.
+ */
+bool
+fs_inst::has_source_and_destination_hazard() const
+{
+   /* Virtual opcode: the destination is filled by multiple partial writes. */
+   if (opcode == FS_OPCODE_PACK_HALF_2x16_SPLIT)
+      return true;
+
+   /* Everything else is only at risk when SIMD16-compressed.  Hardware
+    * decodes
+    *
+    *    add(16) g4<1>F g4<8,8,1>F g6<8,8,1>F
+    *
+    * as the pair
+    *
+    *    add(8) g4<1>F g4<8,8,1>F g6<8,8,1>F
+    *    add(8) g5<1>F g5<8,8,1>F g7<8,8,1>F
+    *
+    * and each half touches only its own registers, so that is safe.  With a
+    * uniform (stride 0) source the halves become
+    *
+    *    add(8) g4<1>F g4<0,1,0>F g6<8,8,1>F
+    *    add(8) g5<1>F g4<0,1,0>F g7<8,8,1>F
+    *
+    * and the first half's write to g4 has destroyed the second half's src0,
+    * producing garbage for those 8 pixels.  Sources of 16-bit values, such as
+    * the pre-gen6 pixel_x/pixel_y registers, get stomped by the first decode
+    * in the same way; the byte types are rejected below as well.
+    */
+   if (exec_size != 16)
+      return false;
+
+   for (int s = 0; s < sources; s++) {
+      if (src[s].file != VGRF)
+         continue;
+      const bool uniform = src[s].stride == 0;
+      const bool narrow = src[s].type == BRW_REGISTER_TYPE_UW ||
+                          src[s].type == BRW_REGISTER_TYPE_W ||
+                          src[s].type == BRW_REGISTER_TYPE_UB ||
+                          src[s].type == BRW_REGISTER_TYPE_B;
+      if (uniform || narrow)
+         return true;
+   }
+   return false;
+}
+
bool
fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
{
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 80fb8c28f81..66b70a9144b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -59,42 +59,8 @@ fs_live_variables::setup_one_read(struct block_data *bd, fs_inst *inst,
int var = var_from_reg(reg);
assert(var < num_vars);
- /* In most cases, a register can be written over safely by the
- * same instruction that is its last use. For a single
- * instruction, the sources are dereferenced before writing of the
- * destination starts (naturally). This gets more complicated for
- * simd16, because the instruction:
- *
- * add(16) g4<1>F g4<8,8,1>F g6<8,8,1>F
- *
- * is actually decoded in hardware as:
- *
- * add(8) g4<1>F g4<8,8,1>F g6<8,8,1>F
- * add(8) g5<1>F g5<8,8,1>F g7<8,8,1>F
- *
- * Which is safe. However, if we have uniform accesses
- * happening, we get into trouble:
- *
- * add(8) g4<1>F g4<0,1,0>F g6<8,8,1>F
- * add(8) g5<1>F g4<0,1,0>F g7<8,8,1>F
- *
- * Now our destination for the first instruction overwrote the
- * second instruction's src0, and we get garbage for those 8
- * pixels. There's a similar issue for the pre-gen6
- * pixel_x/pixel_y, which are registers of 16-bit values and thus
- * would get stomped by the first decode as well.
- */
- int end_ip = ip;
- if (inst->exec_size == 16 && (reg.stride == 0 ||
- reg.type == BRW_REGISTER_TYPE_UW ||
- reg.type == BRW_REGISTER_TYPE_W ||
- reg.type == BRW_REGISTER_TYPE_UB ||
- reg.type == BRW_REGISTER_TYPE_B)) {
- end_ip++;
- }
-
start[var] = MIN2(start[var], ip);
- end[var] = MAX2(end[var], end_ip);
+ end[var] = MAX2(end[var], ip);
/* The use[] bitset marks when the block makes use of a variable (VGRF
* channel) without having completely defined that variable within the
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 40129fd695e..2347cd5d33f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -597,6 +597,19 @@ fs_visitor::assign_regs(bool allow_spilling)
}
}
+   /* Instructions with a src/dst hazard must not share a register between the
+    * destination and any of their inst->sources VGRF sources: add interference.
+    */
+   foreach_block_and_inst(block, fs_inst, inst, cfg) {
+      if (inst->dst.file == VGRF && inst->has_source_and_destination_hazard()) {
+         for (unsigned i = 0; i < inst->sources; i++) {
+            if (inst->src[i].file == VGRF) {
+               ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr);
+            }
+         }
+      }
+   }
+
setup_payload_interference(g, payload_node_count, first_payload_node);
if (devinfo->gen >= 7) {
int first_used_mrf = BRW_MAX_MRF(devinfo->gen);
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 84ee5292908..c3eec2efb42 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -205,6 +205,7 @@ public:
bool can_do_source_mods(const struct brw_device_info *devinfo);
bool can_change_types() const;
bool has_side_effects() const;
+ bool has_source_and_destination_hazard() const;
bool reads_flag() const;
bool writes_flag() const;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 861d7b83e10..660becaafa7 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -169,6 +169,7 @@ public:
void reswizzle(int dst_writemask, int swizzle);
bool can_do_source_mods(const struct brw_device_info *devinfo);
bool can_change_types() const;
+ bool has_source_and_destination_hazard() const;
bool reads_flag()
{
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 9a79d67e12f..a697bdf84a0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -161,6 +161,35 @@ vec4_instruction::is_send_from_grf()
}
}
+/**
+ * Reports whether the register allocator must keep this instruction's
+ * destination apart from its GRF sources.
+ *
+ * Normally the last read of a register and a write to it may share one
+ * instruction: a single hardware instruction fetches all of its sources
+ * before it starts writing the destination.  The exception is:
+ *
+ *  - A virtual opcode that the code generator expands into several
+ *    instructions.  With src == dst, an early instruction of the expansion
+ *    can overwrite the register before a later one has read it.
+ *
+ * No vec4 opcode is currently reported as hazardous, so this always answers
+ * false; the method exists so the vec4 register allocator can query its
+ * instructions for the hazard when building interference.
+ *
+ * \return true when a source must not share a register with the destination
+ *         (the register allocator then adds interference), false otherwise.
+ */
+bool
+vec4_instruction::has_source_and_destination_hazard() const
+{
+   /* Most opcodes in the vec4 world use MRFs.  No opcode is special-cased
+    * yet; a virtual opcode that reads a source after writing the destination
+    * would need to return true here.
+    */
+   return false;
+}
+
unsigned
vec4_instruction::regs_read(unsigned arg) const
{
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 01c9c96276e..afc326612a2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -221,6 +221,19 @@ vec4_visitor::reg_allocate()
}
}
+ /* Instructions reporting a source/destination hazard get interference
+ * between dst and each VGRF src. NOTE(review): 3 assumed to be src[] size.
+ */
+ foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+ if (inst->dst.file == VGRF && inst->has_source_and_destination_hazard()) {
+ for (unsigned i = 0; i < 3; i++) {
+ if (inst->src[i].file == VGRF) {
+ ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr);
+ }
+ }
+ }
+ }
+
setup_payload_interference(g, first_payload_node, node_count);
if (!ra_allocate(g)) {