summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp16
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp260
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_emit.cpp1
5 files changed, 281 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 6917286ff2f..da7a952e61f 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -119,6 +119,7 @@ i965_CXX_SOURCES := \
brw_shader.cpp \
brw_vec4.cpp \
brw_vec4_emit.cpp \
+ brw_vec4_copy_propagation.cpp \
brw_vec4_reg_allocate.cpp \
brw_vec4_visitor.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index e3562d29238..656589dee3a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -31,6 +31,22 @@ extern "C" {
namespace brw {
+bool
+src_reg::equals(src_reg *r)
+{
+ return (file == r->file &&
+ reg == r->reg &&
+ reg_offset == r->reg_offset &&
+ type == r->type &&
+ negate == r->negate &&
+ abs == r->abs &&
+ swizzle == r->swizzle &&
+ !reladdr && !r->reladdr &&
+ memcmp(&fixed_hw_reg, &r->fixed_hw_reg,
+ sizeof(fixed_hw_reg)) == 0 &&
+ imm.u == r->imm.u);
+}
+
void
vec4_visitor::calculate_live_intervals()
{
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index f148ca62cd5..545e8f15a8d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -162,6 +162,8 @@ public:
this->imm.i = i;
}
+ bool equals(src_reg *r);
+
src_reg(class vec4_visitor *v, const struct glsl_type *type);
explicit src_reg(dst_reg reg);
@@ -396,6 +398,7 @@ public:
void calculate_live_intervals();
bool dead_code_eliminate();
bool virtual_grf_interferes(int a, int b);
+ bool opt_copy_propagation();
vec4_instruction *emit(vec4_instruction *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
new file mode 100644
index 00000000000..1e24e2e1ccc
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file brw_vec4_copy_propagation.cpp
+ *
+ * Implements tracking of values copied between registers, and
+ * optimizations based on that: copy propagation and constant
+ * propagation.
+ */
+
+#include "brw_vec4.h"
+extern "C" {
+#include "main/macros.h"
+}
+
+namespace brw {
+
+static bool
+is_direct_copy(vec4_instruction *inst)
+{
+ return (inst->opcode == BRW_OPCODE_MOV &&
+ !inst->predicate &&
+ inst->dst.file == GRF &&
+ !inst->saturate &&
+ !inst->dst.reladdr &&
+ !inst->src[0].reladdr &&
+ inst->dst.type == inst->src[0].type);
+}
+
+static bool
+is_dominated_by_previous_instruction(vec4_instruction *inst)
+{
+ return (inst->opcode != BRW_OPCODE_DO &&
+ inst->opcode != BRW_OPCODE_WHILE &&
+ inst->opcode != BRW_OPCODE_ELSE &&
+ inst->opcode != BRW_OPCODE_ENDIF);
+}
+
+static bool
+try_constant_propagation(vec4_instruction *inst, int arg, src_reg *values[4])
+{
+ /* For constant propagation, we only handle the same constant
+ * across all 4 channels. Some day, we should handle the 8-bit
+ * float vector format, which would let us constant propagate
+ * vectors better.
+ */
+ src_reg value = *values[0];
+ for (int i = 1; i < 4; i++) {
+ if (!value.equals(values[i]))
+ return false;
+ }
+
+ if (value.file != IMM)
+ return false;
+
+ if (inst->src[arg].abs) {
+ if (value.type == BRW_REGISTER_TYPE_F) {
+ value.imm.f = fabs(value.imm.f);
+ } else if (value.type == BRW_REGISTER_TYPE_D) {
+ if (value.imm.i < 0)
+ value.imm.i = -value.imm.i;
+ }
+ }
+
+ if (inst->src[arg].negate) {
+ if (value.type == BRW_REGISTER_TYPE_F)
+ value.imm.f = -value.imm.f;
+ else
+ value.imm.u = -value.imm.u;
+ }
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MOV:
+ inst->src[arg] = value;
+ return true;
+
+ case BRW_OPCODE_MUL:
+ case BRW_OPCODE_ADD:
+ if (arg == 1) {
+ inst->src[arg] = value;
+ return true;
+ } else if (arg == 0 && inst->src[1].file != IMM) {
+ /* Fit this constant in by commuting the operands */
+ inst->src[0] = inst->src[1];
+ inst->src[1] = value;
+ return true;
+ }
+ break;
+
+ case BRW_OPCODE_CMP:
+ if (arg == 1) {
+ inst->src[arg] = value;
+ return true;
+ } else if (arg == 0 && inst->src[1].file != IMM) {
+ uint32_t new_cmod;
+
+ new_cmod = brw_swap_cmod(inst->conditional_mod);
+ if (new_cmod != ~0u) {
+ /* Fit this constant in by swapping the operands and
+ * flipping the test.
+ */
+ inst->src[0] = inst->src[1];
+ inst->src[1] = value;
+ inst->conditional_mod = new_cmod;
+ return true;
+ }
+ }
+ break;
+
+ case BRW_OPCODE_SEL:
+ if (arg == 1) {
+ inst->src[arg] = value;
+ return true;
+ } else if (arg == 0 && inst->src[1].file != IMM) {
+ inst->src[0] = inst->src[1];
+ inst->src[1] = value;
+
+ /* If this was predicated, flipping operands means
+ * we also need to flip the predicate.
+ */
+ if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+ inst->predicate_inverse = !inst->predicate_inverse;
+ }
+ return true;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return false;
+}
+
+bool
+vec4_visitor::opt_copy_propagation()
+{
+ bool progress = false;
+ src_reg *cur_value[virtual_grf_reg_count][4];
+
+ memset(&cur_value, 0, sizeof(cur_value));
+
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ /* This pass only works on basic blocks. If there's flow
+ * control, throw out all our information and start from
+ * scratch.
+ *
+ * This should really be fixed by using a structure like in
+ * src/glsl/opt_copy_propagation.cpp to track available copies.
+ */
+ if (!is_dominated_by_previous_instruction(inst)) {
+ memset(cur_value, 0, sizeof(cur_value));
+ continue;
+ }
+
+ /* For each source arg, see if each component comes from a copy
+ * from the same type file (IMM, GRF, UNIFORM), and try
+ * optimizing out access to the copy result
+ */
+ for (int i = 2; i >= 0; i--) {
+ int reg = (virtual_grf_reg_map[inst->src[i].reg] +
+ inst->src[i].reg_offset);
+
+ /* Copied values end up in GRFs, and we don't track reladdr
+ * accesses.
+ */
+ if (inst->src[i].file != GRF ||
+ inst->src[i].reladdr)
+ continue;
+
+ /* Find the regs that each swizzle component came from.
+ */
+ src_reg *values[4];
+ int c;
+ for (c = 0; c < 4; c++) {
+ values[c] = cur_value[reg][BRW_GET_SWZ(inst->src[i].swizzle, c)];
+
+ /* If there's no available copy for this channel, bail.
+ * We could be more aggressive here -- some channels might
+ * not get used based on the destination writemask.
+ */
+ if (!values[c])
+ break;
+
+ /* We'll only be able to copy propagate if the sources are
+ * all from the same file -- there's no ability to swizzle
+ * 0 or 1 constants in with source registers like in i915.
+ */
+ if (c > 0 && values[c - 1]->file != values[c]->file)
+ break;
+ }
+
+ if (c != 4)
+ continue;
+
+ if (try_constant_propagation(inst, i, values))
+ progress = true;
+ }
+
+ /* Track available source registers. */
+ if (is_direct_copy(inst)) {
+ int reg = virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
+ for (int i = 0; i < 4; i++) {
+ if (inst->dst.writemask & (1 << i)) {
+ cur_value[reg][i] = &inst->src[0];
+ }
+ }
+ continue;
+ }
+
+ /* For any updated channels, clear tracking of them as a source
+ * or destination.
+ *
+ * FINISHME: Sources aren't handled, which will need to be done
+ * for copy propagation.
+ */
+ if (inst->dst.file == GRF) {
+ if (inst->dst.reladdr)
+ memset(cur_value, 0, sizeof(cur_value));
+ else {
+ int reg = virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
+
+ for (int i = 0; i < 4; i++) {
+ if (inst->dst.writemask & (1 << i))
+ cur_value[reg][i] = NULL;
+ }
+ }
+ }
+ }
+
+ if (progress)
+ live_intervals_valid = false;
+
+ return progress;
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 78ffbd7b6ab..c40c41f7a0c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -614,6 +614,7 @@ vec4_visitor::run()
do {
progress = false;
progress = dead_code_eliminate() || progress;
+ progress = opt_copy_propagation() || progress;
} while (progress);