summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
authorFrancisco Jerez <[email protected]>2016-02-26 17:42:27 -0800
committerFrancisco Jerez <[email protected]>2016-03-06 12:22:40 -0800
commit3dd0441f6cbcceb42effea2219d835082c2e2725 (patch)
treef1cfe0ec3e0b0b1d5650db1fed7eb2878b475ee7 /src/mesa/drivers/dri/i965
parentc70b7c80e3e1dea150ece96e02ef3d364284812d (diff)
i965/vec4: Propagate swizzles correctly during copy propagation.
This simplifies the code that iterates over the per-component values found in the matching copy_entry struct and checks whether the register regions that were copied to each component are similar enough to be treated as a single (reswizzled) value which can be propagated into the current instruction. Aside from being scattered between opt_copy_propagation(), try_copy_propagate(), and try_constant_propagate(), what I found terribly confusing about the preexisting logic was that opt_copy_propagation() tried to reorder the array of values according to the swizzle of the instruction source, which meant one would have had to invert the reordering applied at the top level in order to find out which component to take from each value (we were just taking the i-th component from the i-th value, which is not correct in general). The saturate mask was also being swizzled incorrectly. This consolidates the logic for matching multiple components of a copy_entry into a single function which returns the result as a regular src_reg on success, as if the copy had been performed with a single MOV instruction copying all components of the src_reg into the destination. Fixes several ARB_vertex_program MOV test-cases from: https://cgit.freedesktop.org/~kwg/piglit/log/?h=arb_program Acked-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp125
1 files changed, 59 insertions, 66 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 1da49fbc82a..92423e1f942 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -85,21 +85,66 @@ is_logic_op(enum opcode opcode)
opcode == BRW_OPCODE_NOT);
}
+/**
+ * Get the origin of a copy as a single register if all components present in
+ * the given readmask originate from the same register and have compatible
+ * regions, otherwise return a BAD_FILE register.
+ */
+static src_reg
+get_copy_value(const copy_entry &entry, unsigned readmask)
+{
+ unsigned swz[4] = {};
+ src_reg value;
+
+ for (unsigned i = 0; i < 4; i++) {
+ if (readmask & (1 << i)) {
+ if (entry.value[i]) {
+ src_reg src = *entry.value[i];
+
+ if (src.file == IMM) {
+ swz[i] = i;
+ } else {
+ swz[i] = BRW_GET_SWZ(src.swizzle, i);
+ /* Overwrite the original swizzle so the src_reg::equals call
+ * below doesn't care about it, the correct swizzle will be
+ * calculated once the swizzles of all components are known.
+ */
+ src.swizzle = BRW_SWIZZLE_XYZW;
+ }
+
+ if (value.file == BAD_FILE) {
+ value = src;
+ } else if (!value.equals(src)) {
+ return src_reg();
+ }
+ } else {
+ return src_reg();
+ }
+ }
+ }
+
+ return swizzle(value,
+ brw_compose_swizzle(brw_swizzle_for_mask(readmask),
+ BRW_SWIZZLE4(swz[0], swz[1],
+ swz[2], swz[3])));
+}
+
static bool
try_constant_propagate(const struct brw_device_info *devinfo,
vec4_instruction *inst,
- int arg, struct copy_entry *entry)
+ int arg, const copy_entry *entry)
{
/* For constant propagation, we only handle the same constant
* across all 4 channels. Some day, we should handle the 8-bit
* float vector format, which would let us constant propagate
* vectors better.
+ * We could be more aggressive here -- some channels might not get used
+ * based on the destination writemask.
*/
- src_reg value = *entry->value[0];
- for (int i = 1; i < 4; i++) {
- if (!value.equals(*entry->value[i]))
- return false;
- }
+ src_reg value =
+ get_copy_value(*entry,
+ brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle,
+ WRITEMASK_XYZW));
if (value.file != IMM)
return false;
@@ -238,38 +283,15 @@ try_constant_propagate(const struct brw_device_info *devinfo,
static bool
try_copy_propagate(const struct brw_device_info *devinfo,
vec4_instruction *inst, int arg,
- struct copy_entry *entry, int attributes_per_reg)
+ const copy_entry *entry, int attributes_per_reg)
{
/* Build up the value we are propagating as if it were the source of a
* single MOV
*/
- /* For constant propagation, we only handle the same constant
- * across all 4 channels. Some day, we should handle the 8-bit
- * float vector format, which would let us constant propagate
- * vectors better.
- */
- src_reg value = *entry->value[0];
- for (int i = 1; i < 4; i++) {
- /* This is equals() except we don't care about the swizzle. */
- if (value.file != entry->value[i]->file ||
- value.nr != entry->value[i]->nr ||
- value.reg_offset != entry->value[i]->reg_offset ||
- value.type != entry->value[i]->type ||
- value.negate != entry->value[i]->negate ||
- value.abs != entry->value[i]->abs) {
- return false;
- }
- }
-
- /* Compute the swizzle of the original register by swizzling the
- * component loaded from each value according to the swizzle of
- * operand we're going to change.
- */
- int s[4];
- for (int i = 0; i < 4; i++) {
- s[i] = BRW_GET_SWZ(entry->value[i]->swizzle, i);
- }
- value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
+ src_reg value =
+ get_copy_value(*entry,
+ brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle,
+ WRITEMASK_XYZW));
/* Check that we can propagate that value */
if (value.file != UNIFORM &&
@@ -418,38 +440,9 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
if (inst->regs_read(i) != 1)
continue;
- int reg = (alloc.offsets[inst->src[i].nr] +
- inst->src[i].reg_offset);
-
- /* Find the regs that each swizzle component came from.
- */
- struct copy_entry entry;
- memset(&entry, 0, sizeof(copy_entry));
- int c;
- for (c = 0; c < 4; c++) {
- int channel = BRW_GET_SWZ(inst->src[i].swizzle, c);
- entry.value[c] = entries[reg].value[channel];
-
- /* If there's no available copy for this channel, bail.
- * We could be more aggressive here -- some channels might
- * not get used based on the destination writemask.
- */
- if (!entry.value[c])
- break;
-
- entry.saturatemask |=
- (entries[reg].saturatemask & (1 << channel) ? 1 : 0) << c;
-
- /* We'll only be able to copy propagate if the sources are
- * all from the same file -- there's no ability to swizzle
- * 0 or 1 constants in with source registers like in i915.
- */
- if (c > 0 && entry.value[c - 1]->file != entry.value[c]->file)
- break;
- }
-
- if (c != 4)
- continue;
+ const unsigned reg = (alloc.offsets[inst->src[i].nr] +
+ inst->src[i].reg_offset);
+ const copy_entry &entry = entries[reg];
if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
progress = true;