diff options
author | Francisco Jerez <[email protected]> | 2016-02-26 17:42:27 -0800 |
---|---|---|
committer | Francisco Jerez <[email protected]> | 2016-03-06 12:22:40 -0800 |
commit | 3dd0441f6cbcceb42effea2219d835082c2e2725 (patch) | |
tree | f1cfe0ec3e0b0b1d5650db1fed7eb2878b475ee7 | |
parent | c70b7c80e3e1dea150ece96e02ef3d364284812d (diff) |
i965/vec4: Propagate swizzles correctly during copy propagation.
This simplifies the code that iterates over the per-component values
found in the matching copy_entry struct and checks whether the
register regions that were copied to each component are similar enough
to be treated as a single (reswizzled) value which can be propagated
into the current instruction.
Aside from being scattered between opt_copy_propagation(),
try_copy_propagate(), and try_constant_propagate(), what I found
terribly confusing about the preexisting logic was that
opt_copy_propagation() tried to reorder the array of values according
to the swizzle of the instruction source, which meant one would have
had to invert the reordering applied at the top level in order to find
out which component to take from each value (we were just taking the
i-th component from the i-th value, which is not correct in general).
The saturate mask was also being swizzled incorrectly.
This consolidates the logic for matching multiple components of a
copy_entry into a single function which returns the result as a
regular src_reg on success, as if the copy had been performed with a
single MOV instruction copying all components of the src_reg into the
destination.
Fixes several ARB_vertex_program MOV test-cases from:
https://cgit.freedesktop.org/~kwg/piglit/log/?h=arb_program
Acked-by: Matt Turner <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 125 |
1 files changed, 59 insertions, 66 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index 1da49fbc82a..92423e1f942 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -85,21 +85,66 @@ is_logic_op(enum opcode opcode) opcode == BRW_OPCODE_NOT); } +/** + * Get the origin of a copy as a single register if all components present in + * the given readmask originate from the same register and have compatible + * regions, otherwise return a BAD_FILE register. + */ +static src_reg +get_copy_value(const copy_entry &entry, unsigned readmask) +{ + unsigned swz[4] = {}; + src_reg value; + + for (unsigned i = 0; i < 4; i++) { + if (readmask & (1 << i)) { + if (entry.value[i]) { + src_reg src = *entry.value[i]; + + if (src.file == IMM) { + swz[i] = i; + } else { + swz[i] = BRW_GET_SWZ(src.swizzle, i); + /* Overwrite the original swizzle so the src_reg::equals call + * below doesn't care about it, the correct swizzle will be + * calculated once the swizzles of all components are known. + */ + src.swizzle = BRW_SWIZZLE_XYZW; + } + + if (value.file == BAD_FILE) { + value = src; + } else if (!value.equals(src)) { + return src_reg(); + } + } else { + return src_reg(); + } + } + } + + return swizzle(value, + brw_compose_swizzle(brw_swizzle_for_mask(readmask), + BRW_SWIZZLE4(swz[0], swz[1], + swz[2], swz[3]))); +} + static bool try_constant_propagate(const struct brw_device_info *devinfo, vec4_instruction *inst, - int arg, struct copy_entry *entry) + int arg, const copy_entry *entry) { /* For constant propagation, we only handle the same constant * across all 4 channels. Some day, we should handle the 8-bit * float vector format, which would let us constant propagate * vectors better. + * We could be more aggressive here -- some channels might not get used + * based on the destination writemask. */ - src_reg value = *entry->value[0]; - for (int i = 1; i < 4; i++) { - if (!value.equals(*entry->value[i])) - return false; - } + src_reg value = + get_copy_value(*entry, + brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle, + WRITEMASK_XYZW)); if (value.file != IMM) return false; @@ -238,38 +283,15 @@ try_constant_propagate(const struct brw_device_info *devinfo, static bool try_copy_propagate(const struct brw_device_info *devinfo, vec4_instruction *inst, int arg, - struct copy_entry *entry, int attributes_per_reg) + const copy_entry *entry, int attributes_per_reg) { /* Build up the value we are propagating as if it were the source of a * single MOV */ - /* For constant propagation, we only handle the same constant - * across all 4 channels. Some day, we should handle the 8-bit - * float vector format, which would let us constant propagate - * vectors better. - */ - src_reg value = *entry->value[0]; - for (int i = 1; i < 4; i++) { - /* This is equals() except we don't care about the swizzle. */ - if (value.file != entry->value[i]->file || - value.nr != entry->value[i]->nr || - value.reg_offset != entry->value[i]->reg_offset || - value.type != entry->value[i]->type || - value.negate != entry->value[i]->negate || - value.abs != entry->value[i]->abs) { - return false; - } - } - - /* Compute the swizzle of the original register by swizzling the - * component loaded from each value according to the swizzle of - * operand we're going to change. - */ - int s[4]; - for (int i = 0; i < 4; i++) { - s[i] = BRW_GET_SWZ(entry->value[i]->swizzle, i); - } - value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]); + src_reg value = + get_copy_value(*entry, + brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle, + WRITEMASK_XYZW)); /* Check that we can propagate that value */ if (value.file != UNIFORM && @@ -418,38 +440,9 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) if (inst->regs_read(i) != 1) continue; - int reg = (alloc.offsets[inst->src[i].nr] + - inst->src[i].reg_offset); - - /* Find the regs that each swizzle component came from. - */ - struct copy_entry entry; - memset(&entry, 0, sizeof(copy_entry)); - int c; - for (c = 0; c < 4; c++) { - int channel = BRW_GET_SWZ(inst->src[i].swizzle, c); - entry.value[c] = entries[reg].value[channel]; - - /* If there's no available copy for this channel, bail. - * We could be more aggressive here -- some channels might - * not get used based on the destination writemask. - */ - if (!entry.value[c]) - break; - - entry.saturatemask |= - (entries[reg].saturatemask & (1 << channel) ? 1 : 0) << c; - - /* We'll only be able to copy propagate if the sources are - * all from the same file -- there's no ability to swizzle - * 0 or 1 constants in with source registers like in i915. - */ - if (c > 0 && entry.value[c - 1]->file != entry.value[c]->file) - break; - } - - if (c != 4) - continue; + const unsigned reg = (alloc.offsets[inst->src[i].nr] + + inst->src[i].reg_offset); + const copy_entry &entry = entries[reg]; if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry)) progress = true; |