aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Francisco Jerez <[email protected]> 2019-12-30 00:36:48 -0800
committer: Francisco Jerez <[email protected]> 2020-01-17 13:21:41 -0800
commit: 8eb4f2092a2a177eada93a97fbc0d2bdd3eff037 (patch)
tree: 58be742b9a5b3e96467713726c0be9fcd973397b
parent: e328fbd9f8c8e5ad2a41e249bf18be5642d46d8d (diff)
intel/fs: Add support for copy-propagating a block of multiple FIXED_GRFs.
In cases where a LOAD_PAYLOAD instruction copies a single block of sequential GRF registers into the destination (see is_identity_payload()), splitting the block copy into a number of ACP entries (one for each LOAD_PAYLOAD source) is undesirable, because that prevents copy propagation into any instructions which read multiple components at once with the same source (the barycentric source of the LINTERP instruction is going to be the overwhelmingly most common example).

Technically it would also be possible to do this for VGRF sources, but there is little benefit from that since register coalesce already covers many of those cases. There is no way for a block of FIXED_GRFs to be coalesced into a VGRF, though.

This prevents the following shader-db regressions (including SIMD32 programs) in combination with the interpolation rework part of this series.

On SKL:

total instructions in shared programs: 18595160 -> 18828562 (1.26%)
instructions in affected programs: 13374946 -> 13608348 (1.75%)
helped: 7
HURT: 108977

total spills in shared programs: 9116 -> 9106 (-0.11%)
spills in affected programs: 404 -> 394 (-2.48%)
helped: 7
HURT: 9

total fills in shared programs: 8994 -> 9176 (2.02%)
fills in affected programs: 898 -> 1080 (20.27%)
helped: 7
HURT: 9

LOST: 469
GAINED: 220

On SNB:

total instructions in shared programs: 13996898 -> 14096222 (0.71%)
instructions in affected programs: 8088546 -> 8187870 (1.23%)
helped: 2
HURT: 66520

total spills in shared programs: 2985 -> 2961 (-0.80%)
spills in affected programs: 632 -> 608 (-3.80%)
helped: 2
HURT: 0

total fills in shared programs: 3144 -> 3128 (-0.51%)
fills in affected programs: 1515 -> 1499 (-1.06%)
helped: 2
HURT: 0

LOST: 0
GAINED: 4

Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- src/intel/compiler/brw_fs_copy_propagation.cpp 12
1 file changed, 7 insertions, 5 deletions
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index adf0f2fefff..4f5493e4f43 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -48,8 +48,8 @@ struct acp_entry : public exec_node {
fs_reg dst;
fs_reg src;
unsigned global_idx;
- uint8_t size_written;
- uint8_t size_read;
+ unsigned size_written;
+ unsigned size_read;
enum opcode opcode;
bool saturate;
};
@@ -895,7 +895,8 @@ can_propagate_from(fs_inst *inst)
(inst->src[0].file == FIXED_GRF &&
inst->src[0].is_contiguous())) &&
inst->src[0].type == inst->dst.type &&
- !inst->is_partial_write());
+ !inst->is_partial_write()) ||
+ is_identity_payload(FIXED_GRF, inst);
}
/* Walks a basic block and does copy propagation on it using the acp
@@ -948,11 +949,12 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block,
* operand of another instruction, add it to the ACP.
*/
if (can_propagate_from(inst)) {
- acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
+ acp_entry *entry = rzalloc(copy_prop_ctx, acp_entry);
entry->dst = inst->dst;
entry->src = inst->src[0];
entry->size_written = inst->size_written;
- entry->size_read = inst->size_read(0);
+ for (unsigned i = 0; i < inst->sources; i++)
+ entry->size_read += inst->size_read(i);
entry->opcode = inst->opcode;
entry->saturate = inst->saturate;
acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);