diff options
author | Jason Ekstrand <[email protected]> | 2014-09-23 17:22:09 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2014-09-30 10:29:14 -0700 |
commit | 1728e74957a62b1b4b9fbb62a7de2c12b77c8a75 (patch) | |
tree | feb4a47dcf858626785d5a3d622c459bf7c6a5eb /src | |
parent | 4d5f0eb0487ad13e90f7248c95c023c35457eaf9 (diff) |
i965/fs: Copy propagate partial reads.
This commit reworks copy propagation a bit to support propagating the
copying of partial registers. This comes up every time we have pull
constants because we do a pull constant read immediately followed by a move
to splat the one component of the out to 8 or 16-wide. This allows us to
eliminate the copy and simply use the one component of the register.
Shader DB results:
total instructions in shared programs: 5044937 -> 5044428 (-0.01%)
instructions in affected programs: 66112 -> 65603 (-0.77%)
GAINED: 0
LOST: 0
Signed-off-by: Jason Ekstrand <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 83 |
2 files changed, 64 insertions, 20 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 831f8ae85db..1cdbf311977 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -337,6 +337,7 @@ public: bool opt_cse_local(bblock_t *block); bool opt_copy_propagate(); bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry); + bool try_constant_propagate(fs_inst *inst, acp_entry *entry); bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block, exec_list *acp); void opt_drop_redundant_mov_to_flags(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index da208dc434d..bd502c4ca80 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -277,24 +277,30 @@ is_logic_op(enum opcode opcode) bool fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) { + if (inst->src[arg].file != GRF) + return false; + if (entry->src.file == IMM) return false; + assert(entry->src.file == GRF || entry->src.file == UNIFORM); if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD && inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) return false; - /* Bail if inst is reading more than entry is writing. */ - if ((inst->regs_read(this, arg) * inst->src[arg].stride * - type_sz(inst->src[arg].type)) > type_sz(entry->dst.type)) + assert(entry->dst.file == GRF); + if (inst->src[arg].reg != entry->dst.reg) return false; - if (inst->src[arg].file != entry->dst.file || - inst->src[arg].reg != entry->dst.reg || - inst->src[arg].reg_offset != entry->dst.reg_offset || - inst->src[arg].subreg_offset != entry->dst.subreg_offset) { + /* Bail if inst is reading a range that isn't contained in the range + * that entry is writing. + */ + int reg_size = dispatch_width * sizeof(float); + if (inst->src[arg].reg_offset < entry->dst.reg_offset || + (inst->src[arg].reg_offset * reg_size + inst->src[arg].subreg_offset + + inst->regs_read(this, arg) * inst->src[arg].stride * reg_size) > + (entry->dst.reg_offset + 1) * reg_size) return false; - } /* See resolve_ud_negate() and comment in brw_fs_emit.cpp. */ if (inst->conditional_mod && @@ -361,11 +367,39 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) inst->src[arg].file = entry->src.file; inst->src[arg].reg = entry->src.reg; - inst->src[arg].reg_offset = entry->src.reg_offset; - inst->src[arg].subreg_offset = entry->src.subreg_offset; inst->src[arg].stride *= entry->src.stride; inst->saturate = inst->saturate || entry->saturate; + switch (entry->src.file) { + case BAD_FILE: + case HW_REG: + case UNIFORM: + inst->src[arg].reg_offset = entry->src.reg_offset; + inst->src[arg].subreg_offset = entry->src.subreg_offset; + break; + case GRF: + { + /* In this case, we have to deal with mapping parts of vgrfs to + * other parts of vgrfs so we have to do some reg_offset magic. + */ + + /* Compute the offset of inst->src[arg] relative to inst->dst */ + assert(entry->dst.subreg_offset == 0); + int rel_offset = inst->src[arg].reg_offset - entry->dst.reg_offset; + int rel_suboffset = inst->src[arg].subreg_offset; + + /* Compute the final register offset (in bytes) */ + int offset = entry->src.reg_offset * reg_size + entry->src.subreg_offset; + offset += rel_offset * reg_size + rel_suboffset; + inst->src[arg].reg_offset = offset / reg_size; + inst->src[arg].subreg_offset = offset % reg_size; + } + break; + default: + unreachable("Invalid register file"); + break; + } + if (!inst->src[arg].abs) { inst->src[arg].abs = entry->src.abs; inst->src[arg].negate ^= entry->src.negate; @@ -375,9 +409,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) } -static bool -try_constant_propagate(struct brw_context *brw, fs_inst *inst, - acp_entry *entry) +bool +fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) { bool progress = false; @@ -385,12 +418,22 @@ try_constant_propagate(struct brw_context *brw, fs_inst *inst, return false; for (int i = inst->sources - 1; i >= 0; i--) { - if (inst->src[i].file != entry->dst.file || - inst->src[i].reg != entry->dst.reg || - inst->src[i].reg_offset != entry->dst.reg_offset || - inst->src[i].subreg_offset != entry->dst.subreg_offset || - inst->src[i].type != entry->dst.type || - inst->src[i].stride > 1) + if (inst->src[i].file != GRF) + continue; + + assert(entry->dst.file == GRF); + if (inst->src[i].reg != entry->dst.reg || + inst->src[i].type != entry->dst.type) + continue; + + /* Bail if inst is reading a range that isn't contained in the range + * that entry is writing. + */ + int reg_size = dispatch_width * sizeof(float); + if (inst->src[i].reg_offset < entry->dst.reg_offset || + (inst->src[i].reg_offset * reg_size + inst->src[i].subreg_offset + + inst->regs_read(this, i) * inst->src[i].stride * reg_size) > + (entry->dst.reg_offset + 1) * reg_size) continue; /* Don't bother with cases that should have been taken care of by the @@ -546,7 +589,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, continue; foreach_in_list(acp_entry, entry, &acp[inst->src[i].reg % ACP_HASH_SIZE]) { - if (try_constant_propagate(brw, inst, entry)) + if (try_constant_propagate(inst, entry)) progress = true; if (try_copy_propagate(inst, i, entry)) |