summaryrefslogtreecommitdiffstats
path: root/src/intel/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/intel/compiler')
-rw-r--r--src/intel/compiler/brw_fs.cpp31
-rw-r--r--src/intel/compiler/brw_fs_cmod_propagation.cpp20
-rw-r--r--src/intel/compiler/brw_fs_copy_propagation.cpp8
-rw-r--r--src/intel/compiler/brw_fs_cse.cpp3
-rw-r--r--src/intel/compiler/brw_fs_dead_code_eliminate.cpp2
-rw-r--r--src/intel/compiler/brw_fs_live_variables.cpp2
-rw-r--r--src/intel/compiler/brw_fs_reg_allocate.cpp2
-rw-r--r--src/intel/compiler/brw_fs_register_coalesce.cpp2
-rw-r--r--src/intel/compiler/brw_fs_saturate_propagation.cpp7
-rw-r--r--src/intel/compiler/brw_fs_sel_peephole.cpp4
-rw-r--r--src/intel/compiler/brw_ir_fs.h3
11 files changed, 54 insertions, 30 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 15f51b038cb..ebcdd52d668 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -734,14 +734,33 @@ fs_visitor::limit_dispatch_width(unsigned n, const char *msg)
* it.
*/
bool
-fs_inst::is_partial_write() const
+fs_inst::is_partial_reg_write() const
{
return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
- (this->exec_size * type_sz(this->dst.type)) < 32 ||
!this->dst.is_contiguous() ||
+ (this->exec_size * type_sz(this->dst.type)) < REG_SIZE ||
this->dst.offset % REG_SIZE != 0);
}
+/**
+ * Returns true if the instruction has a flag that means it won't
+ * update an entire variable for the given dispatch width.
+ *
+ * This is only different from is_partial_reg_write() for SIMD8
+ * dispatches of 16-bit (or smaller) instructions.
+ */
+bool
+fs_inst::is_partial_var_write(uint32_t dispatch_width) const
+{
+ const uint32_t type_size = type_sz(this->dst.type);
+ uint32_t var_size = MIN2(REG_SIZE, dispatch_width * type_size);
+
+ return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
+ !this->dst.is_contiguous() ||
+ (this->exec_size * type_sz(this->dst.type)) < var_size ||
+ this->dst.offset % var_size != 0);
+}
+
unsigned
fs_inst::components_read(unsigned i) const
{
@@ -2956,7 +2975,7 @@ fs_visitor::opt_register_renaming()
if (depth == 0 &&
inst->dst.file == VGRF &&
alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
- !inst->is_partial_write()) {
+ !inst->is_partial_reg_write()) {
if (remap[dst] == ~0u) {
remap[dst] = dst;
} else {
@@ -3160,7 +3179,7 @@ fs_visitor::compute_to_mrf()
next_ip++;
if (inst->opcode != BRW_OPCODE_MOV ||
- inst->is_partial_write() ||
+ inst->is_partial_reg_write() ||
inst->dst.file != MRF || inst->src[0].file != VGRF ||
inst->dst.type != inst->src[0].type ||
inst->src[0].abs || inst->src[0].negate ||
@@ -3193,7 +3212,7 @@ fs_visitor::compute_to_mrf()
* that writes that reg, but it would require smarter
* tracking.
*/
- if (scan_inst->is_partial_write())
+ if (scan_inst->is_partial_reg_write())
break;
/* Handling things not fully contained in the source of the copy
@@ -3511,7 +3530,7 @@ fs_visitor::remove_duplicate_mrf_writes()
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF &&
inst->src[0].file != ARF &&
- !inst->is_partial_write()) {
+ !inst->is_partial_reg_write()) {
last_mrf_move[inst->dst.nr] = inst;
}
}
diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp
index 14852e52e6e..a821d4e5481 100644
--- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
+++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
@@ -50,13 +50,13 @@
static bool
cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block,
- fs_inst *inst)
+ fs_inst *inst, unsigned dispatch_width)
{
bool read_flag = false;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (scan_inst->opcode == BRW_OPCODE_ADD &&
- !scan_inst->is_partial_write() &&
+ !scan_inst->is_partial_var_write(dispatch_width) &&
scan_inst->exec_size == inst->exec_size) {
bool negate;
@@ -126,7 +126,7 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block,
*/
static bool
cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block,
- fs_inst *inst)
+ fs_inst *inst, unsigned dispatch_width)
{
const enum brw_conditional_mod cond = brw_negate_cmod(inst->conditional_mod);
bool read_flag = false;
@@ -141,7 +141,7 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block,
scan_inst->opcode != BRW_OPCODE_AND)
break;
- if (scan_inst->is_partial_write() ||
+ if (scan_inst->is_partial_var_write(dispatch_width) ||
scan_inst->dst.offset != inst->src[0].offset ||
scan_inst->exec_size != inst->exec_size)
break;
@@ -166,7 +166,9 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block,
}
static bool
-opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
+opt_cmod_propagation_local(const gen_device_info *devinfo,
+ bblock_t *block,
+ unsigned dispatch_width)
{
bool progress = false;
int ip = block->end_ip + 1;
@@ -219,14 +221,14 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
*/
if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) {
if (brw_reg_type_is_floating_point(inst->src[0].type) &&
- cmod_propagate_cmp_to_add(devinfo, block, inst))
+ cmod_propagate_cmp_to_add(devinfo, block, inst, dispatch_width))
progress = true;
continue;
}
if (inst->opcode == BRW_OPCODE_NOT) {
- progress = cmod_propagate_not(devinfo, block, inst) || progress;
+ progress = cmod_propagate_not(devinfo, block, inst, dispatch_width) || progress;
continue;
}
@@ -234,7 +236,7 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
inst->src[0], inst->size_read(0))) {
- if (scan_inst->is_partial_write() ||
+ if (scan_inst->is_partial_var_write(dispatch_width) ||
scan_inst->dst.offset != inst->src[0].offset ||
scan_inst->exec_size != inst->exec_size)
break;
@@ -364,7 +366,7 @@ fs_visitor::opt_cmod_propagation()
bool progress = false;
foreach_block_reverse(block, cfg) {
- progress = opt_cmod_propagation_local(devinfo, block) || progress;
+ progress = opt_cmod_propagation_local(devinfo, block, dispatch_width) || progress;
}
if (progress)
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index 1f4e122e6c9..1670eedce77 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -515,7 +515,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
/* Compute the first component of the copy that the instruction is
* reading, and the base byte offset within that component.
*/
- assert(entry->dst.offset % REG_SIZE == 0 && entry->dst.stride == 1);
+ assert(entry->dst.stride == 1);
const unsigned component = rel_offset / type_sz(entry->dst.type);
const unsigned suboffset = rel_offset % type_sz(entry->dst.type);
@@ -767,7 +767,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
}
static bool
-can_propagate_from(fs_inst *inst)
+can_propagate_from(fs_inst *inst, unsigned dispatch_width)
{
return (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == VGRF &&
@@ -778,7 +778,7 @@ can_propagate_from(fs_inst *inst)
inst->src[0].file == UNIFORM ||
inst->src[0].file == IMM) &&
inst->src[0].type == inst->dst.type &&
- !inst->is_partial_write());
+ !inst->is_partial_var_write(dispatch_width));
}
/* Walks a basic block and does copy propagation on it using the acp
@@ -830,7 +830,7 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block,
/* If this instruction's source could potentially be folded into the
* operand of another instruction, add it to the ACP.
*/
- if (can_propagate_from(inst)) {
+ if (can_propagate_from(inst, dispatch_width)) {
acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
entry->dst = inst->dst;
entry->src = inst->src[0];
diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp
index 6efa111b1a4..e955c35e145 100644
--- a/src/intel/compiler/brw_fs_cse.cpp
+++ b/src/intel/compiler/brw_fs_cse.cpp
@@ -252,7 +252,8 @@ fs_visitor::opt_cse_local(bblock_t *block)
int ip = block->start_ip;
foreach_inst_in_block(fs_inst, inst, block) {
/* Skip some cases. */
- if (is_expression(this, inst) && !inst->is_partial_write() &&
+ if (is_expression(this, inst) &&
+ !inst->is_partial_var_write(dispatch_width) &&
((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) ||
inst->dst.is_null()))
{
diff --git a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp
index 38ae1d41a6a..7c60a33eb58 100644
--- a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp
+++ b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp
@@ -107,7 +107,7 @@ fs_visitor::dead_code_eliminate()
}
if (inst->dst.file == VGRF) {
- if (!inst->is_partial_write()) {
+ if (!inst->is_partial_reg_write()) {
int var = live_intervals->var_from_reg(inst->dst);
for (unsigned i = 0; i < regs_written(inst); i++) {
BITSET_CLEAR(live, var + i);
diff --git a/src/intel/compiler/brw_fs_live_variables.cpp b/src/intel/compiler/brw_fs_live_variables.cpp
index 059f076fa51..30625aa586a 100644
--- a/src/intel/compiler/brw_fs_live_variables.cpp
+++ b/src/intel/compiler/brw_fs_live_variables.cpp
@@ -84,7 +84,7 @@ fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,
* screens off previous updates of that variable (VGRF channel).
*/
if (inst->dst.file == VGRF) {
- if (!inst->is_partial_write() && !BITSET_TEST(bd->use, var))
+ if (!inst->is_partial_reg_write() && !BITSET_TEST(bd->use, var))
BITSET_SET(bd->def, var);
BITSET_SET(bd->defout, var);
diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp
index b3825f1ef8c..94b18153492 100644
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -1054,7 +1054,7 @@ fs_visitor::spill_reg(unsigned spill_reg)
* write, there should be no need for the unspill since the
* instruction will be overwriting the whole destination in any case.
*/
- if (inst->is_partial_write() ||
+ if (inst->is_partial_reg_write() ||
(!inst->force_writemask_all && !per_channel))
emit_unspill(ubld, spill_src, subset_spill_offset,
regs_written(inst));
diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp
index 4fe6773da54..27234292c30 100644
--- a/src/intel/compiler/brw_fs_register_coalesce.cpp
+++ b/src/intel/compiler/brw_fs_register_coalesce.cpp
@@ -70,7 +70,7 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
{
if ((inst->opcode != BRW_OPCODE_MOV &&
inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) ||
- inst->is_partial_write() ||
+ inst->is_partial_reg_write() ||
inst->saturate ||
inst->src[0].file != VGRF ||
inst->src[0].negate ||
diff --git a/src/intel/compiler/brw_fs_saturate_propagation.cpp b/src/intel/compiler/brw_fs_saturate_propagation.cpp
index d6cfa79a618..1e1461063ae 100644
--- a/src/intel/compiler/brw_fs_saturate_propagation.cpp
+++ b/src/intel/compiler/brw_fs_saturate_propagation.cpp
@@ -43,7 +43,8 @@
*/
static bool
-opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
+opt_saturate_propagation_local(fs_visitor *v, bblock_t *block,
+ unsigned dispatch_width)
{
bool progress = false;
int ip = block->end_ip + 1;
@@ -66,7 +67,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
inst->src[0], inst->size_read(0))) {
- if (scan_inst->is_partial_write() ||
+ if (scan_inst->is_partial_var_write(dispatch_width) ||
(scan_inst->dst.type != inst->dst.type &&
!scan_inst->can_change_types()))
break;
@@ -153,7 +154,7 @@ fs_visitor::opt_saturate_propagation()
calculate_live_intervals();
foreach_block (block, cfg) {
- progress = opt_saturate_propagation_local(this, block) || progress;
+ progress = opt_saturate_propagation_local(this, block, dispatch_width) || progress;
}
/* Live intervals are still valid. */
diff --git a/src/intel/compiler/brw_fs_sel_peephole.cpp b/src/intel/compiler/brw_fs_sel_peephole.cpp
index 6395b409b7c..98d640a3bfe 100644
--- a/src/intel/compiler/brw_fs_sel_peephole.cpp
+++ b/src/intel/compiler/brw_fs_sel_peephole.cpp
@@ -167,8 +167,8 @@ fs_visitor::opt_peephole_sel()
then_mov[i]->exec_size != else_mov[i]->exec_size ||
then_mov[i]->group != else_mov[i]->group ||
then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all ||
- then_mov[i]->is_partial_write() ||
- else_mov[i]->is_partial_write() ||
+ then_mov[i]->is_partial_var_write(dispatch_width) ||
+ else_mov[i]->is_partial_var_write(dispatch_width) ||
then_mov[i]->conditional_mod != BRW_CONDITIONAL_NONE ||
else_mov[i]->conditional_mod != BRW_CONDITIONAL_NONE) {
movs = i;
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index 56a4bdc6e52..87e03258d93 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -348,7 +348,8 @@ public:
void resize_sources(uint8_t num_sources);
bool is_send_from_grf() const;
- bool is_partial_write() const;
+ bool is_partial_reg_write() const;
+ bool is_partial_var_write(unsigned dispatch_width) const;
bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
unsigned components_read(unsigned i) const;
unsigned size_read(int arg) const;