aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp | 17
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 94
-rw-r--r-- src/mesa/drivers/dri/i965/brw_shader.h | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.cpp | 15
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 17
8 files changed, 131 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index bff38f0d6e7..075857f7425 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -64,6 +64,8 @@ fs_inst::init()
/* This will be the case for almost all instructions. */
this->regs_written = 1;
+
+ this->writes_accumulator = false;
}
fs_inst::fs_inst()
@@ -151,6 +153,15 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst,
return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \
}
+#define ALU2_ACC(op) /* two-source emitter whose result is tagged as an implicit accumulator writer */ \
+ fs_inst * \
+ fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1) \
+ { \
+ fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\
+ inst->writes_accumulator = true; /* read by dead-code elimination, the scheduler and the generator */ \
+ return inst; \
+ }
+
#define ALU3(op) \
fs_inst * \
fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) \
@@ -166,7 +177,7 @@ ALU1(RNDE)
ALU1(RNDZ)
ALU2(ADD)
ALU2(MUL)
-ALU2(MACH)
+ALU2_ACC(MACH)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
@@ -182,8 +193,8 @@ ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU3(MAD)
-ALU2(ADDC)
-ALU2(SUBB)
+ALU2_ACC(ADDC)
+ALU2_ACC(SUBB)
ALU2(SEL)
/** Gen4 predicated IF. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
index 6672f840fc5..dfeceb00619 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -72,13 +72,9 @@ fs_visitor::dead_code_eliminate()
if (!result_live) {
progress = true;
- switch (inst->opcode) {
- case BRW_OPCODE_ADDC:
- case BRW_OPCODE_SUBB:
- case BRW_OPCODE_MACH:
+ if (inst->writes_accumulator) {
inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
- break;
- default:
+ } else {
inst->opcode = BRW_OPCODE_NOP;
continue;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index e590bdf4c58..1cf35b40ad2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1411,6 +1411,7 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
brw_set_flag_reg(p, 0, inst->flag_subreg);
brw_set_saturate(p, inst->saturate);
brw_set_mask_control(p, inst->force_writemask_all);
+ brw_set_acc_write_control(p, inst->writes_accumulator);
if (inst->force_uncompressed || dispatch_width == 8) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
@@ -1434,9 +1435,7 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
brw_AVG(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_MACH:
- brw_set_acc_write_control(p, 1);
brw_MACH(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_MAD:
@@ -1540,15 +1539,11 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
break;
case BRW_OPCODE_ADDC:
assert(brw->gen >= 7);
- brw_set_acc_write_control(p, 1);
brw_ADDC(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_SUBB:
assert(brw->gen >= 7);
- brw_set_acc_write_control(p, 1);
brw_SUBB(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_BFE:
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index a9514594e86..5e4f2fe7478 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -742,6 +742,8 @@ fs_instruction_scheduler::is_compressed(fs_inst *inst)
void
fs_instruction_scheduler::calculate_deps()
{
+ const bool gen6plus = v->brw->gen >= 6;
+
/* Pre-register-allocation, this tracks the last write per VGRF (so
* different reg_offsets within it can interfere when they shouldn't).
* After register allocation, reg_offsets are gone and we track individual
@@ -750,6 +752,7 @@ fs_instruction_scheduler::calculate_deps()
schedule_node *last_grf_write[grf_count];
schedule_node *last_mrf_write[BRW_MAX_MRF];
schedule_node *last_conditional_mod[2] = { NULL, NULL };
+ schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
* GRFs, so they can be tracked separately. We don't really write
* to fixed GRFs much, so don't bother tracking them on a more
@@ -800,6 +803,8 @@ fs_instruction_scheduler::calculate_deps()
} else {
add_dep(last_fixed_grf_write, n);
}
+ } else if (inst->src[i].is_accumulator() && gen6plus) {
+ add_dep(last_accumulator_write, n);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@@ -822,6 +827,14 @@ fs_instruction_scheduler::calculate_deps()
add_dep(last_conditional_mod[inst->flag_subreg], n);
}
+ if (inst->reads_accumulator_implicitly()) {
+ if (gen6plus) {
+ add_dep(last_accumulator_write, n);
+ } else {
+ add_barrier_deps(n);
+ }
+ }
+
/* write-after-write deps. */
if (inst->dst.file == GRF) {
if (post_reg_alloc) {
@@ -854,6 +867,9 @@ fs_instruction_scheduler::calculate_deps()
} else {
last_fixed_grf_write = n;
}
+ } else if (inst->dst.is_accumulator() && gen6plus) {
+ add_dep(last_accumulator_write, n);
+ last_accumulator_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@@ -869,12 +885,22 @@ fs_instruction_scheduler::calculate_deps()
add_dep(last_conditional_mod[inst->flag_subreg], n, 0);
last_conditional_mod[inst->flag_subreg] = n;
}
+
+ if (inst->writes_accumulator) { /* implicit accumulator write (flag set by the ALU2_ACC emitters) */
+ if (gen6plus) {
+ add_dep(last_accumulator_write, n); /* keep n ordered against the previous accumulator writer */
+ last_accumulator_write = n; /* n is now the most recent accumulator writer */
+ } else {
+ add_barrier_deps(n); /* gen < 6: accumulator writes are not tracked here; use barrier deps instead */
+ }
+ }
}
/* bottom-to-top dependencies: WAR */
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
memset(last_conditional_mod, 0, sizeof(last_conditional_mod));
+ last_accumulator_write = NULL;
last_fixed_grf_write = NULL;
exec_node *node;
@@ -906,6 +932,8 @@ fs_instruction_scheduler::calculate_deps()
} else {
add_dep(n, last_fixed_grf_write);
}
+ } else if (inst->src[i].is_accumulator() && gen6plus) {
+ add_dep(n, last_accumulator_write);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@@ -928,6 +956,14 @@ fs_instruction_scheduler::calculate_deps()
add_dep(n, last_conditional_mod[inst->flag_subreg]);
}
+ if (inst->reads_accumulator_implicitly()) {
+ if (gen6plus) {
+ add_dep(n, last_accumulator_write);
+ } else {
+ add_barrier_deps(n);
+ }
+ }
+
/* Update the things this instruction wrote, so earlier reads
* can mark this as WAR dependency.
*/
@@ -959,6 +995,8 @@ fs_instruction_scheduler::calculate_deps()
} else {
last_fixed_grf_write = n;
}
+ } else if (inst->dst.is_accumulator() && gen6plus) {
+ last_accumulator_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@@ -972,15 +1010,26 @@ fs_instruction_scheduler::calculate_deps()
if (inst->writes_flag()) {
last_conditional_mod[inst->flag_subreg] = n;
}
+
+ if (inst->writes_accumulator) {
+ if (gen6plus) {
+ last_accumulator_write = n;
+ } else {
+ add_barrier_deps(n);
+ }
+ }
}
}
void
vec4_instruction_scheduler::calculate_deps()
{
+ const bool gen6plus = v->brw->gen >= 6;
+
schedule_node *last_grf_write[grf_count];
schedule_node *last_mrf_write[BRW_MAX_MRF];
schedule_node *last_conditional_mod = NULL;
+ schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
* GRFs, so they can be tracked separately. We don't really write
* to fixed GRFs much, so don't bother tracking them on a more
@@ -1016,6 +1065,9 @@ vec4_instruction_scheduler::calculate_deps()
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
add_dep(last_fixed_grf_write, n);
+ } else if (inst->src[i].is_accumulator() && gen6plus) {
+ assert(last_accumulator_write);
+ add_dep(last_accumulator_write, n);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@@ -1039,6 +1091,15 @@ vec4_instruction_scheduler::calculate_deps()
add_dep(last_conditional_mod, n);
}
+ if (inst->reads_accumulator_implicitly()) {
+ if (gen6plus) {
+ assert(last_accumulator_write);
+ add_dep(last_accumulator_write, n);
+ } else {
+ add_barrier_deps(n);
+ }
+ }
+
/* write-after-write deps. */
if (inst->dst.file == GRF) {
add_dep(last_grf_write[inst->dst.reg], n);
@@ -1049,6 +1110,9 @@ vec4_instruction_scheduler::calculate_deps()
} else if (inst->dst.file == HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
last_fixed_grf_write = n;
+ } else if (inst->dst.is_accumulator() && gen6plus) {
+ add_dep(last_accumulator_write, n);
+ last_accumulator_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@@ -1064,12 +1128,22 @@ vec4_instruction_scheduler::calculate_deps()
add_dep(last_conditional_mod, n, 0);
last_conditional_mod = n;
}
+
+ if (inst->writes_accumulator) { /* implicit accumulator write (flag set by the ALU2_ACC emitters) */
+ if (gen6plus) {
+ add_dep(last_accumulator_write, n); /* keep n ordered against the previous accumulator writer */
+ last_accumulator_write = n; /* n is now the most recent accumulator writer */
+ } else {
+ add_barrier_deps(n); /* gen < 6: accumulator writes are not tracked here; use barrier deps instead */
+ }
+ }
}
/* bottom-to-top dependencies: WAR */
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
last_conditional_mod = NULL;
+ last_accumulator_write = NULL;
last_fixed_grf_write = NULL;
exec_node *node;
@@ -1088,6 +1162,8 @@ vec4_instruction_scheduler::calculate_deps()
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
add_dep(n, last_fixed_grf_write);
+ } else if (inst->src[i].is_accumulator() && gen6plus) {
+ add_dep(n, last_accumulator_write);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@@ -1109,6 +1185,14 @@ vec4_instruction_scheduler::calculate_deps()
add_dep(n, last_conditional_mod);
}
+ if (inst->reads_accumulator_implicitly()) {
+ if (gen6plus) {
+ add_dep(n, last_accumulator_write);
+ } else {
+ add_barrier_deps(n);
+ }
+ }
+
/* Update the things this instruction wrote, so earlier reads
* can mark this as WAR dependency.
*/
@@ -1119,6 +1203,8 @@ vec4_instruction_scheduler::calculate_deps()
} else if (inst->dst.file == HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
last_fixed_grf_write = n;
+ } else if (inst->dst.is_accumulator() && gen6plus) {
+ last_accumulator_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@@ -1132,6 +1218,14 @@ vec4_instruction_scheduler::calculate_deps()
if (inst->writes_flag()) {
last_conditional_mod = n;
}
+
+ if (inst->writes_accumulator) {
+ if (gen6plus) {
+ last_accumulator_write = n;
+ } else {
+ add_barrier_deps(n);
+ }
+ }
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 9ef08e58456..e730ed02b18 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -60,6 +60,7 @@ public:
uint8_t predicate;
bool predicate_inverse;
+ bool writes_accumulator; /**< instruction implicitly writes accumulator */
};
enum instruction_scheduler_mode {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 8aa746d3630..daff3641119 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -350,19 +350,12 @@ try_eliminate_instruction(vec4_instruction *inst, int new_writemask,
* accumulator as a side-effect. Instead just set the destination
* to the null register to free it.
*/
- switch (inst->opcode) {
- case BRW_OPCODE_ADDC:
- case BRW_OPCODE_SUBB:
- case BRW_OPCODE_MACH:
+ if (inst->writes_accumulator || inst->writes_flag()) {
inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
- break;
- default:
- if (inst->writes_flag()) {
- inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
- } else {
- inst->remove();
- }
+ } else {
+ inst->remove();
}
+
return true;
} else if (inst->dst.writemask != new_writemask) {
switch (inst->opcode) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index a74514f512c..5f85d315c71 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -971,9 +971,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
brw_MUL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_MACH:
- brw_set_acc_write_control(p, 1);
brw_MACH(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_MAD:
@@ -1077,15 +1075,11 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
break;
case BRW_OPCODE_ADDC:
assert(brw->gen >= 7);
- brw_set_acc_write_control(p, 1);
brw_ADDC(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_SUBB:
assert(brw->gen >= 7);
- brw_set_acc_write_control(p, 1);
brw_SUBB(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_BFE:
@@ -1317,6 +1311,7 @@ vec4_generator::generate_code(exec_list *instructions)
brw_set_predicate_inverse(p, inst->predicate_inverse);
brw_set_saturate(p, inst->saturate);
brw_set_mask_control(p, inst->force_writemask_all);
+ brw_set_acc_write_control(p, inst->writes_accumulator);
unsigned pre_emit_nr_insn = p->nr_insn;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index edace108f19..3a764424df8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -42,6 +42,7 @@ vec4_instruction::vec4_instruction(vec4_visitor *v,
this->force_writemask_all = false;
this->no_dd_clear = false;
this->no_dd_check = false;
+ this->writes_accumulator = false;
this->conditional_mod = BRW_CONDITIONAL_NONE;
this->sampler = 0;
this->texture_offset = 0;
@@ -124,6 +125,16 @@ vec4_visitor::emit(enum opcode opcode)
src0, src1); \
}
+#define ALU2_ACC(op) /* two-source emitter whose result is tagged as an implicit accumulator writer */ \
+ vec4_instruction * \
+ vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
+ { \
+ vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, \
+ BRW_OPCODE_##op, dst, src0, src1); \
+ inst->writes_accumulator = true; /* read by try_eliminate_instruction, the scheduler and the generator */ \
+ return inst; \
+ }
+
#define ALU3(op) \
vec4_instruction * \
vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
@@ -143,7 +154,7 @@ ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU2(MUL)
-ALU2(MACH)
+ALU2_ACC(MACH)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
@@ -162,8 +173,8 @@ ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU3(MAD)
-ALU2(ADDC)
-ALU2(SUBB)
+ALU2_ACC(ADDC)
+ALU2_ACC(SUBB)
/** Gen4 predicated IF. */
vec4_instruction *