diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 17 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 94 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 15 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 17 |
8 files changed, 131 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index bff38f0d6e7..075857f7425 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -64,6 +64,8 @@ fs_inst::init() /* This will be the case for almost all instructions. */ this->regs_written = 1; + + this->writes_accumulator = false; } fs_inst::fs_inst() @@ -151,6 +153,15 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst, return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \ } +#define ALU2_ACC(op) \ + fs_inst * \ + fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1) \ + { \ + fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\ + inst->writes_accumulator = true; \ + return inst; \ + } + #define ALU3(op) \ fs_inst * \ fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) \ @@ -166,7 +177,7 @@ ALU1(RNDE) ALU1(RNDZ) ALU2(ADD) ALU2(MUL) -ALU2(MACH) +ALU2_ACC(MACH) ALU2(AND) ALU2(OR) ALU2(XOR) @@ -182,8 +193,8 @@ ALU1(FBH) ALU1(FBL) ALU1(CBIT) ALU3(MAD) -ALU2(ADDC) -ALU2(SUBB) +ALU2_ACC(ADDC) +ALU2_ACC(SUBB) ALU2(SEL) /** Gen4 predicated IF. */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp index 6672f840fc5..dfeceb00619 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp @@ -72,13 +72,9 @@ fs_visitor::dead_code_eliminate() if (!result_live) { progress = true; - switch (inst->opcode) { - case BRW_OPCODE_ADDC: - case BRW_OPCODE_SUBB: - case BRW_OPCODE_MACH: + if (inst->writes_accumulator) { inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type)); - break; - default: + } else { inst->opcode = BRW_OPCODE_NOP; continue; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index e590bdf4c58..1cf35b40ad2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1411,6 +1411,7 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file) brw_set_flag_reg(p, 0, inst->flag_subreg); brw_set_saturate(p, inst->saturate); brw_set_mask_control(p, inst->force_writemask_all); + brw_set_acc_write_control(p, inst->writes_accumulator); if (inst->force_uncompressed || dispatch_width == 8) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); @@ -1434,9 +1435,7 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file) brw_AVG(p, dst, src[0], src[1]); break; case BRW_OPCODE_MACH: - brw_set_acc_write_control(p, 1); brw_MACH(p, dst, src[0], src[1]); - brw_set_acc_write_control(p, 0); break; case BRW_OPCODE_MAD: @@ -1540,15 +1539,11 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file) break; case BRW_OPCODE_ADDC: assert(brw->gen >= 7); - brw_set_acc_write_control(p, 1); brw_ADDC(p, dst, src[0], src[1]); - brw_set_acc_write_control(p, 0); break; case BRW_OPCODE_SUBB: assert(brw->gen >= 7); - brw_set_acc_write_control(p, 1); brw_SUBB(p, dst, src[0], src[1]); - brw_set_acc_write_control(p, 0); break; case BRW_OPCODE_BFE: diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index a9514594e86..5e4f2fe7478 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -742,6 +742,8 @@ fs_instruction_scheduler::is_compressed(fs_inst *inst) void fs_instruction_scheduler::calculate_deps() { + const bool gen6plus = v->brw->gen >= 6; + /* Pre-register-allocation, this tracks the last write per VGRF (so * different reg_offsets within it can interfere when they shouldn't). * After register allocation, reg_offsets are gone and we track individual @@ -750,6 +752,7 @@ fs_instruction_scheduler::calculate_deps() schedule_node *last_grf_write[grf_count]; schedule_node *last_mrf_write[BRW_MAX_MRF]; schedule_node *last_conditional_mod[2] = { NULL, NULL }; + schedule_node *last_accumulator_write = NULL; /* Fixed HW registers are assumed to be separate from the virtual * GRFs, so they can be tracked separately. We don't really write * to fixed GRFs much, so don't bother tracking them on a more @@ -800,6 +803,8 @@ fs_instruction_scheduler::calculate_deps() } else { add_dep(last_fixed_grf_write, n); } + } else if (inst->src[i].is_accumulator() && gen6plus) { + add_dep(last_accumulator_write, n); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM) { @@ -822,6 +827,14 @@ fs_instruction_scheduler::calculate_deps() add_dep(last_conditional_mod[inst->flag_subreg], n); } + if (inst->reads_accumulator_implicitly()) { + if (gen6plus) { + add_dep(last_accumulator_write, n); + } else { + add_barrier_deps(n); + } + } + /* write-after-write deps. */ if (inst->dst.file == GRF) { if (post_reg_alloc) { @@ -854,6 +867,9 @@ fs_instruction_scheduler::calculate_deps() } else { last_fixed_grf_write = n; } + } else if (inst->dst.is_accumulator() && gen6plus) { + add_dep(last_accumulator_write, n); + last_accumulator_write = n; } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); } @@ -869,12 +885,22 @@ fs_instruction_scheduler::calculate_deps() add_dep(last_conditional_mod[inst->flag_subreg], n, 0); last_conditional_mod[inst->flag_subreg] = n; } + + if (inst->writes_accumulator) { + if (gen6plus) { + add_dep(last_accumulator_write, n); + last_accumulator_write = n; + } else { + add_barrier_deps(n); + } + } } /* bottom-to-top dependencies: WAR */ memset(last_grf_write, 0, sizeof(last_grf_write)); memset(last_mrf_write, 0, sizeof(last_mrf_write)); memset(last_conditional_mod, 0, sizeof(last_conditional_mod)); + last_accumulator_write = NULL; last_fixed_grf_write = NULL; exec_node *node; @@ -906,6 +932,8 @@ fs_instruction_scheduler::calculate_deps() } else { add_dep(n, last_fixed_grf_write); } + } else if (inst->src[i].is_accumulator() && gen6plus) { + add_dep(n, last_accumulator_write); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM) { @@ -928,6 +956,14 @@ fs_instruction_scheduler::calculate_deps() add_dep(n, last_conditional_mod[inst->flag_subreg]); } + if (inst->reads_accumulator_implicitly()) { + if (gen6plus) { + add_dep(n, last_accumulator_write); + } else { + add_barrier_deps(n); + } + } + /* Update the things this instruction wrote, so earlier reads * can mark this as WAR dependency. */ @@ -959,6 +995,8 @@ fs_instruction_scheduler::calculate_deps() } else { last_fixed_grf_write = n; } + } else if (inst->dst.is_accumulator() && gen6plus) { + last_accumulator_write = n; } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); } @@ -972,15 +1010,26 @@ fs_instruction_scheduler::calculate_deps() if (inst->writes_flag()) { last_conditional_mod[inst->flag_subreg] = n; } + + if (inst->writes_accumulator) { + if (gen6plus) { + last_accumulator_write = n; + } else { + add_barrier_deps(n); + } + } } } void vec4_instruction_scheduler::calculate_deps() { + const bool gen6plus = v->brw->gen >= 6; + schedule_node *last_grf_write[grf_count]; schedule_node *last_mrf_write[BRW_MAX_MRF]; schedule_node *last_conditional_mod = NULL; + schedule_node *last_accumulator_write = NULL; /* Fixed HW registers are assumed to be separate from the virtual * GRFs, so they can be tracked separately. We don't really write * to fixed GRFs much, so don't bother tracking them on a more @@ -1016,6 +1065,9 @@ vec4_instruction_scheduler::calculate_deps() (inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE)) { add_dep(last_fixed_grf_write, n); + } else if (inst->src[i].is_accumulator() && gen6plus) { + assert(last_accumulator_write); + add_dep(last_accumulator_write, n); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM) { @@ -1039,6 +1091,15 @@ vec4_instruction_scheduler::calculate_deps() add_dep(last_conditional_mod, n); } + if (inst->reads_accumulator_implicitly()) { + if (gen6plus) { + assert(last_accumulator_write); + add_dep(last_accumulator_write, n); + } else { + add_barrier_deps(n); + } + } + /* write-after-write deps. */ if (inst->dst.file == GRF) { add_dep(last_grf_write[inst->dst.reg], n); @@ -1049,6 +1110,9 @@ vec4_instruction_scheduler::calculate_deps() } else if (inst->dst.file == HW_REG && inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { last_fixed_grf_write = n; + } else if (inst->dst.is_accumulator() && gen6plus) { + add_dep(last_accumulator_write, n); + last_accumulator_write = n; } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); } @@ -1064,12 +1128,22 @@ vec4_instruction_scheduler::calculate_deps() add_dep(last_conditional_mod, n, 0); last_conditional_mod = n; } + + if (inst->writes_accumulator) { + if (gen6plus) { + add_dep(last_accumulator_write, n); + last_accumulator_write = n; + } else { + add_barrier_deps(n); + } + } } /* bottom-to-top dependencies: WAR */ memset(last_grf_write, 0, sizeof(last_grf_write)); memset(last_mrf_write, 0, sizeof(last_mrf_write)); last_conditional_mod = NULL; + last_accumulator_write = NULL; last_fixed_grf_write = NULL; exec_node *node; @@ -1088,6 +1162,8 @@ vec4_instruction_scheduler::calculate_deps() (inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE)) { add_dep(n, last_fixed_grf_write); + } else if (inst->src[i].is_accumulator() && gen6plus) { + add_dep(n, last_accumulator_write); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM) { @@ -1109,6 +1185,14 @@ vec4_instruction_scheduler::calculate_deps() add_dep(n, last_conditional_mod); } + if (inst->reads_accumulator_implicitly()) { + if (gen6plus) { + add_dep(n, last_accumulator_write); + } else { + add_barrier_deps(n); + } + } + /* Update the things this instruction wrote, so earlier reads * can mark this as WAR dependency. */ @@ -1119,6 +1203,8 @@ vec4_instruction_scheduler::calculate_deps() } else if (inst->dst.file == HW_REG && inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { last_fixed_grf_write = n; + } else if (inst->dst.is_accumulator() && gen6plus) { + last_accumulator_write = n; } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); } @@ -1132,6 +1218,14 @@ vec4_instruction_scheduler::calculate_deps() if (inst->writes_flag()) { last_conditional_mod = n; } + + if (inst->writes_accumulator) { + if (gen6plus) { + last_accumulator_write = n; + } else { + add_barrier_deps(n); + } + } } } diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 9ef08e58456..e730ed02b18 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -60,6 +60,7 @@ public: uint8_t predicate; bool predicate_inverse; + bool writes_accumulator; /**< instruction implicitly writes accumulator */ }; enum instruction_scheduler_mode { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 8aa746d3630..daff3641119 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -350,19 +350,12 @@ try_eliminate_instruction(vec4_instruction *inst, int new_writemask, * accumulator as a side-effect. Instead just set the destination * to the null register to free it. */ - switch (inst->opcode) { - case BRW_OPCODE_ADDC: - case BRW_OPCODE_SUBB: - case BRW_OPCODE_MACH: + if (inst->writes_accumulator || inst->writes_flag()) { inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); - break; - default: - if (inst->writes_flag()) { - inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); - } else { - inst->remove(); - } + } else { + inst->remove(); } + return true; } else if (inst->dst.writemask != new_writemask) { switch (inst->opcode) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index a74514f512c..5f85d315c71 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -971,9 +971,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, brw_MUL(p, dst, src[0], src[1]); break; case BRW_OPCODE_MACH: - brw_set_acc_write_control(p, 1); brw_MACH(p, dst, src[0], src[1]); - brw_set_acc_write_control(p, 0); break; case BRW_OPCODE_MAD: @@ -1077,15 +1075,11 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, break; case BRW_OPCODE_ADDC: assert(brw->gen >= 7); - brw_set_acc_write_control(p, 1); brw_ADDC(p, dst, src[0], src[1]); - brw_set_acc_write_control(p, 0); break; case BRW_OPCODE_SUBB: assert(brw->gen >= 7); - brw_set_acc_write_control(p, 1); brw_SUBB(p, dst, src[0], src[1]); - brw_set_acc_write_control(p, 0); break; case BRW_OPCODE_BFE: @@ -1317,6 +1311,7 @@ vec4_generator::generate_code(exec_list *instructions) brw_set_predicate_inverse(p, inst->predicate_inverse); brw_set_saturate(p, inst->saturate); brw_set_mask_control(p, inst->force_writemask_all); + brw_set_acc_write_control(p, inst->writes_accumulator); unsigned pre_emit_nr_insn = p->nr_insn; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index edace108f19..3a764424df8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -42,6 +42,7 @@ vec4_instruction::vec4_instruction(vec4_visitor *v, this->force_writemask_all = false; this->no_dd_clear = false; this->no_dd_check = false; + this->writes_accumulator = false; this->conditional_mod = BRW_CONDITIONAL_NONE; this->sampler = 0; this->texture_offset = 0; @@ -124,6 +125,16 @@ vec4_visitor::emit(enum opcode opcode) src0, src1); \ } +#define ALU2_ACC(op) \ + vec4_instruction * \ + vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \ + { \ + vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, \ + BRW_OPCODE_##op, dst, src0, src1); \ + inst->writes_accumulator = true; \ + return inst; \ + } + #define ALU3(op) \ vec4_instruction * \ vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\ @@ -143,7 +154,7 @@ ALU1(F32TO16) ALU1(F16TO32) ALU2(ADD) ALU2(MUL) -ALU2(MACH) +ALU2_ACC(MACH) ALU2(AND) ALU2(OR) ALU2(XOR) @@ -162,8 +173,8 @@ ALU1(FBH) ALU1(FBL) ALU1(CBIT) ALU3(MAD) -ALU2(ADDC) -ALU2(SUBB) +ALU2_ACC(ADDC) +ALU2_ACC(SUBB) /** Gen4 predicated IF. */ vec4_instruction * |