diff options
author | Vadim Girlin <[email protected]> | 2013-05-27 04:00:03 +0400 |
---|---|---|
committer | Vadim Girlin <[email protected]> | 2013-05-27 15:19:20 +0400 |
commit | 725671a83a67cc8cf16c0913f6e1835fb272c2fb (patch) | |
tree | 29b9fa66f25675bdf3c64b504dbc010a03ad87a7 | |
parent | 5285c4c88e4beab653660e8a5b51ce9a779fc0d2 (diff) |
r600g/sb: improve optimization of conditional instructions
Signed-off-by: Vadim Girlin <[email protected]>
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_core.cpp | 7 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_expr.cpp | 48 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_expr.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_if_conversion.cpp | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_pass.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_peephole.cpp | 53 |
6 files changed, 96 insertions, 21 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp index 034505704fe..5b917ac6e75 100644 --- a/src/gallium/drivers/r600/sb/sb_core.cpp +++ b/src/gallium/drivers/r600/sb/sb_core.cpp @@ -188,9 +188,14 @@ int r600_sb_bytecode_process(struct r600_context *rctx, sh->set_undef(sh->root->live_before); - SB_RUN_PASS(peephole, 1); SB_RUN_PASS(if_conversion, 1); + // if_conversion breaks info about uses, but next pass (peephole) + // doesn't need it, so we can skip def/use update here + // until it's really required + //SB_RUN_PASS(def_use, 0); + + SB_RUN_PASS(peephole, 1); SB_RUN_PASS(def_use, 0); SB_RUN_PASS(gvn, 1); diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp index 8582c8e8e0b..65a764153a7 100644 --- a/src/gallium/drivers/r600/sb/sb_expr.cpp +++ b/src/gallium/drivers/r600/sb/sb_expr.cpp @@ -580,7 +580,7 @@ unsigned invert_setcc_condition(unsigned cc, bool &swap_args) { return ncc; } -unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst) { +unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) { if (int_dst && cmp_type == AF_FLOAT_CMP) { switch (cc) { @@ -612,6 +612,8 @@ unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst) { } case AF_UINT_CMP: { switch (cc) { + case AF_CC_E: return ALU_OP2_SETE_INT; + case AF_CC_NE: return ALU_OP2_SETNE_INT; case AF_CC_GT: return ALU_OP2_SETGT_UINT; case AF_CC_GE: return ALU_OP2_SETGE_UINT; } @@ -624,7 +626,7 @@ unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst) { return ~0u; } -unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type) { +unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) { switch(cmp_type) { case AF_FLOAT_CMP: { @@ -647,6 +649,8 @@ unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type) { } case AF_UINT_CMP: { switch (cc) { + case AF_CC_E: return ALU_OP2_PRED_SETE_INT; + case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT; case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT; case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT; } @@ -658,6 +662,44 @@ unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type) { return ~0u; } +unsigned get_killcc_op(unsigned cc, unsigned cmp_type) { + + switch(cmp_type) { + case AF_FLOAT_CMP: { + switch (cc) { + case AF_CC_E: return ALU_OP2_KILLE; + case AF_CC_NE: return ALU_OP2_KILLNE; + case AF_CC_GT: return ALU_OP2_KILLGT; + case AF_CC_GE: return ALU_OP2_KILLGE; + } + break; + } + case AF_INT_CMP: { + switch (cc) { + case AF_CC_E: return ALU_OP2_KILLE_INT; + case AF_CC_NE: return ALU_OP2_KILLNE_INT; + case AF_CC_GT: return ALU_OP2_KILLGT_INT; + case AF_CC_GE: return ALU_OP2_KILLGE_INT; + } + break; + } + case AF_UINT_CMP: { + switch (cc) { + case AF_CC_E: return ALU_OP2_KILLE_INT; + case AF_CC_NE: return ALU_OP2_KILLNE_INT; + case AF_CC_GT: return ALU_OP2_KILLGT_UINT; + case AF_CC_GE: return ALU_OP2_KILLGE_UINT; + } + break; + } + } + + assert(!"unexpected cc&cmp_type combination"); + return ~0u; +} + + + void convert_predset_to_set(shader& sh, alu_node* a) { unsigned flags = a->bc.op_ptr->flags; @@ -668,7 +710,7 @@ void convert_predset_to_set(shader& sh, alu_node* a) { cc = invert_setcc_condition(cc, swap_args); - unsigned newop = get_setcc_opcode(cc, cmp_type, true); + unsigned newop = get_setcc_op(cc, cmp_type, true); a->dst.resize(1); a->bc.set_op(newop); diff --git a/src/gallium/drivers/r600/sb/sb_expr.h b/src/gallium/drivers/r600/sb/sb_expr.h index 032867be559..89177141a76 100644 --- a/src/gallium/drivers/r600/sb/sb_expr.h +++ b/src/gallium/drivers/r600/sb/sb_expr.h @@ -37,8 +37,9 @@ value* get_select_value_for_em(shader &sh, value *em); void convert_predset_to_set(shader &sh, alu_node *a); unsigned invert_setcc_condition(unsigned cc, bool &swap_args); -unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst); -unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type); +unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst); +unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type); +unsigned get_killcc_op(unsigned cc, unsigned cmp_type); class expr_handler { diff --git a/src/gallium/drivers/r600/sb/sb_if_conversion.cpp b/src/gallium/drivers/r600/sb/sb_if_conversion.cpp index 96dd1ee31fe..93edacec7af 100644 --- a/src/gallium/drivers/r600/sb/sb_if_conversion.cpp +++ b/src/gallium/drivers/r600/sb/sb_if_conversion.cpp @@ -258,7 +258,7 @@ bool if_conversion::run_on(region_node* r) { std::swap(newpredset->bc.src[0], newpredset->bc.src[1]); } - unsigned newopcode = get_predsetcc_opcode(cc, cmptype); + unsigned newopcode = get_predsetcc_op(cc, cmptype); newpredset->bc.set_op(newopcode); // move the code from the 'false' branch ('else') to the 'true' branch diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h index 7e606da822c..c7272ba2680 100644 --- a/src/gallium/drivers/r600/sb/sb_pass.h +++ b/src/gallium/drivers/r600/sb/sb_pass.h @@ -413,7 +413,7 @@ public: void optimize_cc_op(alu_node *a); - void optimize_SETcc_op(alu_node *a); + void optimize_cc_op2(alu_node *a); void optimize_CNDcc_op(alu_node *a); bool get_bool_op_info(value *b, bool_op_info& bop); diff --git a/src/gallium/drivers/r600/sb/sb_peephole.cpp b/src/gallium/drivers/r600/sb/sb_peephole.cpp index 6373b5c3a61..cb423c9b224 100644 --- a/src/gallium/drivers/r600/sb/sb_peephole.cpp +++ b/src/gallium/drivers/r600/sb/sb_peephole.cpp @@ -46,7 +46,7 @@ int peephole::run() { void peephole::run_on(container_node* c) { - for (node_riterator I = c->rbegin(), E = c->rend(); I != E; ++I) { + for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { node *n = *I; if (n->is_container()) @@ -56,7 +56,8 @@ void peephole::run_on(container_node* c) { if (n->is_alu_inst()) { alu_node *a = static_cast<alu_node*>(n); - if (a->bc.op_ptr->flags & (AF_PRED | AF_SET | AF_CMOV)) { + if (a->bc.op_ptr->flags & + (AF_PRED | AF_SET | AF_CMOV | AF_KILL)) { optimize_cc_op(a); } else if (a->bc.op == ALU_OP1_FLT_TO_INT) { @@ -73,8 +74,8 @@ void peephole::run_on(container_node* c) { void peephole::optimize_cc_op(alu_node* a) { unsigned aflags = a->bc.op_ptr->flags; - if (aflags & (AF_PRED | AF_SET)) { - optimize_SETcc_op(a); + if (aflags & (AF_PRED | AF_SET | AF_KILL)) { + optimize_cc_op2(a); } else if (aflags & AF_CMOV) { optimize_CNDcc_op(a); } @@ -90,26 +91,37 @@ void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) { f2i->remove(); } -void peephole::optimize_SETcc_op(alu_node* a) { +void peephole::optimize_cc_op2(alu_node* a) { unsigned flags = a->bc.op_ptr->flags; unsigned cc = flags & AF_CC_MASK; + + if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred) + return; + unsigned cmp_type = flags & AF_CMP_TYPE_MASK; unsigned dst_type = flags & AF_DST_TYPE_MASK; - bool is_pred = flags & AF_PRED; - // TODO handle other cases + int op_kind = (flags & AF_PRED) ? 1 : + (flags & AF_SET) ? 2 : + (flags & AF_KILL) ? 3 : 0; + + bool swapped = false; - if (a->src[1]->is_const() && (cc == AF_CC_E || cc == AF_CC_NE) && - a->src[1]->literal_value == literal(0) && - a->bc.src[0].neg == 0 && a->bc.src[0].abs == 0) { + if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) { + std::swap(a->src[0],a->src[1]); + swapped = true; + } + + if (swapped || (a->src[1]->is_const() && + a->src[1]->literal_value == literal(0))) { value *s = a->src[0]; bool_op_info bop = {}; PPH_DUMP( - sblog << "optSETcc "; + sblog << "cc_op2: "; dump::dump_op(a); sblog << "\n"; ); @@ -139,8 +151,23 @@ void peephole::optimize_SETcc_op(alu_node* a) { sblog <<"\n"; ); - unsigned newop = is_pred ? get_predsetcc_opcode(cc, cmp_type) : - get_setcc_opcode(cc, cmp_type, dst_type != AF_FLOAT_DST); + unsigned newop; + + switch(op_kind) { + case 1: + newop = get_predsetcc_op(cc, cmp_type); + break; + case 2: + newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST); + break; + case 3: + newop = get_killcc_op(cc, cmp_type); + break; + default: + newop = ALU_OP0_NOP; + assert(!"invalid op kind"); + break; + } a->bc.set_op(newop); |