summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Vadim Girlin <[email protected]> 2013-05-27 15:29:56 +0400
committer Vadim Girlin <[email protected]> 2013-05-27 15:29:56 +0400
commit 88e700329b0aea451def26b271b1b70bd964894c (patch)
tree 44d7a5ebc729ccb8b129703b58c9f4f660291258 /src
parent 725671a83a67cc8cf16c0913f6e1835fb272c2fb (diff)
r600g/sb: optimize CNDcc instructions
Signed-off-by: Vadim Girlin <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/r600/sb/sb_expr.cpp 24
-rw-r--r-- src/gallium/drivers/r600/sb/sb_expr.h 1
-rw-r--r-- src/gallium/drivers/r600/sb/sb_peephole.cpp 89
3 files changed, 113 insertions, 1 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 65a764153a7..b85302d713e 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -698,6 +698,30 @@ unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
return ~0u;
}
+unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) { // Map a condition code (cc) and comparison type (cmp_type) to the matching ALU_OP3_CND* opcode.
+ // Only AF_CC_E, AF_CC_GT and AF_CC_GE are mapped, and only for AF_FLOAT_CMP and AF_INT_CMP.
+ switch(cmp_type) {
+ case AF_FLOAT_CMP: {
+ switch (cc) {
+ case AF_CC_E: return ALU_OP3_CNDE;
+ case AF_CC_GT: return ALU_OP3_CNDGT;
+ case AF_CC_GE: return ALU_OP3_CNDGE;
+ }
+ break; // unmapped cc for float compare: fall through to the failure path below
+ }
+ case AF_INT_CMP: {
+ switch (cc) {
+ case AF_CC_E: return ALU_OP3_CNDE_INT;
+ case AF_CC_GT: return ALU_OP3_CNDGT_INT;
+ case AF_CC_GE: return ALU_OP3_CNDGE_INT;
+ }
+ break; // unmapped cc for int compare: fall through to the failure path below
+ }
+ }
+ // Reached only when no opcode matched: trips the assert (when asserts are enabled), otherwise returns ~0u.
+ assert(!"unexpected cc&cmp_type combination");
+ return ~0u;
+}
void convert_predset_to_set(shader& sh, alu_node* a) {
diff --git a/src/gallium/drivers/r600/sb/sb_expr.h b/src/gallium/drivers/r600/sb/sb_expr.h
index 89177141a76..1ee48a00597 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.h
+++ b/src/gallium/drivers/r600/sb/sb_expr.h
@@ -40,6 +40,7 @@ unsigned invert_setcc_condition(unsigned cc, bool &swap_args);
unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst);
unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type);
unsigned get_killcc_op(unsigned cc, unsigned cmp_type);
+unsigned get_cndcc_op(unsigned cc, unsigned cmp_type); // returns the ALU_OP3_CND* opcode for cc/cmp_type, or ~0u if the combination is not handled
class expr_handler {
diff --git a/src/gallium/drivers/r600/sb/sb_peephole.cpp b/src/gallium/drivers/r600/sb/sb_peephole.cpp
index cb423c9b224..d4b97557d4e 100644
--- a/src/gallium/drivers/r600/sb/sb_peephole.cpp
+++ b/src/gallium/drivers/r600/sb/sb_peephole.cpp
@@ -111,6 +111,9 @@ void peephole::optimize_cc_op2(alu_node* a) {
if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
std::swap(a->src[0],a->src[1]);
swapped = true;
+ // clear modifiers
+ memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
+ memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
}
if (swapped || (a->src[1]->is_const() &&
@@ -187,8 +190,92 @@ void peephole::optimize_cc_op2(alu_node* a) {
}
void peephole::optimize_CNDcc_op(alu_node* a) { // Peephole for a CNDcc node `a`: if src[0] is produced by a compare against literal 0, test the compared operand directly.
+ unsigned flags = a->bc.op_ptr->flags;
+ unsigned cc = flags & AF_CC_MASK;
+ unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
+ bool swap = false; // tracks whether a's src[1]/src[2] must be exchanged at the end
+ // Only E and NE conditions on `a` are handled; E is normalized to NE by toggling `swap`.
+ if (cc == AF_CC_E) {
+ swap = !swap;
+ cc = AF_CC_NE;
+ } else if (cc != AF_CC_NE)
+ return;
+
+ value *s = a->src[0];
+
+ bool_op_info bop = {};
+
+ PPH_DUMP(
+ sblog << "cndcc: ";
+ dump::dump_op(a);
+ sblog << "\n";
+ );
+ // NOTE(review): get_bool_op_info presumably locates the instruction defining `s` and fills `bop` - confirm against its definition.
+ if (!get_bool_op_info(s, bop))
+ return;
+
+ alu_node *d = bop.n; // the node reported by get_bool_op_info; its operands are folded into `a` below
+ // Bail out if `d` has an output modifier set.
+ if (d->bc.omod)
+ return;
+
+ PPH_DUMP(
+ sblog << "cndcc def: ";
+ dump::dump_op(d);
+ sblog << "\n";
+ );
+
+
+ unsigned dflags = d->bc.op_ptr->flags;
+ unsigned dcc = dflags & AF_CC_MASK;
+ unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK;
+ unsigned ddst_type = dflags & AF_DST_TYPE_MASK;
+ int nds; // index (0 or 1) of d's operand that is NOT the literal zero
+
+ // TODO we can handle some of these cases,
+ // though probably this shouldn't happen
+ if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST)
+ return;
+ // One operand of `d` must be the constant literal 0; otherwise there is nothing to fold.
+ if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0))
+ nds = 1;
+ else if ((d->src[1]->is_const() &&
+ d->src[1]->literal_value == literal(0)))
+ nds = 0;
+ else
+ return;
+
+ // can't propagate ABS modifier to CNDcc because it's OP3
+ if (d->bc.src[nds].abs)
+ return;
+
+ // TODO we can handle some cases for uint comparison
+ if (dcmp_type == AF_UINT_CMP)
+ return;
+ // get_cndcc_op has no NE variant, so NE is expressed as E with the selected operands exchanged.
+ if (dcc == AF_CC_NE) {
+ dcc = AF_CC_E;
+ swap = !swap;
+ }
+ // Zero was d's src[0]: "0 > x" is rewritten as the negation of "x >= 0", and "0 >= x" as the negation of "x > 0".
+ if (nds == 1) {
+ switch (dcc) {
+ case AF_CC_GT: dcc = AF_CC_GE; swap = !swap; break;
+ case AF_CC_GE: dcc = AF_CC_GT; swap = !swap; break;
+ default: break;
+ }
+ }
+ // Replace a's first source (value and bytecode source descriptor) with d's non-zero operand.
+ a->src[0] = d->src[nds];
+ a->bc.src[0] = d->bc.src[nds];
+ // Apply the accumulated negation by exchanging a's src[1] and src[2].
+ if (swap) {
+ std::swap(a->src[1], a->src[2]);
+ std::swap(a->bc.src[1], a->bc.src[2]);
+ }
+ // Switch `a` to the CNDcc opcode matching d's (possibly adjusted) condition and compare type.
+ a->bc.set_op(get_cndcc_op(dcc, dcmp_type));
- //TODO
}
bool peephole::get_bool_flt_to_int_source(alu_node* &a) {