summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
author: Vadim Girlin <[email protected]> 2013-05-27 01:37:54 +0400
committer: Vadim Girlin <[email protected]> 2013-05-27 01:45:07 +0400
commit: 63d09a0cb74f75e7e09773d560a22452c107647f (patch)
tree: cf6cf51804696ee17172ee8fcaa58b645dce7770 /src/gallium
parent: 880f435a7e1a658b7fb81cc01f47bf5af30a655a (diff)
r600g/sb: improve handling of KILL instructions
This patch improves the handling of unconditional KILL instructions inside conditional blocks, uncovering more opportunities for if-conversion.

Signed-off-by: Vadim Girlin <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/r600/sb/sb_expr.h 7
-rw-r--r-- src/gallium/drivers/r600/sb/sb_if_conversion.cpp 214
-rw-r--r-- src/gallium/drivers/r600/sb/sb_pass.h 7
3 files changed, 139 insertions, 89 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_expr.h b/src/gallium/drivers/r600/sb/sb_expr.h
index c7f7dbfe447..032867be559 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.h
+++ b/src/gallium/drivers/r600/sb/sb_expr.h
@@ -72,12 +72,13 @@ public:
bool fold_alu_op2(alu_node &n);
bool fold_alu_op3(alu_node &n);
- void apply_alu_src_mod(const bc_alu &bc, unsigned src, literal &v);
- void apply_alu_dst_mod(const bc_alu &bc, literal &v);
+ static void apply_alu_src_mod(const bc_alu &bc, unsigned src, literal &v);
+ static void apply_alu_dst_mod(const bc_alu &bc, literal &v);
void assign_source(value *dst, value *src);
- bool evaluate_condition(unsigned alu_cnd_flags, literal s1, literal s2);
+ static bool evaluate_condition(unsigned alu_cnd_flags, literal s1,
+ literal s2);
};
} // namespace r600_sb
diff --git a/src/gallium/drivers/r600/sb/sb_if_conversion.cpp b/src/gallium/drivers/r600/sb/sb_if_conversion.cpp
index 6411d06bdf6..96dd1ee31fe 100644
--- a/src/gallium/drivers/r600/sb/sb_if_conversion.cpp
+++ b/src/gallium/drivers/r600/sb/sb_if_conversion.cpp
@@ -56,90 +56,76 @@ int if_conversion::run() {
return 0;
}
-unsigned if_conversion::try_convert_kills(region_node* r) {
+void if_conversion::convert_kill_instructions(region_node *r,
+ value *em, bool branch,
+ container_node *c) {
+ value *cnd = NULL;
- // handling the simplest (and probably most frequent) case only -
- // if - 4 kills - endif
+ for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) {
+ N = I + 1;
- // TODO handle more complex cases
+ if (!I->is_alu_inst())
+ continue;
- depart_node *d1 = static_cast<depart_node*>(r->front());
- if (!d1->is_depart())
- return 0;
+ alu_node *a = static_cast<alu_node*>(*I);
+ unsigned flags = a->bc.op_ptr->flags;
- if_node *f = static_cast<if_node*>(d1->front());
- if (!f->is_if())
- return 0;
+ if (!(flags & AF_KILL))
+ continue;
- depart_node *d2 = static_cast<depart_node*>(f->front());
- if (!d2->is_depart())
- return 0;
+ // ignore predicated or non-const kill instructions
+ if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const())
+ continue;
- unsigned cnt = 0;
+ literal l0 = a->src[0]->literal_value;
+ literal l1 = a->src[1]->literal_value;
- for (node_iterator I = d2->begin(), E = d2->end(); I != E; ++I) {
- alu_node *n = static_cast<alu_node*>(*I);
- if (!n->is_alu_inst())
- return 0;
+ expr_handler::apply_alu_src_mod(a->bc, 0, l0);
+ expr_handler::apply_alu_src_mod(a->bc, 1, l1);
- if (!(n->bc.op_ptr->flags & AF_KILL))
- return 0;
+ if (expr_handler::evaluate_condition(flags, l0, l1)) {
+ // kill with constant 'true' condition, we'll convert it to the
+ // conditional kill outside of the if-then-else block
- if (n->bc.op_ptr->src_count != 2 || n->src.size() != 2)
- return 0;
+ a->remove();
- value *s1 = n->src[0], *s2 = n->src[1];
+ if (!cnd) {
+ cnd = get_select_value_for_em(sh, em);
+ } else {
+ // more than one kill with the same condition, just remove it
+ continue;
+ }
- // assuming that the KILL with constant operands is "always kill"
+ r->insert_before(a);
+ a->bc.set_op(branch ? ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT);
- if (!s1 || !s2 || !s1->is_const() || !s2->is_const())
- return 0;
-
- ++cnt;
- }
-
- if (cnt > 4)
- return 0;
-
- value *cond = f->cond;
- value *pred = get_select_value_for_em(sh, cond);
-
- if (!pred)
- return 0;
-
- for (node_iterator N, I = d2->begin(), E = d2->end(); I != E; I = N) {
- N = I; ++N;
-
- alu_node *n = static_cast<alu_node*>(*I);
-
- IFC_DUMP(
- sblog << "converting ";
- dump::dump_op(n);
- sblog << " " << n << "\n";
- );
-
- n->remove();
-
- n->bc.set_op(ALU_OP2_KILLE_INT);
- n->src[0] = pred;
- n->src[1] = sh.get_const_value(0);
- // reset src modifiers
- memset(&n->bc.src[0], 0, sizeof(bc_alu_src));
- memset(&n->bc.src[1], 0, sizeof(bc_alu_src));
-
- r->insert_before(n);
+ a->src[0] = cnd;
+ a->src[1] = sh.get_const_value(0);
+ // clear modifiers
+ memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
+ memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
+ } else {
+ // kill with constant 'false' condition, this shouldn't happen
+ // but remove it anyway
+ a->remove();
+ }
}
-
- return cnt;
}
+bool if_conversion::check_and_convert(region_node *r) {
-
-bool if_conversion::run_on(region_node* r) {
-
- if (r->dep_count() != 2 || r->rep_count() != 1)
+ depart_node *nd1 = static_cast<depart_node*>(r->first);
+ if (!nd1->is_depart())
+ return false;
+ if_node *nif = static_cast<if_node*>(nd1->first);
+ if (!nif->is_if())
+ return false;
+ depart_node *nd2 = static_cast<depart_node*>(nif->first);
+ if (!nd2->is_depart())
return false;
+ value* &em = nif->cond;
+
node_stats s;
r->collect_stats(s);
@@ -149,7 +135,7 @@ bool if_conversion::run_on(region_node* r) {
s.dump();
);
- if (s.region_count || s.fetch_count ||
+ if (s.region_count || s.fetch_count || s.alu_kill_count ||
s.if_count != 1 || s.repeat_count)
return false;
@@ -189,25 +175,8 @@ bool if_conversion::run_on(region_node* r) {
if (real_alu_count > 400)
return false;
- if (s.alu_kill_count) {
- unsigned kcnt = try_convert_kills(r);
- if (kcnt < s.alu_kill_count)
- return false;
- }
-
IFC_DUMP( sblog << "if_cvt: processing...\n"; );
- depart_node *nd1 = static_cast<depart_node*>(r->first);
- if (!nd1->is_depart())
- return false;
- if_node *nif = static_cast<if_node*>(nd1->first);
- if (!nif->is_if())
- return false;
- depart_node *nd2 = static_cast<depart_node*>(nif->first);
- if (!nd2->is_depart())
- return false;
-
- value *em = nif->cond;
value *select = get_select_value_for_em(sh, em);
if (!select)
@@ -230,6 +199,83 @@ bool if_conversion::run_on(region_node* r) {
return true;
}
+bool if_conversion::run_on(region_node* r) {
+
+ if (r->dep_count() != 2 || r->rep_count() != 1)
+ return false;
+
+ depart_node *nd1 = static_cast<depart_node*>(r->first);
+ if (!nd1->is_depart())
+ return false;
+ if_node *nif = static_cast<if_node*>(nd1->first);
+ if (!nif->is_if())
+ return false;
+ depart_node *nd2 = static_cast<depart_node*>(nif->first);
+ if (!nd2->is_depart())
+ return false;
+
+ value* &em = nif->cond;
+
+ convert_kill_instructions(r, em, true, nd2);
+ convert_kill_instructions(r, em, false, nd1);
+
+ if (check_and_convert(r))
+ return true;
+
+ if (nd2->empty() && nif->next) {
+ // empty true branch, non-empty false branch
+ // we'll invert it to get rid of 'else'
+
+ assert(em && em->def);
+
+ alu_node *predset = static_cast<alu_node*>(em->def);
+
+ // create clone of PREDSET instruction with inverted condition.
+ // PREDSET has 3 dst operands in our IR (value written to gpr,
+ // predicate value and exec mask value), we'll split it such that
+ // new PREDSET will define exec mask value only, and two others will
+ // be defined in the old PREDSET (if they are not used then DCE will
+ // simply remove old PREDSET).
+
+ alu_node *newpredset = sh.clone(predset);
+ predset->insert_after(newpredset);
+
+ predset->dst[2] = NULL;
+
+ newpredset->dst[0] = NULL;
+ newpredset->dst[1] = NULL;
+
+ em->def = newpredset;
+
+ unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK;
+ unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK;
+ bool swapargs = false;
+
+ cc = invert_setcc_condition(cc, swapargs);
+
+ if (swapargs) {
+ std::swap(newpredset->src[0], newpredset->src[1]);
+ std::swap(newpredset->bc.src[0], newpredset->bc.src[1]);
+ }
+
+ unsigned newopcode = get_predsetcc_opcode(cc, cmptype);
+ newpredset->bc.set_op(newopcode);
+
+ // move the code from the 'false' branch ('else') to the 'true' branch
+ nd2->move(nif->next, NULL);
+
+ // swap phi operands
+ for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E;
+ ++I) {
+ node *p = *I;
+ assert(p->src.size() == 2);
+ std::swap(p->src[0], p->src[1]);
+ }
+ }
+
+ return false;
+}
+
alu_node* if_conversion::convert_phi(value* select, node* phi) {
assert(phi->dst.size() == 1 || phi->src.size() == 2);
diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h
index a6338ae0176..7e606da822c 100644
--- a/src/gallium/drivers/r600/sb/sb_pass.h
+++ b/src/gallium/drivers/r600/sb/sb_pass.h
@@ -338,9 +338,12 @@ public:
bool run_on(region_node *r);
- alu_node* convert_phi(value *select, node *phi);
+ void convert_kill_instructions(region_node *r, value *em, bool branch,
+ container_node *c);
+
+ bool check_and_convert(region_node *r);
- unsigned try_convert_kills(region_node* r);
+ alu_node* convert_phi(value *select, node *phi);
};