r600g/sb: implement r600 gpr index workaround. (v3.1)

r600, rv610 and rv630 all have a bug in their GPR indexing and in how the hw inserts accesses to PV. If the base index of a src is the same as a dst gpr written in the previous group, the hw will use PV instead of reading the indexed gpr correctly. The workaround is to insert a NOP when this is detected. v2: add the second part of the fix, detecting DST rel writes followed by reads with the same src base index. v3: forget adding stuff to structs, just iterate over the previous node group again, which makes it more obvious. v3.1: drop local_nop. Fixes ~200 piglit regressions on rv635 since SB was introduced. Reviewed-By: Glenn Kennard <[email protected]> Signed-off-by: Dave Airlie <[email protected]>
author: Dave Airlie <[email protected]> 2014-12-09 16:46:55 +1000
committer: Dave Airlie <[email protected]> 2014-12-16 12:44:45 +1000
commit: 3c8ef3a74b51d5c79bbbc3e73244085efd89206b (patch)
tree: 921bad2764da78a9da886b4caea8c97778e4b2c1
parent: de0fd375f6de8f3357d05decc4a7dc231c679645 (diff)
4 files changed, 57 insertions, 9 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index d03da98777d..6d3dc4da2f6 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -616,6 +616,8 @@ public:
 	unsigned num_slots;
 	bool uses_mova_gpr;
 
+	bool r6xx_gpr_index_workaround;
+
 	bool stack_workaround_8xx;
 	bool stack_workaround_9xx;
 
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 3f362c4d787..8d0be06802c 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -38,6 +38,18 @@
 
 namespace r600_sb {
 
+void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
+	// Build a new ALU group holding a single NOP and insert it ahead of b4.
+	alu_group_node *g = sh.create_alu_group();
+	alu_node *a = sh.create_alu();
+
+	a->bc.set_op(ALU_OP0_NOP);
+	a->bc.last = 1; // the NOP is the only instruction in g, so flag it as last
+
+	g->push_back(a);
+	b4->insert_before(g); // the NOP group goes in front of b4
+}
+
 int bc_finalizer::run() {
 
 	run_on(sh.root);
@@ -211,12 +223,12 @@ void bc_finalizer::finalize_if(region_node* r) {
 }
 
 void bc_finalizer::run_on(container_node* c) {
-
+	node *prev_node = NULL;
 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
 		node *n = *I;
 
 		if (n->is_alu_group()) {
-			finalize_alu_group(static_cast<alu_group_node*>(n));
+			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
 		} else {
 			if (n->is_alu_clause()) {
 				cf_node *c = static_cast<cf_node*>(n);
@@ -251,17 +263,22 @@ void bc_finalizer::run_on(container_node* c) {
 			if (n->is_container())
 				run_on(static_cast<container_node*>(n));
 		}
+		prev_node = n;
 	}
 }
 
-void bc_finalizer::finalize_alu_group(alu_group_node* g) {
+void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
 
 	alu_node *last = NULL;
+	alu_group_node *prev_g = NULL;
+	bool add_nop = false;
+	if (prev_node && prev_node->is_alu_group()) {
+		prev_g = static_cast<alu_group_node*>(prev_node);
+	}
 
 	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
 		alu_node *n = static_cast<alu_node*>(*I);
 		unsigned slot = n->bc.slot;
-
 		value *d = n->dst.empty() ? NULL : n->dst[0];
 
 		if (d && d->is_special_reg()) {
@@ -299,17 +316,22 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) {
 
 		update_ngpr(n->bc.dst_gpr);
 
-		finalize_alu_src(g, n);
+		add_nop |= finalize_alu_src(g, n, prev_g);
 
 		last = n;
 	}
 
+	if (add_nop) {
+		if (sh.get_ctx().r6xx_gpr_index_workaround) {
+			insert_rv6xx_load_ar_workaround(g);
+		}
+	}
 	last->bc.last = 1;
 }
 
-void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
+bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
 	vvec &sv = a->src;
-
+	bool add_nop = false;
 	FBC_DUMP(
 		sblog << "finalize_alu_src: ";
 		dump::dump_op(a);
@@ -336,6 +358,15 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
 			if (!v->rel->is_const()) {
 				src.rel = 1;
 				update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
+				if (prev && !add_nop) {
+					for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
+						alu_node *pn = static_cast<alu_node*>(*pI);
+						if (pn->bc.dst_gpr == src.sel) {
+							add_nop = true;
+							break;
+						}
+					}
+				}
 			} else
 				src.rel = 0;
 
@@ -393,11 +424,23 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
 			assert(!"unknown value kind");
 			break;
 		}
+		if (prev && !add_nop) {
+			for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
+				alu_node *pn = static_cast<alu_node*>(*pI);
+				if (pn->bc.dst_rel) {
+					if (pn->bc.dst_gpr == src.sel) {
+						add_nop = true;
+						break;
+					}
+				}
+			}
+		}
 	}
 
 	while (si < 3) {
 		a->bc.src[si++].sel = 0;
 	}
+	return add_nop;
 }
 
 void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
diff --git a/src/gallium/drivers/r600/sb/sb_context.cpp b/src/gallium/drivers/r600/sb/sb_context.cpp
index 8e1142873ac..5dba85b8645 100644
--- a/src/gallium/drivers/r600/sb/sb_context.cpp
+++ b/src/gallium/drivers/r600/sb/sb_context.cpp
@@ -61,6 +61,8 @@ int sb_context::init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass) {
 
 	uses_mova_gpr = is_r600() && chip != HW_CHIP_RV670;
 
+	r6xx_gpr_index_workaround = is_r600() && chip != HW_CHIP_RV670 && chip != HW_CHIP_RS780 && chip != HW_CHIP_RS880;
+
 	switch (chip) {
 	case HW_CHIP_RV610:
 	case HW_CHIP_RS780:
diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h
index 812d14a9d96..0346df1b167 100644
--- a/src/gallium/drivers/r600/sb/sb_pass.h
+++ b/src/gallium/drivers/r600/sb/sb_pass.h
@@ -695,8 +695,9 @@ public:
 
 	void run_on(container_node *c);
 
-	void finalize_alu_group(alu_group_node *g);
-	void finalize_alu_src(alu_group_node *g, alu_node *a);
+	void insert_rv6xx_load_ar_workaround(alu_group_node *b4);
+	void finalize_alu_group(alu_group_node *g, node *prev_node);
+	bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node);
 
 	void emit_set_grad(fetch_node* f);
 	void finalize_fetch(fetch_node *f);
author	Dave Airlie <[email protected]>	2014-12-09 16:46:55 +1000
committer	Dave Airlie <[email protected]>	2014-12-16 12:44:45 +1000
commit	3c8ef3a74b51d5c79bbbc3e73244085efd89206b (patch)
tree	921bad2764da78a9da886b4caea8c97778e4b2c1
parent	de0fd375f6de8f3357d05decc4a7dc231c679645 (diff)