From 5a68a29706002cd9f59faeb3ce18e7aed8a74201 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Fri, 24 May 2013 18:07:55 +0400 Subject: r600g/sb: relax register allocation for compute shaders We have to assume that all GPRs in a compute shader can be indirectly addressed because the LLVM backend doesn't provide any indirect array info. That's why, for compute shaders, a GPR array is created that covers all used GPRs (0..r600_bytecode::ngpr-1), but this seriously restricts register allocation in sb. This patch checks for actual use of indirect access in the shader and, if it's not used, the GPR array is not created, so that regalloc is not unnecessarily restricted. Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/sb/sb_bc.h | 4 +++- src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 14 +++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h index 6e73b418d23..25255a71d32 100644 --- a/src/gallium/drivers/r600/sb/sb_bc.h +++ b/src/gallium/drivers/r600/sb/sb_bc.h @@ -789,13 +789,15 @@ class bc_parser { typedef std::stack region_stack; region_stack loop_stack; + bool gpr_reladdr; + public: bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) : ctx(sctx), dec(), bc(bc), pshader(pshader), dw(), bc_ndw(), max_cf(), sh(), error(), slots(), cgroup(), - cf_map(), loop_stack() { } + cf_map(), loop_stack(), gpr_reladdr() { } int decode(); int prepare(); diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index f8c39ce246b..3ea09cb73b4 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -109,7 +109,8 @@ int bc_parser::prepare() { int bc_parser::parse_decls() { if (!pshader) { - sh->add_gpr_array(0, bc->ngpr, 0x0F); + if (gpr_reladdr) + sh->add_gpr_array(0, bc->ngpr, 0x0F); return 0; } @@ -192,8 +193,12 @@ int 
bc_parser::decode_cf(unsigned &i, bool &eop) { if ((r = decode_fetch_clause(cf))) return r;; } else if (flags & CF_EXP) { + if (cf->bc.rw_rel) + gpr_reladdr = true; assert(!cf->bc.rw_rel); } else if (flags & (CF_STRM | CF_RAT)) { + if (cf->bc.rw_rel) + gpr_reladdr = true; assert(!cf->bc.rw_rel); } else if (flags & CF_BRANCH) { if (cf->bc.addr > max_cf) @@ -257,8 +262,13 @@ int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) { for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { n = static_cast(*I); + if (n->bc.dst_rel) + gpr_reladdr = true; + for (int k = 0; k < n->bc.op_ptr->src_count; ++k) { bc_alu_src &src = n->bc.src[k]; + if (src.rel) + gpr_reladdr = true; if (src.sel == ALU_SRC_LITERAL) { literal_mask |= (1 << src.chan); src.value.u = dw[i + src.chan]; @@ -483,6 +493,8 @@ int bc_parser::decode_fetch_clause(cf_node* cf) { cf->push_back(n); if ((r = dec->decode_fetch(i, n->bc))) return r; + if (n->bc.src_rel || n->bc.dst_rel) + gpr_reladdr = true; } return 0; -- cgit v1.2.3