diff options
author | Vadim Girlin <[email protected]> | 2013-12-11 04:08:32 +0400 |
---|---|---|
committer | Vadim Girlin <[email protected]> | 2013-12-11 04:08:32 +0400 |
commit | 00faf82832f3e6ef886abad246b50cc47b901c1f (patch) | |
tree | ae4d72e04fd6cc1196a0a8ed96d8b51f830bc28e /src/gallium/drivers/r600/sb | |
parent | 7a50d38a2bcbecc70e0dd3b49ca717e56c438f21 (diff) |
r600g/sb: fix stack size computation on evergreen
On evergreen we have to reserve 1 stack element in some additional cases
besides the ones mentioned in the docs, but stack size computation was
recently reimplemented exactly as described in the docs by the patch that
added workarounds for stack issues on EG/CM, resulting in regressions
with some apps (Serious Sam 3).
This patch fixes the regression by restoring the previous behavior.
Fixes https://bugs.freedesktop.org/show_bug.cgi?id=72369
Signed-off-by: Vadim Girlin <[email protected]>
Cc: "10.0" <[email protected]>
Tested-by: Andre Heider <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600/sb')
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 16 |
1 files changed, 12 insertions, 4 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index bc71cf873da..355eb63810c 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -770,7 +770,6 @@ void bc_finalizer::update_ngpr(unsigned gpr) {
 unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                                        unsigned &ifs, unsigned add) {
 	unsigned stack_elements = add;
-	bool has_non_wqm_push_with_loops_on_stack = false;
 	bool has_non_wqm_push = (add != 0);
 	region_node *r = n->is_region() ?
 			static_cast<region_node*>(n) : n->get_parent_region();
@@ -781,8 +780,6 @@ unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
 	while (r) {
 		if (r->is_loop()) {
 			++loops;
-			if (has_non_wqm_push)
-				has_non_wqm_push_with_loops_on_stack = true;
 		} else {
 			++ifs;
 			has_non_wqm_push = true;
@@ -795,15 +792,26 @@ unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
 	switch (ctx.hw_class) {
 	case HW_CLASS_R600:
 	case HW_CLASS_R700:
+		// If any non-WQM push is invoked, 2 elements should be reserved.
 		if (has_non_wqm_push)
 			stack_elements += 2;
 		break;
 	case HW_CLASS_CAYMAN:
+		// If any stack operation is invoked, 2 elements should be reserved
 		if (stack_elements)
 			stack_elements += 2;
 		break;
 	case HW_CLASS_EVERGREEN:
-		if (has_non_wqm_push_with_loops_on_stack)
+		// According to the docs we need to reserve 1 element for each of the
+		// following cases:
+		// 1) non-WQM push is used with WQM/LOOP frames on stack
+		// 2) ALU_ELSE_AFTER is used at the point of max stack usage
+		// NOTE:
+		// It was found that the conditions above are not sufficient, there are
+		// other cases where we also need to reserve stack space, that's why
+		// we always reserve 1 stack element if we have non-WQM push on stack.
+		// Condition 2 is ignored for now because we don't use this instruction.
+		if (has_non_wqm_push)
 			++stack_elements;
 		break;
 	}