summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Dave Airlie <[email protected]> 2018-03-09 16:03:53 +1000
committer: Emil Velikov <[email protected]> 2018-03-20 16:57:25 +0000
commit: 2faad178ed090084f1ac676a08dcaee099bc3bd8 (patch)
tree: be3d3c4f57130e74da2ca8d4229790e8f6feb497 /src
parent: 611a88d4a6c89e3ab4793bea4bb78da4de76d200 (diff)
r600: implement callstack workaround for evergreen.
This is ported from the sb backend. There are some issues with evergreen stacks on the boundary between entries and ALU_PUSH_BEFORE instructions. Whenever we are going to use a push before, we check the stack usage, and if we have to use the workaround, we switch to a separate push.

I noticed this problem when dealing with some of the soft fp64 shaders; in nosb mode they are quite stack happy. This fixes all the glitches and inconsistencies I've seen with them.

Reviewed-by: Roland Scheidegger <[email protected]>
Tested-by: Elie Tournier <[email protected]>
Cc: <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
(cherry picked from commit 5d4fbc2b54cb2aaea1cbb52ec087f31009f3ac76)
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 39
1 files changed, 31 insertions, 8 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 71bce158a4c..ea3331ad756 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -372,7 +372,7 @@ struct r600_shader_tgsi_instruction {
static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
-static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
+static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
static int tgsi_else(struct r600_shader_ctx *ctx);
static int tgsi_endif(struct r600_shader_ctx *ctx);
@@ -388,6 +388,15 @@ static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
unsigned dst_reg, unsigned mask);
+static bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx)
+{
+ if (ctx->bc->family == CHIP_HEMLOCK ||
+ ctx->bc->family == CHIP_CYPRESS ||
+ ctx->bc->family == CHIP_JUNIPER)
+ return false;
+ return true;
+}
+
static int tgsi_last_instruction(unsigned writemask)
{
int i, lasti = 0;
@@ -9635,7 +9644,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
return 0;
}
-static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
+static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx,
unsigned reason)
{
struct r600_stack_info *stack = &ctx->bc->stack;
@@ -9653,7 +9662,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
* the stack must be reserved to hold the current active/continue
* masks */
- if (reason == FC_PUSH_VPM) {
+ if (reason == FC_PUSH_VPM || stack->push > 0) {
elements += 2;
}
break;
@@ -9679,7 +9688,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
* NOTE: it seems we also need to reserve additional element in some
* other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
* then STACK_SIZE should be 2 instead of 1 */
- if (reason == FC_PUSH_VPM) {
+ if (reason == FC_PUSH_VPM || stack->push > 0) {
elements += 1;
}
break;
@@ -9698,6 +9707,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
if (entries > stack->max_entries)
stack->max_entries = entries;
+ return elements;
}
static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
@@ -9721,7 +9731,7 @@ static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
}
}
-static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
+static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
{
switch (reason) {
case FC_PUSH_VPM:
@@ -9729,6 +9739,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
break;
case FC_PUSH_WQM:
++ctx->bc->stack.push_wqm;
+ break;
case FC_LOOP:
++ctx->bc->stack.loop;
break;
@@ -9736,7 +9747,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
assert(0);
}
- callstack_update_max_depth(ctx, reason);
+ return callstack_update_max_depth(ctx, reason);
}
static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
@@ -9819,12 +9830,25 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
static int emit_if(struct r600_shader_ctx *ctx, int opcode)
{
int alu_type = CF_OP_ALU_PUSH_BEFORE;
+ bool needs_workaround = false;
+ int elems = callstack_push(ctx, FC_PUSH_VPM);
+
+ if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1)
+ needs_workaround = true;
+
+ if (ctx->bc->chip_class == EVERGREEN && ctx_needs_stack_workaround_8xx(ctx)) {
+ unsigned dmod1 = (elems - 1) % ctx->bc->stack.entry_size;
+ unsigned dmod2 = (elems) % ctx->bc->stack.entry_size;
+
+ if (elems && (!dmod1 || !dmod2))
+ needs_workaround = true;
+ }
/* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
* LOOP_STARTxxx for nested loops may put the branch stack into a state
* such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
* by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
- if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
+ if (needs_workaround) {
r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
alu_type = CF_OP_ALU;
@@ -9836,7 +9860,6 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode)
fc_pushlevel(ctx, FC_IF);
- callstack_push(ctx, FC_PUSH_VPM);
return 0;
}