diff options
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 39 |
1 files changed, 31 insertions, 8 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 4b44f661419..6b5c42f86d9 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -377,7 +377,7 @@ struct r600_shader_tgsi_instruction { static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind); static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason); +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason); static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); static int tgsi_else(struct r600_shader_ctx *ctx); static int tgsi_endif(struct r600_shader_ctx *ctx); @@ -393,6 +393,15 @@ static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, unsigned dst_reg, unsigned mask); +static bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx) +{ + if (ctx->bc->family == CHIP_HEMLOCK || + ctx->bc->family == CHIP_CYPRESS || + ctx->bc->family == CHIP_JUNIPER) + return false; + return true; +} + static int tgsi_last_instruction(unsigned writemask) { int i, lasti = 0; @@ -10168,7 +10177,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops) return 0; } -static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, +static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx, unsigned reason) { struct r600_stack_info *stack = &ctx->bc->stack; @@ -10186,7 +10195,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on * the stack must be reserved to hold the current active/continue * masks */ - if (reason == FC_PUSH_VPM) { + if (reason == FC_PUSH_VPM || stack->push > 0) { elements += 2; } break; @@ -10212,7 +10221,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, * NOTE: it seems we also need to reserve additional element in some * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader, * then STACK_SIZE should be 2 instead of 1 */ - if (reason == FC_PUSH_VPM) { + if (reason == FC_PUSH_VPM || stack->push > 0) { elements += 1; } break; @@ -10231,6 +10240,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, if (entries > stack->max_entries) stack->max_entries = entries; + return elements; } static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) @@ -10254,7 +10264,7 @@ static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) } } -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason) { switch (reason) { case FC_PUSH_VPM: @@ -10262,6 +10272,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) break; case FC_PUSH_WQM: ++ctx->bc->stack.push_wqm; + break; case FC_LOOP: ++ctx->bc->stack.loop; break; @@ -10269,7 +10280,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) assert(0); } - callstack_update_max_depth(ctx, reason); + return callstack_update_max_depth(ctx, reason); } static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) @@ -10353,12 +10364,25 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode, struct r600_bytecode_alu_src *src) { int alu_type = CF_OP_ALU_PUSH_BEFORE; + bool needs_workaround = false; + int elems = callstack_push(ctx, FC_PUSH_VPM); + + if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) + needs_workaround = true; + + if (ctx->bc->chip_class == EVERGREEN && ctx_needs_stack_workaround_8xx(ctx)) { + unsigned dmod1 = (elems - 1) % ctx->bc->stack.entry_size; + unsigned dmod2 = (elems) % ctx->bc->stack.entry_size; + + if (elems && (!dmod1 || !dmod2)) + needs_workaround = true; + } /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by * LOOP_STARTxxx for nested loops may put the branch stack into a state * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */ - if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) { + if (needs_workaround) { r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH); ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; alu_type = CF_OP_ALU; @@ -10370,7 +10394,6 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode, fc_pushlevel(ctx, FC_IF); - callstack_push(ctx, FC_PUSH_VPM); return 0; } |