summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/freedreno/ir3/ir3_compiler_nir.c24
-rw-r--r--src/freedreno/ir3/ir3_context.h5
-rw-r--r--src/freedreno/ir3/ir3_shader.h4
-rw-r--r--src/gallium/drivers/freedreno/a5xx/fd5_program.c4
-rw-r--r--src/gallium/drivers/freedreno/a6xx/fd6_program.c4
5 files changed, 37 insertions, 4 deletions
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 6b33c1f8981..f8155747c52 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2341,6 +2341,20 @@ emit_loop(struct ir3_context *ctx, nir_loop *nloop)
}
static void
+stack_push(struct ir3_context *ctx)
+{
+ ctx->stack++;
+ ctx->max_stack = MAX2(ctx->max_stack, ctx->stack);
+}
+
+static void
+stack_pop(struct ir3_context *ctx)
+{
+ compile_assert(ctx, ctx->stack > 0);
+ ctx->stack--;
+}
+
+static void
emit_cf_list(struct ir3_context *ctx, struct exec_list *list)
{
foreach_list_typed(nir_cf_node, node, node, list) {
@@ -2349,10 +2363,14 @@ emit_cf_list(struct ir3_context *ctx, struct exec_list *list)
emit_block(ctx, nir_cf_node_as_block(node));
break;
case nir_cf_node_if:
+ stack_push(ctx);
emit_if(ctx, nir_cf_node_as_if(node));
+ stack_pop(ctx);
break;
case nir_cf_node_loop:
+ stack_push(ctx);
emit_loop(ctx, nir_cf_node_as_loop(node));
+ stack_pop(ctx);
break;
case nir_cf_node_function:
ir3_context_error(ctx, "TODO\n");
@@ -2479,9 +2497,13 @@ emit_function(struct ir3_context *ctx, nir_function_impl *impl)
{
nir_metadata_require(impl, nir_metadata_block_index);
+ compile_assert(ctx, ctx->stack == 0);
+
emit_cf_list(ctx, &impl->body);
emit_block(ctx, impl->end_block);
+ compile_assert(ctx, ctx->stack == 0);
+
/* at this point, we should have a single empty block,
* into which we emit the 'end' instruction.
*/
@@ -3079,6 +3101,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
ir3_print(ir);
}
+ so->branchstack = ctx->max_stack;
+
/* Note that actual_in counts inputs that are not bary.f'd for FS: */
if (so->type == MESA_SHADER_VERTEX)
so->total_in = actual_in;
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index 63c5d8baaf9..99f43cb5ab6 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -86,6 +86,11 @@ struct ir3_context {
unsigned num_arrays;
+ /* Tracking for max level of flowcontrol (branchstack) needed
+ * by a5xx+:
+ */
+ unsigned stack, max_stack;
+
/* a common pattern for indirect addressing is to request the
* same address register multiple times. To avoid generating
* duplicate instruction sequences (which our backend does not
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index bc47160d6ea..418c77ae8b0 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -295,6 +295,10 @@ struct ir3_shader_variant {
struct ir3_info info;
struct ir3 *ir;
+ /* Levels of nesting of flow control:
+ */
+ unsigned branchstack;
+
/* the instructions length is in units of instruction groups
* (4 instructions for a3xx, 16 instructions for a4xx.. each
* instruction is 2 dwords):
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
index 97a84b01c0a..9c54244457f 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
@@ -443,7 +443,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
0x6 | /* XXX seems to be always set? */
- A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow..
+ A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
COND(s[VS].v->num_samp > 0, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
struct ir3_shader_linkage l = {0};
@@ -567,7 +567,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
- A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow..
+ A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
COND(s[FS].v->num_samp > 0, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index 71dadef97e2..add2d28b866 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -402,7 +402,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(fssz) |
A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
A6XX_SP_VS_CTRL_REG0_MERGEDREGS |
- A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow..
+ A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) |
COND(s[VS].v->num_samp > 0, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
struct ir3_shader_linkage l = {0};
@@ -524,7 +524,7 @@ setup_stateobj(struct fd_ringbuffer *ring,
0x1000000 |
A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
A6XX_SP_FS_CTRL_REG0_MERGEDREGS |
- A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow..
+ A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) |
COND(s[FS].v->num_samp > 0, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1);