summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/r600/r600_asm.c 43
-rw-r--r-- src/gallium/drivers/r600/r600_asm.h 24
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 131
3 files changed, 142 insertions, 56 deletions
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 65c705d0aa8..c88b48dc96b 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -87,6 +87,40 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
return tex;
}
+/*
+ * Return the stack entry size (number of sub-entries, i.e. columns, in one
+ * stack row) for the given chip family.
+ *
+ * Wavefront size:
+ *   64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/
+ *       Aruba/Sumo/Sumo2/redwood/juniper
+ *   32: RV630/RV635/RV730/RV710/Palm/Cedar
+ *   16: RV610/RV620/RS780/RS880
+ *
+ * Stack row size:
+ *   Wavefront Size                          16  32  48  64
+ *   Columns per Row (R6xx/R7xx/R8xx only)    8   8   4   4
+ *   Columns per Row (R9xx+)                  8   4   4   4
+ *
+ * Every family listed below as wavefront size 16 or 32 yields 8; any other
+ * family is treated as wavefront size 64 and yields 4.
+ */
+static unsigned stack_entry_size(enum radeon_family chip)
+{
+	/* wavefront size 64 unless the family is listed below */
+	unsigned columns = 4;
+
+	/* FIXME: are some chips missing here? */
+	switch (chip) {
+	/* wavefront size 16 */
+	case CHIP_RV610:
+	case CHIP_RS780:
+	case CHIP_RV620:
+	case CHIP_RS880:
+	/* wavefront size 32 */
+	case CHIP_RV630:
+	case CHIP_RV635:
+	case CHIP_RV730:
+	case CHIP_RV710:
+	case CHIP_PALM:
+	case CHIP_CEDAR:
+		columns = 8;
+		break;
+
+	default:
+		break;
+	}
+
+	return columns;
+}
+
void r600_bytecode_init(struct r600_bytecode *bc,
enum chip_class chip_class,
enum radeon_family family,
@@ -104,6 +138,7 @@ void r600_bytecode_init(struct r600_bytecode *bc,
LIST_INITHEAD(&bc->cf);
bc->chip_class = chip_class;
bc->msaa_texture_mode = msaa_texture_mode;
+ bc->stack.entry_size = stack_entry_size(family);
}
int r600_bytecode_add_cf(struct r600_bytecode *bc)
@@ -1522,8 +1557,8 @@ int r600_bytecode_build(struct r600_bytecode *bc)
unsigned addr;
int i, r;
- if (bc->callstack[0].max > 0)
- bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
+ bc->nstack = bc->stack.max_entries;
+
if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) {
bc->nstack = 1;
}
@@ -1824,8 +1859,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
chip = '6';
break;
}
- fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n",
- bc->ndw, bc->ngpr);
+ fprintf(stderr, "bytecode %d dw -- %d gprs -- %d nstack -------------\n",
+ bc->ndw, bc->ngpr, bc->nstack);
fprintf(stderr, "shader %d -- %c\n", index++, chip);
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index c1aa3bae4e3..c052ceabfc7 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -173,16 +173,25 @@ struct r600_cf_stack_entry {
};
#define SQ_MAX_CALL_DEPTH 0x00000020
-struct r600_cf_callstack {
- unsigned fc_sp_before_entry;
- int sub_desc_index;
- int current;
- int max;
-};
#define AR_HANDLE_NORMAL 0
#define AR_HANDLE_RV6XX 1 /* except RV670 */
+/* Control-flow stack bookkeeping, embedded in struct r600_bytecode as
+ * `stack`: current nesting counters plus the resulting size requirement. */
+struct r600_stack_info {
+ /* current level of non-WQM PUSH operations
+ * (PUSH, PUSH_ELSE, ALU_PUSH_BEFORE);
+ * counted up/down for FC_PUSH_VPM by callstack_push()/callstack_pop() */
+ int push;
+ /* current level of WQM PUSH operations
+ * (PUSH, PUSH_ELSE, PUSH_WQM);
+ * counted up/down for FC_PUSH_WQM by callstack_push()/callstack_pop() */
+ int push_wqm;
+ /* current loop level; counted up/down for FC_LOOP by
+ * callstack_push()/callstack_pop() */
+ int loop;
+
+ /* required depth: the largest entry count computed so far by
+ * callstack_update_max_depth(); r600_bytecode_build() copies it to
+ * bc->nstack */
+ int max_entries;
+ /* subentries per entry; set from stack_entry_size(family) in
+ * r600_bytecode_init() */
+ int entry_size;
+};
struct r600_bytecode {
enum chip_class chip_class;
@@ -199,8 +208,7 @@ struct r600_bytecode {
uint32_t *bytecode;
uint32_t fc_sp;
struct r600_cf_stack_entry fc_stack[32];
- unsigned call_sp;
- struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
+ struct r600_stack_info stack;
unsigned ar_loaded;
unsigned ar_reg;
unsigned ar_chan;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index e74ed54443d..82885d1370e 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -245,7 +245,7 @@ struct r600_shader_tgsi_instruction {
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
-static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only);
+static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
static int tgsi_else(struct r600_shader_ctx *ctx);
static int tgsi_endif(struct r600_shader_ctx *ctx);
@@ -419,7 +419,7 @@ static void llvm_if(struct r600_shader_ctx *ctx)
{
r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
fc_pushlevel(ctx, FC_IF);
- callstack_check_depth(ctx, FC_PUSH_VPM, 0);
+ callstack_push(ctx, FC_PUSH_VPM);
}
static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx)
@@ -5551,63 +5551,107 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
return 0;
}
-static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
+/*
+ * Re-evaluate how many stack entries the current nesting state needs and
+ * raise ctx->bc->stack.max_entries if that number is larger than the value
+ * recorded so far. The recorded maximum never shrinks.
+ *
+ * ctx:    shader context; ctx->bc->chip_class and ctx->bc->stack are read
+ * reason: FC_* kind of the operation that was just pushed
+ */
+static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
+					      unsigned reason)
+{
+	/* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4
+	 * for all chips, so 4 is used in the final formula, not the real
+	 * entry_size for the chip */
+	const unsigned hw_entry_size = 4;
+	const int vpm_push = (reason == FC_PUSH_VPM);
+	struct r600_stack_info *info = &ctx->bc->stack;
+	unsigned chip_entry_size = info->entry_size;
+	unsigned used, reserved = 0, needed;
+
+	/* each loop / WQM push level contributes entry_size elements, each
+	 * non-WQM push level contributes one element */
+	used = (info->loop + info->push_wqm) * chip_entry_size;
+	used += info->push;
+
+	switch (ctx->bc->chip_class) {
+	case R600:
+	case R700:
+		/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2
+		 * elements on the stack must be reserved to hold the current
+		 * active/continue masks */
+		if (vpm_push)
+			reserved = 2;
+		break;
+
+	case CAYMAN:
+		/* r9xx: any stack operation on empty stack consumes 2
+		 * additional elements; the r8xx+ rule described under
+		 * EVERGREEN is then applied on top of that.
+		 *
+		 * FIXME: do the two elements added here already cover the
+		 * r8xx+ cases? */
+		reserved = 2;
+		if (vpm_push)
+			reserved += 1;
+		break;
+
+	case EVERGREEN:
+		/* r8xx+: 2 extra elements are not always required, but one
+		 * extra element must be added for each of the following cases:
+		 * 1. There is an ALU_ELSE_AFTER instruction at the point of
+		 *    greatest stack usage.
+		 *    (Currently we don't use ALU_ELSE_AFTER.)
+		 * 2. There are LOOP/WQM frames on the stack when any flavor
+		 *    of non-WQM PUSH instruction executed.
+		 *
+		 * NOTE: it seems we also need to reserve additional element in
+		 * some other cases, e.g. when we have 4 levels of PUSH_VPM in
+		 * the shader, then STACK_SIZE should be 2 instead of 1 */
+		if (vpm_push)
+			reserved = 1;
+		break;
+
+	default:
+		assert(0);
+		break;
+	}
+
+	used += reserved;
+
+	/* round the element count up to whole entries */
+	needed = (used + (hw_entry_size - 1)) / hw_entry_size;
+
+	if (needed > info->max_entries)
+		info->max_entries = needed;
+}
+
+/* Counterpart of callstack_push(): decrement the nesting counter in
+ * ctx->bc->stack selected by reason (FC_PUSH_VPM -> push,
+ * FC_PUSH_WQM -> push_wqm, FC_LOOP -> loop) and assert that it did not
+ * drop below zero. Any other reason trips assert(0).
+ * stack.max_entries is not touched here. */
+static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
{
switch(reason) {
case FC_PUSH_VPM:
- ctx->bc->callstack[ctx->bc->call_sp].current--;
+ --ctx->bc->stack.push;
+ assert(ctx->bc->stack.push >= 0);
break;
case FC_PUSH_WQM:
+ --ctx->bc->stack.push_wqm;
+ assert(ctx->bc->stack.push_wqm >= 0);
+ break;
case FC_LOOP:
- ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
+ --ctx->bc->stack.loop;
+ assert(ctx->bc->stack.loop >= 0);
break;
- case FC_REP:
- /* TOODO : for 16 vp asic should -= 2; */
- ctx->bc->callstack[ctx->bc->call_sp].current --;
+ default:
+ assert(0);
break;
}
}
-static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
+/* Record one more level of control-flow nesting: increment the counter in
+ * ctx->bc->stack selected by reason (FC_PUSH_VPM -> push,
+ * FC_PUSH_WQM -> push_wqm, FC_LOOP -> loop), then let
+ * callstack_update_max_depth() re-evaluate the required stack size.
+ * Any other reason trips assert(0).
+ *
+ * Every case ends in its own break: callstack_pop(FC_PUSH_WQM) only
+ * decrements push_wqm, so FC_PUSH_WQM must not fall through into FC_LOOP
+ * and bump the loop counter as well. */
+static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
{
- if (check_max_only) {
- int diff;
- switch (reason) {
- case FC_PUSH_VPM:
- diff = 1;
- break;
- case FC_PUSH_WQM:
- diff = 4;
- break;
- default:
- assert(0);
- diff = 0;
- }
- if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
- ctx->bc->callstack[ctx->bc->call_sp].max) {
- ctx->bc->callstack[ctx->bc->call_sp].max =
- ctx->bc->callstack[ctx->bc->call_sp].current + diff;
- }
- return;
- }
switch (reason) {
case FC_PUSH_VPM:
- ctx->bc->callstack[ctx->bc->call_sp].current++;
+ ++ctx->bc->stack.push;
break;
case FC_PUSH_WQM:
+ ++ctx->bc->stack.push_wqm;
+ break;
case FC_LOOP:
- ctx->bc->callstack[ctx->bc->call_sp].current += 4;
- break;
- case FC_REP:
- ctx->bc->callstack[ctx->bc->call_sp].current++;
+ ++ctx->bc->stack.loop;
break;
+ default:
+ assert(0);
}
- if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
- ctx->bc->callstack[ctx->bc->call_sp].max) {
- ctx->bc->callstack[ctx->bc->call_sp].max =
- ctx->bc->callstack[ctx->bc->call_sp].current;
- }
+ callstack_update_max_depth(ctx, reason);
}
static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
@@ -5694,7 +5738,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx)
fc_pushlevel(ctx, FC_IF);
- callstack_check_depth(ctx, FC_PUSH_VPM, 0);
+ callstack_push(ctx, FC_PUSH_VPM);
return 0;
}
@@ -5724,7 +5768,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
}
fc_poplevel(ctx);
- callstack_decrease_current(ctx, FC_PUSH_VPM);
+ callstack_pop(ctx, FC_PUSH_VPM);
return 0;
}
@@ -5737,7 +5781,7 @@ static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
fc_pushlevel(ctx, FC_LOOP);
/* check stack depth */
- callstack_check_depth(ctx, FC_LOOP, 0);
+ callstack_push(ctx, FC_LOOP);
return 0;
}
@@ -5766,7 +5810,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx)
}
/* XXX add LOOPRET support */
fc_poplevel(ctx);
- callstack_decrease_current(ctx, FC_LOOP);
+ callstack_pop(ctx, FC_LOOP);
return 0;
}
@@ -5789,7 +5833,6 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
fc_set_mid(ctx, fscp);
- callstack_check_depth(ctx, FC_PUSH_VPM, 1);
return 0;
}