summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r600/r600_asm.c
diff options
context:
space:
mode:
authorVadim Girlin <[email protected]>2013-04-02 19:33:40 +0400
committerVadim Girlin <[email protected]>2013-04-02 19:34:14 +0400
commit9be624b3ef32ae6311010cf05531e12051b647dc (patch)
treec4a771f66690477f9e0c794acd831383e3b5233d /src/gallium/drivers/r600/r600_asm.c
parent7e04227f3967fd4c4dd7a088cb42197d916925a5 (diff)
r600g: don't reserve more stack space than required v5
Reduced stack size allows to run more threads in some cases, improving performance for the shaders that use stack (that is, for the shaders with control flow instructions). E.g. with unigine-based apps. v4: implement exact computation taking into account wavefront size v5: add cases for RV620, RS880 Signed-off-by: Vadim Girlin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600/r600_asm.c')
-rw-r--r--src/gallium/drivers/r600/r600_asm.c43
1 files changed, 39 insertions, 4 deletions
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 65c705d0aa8..c88b48dc96b 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -87,6 +87,40 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void)
return tex;
}
+static unsigned stack_entry_size(enum radeon_family chip) {
+ /* Wavefront size:
+ * 64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/
+ * Aruba/Sumo/Sumo2/redwood/juniper
+ * 32: R630/R730/R710/Palm/Cedar
+ * 16: R610/Rs780
+ *
+ * Stack row size:
+ * Wavefront Size 16 32 48 64
+ * Columns per Row (R6xx/R7xx/R8xx only) 8 8 4 4
+ * Columns per Row (R9xx+) 8 4 4 4 */
+
+ switch (chip) {
+ /* FIXME: are some chips missing here? */
+ /* wavefront size 16 */
+ case CHIP_RV610:
+ case CHIP_RS780:
+ case CHIP_RV620:
+ case CHIP_RS880:
+ /* wavefront size 32 */
+ case CHIP_RV630:
+ case CHIP_RV635:
+ case CHIP_RV730:
+ case CHIP_RV710:
+ case CHIP_PALM:
+ case CHIP_CEDAR:
+ return 8;
+
+ /* wavefront size 64 */
+ default:
+ return 4;
+ }
+}
+
void r600_bytecode_init(struct r600_bytecode *bc,
enum chip_class chip_class,
enum radeon_family family,
@@ -104,6 +138,7 @@ void r600_bytecode_init(struct r600_bytecode *bc,
LIST_INITHEAD(&bc->cf);
bc->chip_class = chip_class;
bc->msaa_texture_mode = msaa_texture_mode;
+ bc->stack.entry_size = stack_entry_size(family);
}
int r600_bytecode_add_cf(struct r600_bytecode *bc)
@@ -1522,8 +1557,8 @@ int r600_bytecode_build(struct r600_bytecode *bc)
unsigned addr;
int i, r;
- if (bc->callstack[0].max > 0)
- bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
+ bc->nstack = bc->stack.max_entries;
+
if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) {
bc->nstack = 1;
}
@@ -1824,8 +1859,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
chip = '6';
break;
}
- fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n",
- bc->ndw, bc->ngpr);
+ fprintf(stderr, "bytecode %d dw -- %d gprs -- %d nstack -------------\n",
+ bc->ndw, bc->ngpr, bc->nstack);
fprintf(stderr, "shader %d -- %c\n", index++, chip);
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {