diff options
author | Nicolai Hähnle <[email protected]> | 2017-07-25 14:32:03 +0200 |
---|---|---|
committer | Emil Velikov <[email protected]> | 2017-08-03 00:19:06 +0100 |
commit | a50cb9a09498594313c7ba1389db608085e7f0c2 (patch) | |
tree | bea0e3af5079f846dc7ddf2c3336f0c8aceddc5c | |
parent | 28ae1aac86d2e70ecd94729a67f173277d0512ff (diff) |
radeonsi/gfx9: reduce max threads per block to 1024 on gfx9+
The number of supported waves per thread group has been reduced to 16
with gfx9. Trying to use 32 waves causes hangs, and barriers might
not work correctly with > 16 waves.
Cc: [email protected]
Reviewed-by: Marek Olšák <[email protected]>
(cherry picked from commit a0e6b9a2db5aa5f06a4f60d270aca8344e7d8b3f)
[Emil Velikov: add a HAVE_LLVM check, as applicable in branch]
Signed-off-by: Emil Velikov <[email protected]>
Conflicts:
src/gallium/drivers/radeon/r600_pipe_common.c
-rw-r--r-- | src/gallium/drivers/radeon/r600_pipe_common.c | 42 |
1 files changed, 27 insertions, 15 deletions
```diff
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 3b490404d39..2019ecdd577 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -1014,6 +1014,28 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
 	}
 }
 
+static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
+					  enum pipe_shader_ir ir_type)
+{
+	if (ir_type != PIPE_SHADER_IR_TGSI)
+		return 256;
+
+	if (HAVE_LLVM < 0x309)
+		return 256;
+
+	/* Only 16 waves per thread-group on gfx9. */
+	if (screen->chip_class >= GFX9)
+		return 1024;
+
+	/* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
+	 * round number.
+	 */
+	if (screen->chip_class >= SI)
+		return 2048;
+
+	return 256;
+}
+
 static int r600_get_compute_param(struct pipe_screen *screen,
 				  enum pipe_shader_ir ir_type,
 				  enum pipe_compute_cap param,
@@ -1068,27 +1090,17 @@ static int r600_get_compute_param(struct pipe_screen *screen,
 	case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
 		if (ret) {
 			uint64_t *block_size = ret;
-			if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
-			    ir_type == PIPE_SHADER_IR_TGSI) {
-				block_size[0] = 2048;
-				block_size[1] = 2048;
-				block_size[2] = 2048;
-			} else {
-				block_size[0] = 256;
-				block_size[1] = 256;
-				block_size[2] = 256;
-			}
+			unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
+			block_size[0] = threads_per_block;
+			block_size[1] = threads_per_block;
+			block_size[2] = threads_per_block;
 		}
 		return 3 * sizeof(uint64_t);
 
 	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
 		if (ret) {
 			uint64_t *max_threads_per_block = ret;
-			if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
-			    ir_type == PIPE_SHADER_IR_TGSI)
-				*max_threads_per_block = 2048;
-			else
-				*max_threads_per_block = 256;
+			*max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
 		}
 		return sizeof(uint64_t);
 	case PIPE_COMPUTE_CAP_ADDRESS_BITS:
```