summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNicolai Hähnle <[email protected]>2017-07-25 14:32:03 +0200
committerEmil Velikov <[email protected]>2017-08-03 00:19:06 +0100
commita50cb9a09498594313c7ba1389db608085e7f0c2 (patch)
treebea0e3af5079f846dc7ddf2c3336f0c8aceddc5c
parent28ae1aac86d2e70ecd94729a67f173277d0512ff (diff)
radeonsi/gfx9: reduce max threads per block to 1024 on gfx9+
The number of supported waves per thread group has been reduced to 16 with gfx9. Trying to use 32 waves causes hangs, and barriers might not work correctly with > 16 waves. Cc: [email protected] Reviewed-by: Marek Olšák <[email protected]> (cherry picked from commit a0e6b9a2db5aa5f06a4f60d270aca8344e7d8b3f) [Emil Velikov: add a HAVE_LLVM check, as applicable in branch] Signed-off-by: Emil Velikov <[email protected]> Conflicts: src/gallium/drivers/radeon/r600_pipe_common.c
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c42
1 files changed, 27 insertions, 15 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 3b490404d39..2019ecdd577 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -1014,6 +1014,28 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
}
}
+static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
+ enum pipe_shader_ir ir_type)
+{
+ if (ir_type != PIPE_SHADER_IR_TGSI)
+ return 256;
+
+ if (HAVE_LLVM < 0x309)
+ return 256;
+
+ /* Only 16 waves per thread-group on gfx9. */
+ if (screen->chip_class >= GFX9)
+ return 1024;
+
+ /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
+ * round number.
+ */
+ if (screen->chip_class >= SI)
+ return 2048;
+
+ return 256;
+}
+
static int r600_get_compute_param(struct pipe_screen *screen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param,
@@ -1068,27 +1090,17 @@ static int r600_get_compute_param(struct pipe_screen *screen,
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
if (ret) {
uint64_t *block_size = ret;
- if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
- ir_type == PIPE_SHADER_IR_TGSI) {
- block_size[0] = 2048;
- block_size[1] = 2048;
- block_size[2] = 2048;
- } else {
- block_size[0] = 256;
- block_size[1] = 256;
- block_size[2] = 256;
- }
+ unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
+ block_size[0] = threads_per_block;
+ block_size[1] = threads_per_block;
+ block_size[2] = threads_per_block;
}
return 3 * sizeof(uint64_t);
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
if (ret) {
uint64_t *max_threads_per_block = ret;
- if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
- ir_type == PIPE_SHADER_IR_TGSI)
- *max_threads_per_block = 2048;
- else
- *max_threads_per_block = 256;
+ *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
}
return sizeof(uint64_t);
case PIPE_COMPUTE_CAP_ADDRESS_BITS: