summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Marek Olšák <[email protected]> 2016-06-28 14:11:12 +0200
committer: Marek Olšák <[email protected]> 2016-06-29 16:34:22 +0200
commit: dd56d04568ab1a563a29d2900cca0ebc4cf13f77 (patch)
tree: d4b4c9b047a5a91dc1d92e6db4dc0d95bca2f9ce
parent: 9a71bf88582164413a021a2fc26c894512bd52af (diff)
radeonsi: set optimal VGT_HS_OFFCHIP_PARAM
ported from Vulkan

Reviewed-by: Edward O'Callaghan <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c 6
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.h 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 5
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 49
5 files changed, 49 insertions, 14 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index d83568150e1..f38ecc15ab4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -706,6 +706,12 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
si_init_perfcounters(sscreen);
+ /* Hawaii has a bug with offchip buffers > 256 that can be worked
+ * around by setting 4K granularity.
+ */
+ sscreen->tess_offchip_block_dw_size =
+ sscreen->b.family == CHIP_HAWAII ? 4096 : 8192;
+
sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;
pipe_mutex_init(sscreen->shader_parts_mutex);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index d1819058b92..ee64ecc6fd1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -82,6 +82,7 @@ struct u_suballocator;
struct si_screen {
struct r600_common_screen b;
unsigned gs_table_depth;
+ unsigned tess_offchip_block_dw_size;
/* Whether shaders are monolithic (1-part) or separate (3-part). */
bool use_monolithic_shaders;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 2e4923d7255..9361849f781 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -40,8 +40,6 @@
#define SI_NUM_IMAGES 16
#define SI_NUM_SHADER_BUFFERS 16
-#define SI_TESS_OFFCHIP_BLOCK_SIZE (8192 * 4)
-
struct si_screen;
struct si_shader;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index b9a7c144ace..35585107cd3 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -147,8 +147,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
output_patch_size));
/* Make sure the output data fits in the offchip buffer */
- *num_patches = MIN2(*num_patches, SI_TESS_OFFCHIP_BLOCK_SIZE /
- output_patch_size);
+ *num_patches = MIN2(*num_patches,
+ (sctx->screen->tess_offchip_block_dw_size * 4) /
+ output_patch_size);
/* Not necessary for correctness, but improves performance. The
* specific value is taken from the proprietary driver.
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 89490bd0c29..9aa4a7c8233 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1798,9 +1798,38 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
static void si_init_tess_factor_ring(struct si_context *sctx)
{
- unsigned offchip_blocks = sctx->b.chip_class >= CIK ? 256 : 64;
- assert(!sctx->tf_ring);
+ bool double_offchip_buffers = sctx->b.chip_class >= CIK;
+ unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
+ unsigned max_offchip_buffers = max_offchip_buffers_per_se *
+ sctx->screen->b.info.max_se;
+ unsigned offchip_granularity;
+
+ switch (sctx->screen->tess_offchip_block_dw_size) {
+ default:
+ assert(0);
+ /* fall through */
+ case 8192:
+ offchip_granularity = V_03093C_X_8K_DWORDS;
+ break;
+ case 4096:
+ offchip_granularity = V_03093C_X_4K_DWORDS;
+ break;
+ }
+ switch (sctx->b.chip_class) {
+ case SI:
+ max_offchip_buffers = MIN2(max_offchip_buffers, 126);
+ break;
+ case CIK:
+ max_offchip_buffers = MIN2(max_offchip_buffers, 508);
+ break;
+ case VI:
+ default:
+ max_offchip_buffers = MIN2(max_offchip_buffers, 512);
+ break;
+ }
+
+ assert(!sctx->tf_ring);
sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_DEFAULT,
32768 * sctx->screen->b.info.max_se);
@@ -1812,8 +1841,8 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
sctx->tess_offchip_ring = pipe_buffer_create(sctx->b.b.screen,
PIPE_BIND_CUSTOM,
PIPE_USAGE_DEFAULT,
- offchip_blocks *
- SI_TESS_OFFCHIP_BLOCK_SIZE);
+ max_offchip_buffers *
+ sctx->screen->tess_offchip_block_dw_size * 4);
if (!sctx->tess_offchip_ring)
return;
@@ -1821,24 +1850,24 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
/* Append these registers to the init config state. */
if (sctx->b.chip_class >= CIK) {
- unsigned offchip_buffering = offchip_blocks;
- if(sctx->b.chip_class >= VI)
- --offchip_buffering;
+ if (sctx->b.chip_class >= VI)
+ --max_offchip_buffers;
si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
S_030938_SIZE(sctx->tf_ring->width0 / 4));
si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
r600_resource(sctx->tf_ring)->gpu_address >> 8);
si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM,
- S_03093C_OFFCHIP_BUFFERING(offchip_buffering) |
- S_03093C_OFFCHIP_GRANULARITY(V_03093C_X_8K_DWORDS));
+ S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
+ S_03093C_OFFCHIP_GRANULARITY(offchip_granularity));
} else {
+ assert(offchip_granularity == V_03093C_X_8K_DWORDS);
si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE,
S_008988_SIZE(sctx->tf_ring->width0 / 4));
si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE,
r600_resource(sctx->tf_ring)->gpu_address >> 8);
si_pm4_set_reg(sctx->init_config, R_0089B0_VGT_HS_OFFCHIP_PARAM,
- S_0089B0_OFFCHIP_BUFFERING(offchip_blocks));
+ S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers));
}
/* Flush the context to re-emit the init_config state.