diff options
author | Marek Olšák <[email protected]> | 2020-01-09 16:41:13 -0500 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2020-01-13 15:57:07 -0500 |
commit | 2bb88b2fdc0d9271079e45a009ddbc6bfa6a0f37 (patch) | |
tree | 482cd438bfe5624218f5ab8b5878a1c96180aca3 /src/gallium | |
parent | 363b4027fcbae3cc69ff6e55989f900398c3968a (diff) |
radeonsi: don't enable VBOs in user SGPRs if compute-based culling can be used
Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute_prim_discard.c | 56 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 15 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 5 |
3 files changed, 46 insertions, 30 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
index 31c18e098e6..b6d92da8233 100644
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -187,36 +187,38 @@
 /* For emulating the rewind packet on CI. */
 #define FORCE_REWIND_EMULATION 0
 
-void si_initialize_prim_discard_tunables(struct si_context *sctx)
+void si_initialize_prim_discard_tunables(struct si_screen *sscreen,
+					 bool is_aux_context,
+					 unsigned *prim_discard_vertex_count_threshold,
+					 unsigned *index_ring_size_per_ib)
 {
-	sctx->prim_discard_vertex_count_threshold = UINT_MAX; /* disable */
+	*prim_discard_vertex_count_threshold = UINT_MAX; /* disable */
 
-	if (sctx->chip_class == GFX6 || /* SI support is not implemented */
-	    !sctx->screen->info.has_gds_ordered_append ||
-	    sctx->screen->debug_flags & DBG(NO_PD) ||
-	    /* If aux_context == NULL, we are initializing aux_context right now. */
-	    !sctx->screen->aux_context)
+	if (sscreen->info.chip_class == GFX6 || /* SI support is not implemented */
+	    !sscreen->info.has_gds_ordered_append ||
+	    sscreen->debug_flags & DBG(NO_PD) ||
+	    is_aux_context)
 		return;
 
 	/* TODO: enable this after the GDS kernel memory management is fixed */
 	bool enable_on_pro_graphics_by_default = false;
 
-	if (sctx->screen->debug_flags & DBG(ALWAYS_PD) ||
-	    sctx->screen->debug_flags & DBG(PD) ||
+	if (sscreen->debug_flags & DBG(ALWAYS_PD) ||
+	    sscreen->debug_flags & DBG(PD) ||
 	    (enable_on_pro_graphics_by_default &&
-	     sctx->screen->info.is_pro_graphics &&
-	     (sctx->family == CHIP_BONAIRE ||
-	      sctx->family == CHIP_HAWAII ||
-	      sctx->family == CHIP_TONGA ||
-	      sctx->family == CHIP_FIJI ||
-	      sctx->family == CHIP_POLARIS10 ||
-	      sctx->family == CHIP_POLARIS11 ||
-	      sctx->family == CHIP_VEGA10 ||
-	      sctx->family == CHIP_VEGA20))) {
-		sctx->prim_discard_vertex_count_threshold = 6000 * 3; /* 6K triangles */
-
-		if (sctx->screen->debug_flags & DBG(ALWAYS_PD))
-			sctx->prim_discard_vertex_count_threshold = 0; /* always enable */
+	     sscreen->info.is_pro_graphics &&
+	     (sscreen->info.family == CHIP_BONAIRE ||
+	      sscreen->info.family == CHIP_HAWAII ||
+	      sscreen->info.family == CHIP_TONGA ||
+	      sscreen->info.family == CHIP_FIJI ||
+	      sscreen->info.family == CHIP_POLARIS10 ||
+	      sscreen->info.family == CHIP_POLARIS11 ||
+	      sscreen->info.family == CHIP_VEGA10 ||
+	      sscreen->info.family == CHIP_VEGA20))) {
+		*prim_discard_vertex_count_threshold = 6000 * 3; /* 6K triangles */
+
+		if (sscreen->debug_flags & DBG(ALWAYS_PD))
+			*prim_discard_vertex_count_threshold = 0; /* always enable */
 
 		const uint32_t MB = 1024 * 1024;
 		const uint64_t GB = 1024 * 1024 * 1024;
@@ -224,12 +226,12 @@ void si_initialize_prim_discard_tunables(struct si_context *sctx)
 		/* The total size is double this per context.
 		 * Greater numbers allow bigger gfx IBs.
 		 */
-		if (sctx->screen->info.vram_size <= 2 * GB)
-			sctx->index_ring_size_per_ib = 64 * MB;
-		else if (sctx->screen->info.vram_size <= 4 * GB)
-			sctx->index_ring_size_per_ib = 128 * MB;
+		if (sscreen->info.vram_size <= 2 * GB)
+			*index_ring_size_per_ib = 64 * MB;
+		else if (sscreen->info.vram_size <= 4 * GB)
+			*index_ring_size_per_ib = 128 * MB;
 		else
-			sctx->index_ring_size_per_ib = 256 * MB;
+			*index_ring_size_per_ib = 256 * MB;
 	}
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index a69f6c07800..f37a4b222bb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -593,7 +593,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 		sctx->queued.named.rasterizer = sctx->discard_rasterizer_state;
 
 		si_init_draw_functions(sctx);
-		si_initialize_prim_discard_tunables(sctx);
+
+		/* If aux_context == NULL, we are initializing aux_context right now. */
+		bool is_aux_context = !sscreen->aux_context;
+		si_initialize_prim_discard_tunables(sscreen, is_aux_context,
+						    &sctx->prim_discard_vertex_count_threshold,
+						    &sctx->index_ring_size_per_ib);
 	}
 
 	/* Initialize SDMA functions. */
@@ -1092,7 +1097,14 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
 	if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
 		si_init_perfcounters(sscreen);
 
-	sscreen->num_vbos_in_user_sgprs = sscreen->info.chip_class >= GFX9 ? 5 : 1;
+	unsigned prim_discard_vertex_count_threshold, tmp;
+	si_initialize_prim_discard_tunables(sscreen, false,
+					    &prim_discard_vertex_count_threshold,
+					    &tmp);
+	/* Compute-shader-based culling doesn't support VBOs in user SGPRs,
+	 * so only enable them when culling is disabled (threshold == UINT_MAX). */
+	if (prim_discard_vertex_count_threshold == UINT_MAX)
+		sscreen->num_vbos_in_user_sgprs = sscreen->info.chip_class >= GFX9 ? 5 : 1;
 
 	/* Determine tessellation ring info. */
 	bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 &&
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 6c92dc1a81e..b0287688f9a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1437,7 +1437,10 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
 					  unsigned base_vertex,
 					  uint64_t input_indexbuf_va,
 					  unsigned input_indexbuf_max_elements);
-void si_initialize_prim_discard_tunables(struct si_context *sctx);
+void si_initialize_prim_discard_tunables(struct si_screen *sscreen,
+					 bool is_aux_context,
+					 unsigned *prim_discard_vertex_count_threshold,
+					 unsigned *index_ring_size_per_ib);
 
 /* si_pipe.c */
 void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler);