diff options
author | Nicolai Hähnle <[email protected]> | 2018-09-19 14:53:35 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2019-07-03 15:51:13 -0400 |
commit | 792a638b032d16fbe6404f9d90c34b3e0f1fb0b5 (patch) | |
tree | 2fc020d12cded02beb595e8e979a347bd537b96f /src/gallium/drivers/radeonsi/si_pipe.h | |
parent | bcd2d2e1942ab7158dd46a5223130498cb0a8f44 (diff) |
radeonsi/gfx10: implement streamout-related queries
The NGG hardware pipeline doesn't track these statistics automatically,
and in fact *cannot* track them automatically when API geometry shaders
are involved, so we accumulate statistics in the shader using atomic
adds.
This implementation accumulates statistics via the memory system and
the RW buffer descriptor setup. We could use GDS, but since these
atomics aren't latency-sensitive, that basically just trades off
L2$ bandwidth vs. export bus bandwidth. One single memory transaction
per shader workgroup doesn't seem too bad. The result ring buffer in
memory is needed either way to avoid pipeline stalls.
The shader code contains the atomic unconditionally, though the
GFX10_GS_QUERY_BUF is a null buffer when no queries are active. The
atomic is simply discarded by the shader hardware in that case.
Acked-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_pipe.h')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 10 |
1 file changed, 10 insertions, 0 deletions
```diff
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index a351e5004b1..874b1bf4cd0 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -868,6 +868,7 @@ struct si_context {
 	struct pipe_device_reset_callback device_reset_callback;
 	struct u_log_context *log;
 	void *query_result_shader;
+	void *sh_query_result_shader;
 
 	void (*emit_cache_flush)(struct si_context *ctx);
 
@@ -1178,6 +1179,10 @@ struct si_context {
 	unsigned num_sdma_uploads;
 	unsigned max_sdma_uploads;
 
+	/* Shader-based queries. */
+	struct list_head shader_query_buffers;
+	unsigned num_active_shader_queries;
+
 	/* Statistics gathering for the DCC enablement heuristic. It can't be
 	 * in si_texture because si_texture can be shared by multiple
 	 * contexts. This is for back buffers only. We shouldn't get too many
@@ -1439,6 +1444,11 @@ void *si_clear_render_target_shader(struct pipe_context *ctx);
 void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx);
 void *si_create_dcc_retile_cs(struct pipe_context *ctx);
 void *si_create_query_result_cs(struct si_context *sctx);
+void *gfx10_create_sh_query_result_cs(struct si_context *sctx);
+
+/* gfx10_query.c */
+void gfx10_init_query(struct si_context *sctx);
+void gfx10_destroy_query(struct si_context *sctx);
 
 /* si_test_dma.c */
 void si_test_dma(struct si_screen *sscreen);
```