From a1378639ab19682a818ee627745db7f67485d406 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 7 Feb 2019 00:01:13 -0500 Subject: radeonsi: always use compute rings for clover on CI and newer (v2) initialize all non-compute context functions to NULL. v2: fix SI --- src/gallium/drivers/radeonsi/si_blit.c | 14 ++-- src/gallium/drivers/radeonsi/si_clear.c | 7 +- src/gallium/drivers/radeonsi/si_compute.c | 15 +++-- src/gallium/drivers/radeonsi/si_descriptors.c | 10 ++- src/gallium/drivers/radeonsi/si_gfx_cs.c | 29 ++++---- src/gallium/drivers/radeonsi/si_pipe.c | 95 +++++++++++++++------------ src/gallium/drivers/radeonsi/si_pipe.h | 3 +- src/gallium/drivers/radeonsi/si_state.c | 3 +- src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 25 +++++-- src/gallium/drivers/radeonsi/si_texture.c | 3 + 11 files changed, 130 insertions(+), 75 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index bb8d1cbd12d..f39cb5d143f 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -1352,7 +1352,10 @@ static void si_flush_resource(struct pipe_context *ctx, void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex) { - if (!tex->dcc_offset) + /* If graphics is disabled, we can't decompress DCC, but it shouldn't + * be compressed either. The caller should simply discard it. + */ + if (!tex->dcc_offset || !sctx->has_graphics) return; si_blit_decompress_color(sctx, tex, 0, tex->buffer.b.b.last_level, @@ -1363,7 +1366,10 @@ void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex) void si_init_blit_functions(struct si_context *sctx) { sctx->b.resource_copy_region = si_resource_copy_region; - sctx->b.blit = si_blit; - sctx->b.flush_resource = si_flush_resource; - sctx->b.generate_mipmap = si_generate_mipmap; + + if (sctx->has_graphics) { + sctx->b.blit = si_blit; + sctx->b.flush_resource = si_flush_resource; + sctx->b.generate_mipmap = si_generate_mipmap; + } } diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index 9a00bb73b94..e1805f2a1c9 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -771,8 +771,11 @@ static void si_clear_texture(struct pipe_context *pipe, void si_init_clear_functions(struct si_context *sctx) { - sctx->b.clear = si_clear; sctx->b.clear_render_target = si_clear_render_target; - sctx->b.clear_depth_stencil = si_clear_depth_stencil; sctx->b.clear_texture = si_clear_texture; + + if (sctx->has_graphics) { + sctx->b.clear = si_clear; + sctx->b.clear_depth_stencil = si_clear_depth_stencil; + } } diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 1a62b3e0844..87addd53976 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -887,12 +887,14 @@ static void si_launch_grid( program->shader.compilation_failed) return; - if (sctx->last_num_draw_calls != sctx->num_draw_calls) { - si_update_fb_dirtiness_after_rendering(sctx); - sctx->last_num_draw_calls = sctx->num_draw_calls; - } + if (sctx->has_graphics) { + if (sctx->last_num_draw_calls != sctx->num_draw_calls) { + si_update_fb_dirtiness_after_rendering(sctx); + sctx->last_num_draw_calls = sctx->num_draw_calls; + } - si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE); + si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE); + } /* Add buffer sizes for memory checking in need_cs_space. */ si_context_add_resource_size(sctx, &program->shader.bo->b.b); @@ -924,7 +926,8 @@ static void si_launch_grid( si_upload_compute_shader_descriptors(sctx); si_emit_compute_shader_pointers(sctx); - if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) { + if (sctx->has_graphics && + si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) { sctx->atoms.s.render_cond.emit(sctx); si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond, false); } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 21d4ca946d3..0f22c55723c 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -2647,8 +2647,10 @@ void si_all_resident_buffers_begin_new_cs(struct si_context *sctx) void si_init_all_descriptors(struct si_context *sctx) { int i; + unsigned first_shader = + sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE; - for (i = 0; i < SI_NUM_SHADERS; i++) { + for (i = first_shader; i < SI_NUM_SHADERS; i++) { bool is_2nd = sctx->chip_class >= GFX9 && (i == PIPE_SHADER_TESS_CTRL || i == PIPE_SHADER_GEOMETRY); @@ -2721,7 +2723,6 @@ void si_init_all_descriptors(struct si_context *sctx) sctx->b.bind_sampler_states = si_bind_sampler_states; sctx->b.set_shader_images = si_set_shader_images; sctx->b.set_constant_buffer = si_pipe_set_constant_buffer; - sctx->b.set_polygon_stipple = si_set_polygon_stipple; sctx->b.set_shader_buffers = si_set_shader_buffers; sctx->b.set_sampler_views = si_set_sampler_views; sctx->b.create_texture_handle = si_create_texture_handle; @@ -2731,6 +2732,11 @@ void si_init_all_descriptors(struct si_context *sctx) sctx->b.delete_image_handle = si_delete_image_handle; sctx->b.make_image_handle_resident = si_make_image_handle_resident; + if (!sctx->has_graphics) + return; + + sctx->b.set_polygon_stipple = si_set_polygon_stipple; + /* Shader user data. */ sctx->atoms.s.shader_pointers.emit = si_emit_graphics_shader_pointers; diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 13d5b5a959a..922fc560d74 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -140,13 +140,15 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, if (radeon_emitted(ctx->dma_cs, 0)) si_flush_dma_cs(ctx, flags, NULL); - if (!LIST_IS_EMPTY(&ctx->active_queries)) - si_suspend_queries(ctx); - - ctx->streamout.suspended = false; - if (ctx->streamout.begin_emitted) { - si_emit_streamout_end(ctx); - ctx->streamout.suspended = true; + if (ctx->has_graphics) { + if (!LIST_IS_EMPTY(&ctx->active_queries)) + si_suspend_queries(ctx); + + ctx->streamout.suspended = false; + if (ctx->streamout.begin_emitted) { + si_emit_streamout_end(ctx); + ctx->streamout.suspended = true; + } } /* Make sure CP DMA is idle at the end of IBs after L2 prefetches @@ -246,6 +248,15 @@ void si_begin_new_gfx_cs(struct si_context *ctx) SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_START_PIPELINE_STATS; + ctx->cs_shader_state.initialized = false; + si_all_descriptors_begin_new_cs(ctx); + si_all_resident_buffers_begin_new_cs(ctx); + + if (!ctx->has_graphics) { + ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw; + return; + } + /* set all valid group as dirty so they get reemited on * next draw command */ @@ -310,8 +321,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx) /* CLEAR_STATE disables all window rectangles. */ if (!has_clear_state || ctx->num_window_rectangles > 0) si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles); - si_all_descriptors_begin_new_cs(ctx); - si_all_resident_buffers_begin_new_cs(ctx); ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; @@ -353,8 +362,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->last_num_tcs_input_cp = -1; ctx->last_ls_hs_config = -1; /* impossible value */ - ctx->cs_shader_state.initialized = false; - if (has_clear_state) { ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0x00000000; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index b965d9d64d4..9b1eab8284b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -389,16 +389,15 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, if (!sctx) return NULL; + sctx->has_graphics = sscreen->info.chip_class == SI || + !(flags & PIPE_CONTEXT_COMPUTE_ONLY); + if (flags & PIPE_CONTEXT_DEBUG) sscreen->record_llvm_ir = true; /* racy but not critical */ sctx->b.screen = screen; /* this must be set first */ sctx->b.priv = NULL; sctx->b.destroy = si_destroy_context; - sctx->b.emit_string_marker = si_emit_string_marker; - sctx->b.set_debug_callback = si_set_debug_callback; - sctx->b.set_log_context = si_set_log_context; - sctx->b.set_context_param = si_set_context_param; sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; @@ -414,11 +413,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->ws->query_value(sctx->ws, RADEON_GPU_RESET_COUNTER); } - sctx->b.get_device_reset_status = si_get_reset_status; - sctx->b.set_device_reset_callback = si_set_device_reset_callback; - - si_init_context_texture_functions(sctx); - si_init_query_functions(sctx); if (sctx->chip_class == CIK || sctx->chip_class == VI || @@ -430,6 +424,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, goto fail; } + /* Initialize context allocators. */ sctx->allocator_zeroed_memory = u_suballocator_create(&sctx->b, 128 * 1024, 0, PIPE_USAGE_DEFAULT, @@ -473,24 +468,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, if (use_sdma_upload) u_upload_enable_flush_explicit(sctx->b.const_uploader); - si_init_buffer_functions(sctx); - si_init_clear_functions(sctx); - si_init_blit_functions(sctx); - si_init_compute_functions(sctx); - si_init_compute_blit_functions(sctx); - si_init_debug_functions(sctx); - si_init_msaa_functions(sctx); - si_init_streamout_functions(sctx); - - if (sscreen->info.has_hw_decode) { - sctx->b.create_video_codec = si_uvd_create_decoder; - sctx->b.create_video_buffer = si_video_buffer_create; - } else { - sctx->b.create_video_codec = vl_create_decoder; - sctx->b.create_video_buffer = vl_video_buffer_create; - } - - sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX, + sctx->gfx_cs = ws->cs_create(sctx->ctx, + sctx->has_graphics ? RING_GFX : RING_COMPUTE, (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure); /* Border colors. */ @@ -512,29 +491,62 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, if (!sctx->border_color_map) goto fail; + /* Initialize context functions used by graphics and compute. */ + sctx->b.emit_string_marker = si_emit_string_marker; + sctx->b.set_debug_callback = si_set_debug_callback; + sctx->b.set_log_context = si_set_log_context; + sctx->b.set_context_param = si_set_context_param; + sctx->b.get_device_reset_status = si_get_reset_status; + sctx->b.set_device_reset_callback = si_set_device_reset_callback; + sctx->b.memory_barrier = si_memory_barrier; + si_init_all_descriptors(sctx); + si_init_buffer_functions(sctx); + si_init_clear_functions(sctx); + si_init_blit_functions(sctx); + si_init_compute_functions(sctx); + si_init_compute_blit_functions(sctx); + si_init_debug_functions(sctx); si_init_fence_functions(sctx); - si_init_state_functions(sctx); - si_init_shader_functions(sctx); - si_init_viewport_functions(sctx); - - if (sctx->chip_class >= CIK) - cik_init_sdma_functions(sctx); - else - si_init_dma_functions(sctx); if (sscreen->debug_flags & DBG(FORCE_DMA)) sctx->b.resource_copy_region = sctx->dma_copy; - sctx->blitter = util_blitter_create(&sctx->b); - if (sctx->blitter == NULL) - goto fail; - sctx->blitter->skip_viewport_restore = true; + /* Initialize graphics-only context functions. */ + if (sctx->has_graphics) { + si_init_context_texture_functions(sctx); + si_init_query_functions(sctx); + si_init_msaa_functions(sctx); + si_init_shader_functions(sctx); + si_init_state_functions(sctx); + si_init_streamout_functions(sctx); + si_init_viewport_functions(sctx); + + sctx->blitter = util_blitter_create(&sctx->b); + if (sctx->blitter == NULL) + goto fail; + sctx->blitter->skip_viewport_restore = true; - si_init_draw_functions(sctx); + si_init_draw_functions(sctx); + } + + /* Initialize SDMA functions. */ + if (sctx->chip_class >= CIK) + cik_init_sdma_functions(sctx); + else + si_init_dma_functions(sctx); sctx->sample_mask = 0xffff; + /* Initialize multimedia functions. */ + if (sscreen->info.has_hw_decode) { + sctx->b.create_video_codec = si_uvd_create_decoder; + sctx->b.create_video_buffer = si_video_buffer_create; + } else { + sctx->b.create_video_codec = vl_create_decoder; + sctx->b.create_video_buffer = vl_video_buffer_create; + } + if (sctx->chip_class >= GFX9) { sctx->wait_mem_scratch = si_resource( pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4)); @@ -558,7 +570,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, goto fail; sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0; - for (shader = 0; shader < SI_NUM_SHADERS; shader++) { + unsigned start_shader = sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE; + for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) { for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) { sctx->b.set_constant_buffer(&sctx->b, shader, i, &sctx->null_const_buf); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index b208bdeb848..b3198d45ea6 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -794,7 +794,7 @@ struct si_context { struct radeon_winsys *ws; struct radeon_winsys_ctx *ctx; - struct radeon_cmdbuf *gfx_cs; + struct radeon_cmdbuf *gfx_cs; /* compute IB if graphics is disabled */ struct radeon_cmdbuf *dma_cs; struct pipe_fence_handle *last_gfx_fence; struct pipe_fence_handle *last_sdma_fence; @@ -832,6 +832,7 @@ struct si_context { unsigned wait_mem_number; uint16_t prefetch_L2_mask; + bool has_graphics; bool gfx_flush_in_progress:1; bool gfx_last_ib_is_busy:1; bool compute_is_busy:1; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index b49a1b3695e..458b108a7e3 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -4706,7 +4706,7 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags) } /* This only ensures coherency for shader image/buffer stores. */ -static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) +void si_memory_barrier(struct pipe_context *ctx, unsigned flags) { struct si_context *sctx = (struct si_context *)ctx; @@ -4820,7 +4820,6 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.set_vertex_buffers = si_set_vertex_buffers; sctx->b.texture_barrier = si_texture_barrier; - sctx->b.memory_barrier = si_memory_barrier; sctx->b.set_min_samples = si_set_min_samples; sctx->b.set_tess_state = si_set_tess_state; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 767e789276a..6faa4c511b1 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -489,6 +489,7 @@ void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab); void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, uint64_t old_va); /* si_state.c */ +void si_memory_barrier(struct pipe_context *ctx, unsigned flags); void si_init_state_functions(struct si_context *sctx); void si_init_screen_state_functions(struct si_screen *sscreen); void diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 9c968e39c2c..2a514f144b9 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -879,7 +879,7 @@ static void si_emit_surface_sync(struct si_context *sctx, { struct radeon_cmdbuf *cs = sctx->gfx_cs; - if (sctx->chip_class >= GFX9) { + if (sctx->chip_class >= GFX9 || !sctx->has_graphics) { /* Flush caches and wait for the caches to assert idle. */ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0)); radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */ @@ -902,6 +902,18 @@ void si_emit_cache_flush(struct si_context *sctx) { struct radeon_cmdbuf *cs = sctx->gfx_cs; uint32_t flags = sctx->flags; + + if (!sctx->has_graphics) { + /* Only process compute flags. */ + flags &= SI_CONTEXT_INV_ICACHE | + SI_CONTEXT_INV_SMEM_L1 | + SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_WRITEBACK_GLOBAL_L2 | + SI_CONTEXT_INV_L2_METADATA | + SI_CONTEXT_CS_PARTIAL_FLUSH; + } + uint32_t cp_coher_cntl = 0; uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB); @@ -1068,11 +1080,12 @@ void si_emit_cache_flush(struct si_context *sctx) /* Make sure ME is idle (it executes most packets) before continuing. * This prevents read-after-write hazards between PFP and ME. */ - if (cp_coher_cntl || - (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH | - SI_CONTEXT_INV_VMEM_L1 | - SI_CONTEXT_INV_GLOBAL_L2 | - SI_CONTEXT_WRITEBACK_GLOBAL_L2))) { + if (sctx->has_graphics && + (cp_coher_cntl || + (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH | + SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) { radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(cs, 0); } diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index a50088d2d8f..581f90a7b2f 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -464,6 +464,9 @@ bool si_texture_disable_dcc(struct si_context *sctx, { struct si_screen *sscreen = sctx->screen; + if (!sctx->has_graphics) + return si_texture_discard_dcc(sscreen, tex); + if (!si_can_disable_dcc(tex)) return false; -- cgit v1.2.3