diff options
author | Marek Olšák <[email protected]> | 2017-11-26 03:38:44 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2017-11-29 18:21:30 +0100 |
commit | 950221f9231eac6e76addf5e806e45fde6e35fc0 (patch) | |
tree | 9620f5c6b20f2b58ae99902756f633e6cd7e896b | |
parent | 4d1fe8f9646a9ad20ba8eedd8b9deac493e69989 (diff) |
radeonsi: remove r600_common_screen
Most files in gallium/radeon now include si_pipe.h.
chip_class and family are now here:
sscreen->info.family
sscreen->info.chip_class
Reviewed-by: Nicolai Hähnle <[email protected]>
40 files changed, 864 insertions, 873 deletions
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 55a2edb2eef..2992455d098 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -99,7 +99,7 @@ void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx, return ctx->ws->buffer_map(resource->buf, NULL, usage); } -void si_init_resource_fields(struct r600_common_screen *rscreen, +void si_init_resource_fields(struct si_screen *sscreen, struct r600_resource *res, uint64_t size, unsigned alignment) { @@ -124,8 +124,8 @@ void si_init_resource_fields(struct r600_common_screen *rscreen, /* Older kernels didn't always flush the HDP cache before * CS execution */ - if (rscreen->info.drm_major == 2 && - rscreen->info.drm_minor < 40) { + if (sscreen->info.drm_major == 2 && + sscreen->info.drm_minor < 40) { res->domains = RADEON_DOMAIN_GTT; res->flags |= RADEON_FLAG_GTT_WC; break; @@ -152,8 +152,8 @@ void si_init_resource_fields(struct r600_common_screen *rscreen, * ensures all CPU writes finish before the GPU * executes a command stream. */ - if (rscreen->info.drm_major == 2 && - rscreen->info.drm_minor < 40) + if (sscreen->info.drm_major == 2 && + sscreen->info.drm_minor < 40) res->domains = RADEON_DOMAIN_GTT; } @@ -178,14 +178,14 @@ void si_init_resource_fields(struct r600_common_screen *rscreen, * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only * placements even with a low amount of stolen VRAM. */ - if (!rscreen->info.has_dedicated_vram && - (rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) && + if (!sscreen->info.has_dedicated_vram && + (sscreen->info.drm_major < 3 || sscreen->info.drm_minor < 6) && res->domains == RADEON_DOMAIN_VRAM) { res->domains = RADEON_DOMAIN_VRAM_GTT; res->flags &= ~RADEON_FLAG_NO_CPU_ACCESS; /* disallowed with VRAM_GTT */ } - if (rscreen->debug_flags & DBG(NO_WC)) + if (sscreen->debug_flags & DBG(NO_WC)) res->flags &= ~RADEON_FLAG_GTT_WC; /* Set expected VRAM and GART usage for the buffer. */ @@ -199,20 +199,20 @@ void si_init_resource_fields(struct r600_common_screen *rscreen, res->max_forced_staging_uploads = res->b.max_forced_staging_uploads = - rscreen->info.has_dedicated_vram && - size >= rscreen->info.vram_vis_size / 4 ? 1 : 0; + sscreen->info.has_dedicated_vram && + size >= sscreen->info.vram_vis_size / 4 ? 1 : 0; } else if (res->domains & RADEON_DOMAIN_GTT) { res->gart_usage = size; } } -bool si_alloc_resource(struct r600_common_screen *rscreen, +bool si_alloc_resource(struct si_screen *sscreen, struct r600_resource *res) { struct pb_buffer *old_buf, *new_buf; /* Allocate a new resource. */ - new_buf = rscreen->ws->buffer_create(rscreen->ws, res->bo_size, + new_buf = sscreen->ws->buffer_create(sscreen->ws, res->bo_size, res->bo_alignment, res->domains, res->flags); if (!new_buf) { @@ -226,8 +226,8 @@ bool si_alloc_resource(struct r600_common_screen *rscreen, old_buf = res->buf; res->buf = new_buf; /* should be atomic */ - if (rscreen->info.has_virtual_memory) - res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf); + if (sscreen->info.has_virtual_memory) + res->gpu_address = sscreen->ws->buffer_get_virtual_address(res->buf); else res->gpu_address = 0; @@ -237,7 +237,7 @@ bool si_alloc_resource(struct r600_common_screen *rscreen, res->TC_L2_dirty = false; /* Print debug information. */ - if (rscreen->debug_flags & DBG(VM) && res->b.b.target == PIPE_BUFFER) { + if (sscreen->debug_flags & DBG(VM) && res->b.b.target == PIPE_BUFFER) { fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n", res->gpu_address, res->gpu_address + res->buf->size, res->buf->size); @@ -621,15 +621,15 @@ static struct pipe_resource *si_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ, unsigned alignment) { - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct si_screen *sscreen = (struct si_screen*)screen; struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); - si_init_resource_fields(rscreen, rbuffer, templ->width0, alignment); + si_init_resource_fields(sscreen, rbuffer, templ->width0, alignment); if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) rbuffer->flags |= RADEON_FLAG_SPARSE; - if (!si_alloc_resource(rscreen, rbuffer)) { + if (!si_alloc_resource(sscreen, rbuffer)) { FREE(rbuffer); return NULL; } @@ -662,8 +662,8 @@ si_buffer_from_user_memory(struct pipe_screen *screen, const struct pipe_resource *templ, void *user_memory) { - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct radeon_winsys *ws = rscreen->ws; + struct si_screen *sscreen = (struct si_screen*)screen; + struct radeon_winsys *ws = sscreen->ws; struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); rbuffer->domains = RADEON_DOMAIN_GTT; @@ -679,7 +679,7 @@ si_buffer_from_user_memory(struct pipe_screen *screen, return NULL; } - if (rscreen->info.has_virtual_memory) + if (sscreen->info.has_virtual_memory) rbuffer->gpu_address = ws->buffer_get_virtual_address(rbuffer->buf); else @@ -703,9 +703,9 @@ static struct pipe_resource *si_resource_create(struct pipe_screen *screen, void si_init_screen_buffer_functions(struct si_screen *sscreen) { - sscreen->b.b.resource_create = si_resource_create; - sscreen->b.b.resource_destroy = u_resource_destroy_vtbl; - sscreen->b.b.resource_from_user_memory = si_buffer_from_user_memory; + sscreen->b.resource_create = si_resource_create; + sscreen->b.resource_destroy = u_resource_destroy_vtbl; + sscreen->b.resource_from_user_memory = si_buffer_from_user_memory; } void si_init_buffer_functions(struct si_context *sctx) diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h index 79737d21a31..2f555dca2ed 100644 --- a/src/gallium/drivers/radeon/r600_cs.h +++ b/src/gallium/drivers/radeon/r600_cs.h @@ -28,7 +28,7 @@ #ifndef R600_CS_H #define R600_CS_H -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "amd/common/sid.h" /** @@ -39,7 +39,7 @@ * \param gtt GTT memory size not added to the buffer list yet */ static inline bool -radeon_cs_memory_below_limit(struct r600_common_screen *screen, +radeon_cs_memory_below_limit(struct si_screen *screen, struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt) { diff --git a/src/gallium/drivers/radeon/r600_gpu_load.c b/src/gallium/drivers/radeon/r600_gpu_load.c index 411dcfe8724..c1fdf206ced 100644 --- a/src/gallium/drivers/radeon/r600_gpu_load.c +++ b/src/gallium/drivers/radeon/r600_gpu_load.c @@ -31,7 +31,7 @@ * GPU load between the two samples. */ -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "r600_query.h" #include "util/os_time.h" @@ -76,14 +76,14 @@ p_atomic_inc(&counters->named.field.idle); \ } while (0) -static void r600_update_mmio_counters(struct r600_common_screen *rscreen, +static void r600_update_mmio_counters(struct si_screen *sscreen, union r600_mmio_counters *counters) { uint32_t value = 0; bool gui_busy, sdma_busy = false; /* GRBM_STATUS */ - rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value); + sscreen->ws->read_registers(sscreen->ws, GRBM_STATUS, 1, &value); UPDATE_COUNTER(ta, TA_BUSY); UPDATE_COUNTER(gds, GDS_BUSY); @@ -101,17 +101,17 @@ static void r600_update_mmio_counters(struct r600_common_screen *rscreen, UPDATE_COUNTER(gui, GUI_ACTIVE); gui_busy = GUI_ACTIVE(value); - if (rscreen->chip_class == CIK || rscreen->chip_class == VI) { + if (sscreen->info.chip_class == CIK || sscreen->info.chip_class == VI) { /* SRBM_STATUS2 */ - rscreen->ws->read_registers(rscreen->ws, SRBM_STATUS2, 1, &value); + sscreen->ws->read_registers(sscreen->ws, SRBM_STATUS2, 1, &value); UPDATE_COUNTER(sdma, SDMA_BUSY); sdma_busy = SDMA_BUSY(value); } - if (rscreen->chip_class >= VI) { + if (sscreen->info.chip_class >= VI) { /* CP_STAT */ - rscreen->ws->read_registers(rscreen->ws, CP_STAT, 1, &value); + sscreen->ws->read_registers(sscreen->ws, CP_STAT, 1, &value); UPDATE_COUNTER(pfp, PFP_BUSY); UPDATE_COUNTER(meq, MEQ_BUSY); @@ -130,12 +130,12 @@ static void r600_update_mmio_counters(struct r600_common_screen *rscreen, static int r600_gpu_load_thread(void *param) { - struct r600_common_screen *rscreen = (struct r600_common_screen*)param; + struct si_screen *sscreen = (struct si_screen*)param; const int period_us = 1000000 / SAMPLES_PER_SEC; int sleep_us = period_us; int64_t cur_time, last_time = os_time_get(); - while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) { + while (!p_atomic_read(&sscreen->gpu_load_stop_thread)) { if (sleep_us) os_time_sleep(sleep_us); @@ -153,45 +153,45 @@ r600_gpu_load_thread(void *param) last_time = cur_time; /* Update the counters. */ - r600_update_mmio_counters(rscreen, &rscreen->mmio_counters); + r600_update_mmio_counters(sscreen, &sscreen->mmio_counters); } - p_atomic_dec(&rscreen->gpu_load_stop_thread); + p_atomic_dec(&sscreen->gpu_load_stop_thread); return 0; } -void si_gpu_load_kill_thread(struct r600_common_screen *rscreen) +void si_gpu_load_kill_thread(struct si_screen *sscreen) { - if (!rscreen->gpu_load_thread) + if (!sscreen->gpu_load_thread) return; - p_atomic_inc(&rscreen->gpu_load_stop_thread); - thrd_join(rscreen->gpu_load_thread, NULL); - rscreen->gpu_load_thread = 0; + p_atomic_inc(&sscreen->gpu_load_stop_thread); + thrd_join(sscreen->gpu_load_thread, NULL); + sscreen->gpu_load_thread = 0; } -static uint64_t r600_read_mmio_counter(struct r600_common_screen *rscreen, +static uint64_t r600_read_mmio_counter(struct si_screen *sscreen, unsigned busy_index) { /* Start the thread if needed. */ - if (!rscreen->gpu_load_thread) { - mtx_lock(&rscreen->gpu_load_mutex); + if (!sscreen->gpu_load_thread) { + mtx_lock(&sscreen->gpu_load_mutex); /* Check again inside the mutex. */ - if (!rscreen->gpu_load_thread) - rscreen->gpu_load_thread = - u_thread_create(r600_gpu_load_thread, rscreen); - mtx_unlock(&rscreen->gpu_load_mutex); + if (!sscreen->gpu_load_thread) + sscreen->gpu_load_thread = + u_thread_create(r600_gpu_load_thread, sscreen); + mtx_unlock(&sscreen->gpu_load_mutex); } - unsigned busy = p_atomic_read(&rscreen->mmio_counters.array[busy_index]); - unsigned idle = p_atomic_read(&rscreen->mmio_counters.array[busy_index + 1]); + unsigned busy = p_atomic_read(&sscreen->mmio_counters.array[busy_index]); + unsigned idle = p_atomic_read(&sscreen->mmio_counters.array[busy_index + 1]); return busy | ((uint64_t)idle << 32); } -static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen, +static unsigned r600_end_mmio_counter(struct si_screen *sscreen, uint64_t begin, unsigned busy_index) { - uint64_t end = r600_read_mmio_counter(rscreen, busy_index); + uint64_t end = r600_read_mmio_counter(sscreen, busy_index); unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff); unsigned idle = (end >> 32) - (begin >> 32); @@ -207,7 +207,7 @@ static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen, union r600_mmio_counters counters; memset(&counters, 0, sizeof(counters)); - r600_update_mmio_counters(rscreen, &counters); + r600_update_mmio_counters(sscreen, &counters); return counters.array[busy_index] ? 100 : 0; } } @@ -215,66 +215,66 @@ static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen, #define BUSY_INDEX(rscreen, field) (&rscreen->mmio_counters.named.field.busy - \ rscreen->mmio_counters.array) -static unsigned busy_index_from_type(struct r600_common_screen *rscreen, +static unsigned busy_index_from_type(struct si_screen *sscreen, unsigned type) { switch (type) { case R600_QUERY_GPU_LOAD: - return BUSY_INDEX(rscreen, gpu); + return BUSY_INDEX(sscreen, gpu); case R600_QUERY_GPU_SHADERS_BUSY: - return BUSY_INDEX(rscreen, spi); + return BUSY_INDEX(sscreen, spi); case R600_QUERY_GPU_TA_BUSY: - return BUSY_INDEX(rscreen, ta); + return BUSY_INDEX(sscreen, ta); case R600_QUERY_GPU_GDS_BUSY: - return BUSY_INDEX(rscreen, gds); + return BUSY_INDEX(sscreen, gds); case R600_QUERY_GPU_VGT_BUSY: - return BUSY_INDEX(rscreen, vgt); + return BUSY_INDEX(sscreen, vgt); case R600_QUERY_GPU_IA_BUSY: - return BUSY_INDEX(rscreen, ia); + return BUSY_INDEX(sscreen, ia); case R600_QUERY_GPU_SX_BUSY: - return BUSY_INDEX(rscreen, sx); + return BUSY_INDEX(sscreen, sx); case R600_QUERY_GPU_WD_BUSY: - return BUSY_INDEX(rscreen, wd); + return BUSY_INDEX(sscreen, wd); case R600_QUERY_GPU_BCI_BUSY: - return BUSY_INDEX(rscreen, bci); + return BUSY_INDEX(sscreen, bci); case R600_QUERY_GPU_SC_BUSY: - return BUSY_INDEX(rscreen, sc); + return BUSY_INDEX(sscreen, sc); case R600_QUERY_GPU_PA_BUSY: - return BUSY_INDEX(rscreen, pa); + return BUSY_INDEX(sscreen, pa); case R600_QUERY_GPU_DB_BUSY: - return BUSY_INDEX(rscreen, db); + return BUSY_INDEX(sscreen, db); case R600_QUERY_GPU_CP_BUSY: - return BUSY_INDEX(rscreen, cp); + return BUSY_INDEX(sscreen, cp); case R600_QUERY_GPU_CB_BUSY: - return BUSY_INDEX(rscreen, cb); + return BUSY_INDEX(sscreen, cb); case R600_QUERY_GPU_SDMA_BUSY: - return BUSY_INDEX(rscreen, sdma); + return BUSY_INDEX(sscreen, sdma); case R600_QUERY_GPU_PFP_BUSY: - return BUSY_INDEX(rscreen, pfp); + return BUSY_INDEX(sscreen, pfp); case R600_QUERY_GPU_MEQ_BUSY: - return BUSY_INDEX(rscreen, meq); + return BUSY_INDEX(sscreen, meq); case R600_QUERY_GPU_ME_BUSY: - return BUSY_INDEX(rscreen, me); + return BUSY_INDEX(sscreen, me); case R600_QUERY_GPU_SURF_SYNC_BUSY: - return BUSY_INDEX(rscreen, surf_sync); + return BUSY_INDEX(sscreen, surf_sync); case R600_QUERY_GPU_CP_DMA_BUSY: - return BUSY_INDEX(rscreen, cp_dma); + return BUSY_INDEX(sscreen, cp_dma); case R600_QUERY_GPU_SCRATCH_RAM_BUSY: - return BUSY_INDEX(rscreen, scratch_ram); + return BUSY_INDEX(sscreen, scratch_ram); default: unreachable("invalid query type"); } } -uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type) +uint64_t si_begin_counter(struct si_screen *sscreen, unsigned type) { - unsigned busy_index = busy_index_from_type(rscreen, type); - return r600_read_mmio_counter(rscreen, busy_index); + unsigned busy_index = busy_index_from_type(sscreen, type); + return r600_read_mmio_counter(sscreen, busy_index); } -unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type, +unsigned si_end_counter(struct si_screen *sscreen, unsigned type, uint64_t begin) { - unsigned busy_index = busy_index_from_type(rscreen, type); - return r600_end_mmio_counter(rscreen, begin, busy_index); + unsigned busy_index = busy_index_from_type(sscreen, type); + return r600_end_mmio_counter(sscreen, begin, busy_index); } diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c index d88accc614a..711baef9817 100644 --- a/src/gallium/drivers/radeon/r600_perfcounter.c +++ b/src/gallium/drivers/radeon/r600_perfcounter.c @@ -23,7 +23,7 @@ #include "util/u_memory.h" #include "r600_query.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "amd/common/sid.h" /* Max counters per HW block */ @@ -95,7 +95,7 @@ struct r600_query_pc { struct r600_pc_group *groups; }; -static void r600_pc_query_destroy(struct r600_common_screen *rscreen, +static void r600_pc_query_destroy(struct si_screen *sscreen, struct r600_query *rquery) { struct r600_query_pc *query = (struct r600_query_pc *)rquery; @@ -108,10 +108,10 @@ static void r600_pc_query_destroy(struct r600_common_screen *rscreen, FREE(query->counters); - si_query_hw_destroy(rscreen, rquery); + si_query_hw_destroy(sscreen, rquery); } -static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen, +static bool r600_pc_query_prepare_buffer(struct si_screen *screen, struct r600_query_hw *hwquery, struct r600_resource *buffer) { @@ -192,7 +192,7 @@ static void r600_pc_query_clear_result(struct r600_query_hw *hwquery, memset(result, 0, sizeof(result->batch[0]) * query->num_counters); } -static void r600_pc_query_add_result(struct r600_common_screen *rscreen, +static void r600_pc_query_add_result(struct si_screen *sscreen, struct r600_query_hw *hwquery, void *buffer, union pipe_query_result *result) @@ -226,7 +226,7 @@ static struct r600_query_hw_ops batch_query_hw_ops = { .add_result = r600_pc_query_add_result, }; -static struct r600_pc_group *get_group_state(struct r600_common_screen *screen, +static struct r600_pc_group *get_group_state(struct si_screen *screen, struct r600_query_pc *query, struct r600_perfcounter_block *block, unsigned sub_gid) @@ -297,8 +297,8 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_queries, unsigned *query_types) { - struct r600_common_screen *screen = - (struct r600_common_screen *)ctx->screen; + struct si_screen *screen = + (struct si_screen *)ctx->screen; struct r600_perfcounters *pc = screen->perfcounters; struct r600_perfcounter_block *block; struct r600_pc_group *group; @@ -423,7 +423,7 @@ error: return NULL; } -static bool r600_init_block_names(struct r600_common_screen *screen, +static bool r600_init_block_names(struct si_screen *screen, struct r600_perfcounter_block *block) { unsigned i, j, k; @@ -507,7 +507,7 @@ static bool r600_init_block_names(struct r600_common_screen *screen, return true; } -int si_get_perfcounter_info(struct r600_common_screen *screen, +int si_get_perfcounter_info(struct si_screen *screen, unsigned index, struct pipe_driver_query_info *info) { @@ -549,7 +549,7 @@ int si_get_perfcounter_info(struct r600_common_screen *screen, return 1; } -int si_get_perfcounter_group_info(struct r600_common_screen *screen, +int si_get_perfcounter_group_info(struct si_screen *screen, unsigned index, struct pipe_driver_query_group_info *info) { @@ -576,10 +576,10 @@ int si_get_perfcounter_group_info(struct r600_common_screen *screen, return 1; } -void si_perfcounters_destroy(struct r600_common_screen *rscreen) +void si_perfcounters_destroy(struct si_screen *sscreen) { - if (rscreen->perfcounters) - rscreen->perfcounters->cleanup(rscreen); + if (sscreen->perfcounters) + sscreen->perfcounters->cleanup(sscreen); } bool si_perfcounters_init(struct r600_perfcounters *pc, @@ -595,7 +595,7 @@ bool si_perfcounters_init(struct r600_perfcounters *pc, return true; } -void si_perfcounters_add_block(struct r600_common_screen *rscreen, +void si_perfcounters_add_block(struct si_screen *sscreen, struct r600_perfcounters *pc, const char *name, unsigned flags, unsigned counters, unsigned selectors, @@ -624,7 +624,7 @@ void si_perfcounters_add_block(struct r600_common_screen *rscreen, } if (block->flags & R600_PC_BLOCK_SE_GROUPS) - block->num_groups *= rscreen->info.max_se; + block->num_groups *= sscreen->info.max_se; if (block->flags & R600_PC_BLOCK_SHADER) block->num_groups *= pc->num_shader_types; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index e8ee79256b7..08eb40675a6 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -127,12 +127,12 @@ void si_gfx_write_event_eop(struct r600_common_context *ctx, } } -unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen) +unsigned si_gfx_write_fence_dwords(struct si_screen *screen) { unsigned dwords = 6; - if (screen->chip_class == CIK || - screen->chip_class == VI) + if (screen->info.chip_class == CIK || + screen->info.chip_class == VI) dwords *= 2; if (!screen->info.has_virtual_memory) @@ -403,20 +403,20 @@ static bool r600_resource_commit(struct pipe_context *pctx, } bool si_common_context_init(struct r600_common_context *rctx, - struct r600_common_screen *rscreen, + struct si_screen *sscreen, unsigned context_flags) { - slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers); - slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers); + slab_create_child(&rctx->pool_transfers, &sscreen->pool_transfers); + slab_create_child(&rctx->pool_transfers_unsync, &sscreen->pool_transfers); - rctx->screen = rscreen; - rctx->ws = rscreen->ws; - rctx->family = rscreen->family; - rctx->chip_class = rscreen->chip_class; + rctx->screen = sscreen; + rctx->ws = sscreen->ws; + rctx->family = sscreen->info.family; + rctx->chip_class = sscreen->info.chip_class; rctx->b.resource_commit = r600_resource_commit; - if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) { + if (sscreen->info.drm_major == 2 && sscreen->info.drm_minor >= 43) { rctx->b.get_device_reset_status = r600_get_reset_status; rctx->gpu_reset_counter = rctx->ws->query_value(rctx->ws, @@ -432,14 +432,14 @@ bool si_common_context_init(struct r600_common_context *rctx, rctx->chip_class == VI || rctx->chip_class == GFX9) { rctx->eop_bug_scratch = (struct r600_resource*) - pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT, - 16 * rscreen->info.num_render_backends); + pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT, + 16 * sscreen->info.num_render_backends); if (!rctx->eop_bug_scratch) return false; } rctx->allocator_zeroed_memory = - u_suballocator_create(&rctx->b, rscreen->info.gart_page_size, + u_suballocator_create(&rctx->b, sscreen->info.gart_page_size, 0, PIPE_USAGE_DEFAULT, 0, true); if (!rctx->allocator_zeroed_memory) return false; @@ -458,7 +458,7 @@ bool si_common_context_init(struct r600_common_context *rctx, if (!rctx->ctx) return false; - if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG(NO_ASYNC_DMA))) { + if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) { rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA, r600_flush_dma_ring, rctx); @@ -511,13 +511,13 @@ void si_common_context_cleanup(struct r600_common_context *rctx) } -void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, +void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value) { - struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context; + struct r600_common_context *rctx = (struct r600_common_context*)sscreen->aux_context; - mtx_lock(&rscreen->aux_context_lock); + mtx_lock(&sscreen->aux_context_lock); rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value); - rscreen->aux_context->flush(rscreen->aux_context, NULL, 0); - mtx_unlock(&rscreen->aux_context_lock); + sscreen->aux_context->flush(sscreen->aux_context, NULL, 0); + mtx_unlock(&sscreen->aux_context_lock); } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index e9b6d46d02f..498a7418c0a 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -359,78 +359,6 @@ struct r600_memory_object { uint32_t offset; }; -struct r600_common_screen { - struct pipe_screen b; - struct radeon_winsys *ws; - enum radeon_family family; - enum chip_class chip_class; - struct radeon_info info; - uint64_t debug_flags; - bool has_rbplus; /* if RB+ registers exist */ - bool rbplus_allowed; /* if RB+ is allowed */ - bool dcc_msaa_allowed; - - struct disk_cache *disk_shader_cache; - - struct slab_parent_pool pool_transfers; - - /* Texture filter settings. */ - int force_aniso; /* -1 = disabled */ - - /* Auxiliary context. Mainly used to initialize resources. - * It must be locked prior to using and flushed before unlocking. */ - struct pipe_context *aux_context; - mtx_t aux_context_lock; - - /* This must be in the screen, because UE4 uses one context for - * compilation and another one for rendering. - */ - unsigned num_compilations; - /* Along with ST_DEBUG=precompile, this should show if applications - * are loading shaders on demand. This is a monotonic counter. - */ - unsigned num_shaders_created; - unsigned num_shader_cache_hits; - - /* GPU load thread. */ - mtx_t gpu_load_mutex; - thrd_t gpu_load_thread; - union r600_mmio_counters mmio_counters; - volatile unsigned gpu_load_stop_thread; /* bool */ - - char renderer_string[100]; - - /* Performance counters. */ - struct r600_perfcounters *perfcounters; - - /* If pipe_screen wants to recompute and re-emit the framebuffer, - * sampler, and image states of all contexts, it should atomically - * increment this. - * - * Each context will compare this with its own last known value of - * the counter before drawing and re-emit the states accordingly. - */ - unsigned dirty_tex_counter; - - /* Atomically increment this counter when an existing texture's - * metadata is enabled or disabled in a way that requires changing - * contexts' compressed texture binding masks. - */ - unsigned compressed_colortex_counter; - - struct { - /* Context flags to set so that all writes from earlier jobs - * in the CP are seen by L2 clients. - */ - unsigned cp_to_L2; - - /* Context flags to set so that all writes from earlier jobs - * that end in L2 are seen by CP. - */ - unsigned L2_to_cp; - } barrier_flags; -}; - /* This encapsulates a state or an operation which can emitted into the GPU * command stream. */ struct r600_atom { @@ -456,7 +384,7 @@ struct radeon_saved_cs { struct r600_common_context { struct pipe_context b; /* base class */ - struct r600_common_screen *screen; + struct si_screen *screen; struct radeon_winsys *ws; struct radeon_winsys_ctx *ctx; enum radeon_family family; @@ -601,10 +529,10 @@ bool si_rings_is_buffer_referenced(struct r600_common_context *ctx, void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx, struct r600_resource *resource, unsigned usage); -void si_init_resource_fields(struct r600_common_screen *rscreen, +void si_init_resource_fields(struct si_screen *sscreen, struct r600_resource *res, uint64_t size, unsigned alignment); -bool si_alloc_resource(struct r600_common_screen *rscreen, +bool si_alloc_resource(struct si_screen *sscreen, struct r600_resource *res); struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen, unsigned flags, @@ -623,14 +551,14 @@ void si_gfx_write_event_eop(struct r600_common_context *ctx, unsigned data_sel, struct r600_resource *buf, uint64_t va, uint32_t new_fence, unsigned query_type); -unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen); +unsigned si_gfx_write_fence_dwords(struct si_screen *screen); void si_gfx_wait_fence(struct r600_common_context *ctx, uint64_t va, uint32_t ref, uint32_t mask); bool si_common_context_init(struct r600_common_context *rctx, - struct r600_common_screen *rscreen, + struct si_screen *sscreen, unsigned context_flags); void si_common_context_cleanup(struct r600_common_context *rctx); -void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, +void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value); void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, struct r600_resource *dst, struct r600_resource *src); @@ -640,16 +568,16 @@ void si_clear_saved_cs(struct radeon_saved_cs *saved); bool si_check_device_reset(struct r600_common_context *rctx); /* r600_gpu_load.c */ -void si_gpu_load_kill_thread(struct r600_common_screen *rscreen); -uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type); -unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type, +void si_gpu_load_kill_thread(struct si_screen *sscreen); +uint64_t si_begin_counter(struct si_screen *sscreen, unsigned type); +unsigned si_end_counter(struct si_screen *sscreen, unsigned type, uint64_t begin); /* r600_perfcounters.c */ -void si_perfcounters_destroy(struct r600_common_screen *rscreen); +void si_perfcounters_destroy(struct si_screen *sscreen); /* r600_query.c */ -void si_init_screen_query_functions(struct r600_common_screen *rscreen); +void si_init_screen_query_functions(struct si_screen *sscreen); void si_init_query_functions(struct r600_common_context *rctx); void si_suspend_queries(struct r600_common_context *ctx); void si_resume_queries(struct r600_common_context *ctx); @@ -662,17 +590,17 @@ bool si_prepare_for_dma_blit(struct r600_common_context *rctx, struct r600_texture *rsrc, unsigned src_level, const struct pipe_box *src_box); -void si_texture_get_fmask_info(struct r600_common_screen *rscreen, +void si_texture_get_fmask_info(struct si_screen *sscreen, struct r600_texture *rtex, unsigned nr_samples, struct r600_fmask_info *out); -void si_texture_get_cmask_info(struct r600_common_screen *rscreen, +void si_texture_get_cmask_info(struct si_screen *sscreen, struct r600_texture *rtex, struct r600_cmask_info *out); bool si_init_flushed_depth_texture(struct pipe_context *ctx, struct pipe_resource *texture, struct r600_texture **staging); -void si_print_texture_info(struct r600_common_screen *rscreen, +void si_print_texture_info(struct si_screen *sscreen, struct r600_texture *rtex, struct u_log_context *log); struct pipe_resource *si_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ); @@ -701,7 +629,7 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, struct r600_texture *tex); bool si_texture_disable_dcc(struct r600_common_context *rctx, struct r600_texture *rtex); -void si_init_screen_texture_functions(struct r600_common_screen *rscreen); +void si_init_screen_texture_functions(struct si_screen *sscreen); void si_init_context_texture_functions(struct r600_common_context *rctx); diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 8e724f3c644..e736875d7cf 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -55,12 +55,12 @@ struct r600_query_sw { struct pipe_fence_handle *fence; }; -static void r600_query_sw_destroy(struct r600_common_screen *rscreen, +static void r600_query_sw_destroy(struct si_screen *sscreen, struct r600_query *rquery) { struct r600_query_sw *query = (struct r600_query_sw *)rquery; - rscreen->b.fence_reference(&rscreen->b, &query->fence, NULL); + sscreen->b.fence_reference(&sscreen->b, &query->fence, NULL); FREE(query); } @@ -499,7 +499,7 @@ static struct pipe_query *r600_query_sw_create(unsigned query_type) return (struct pipe_query *)query; } -void si_query_hw_destroy(struct r600_common_screen *rscreen, +void si_query_hw_destroy(struct si_screen *sscreen, struct r600_query *rquery) { struct r600_query_hw *query = (struct r600_query_hw *)rquery; @@ -518,23 +518,23 @@ void si_query_hw_destroy(struct r600_common_screen *rscreen, FREE(rquery); } -static struct r600_resource *r600_new_query_buffer(struct r600_common_screen *rscreen, +static struct r600_resource *r600_new_query_buffer(struct si_screen *sscreen, struct r600_query_hw *query) { unsigned buf_size = MAX2(query->result_size, - rscreen->info.min_alloc_size); + sscreen->info.min_alloc_size); /* Queries are normally read by the CPU after * being written by the gpu, hence staging is probably a good * usage pattern. */ struct r600_resource *buf = (struct r600_resource*) - pipe_buffer_create(&rscreen->b, 0, + pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_STAGING, buf_size); if (!buf) return NULL; - if (!query->ops->prepare_buffer(rscreen, query, buf)) { + if (!query->ops->prepare_buffer(sscreen, query, buf)) { r600_resource_reference(&buf, NULL); return NULL; } @@ -542,12 +542,12 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_screen *rs return buf; } -static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen, +static bool r600_query_hw_prepare_buffer(struct si_screen *sscreen, struct r600_query_hw *query, struct r600_resource *buffer) { /* Callers ensure that the buffer is currently unused by the GPU. */ - uint32_t *results = rscreen->ws->buffer_map(buffer->buf, NULL, + uint32_t *results = sscreen->ws->buffer_map(buffer->buf, NULL, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED); if (!results) @@ -558,8 +558,8 @@ static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen, if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER || query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE || query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { - unsigned max_rbs = rscreen->info.num_render_backends; - unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask; + unsigned max_rbs = sscreen->info.num_render_backends; + unsigned enabled_rb_mask = sscreen->info.enabled_rb_mask; unsigned num_results; unsigned i, j; @@ -603,7 +603,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, struct r600_query_hw *query, struct r600_resource *buffer, uint64_t va); -static void r600_query_hw_add_result(struct r600_common_screen *rscreen, +static void r600_query_hw_add_result(struct si_screen *sscreen, struct r600_query_hw *, void *buffer, union pipe_query_result *result); static void r600_query_hw_clear_result(struct r600_query_hw *, @@ -617,17 +617,17 @@ static struct r600_query_hw_ops query_hw_default_hw_ops = { .add_result = r600_query_hw_add_result, }; -bool si_query_hw_init(struct r600_common_screen *rscreen, +bool si_query_hw_init(struct si_screen *sscreen, struct r600_query_hw *query) { - query->buffer.buf = r600_new_query_buffer(rscreen, query); + query->buffer.buf = r600_new_query_buffer(sscreen, query); if (!query->buffer.buf) return false; return true; } -static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscreen, +static struct pipe_query *r600_query_hw_create(struct si_screen *sscreen, unsigned query_type, unsigned index) { @@ -643,19 +643,19 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - query->result_size = 16 * rscreen->info.num_render_backends; + query->result_size = 16 * sscreen->info.num_render_backends; query->result_size += 16; /* for the fence + alignment */ query->num_cs_dw_begin = 6; - query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen); + query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(sscreen); break; case PIPE_QUERY_TIME_ELAPSED: query->result_size = 24; query->num_cs_dw_begin = 8; - query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen); + query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(sscreen); break; case PIPE_QUERY_TIMESTAMP: query->result_size = 16; - query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen); + query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(sscreen); query->flags = R600_QUERY_HW_FLAG_NO_START; break; case PIPE_QUERY_PRIMITIVES_EMITTED: @@ -679,7 +679,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree query->result_size = 11 * 16; query->result_size += 8; /* for the fence + alignment */ query->num_cs_dw_begin = 6; - query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen); + query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(sscreen); break; default: assert(0); @@ -687,7 +687,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree return NULL; } - if (!si_query_hw_init(rscreen, query)) { + if (!si_query_hw_init(sscreen, query)) { FREE(query); return NULL; } @@ -1025,15 +1025,15 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) { - struct r600_common_screen *rscreen = - (struct r600_common_screen *)ctx->screen; + struct si_screen *sscreen = + (struct si_screen *)ctx->screen; if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT || query_type == PIPE_QUERY_GPU_FINISHED || query_type >= PIPE_QUERY_DRIVER_SPECIFIC) return r600_query_sw_create(query_type); - return r600_query_hw_create(rscreen, query_type, index); + return r600_query_hw_create(sscreen, query_type, index); } static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) @@ -1219,12 +1219,12 @@ static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned return 0; } -static void r600_query_hw_add_result(struct r600_common_screen *rscreen, +static void r600_query_hw_add_result(struct si_screen *sscreen, struct r600_query_hw *query, void *buffer, union pipe_query_result *result) { - unsigned max_rbs = rscreen->info.num_render_backends; + unsigned max_rbs = sscreen->info.num_render_backends; switch (query->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: { @@ -1362,7 +1362,7 @@ bool si_query_hw_get_result(struct r600_common_context *rctx, struct r600_query *rquery, bool wait, union pipe_query_result *result) { - struct r600_common_screen *rscreen = rctx->screen; + struct si_screen *sscreen = rctx->screen; struct r600_query_hw *query = (struct r600_query_hw *)rquery; struct r600_query_buffer *qbuf; @@ -1383,7 +1383,7 @@ bool si_query_hw_get_result(struct r600_common_context *rctx, return false; while (results_base != qbuf->results_end) { - query->ops->add_result(rscreen, query, map + results_base, + query->ops->add_result(sscreen, query, map + results_base, result); results_base += query->result_size; } @@ -1392,7 +1392,7 @@ bool si_query_hw_get_result(struct r600_common_context *rctx, /* Convert the time to expected units. */ if (rquery->type == PIPE_QUERY_TIME_ELAPSED || rquery->type == PIPE_QUERY_TIMESTAMP) { - result->u64 = (1000000 * result->u64) / rscreen->info.clock_crystal_freq; + result->u64 = (1000000 * result->u64) / sscreen->info.clock_crystal_freq; } return true; } @@ -1986,12 +1986,12 @@ static struct pipe_driver_query_info r600_driver_query_list[] = { #undef XG #undef XFULL -static unsigned r600_get_num_queries(struct r600_common_screen *rscreen) +static unsigned r600_get_num_queries(struct si_screen *sscreen) { - if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) + if (sscreen->info.drm_major == 2 && sscreen->info.drm_minor >= 42) return ARRAY_SIZE(r600_driver_query_list); - else if (rscreen->info.drm_major == 3) { - if (rscreen->chip_class >= VI) + else if (sscreen->info.drm_major == 3) { + if (sscreen->info.chip_class >= VI) return ARRAY_SIZE(r600_driver_query_list); else return ARRAY_SIZE(r600_driver_query_list) - 7; @@ -2004,18 +2004,18 @@ static int r600_get_driver_query_info(struct pipe_screen *screen, unsigned index, struct pipe_driver_query_info *info) { - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - unsigned num_queries = r600_get_num_queries(rscreen); + struct si_screen *sscreen = (struct si_screen*)screen; + unsigned num_queries = r600_get_num_queries(sscreen); if (!info) { unsigned num_perfcounters = - si_get_perfcounter_info(rscreen, 0, NULL); + si_get_perfcounter_info(sscreen, 0, NULL); return num_queries + num_perfcounters; } if (index >= num_queries) - return si_get_perfcounter_info(rscreen, index - num_queries, info); + return si_get_perfcounter_info(sscreen, index - num_queries, info); *info = r600_driver_query_list[index]; @@ -2023,23 +2023,23 @@ static int r600_get_driver_query_info(struct pipe_screen *screen, case R600_QUERY_REQUESTED_VRAM: case R600_QUERY_VRAM_USAGE: case R600_QUERY_MAPPED_VRAM: - info->max_value.u64 = rscreen->info.vram_size; + info->max_value.u64 = sscreen->info.vram_size; break; case R600_QUERY_REQUESTED_GTT: case R600_QUERY_GTT_USAGE: case R600_QUERY_MAPPED_GTT: - info->max_value.u64 = rscreen->info.gart_size; + info->max_value.u64 = sscreen->info.gart_size; break; case R600_QUERY_GPU_TEMPERATURE: info->max_value.u64 = 125; break; case R600_QUERY_VRAM_VIS_USAGE: - info->max_value.u64 = rscreen->info.vram_vis_size; + info->max_value.u64 = sscreen->info.vram_vis_size; break; } - if (info->group_id != ~(unsigned)0 && rscreen->perfcounters) - info->group_id += rscreen->perfcounters->num_groups; + if (info->group_id != ~(unsigned)0 && sscreen->perfcounters) + info->group_id += sscreen->perfcounters->num_groups; return 1; } @@ -2052,17 +2052,17 @@ static int r600_get_driver_query_group_info(struct pipe_screen *screen, unsigned index, struct pipe_driver_query_group_info *info) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; + struct si_screen *sscreen = (struct si_screen *)screen; unsigned num_pc_groups = 0; - if (rscreen->perfcounters) - num_pc_groups = rscreen->perfcounters->num_groups; + if (sscreen->perfcounters) + num_pc_groups = sscreen->perfcounters->num_groups; if (!info) return num_pc_groups + R600_NUM_SW_QUERY_GROUPS; if (index < num_pc_groups) - return si_get_perfcounter_group_info(rscreen, index, info); + return si_get_perfcounter_group_info(sscreen, index, info); index -= num_pc_groups; if (index >= R600_NUM_SW_QUERY_GROUPS) @@ -2085,14 +2085,14 @@ void si_init_query_functions(struct r600_common_context *rctx) rctx->b.get_query_result_resource = r600_get_query_result_resource; rctx->render_cond_atom.emit = r600_emit_query_predication; - if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0) + if (((struct si_screen*)rctx->b.screen)->info.num_render_backends > 0) rctx->b.render_condition = r600_render_condition; LIST_INITHEAD(&rctx->active_queries); } -void si_init_screen_query_functions(struct r600_common_screen *rscreen) +void si_init_screen_query_functions(struct si_screen *sscreen) { - rscreen->b.get_driver_query_info = r600_get_driver_query_info; - rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info; + sscreen->b.get_driver_query_info = r600_get_driver_query_info; + sscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info; } diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h index bb81e50d8ca..db86dc14e5a 100644 --- a/src/gallium/drivers/radeon/r600_query.h +++ b/src/gallium/drivers/radeon/r600_query.h @@ -31,7 +31,7 @@ struct pipe_query; struct pipe_resource; struct r600_common_context; -struct r600_common_screen; +struct si_screen; struct r600_query; struct r600_query_hw; struct r600_resource; @@ -118,7 +118,7 @@ enum { }; struct r600_query_ops { - void (*destroy)(struct r600_common_screen *, struct r600_query *); + void (*destroy)(struct si_screen *, struct r600_query *); bool (*begin)(struct r600_common_context *, struct r600_query *); bool (*end)(struct r600_common_context *, struct r600_query *); bool (*get_result)(struct r600_common_context *, @@ -148,7 +148,7 @@ enum { }; struct r600_query_hw_ops { - bool (*prepare_buffer)(struct r600_common_screen *, + bool (*prepare_buffer)(struct si_screen *, struct r600_query_hw *, struct r600_resource *); void (*emit_start)(struct r600_common_context *, @@ -158,7 +158,7 @@ struct r600_query_hw_ops { struct r600_query_hw *, struct r600_resource *buffer, uint64_t va); void (*clear_result)(struct r600_query_hw *, union pipe_query_result *); - void (*add_result)(struct r600_common_screen *screen, + void (*add_result)(struct si_screen *screen, struct r600_query_hw *, void *buffer, union pipe_query_result *result); }; @@ -197,9 +197,9 @@ struct r600_query_hw { unsigned workaround_offset; }; -bool si_query_hw_init(struct r600_common_screen *rscreen, +bool si_query_hw_init(struct si_screen *sscreen, struct r600_query_hw *query); -void si_query_hw_destroy(struct r600_common_screen *rscreen, +void si_query_hw_destroy(struct si_screen *sscreen, struct r600_query *rquery); bool si_query_hw_begin(struct r600_common_context *rctx, struct r600_query *rquery); @@ -288,7 +288,7 @@ struct r600_perfcounters { unsigned count, unsigned *selectors, struct r600_resource *buffer, uint64_t va); - void (*cleanup)(struct r600_common_screen *); + void (*cleanup)(struct si_screen *); bool separate_se; bool separate_instance; @@ -298,15 +298,15 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_queries, unsigned *query_types); -int si_get_perfcounter_info(struct r600_common_screen *, +int si_get_perfcounter_info(struct si_screen *, unsigned index, struct pipe_driver_query_info *info); -int si_get_perfcounter_group_info(struct r600_common_screen *, +int si_get_perfcounter_group_info(struct si_screen *, unsigned index, struct pipe_driver_query_group_info *info); bool si_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks); -void si_perfcounters_add_block(struct r600_common_screen *, +void si_perfcounters_add_block(struct si_screen *, struct r600_perfcounters *, const char *name, unsigned flags, unsigned counters, unsigned selectors, diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 1051a3bd3a8..bc72e73823b 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -36,10 +36,10 @@ #include "state_tracker/drm_driver.h" #include "amd/common/sid.h" -static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, +static void r600_texture_discard_cmask(struct si_screen *sscreen, struct r600_texture *rtex); static enum radeon_surf_mode -r600_choose_tiling(struct r600_common_screen *rscreen, +r600_choose_tiling(struct si_screen *sscreen, const struct pipe_resource *templ); @@ -178,13 +178,13 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600 src, 0, &sbox); } -static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen, +static unsigned r600_texture_get_offset(struct si_screen *sscreen, struct r600_texture *rtex, unsigned level, const struct pipe_box *box, unsigned *stride, unsigned *layer_stride) { - if (rscreen->chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { *stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe; *layer_stride = rtex->surface.u.gfx9.surf_slice_size; @@ -217,7 +217,7 @@ static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen, } } -static int r600_init_surface(struct r600_common_screen *rscreen, +static int r600_init_surface(struct si_screen *sscreen, struct radeon_surf *surface, const struct pipe_resource *ptex, enum radeon_surf_mode array_mode, @@ -249,14 +249,14 @@ static int r600_init_surface(struct r600_common_screen *rscreen, flags |= RADEON_SURF_ZBUFFER; if (tc_compatible_htile && - (rscreen->chip_class >= GFX9 || + (sscreen->info.chip_class >= GFX9 || array_mode == RADEON_SURF_MODE_2D)) { /* TC-compatible HTILE only supports Z32_FLOAT. * GFX9 also supports Z16_UNORM. * On VI, promote Z16 to Z32. DB->CB copies will convert * the format for transfers. */ - if (rscreen->chip_class == VI) + if (sscreen->info.chip_class == VI) bpe = 4; flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; @@ -266,12 +266,12 @@ static int r600_init_surface(struct r600_common_screen *rscreen, flags |= RADEON_SURF_SBUFFER; } - if (rscreen->chip_class >= VI && + if (sscreen->info.chip_class >= VI && (ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC || ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT || /* DCC MSAA array textures are disallowed due to incomplete clear impl. */ (ptex->nr_samples >= 2 && - (!rscreen->dcc_msaa_allowed || ptex->array_size > 1)))) + (!sscreen->dcc_msaa_allowed || ptex->array_size > 1)))) flags |= RADEON_SURF_DISABLE_DCC; if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) { @@ -292,13 +292,13 @@ static int r600_init_surface(struct r600_common_screen *rscreen, if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING)) flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE; - r = rscreen->ws->surface_init(rscreen->ws, ptex, flags, bpe, + r = sscreen->ws->surface_init(sscreen->ws, ptex, flags, bpe, array_mode, surface); if (r) { return r; } - if (rscreen->chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { assert(!pitch_in_bytes_override || pitch_in_bytes_override == surface->u.gfx9.surf_pitch * bpe); surface->u.gfx9.surf_offset = offset; @@ -311,7 +311,7 @@ static int r600_init_surface(struct r600_common_screen *rscreen, return 0; } -static void r600_texture_init_metadata(struct r600_common_screen *rscreen, +static void r600_texture_init_metadata(struct si_screen *sscreen, struct r600_texture *rtex, struct radeon_bo_metadata *metadata) { @@ -319,7 +319,7 @@ static void r600_texture_init_metadata(struct r600_common_screen *rscreen, memset(metadata, 0, sizeof(*metadata)); - if (rscreen->chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode; } else { metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? @@ -337,13 +337,13 @@ static void r600_texture_init_metadata(struct r600_common_screen *rscreen, } } -static void r600_surface_import_metadata(struct r600_common_screen *rscreen, +static void r600_surface_import_metadata(struct si_screen *sscreen, struct radeon_surf *surf, struct radeon_bo_metadata *metadata, enum radeon_surf_mode *array_mode, bool *is_scanout) { - if (rscreen->chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { if (metadata->u.gfx9.swizzle_mode > 0) *array_mode = RADEON_SURF_MODE_2D; else @@ -375,20 +375,20 @@ static void r600_surface_import_metadata(struct r600_common_screen *rscreen, static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx, struct r600_texture *rtex) { - struct r600_common_screen *rscreen = rctx->screen; + struct si_screen *sscreen = rctx->screen; struct pipe_context *ctx = &rctx->b; - if (ctx == rscreen->aux_context) - mtx_lock(&rscreen->aux_context_lock); + if (ctx == sscreen->aux_context) + mtx_lock(&sscreen->aux_context_lock); ctx->flush_resource(ctx, &rtex->resource.b.b); ctx->flush(ctx, NULL, 0); - if (ctx == rscreen->aux_context) - mtx_unlock(&rscreen->aux_context_lock); + if (ctx == sscreen->aux_context) + mtx_unlock(&sscreen->aux_context_lock); } -static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, +static void r600_texture_discard_cmask(struct si_screen *sscreen, struct r600_texture *rtex) { if (!rtex->cmask.size) @@ -407,8 +407,8 @@ static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, r600_resource_reference(&rtex->cmask_buffer, NULL); /* Notify all contexts about the change. */ - p_atomic_inc(&rscreen->dirty_tex_counter); - p_atomic_inc(&rscreen->compressed_colortex_counter); + p_atomic_inc(&sscreen->dirty_tex_counter); + p_atomic_inc(&sscreen->compressed_colortex_counter); } static bool r600_can_disable_dcc(struct r600_texture *rtex) @@ -419,7 +419,7 @@ static bool r600_can_disable_dcc(struct r600_texture *rtex) !(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE)); } -static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen, +static bool r600_texture_discard_dcc(struct si_screen *sscreen, struct r600_texture *rtex) { if (!r600_can_disable_dcc(rtex)) @@ -431,7 +431,7 @@ static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen, rtex->dcc_offset = 0; /* Notify all contexts about the change. */ - p_atomic_inc(&rscreen->dirty_tex_counter); + p_atomic_inc(&sscreen->dirty_tex_counter); return true; } @@ -459,22 +459,22 @@ static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen, bool si_texture_disable_dcc(struct r600_common_context *rctx, struct r600_texture *rtex) { - struct r600_common_screen *rscreen = rctx->screen; + struct si_screen *sscreen = rctx->screen; if (!r600_can_disable_dcc(rtex)) return false; - if (&rctx->b == rscreen->aux_context) - mtx_lock(&rscreen->aux_context_lock); + if (&rctx->b == sscreen->aux_context) + mtx_lock(&sscreen->aux_context_lock); /* Decompress DCC. */ rctx->decompress_dcc(&rctx->b, rtex); rctx->b.flush(&rctx->b, NULL, 0); - if (&rctx->b == rscreen->aux_context) - mtx_unlock(&rscreen->aux_context_lock); + if (&rctx->b == sscreen->aux_context) + mtx_unlock(&sscreen->aux_context_lock); - return r600_texture_discard_dcc(rscreen, rtex); + return r600_texture_discard_dcc(sscreen, rtex); } static void r600_reallocate_texture_inplace(struct r600_common_context *rctx, @@ -565,16 +565,15 @@ static void r600_reallocate_texture_inplace(struct r600_common_context *rctx, p_atomic_inc(&rctx->screen->dirty_tex_counter); } -static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen) +static uint32_t si_get_bo_metadata_word1(struct si_screen *sscreen) { - return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id; + return (ATI_VENDOR_ID << 16) | sscreen->info.pci_id; } -static void si_query_opaque_metadata(struct r600_common_screen *rscreen, +static void si_query_opaque_metadata(struct si_screen *sscreen, struct r600_texture *rtex, struct radeon_bo_metadata *md) { - struct si_screen *sscreen = (struct si_screen*)rscreen; struct pipe_resource *res = &rtex->resource.b.b; static const unsigned char swizzle[] = { PIPE_SWIZZLE_X, @@ -586,7 +585,7 @@ static void si_query_opaque_metadata(struct r600_common_screen *rscreen, bool is_array = util_resource_is_array_texture(res); /* DRM 2.x.x doesn't support this. */ - if (rscreen->info.drm_major != 3) + if (sscreen->info.drm_major != 3) return; assert(rtex->dcc_separate_buffer == NULL); @@ -605,7 +604,7 @@ static void si_query_opaque_metadata(struct r600_common_screen *rscreen, md->metadata[0] = 1; /* metadata image format version 1 */ /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ - md->metadata[1] = si_get_bo_metadata_word1(rscreen); + md->metadata[1] = si_get_bo_metadata_word1(sscreen); si_make_texture_descriptor(sscreen, rtex, true, res->target, res->format, @@ -627,7 +626,7 @@ static void si_query_opaque_metadata(struct r600_common_screen *rscreen, md->size_metadata = 10 * 4; /* Dwords [10:..] contain the mipmap level offsets. */ - if (rscreen->chip_class <= VI) { + if (sscreen->info.chip_class <= VI) { for (i = 0; i <= res->last_level; i++) md->metadata[10+i] = rtex->surface.u.legacy.level[i].offset >> 8; @@ -635,13 +634,13 @@ static void si_query_opaque_metadata(struct r600_common_screen *rscreen, } } -static void si_apply_opaque_metadata(struct r600_common_screen *rscreen, +static void si_apply_opaque_metadata(struct si_screen *sscreen, struct r600_texture *rtex, struct radeon_bo_metadata *md) { uint32_t *desc = &md->metadata[2]; - if (rscreen->chip_class < VI) + if (sscreen->info.chip_class < VI) return; /* Return if DCC is enabled. The texture should be set up with it @@ -649,7 +648,7 @@ static void si_apply_opaque_metadata(struct r600_common_screen *rscreen, */ if (md->size_metadata >= 11 * 4 && md->metadata[0] != 0 && - md->metadata[1] == si_get_bo_metadata_word1(rscreen) && + md->metadata[1] == si_get_bo_metadata_word1(sscreen) && G_008F28_COMPRESSION_EN(desc[6])) { rtex->dcc_offset = (uint64_t)desc[7] << 8; return; @@ -667,7 +666,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, struct winsys_handle *whandle, unsigned usage) { - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct si_screen *sscreen = (struct si_screen*)screen; struct r600_common_context *rctx; struct r600_resource *res = (struct r600_resource*)resource; struct r600_texture *rtex = (struct r600_texture*)resource; @@ -676,7 +675,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, unsigned stride, offset, slice_size; ctx = threaded_context_unwrap_sync(ctx); - rctx = (struct r600_common_context*)(ctx ? ctx : rscreen->aux_context); + rctx = (struct r600_common_context*)(ctx ? ctx : sscreen->aux_context); if (resource->target != PIPE_BUFFER) { /* This is not supported now, but it might be required for OpenCL @@ -686,7 +685,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, return false; /* Move a suballocated texture into a non-suballocated allocation. */ - if (rscreen->ws->buffer_is_suballocated(res->buf) || + if (sscreen->ws->buffer_is_suballocated(res->buf) || rtex->surface.tile_swizzle || (rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && whandle->type != DRM_API_HANDLE_TYPE_KMS)) { @@ -718,18 +717,18 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, * to be called. */ if (rtex->cmask.size) - r600_texture_discard_cmask(rscreen, rtex); + r600_texture_discard_cmask(sscreen, rtex); } /* Set metadata. */ if (!res->b.is_shared || update_metadata) { - r600_texture_init_metadata(rscreen, rtex, &metadata); - si_query_opaque_metadata(rscreen, rtex, &metadata); + r600_texture_init_metadata(sscreen, rtex, &metadata); + si_query_opaque_metadata(sscreen, rtex, &metadata); - rscreen->ws->buffer_set_metadata(res->buf, &metadata); + sscreen->ws->buffer_set_metadata(res->buf, &metadata); } - if (rscreen->chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { offset = rtex->surface.u.gfx9.surf_offset; stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe; @@ -742,7 +741,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, } } else { /* Move a suballocated buffer into a non-suballocated allocation. */ - if (rscreen->ws->buffer_is_suballocated(res->buf)) { + if (sscreen->ws->buffer_is_suballocated(res->buf)) { assert(!res->b.is_shared); /* Allocate a new buffer with PIPE_BIND_SHARED. */ @@ -785,7 +784,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, res->external_usage = usage; } - return rscreen->ws->buffer_get_handle(res->buf, stride, offset, + return sscreen->ws->buffer_get_handle(res->buf, stride, offset, slice_size, whandle); } @@ -809,7 +808,7 @@ static void r600_texture_destroy(struct pipe_screen *screen, static const struct u_resource_vtbl r600_texture_vtbl; /* The number of samples can be specified independently of the texture. */ -void si_texture_get_fmask_info(struct r600_common_screen *rscreen, +void si_texture_get_fmask_info(struct si_screen *sscreen, struct r600_texture *rtex, unsigned nr_samples, struct r600_fmask_info *out) @@ -821,7 +820,7 @@ void si_texture_get_fmask_info(struct r600_common_screen *rscreen, memset(out, 0, sizeof(*out)); - if (rscreen->chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { out->alignment = rtex->surface.u.gfx9.fmask_alignment; out->size = rtex->surface.u.gfx9.fmask_size; return; @@ -843,7 +842,7 @@ void si_texture_get_fmask_info(struct r600_common_screen *rscreen, return; } - if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe, + if (sscreen->ws->surface_init(sscreen->ws, &templ, flags, bpe, RADEON_SURF_MODE_2D, &fmask)) { R600_ERR("Got error in surface_init while allocating FMASK.\n"); return; @@ -863,25 +862,25 @@ void si_texture_get_fmask_info(struct r600_common_screen *rscreen, out->size = fmask.surf_size; } -static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen, +static void r600_texture_allocate_fmask(struct si_screen *sscreen, struct r600_texture *rtex) { - si_texture_get_fmask_info(rscreen, rtex, + si_texture_get_fmask_info(sscreen, rtex, rtex->resource.b.b.nr_samples, &rtex->fmask); rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment); rtex->size = rtex->fmask.offset + rtex->fmask.size; } -void si_texture_get_cmask_info(struct r600_common_screen *rscreen, +void si_texture_get_cmask_info(struct si_screen *sscreen, struct r600_texture *rtex, struct r600_cmask_info *out) { - unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; - unsigned num_pipes = rscreen->info.num_tile_pipes; + unsigned pipe_interleave_bytes = sscreen->info.pipe_interleave_bytes; + unsigned num_pipes = sscreen->info.num_tile_pipes; unsigned cl_width, cl_height; - if (rscreen->chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { out->alignment = rtex->surface.u.gfx9.cmask_alignment; out->size = rtex->surface.u.gfx9.cmask_size; return; @@ -927,10 +926,10 @@ void si_texture_get_cmask_info(struct r600_common_screen *rscreen, align(slice_bytes, base_align); } -static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen, +static void r600_texture_allocate_cmask(struct si_screen *sscreen, struct r600_texture *rtex) { - si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); + si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask); rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment); rtex->size = rtex->cmask.offset + rtex->cmask.size; @@ -938,21 +937,21 @@ static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen, rtex->cb_color_info |= S_028C70_FAST_CLEAR(1); } -static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, +static void r600_texture_get_htile_size(struct si_screen *sscreen, struct r600_texture *rtex) { unsigned cl_width, cl_height, width, height; unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; - unsigned num_pipes = rscreen->info.num_tile_pipes; + unsigned num_pipes = sscreen->info.num_tile_pipes; - assert(rscreen->chip_class <= VI); + assert(sscreen->info.chip_class <= VI); rtex->surface.htile_size = 0; /* HTILE is broken with 1D tiling on old kernels and CIK. */ - if (rscreen->chip_class >= CIK && + if (sscreen->info.chip_class >= CIK && rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && - rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38) + sscreen->info.drm_major == 2 && sscreen->info.drm_minor < 38) return; /* Overalign HTILE on P2 configs to work around GPU hangs in @@ -962,7 +961,7 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, * are always reproducible. I think I have seen the test hang * on Carrizo too, though it was very rare there. */ - if (rscreen->chip_class >= CIK && num_pipes < 4) + if (sscreen->info.chip_class >= CIK && num_pipes < 4) num_pipes = 4; switch (num_pipes) { @@ -997,7 +996,7 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, slice_elements = (width * height) / (8 * 8); slice_bytes = slice_elements * 4; - pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; + pipe_interleave_bytes = sscreen->info.pipe_interleave_bytes; base_align = num_pipes * pipe_interleave_bytes; rtex->surface.htile_alignment = base_align; @@ -1006,11 +1005,11 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, align(slice_bytes, base_align); } -static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, +static void r600_texture_allocate_htile(struct si_screen *sscreen, struct r600_texture *rtex) { - if (rscreen->chip_class <= VI && !rtex->tc_compatible_htile) - r600_texture_get_htile_size(rscreen, rtex); + if (sscreen->info.chip_class <= VI && !rtex->tc_compatible_htile) + r600_texture_get_htile_size(sscreen, rtex); if (!rtex->surface.htile_size) return; @@ -1019,7 +1018,7 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, rtex->size = rtex->htile_offset + rtex->surface.htile_size; } -void si_print_texture_info(struct r600_common_screen *rscreen, +void si_print_texture_info(struct si_screen *sscreen, struct r600_texture *rtex, struct u_log_context *log) { int i; @@ -1035,7 +1034,7 @@ void si_print_texture_info(struct r600_common_screen *rscreen, rtex->surface.bpe, rtex->resource.b.b.nr_samples, rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format)); - if (rscreen->chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { u_log_printf(log, " Surf: size=%"PRIu64", slice_size=%"PRIu64", " "alignment=%u, swmode=%u, epitch=%u, pitch=%u\n", rtex->surface.surf_size, @@ -1176,7 +1175,7 @@ r600_texture_create_object(struct pipe_screen *screen, { struct r600_texture *rtex; struct r600_resource *resource; - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct si_screen *sscreen = (struct si_screen*)screen; rtex = CALLOC_STRUCT(r600_texture); if (!rtex) @@ -1203,7 +1202,7 @@ r600_texture_create_object(struct pipe_screen *screen, * - VI only supports Z32_FLOAT. * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */ if (rtex->tc_compatible_htile) { - if (rscreen->chip_class >= GFX9 && + if (sscreen->info.chip_class >= GFX9 && base->format == PIPE_FORMAT_Z16_UNORM) rtex->db_render_format = base->format; else { @@ -1225,7 +1224,7 @@ r600_texture_create_object(struct pipe_screen *screen, rtex->ps_draw_ratio = 0; if (rtex->is_depth) { - if (rscreen->chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { rtex->can_sample_z = true; rtex->can_sample_s = true; } else { @@ -1237,14 +1236,14 @@ r600_texture_create_object(struct pipe_screen *screen, R600_RESOURCE_FLAG_FLUSHED_DEPTH))) { rtex->db_compatible = true; - if (!(rscreen->debug_flags & DBG(NO_HYPERZ))) - r600_texture_allocate_htile(rscreen, rtex); + if (!(sscreen->debug_flags & DBG(NO_HYPERZ))) + r600_texture_allocate_htile(sscreen, rtex); } } else { if (base->nr_samples > 1) { if (!buf) { - r600_texture_allocate_fmask(rscreen, rtex); - r600_texture_allocate_cmask(rscreen, rtex); + r600_texture_allocate_fmask(sscreen, rtex); + r600_texture_allocate_cmask(sscreen, rtex); rtex->cmask_buffer = &rtex->resource; } if (!rtex->fmask.size || !rtex->cmask.size) { @@ -1258,7 +1257,7 @@ r600_texture_create_object(struct pipe_screen *screen, * apply_opaque_metadata later. */ if (rtex->surface.dcc_size && - (buf || !(rscreen->debug_flags & DBG(NO_DCC))) && + (buf || !(sscreen->debug_flags & DBG(NO_DCC))) && !(rtex->surface.flags & RADEON_SURF_SCANOUT)) { /* Reserve space for the DCC buffer. */ rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment); @@ -1268,19 +1267,19 @@ r600_texture_create_object(struct pipe_screen *screen, /* Now create the backing buffer. */ if (!buf) { - si_init_resource_fields(rscreen, resource, rtex->size, + si_init_resource_fields(sscreen, resource, rtex->size, rtex->surface.surf_alignment); - if (!si_alloc_resource(rscreen, resource)) { + if (!si_alloc_resource(sscreen, resource)) { FREE(rtex); return NULL; } } else { resource->buf = buf; - resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf); + resource->gpu_address = sscreen->ws->buffer_get_virtual_address(resource->buf); resource->bo_size = buf->size; resource->bo_alignment = buf->alignment; - resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf); + resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf); if (resource->domains & RADEON_DOMAIN_VRAM) resource->vram_usage = buf->size; else if (resource->domains & RADEON_DOMAIN_GTT) @@ -1289,17 +1288,17 @@ r600_texture_create_object(struct pipe_screen *screen, if (rtex->cmask.size) { /* Initialize the cmask to 0xCC (= compressed state). */ - si_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b, + si_screen_clear_buffer(sscreen, &rtex->cmask_buffer->b.b, rtex->cmask.offset, rtex->cmask.size, 0xCCCCCCCC); } if (rtex->htile_offset) { uint32_t clear_value = 0; - if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile) + if (sscreen->info.chip_class >= GFX9 || rtex->tc_compatible_htile) clear_value = 0x0000030F; - si_screen_clear_buffer(rscreen, &rtex->resource.b.b, + si_screen_clear_buffer(sscreen, &rtex->resource.b.b, rtex->htile_offset, rtex->surface.htile_size, clear_value); @@ -1307,7 +1306,7 @@ r600_texture_create_object(struct pipe_screen *screen, /* Initialize DCC only if the texture is not being imported. */ if (!buf && rtex->dcc_offset) { - si_screen_clear_buffer(rscreen, &rtex->resource.b.b, + si_screen_clear_buffer(sscreen, &rtex->resource.b.b, rtex->dcc_offset, rtex->surface.dcc_size, 0xFFFFFFFF); @@ -1317,7 +1316,7 @@ r600_texture_create_object(struct pipe_screen *screen, rtex->cmask.base_address_reg = (rtex->resource.gpu_address + rtex->cmask.offset) >> 8; - if (rscreen->debug_flags & DBG(VM)) { + if (sscreen->debug_flags & DBG(VM)) { fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n", rtex->resource.gpu_address, rtex->resource.gpu_address + rtex->resource.buf->size, @@ -1325,11 +1324,11 @@ r600_texture_create_object(struct pipe_screen *screen, base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format)); } - if (rscreen->debug_flags & DBG(TEX)) { + if (sscreen->debug_flags & DBG(TEX)) { puts("Texture:"); struct u_log_context log; u_log_context_init(&log); - si_print_texture_info(rscreen, rtex, &log); + si_print_texture_info(sscreen, rtex, &log); u_log_new_page_print(&log, stdout); fflush(stdout); u_log_context_destroy(&log); @@ -1339,7 +1338,7 @@ r600_texture_create_object(struct pipe_screen *screen, } static enum radeon_surf_mode -r600_choose_tiling(struct r600_common_screen *rscreen, +r600_choose_tiling(struct si_screen *sscreen, const struct pipe_resource *templ) { const struct util_format_description *desc = util_format_description(templ->format); @@ -1358,7 +1357,7 @@ r600_choose_tiling(struct r600_common_screen *rscreen, /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on VI, * which requires 2D tiling. */ - if (rscreen->chip_class == VI && + if (sscreen->info.chip_class == VI && is_depth_stencil && (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY)) return RADEON_SURF_MODE_2D; @@ -1369,7 +1368,7 @@ r600_choose_tiling(struct r600_common_screen *rscreen, if (!force_tiling && !is_depth_stencil && !util_format_is_compressed(templ->format)) { - if (rscreen->debug_flags & DBG(NO_TILING)) + if (sscreen->debug_flags & DBG(NO_TILING)) return RADEON_SURF_MODE_LINEAR_ALIGNED; /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */ @@ -1400,7 +1399,7 @@ r600_choose_tiling(struct r600_common_screen *rscreen, /* Make small textures 1D tiled. */ if (templ->width0 <= 16 || templ->height0 <= 16 || - (rscreen->debug_flags & DBG(NO_2D_TILING))) + (sscreen->debug_flags & DBG(NO_2D_TILING))) return RADEON_SURF_MODE_1D; /* The allocator will switch to 1D if needed. */ @@ -1410,21 +1409,21 @@ r600_choose_tiling(struct r600_common_screen *rscreen, struct pipe_resource *si_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ) { - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct si_screen *sscreen = (struct si_screen*)screen; struct radeon_surf surface = {0}; bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH; bool tc_compatible_htile = - rscreen->chip_class >= VI && + sscreen->info.chip_class >= VI && (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) && - !(rscreen->debug_flags & DBG(NO_HYPERZ)) && + !(sscreen->debug_flags & DBG(NO_HYPERZ)) && !is_flushed_depth && templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */ util_format_is_depth_or_stencil(templ->format); int r; - r = r600_init_surface(rscreen, &surface, templ, - r600_choose_tiling(rscreen, templ), 0, 0, + r = r600_init_surface(sscreen, &surface, templ, + r600_choose_tiling(sscreen, templ), 0, 0, false, false, is_flushed_depth, tc_compatible_htile); if (r) { @@ -1440,7 +1439,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen struct winsys_handle *whandle, unsigned usage) { - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct si_screen *sscreen = (struct si_screen*)screen; struct pb_buffer *buf = NULL; unsigned stride = 0, offset = 0; enum radeon_surf_mode array_mode; @@ -1455,15 +1454,15 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen templ->depth0 != 1 || templ->last_level != 0) return NULL; - buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset); + buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, &stride, &offset); if (!buf) return NULL; - rscreen->ws->buffer_get_metadata(buf, &metadata); - r600_surface_import_metadata(rscreen, &surface, &metadata, + sscreen->ws->buffer_get_metadata(buf, &metadata); + r600_surface_import_metadata(sscreen, &surface, &metadata, &array_mode, &is_scanout); - r = r600_init_surface(rscreen, &surface, templ, array_mode, stride, + r = r600_init_surface(sscreen, &surface, templ, array_mode, stride, offset, true, is_scanout, false, false); if (r) { return NULL; @@ -1476,7 +1475,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen rtex->resource.b.is_shared = true; rtex->resource.external_usage = usage; - si_apply_opaque_metadata(rscreen, rtex, &metadata); + si_apply_opaque_metadata(sscreen, rtex, &metadata); assert(rtex->surface.tile_swizzle == 0); return &rtex->resource.b.b; @@ -1577,7 +1576,7 @@ static void r600_init_temp_resource_from_box(struct pipe_resource *res, } } -static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen, +static bool r600_can_invalidate_texture(struct si_screen *sscreen, struct r600_texture *rtex, unsigned transfer_usage, const struct pipe_box *box) @@ -1594,20 +1593,20 @@ static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen, static void r600_texture_invalidate_storage(struct r600_common_context *rctx, struct r600_texture *rtex) { - struct r600_common_screen *rscreen = rctx->screen; + struct si_screen *sscreen = rctx->screen; /* There is no point in discarding depth and tiled buffers. */ assert(!rtex->is_depth); assert(rtex->surface.is_linear); /* Reallocate the buffer in the same pipe_resource. */ - si_alloc_resource(rscreen, &rtex->resource); + si_alloc_resource(sscreen, &rtex->resource); /* Initialize the CMASK base address (needed even without CMASK). */ rtex->cmask.base_address_reg = (rtex->resource.gpu_address + rtex->cmask.offset) >> 8; - p_atomic_inc(&rscreen->dirty_tex_counter); + p_atomic_inc(&sscreen->dirty_tex_counter); rctx->num_alloc_tex_transfer_bytes += rtex->size; } @@ -2340,7 +2339,7 @@ r600_memobj_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle, bool dedicated) { - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct si_screen *sscreen = (struct si_screen*)screen; struct r600_memory_object *memobj = CALLOC_STRUCT(r600_memory_object); struct pb_buffer *buf = NULL; uint32_t stride, offset; @@ -2348,7 +2347,7 @@ r600_memobj_from_handle(struct pipe_screen *screen, if (!memobj) return NULL; - buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, + buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, &stride, &offset); if (!buf) { free(memobj); @@ -2381,7 +2380,7 @@ r600_texture_from_memobj(struct pipe_screen *screen, uint64_t offset) { int r; - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct si_screen *sscreen = (struct si_screen*)screen; struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj; struct r600_texture *rtex; struct radeon_surf surface = {}; @@ -2391,8 +2390,8 @@ r600_texture_from_memobj(struct pipe_screen *screen, struct pb_buffer *buf = NULL; if (memobj->b.dedicated) { - rscreen->ws->buffer_get_metadata(memobj->buf, &metadata); - r600_surface_import_metadata(rscreen, &surface, &metadata, + sscreen->ws->buffer_get_metadata(memobj->buf, &metadata); + r600_surface_import_metadata(sscreen, &surface, &metadata, &array_mode, &is_scanout); } else { /** @@ -2422,7 +2421,7 @@ r600_texture_from_memobj(struct pipe_screen *screen, } - r = r600_init_surface(rscreen, &surface, templ, + r = r600_init_surface(sscreen, &surface, templ, array_mode, memobj->stride, offset, true, is_scanout, false, false); @@ -2441,7 +2440,7 @@ r600_texture_from_memobj(struct pipe_screen *screen, rtex->resource.b.is_shared = true; rtex->resource.external_usage = PIPE_HANDLE_USAGE_READ_WRITE; - si_apply_opaque_metadata(rscreen, rtex, &metadata); + si_apply_opaque_metadata(sscreen, rtex, &metadata); return &rtex->resource.b.b; } @@ -2466,14 +2465,14 @@ static bool si_check_resource_capability(struct pipe_screen *screen, return true; } -void si_init_screen_texture_functions(struct r600_common_screen *rscreen) +void si_init_screen_texture_functions(struct si_screen *sscreen) { - rscreen->b.resource_from_handle = r600_texture_from_handle; - rscreen->b.resource_get_handle = r600_texture_get_handle; - rscreen->b.resource_from_memobj = r600_texture_from_memobj; - rscreen->b.memobj_create_from_handle = r600_memobj_from_handle; - rscreen->b.memobj_destroy = r600_memobj_destroy; - rscreen->b.check_resource_capability = si_check_resource_capability; + sscreen->b.resource_from_handle = r600_texture_from_handle; + sscreen->b.resource_get_handle = r600_texture_get_handle; + sscreen->b.resource_from_memobj = r600_texture_from_memobj; + sscreen->b.memobj_create_from_handle = r600_memobj_from_handle; + sscreen->b.memobj_destroy = r600_memobj_destroy; + sscreen->b.check_resource_capability = si_check_resource_capability; } void si_init_context_texture_functions(struct r600_common_context *rctx) diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index cd58c66ae90..afa8836c369 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -39,7 +39,7 @@ #include "vl/vl_defines.h" #include "vl/vl_mpeg12_decoder.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "radeon_video.h" #include "radeon_uvd.h" @@ -329,7 +329,7 @@ static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_ static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec) { - if (((struct r600_common_screen*)dec->screen)->family < CHIP_VEGA10) + if (((struct si_screen*)dec->screen)->info.family < CHIP_VEGA10) return 16; else return 32; @@ -394,7 +394,7 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec) max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); dpb_size = image_size * max_references; if ((dec->stream_type != RUVD_CODEC_H264_PERF) || - (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) { + (((struct si_screen*)dec->screen)->info.family < CHIP_POLARIS10)) { dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment); dpb_size += align(width_in_mb * height_in_mb * 32, alignment); } @@ -404,7 +404,7 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec) // reference picture buffer dpb_size = image_size * max_references; if ((dec->stream_type != RUVD_CODEC_H264_PERF) || - (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) { + (((struct si_screen*)dec->screen)->info.family < CHIP_POLARIS10)) { // macroblock context buffer dpb_size += width_in_mb * height_in_mb * max_references * 192; // IT surface buffer @@ -604,7 +604,7 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; - if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO) + if (((struct si_screen*)dec->screen)->info.family == CHIP_CARRIZO) result.sps_info_flags |= 1 << 9; if (pic->UseRefPicList == true) result.sps_info_flags |= 1 << 10; @@ -1245,11 +1245,11 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder, dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec)); if (dec->stream_type == RUVD_CODEC_H264_PERF && - ((struct r600_common_screen*)dec->screen)->family >= CHIP_POLARIS10) + ((struct si_screen*)dec->screen)->info.family >= CHIP_POLARIS10) dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); - if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY) + if (((struct si_screen*)dec->screen)->info.family >= CHIP_STONEY) dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2; switch (u_reduce_video_profile(picture->profile)) { diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index c7e63331b00..7594421d0e7 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -34,7 +34,7 @@ #include "vl/vl_video_buffer.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "radeon_video.h" #include "radeon_vce.h" @@ -219,10 +219,10 @@ struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc) void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, signed *luma_offset, signed *chroma_offset) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen; + struct si_screen *sscreen = (struct si_screen *)enc->screen; unsigned pitch, vpitch, fsize; - if (rscreen->chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128); vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16); } else { @@ -389,18 +389,18 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, struct radeon_winsys* ws, rvce_get_buffer get_buffer) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen; + struct si_screen *sscreen = (struct si_screen *)context->screen; struct r600_common_context *rctx = (struct r600_common_context*)context; struct rvce_encoder *enc; struct pipe_video_buffer *tmp_buf, templat = {}; struct radeon_surf *tmp_surf; unsigned cpb_size; - if (!rscreen->info.vce_fw_version) { + if (!sscreen->info.vce_fw_version) { RVID_ERR("Kernel doesn't supports VCE!\n"); return NULL; - } else if (!si_vce_is_fw_version_supported(rscreen)) { + } else if (!si_vce_is_fw_version_supported(sscreen)) { RVID_ERR("Unsupported VCE fw version loaded!\n"); return NULL; } @@ -409,20 +409,20 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, if (!enc) return NULL; - if (rscreen->info.drm_major == 3) + if (sscreen->info.drm_major == 3) enc->use_vm = true; - if ((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) || - rscreen->info.drm_major == 3) + if ((sscreen->info.drm_major == 2 && sscreen->info.drm_minor >= 42) || + sscreen->info.drm_major == 3) enc->use_vui = true; - if (rscreen->info.family >= CHIP_TONGA && - rscreen->info.family != CHIP_STONEY && - rscreen->info.family != CHIP_POLARIS11 && - rscreen->info.family != CHIP_POLARIS12) + if (sscreen->info.family >= CHIP_TONGA && + sscreen->info.family != CHIP_STONEY && + sscreen->info.family != CHIP_POLARIS11 && + sscreen->info.family != CHIP_POLARIS12) enc->dual_pipe = true; /* TODO enable B frame with dual instance */ - if ((rscreen->info.family >= CHIP_TONGA) && + if ((sscreen->info.family >= CHIP_TONGA) && (templ->max_references == 1) && - (rscreen->info.vce_harvest_config == 0)) + (sscreen->info.vce_harvest_config == 0)) enc->dual_inst = true; enc->base = *templ; @@ -460,7 +460,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); - cpb_size = (rscreen->chip_class < GFX9) ? + cpb_size = (sscreen->info.chip_class < GFX9) ? align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * align(tmp_surf->u.legacy.level[0].nblk_y, 32) : @@ -484,7 +484,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, reset_cpb(enc); - switch (rscreen->info.vce_fw_version) { + switch (sscreen->info.vce_fw_version) { case FW_40_2_2: si_vce_40_2_2_init(enc); si_get_pic_param = si_vce_40_2_2_get_param; @@ -506,7 +506,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, break; default: - if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53) { + if ((sscreen->info.vce_fw_version & (0xff << 24)) == FW_53) { si_vce_52_init(enc); si_get_pic_param = si_vce_52_get_param; } else @@ -529,9 +529,9 @@ error: /** * check if kernel has the right fw version loaded */ -bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen) +bool si_vce_is_fw_version_supported(struct si_screen *sscreen) { - switch (rscreen->info.vce_fw_version) { + switch (sscreen->info.vce_fw_version) { case FW_40_2_2: case FW_50_0_1: case FW_50_1_2: @@ -542,7 +542,7 @@ bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen) case FW_52_8_3: return true; default: - if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53) + if ((sscreen->info.vce_fw_version & (0xff << 24)) == FW_53) return true; else return false; diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h index f7ce58388d6..7f30877c727 100644 --- a/src/gallium/drivers/radeon/radeon_vce.h +++ b/src/gallium/drivers/radeon/radeon_vce.h @@ -42,7 +42,7 @@ #define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5) #define RVCE_MAX_AUX_BUFFER_NUM 4 -struct r600_common_screen; +struct si_screen; /* driver dependent callback */ typedef void (*rvce_get_buffer)(struct pipe_resource *resource, @@ -422,7 +422,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context, struct radeon_winsys* ws, rvce_get_buffer get_buffer); -bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen); +bool si_vce_is_fw_version_supported(struct si_screen *sscreen); void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf, enum radeon_bo_usage usage, enum radeon_bo_domain domain, diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c b/src/gallium/drivers/radeon/radeon_vce_52.c index 3f2e6cbcda5..10bf718c55a 100644 --- a/src/gallium/drivers/radeon/radeon_vce_52.c +++ b/src/gallium/drivers/radeon/radeon_vce_52.c @@ -34,7 +34,7 @@ #include "vl/vl_video_buffer.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "radeon_video.h" #include "radeon_vce.h" @@ -167,7 +167,7 @@ void si_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_ static void create(struct rvce_encoder *enc) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen; + struct si_screen *sscreen = (struct si_screen *)enc->screen; enc->task_info(enc, 0x00000000, 0, 0, 0); RVCE_BEGIN(0x01000001); // create cmd @@ -179,7 +179,7 @@ static void create(struct rvce_encoder *enc) RVCE_CS(enc->base.width); // encImageWidth RVCE_CS(enc->base.height); // encImageHeight - if (rscreen->chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { RVCE_CS(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); // encRefPicLumaPitch RVCE_CS(enc->chroma->u.legacy.level[0].nblk_x * enc->chroma->bpe); // encRefPicChromaPitch RVCE_CS(align(enc->luma->u.legacy.level[0].nblk_y, 16) / 8); // encRefYHeightInQw @@ -200,7 +200,7 @@ static void create(struct rvce_encoder *enc) static void encode(struct rvce_encoder *enc) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen; + struct si_screen *sscreen = (struct si_screen *)enc->screen; signed luma_offset, chroma_offset, bs_offset; unsigned dep, bs_idx = enc->bs_idx++; int i; @@ -250,7 +250,7 @@ static void encode(struct rvce_encoder *enc) RVCE_CS(enc->enc_pic.eo.end_of_sequence); RVCE_CS(enc->enc_pic.eo.end_of_stream); - if (rscreen->chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->luma->u.legacy.level[0].offset); // inputPictureLumaAddressHi/Lo RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM, diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c b/src/gallium/drivers/radeon/radeon_vcn_dec.c index 2ece4a3fdaf..30cd607789d 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_dec.c +++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c @@ -35,7 +35,7 @@ #include "vl/vl_mpeg12_decoder.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "radeon_video.h" #include "radeon_vcn_dec.h" @@ -199,7 +199,7 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec, result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; - if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO) + if (((struct si_screen*)dec->screen)->info.family == CHIP_CARRIZO) result.sps_info_flags |= 1 << 9; if (pic->UseRefPicList == true) result.sps_info_flags |= 1 << 10; diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c index 9806a69ff48..4972d11062d 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c @@ -34,7 +34,7 @@ #include "vl/vl_video_buffer.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "radeon_video.h" #include "radeon_vcn_enc.h" @@ -221,7 +221,7 @@ struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context, struct radeon_winsys* ws, radeon_enc_get_buffer get_buffer) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen; + struct si_screen *sscreen = (struct si_screen *)context->screen; struct r600_common_context *rctx = (struct r600_common_context*)context; struct radeon_encoder *enc; struct pipe_video_buffer *tmp_buf, templat = {}; @@ -275,7 +275,7 @@ struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context, get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); - cpb_size = (rscreen->chip_class < GFX9) ? + cpb_size = (sscreen->info.chip_class < GFX9) ? align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * align(tmp_surf->u.legacy.level[0].nblk_y, 32) : align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) * diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c index a5505ab51c4..d80410d51b3 100644 --- a/src/gallium/drivers/radeon/radeon_video.c +++ b/src/gallium/drivers/radeon/radeon_video.c @@ -33,7 +33,7 @@ #include "vl/vl_defines.h" #include "vl/vl_video_buffer.h" -#include "r600_pipe_common.h" +#include "radeonsi/si_pipe.h" #include "radeon_video.h" #include "radeon_vce.h" @@ -80,8 +80,8 @@ void si_vid_destroy_buffer(struct rvid_buffer *buffer) bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs, struct rvid_buffer *new_buf, unsigned new_size) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; - struct radeon_winsys* ws = rscreen->ws; + struct si_screen *sscreen = (struct si_screen *)screen; + struct radeon_winsys* ws = sscreen->ws; unsigned bytes = MIN2(new_buf->res->buf->size, new_size); struct rvid_buffer old_buf = *new_buf; void *src = NULL, *dst = NULL; diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 8a18bbf455c..3e3b9cd4329 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -117,7 +117,7 @@ static unsigned encode_tile_info(struct si_context *sctx, struct r600_texture *tex, unsigned level, bool set_bpp) { - struct radeon_info *info = &sctx->screen->b.info; + struct radeon_info *info = &sctx->screen->info; unsigned tile_index = tex->surface.u.legacy.tiling_index[level]; unsigned macro_tile_index = tex->surface.u.legacy.macro_tile_index; unsigned tile_mode = info->si_tile_mode_array[tile_index]; @@ -143,7 +143,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, unsigned src_level, const struct pipe_box *src_box) { - struct radeon_info *info = &sctx->screen->b.info; + struct radeon_info *info = &sctx->screen->info; struct r600_texture *rsrc = (struct r600_texture*)src; struct r600_texture *rdst = (struct r600_texture*)dst; unsigned bpp = rdst->surface.bpe; diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 0e314e9963c..370ce04a9b2 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -772,7 +772,7 @@ void si_decompress_textures(struct si_context *sctx, unsigned shader_mask) return; /* Update the compressed_colortex_mask if necessary. */ - compressed_colortex_counter = p_atomic_read(&sctx->screen->b.compressed_colortex_counter); + compressed_colortex_counter = p_atomic_read(&sctx->screen->compressed_colortex_counter); if (compressed_colortex_counter != sctx->b.last_compressed_colortex_counter) { sctx->b.last_compressed_colortex_counter = compressed_colortex_counter; si_update_needs_color_decompress_masks(sctx); @@ -1240,7 +1240,7 @@ static void si_blit(struct pipe_context *ctx, info->src.box.z, info->src.box.z + info->src.box.depth - 1); - if (sctx->screen->b.debug_flags & DBG(FORCE_DMA) && + if (sctx->screen->debug_flags & DBG(FORCE_DMA) && util_try_blit_via_copy_region(ctx, info)) return; diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index 634d4ccb9a1..ba95ed51f14 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -41,12 +41,12 @@ static void si_alloc_separate_cmask(struct si_screen *sscreen, assert(rtex->cmask.size == 0); - si_texture_get_cmask_info(&sscreen->b, rtex, &rtex->cmask); + si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask); if (!rtex->cmask.size) return; rtex->cmask_buffer = (struct r600_resource *) - si_aligned_buffer_create(&sscreen->b.b, + si_aligned_buffer_create(&sscreen->b, R600_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, rtex->cmask.size, @@ -61,7 +61,7 @@ static void si_alloc_separate_cmask(struct si_screen *sscreen, rtex->cb_color_info |= S_028C70_FAST_CLEAR(1); - p_atomic_inc(&sscreen->b.compressed_colortex_counter); + p_atomic_inc(&sscreen->compressed_colortex_counter); } static void si_set_clear_color(struct r600_texture *rtex, @@ -245,11 +245,11 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode) return; - assert(sscreen->b.chip_class >= GFX9 || + assert(sscreen->info.chip_class >= GFX9 || rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); assert(rtex->resource.b.b.last_level == 0); - if (sscreen->b.chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */ assert(rtex->surface.u.gfx9.surf.swizzle_mode >= 4); @@ -280,7 +280,7 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, assert(!"unexpected micro mode"); return; } - } else if (sscreen->b.chip_class >= CIK) { + } else if (sscreen->info.chip_class >= CIK) { /* These magic numbers were copied from addrlib. It doesn't use * any definitions for them either. They are all 2D_TILED_THIN1 * modes with different bpp and micro tile mode. @@ -338,7 +338,7 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode; - p_atomic_inc(&sscreen->b.dirty_tex_counter); + p_atomic_inc(&sscreen->dirty_tex_counter); } static void si_do_fast_color_clear(struct si_context *sctx, @@ -397,8 +397,8 @@ static void si_do_fast_color_clear(struct si_context *sctx, /* fast color clear with 1D tiling doesn't work on old kernels and CIK */ if (sctx->b.chip_class == CIK && tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && - sctx->screen->b.info.drm_major == 2 && - sctx->screen->b.info.drm_minor < 38) { + sctx->screen->info.drm_major == 2 && + sctx->screen->info.drm_minor < 38) { continue; } @@ -406,7 +406,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, * displayable surfaces. */ if (sctx->b.chip_class >= VI && - !(sctx->screen->b.debug_flags & DBG(NO_DCC_FB))) { + !(sctx->screen->debug_flags & DBG(NO_DCC_FB))) { vi_separate_dcc_try_enable(&sctx->b, tex); /* RB+ isn't supported with a CMASK clear only on Stoney, @@ -427,7 +427,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, * * Always use fast clear on APUs. */ - bool too_small = sctx->screen->b.info.has_dedicated_vram && + bool too_small = sctx->screen->info.has_dedicated_vram && tex->resource.b.b.nr_samples <= 1 && tex->resource.b.b.width0 <= 256 && tex->resource.b.b.height0 <= 256; @@ -437,7 +437,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, uint32_t reset_value; bool clear_words_needed; - if (sctx->screen->b.debug_flags & DBG(NO_DCC_CLEAR)) + if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR)) continue; /* This can only occur with MSAA. */ @@ -500,7 +500,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, if (need_decompress_pass && !(tex->dirty_level_mask & (1 << level))) { tex->dirty_level_mask |= 1 << level; - p_atomic_inc(&sctx->screen->b.compressed_colortex_counter); + p_atomic_inc(&sctx->screen->compressed_colortex_counter); } /* We can change the micro tile mode before a full clear. */ diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index f3dce65e3dd..ac4fab3ea07 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -35,7 +35,7 @@ #define COMPUTE_DBG(rscreen, fmt, args...) \ do { \ - if ((rscreen->b.debug_flags & DBG(COMPUTE))) fprintf(stderr, fmt, ##args); \ + if ((rscreen->debug_flags & DBG(COMPUTE))) fprintf(stderr, fmt, ##args); \ } while (0); struct dispatch_packet { @@ -171,7 +171,7 @@ static void *si_create_compute_state( program->compiler_ctx_state.debug = sctx->debug; program->compiler_ctx_state.is_debug_context = sctx->is_debug; - p_atomic_inc(&sscreen->b.num_shaders_created); + p_atomic_inc(&sscreen->num_shaders_created); util_queue_fence_init(&program->ready); struct util_async_debug_callback async_debug; @@ -315,9 +315,9 @@ static void si_initialize_compute(struct si_context *sctx) radeon_emit(cs, bc_va >> 8); /* R_030E00_TA_CS_BC_BASE_ADDR */ radeon_emit(cs, bc_va >> 40); /* R_030E04_TA_CS_BC_BASE_ADDR_HI */ } else { - if (sctx->screen->b.info.drm_major == 3 || - (sctx->screen->b.info.drm_major == 2 && - sctx->screen->b.info.drm_minor >= 48)) { + if (sctx->screen->info.drm_major == 3 || + (sctx->screen->info.drm_major == 2 && + sctx->screen->info.drm_minor >= 48)) { radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8); } @@ -341,7 +341,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, r600_resource_reference(&sctx->compute_scratch_buffer, NULL); sctx->compute_scratch_buffer = (struct r600_resource*) - si_aligned_buffer_create(&sctx->screen->b.b, + si_aligned_buffer_create(&sctx->screen->b, R600_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, scratch_needed, 256); @@ -624,7 +624,7 @@ static bool si_upload_compute_input(struct si_context *sctx, kernel_args_size = program->input_size + num_work_size_bytes; u_upload_alloc(sctx->b.b.const_uploader, 0, kernel_args_size, - sctx->screen->b.info.tcc_cache_line_size, + sctx->screen->info.tcc_cache_line_size, &kernel_args_offset, (struct pipe_resource**)&input_buffer, &kernel_args_ptr); @@ -728,8 +728,8 @@ static void si_emit_dispatch_packets(struct si_context *sctx, S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); if (sctx->b.chip_class >= CIK) { - unsigned num_cu_per_se = sscreen->b.info.num_good_compute_units / - sscreen->b.info.max_se; + unsigned num_cu_per_se = sscreen->info.num_good_compute_units / + sscreen->info.max_se; /* Force even distribution on all SIMDs in CU if the workgroup * size is 64. This has shown some good improvements if # of CUs diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 446ff2cf87a..b6659267ac6 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -372,7 +372,7 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size, sctx->scratch_buffer->b.b.width0 < scratch_size) { r600_resource_reference(&sctx->scratch_buffer, NULL); sctx->scratch_buffer = (struct r600_resource*) - si_aligned_buffer_create(&sctx->screen->b.b, + si_aligned_buffer_create(&sctx->screen->b, R600_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, scratch_size, 256); diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 447b4ef8e98..22609b7e57f 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -234,16 +234,16 @@ static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f, static void si_dump_debug_registers(struct si_context *sctx, FILE *f) { - if (sctx->screen->b.info.drm_major == 2 && - sctx->screen->b.info.drm_minor < 42) + if (sctx->screen->info.drm_major == 2 && + sctx->screen->info.drm_minor < 42) return; /* no radeon support */ fprintf(f, "Memory-mapped registers:\n"); si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS); /* No other registers can be read on DRM < 3.1.0. */ - if (sctx->screen->b.info.drm_major < 3 || - sctx->screen->b.info.drm_minor < 1) { + if (sctx->screen->info.drm_major < 3 || + sctx->screen->info.drm_minor < 1) { fprintf(f, "\n"); return; } @@ -692,7 +692,7 @@ static void si_dump_descriptor_list(struct si_screen *screen, chunk->element_dw_size = element_dw_size; chunk->num_elements = num_elements; chunk->slot_remap = slot_remap; - chunk->chip_class = screen->b.chip_class; + chunk->chip_class = screen->info.chip_class; r600_resource_reference(&chunk->buf, desc->buffer); chunk->gpu_list = desc->gpu_list; @@ -1103,7 +1103,7 @@ void si_init_debug_functions(struct si_context *sctx) /* Set the initial dmesg timestamp for this context, so that * only new messages will be checked for VM faults. */ - if (sctx->screen->b.debug_flags & DBG(CHECK_VM)) + if (sctx->screen->debug_flags & DBG(CHECK_VM)) ac_vm_fault_occured(sctx->b.chip_class, &sctx->dmesg_timestamp, NULL); } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index b35bd6d125e..17115e1355a 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -325,7 +325,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, va = tex->resource.gpu_address; - if (sscreen->b.chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { /* Only stencil_offset needs to be added here. */ if (is_stencil) va += tex->surface.u.gfx9.stencil_offset; @@ -342,11 +342,11 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, /* Only macrotiled modes can set tile swizzle. * GFX9 doesn't use (legacy) base_level_info. */ - if (sscreen->b.chip_class >= GFX9 || + if (sscreen->info.chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D) state[0] |= tex->surface.tile_swizzle; - if (sscreen->b.chip_class >= VI) { + if (sscreen->info.chip_class >= VI) { state[6] &= C_008F28_COMPRESSION_EN; state[7] = 0; @@ -354,7 +354,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) + tex->dcc_offset; - if (sscreen->b.chip_class == VI) { + if (sscreen->info.chip_class == VI) { meta_va += base_level_info->dcc_offset; assert(base_level_info->mode == RADEON_SURF_MODE_2D); } @@ -370,7 +370,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, } } - if (sscreen->b.chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { state[3] &= C_008F1C_SW_MODE; state[4] &= C_008F20_PITCH_GFX9; @@ -1737,7 +1737,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource uint64_t old_va = rbuffer->gpu_address; /* Reallocate the buffer in the same pipe_resource. */ - si_alloc_resource(&sctx->screen->b, rbuffer); + si_alloc_resource(sctx->screen, rbuffer); si_rebind_buffer(ctx, buf, old_va); } diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 6e229446e22..7a6d0b5be8a 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -139,7 +139,7 @@ static void si_dma_copy_tile(struct si_context *ctx, struct r600_texture *rtiled = detile ? rsrc : rdst; unsigned linear_lvl = detile ? dst_level : src_level; unsigned tiled_lvl = detile ? src_level : dst_level; - struct radeon_info *info = &ctx->screen->b.info; + struct radeon_info *info = &ctx->screen->info; unsigned index = rtiled->surface.u.legacy.tiling_index[tiled_lvl]; unsigned tile_mode = info->si_tile_mode_array[index]; unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size; diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index 61105217caa..5f478afaf63 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -67,7 +67,7 @@ static void si_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **dst, struct pipe_fence_handle *src) { - struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws; + struct radeon_winsys *ws = ((struct si_screen*)screen)->ws; struct si_multi_fence **rdst = (struct si_multi_fence **)dst; struct si_multi_fence *rsrc = (struct si_multi_fence *)src; @@ -186,7 +186,7 @@ static boolean si_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, uint64_t timeout) { - struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; + struct radeon_winsys *rws = ((struct si_screen*)screen)->ws; struct si_multi_fence *rfence = (struct si_multi_fence *)fence; int64_t abs_timeout = os_time_get_absolute_timeout(timeout); @@ -300,13 +300,13 @@ static boolean si_fence_finish(struct pipe_screen *screen, static void si_create_fence_fd(struct pipe_context *ctx, struct pipe_fence_handle **pfence, int fd) { - struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; - struct radeon_winsys *ws = rscreen->ws; + struct si_screen *sscreen = (struct si_screen*)ctx->screen; + struct radeon_winsys *ws = sscreen->ws; struct si_multi_fence *rfence; *pfence = NULL; - if (!rscreen->info.has_sync_file) + if (!sscreen->info.has_sync_file) return; rfence = si_create_multi_fence(); @@ -325,12 +325,12 @@ static void si_create_fence_fd(struct pipe_context *ctx, static int si_fence_get_fd(struct pipe_screen *screen, struct pipe_fence_handle *fence) { - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct radeon_winsys *ws = rscreen->ws; + struct si_screen *sscreen = (struct si_screen*)screen; + struct radeon_winsys *ws = sscreen->ws; struct si_multi_fence *rfence = (struct si_multi_fence *)fence; int gfx_fd = -1, sdma_fd = -1; - if (!rscreen->info.has_sync_file) + if (!sscreen->info.has_sync_file) return -1; util_queue_fence_wait(&rfence->ready); @@ -470,7 +470,7 @@ void si_init_fence_functions(struct si_context *ctx) void si_init_screen_fence_functions(struct si_screen *screen) { - screen->b.b.fence_finish = si_fence_finish; - screen->b.b.fence_reference = si_fence_reference; - screen->b.b.fence_get_fd = si_fence_get_fd; + screen->b.fence_finish = si_fence_finish; + screen->b.fence_reference = si_fence_reference; + screen->b.fence_get_fd = si_fence_get_fd; } diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 56030f7169c..7646ea82550 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -54,7 +54,7 @@ static const char *si_get_marketing_name(struct radeon_winsys *ws) const char *si_get_family_name(const struct si_screen *sscreen) { - switch (sscreen->b.info.family) { + switch (sscreen->info.family) { case CHIP_TAHITI: return "AMD TAHITI"; case CHIP_PITCAIRN: return "AMD PITCAIRN"; case CHIP_VERDE: return "AMD CAPE VERDE"; @@ -83,10 +83,10 @@ static bool si_have_tgsi_compute(struct si_screen *sscreen) { /* Old kernels disallowed some register writes for SI * that are used for indirect dispatches. */ - return (sscreen->b.chip_class >= CIK || - sscreen->b.info.drm_major == 3 || - (sscreen->b.info.drm_major == 2 && - sscreen->b.info.drm_minor >= 45)); + return (sscreen->info.chip_class >= CIK || + sscreen->info.drm_major == 3 || + (sscreen->info.drm_major == 2 && + sscreen->info.drm_minor >= 45)); } static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) @@ -200,19 +200,19 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return HAVE_LLVM >= 0x0500; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: - return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr; + return !SI_BIG_ENDIAN && sscreen->info.has_userptr; case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - return (sscreen->b.info.drm_major == 2 && - sscreen->b.info.drm_minor >= 43) || - sscreen->b.info.drm_major == 3; + return (sscreen->info.drm_major == 2 && + sscreen->info.drm_minor >= 43) || + sscreen->info.drm_major == 3; case PIPE_CAP_TEXTURE_MULTISAMPLE: /* 2D tiling on CIK is supported since DRM 2.35.0 */ - return sscreen->b.chip_class < CIK || - (sscreen->b.info.drm_major == 2 && - sscreen->b.info.drm_minor >= 35) || - sscreen->b.info.drm_major == 3; + return sscreen->info.chip_class < CIK || + (sscreen->info.drm_major == 2 && + sscreen->info.drm_minor >= 35) || + sscreen->info.drm_major == 3; case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return R600_MAP_BUFFER_ALIGNMENT; @@ -226,34 +226,34 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 4; case PIPE_CAP_GLSL_FEATURE_LEVEL: - if (sscreen->b.debug_flags & DBG(NIR)) + if (sscreen->debug_flags & DBG(NIR)) return 140; /* no geometry and tessellation shaders yet */ if (si_have_tgsi_compute(sscreen)) return 450; return 420; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: - return MIN2(sscreen->b.info.max_alloc_size, INT_MAX); + return MIN2(sscreen->info.max_alloc_size, INT_MAX); case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: /* SI doesn't support unaligned loads. * CIK needs DRM 2.50.0 on radeon. */ - return sscreen->b.chip_class == SI || - (sscreen->b.info.drm_major == 2 && - sscreen->b.info.drm_minor < 50); + return sscreen->info.chip_class == SI || + (sscreen->info.drm_major == 2 && + sscreen->info.drm_minor < 50); case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: /* TODO: GFX9 hangs. */ - if (sscreen->b.chip_class >= GFX9) + if (sscreen->info.chip_class >= GFX9) return 0; /* Disable on SI due to VM faults in CP DMA. Enable once these * faults are mitigated in software. */ - if (sscreen->b.chip_class >= CIK && - sscreen->b.info.drm_major == 3 && - sscreen->b.info.drm_minor >= 13) + if (sscreen->info.chip_class >= CIK && + sscreen->info.drm_major == 3 && + sscreen->info.drm_minor >= 13) return RADEON_SPARSE_PAGE_SIZE; return 0; @@ -277,7 +277,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 0; case PIPE_CAP_NATIVE_FENCE_FD: - return sscreen->b.info.has_sync_file; + return sscreen->info.has_sync_file; case PIPE_CAP_QUERY_BUFFER_OBJECT: return si_have_tgsi_compute(sscreen); @@ -291,7 +291,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 30; case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: - return sscreen->b.chip_class <= VI ? + return sscreen->info.chip_class <= VI ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0; /* Stream output. */ @@ -340,17 +340,17 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VENDOR_ID: return ATI_VENDOR_ID; case PIPE_CAP_DEVICE_ID: - return sscreen->b.info.pci_id; + return sscreen->info.pci_id; case PIPE_CAP_VIDEO_MEMORY: - return sscreen->b.info.vram_size >> 20; + return sscreen->info.vram_size >> 20; case PIPE_CAP_PCI_GROUP: - return sscreen->b.info.pci_domain; + return sscreen->info.pci_domain; case PIPE_CAP_PCI_BUS: - return sscreen->b.info.pci_bus; + return sscreen->info.pci_bus; case PIPE_CAP_PCI_DEVICE: - return sscreen->b.info.pci_dev; + return sscreen->info.pci_dev; case PIPE_CAP_PCI_FUNCTION: - return sscreen->b.info.pci_func; + return sscreen->info.pci_func; } return 0; } @@ -450,7 +450,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_PREFERRED_IR: - if (sscreen->b.debug_flags & DBG(NIR) && + if (sscreen->debug_flags & DBG(NIR) && (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_FRAGMENT)) return PIPE_SHADER_IR_NIR; @@ -538,14 +538,14 @@ static void si_get_device_uuid(struct pipe_screen *pscreen, char *uuid) { struct si_screen *sscreen = (struct si_screen *)pscreen; - ac_compute_device_uuid(&sscreen->b.info, uuid, PIPE_UUID_SIZE); + ac_compute_device_uuid(&sscreen->info, uuid, PIPE_UUID_SIZE); } static const char* si_get_name(struct pipe_screen *pscreen) { struct si_screen *sscreen = (struct si_screen*)pscreen; - return sscreen->b.renderer_string; + return sscreen->renderer_string; } static int si_get_video_param_no_decode(struct pipe_screen *screen, @@ -588,14 +588,14 @@ static int si_get_video_param(struct pipe_screen *screen, switch (param) { case PIPE_VIDEO_CAP_SUPPORTED: return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC && - (si_vce_is_fw_version_supported(&sscreen->b) || - sscreen->b.family == CHIP_RAVEN); + (si_vce_is_fw_version_supported(sscreen) || + sscreen->info.family == CHIP_RAVEN); case PIPE_VIDEO_CAP_NPOT_TEXTURES: return 1; case PIPE_VIDEO_CAP_MAX_WIDTH: - return (sscreen->b.family < CHIP_TONGA) ? 2048 : 4096; + return (sscreen->info.family < CHIP_TONGA) ? 2048 : 4096; case PIPE_VIDEO_CAP_MAX_HEIGHT: - return (sscreen->b.family < CHIP_TONGA) ? 1152 : 2304; + return (sscreen->info.family < CHIP_TONGA) ? 1152 : 2304; case PIPE_VIDEO_CAP_PREFERED_FORMAT: return PIPE_FORMAT_NV12; case PIPE_VIDEO_CAP_PREFERS_INTERLACED: @@ -605,7 +605,7 @@ static int si_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: return true; case PIPE_VIDEO_CAP_STACKED_FRAMES: - return (sscreen->b.family < CHIP_TONGA) ? 1 : 2; + return (sscreen->info.family < CHIP_TONGA) ? 1 : 2; default: return 0; } @@ -619,9 +619,9 @@ static int si_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_FORMAT_MPEG4: return 1; case PIPE_VIDEO_FORMAT_MPEG4_AVC: - if ((sscreen->b.family == CHIP_POLARIS10 || - sscreen->b.family == CHIP_POLARIS11) && - sscreen->b.info.uvd_fw_version < UVD_FW_1_66_16 ) { + if ((sscreen->info.family == CHIP_POLARIS10 || + sscreen->info.family == CHIP_POLARIS11) && + sscreen->info.uvd_fw_version < UVD_FW_1_66_16 ) { RVID_ERR("POLARIS10/11 firmware version need to be updated.\n"); return false; } @@ -630,16 +630,16 @@ static int si_get_video_param(struct pipe_screen *screen, return true; case PIPE_VIDEO_FORMAT_HEVC: /* Carrizo only supports HEVC Main */ - if (sscreen->b.family >= CHIP_STONEY) + if (sscreen->info.family >= CHIP_STONEY) return (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN || profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10); - else if (sscreen->b.family >= CHIP_CARRIZO) + else if (sscreen->info.family >= CHIP_CARRIZO) return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN; return false; case PIPE_VIDEO_FORMAT_JPEG: - if (sscreen->b.family < CHIP_CARRIZO || sscreen->b.family >= CHIP_VEGA10) + if (sscreen->info.family < CHIP_CARRIZO || sscreen->info.family >= CHIP_VEGA10) return false; - if (!(sscreen->b.info.drm_major == 3 && sscreen->b.info.drm_minor >= 19)) { + if (!(sscreen->info.drm_major == 3 && sscreen->info.drm_minor >= 19)) { RVID_ERR("No MJPEG support for the kernel version\n"); return false; } @@ -650,9 +650,9 @@ static int si_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_NPOT_TEXTURES: return 1; case PIPE_VIDEO_CAP_MAX_WIDTH: - return (sscreen->b.family < CHIP_TONGA) ? 2048 : 4096; + return (sscreen->info.family < CHIP_TONGA) ? 2048 : 4096; case PIPE_VIDEO_CAP_MAX_HEIGHT: - return (sscreen->b.family < CHIP_TONGA) ? 1152 : 4096; + return (sscreen->info.family < CHIP_TONGA) ? 1152 : 4096; case PIPE_VIDEO_CAP_PREFERED_FORMAT: if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) return PIPE_FORMAT_P016; @@ -691,7 +691,7 @@ static int si_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: - return (sscreen->b.family < CHIP_TONGA) ? 41 : 52; + return (sscreen->info.family < CHIP_TONGA) ? 41 : 52; case PIPE_VIDEO_PROFILE_HEVC_MAIN: case PIPE_VIDEO_PROFILE_HEVC_MAIN_10: return 186; @@ -727,7 +727,7 @@ static unsigned get_max_threads_per_block(struct si_screen *screen, return 256; /* Only 16 waves per thread-group on gfx9. */ - if (screen->b.chip_class >= GFX9) + if (screen->info.chip_class >= GFX9) return 1024; /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice @@ -754,7 +754,7 @@ static int si_get_compute_param(struct pipe_screen *screen, else triple = "amdgcn-mesa-mesa3d"; - gpu = ac_get_llvm_processor_name(sscreen->b.family); + gpu = ac_get_llvm_processor_name(sscreen->info.family); if (ret) { sprintf(ret, "%s-%s", gpu, triple); } @@ -816,8 +816,8 @@ static int si_get_compute_param(struct pipe_screen *screen, * 4 * MAX_MEM_ALLOC_SIZE. */ *max_global_size = MIN2(4 * max_mem_alloc_size, - MAX2(sscreen->b.info.gart_size, - sscreen->b.info.vram_size)); + MAX2(sscreen->info.gart_size, + sscreen->info.vram_size)); } return sizeof(uint64_t); @@ -841,21 +841,21 @@ static int si_get_compute_param(struct pipe_screen *screen, if (ret) { uint64_t *max_mem_alloc_size = ret; - *max_mem_alloc_size = sscreen->b.info.max_alloc_size; + *max_mem_alloc_size = sscreen->info.max_alloc_size; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: if (ret) { uint32_t *max_clock_frequency = ret; - *max_clock_frequency = sscreen->b.info.max_shader_clock; + *max_clock_frequency = sscreen->info.max_shader_clock; } return sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: if (ret) { uint32_t *max_compute_units = ret; - *max_compute_units = sscreen->b.info.num_good_compute_units; + *max_compute_units = sscreen->info.num_good_compute_units; } return sizeof(uint32_t); @@ -892,19 +892,19 @@ static uint64_t si_get_timestamp(struct pipe_screen *screen) { struct si_screen *sscreen = (struct si_screen*)screen; - return 1000000 * sscreen->b.ws->query_value(sscreen->b.ws, RADEON_TIMESTAMP) / - sscreen->b.info.clock_crystal_freq; + return 1000000 * sscreen->ws->query_value(sscreen->ws, RADEON_TIMESTAMP) / + sscreen->info.clock_crystal_freq; } static void si_query_memory_info(struct pipe_screen *screen, struct pipe_memory_info *info) { struct si_screen *sscreen = (struct si_screen*)screen; - struct radeon_winsys *ws = sscreen->b.ws; + struct radeon_winsys *ws = sscreen->ws; unsigned vram_usage, gtt_usage; - info->total_device_memory = sscreen->b.info.vram_size / 1024; - info->total_staging_memory = sscreen->b.info.gart_size / 1024; + info->total_device_memory = sscreen->info.vram_size / 1024; + info->total_staging_memory = sscreen->info.gart_size / 1024; /* The real TTM memory usage is somewhat random, because: * @@ -929,7 +929,7 @@ static void si_query_memory_info(struct pipe_screen *screen, info->device_memory_evicted = ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; - if (sscreen->b.info.drm_major == 3 && sscreen->b.info.drm_minor >= 4) + if (sscreen->info.drm_major == 3 && sscreen->info.drm_minor >= 4) info->nr_device_memory_evictions = ws->query_value(ws, RADEON_NUM_EVICTIONS); else @@ -941,12 +941,12 @@ static struct disk_cache *si_get_disk_shader_cache(struct pipe_screen *pscreen) { struct si_screen *sscreen = (struct si_screen*)pscreen; - return sscreen->b.disk_shader_cache; + return sscreen->disk_shader_cache; } static void si_init_renderer_string(struct si_screen *sscreen) { - struct radeon_winsys *ws = sscreen->b.ws; + struct radeon_winsys *ws = sscreen->ws; char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {}; struct utsname uname_data; @@ -968,35 +968,35 @@ static void si_init_renderer_string(struct si_screen *sscreen) HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); } - snprintf(sscreen->b.renderer_string, sizeof(sscreen->b.renderer_string), + snprintf(sscreen->renderer_string, sizeof(sscreen->renderer_string), "%s (%sDRM %i.%i.%i%s%s)", - chip_name, family_name, sscreen->b.info.drm_major, - sscreen->b.info.drm_minor, sscreen->b.info.drm_patchlevel, + chip_name, family_name, sscreen->info.drm_major, + sscreen->info.drm_minor, sscreen->info.drm_patchlevel, kernel_version, llvm_string); } void si_init_screen_get_functions(struct si_screen *sscreen) { - sscreen->b.b.get_name = si_get_name; - sscreen->b.b.get_vendor = si_get_vendor; - sscreen->b.b.get_device_vendor = si_get_device_vendor; - sscreen->b.b.get_param = si_get_param; - sscreen->b.b.get_paramf = si_get_paramf; - sscreen->b.b.get_compute_param = si_get_compute_param; - sscreen->b.b.get_timestamp = si_get_timestamp; - sscreen->b.b.get_shader_param = si_get_shader_param; - sscreen->b.b.get_compiler_options = si_get_compiler_options; - sscreen->b.b.get_device_uuid = si_get_device_uuid; - sscreen->b.b.get_driver_uuid = si_get_driver_uuid; - sscreen->b.b.query_memory_info = si_query_memory_info; - sscreen->b.b.get_disk_shader_cache = si_get_disk_shader_cache; - - if (sscreen->b.info.has_hw_decode) { - sscreen->b.b.get_video_param = si_get_video_param; - sscreen->b.b.is_video_format_supported = si_vid_is_format_supported; + sscreen->b.get_name = si_get_name; + sscreen->b.get_vendor = si_get_vendor; + sscreen->b.get_device_vendor = si_get_device_vendor; + sscreen->b.get_param = si_get_param; + sscreen->b.get_paramf = si_get_paramf; + sscreen->b.get_compute_param = si_get_compute_param; + sscreen->b.get_timestamp = si_get_timestamp; + sscreen->b.get_shader_param = si_get_shader_param; + sscreen->b.get_compiler_options = si_get_compiler_options; + sscreen->b.get_device_uuid = si_get_device_uuid; + sscreen->b.get_driver_uuid = si_get_driver_uuid; + sscreen->b.query_memory_info = si_query_memory_info; + sscreen->b.get_disk_shader_cache = si_get_disk_shader_cache; + + if (sscreen->info.has_hw_decode) { + sscreen->b.get_video_param = si_get_video_param; + sscreen->b.is_video_format_supported = si_vid_is_format_supported; } else { - sscreen->b.b.get_video_param = si_get_video_param_no_decode; - sscreen->b.b.is_video_format_supported = vl_video_buffer_is_format_supported; + sscreen->b.get_video_param = si_get_video_param_no_decode; + sscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; } si_init_renderer_string(sscreen); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 4e94c472ad7..d46c1093f24 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -82,7 +82,7 @@ void si_context_gfx_flush(void *context, unsigned flags, if (si_check_device_reset(&ctx->b)) return; - if (ctx->screen->b.debug_flags & DBG(CHECK_VM)) + if (ctx->screen->debug_flags & DBG(CHECK_VM)) flags &= ~RADEON_FLUSH_ASYNC; /* If the state tracker is flushing the GFX IB, r600_flush_from_st is @@ -136,7 +136,7 @@ void si_context_gfx_flush(void *context, unsigned flags, ctx->b.num_gfx_cs_flushes++; /* Check VM faults if needed. */ - if (ctx->screen->b.debug_flags & DBG(CHECK_VM)) { + if (ctx->screen->debug_flags & DBG(CHECK_VM)) { /* Use conservative timeout 800ms, after which we won't wait any * longer and assume the GPU is hung. */ diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index b7429673d3a..1cf004dff83 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -670,10 +670,10 @@ static void si_pc_emit_read(struct r600_common_context *ctx, } } -static void si_pc_cleanup(struct r600_common_screen *rscreen) +static void si_pc_cleanup(struct si_screen *sscreen) { - si_perfcounters_do_destroy(rscreen->perfcounters); - rscreen->perfcounters = NULL; + si_perfcounters_do_destroy(sscreen->perfcounters); + sscreen->perfcounters = NULL; } void si_init_perfcounters(struct si_screen *screen) @@ -683,7 +683,7 @@ void si_init_perfcounters(struct si_screen *screen) unsigned num_blocks; unsigned i; - switch (screen->b.chip_class) { + switch (screen->info.chip_class) { case CIK: blocks = groups_CIK; num_blocks = ARRAY_SIZE(groups_CIK); @@ -701,11 +701,11 @@ void si_init_perfcounters(struct si_screen *screen) return; /* not implemented */ } - if (screen->b.info.max_sh_per_se != 1) { + if (screen->info.max_sh_per_se != 1) { /* This should not happen on non-SI chips. */ fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not " "supported (inaccurate performance counters)\n", - screen->b.info.max_sh_per_se); + screen->info.max_sh_per_se); } pc = CALLOC_STRUCT(r600_perfcounters); @@ -713,7 +713,7 @@ void si_init_perfcounters(struct si_screen *screen) return; pc->num_start_cs_dwords = 14; - pc->num_stop_cs_dwords = 14 + si_gfx_write_fence_dwords(&screen->b); + pc->num_stop_cs_dwords = 14 + si_gfx_write_fence_dwords(screen); pc->num_instance_cs_dwords = 3; pc->num_shaders_cs_dwords = 4; @@ -738,11 +738,11 @@ void si_init_perfcounters(struct si_screen *screen) unsigned instances = block->instances; if (!strcmp(block->b->name, "IA")) { - if (screen->b.info.max_se > 2) + if (screen->info.max_se > 2) instances = 2; } - si_perfcounters_add_block(&screen->b, pc, + si_perfcounters_add_block(screen, pc, block->b->name, block->b->flags, block->b->num_counters, @@ -751,7 +751,7 @@ void si_init_perfcounters(struct si_screen *screen) block); } - screen->b.perfcounters = pc; + screen->perfcounters = pc; return; error: diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 6c4e1832e42..5d7837dd116 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -207,12 +207,12 @@ static LLVMTargetMachineRef si_create_llvm_target_machine(struct si_screen *sscreen) { enum ac_target_machine_options tm_options = - (sscreen->b.debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) | - (sscreen->b.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) | - (sscreen->b.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) | + (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) | + (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) | + (sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) | (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0); - return ac_create_target_machine(sscreen->b.family, tm_options); + return ac_create_target_machine(sscreen->info.family, tm_options); } static void si_set_debug_callback(struct pipe_context *ctx, @@ -245,7 +245,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, { struct si_context *sctx = CALLOC_STRUCT(si_context); struct si_screen* sscreen = (struct si_screen *)screen; - struct radeon_winsys *ws = sscreen->b.ws; + struct radeon_winsys *ws = sscreen->ws; int shader, i; if (!sctx) @@ -264,10 +264,10 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; - if (!si_common_context_init(&sctx->b, &sscreen->b, flags)) + if (!si_common_context_init(&sctx->b, sscreen, flags)) goto fail; - if (sscreen->b.info.drm_major == 3) + if (sscreen->info.drm_major == 3) sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status; si_init_buffer_functions(sctx); @@ -279,7 +279,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, si_init_msaa_functions(sctx); si_init_streamout_functions(sctx); - if (sscreen->b.info.has_hw_decode) { + if (sscreen->info.has_hw_decode) { sctx->b.b.create_video_codec = si_uvd_create_decoder; sctx->b.b.create_video_buffer = si_video_buffer_create; } else { @@ -322,7 +322,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, else si_init_dma_functions(sctx); - if (sscreen->b.debug_flags & DBG(FORCE_DMA)) + if (sscreen->debug_flags & DBG(FORCE_DMA)) sctx->b.b.resource_copy_region = sctx->b.dma_copy; sctx->blitter = util_blitter_create(&sctx->b.b); @@ -360,7 +360,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, si_aligned_buffer_create(screen, R600_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, 16, - sctx->screen->b.info.tcc_cache_line_size); + sctx->screen->info.tcc_cache_line_size); if (!sctx->null_const_buf.buffer) goto fail; sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0; @@ -406,7 +406,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, * bring much benefit, but they still occupy chip resources (think * async compute). I've seen ~2% performance difference between 4 and 32. */ - sctx->scratch_waves = MAX2(32 * sscreen->b.info.num_good_compute_units, + sctx->scratch_waves = MAX2(32 * sscreen->info.num_good_compute_units, max_threads_per_block / 64); sctx->tm = si_create_llvm_target_machine(sscreen); @@ -436,7 +436,7 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen, struct si_screen *sscreen = (struct si_screen *)screen; struct pipe_context *ctx; - if (sscreen->b.debug_flags & DBG(CHECK_VM)) + if (sscreen->debug_flags & DBG(CHECK_VM)) flags |= PIPE_CONTEXT_DEBUG; ctx = si_create_context(screen, flags); @@ -450,14 +450,14 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen, /* When shaders are logged to stderr, asynchronous compilation is * disabled too. */ - if (sscreen->b.debug_flags & DBG_ALL_SHADERS) + if (sscreen->debug_flags & DBG_ALL_SHADERS) return ctx; /* Use asynchronous flushes only on amdgpu, since the radeon * implementation for fence_server_sync is incomplete. */ - return threaded_context_create(ctx, &sscreen->b.pool_transfers, + return threaded_context_create(ctx, &sscreen->pool_transfers, si_replace_buffer_storage, - sscreen->b.info.drm_major >= 3 ? si_create_fence : NULL, + sscreen->info.drm_major >= 3 ? si_create_fence : NULL, &((struct si_context*)ctx)->b.tc); } @@ -477,7 +477,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen) }; unsigned i; - if (!sscreen->b.ws->unref(sscreen->b.ws)) + if (!sscreen->ws->unref(sscreen->ws)) return; util_queue_destroy(&sscreen->shader_compiler_queue); @@ -504,27 +504,27 @@ static void si_destroy_screen(struct pipe_screen* pscreen) mtx_destroy(&sscreen->shader_parts_mutex); si_destroy_shader_cache(sscreen); - si_perfcounters_destroy(&sscreen->b); - si_gpu_load_kill_thread(&sscreen->b); + si_perfcounters_destroy(sscreen); + si_gpu_load_kill_thread(sscreen); - mtx_destroy(&sscreen->b.gpu_load_mutex); - mtx_destroy(&sscreen->b.aux_context_lock); - sscreen->b.aux_context->destroy(sscreen->b.aux_context); + mtx_destroy(&sscreen->gpu_load_mutex); + mtx_destroy(&sscreen->aux_context_lock); + sscreen->aux_context->destroy(sscreen->aux_context); - slab_destroy_parent(&sscreen->b.pool_transfers); + slab_destroy_parent(&sscreen->pool_transfers); - disk_cache_destroy(sscreen->b.disk_shader_cache); - sscreen->b.ws->destroy(sscreen->b.ws); + disk_cache_destroy(sscreen->disk_shader_cache); + sscreen->ws->destroy(sscreen->ws); FREE(sscreen); } static bool si_init_gs_info(struct si_screen *sscreen) { /* gs_table_depth is not used by GFX9 */ - if (sscreen->b.chip_class >= GFX9) + if (sscreen->info.chip_class >= GFX9) return true; - switch (sscreen->b.family) { + switch (sscreen->info.family) { case CHIP_OLAND: case CHIP_HAINAN: case CHIP_KAVERI: @@ -563,16 +563,16 @@ static void si_handle_env_var_force_family(struct si_screen *sscreen) for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { if (!strcmp(family, ac_get_llvm_processor_name(i))) { /* Override family and chip_class. */ - sscreen->b.family = sscreen->b.info.family = i; + sscreen->info.family = i; if (i >= CHIP_VEGA10) - sscreen->b.chip_class = sscreen->b.info.chip_class = GFX9; + sscreen->info.chip_class = GFX9; else if (i >= CHIP_TONGA) - sscreen->b.chip_class = sscreen->b.info.chip_class = VI; + sscreen->info.chip_class = VI; else if (i >= CHIP_BONAIRE) - sscreen->b.chip_class = sscreen->b.info.chip_class = CIK; + sscreen->info.chip_class = CIK; else - sscreen->b.chip_class = sscreen->b.info.chip_class = SI; + sscreen->info.chip_class = SI; /* Don't submit any IBs. */ setenv("RADEON_NOOP", "1", 1); @@ -586,10 +586,10 @@ static void si_handle_env_var_force_family(struct si_screen *sscreen) static void si_test_vmfault(struct si_screen *sscreen) { - struct pipe_context *ctx = sscreen->b.aux_context; + struct pipe_context *ctx = sscreen->aux_context; struct si_context *sctx = (struct si_context *)ctx; struct pipe_resource *buf = - pipe_buffer_create(&sscreen->b.b, 0, PIPE_USAGE_DEFAULT, 64); + pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT, 64); if (!buf) { puts("Buffer allocation failed."); @@ -598,17 +598,17 @@ static void si_test_vmfault(struct si_screen *sscreen) r600_resource(buf)->gpu_address = 0; /* cause a VM fault */ - if (sscreen->b.debug_flags & DBG(TEST_VMFAULT_CP)) { + if (sscreen->debug_flags & DBG(TEST_VMFAULT_CP)) { si_copy_buffer(sctx, buf, buf, 0, 4, 4, 0); ctx->flush(ctx, NULL, 0); puts("VM fault test: CP - done."); } - if (sscreen->b.debug_flags & DBG(TEST_VMFAULT_SDMA)) { + if (sscreen->debug_flags & DBG(TEST_VMFAULT_SDMA)) { sctx->b.dma_clear_buffer(ctx, buf, 0, 4, 0); ctx->flush(ctx, NULL, 0); puts("VM fault test: SDMA - done."); } - if (sscreen->b.debug_flags & DBG(TEST_VMFAULT_SHADER)) { + if (sscreen->debug_flags & DBG(TEST_VMFAULT_SHADER)) { util_test_constant_buffer(ctx, buf); puts("VM fault test: Shader - done."); } @@ -618,11 +618,11 @@ static void si_test_vmfault(struct si_screen *sscreen) static void si_disk_cache_create(struct si_screen *sscreen) { /* Don't use the cache if shader dumping is enabled. */ - if (sscreen->b.debug_flags & DBG_ALL_SHADERS) + if (sscreen->debug_flags & DBG_ALL_SHADERS) return; /* TODO: remove this once gallium supports a nir cache */ - if (sscreen->b.debug_flags & DBG(NIR)) + if (sscreen->debug_flags & DBG(NIR)) return; uint32_t mesa_timestamp; @@ -641,12 +641,12 @@ static void si_disk_cache_create(struct si_screen *sscreen) if (res != -1) { /* These flags affect shader compilation. */ uint64_t shader_debug_flags = - sscreen->b.debug_flags & + sscreen->debug_flags & (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | DBG(SI_SCHED) | DBG(UNSAFE_MATH)); - sscreen->b.disk_shader_cache = + sscreen->disk_shader_cache = disk_cache_create(si_get_family_name(sscreen), timestamp_str, shader_debug_flags); @@ -665,50 +665,48 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, return NULL; } - sscreen->b.ws = ws; - ws->query_info(ws, &sscreen->b.info); + sscreen->ws = ws; + ws->query_info(ws, &sscreen->info); - sscreen->b.family = sscreen->b.info.family; - sscreen->b.chip_class = sscreen->b.info.chip_class; - sscreen->b.debug_flags = debug_get_flags_option("R600_DEBUG", + sscreen->debug_flags = debug_get_flags_option("R600_DEBUG", debug_options, 0); /* Set functions first. */ - sscreen->b.b.context_create = si_pipe_create_context; - sscreen->b.b.destroy = si_destroy_screen; + sscreen->b.context_create = si_pipe_create_context; + sscreen->b.destroy = si_destroy_screen; si_init_screen_get_functions(sscreen); si_init_screen_buffer_functions(sscreen); si_init_screen_fence_functions(sscreen); si_init_screen_state_functions(sscreen); - si_init_screen_texture_functions(&sscreen->b); - si_init_screen_query_functions(&sscreen->b); + si_init_screen_texture_functions(sscreen); + si_init_screen_query_functions(sscreen); /* Set these flags in debug_flags early, so that the shader cache takes * them into account. */ if (driQueryOptionb(config->options, "glsl_correct_derivatives_after_discard")) - sscreen->b.debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL); + sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL); if (driQueryOptionb(config->options, "radeonsi_enable_sisched")) - sscreen->b.debug_flags |= DBG(SI_SCHED); + sscreen->debug_flags |= DBG(SI_SCHED); - if (sscreen->b.debug_flags & DBG(INFO)) - ac_print_gpu_info(&sscreen->b.info); + if (sscreen->debug_flags & DBG(INFO)) + ac_print_gpu_info(&sscreen->info); - slab_create_parent(&sscreen->b.pool_transfers, + slab_create_parent(&sscreen->pool_transfers, sizeof(struct r600_transfer), 64); - sscreen->b.force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); - if (sscreen->b.force_aniso >= 0) { + sscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); + if (sscreen->force_aniso >= 0) { printf("radeonsi: Forcing anisotropy filter to %ix\n", /* round down to a power of two */ - 1 << util_logbase2(sscreen->b.force_aniso)); + 1 << util_logbase2(sscreen->force_aniso)); } - (void) mtx_init(&sscreen->b.aux_context_lock, mtx_plain); - (void) mtx_init(&sscreen->b.gpu_load_mutex, mtx_plain); + (void) mtx_init(&sscreen->aux_context_lock, mtx_plain); + (void) mtx_init(&sscreen->gpu_load_mutex, mtx_plain); if (!si_init_gs_info(sscreen) || !si_init_shader_cache(sscreen)) { @@ -754,96 +752,96 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, * around by setting 4K granularity. */ sscreen->tess_offchip_block_dw_size = - sscreen->b.family == CHIP_HAWAII ? 4096 : 8192; + sscreen->info.family == CHIP_HAWAII ? 4096 : 8192; /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs * on SI. */ - sscreen->has_clear_state = sscreen->b.chip_class >= CIK; + sscreen->has_clear_state = sscreen->info.chip_class >= CIK; sscreen->has_distributed_tess = - sscreen->b.chip_class >= VI && - sscreen->b.info.max_se >= 2; + sscreen->info.chip_class >= VI && + sscreen->info.max_se >= 2; sscreen->has_draw_indirect_multi = - (sscreen->b.family >= CHIP_POLARIS10) || - (sscreen->b.chip_class == VI && - sscreen->b.info.pfp_fw_version >= 121 && - sscreen->b.info.me_fw_version >= 87) || - (sscreen->b.chip_class == CIK && - sscreen->b.info.pfp_fw_version >= 211 && - sscreen->b.info.me_fw_version >= 173) || - (sscreen->b.chip_class == SI && - sscreen->b.info.pfp_fw_version >= 79 && - sscreen->b.info.me_fw_version >= 142); - - sscreen->has_out_of_order_rast = sscreen->b.chip_class >= VI && - sscreen->b.info.max_se >= 2 && - !(sscreen->b.debug_flags & DBG(NO_OUT_OF_ORDER)); + (sscreen->info.family >= CHIP_POLARIS10) || + (sscreen->info.chip_class == VI && + sscreen->info.pfp_fw_version >= 121 && + sscreen->info.me_fw_version >= 87) || + (sscreen->info.chip_class == CIK && + sscreen->info.pfp_fw_version >= 211 && + sscreen->info.me_fw_version >= 173) || + (sscreen->info.chip_class == SI && + sscreen->info.pfp_fw_version >= 79 && + sscreen->info.me_fw_version >= 142); + + sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI && + sscreen->info.max_se >= 2 && + !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER)); sscreen->assume_no_z_fights = driQueryOptionb(config->options, "radeonsi_assume_no_z_fights"); sscreen->commutative_blend_add = driQueryOptionb(config->options, "radeonsi_commutative_blend_add"); sscreen->clear_db_cache_before_clear = driQueryOptionb(config->options, "radeonsi_clear_db_cache_before_clear"); - sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 && - sscreen->b.family <= CHIP_POLARIS12) || - sscreen->b.family == CHIP_VEGA10 || - sscreen->b.family == CHIP_RAVEN; - sscreen->has_ls_vgpr_init_bug = sscreen->b.family == CHIP_VEGA10 || - sscreen->b.family == CHIP_RAVEN; - - if (sscreen->b.debug_flags & DBG(DPBB)) { + sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 && + sscreen->info.family <= CHIP_POLARIS12) || + sscreen->info.family == CHIP_VEGA10 || + sscreen->info.family == CHIP_RAVEN; + sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 || + sscreen->info.family == CHIP_RAVEN; + + if (sscreen->debug_flags & DBG(DPBB)) { sscreen->dpbb_allowed = true; } else { /* Only enable primitive binning on Raven by default. */ - sscreen->dpbb_allowed = sscreen->b.family == CHIP_RAVEN && - !(sscreen->b.debug_flags & DBG(NO_DPBB)); + sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN && + !(sscreen->debug_flags & DBG(NO_DPBB)); } - if (sscreen->b.debug_flags & DBG(DFSM)) { + if (sscreen->debug_flags & DBG(DFSM)) { sscreen->dfsm_allowed = sscreen->dpbb_allowed; } else { sscreen->dfsm_allowed = sscreen->dpbb_allowed && - !(sscreen->b.debug_flags & DBG(NO_DFSM)); + !(sscreen->debug_flags & DBG(NO_DFSM)); } /* While it would be nice not to have this flag, we are constrained * by the reality that LLVM 5.0 doesn't have working VGPR indexing * on GFX9. */ - sscreen->llvm_has_working_vgpr_indexing = sscreen->b.chip_class <= VI; + sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI; /* Some chips have RB+ registers, but don't support RB+. Those must * always disable it. */ - if (sscreen->b.family == CHIP_STONEY || - sscreen->b.chip_class >= GFX9) { - sscreen->b.has_rbplus = true; - - sscreen->b.rbplus_allowed = - !(sscreen->b.debug_flags & DBG(NO_RB_PLUS)) && - (sscreen->b.family == CHIP_STONEY || - sscreen->b.family == CHIP_RAVEN); + if (sscreen->info.family == CHIP_STONEY || + sscreen->info.chip_class >= GFX9) { + sscreen->has_rbplus = true; + + sscreen->rbplus_allowed = + !(sscreen->debug_flags & DBG(NO_RB_PLUS)) && + (sscreen->info.family == CHIP_STONEY || + sscreen->info.family == CHIP_RAVEN); } - sscreen->b.dcc_msaa_allowed = - !(sscreen->b.debug_flags & DBG(NO_DCC_MSAA)) && - (sscreen->b.debug_flags & DBG(DCC_MSAA) || - sscreen->b.chip_class == VI); + sscreen->dcc_msaa_allowed = + !(sscreen->debug_flags & DBG(NO_DCC_MSAA)) && + (sscreen->debug_flags & DBG(DCC_MSAA) || + sscreen->info.chip_class == VI); (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain); sscreen->use_monolithic_shaders = - (sscreen->b.debug_flags & DBG(MONOLITHIC_SHADERS)) != 0; + (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0; - sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | + sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_INV_VMEM_L1; - if (sscreen->b.chip_class <= VI) { - sscreen->b.barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2; - sscreen->b.barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; + if (sscreen->info.chip_class <= VI) { + sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2; + sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) - sscreen->b.debug_flags |= DBG_ALL_SHADERS; + sscreen->debug_flags |= DBG_ALL_SHADERS; for (i = 0; i < num_compiler_threads; i++) sscreen->tm[i] = si_create_llvm_target_machine(sscreen); @@ -851,15 +849,15 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->tm_low_priority[i] = si_create_llvm_target_machine(sscreen); /* Create the auxiliary context. This must be done last. */ - sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0); + sscreen->aux_context = si_create_context(&sscreen->b, 0); - if (sscreen->b.debug_flags & DBG(TEST_DMA)) + if (sscreen->debug_flags & DBG(TEST_DMA)) si_test_dma(sscreen); - if (sscreen->b.debug_flags & (DBG(TEST_VMFAULT_CP) | + if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) | DBG(TEST_VMFAULT_SDMA) | DBG(TEST_VMFAULT_SHADER))) si_test_vmfault(sscreen); - return &sscreen->b.b; + return &sscreen->b; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 0c16cfb71b8..7a099376021 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -94,7 +94,14 @@ struct hash_table; struct u_suballocator; struct si_screen { - struct r600_common_screen b; + struct pipe_screen b; + struct radeon_winsys *ws; + struct disk_cache *disk_shader_cache; + + struct radeon_info info; + uint64_t debug_flags; + char renderer_string[100]; + unsigned gs_table_depth; unsigned tess_offchip_block_dw_size; bool has_clear_state; @@ -113,6 +120,65 @@ struct si_screen { /* Whether shaders are monolithic (1-part) or separate (3-part). */ bool use_monolithic_shaders; bool record_llvm_ir; + bool has_rbplus; /* if RB+ registers exist */ + bool rbplus_allowed; /* if RB+ is allowed */ + bool dcc_msaa_allowed; + + struct slab_parent_pool pool_transfers; + + /* Texture filter settings. */ + int force_aniso; /* -1 = disabled */ + + /* Auxiliary context. Mainly used to initialize resources. + * It must be locked prior to using and flushed before unlocking. */ + struct pipe_context *aux_context; + mtx_t aux_context_lock; + + /* This must be in the screen, because UE4 uses one context for + * compilation and another one for rendering. + */ + unsigned num_compilations; + /* Along with ST_DEBUG=precompile, this should show if applications + * are loading shaders on demand. This is a monotonic counter. + */ + unsigned num_shaders_created; + unsigned num_shader_cache_hits; + + /* GPU load thread. */ + mtx_t gpu_load_mutex; + thrd_t gpu_load_thread; + union r600_mmio_counters mmio_counters; + volatile unsigned gpu_load_stop_thread; /* bool */ + + /* Performance counters. */ + struct r600_perfcounters *perfcounters; + + /* If pipe_screen wants to recompute and re-emit the framebuffer, + * sampler, and image states of all contexts, it should atomically + * increment this. + * + * Each context will compare this with its own last known value of + * the counter before drawing and re-emit the states accordingly. + */ + unsigned dirty_tex_counter; + + /* Atomically increment this counter when an existing texture's + * metadata is enabled or disabled in a way that requires changing + * contexts' compressed texture binding masks. + */ + unsigned compressed_colortex_counter; + + struct { + /* Context flags to set so that all writes from earlier jobs + * in the CP are seen by L2 clients. + */ + unsigned cp_to_L2; + + /* Context flags to set so that all writes from earlier jobs + * that end in L2 are seen by CP. + */ + unsigned L2_to_cp; + } barrier_flags; mtx_t shader_parts_mutex; struct si_shader_part *vs_prologs; @@ -753,13 +819,13 @@ static inline struct si_shader* si_get_vs_state(struct si_context *sctx) static inline bool si_can_dump_shader(struct si_screen *sscreen, unsigned processor) { - return sscreen->b.debug_flags & (1 << processor); + return sscreen->debug_flags & (1 << processor); } static inline bool si_extra_shader_checks(struct si_screen *sscreen, unsigned processor) { - return (sscreen->b.debug_flags & DBG(CHECK_IR)) || + return (sscreen->debug_flags & DBG(CHECK_IR)) || si_can_dump_shader(sscreen, processor); } @@ -780,7 +846,7 @@ si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size) * If the upload size is greater, align it to the cache line size. */ alignment = util_next_power_of_two(upload_size); - tcc_cache_line_size = sctx->screen->b.info.tcc_cache_line_size; + tcc_cache_line_size = sctx->screen->info.tcc_cache_line_size; return MIN2(alignment, tcc_cache_line_size); } diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 98aa94a4f5f..96e4e1dd1a7 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -173,7 +173,7 @@ void si_pm4_upload_indirect_buffer(struct si_context *sctx, return; /* Pad the IB to 8 DWs to meet CP fetch alignment requirements. */ - if (sctx->screen->b.info.gfx_ib_pad_with_type2) { + if (sctx->screen->info.gfx_ib_pad_with_type2) { for (int i = state->ndw; i < aligned_ndw; i++) state->pm4[i] = 0x80000000; /* type2 nop packet */ } else { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index e942d345dbc..d3e5e9734e9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -105,7 +105,7 @@ enum { static bool is_merged_shader(struct si_shader *shader) { - if (shader->selector->screen->b.chip_class <= VI) + if (shader->selector->screen->info.chip_class <= VI) return false; return shader->key.as_ls || @@ -407,7 +407,7 @@ static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx) return LLVMConstInt(ctx->i32, stride * 4, 0); case PIPE_SHADER_TESS_CTRL: - if (ctx->screen->b.chip_class >= GFX9 && + if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) { stride = util_last_bit64(ctx->shader->key.part.tcs.ls->outputs_written); return LLVMConstInt(ctx->i32, stride * 4, 0); @@ -1323,7 +1323,7 @@ static LLVMValueRef fetch_input_gs( param = si_shader_io_get_unique_index(semantic_name, semantic_index); /* GFX9 has the ESGS ring in LDS. */ - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { unsigned index = reg->Dimension.Index; switch (index / 2) { @@ -2009,7 +2009,7 @@ static LLVMValueRef fetch_constant( * s_buffer_load_dword (that we have to prevent) is when we use use * a literal offset where we don't need bounds checking. */ - if (ctx->screen->b.chip_class == SI && + if (ctx->screen->info.chip_class == SI && HAVE_LLVM < 0x0600 && !reg->Register.Indirect) { addr = LLVMBuildLShr(ctx->ac.builder, addr, LLVMConstInt(ctx->i32, 2, 0), ""); @@ -2658,7 +2658,7 @@ static void si_llvm_export_vs(struct si_shader_context *ctx, pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value); } - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { /* GFX9 has the layer in out.z[10:0] and the viewport * index in out.z[19:16]. */ @@ -2870,7 +2870,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, /* Store the dynamic HS control word. */ offset = 0; - if (ctx->screen->b.chip_class <= VI) { + if (ctx->screen->info.chip_class <= VI) { ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->i32, 0x80000000, 0), 1, ctx->i32_0, tf_base, @@ -2977,7 +2977,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5); tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx); - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { LLVMBasicBlockRef blocks[2] = { LLVMGetInsertBlock(builder), ctx->merged_wrap_if_state.entry_block @@ -3003,7 +3003,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) LLVMValueRef ret = ctx->return_value; unsigned vgpr; - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k, @@ -3180,7 +3180,7 @@ static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base) } } - if (ctx->screen->b.chip_class >= GFX9) + if (ctx->screen->info.chip_class >= GFX9) si_set_ls_return_value_for_tcs(ctx); } @@ -3195,7 +3195,7 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base) unsigned chan; int i; - if (ctx->screen->b.chip_class >= GFX9 && info->num_outputs) { + if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) { unsigned itemsize_dw = es->selector->esgs_itemsize / 4; LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac); LLVMValueRef wave_idx = unpack_param(ctx, ctx->param_merged_wave_info, 24, 4); @@ -3222,7 +3222,7 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base) out_val = ac_to_integer(&ctx->ac, out_val); /* GFX9 has the ESGS ring in LDS. */ - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { lds_store(bld_base, param * 4 + chan, lds_base, out_val); continue; } @@ -3235,13 +3235,13 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base) } } - if (ctx->screen->b.chip_class >= GFX9) + if (ctx->screen->info.chip_class >= GFX9) si_set_es_return_value_for_gs(ctx); } static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx) { - if (ctx->screen->b.chip_class >= GFX9) + if (ctx->screen->info.chip_class >= GFX9) return unpack_param(ctx, ctx->param_merged_wave_info, 16, 8); else return LLVMGetParam(ctx->main_fn, ctx->param_gs_wave_id); @@ -3254,7 +3254,7 @@ static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base) ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, si_get_gs_wave_id(ctx)); - if (ctx->screen->b.chip_class >= GFX9) + if (ctx->screen->info.chip_class >= GFX9) lp_build_endif(&ctx->merged_wrap_if_state); } @@ -3436,9 +3436,9 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, /* SI (except OLAND and HAINAN) has a bug that it only looks * at the X writemask component. */ - if (ctx->screen->b.chip_class == SI && - ctx->screen->b.family != CHIP_OLAND && - ctx->screen->b.family != CHIP_HAINAN) + if (ctx->screen->info.chip_class == SI && + ctx->screen->info.family != CHIP_OLAND && + ctx->screen->info.family != CHIP_HAINAN) mask |= 0x1; /* Specify which components to enable */ @@ -4152,7 +4152,7 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. */ - if (ctx->screen->b.chip_class == SI && + if (ctx->screen->info.chip_class == SI && ctx->type == PIPE_SHADER_TESS_CTRL) { si_emit_waitcnt(ctx, LGKM_CNT & VM_CNT); return; @@ -4211,7 +4211,7 @@ static void si_create_function(struct si_shader_context *ctx, "no-signed-zeros-fp-math", "true"); - if (ctx->screen->b.debug_flags & DBG(UNSAFE_MATH)) { + if (ctx->screen->debug_flags & DBG(UNSAFE_MATH)) { /* These were copied from some LLVM test. */ LLVMAddTargetDependentFunctionAttr(ctx->main_fn, "less-precise-fpmad", @@ -4258,10 +4258,10 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader) case PIPE_SHADER_TESS_CTRL: /* Return this so that LLVM doesn't remove s_barrier * instructions on chips where we use s_barrier. */ - return shader->selector->screen->b.chip_class >= CIK ? 128 : 64; + return shader->selector->screen->info.chip_class >= CIK ? 128 : 64; case PIPE_SHADER_GEOMETRY: - return shader->selector->screen->b.chip_class >= GFX9 ? 128 : 64; + return shader->selector->screen->info.chip_class >= GFX9 ? 128 : 64; case PIPE_SHADER_COMPUTE: break; /* see below */ @@ -4387,7 +4387,7 @@ static void create_function(struct si_shader_context *ctx) si_init_function_info(&fninfo); /* Set MERGED shaders. */ - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { if (shader->key.as_ls || type == PIPE_SHADER_TESS_CTRL) type = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */ else if (shader->key.as_es || type == PIPE_SHADER_GEOMETRY) @@ -4754,7 +4754,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx) LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers); - if (ctx->screen->b.chip_class <= VI && + if (ctx->screen->info.chip_class <= VI && (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) { unsigned ring = ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS @@ -5017,14 +5017,14 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) r600_resource_reference(&shader->bo, NULL); shader->bo = (struct r600_resource*) - pipe_buffer_create(&sscreen->b.b, 0, + pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_IMMUTABLE, align(bo_size, SI_CPDMA_ALIGNMENT)); if (!shader->bo) return -ENOMEM; /* Upload. */ - ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL, + ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL, PIPE_TRANSFER_READ_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED); @@ -5051,7 +5051,7 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) else if (mainb->rodata_size > 0) memcpy(ptr, mainb->rodata, mainb->rodata_size); - sscreen->b.ws->buffer_unmap(shader->bo->buf); + sscreen->ws->buffer_unmap(shader->bo->buf); return 0; } @@ -5113,11 +5113,11 @@ static void si_shader_dump_stats(struct si_screen *sscreen, const struct si_shader_config *conf = &shader->config; unsigned num_inputs = shader->selector ? shader->selector->info.num_inputs : 0; unsigned code_size = si_get_shader_binary_size(shader); - unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256; + unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256; unsigned lds_per_wave = 0; unsigned max_simd_waves; - switch (sscreen->b.family) { + switch (sscreen->info.family) { /* These always have 8 waves: */ case CHIP_POLARIS10: case CHIP_POLARIS11: @@ -5156,7 +5156,7 @@ static void si_shader_dump_stats(struct si_screen *sscreen, /* Compute the per-SIMD wave counts. */ if (conf->num_sgprs) { - if (sscreen->b.chip_class >= VI) + if (sscreen->info.chip_class >= VI) max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs); else max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs); @@ -5261,7 +5261,7 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader, if (!check_debug_option || (si_can_dump_shader(sscreen, processor) && - !(sscreen->b.debug_flags & DBG(NO_ASM)))) { + !(sscreen->debug_flags & DBG(NO_ASM)))) { fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor)); if (shader->prolog) @@ -5296,12 +5296,12 @@ static int si_compile_llvm(struct si_screen *sscreen, const char *name) { int r = 0; - unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations); + unsigned count = p_atomic_inc_return(&sscreen->num_compilations); if (si_can_dump_shader(sscreen, processor)) { fprintf(stderr, "radeonsi: Compiling shader %d\n", count); - if (!(sscreen->b.debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) { + if (!(sscreen->debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) { fprintf(stderr, "%s LLVM IR:\n\n", name); ac_dump_module(mod); fprintf(stderr, "\n"); @@ -5551,7 +5551,7 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade break; case PIPE_SHADER_TESS_CTRL: - if (shader->selector->screen->b.chip_class >= GFX9) { + if (shader->selector->screen->info.chip_class >= GFX9) { si_dump_shader_key_vs(key, &key->part.tcs.ls_prolog, "part.tcs.ls_prolog", f); } @@ -5569,7 +5569,7 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade if (shader->is_gs_copy_shader) break; - if (shader->selector->screen->b.chip_class >= GFX9 && + if (shader->selector->screen->info.chip_class >= GFX9 && key->part.gs.es->type == PIPE_SHADER_VERTEX) { si_dump_shader_key_vs(key, &key->part.gs.vs_prolog, "part.gs.vs_prolog", f); @@ -5794,7 +5794,7 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, * For monolithic merged shaders, the first shader is wrapped in an * if-block together with its prolog in si_build_wrapper_function. */ - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { if (!is_monolithic && sel->info.num_instructions > 1 && /* not empty shader */ (shader->key.as_es || shader->key.as_ls) && @@ -6068,7 +6068,7 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx, si_init_function_info(&fninfo); - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { num_sgprs = 8 + GFX9_GS_NUM_USER_SGPR; num_vgprs = 5; /* ES inputs are not needed by GS */ } else { @@ -6095,7 +6095,7 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx, * with registers here. The main shader part will set the correct EXEC * mask. */ - if (ctx->screen->b.chip_class >= GFX9 && !key->gs_prolog.is_monolithic) + if (ctx->screen->info.chip_class >= GFX9 && !key->gs_prolog.is_monolithic) si_init_exec_full_mask(ctx); /* Copy inputs to outputs. This should be no-op, as the registers match, @@ -6130,7 +6130,7 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx, LLVMValueRef vtx_in[6], vtx_out[6]; LLVMValueRef prim_id, rotate; - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { for (unsigned i = 0; i < 3; i++) { vtx_in[i*2] = unpack_param(ctx, gfx9_vtx_params[i], 0, 16); vtx_in[i*2+1] = unpack_param(ctx, gfx9_vtx_params[i], 16, 16); @@ -6150,7 +6150,7 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx, vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, ""); } - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { for (unsigned i = 0; i < 3; i++) { LLVMValueRef hi, out; @@ -6417,7 +6417,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, /* Dump TGSI code before doing TGSI->LLVM conversion in case the * conversion fails. */ if (si_can_dump_shader(sscreen, sel->info.processor) && - !(sscreen->b.debug_flags & DBG(NO_TGSI))) { + !(sscreen->debug_flags & DBG(NO_TGSI))) { if (sel->tokens) tgsi_dump(sel->tokens, 0); else @@ -6458,7 +6458,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, si_build_wrapper_function(&ctx, parts + !need_prolog, 1 + need_prolog, need_prolog, 0); } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) { - if (sscreen->b.chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { struct si_shader_selector *ls = shader->key.part.tcs.ls; LLVMValueRef parts[4]; bool vs_needs_prolog = @@ -6523,7 +6523,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, si_build_wrapper_function(&ctx, parts, 2, 0, 0); } } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) { - if (ctx.screen->b.chip_class >= GFX9) { + if (ctx.screen->info.chip_class >= GFX9) { struct si_shader_selector *es = shader->key.part.gs.es; LLVMValueRef es_prolog = NULL; LLVMValueRef es_main = NULL; @@ -6643,7 +6643,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, if (sel->type == PIPE_SHADER_COMPUTE) { unsigned wave_size = 64; unsigned max_vgprs = 256; - unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512; + unsigned max_sgprs = sscreen->info.chip_class >= VI ? 800 : 512; unsigned max_sgprs_per_wave = 128; unsigned max_block_threads = si_get_max_workgroup_size(shader); unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size); @@ -6814,7 +6814,7 @@ static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx) { LLVMValueRef ptr[2], list; bool is_merged_shader = - ctx->screen->b.chip_class >= GFX9 && + ctx->screen->info.chip_class >= GFX9 && (ctx->type == PIPE_SHADER_TESS_CTRL || ctx->type == PIPE_SHADER_GEOMETRY || ctx->shader->key.as_ls || ctx->shader->key.as_es); @@ -7026,7 +7026,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, si_init_function_info(&fninfo); - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { add_arg(&fninfo, ARG_SGPR, ctx->i64); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* wave info */ @@ -7075,7 +7075,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, /* Create the function. */ si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo, - ctx->screen->b.chip_class >= CIK ? 128 : 64); + ctx->screen->info.chip_class >= CIK ? 128 : 64); ac_declare_lds_as_pointer(&ctx->ac); func = ctx->main_fn; @@ -7100,7 +7100,7 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug) { - if (sscreen->b.chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { struct si_shader *ls_main_part = shader->key.part.tcs.ls->main_shader_part_ls; @@ -7132,7 +7132,7 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug) { - if (sscreen->b.chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { struct si_shader *es_main_part = shader->key.part.gs.es->main_shader_part_es; @@ -7647,9 +7647,9 @@ void si_multiwave_lds_size_workaround(struct si_screen *sscreen, * Make sure we have at least 4k of LDS in use to avoid the bug. * It applies to workgroup sizes of more than one wavefront. */ - if (sscreen->b.family == CHIP_BONAIRE || - sscreen->b.family == CHIP_KABINI || - sscreen->b.family == CHIP_MULLINS) + if (sscreen->info.family == CHIP_BONAIRE || + sscreen->info.family == CHIP_KABINI || + sscreen->info.family == CHIP_MULLINS) *lds_size = MAX2(*lds_size, 8); } diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 5552cc8c8bb..35ada5f93c8 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -51,7 +51,7 @@ static LLVMValueRef get_buffer_size( LLVMBuildExtractElement(builder, descriptor, LLVMConstInt(ctx->i32, 2, 0), ""); - if (ctx->screen->b.chip_class == VI) { + if (ctx->screen->info.chip_class == VI) { /* On VI, the descriptor contains the size in bytes, * but TXQ must return the size in elements. * The stride is always non-zero for resources using TXQ. @@ -114,7 +114,7 @@ static bool tgsi_is_array_image(unsigned target) static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, LLVMValueRef rsrc) { - if (ctx->screen->b.chip_class <= CIK) { + if (ctx->screen->info.chip_class <= CIK) { return rsrc; } else { LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0); @@ -235,7 +235,7 @@ static LLVMValueRef image_fetch_coords( coords[chan] = tmp; } - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { /* 1D textures are allocated and used as 2D on GFX9. */ if (target == TGSI_TEXTURE_1D) { coords[1] = ctx->i32_0; @@ -647,7 +647,7 @@ static void store_fetch_args( * The only way to get unaligned stores in radeonsi is through * shader images. */ - bool force_glc = ctx->screen->b.chip_class == SI; + bool force_glc = ctx->screen->info.chip_class == SI; image_fetch_rsrc(bld_base, &memory, true, target, &rsrc); coords = image_fetch_coords(bld_base, inst, 0, rsrc); @@ -1014,7 +1014,7 @@ static LLVMValueRef fix_resinfo(struct si_shader_context *ctx, LLVMBuilderRef builder = ctx->ac.builder; /* 1D textures are allocated and used as 2D on GFX9. */ - if (ctx->screen->b.chip_class >= GFX9 && + if (ctx->screen->info.chip_class >= GFX9 && (target == TGSI_TEXTURE_1D_ARRAY || target == TGSI_TEXTURE_SHADOW1D_ARRAY)) { LLVMValueRef layers = @@ -1153,7 +1153,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, { LLVMValueRef img7, samp0; - if (ctx->screen->b.chip_class >= VI) + if (ctx->screen->info.chip_class >= VI) return samp; img7 = LLVMBuildExtractElement(ctx->ac.builder, res, @@ -1374,7 +1374,7 @@ static void tex_fetch_args( * so the depth comparison value isn't clamped for Z16 and * Z24 anymore. Do it manually here. */ - if (ctx->screen->b.chip_class >= VI) { + if (ctx->screen->info.chip_class >= VI) { LLVMValueRef upgraded; LLVMValueRef clamped; upgraded = LLVMBuildExtractElement(ctx->ac.builder, samp_ptr, @@ -1425,7 +1425,7 @@ static void tex_fetch_args( num_src_deriv_channels = 1; /* 1D textures are allocated and used as 2D on GFX9. */ - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { num_dst_deriv_channels = 2; num_deriv_channels = 2; } else { @@ -1463,7 +1463,7 @@ static void tex_fetch_args( } else if (tgsi_is_array_sampler(target) && opcode != TGSI_OPCODE_TXF && opcode != TGSI_OPCODE_TXF_LZ && - ctx->screen->b.chip_class <= VI) { + ctx->screen->info.chip_class <= VI) { unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2; coords[array_coord] = ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, @@ -1482,7 +1482,7 @@ static void tex_fetch_args( address[count++] = coords[2]; /* 1D textures are allocated and used as 2D on GFX9. */ - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { LLVMValueRef filler; /* Use 0.5, so that we don't sample the border color. */ @@ -1900,7 +1900,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, /* The hardware needs special lowering for Gather4 with integer formats. */ LLVMValueRef gather4_int_result_workaround = NULL; - if (ctx->screen->b.chip_class <= VI && + if (ctx->screen->info.chip_class <= VI && opcode == TGSI_OPCODE_TG4) { assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN); diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 9c1ea91f06e..e965fa7c415 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -1156,7 +1156,7 @@ void si_llvm_context_init(struct si_shader_context *ctx, LLVMDisposeTargetData(data_layout); LLVMDisposeMessage(data_layout_str); - bool unsafe_fpmath = (sscreen->b.debug_flags & DBG(UNSAFE_MATH)) != 0; + bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0; enum lp_float_mode float_mode = unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH : LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH; @@ -1164,7 +1164,7 @@ void si_llvm_context_init(struct si_shader_context *ctx, ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context, float_mode); - ac_llvm_context_init(&ctx->ac, ctx->gallivm.context, sscreen->b.chip_class); + ac_llvm_context_init(&ctx->ac, ctx->gallivm.context, sscreen->info.chip_class); ctx->ac.module = ctx->gallivm.module; ctx->ac.builder = ctx->gallivm.builder; @@ -1319,7 +1319,7 @@ void si_llvm_create_func(struct si_shader_context *ctx, real_shader_type = ctx->type; /* LS is merged into HS (TCS), and ES is merged into GS. */ - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class >= GFX9) { if (ctx->shader->key.as_ls) real_shader_type = PIPE_SHADER_TESS_CTRL; else if (ctx->shader->key.as_es) @@ -1358,7 +1358,7 @@ void si_llvm_optimize_module(struct si_shader_context *ctx) LLVMTargetLibraryInfoRef target_library_info; /* Dump LLVM IR before any optimization passes */ - if (ctx->screen->b.debug_flags & DBG(PREOPT_IR) && + if (ctx->screen->debug_flags & DBG(PREOPT_IR) && si_can_dump_shader(ctx->screen, ctx->type)) LLVMDumpModule(ctx->gallivm.module); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index b68dea9271a..5c4c38368fe 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -138,7 +138,7 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a } /* RB+ register settings. */ - if (sctx->screen->b.rbplus_allowed) { + if (sctx->screen->rbplus_allowed) { unsigned spi_shader_col_format = sctx->ps_shader.cso ? sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0; @@ -266,7 +266,7 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */ - } else if (sctx->screen->b.has_rbplus) { + } else if (sctx->screen->has_rbplus) { radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); radeon_emit(cs, 0); /* R_028754_SX_PS_DOWNCONVERT */ radeon_emit(cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */ @@ -629,7 +629,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, color_control |= S_028808_MODE(V_028808_CB_DISABLE); } - if (sctx->screen->b.has_rbplus) { + if (sctx->screen->has_rbplus) { /* Disable RB+ blend optimizations for dual source blending. * Vulkan does this. */ @@ -677,7 +677,7 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state) old_blend->dual_src_blend != blend->dual_src_blend || (old_blend->blend_enable_4bit != blend->blend_enable_4bit && sctx->framebuffer.nr_samples >= 2 && - sctx->screen->b.dcc_msaa_allowed)) + sctx->screen->dcc_msaa_allowed)) si_mark_atom_dirty(sctx, &sctx->cb_render_state); if (!old_blend || @@ -930,7 +930,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, state->poly_smooth || state->line_smooth) | S_028A48_VPORT_SCISSOR_ENABLE(1) | - S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->b.chip_class >= GFX9)); + S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.chip_class >= GFX9)); si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, S_028BE4_PIX_CENTER(state->half_pixel_center) | @@ -1441,8 +1441,8 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s if (!rs || !rs->multisample_enable) db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; - if (sctx->screen->b.has_rbplus && - !sctx->screen->b.rbplus_allowed) + if (sctx->screen->has_rbplus && + !sctx->screen->rbplus_allowed) db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, @@ -1603,9 +1603,9 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen, int first_non_void) { struct si_screen *sscreen = (struct si_screen*)screen; - bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 && - sscreen->b.info.drm_minor >= 31) || - sscreen->b.info.drm_major == 3; + bool enable_compressed_formats = (sscreen->info.drm_major == 2 && + sscreen->info.drm_minor >= 31) || + sscreen->info.drm_major == 3; bool uniform = true; int i; @@ -1674,8 +1674,8 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen, } if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && - (sscreen->b.family == CHIP_STONEY || - sscreen->b.chip_class >= GFX9)) { + (sscreen->info.family == CHIP_STONEY || + sscreen->info.chip_class >= GFX9)) { switch (format) { case PIPE_FORMAT_ETC1_RGB8: case PIPE_FORMAT_ETC2_RGB8: @@ -1930,7 +1930,7 @@ static unsigned si_tex_dim(struct si_screen *sscreen, struct r600_texture *rtex, /* GFX9 allocates 1D textures as 2D. */ if ((res_target == PIPE_TEXTURE_1D || res_target == PIPE_TEXTURE_1D_ARRAY) && - sscreen->b.chip_class >= GFX9 && + sscreen->info.chip_class >= GFX9 && rtex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) { if (res_target == PIPE_TEXTURE_1D) res_target = PIPE_TEXTURE_2D; @@ -2580,7 +2580,7 @@ static void si_init_depth_surface(struct si_context *sctx, surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile); if (sctx->b.chip_class >= CIK) { - struct radeon_info *info = &sctx->screen->b.info; + struct radeon_info *info = &sctx->screen->info; unsigned index = rtex->surface.u.legacy.tiling_index[level]; unsigned stencil_index = rtex->surface.u.legacy.stencil_tiling_index[level]; unsigned macro_index = rtex->surface.u.legacy.macro_tile_index; @@ -3310,7 +3310,7 @@ static bool si_out_of_order_rasterization(struct si_context *sctx) static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; - unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes; + unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes; /* 33% faster rendering to linear color buffers */ bool dst_is_linear = sctx->framebuffer.any_dst_linear; bool out_of_order_rast = si_out_of_order_rasterization(sctx); @@ -3437,8 +3437,8 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, desc = util_format_description(format); first_non_void = util_format_get_first_non_void_channel(format); stride = desc->block.bits / 8; - num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void); - data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void); + num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void); + data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void); num_records = size / stride; num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); @@ -3466,7 +3466,7 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units. * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE. */ - if (screen->b.chip_class >= GFX9) + if (screen->info.chip_class >= GFX9) /* When vindex == 0, LLVM sets IDXEN = 0, thus changing units * from STRIDE to bytes. This works around it by setting * NUM_RECORDS to at least the size of one element, so that @@ -3476,7 +3476,7 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, * IDXEN is enforced? */ num_records = num_records ? MAX2(num_records, stride) : 0; - else if (screen->b.chip_class == VI) + else if (screen->info.chip_class == VI) num_records *= stride; state[4] = 0; @@ -3638,13 +3638,13 @@ si_make_texture_descriptor(struct si_screen *screen, } } - data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void); + data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void); if (data_format == ~0) { data_format = 0; } /* S8 with Z32 HTILE needs a special format. */ - if (screen->b.chip_class >= GFX9 && + if (screen->info.chip_class >= GFX9 && pipe_format == PIPE_FORMAT_S8_UINT && tex->tc_compatible_htile) data_format = V_008F14_IMG_DATA_FORMAT_S8_32; @@ -3652,7 +3652,7 @@ si_make_texture_descriptor(struct si_screen *screen, if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY || - (screen->b.chip_class <= VI && + (screen->info.chip_class <= VI && res->target == PIPE_TEXTURE_3D))) { /* For the purpose of shader images, treat cube maps and 3D * textures as 2D arrays. For 3D textures, the address @@ -3697,7 +3697,7 @@ si_make_texture_descriptor(struct si_screen *screen, state[6] = 0; state[7] = 0; - if (screen->b.chip_class >= GFX9) { + if (screen->info.chip_class >= GFX9) { unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle); /* Depth is the the last accessible layer on Gfx9. @@ -3726,7 +3726,7 @@ si_make_texture_descriptor(struct si_screen *screen, /* The last dword is unused by hw. The shader uses it to clear * bits in the first dword of sampler state. */ - if (screen->b.chip_class <= CIK && res->nr_samples <= 1) { + if (screen->info.chip_class <= CIK && res->nr_samples <= 1) { if (first_level == last_level) state[7] = C_008F30_MAX_ANISO_RATIO; else @@ -3740,7 +3740,7 @@ si_make_texture_descriptor(struct si_screen *screen, va = tex->resource.gpu_address + tex->fmask.offset; - if (screen->b.chip_class >= GFX9) { + if (screen->info.chip_class >= GFX9) { data_format = V_008F14_IMG_DATA_FORMAT_FMASK; switch (res->nr_samples) { case 2: @@ -3788,7 +3788,7 @@ si_make_texture_descriptor(struct si_screen *screen, fmask_state[6] = 0; fmask_state[7] = 0; - if (screen->b.chip_class >= GFX9) { + if (screen->info.chip_class >= GFX9) { fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode); fmask_state[4] |= S_008F20_DEPTH(last_layer) | S_008F20_PITCH_GFX9(tex->surface.u.gfx9.fmask.epitch); @@ -4085,9 +4085,9 @@ static void *si_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { struct si_context *sctx = (struct si_context *)ctx; - struct r600_common_screen *rscreen = sctx->b.screen; + struct si_screen *sscreen = sctx->screen; struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); - unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso + unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy; unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso); union pipe_color_union clamped_border_color; @@ -4248,8 +4248,8 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, * chips are VI and older except Stoney (GFX8.1). */ if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10 && - sscreen->b.chip_class <= VI && - sscreen->b.family != CHIP_STONEY) { + sscreen->info.chip_class <= VI && + sscreen->info.family != CHIP_STONEY) { if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { v->fix_fetch[i] = SI_FIX_FETCH_A2_SNORM; } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) { @@ -4475,7 +4475,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) /* Indices are read through TC L2 since VI. * L1 isn't used. */ - if (sctx->screen->b.chip_class <= CIK) + if (sctx->screen->info.chip_class <= CIK) sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } @@ -4492,7 +4492,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) } /* Indirect buffers use TC L2 on GFX9, but not older hw. */ - if (sctx->screen->b.chip_class <= VI && + if (sctx->screen->info.chip_class <= VI && flags & PIPE_BARRIER_INDIRECT_BUFFER) sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } @@ -4589,7 +4589,7 @@ void si_init_state_functions(struct si_context *sctx) void si_init_screen_state_functions(struct si_screen *sscreen) { - sscreen->b.b.is_format_supported = si_is_format_supported; + sscreen->b.is_format_supported = si_is_format_supported; } static void si_set_grbm_gfx_index(struct si_context *sctx, @@ -4603,7 +4603,7 @@ static void si_set_grbm_gfx_index(struct si_context *sctx, static void si_set_grbm_gfx_index_se(struct si_context *sctx, struct si_pm4_state *pm4, unsigned se) { - assert(se == ~0 || se < sctx->screen->b.info.max_se); + assert(se == ~0 || se < sctx->screen->info.max_se); si_set_grbm_gfx_index(sctx, pm4, (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) : S_030800_SE_INDEX(se)) | @@ -4617,10 +4617,10 @@ si_write_harvested_raster_configs(struct si_context *sctx, unsigned raster_config, unsigned raster_config_1) { - unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); - unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); - unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; - unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); + unsigned sh_per_se = MAX2(sctx->screen->info.max_sh_per_se, 1); + unsigned num_se = MAX2(sctx->screen->info.max_se, 1); + unsigned rb_mask = sctx->screen->info.enabled_rb_mask; + unsigned num_rb = MIN2(sctx->screen->info.num_render_backends, 16); unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); unsigned rb_per_se = num_rb / num_se; unsigned se_mask[4]; @@ -4739,8 +4739,8 @@ si_write_harvested_raster_configs(struct si_context *sctx, static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4) { struct si_screen *sscreen = sctx->screen; - unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); - unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; + unsigned num_rb = MIN2(sctx->screen->info.num_render_backends, 16); + unsigned rb_mask = sctx->screen->info.enabled_rb_mask; unsigned raster_config, raster_config_1; switch (sctx->b.family) { @@ -4770,7 +4770,7 @@ static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *p raster_config_1 = 0x0000002e; break; case CHIP_FIJI: - if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) { + if (sscreen->info.cik_macrotile_mode_array[0] == 0x000000e8) { /* old kernels with old tiling config */ raster_config = 0x16000012; raster_config_1 = 0x0000002a; @@ -4843,7 +4843,7 @@ static void si_init_config(struct si_context *sctx) struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); /* Only SI can disable CLEAR_STATE for now. */ - assert(has_clear_state || sscreen->b.chip_class == SI); + assert(has_clear_state || sscreen->info.chip_class == SI); if (!pm4) return; @@ -4963,9 +4963,9 @@ static void si_init_config(struct si_context *sctx) S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F)); /* Compute LATE_ALLOC_VS.LIMIT. */ - unsigned num_cu_per_sh = sscreen->b.info.num_good_compute_units / - (sscreen->b.info.max_se * - sscreen->b.info.max_sh_per_se); + unsigned num_cu_per_sh = sscreen->info.num_good_compute_units / + (sscreen->info.max_se * + sscreen->info.max_sh_per_se); unsigned late_alloc_limit; /* The limit is per SH. */ if (sctx->b.family == CHIP_KABINI) { @@ -5029,7 +5029,7 @@ static void si_init_config(struct si_context *sctx) RADEON_PRIO_BORDER_COLORS); if (sctx->b.chip_class >= GFX9) { - unsigned num_se = sscreen->b.info.max_se; + unsigned num_se = sscreen->info.max_se; unsigned pc_lines = 0; switch (sctx->b.family) { diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c index 8d98d6d0d08..686701d718f 100644 --- a/src/gallium/drivers/radeonsi/si_state_binning.c +++ b/src/gallium/drivers/radeonsi/si_state_binning.c @@ -46,9 +46,9 @@ static struct uvec2 si_find_bin_size(struct si_screen *sscreen, unsigned sum) { unsigned log_num_rb_per_se = - util_logbase2_ceil(sscreen->b.info.num_render_backends / - sscreen->b.info.max_se); - unsigned log_num_se = util_logbase2_ceil(sscreen->b.info.max_se); + util_logbase2_ceil(sscreen->info.num_render_backends / + sscreen->info.max_se); + unsigned log_num_se = util_logbase2_ceil(sscreen->info.max_se); unsigned i; /* Get the chip-specific subtable. */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 7330bf49983..06ef84d20da 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -341,24 +341,24 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, ia_switch_on_eoi = true; /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */ - if ((sscreen->b.family == CHIP_TAHITI || - sscreen->b.family == CHIP_PITCAIRN || - sscreen->b.family == CHIP_BONAIRE) && + if ((sscreen->info.family == CHIP_TAHITI || + sscreen->info.family == CHIP_PITCAIRN || + sscreen->info.family == CHIP_BONAIRE) && key->u.uses_gs) partial_vs_wave = true; /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ if (sscreen->has_distributed_tess) { if (key->u.uses_gs) { - if (sscreen->b.chip_class <= VI) + if (sscreen->info.chip_class <= VI) partial_es_wave = true; /* GPU hang workaround. */ - if (sscreen->b.family == CHIP_TONGA || - sscreen->b.family == CHIP_FIJI || - sscreen->b.family == CHIP_POLARIS10 || - sscreen->b.family == CHIP_POLARIS11 || - sscreen->b.family == CHIP_POLARIS12) + if (sscreen->info.family == CHIP_TONGA || + sscreen->info.family == CHIP_FIJI || + sscreen->info.family == CHIP_POLARIS10 || + sscreen->info.family == CHIP_POLARIS11 || + sscreen->info.family == CHIP_POLARIS12) partial_vs_wave = true; } else { partial_vs_wave = true; @@ -368,12 +368,12 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, /* This is a hardware requirement. */ if (key->u.line_stipple_enabled || - (sscreen->b.debug_flags & DBG(SWITCH_ON_EOP))) { + (sscreen->debug_flags & DBG(SWITCH_ON_EOP))) { ia_switch_on_eop = true; wd_switch_on_eop = true; } - if (sscreen->b.chip_class >= CIK) { + if (sscreen->info.chip_class >= CIK) { /* WD_SWITCH_ON_EOP has no effect on GPUs with less than * 4 shader engines. Set 1 to pass the assertion below. * The other cases are hardware requirements. @@ -381,13 +381,13 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, * Polaris supports primitive restart with WD_SWITCH_ON_EOP=0 * for points, line strips, and tri strips. */ - if (sscreen->b.info.max_se < 4 || + if (sscreen->info.max_se < 4 || key->u.prim == PIPE_PRIM_POLYGON || key->u.prim == PIPE_PRIM_LINE_LOOP || key->u.prim == PIPE_PRIM_TRIANGLE_FAN || key->u.prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY || (key->u.primitive_restart && - (sscreen->b.family < CHIP_POLARIS10 || + (sscreen->info.family < CHIP_POLARIS10 || (key->u.prim != PIPE_PRIM_POINTS && key->u.prim != PIPE_PRIM_LINE_STRIP && key->u.prim != PIPE_PRIM_TRIANGLE_STRIP))) || @@ -397,7 +397,7 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. * We don't know that for indirect drawing, so treat it as * always problematic. */ - if (sscreen->b.family == CHIP_HAWAII && + if (sscreen->info.family == CHIP_HAWAII && key->u.uses_instancing) wd_switch_on_eop = true; @@ -406,24 +406,24 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, * Assume indirect draws always use small instances. * This is needed for good VS wave utilization. */ - if (sscreen->b.chip_class <= VI && - sscreen->b.info.max_se == 4 && + if (sscreen->info.chip_class <= VI && + sscreen->info.max_se == 4 && key->u.multi_instances_smaller_than_primgroup) wd_switch_on_eop = true; /* Required on CIK and later. */ - if (sscreen->b.info.max_se > 2 && !wd_switch_on_eop) + if (sscreen->info.max_se > 2 && !wd_switch_on_eop) ia_switch_on_eoi = true; /* Required by Hawaii and, for some special cases, by VI. */ if (ia_switch_on_eoi && - (sscreen->b.family == CHIP_HAWAII || - (sscreen->b.chip_class == VI && + (sscreen->info.family == CHIP_HAWAII || + (sscreen->info.chip_class == VI && (key->u.uses_gs || max_primgroup_in_wave != 2)))) partial_vs_wave = true; /* Instancing bug on Bonaire. */ - if (sscreen->b.family == CHIP_BONAIRE && ia_switch_on_eoi && + if (sscreen->info.family == CHIP_BONAIRE && ia_switch_on_eoi && key->u.uses_instancing) partial_vs_wave = true; @@ -432,19 +432,19 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, } /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ - if (sscreen->b.chip_class <= VI && ia_switch_on_eoi) + if (sscreen->info.chip_class <= VI && ia_switch_on_eoi) partial_es_wave = true; return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | - S_028AA8_WD_SWITCH_ON_EOP(sscreen->b.chip_class >= CIK ? wd_switch_on_eop : 0) | + S_028AA8_WD_SWITCH_ON_EOP(sscreen->info.chip_class >= CIK ? wd_switch_on_eop : 0) | /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */ - S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->b.chip_class == VI ? + S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->info.chip_class == VI ? max_primgroup_in_wave : 0) | - S_030960_EN_INST_OPT_BASIC(sscreen->b.chip_class >= GFX9) | - S_030960_EN_INST_OPT_ADV(sscreen->b.chip_class >= GFX9); + S_030960_EN_INST_OPT_BASIC(sscreen->info.chip_class >= GFX9) | + S_030960_EN_INST_OPT_ADV(sscreen->info.chip_class >= GFX9); } void si_init_ia_multi_vgt_param_table(struct si_context *sctx) @@ -1361,7 +1361,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) indexbuf = NULL; u_upload_data(ctx->stream_uploader, start_offset, info->count * index_size, - sctx->screen->b.info.tcc_cache_line_size, + sctx->screen->info.tcc_cache_line_size, (char*)info->index.user + start_offset, &index_offset, &indexbuf); if (!indexbuf) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 53955307b4f..461760f580d 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -206,10 +206,10 @@ static bool si_shader_cache_insert_shader(struct si_screen *sscreen, return false; } - if (sscreen->b.disk_shader_cache && insert_into_disk_cache) { - disk_cache_compute_key(sscreen->b.disk_shader_cache, tgsi_binary, + if (sscreen->disk_shader_cache && insert_into_disk_cache) { + disk_cache_compute_key(sscreen->disk_shader_cache, tgsi_binary, *((uint32_t *)tgsi_binary), key); - disk_cache_put(sscreen->b.disk_shader_cache, key, hw_binary, + disk_cache_put(sscreen->disk_shader_cache, key, hw_binary, *((uint32_t *) hw_binary), NULL); } @@ -223,16 +223,16 @@ static bool si_shader_cache_load_shader(struct si_screen *sscreen, struct hash_entry *entry = _mesa_hash_table_search(sscreen->shader_cache, tgsi_binary); if (!entry) { - if (sscreen->b.disk_shader_cache) { + if (sscreen->disk_shader_cache) { unsigned char sha1[CACHE_KEY_SIZE]; size_t tg_size = *((uint32_t *) tgsi_binary); - disk_cache_compute_key(sscreen->b.disk_shader_cache, + disk_cache_compute_key(sscreen->disk_shader_cache, tgsi_binary, tg_size, sha1); size_t binary_size; uint8_t *buffer = - disk_cache_get(sscreen->b.disk_shader_cache, + disk_cache_get(sscreen->disk_shader_cache, sha1, &binary_size); if (!buffer) return false; @@ -246,7 +246,7 @@ static bool si_shader_cache_load_shader(struct si_screen *sscreen, assert(!"Invalid radeonsi shader disk cache " "item!"); - disk_cache_remove(sscreen->b.disk_shader_cache, + disk_cache_remove(sscreen->disk_shader_cache, sha1); free(buffer); @@ -271,7 +271,7 @@ static bool si_shader_cache_load_shader(struct si_screen *sscreen, else return false; } - p_atomic_inc(&sscreen->b.num_shader_cache_hits); + p_atomic_inc(&sscreen->num_shader_cache_hits); return true; } @@ -372,8 +372,8 @@ static void si_set_tesseval_regs(struct si_screen *sscreen, topology = V_028B6C_OUTPUT_TRIANGLE_CW; if (sscreen->has_distributed_tess) { - if (sscreen->b.family == CHIP_FIJI || - sscreen->b.family >= CHIP_POLARIS10) + if (sscreen->info.family == CHIP_FIJI || + sscreen->info.family >= CHIP_POLARIS10) distribution_mode = V_028B6C_DISTRIBUTION_MODE_TRAPEZOIDS; else distribution_mode = V_028B6C_DISTRIBUTION_MODE_DONUTS; @@ -408,7 +408,7 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen, { unsigned type = sel->type; - if (sscreen->b.family < CHIP_POLARIS10) + if (sscreen->info.family < CHIP_POLARIS10) return; /* VS as VS, or VS as ES: */ @@ -445,7 +445,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) unsigned vgpr_comp_cnt; uint64_t va; - assert(sscreen->b.chip_class <= VI); + assert(sscreen->info.chip_class <= VI); pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -485,7 +485,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); - if (sscreen->b.chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8); si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >> 40); @@ -516,7 +516,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) S_00B428_FLOAT_MODE(shader->config.float_mode) | S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt)); - if (sscreen->b.chip_class <= VI) { + if (sscreen->info.chip_class <= VI) { si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, shader->config.rsrc2); } @@ -530,7 +530,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) uint64_t va; unsigned oc_lds_en; - assert(sscreen->b.chip_class <= VI); + assert(sscreen->info.chip_class <= VI); pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -578,7 +578,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) */ static uint32_t si_vgt_gs_mode(struct si_shader_selector *sel) { - enum chip_class chip_class = sel->screen->b.chip_class; + enum chip_class chip_class = sel->screen->info.chip_class; unsigned gs_max_vert_out = sel->gs_max_out_vertices; unsigned cut_mode; @@ -751,7 +751,7 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); - if (sscreen->b.chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { unsigned input_prim = sel->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]; unsigned es_type = shader->key.part.gs.es->type; unsigned es_vgpr_comp_cnt, gs_vgpr_comp_cnt; @@ -870,7 +870,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0); } - if (sscreen->b.chip_class <= VI) { + if (sscreen->info.chip_class <= VI) { /* Reuse needs to be set off if we write oViewport. */ si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, S_028AB4_REUSE_OFF(info->writes_viewport_index)); @@ -1480,7 +1480,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, assert(0); } - if (unlikely(sctx->screen->b.debug_flags & DBG(NO_OPT_VARIANT))) + if (unlikely(sctx->screen->debug_flags & DBG(NO_OPT_VARIANT))) memset(&key->opt, 0, sizeof(key->opt)); } @@ -1660,7 +1660,7 @@ current_not_ready: shader->compiler_ctx_state = *compiler_state; /* If this is a merged shader, get the first shader's selector. */ - if (sscreen->b.chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { if (sel->type == PIPE_SHADER_TESS_CTRL) previous_stage_sel = key->part.tcs.ls; else if (sel->type == PIPE_SHADER_GEOMETRY) @@ -1943,9 +1943,9 @@ static void si_init_shader_selector_async(void *job, int thread_index) } /* Pre-compilation. */ - if (sscreen->b.debug_flags & DBG(PRECOMPILE) && + if (sscreen->debug_flags & DBG(PRECOMPILE) && /* GFX9 needs LS or ES for compilation, which we don't have here. */ - (sscreen->b.chip_class <= VI || + (sscreen->info.chip_class <= VI || (sel->type != PIPE_SHADER_TESS_CTRL && sel->type != PIPE_SHADER_GEOMETRY))) { struct si_shader_ctx_state state = {sel}; @@ -1957,7 +1957,7 @@ static void si_init_shader_selector_async(void *job, int thread_index) &key); /* GFX9 doesn't have LS and ES. */ - if (sscreen->b.chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { key.as_ls = 0; key.as_es = 0; } @@ -2064,7 +2064,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, } sel->type = sel->info.processor; - p_atomic_inc(&sscreen->b.num_shaders_created); + p_atomic_inc(&sscreen->num_shaders_created); si_get_active_slot_masks(&sel->info, &sel->active_const_and_shader_buffers, &sel->active_samplers_and_images); @@ -2085,7 +2085,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->type == PIPE_SHADER_FRAGMENT && sel->info.uses_derivatives && sel->info.uses_kill && - sctx->screen->b.debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL); + sctx->screen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL); /* Set which opcode uses which (i,j) pair. */ if (sel->info.uses_persp_opcode_interp_centroid) @@ -2721,7 +2721,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) struct si_pm4_state *pm4; /* Chip constants. */ - unsigned num_se = sctx->screen->b.info.max_se; + unsigned num_se = sctx->screen->info.max_se; unsigned wave_size = 64; unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */ /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16. @@ -3021,7 +3021,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) r600_resource_reference(&sctx->scratch_buffer, NULL); sctx->scratch_buffer = (struct r600_resource*) - si_aligned_buffer_create(&sctx->screen->b.b, + si_aligned_buffer_create(&sctx->screen->b, R600_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, scratch_needed_size, 256); @@ -3060,7 +3060,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx) */ unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63; unsigned max_offchip_buffers = max_offchip_buffers_per_se * - sctx->screen->b.info.max_se; + sctx->screen->info.max_se; unsigned offchip_granularity; switch (sctx->screen->tess_offchip_block_dw_size) { @@ -3082,7 +3082,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx) sctx->tf_ring = si_aligned_buffer_create(sctx->b.b.screen, R600_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, - 32768 * sctx->screen->b.info.max_se, + 32768 * sctx->screen->info.max_se, 64 * 1024); if (!sctx->tf_ring) return; @@ -3366,7 +3366,7 @@ bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->spi_map); } - if (sctx->screen->b.rbplus_allowed && + if (sctx->screen->rbplus_allowed && si_pm4_state_changed(sctx, ps) && (!old_ps || old_spi_shader_col_format != diff --git a/src/gallium/drivers/radeonsi/si_test_dma.c b/src/gallium/drivers/radeonsi/si_test_dma.c index de88469b654..779572eddb0 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma.c +++ b/src/gallium/drivers/radeonsi/si_test_dma.c @@ -138,7 +138,7 @@ static enum pipe_format get_format_from_bpp(int bpp) static const char *array_mode_to_string(struct si_screen *sscreen, struct radeon_surf *surf) { - if (sscreen->b.chip_class >= GFX9) { + if (sscreen->info.chip_class >= GFX9) { /* TODO */ return " UNKNOWN"; } else { @@ -173,7 +173,7 @@ static unsigned generate_max_tex_side(unsigned max_tex_side) void si_test_dma(struct si_screen *sscreen) { - struct pipe_screen *screen = &sscreen->b.b; + struct pipe_screen *screen = &sscreen->b; struct pipe_context *ctx = screen->context_create(screen, NULL, 0); struct si_context *sctx = (struct si_context*)ctx; uint64_t max_alloc_size; diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c index 09fdb2322bf..64f2f8e4ced 100644 --- a/src/gallium/drivers/radeonsi/si_uvd.c +++ b/src/gallium/drivers/radeonsi/si_uvd.c @@ -112,7 +112,7 @@ static struct pb_buffer* si_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_bu struct si_screen *sscreen = (struct si_screen*)buf->base.context->screen; struct r600_texture *luma = (struct r600_texture *)buf->resources[0]; struct r600_texture *chroma = (struct r600_texture *)buf->resources[1]; - enum ruvd_surface_type type = (sscreen->b.chip_class >= GFX9) ? + enum ruvd_surface_type type = (sscreen->info.chip_class >= GFX9) ? RUVD_SURFACE_TYPE_GFX9 : RUVD_SURFACE_TYPE_LEGACY; |