diff options
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- | src/gallium/drivers/radeonsi/cik_sdma.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_blit.c | 83 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 84 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_dma.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 485 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 27 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 3 |
10 files changed, 453 insertions, 257 deletions
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 76913914b38..6eb62dcc890 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -243,7 +243,7 @@ void cik_sdma_copy(struct pipe_context *ctx, if (src->format != dst->format || rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 || (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) || - rdst->dcc_buffer || rsrc->dcc_buffer) { + rdst->dcc_offset || rsrc->dcc_offset) { goto fallback; } diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 115877060ba..f9a6de48f6b 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -241,8 +241,9 @@ static void si_blit_decompress_depth_in_place(struct si_context *sctx, si_mark_atom_dirty(sctx, &sctx->db_render_state); } -void si_flush_depth_textures(struct si_context *sctx, - struct si_textures_info *textures) +static void +si_flush_depth_textures(struct si_context *sctx, + struct si_textures_info *textures) { unsigned i; unsigned mask = textures->depth_texture_mask; @@ -271,18 +272,29 @@ void si_flush_depth_textures(struct si_context *sctx, static void si_blit_decompress_color(struct pipe_context *ctx, struct r600_texture *rtex, unsigned first_level, unsigned last_level, - unsigned first_layer, unsigned last_layer) + unsigned first_layer, unsigned last_layer, + bool need_dcc_decompress) { struct si_context *sctx = (struct si_context *)ctx; unsigned layer, level, checked_last_layer, max_layer; - if (!rtex->dirty_level_mask) + if (!rtex->dirty_level_mask && !need_dcc_decompress) return; for (level = first_level; level <= last_level; level++) { - if (!(rtex->dirty_level_mask & (1 << level))) + void* custom_blend; + + if (!(rtex->dirty_level_mask & (1 << level)) && !need_dcc_decompress) continue; + if (rtex->dcc_offset && need_dcc_decompress) { + custom_blend = sctx->custom_blend_dcc_decompress; + } else if (rtex->fmask.size) { + custom_blend = sctx->custom_blend_decompress; + } else { + custom_blend = sctx->custom_blend_fastclear; + } + /* The smaller the mipmap level, the less layers there are * as far as 3D textures are concerned. */ max_layer = util_max_layer(&rtex->resource.b.b, level); @@ -298,9 +310,7 @@ static void si_blit_decompress_color(struct pipe_context *ctx, cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl); si_blitter_begin(ctx, SI_DECOMPRESS); - util_blitter_custom_color(sctx->blitter, cbsurf, - rtex->fmask.size ? sctx->custom_blend_decompress : - sctx->custom_blend_fastclear); + util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend); si_blitter_end(ctx); pipe_surface_reference(&cbsurf, NULL); @@ -314,8 +324,9 @@ static void si_blit_decompress_color(struct pipe_context *ctx, } } -void si_decompress_color_textures(struct si_context *sctx, - struct si_textures_info *textures) +static void +si_decompress_color_textures(struct si_context *sctx, + struct si_textures_info *textures) { unsigned i; unsigned mask = textures->compressed_colortex_mask; @@ -330,11 +341,37 @@ void si_decompress_color_textures(struct si_context *sctx, assert(view); tex = (struct r600_texture *)view->texture; - assert(tex->cmask.size || tex->fmask.size || tex->dcc_buffer); + assert(tex->cmask.size || tex->fmask.size || tex->dcc_offset); si_blit_decompress_color(&sctx->b.b, tex, view->u.tex.first_level, view->u.tex.last_level, - 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level)); + 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level), + false); + } +} + +void si_decompress_textures(struct si_context *sctx) +{ + unsigned compressed_colortex_counter; + + if (sctx->blitter->running) + return; + + /* Update the compressed_colortex_mask if necessary. */ + compressed_colortex_counter = p_atomic_read(&sctx->screen->b.compressed_colortex_counter); + if (compressed_colortex_counter != sctx->b.last_compressed_colortex_counter) { + sctx->b.last_compressed_colortex_counter = compressed_colortex_counter; + si_update_compressed_colortex_masks(sctx); + } + + /* Flush depth textures which need to be flushed. */ + for (int i = 0; i < SI_NUM_SHADERS; i++) { + if (sctx->samplers[i].depth_texture_mask) { + si_flush_depth_textures(sctx, &sctx->samplers[i]); + } + if (sctx->samplers[i].compressed_colortex_mask) { + si_decompress_color_textures(sctx, &sctx->samplers[i]); + } } } @@ -483,9 +520,9 @@ static void si_decompress_subresource(struct pipe_context *ctx, si_blit_decompress_depth_in_place(sctx, rtex, true, level, level, first_layer, last_layer); - } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_buffer) { + } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) { si_blit_decompress_color(ctx, rtex, level, level, - first_layer, last_layer); + first_layer, last_layer, false); } } @@ -712,7 +749,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx, dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D && !(dst->surface.flags & RADEON_SURF_SCANOUT) && (!dst->cmask.size || !dst->dirty_level_mask) && /* dst cannot be fast-cleared */ - !dst->dcc_buffer) { + !dst->dcc_offset) { si_blitter_begin(ctx, SI_COLOR_RESOLVE | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); util_blitter_custom_resolve_color(sctx->blitter, @@ -761,12 +798,23 @@ static void si_flush_resource(struct pipe_context *ctx, assert(res->target != PIPE_BUFFER); - if (!rtex->is_depth && rtex->cmask.size) { + if (!rtex->is_depth && (rtex->cmask.size || rtex->dcc_offset)) { si_blit_decompress_color(ctx, rtex, 0, res->last_level, - 0, util_max_layer(res, 0)); + 0, util_max_layer(res, 0), false); } } +static void si_decompress_dcc(struct pipe_context *ctx, + struct r600_texture *rtex) +{ + if (!rtex->dcc_offset) + return; + + si_blit_decompress_color(ctx, rtex, 0, rtex->resource.b.b.last_level, + 0, util_max_layer(&rtex->resource.b.b, 0), + true); +} + static void si_pipe_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, unsigned offset, unsigned size, @@ -836,4 +884,5 @@ void si_init_blit_functions(struct si_context *sctx) sctx->b.b.blit = si_blit; sctx->b.b.flush_resource = si_flush_resource; sctx->b.blit_decompress_depth = si_blit_decompress_depth; + sctx->b.decompress_dcc = si_decompress_dcc; } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 345f2bbc381..d12b3e6b28a 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -150,20 +150,17 @@ static void si_release_sampler_views(struct si_sampler_views *views) si_release_descriptors(&views->desc); } -static void si_sampler_view_add_buffers(struct si_context *sctx, - struct si_sampler_view *rview) +static void si_sampler_view_add_buffer(struct si_context *sctx, + struct pipe_resource *resource) { - if (rview->resource) { - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - rview->resource, RADEON_USAGE_READ, - r600_get_sampler_view_priority(rview->resource)); - } + struct r600_resource *rres = (struct r600_resource*)resource; - if (rview->dcc_buffer && rview->dcc_buffer != rview->resource) { - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - rview->dcc_buffer, RADEON_USAGE_READ, - RADEON_PRIO_DCC); - } + if (!resource) + return; + + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, rres, + RADEON_USAGE_READ, + r600_get_sampler_view_priority(rres)); } static void si_sampler_views_begin_new_cs(struct si_context *sctx, @@ -174,10 +171,8 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, /* Add buffers to the CS. */ while (mask) { int i = u_bit_scan64(&mask); - struct si_sampler_view *rview = - (struct si_sampler_view*)views->views[i]; - si_sampler_view_add_buffers(sctx, rview); + si_sampler_view_add_buffer(sctx, views->views[i]->texture); } if (!views->desc.buffer) @@ -190,15 +185,20 @@ static void si_set_sampler_view(struct si_context *sctx, struct si_sampler_views *views, unsigned slot, struct pipe_sampler_view *view) { - if (views->views[slot] == view) + struct si_sampler_view *rview = (struct si_sampler_view*)view; + + if (view && view->texture && view->texture->target != PIPE_BUFFER && + G_008F28_COMPRESSION_EN(rview->state[6]) && + ((struct r600_texture*)view->texture)->dcc_offset == 0) { + rview->state[6] &= C_008F28_COMPRESSION_EN & + C_008F28_ALPHA_IS_ON_MSB; + } else if (views->views[slot] == view) return; if (view) { - struct si_sampler_view *rview = - (struct si_sampler_view*)view; - struct r600_texture *rtex = (struct r600_texture*)view->texture; + struct r600_texture *rtex = (struct r600_texture *)view->texture; - si_sampler_view_add_buffers(sctx, rview); + si_sampler_view_add_buffer(sctx, view->texture); pipe_sampler_view_reference(&views->views[slot], view); memcpy(views->desc.list + slot * 16, rview->state, 8*4); @@ -229,6 +229,12 @@ static void si_set_sampler_view(struct si_context *sctx, views->desc.list_dirty = true; } +static bool is_compressed_colortex(struct r600_texture *rtex) +{ + return rtex->cmask.size || rtex->fmask.size || + (rtex->dcc_offset && rtex->dirty_level_mask); +} + static void si_set_sampler_views(struct pipe_context *ctx, unsigned shader, unsigned start, unsigned count, @@ -262,8 +268,7 @@ static void si_set_sampler_views(struct pipe_context *ctx, } else { samplers->depth_texture_mask &= ~(1 << slot); } - if (rtex->cmask.size || rtex->fmask.size || - (rtex->dcc_buffer && rtex->dirty_level_mask)) { + if (is_compressed_colortex(rtex)) { samplers->compressed_colortex_mask |= 1 << slot; } else { samplers->compressed_colortex_mask &= ~(1 << slot); @@ -275,6 +280,27 @@ static void si_set_sampler_views(struct pipe_context *ctx, } } +static void +si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers) +{ + uint64_t mask = samplers->views.desc.enabled_mask; + + while (mask) { + int i = u_bit_scan64(&mask); + struct pipe_resource *res = samplers->views.views[i]->texture; + + if (res && res->target != PIPE_BUFFER) { + struct r600_texture *rtex = (struct r600_texture *)res; + + if (is_compressed_colortex(rtex)) { + samplers->compressed_colortex_mask |= 1 << i; + } else { + samplers->compressed_colortex_mask &= ~(1 << i); + } + } + } +} + /* SAMPLER STATES */ static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, @@ -303,6 +329,7 @@ static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, */ if (samplers->views.views[i] && samplers->views.views[i]->texture && + samplers->views.views[i]->texture->target != PIPE_BUFFER && ((struct r600_texture*)samplers->views.views[i]->texture)->fmask.size) continue; @@ -767,6 +794,19 @@ static void si_desc_reset_buffer_offset(struct pipe_context *ctx, S_008F04_BASE_ADDRESS_HI(va >> 32); } +/* TEXTURE METADATA ENABLE/DISABLE */ + +/* CMASK can be enabled (for fast clear) and disabled (for texture export) + * while the texture is bound, possibly by a different context. In that case, + * call this function to update compressed_colortex_masks. + */ +void si_update_compressed_colortex_masks(struct si_context *sctx) +{ + for (int i = 0; i < SI_NUM_SHADERS; ++i) { + si_samplers_update_compressed_colortex_mask(&sctx->samplers[i]); + } +} + /* BUFFER DISCARD/INVALIDATION */ /* Reallocate a buffer a update all resource bindings where the buffer is diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 240d96190a9..0efca193951 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -249,7 +249,7 @@ void si_dma_copy(struct pipe_context *ctx, (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << dst_level) || rdst->cmask.size || rdst->fmask.size || rsrc->cmask.size || rsrc->fmask.size || - rdst->dcc_buffer || rsrc->dcc_buffer) { + rdst->dcc_offset || rsrc->dcc_offset) { goto fallback; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 37fd4a25d59..8b50a49cba0 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -68,6 +68,8 @@ static void si_destroy_context(struct pipe_context *context) sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_decompress); if (sctx->custom_blend_fastclear) sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_fastclear); + if (sctx->custom_blend_dcc_decompress) + sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_dcc_decompress); util_unreference_framebuffer_state(&sctx->framebuffer.state); if (sctx->blitter) @@ -418,7 +420,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return PIPE_ENDIAN_LITTLE; case PIPE_CAP_VENDOR_ID: - return 0x1002; + return ATI_VENDOR_ID; case PIPE_CAP_DEVICE_ID: return sscreen->b.info.pci_id; case PIPE_CAP_ACCELERATED: @@ -427,6 +429,14 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return sscreen->b.info.vram_size >> 20; case PIPE_CAP_UMA: return 0; + case PIPE_CAP_PCI_GROUP: + return sscreen->b.info.pci_domain; + case PIPE_CAP_PCI_BUS: + return sscreen->b.info.pci_bus; + case PIPE_CAP_PCI_DEVICE: + return sscreen->b.info.pci_dev; + case PIPE_CAP_PCI_FUNCTION: + return sscreen->b.info.pci_func; } return 0; } @@ -611,6 +621,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.b.is_format_supported = si_is_format_supported; sscreen->b.b.resource_create = r600_resource_create_common; + si_init_screen_state_functions(sscreen); + if (!r600_common_screen_init(&sscreen->b, ws) || !si_init_gs_info(sscreen) || !si_init_shader_cache(sscreen)) { diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index ef860a58b83..0fef5f72098 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -120,8 +120,6 @@ struct si_blend_color { struct si_sampler_view { struct pipe_sampler_view base; struct list_head list; - struct r600_resource *resource; - struct r600_resource *dcc_buffer; /* [0..7] = image descriptor * [4..7] = buffer descriptor */ uint32_t state[8]; @@ -197,6 +195,7 @@ struct si_context { void *custom_blend_resolve; void *custom_blend_decompress; void *custom_blend_fastclear; + void *custom_blend_dcc_decompress; void *pstipple_sampler_state; struct si_screen *screen; struct pipe_fence_handle *last_gfx_fence; @@ -334,10 +333,7 @@ void cik_sdma_copy(struct pipe_context *ctx, /* si_blit.c */ void si_init_blit_functions(struct si_context *sctx); -void si_flush_depth_textures(struct si_context *sctx, - struct si_textures_info *textures); -void si_decompress_color_textures(struct si_context *sctx, - struct si_textures_info *textures); +void si_decompress_textures(struct si_context *sctx); void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dst_level, diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index b23b17ad77b..f823af188c7 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -34,6 +34,7 @@ #include "util/u_format_s3tc.h" #include "util/u_memory.h" #include "util/u_pstipple.h" +#include "util/u_resource.h" /* Initialize an external atom (owned by ../radeon). */ static void @@ -2250,11 +2251,7 @@ static void si_initialize_color_surface(struct si_context *sctx, } assert(format != V_028C70_COLOR_INVALID); swap = r600_translate_colorswap(surf->base.format); - if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) { - endian = V_028C70_ENDIAN_NONE; - } else { - endian = si_colorformat_endian_swap(format); - } + endian = si_colorformat_endian_swap(format); /* blend clamp should be set for all NORM/SRGB types */ if (ntype == V_028C70_NUMBER_UNORM || @@ -2322,9 +2319,8 @@ static void si_initialize_color_surface(struct si_context *sctx, surf->cb_color_info = color_info; surf->cb_color_attrib = color_attrib; - if (sctx->b.chip_class >= VI && rtex->dcc_buffer) { + if (sctx->b.chip_class >= VI && rtex->dcc_offset) { unsigned max_uncompressed_block_size = 2; - uint64_t dcc_offset = rtex->surface.level[level].dcc_offset; if (rtex->surface.nsamples > 1) { if (rtex->surface.bpe == 1) @@ -2335,7 +2331,9 @@ static void si_initialize_color_surface(struct si_context *sctx, surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | S_028C78_INDEPENDENT_64B_BLOCKS(1); - surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) >> 8; + surf->cb_dcc_base = (rtex->resource.gpu_address + + rtex->dcc_offset + + rtex->surface.level[level].dcc_offset) >> 8; } if (rtex->fmask.size) { @@ -2674,12 +2672,6 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom RADEON_PRIO_CMASK); } - if (tex->dcc_buffer && tex->dcc_buffer != &tex->resource) { - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - tex->dcc_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_DCC); - } - radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, sctx->b.chip_class >= VI ? 14 : 13); radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ @@ -2802,105 +2794,73 @@ static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) */ /** - * Create a sampler view. - * - * @param ctx context - * @param texture texture - * @param state sampler view template - * @param width0 width0 override (for compressed textures as int) - * @param height0 height0 override (for compressed textures as int) - * @param force_level set the base address to the level (for compressed textures) + * Build the sampler view descriptor for a buffer texture. + * @param state 256-bit descriptor; only the high 128 bits are filled in */ -struct pipe_sampler_view * -si_create_sampler_view_custom(struct pipe_context *ctx, - struct pipe_resource *texture, - const struct pipe_sampler_view *state, - unsigned width0, unsigned height0, - unsigned force_level) +static void +si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, + enum pipe_format format, + unsigned first_element, unsigned last_element, + uint32_t *state) { - struct si_context *sctx = (struct si_context*)ctx; - struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); - struct r600_texture *tmp = (struct r600_texture*)texture; const struct util_format_description *desc; - unsigned format, num_format, base_level, first_level, last_level; - uint32_t pitch = 0; - unsigned char state_swizzle[4], swizzle[4]; - unsigned height, depth, width; - enum pipe_format pipe_format = state->format; - struct radeon_surf_level *surflevel; int first_non_void; uint64_t va; - unsigned last_layer = state->u.tex.last_layer; + unsigned stride; + unsigned num_records; + unsigned num_format, data_format; - if (!view) - return NULL; - - /* initialize base object */ - view->base = *state; - view->base.texture = NULL; - view->base.reference.count = 1; - view->base.context = ctx; - - /* NULL resource, obey swizzle (only ZERO and ONE make sense). */ - if (!texture) { - view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) | - S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) | - S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) | - S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) | - S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D); - return &view->base; - } - - pipe_resource_reference(&view->base.texture, texture); - view->resource = &tmp->resource; - - if (state->format == PIPE_FORMAT_X24S8_UINT || - state->format == PIPE_FORMAT_S8X24_UINT || - state->format == PIPE_FORMAT_X32_S8X24_UINT || - state->format == PIPE_FORMAT_S8_UINT) - view->is_stencil_sampler = true; - - /* Buffer resource. */ - if (texture->target == PIPE_BUFFER) { - unsigned stride, num_records; - - desc = util_format_description(state->format); - first_non_void = util_format_get_first_non_void_channel(state->format); - stride = desc->block.bits / 8; - va = tmp->resource.gpu_address + state->u.buf.first_element*stride; - format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); - num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); + desc = util_format_description(format); + first_non_void = util_format_get_first_non_void_channel(format); + stride = desc->block.bits / 8; + va = buf->gpu_address + first_element * stride; + num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void); + data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void); - num_records = state->u.buf.last_element + 1 - state->u.buf.first_element; - num_records = MIN2(num_records, texture->width0 / stride); + num_records = last_element + 1 - first_element; + num_records = MIN2(num_records, buf->b.b.width0 / stride); - if (sctx->b.chip_class >= VI) - num_records *= stride; - - view->state[4] = va; - view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) | - S_008F04_STRIDE(stride); - view->state[6] = num_records; - view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | - S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | - S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | - S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | - S_008F0C_NUM_FORMAT(num_format) | - S_008F0C_DATA_FORMAT(format); + if (screen->b.chip_class >= VI) + num_records *= stride; - LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers); - return &view->base; - } - - state_swizzle[0] = state->swizzle_r; - state_swizzle[1] = state->swizzle_g; - state_swizzle[2] = state->swizzle_b; - state_swizzle[3] = state->swizzle_a; + state[4] = va; + state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) | + S_008F04_STRIDE(stride); + state[6] = num_records; + state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | + S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | + S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | + S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | + S_008F0C_NUM_FORMAT(num_format) | + S_008F0C_DATA_FORMAT(data_format); +} - surflevel = tmp->surface.level; +/** + * Build the sampler view descriptor for a texture. + */ +static void +si_make_texture_descriptor(struct si_screen *screen, + struct r600_texture *tex, + enum pipe_texture_target target, + enum pipe_format pipe_format, + const unsigned char state_swizzle[4], + unsigned base_level, unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer, + unsigned width, unsigned height, unsigned depth, + uint32_t *state, + uint32_t *fmask_state) +{ + struct pipe_resource *res = &tex->resource.b.b; + const struct radeon_surf_level *surflevel = tex->surface.level; + const struct util_format_description *desc; + unsigned char swizzle[4]; + int first_non_void; + unsigned num_format, data_format; + uint32_t pitch; + uint64_t va; /* Texturing with separate depth and stencil. */ - if (tmp->is_depth && !tmp->is_flushing_texture) { + if (tex->is_depth && !tex->is_flushing_texture) { switch (pipe_format) { case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: pipe_format = PIPE_FORMAT_Z32_FLOAT; @@ -2914,7 +2874,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx, case PIPE_FORMAT_S8X24_UINT: case PIPE_FORMAT_X32_S8X24_UINT: pipe_format = PIPE_FORMAT_S8_UINT; - surflevel = tmp->surface.stencil_level; + surflevel = tex->surface.stencil_level; break; default:; } @@ -3008,89 +2968,63 @@ si_create_sampler_view_custom(struct pipe_context *ctx, } } - format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void); - if (format == ~0) { - format = 0; - } - - base_level = 0; - first_level = state->u.tex.first_level; - last_level = state->u.tex.last_level; - width = width0; - height = height0; - depth = texture->depth0; - - if (force_level) { - assert(force_level == first_level && - force_level == last_level); - base_level = force_level; - first_level = 0; - last_level = 0; - width = u_minify(width, force_level); - height = u_minify(height, force_level); - depth = u_minify(depth, force_level); + data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void); + if (data_format == ~0) { + data_format = 0; } - pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format); - - if (texture->target == PIPE_TEXTURE_1D_ARRAY) { + if (res->target == PIPE_TEXTURE_1D_ARRAY) { height = 1; - depth = texture->array_size; - } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { - depth = texture->array_size; - } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY) - depth = texture->array_size / 6; + depth = res->array_size; + } else if (res->target == PIPE_TEXTURE_2D_ARRAY) { + depth = res->array_size; + } else if (res->target == PIPE_TEXTURE_CUBE_ARRAY) + depth = res->array_size / 6; - /* This is not needed if state trackers set last_layer correctly. */ - if (state->target == PIPE_TEXTURE_1D || - state->target == PIPE_TEXTURE_2D || - state->target == PIPE_TEXTURE_RECT || - state->target == PIPE_TEXTURE_CUBE) - last_layer = state->u.tex.first_layer; - - va = tmp->resource.gpu_address + surflevel[base_level].offset; - - view->state[0] = va >> 8; - view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) | - S_008F14_DATA_FORMAT(format) | - S_008F14_NUM_FORMAT(num_format)); - view->state[2] = (S_008F18_WIDTH(width - 1) | - S_008F18_HEIGHT(height - 1)); - view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | - S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | - S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | - S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | - S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ? - 0 : first_level) | - S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ? - util_logbase2(texture->nr_samples) : - last_level) | - S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, base_level, false)) | - S_008F1C_POW2_PAD(texture->last_level > 0) | - S_008F1C_TYPE(si_tex_dim(texture->target, state->target, - texture->nr_samples))); - view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); - view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) | - S_008F24_LAST_ARRAY(last_layer)); - - if (tmp->dcc_buffer) { - uint64_t dcc_offset = surflevel[base_level].dcc_offset; + pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format); + va = tex->resource.gpu_address + surflevel[base_level].offset; + + state[0] = va >> 8; + state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) | + S_008F14_DATA_FORMAT(data_format) | + S_008F14_NUM_FORMAT(num_format)); + state[2] = (S_008F18_WIDTH(width - 1) | + S_008F18_HEIGHT(height - 1)); + state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | + S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | + S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | + S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | + S_008F1C_BASE_LEVEL(res->nr_samples > 1 ? + 0 : first_level) | + S_008F1C_LAST_LEVEL(res->nr_samples > 1 ? + util_logbase2(res->nr_samples) : + last_level) | + S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level, false)) | + S_008F1C_POW2_PAD(res->last_level > 0) | + S_008F1C_TYPE(si_tex_dim(res->target, target, res->nr_samples))); + state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); + state[5] = (S_008F24_BASE_ARRAY(first_layer) | + S_008F24_LAST_ARRAY(last_layer)); + + if (tex->dcc_offset) { unsigned swap = r600_translate_colorswap(pipe_format); - view->state[6] = S_008F28_COMPRESSION_EN(1) | S_008F28_ALPHA_IS_ON_MSB(swap <= 1); - view->state[7] = (tmp->dcc_buffer->gpu_address + dcc_offset) >> 8; - view->dcc_buffer = tmp->dcc_buffer; + state[6] = S_008F28_COMPRESSION_EN(1) | S_008F28_ALPHA_IS_ON_MSB(swap <= 1); + state[7] = (tex->resource.gpu_address + + tex->dcc_offset + + surflevel[base_level].dcc_offset) >> 8; } else { - view->state[6] = 0; - view->state[7] = 0; + state[6] = 0; + state[7] = 0; } /* Initialize the sampler view for FMASK. */ - if (tmp->fmask.size) { - uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset; + if (tex->fmask.size) { uint32_t fmask_format; - switch (texture->nr_samples) { + va = tex->resource.gpu_address + tex->fmask.offset; + + switch (res->nr_samples) { case 2: fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; break; @@ -3105,27 +3039,129 @@ si_create_sampler_view_custom(struct pipe_context *ctx, fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; } - view->fmask_state[0] = va >> 8; - view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | - S_008F14_DATA_FORMAT(fmask_format) | - S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); - view->fmask_state[2] = S_008F18_WIDTH(width - 1) | - S_008F18_HEIGHT(height - 1); - view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | - S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | - S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | - S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | - S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) | - S_008F1C_TYPE(si_tex_dim(texture->target, - state->target, 0)); - view->fmask_state[4] = S_008F20_DEPTH(depth - 1) | - S_008F20_PITCH(tmp->fmask.pitch_in_pixels - 1); - view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) | - S_008F24_LAST_ARRAY(last_layer); - view->fmask_state[6] = 0; - view->fmask_state[7] = 0; + fmask_state[0] = va >> 8; + fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | + S_008F14_DATA_FORMAT(fmask_format) | + S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); + fmask_state[2] = S_008F18_WIDTH(width - 1) | + S_008F18_HEIGHT(height - 1); + fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | + S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | + S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | + S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | + S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) | + S_008F1C_TYPE(si_tex_dim(res->target, target, 0)); + fmask_state[4] = S_008F20_DEPTH(depth - 1) | + S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1); + fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) | + S_008F24_LAST_ARRAY(last_layer); + fmask_state[6] = 0; + fmask_state[7] = 0; + } +} + +/** + * Create a sampler view. + * + * @param ctx context + * @param texture texture + * @param state sampler view template + * @param width0 width0 override (for compressed textures as int) + * @param height0 height0 override (for compressed textures as int) + * @param force_level set the base address to the level (for compressed textures) + */ +struct pipe_sampler_view * +si_create_sampler_view_custom(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state, + unsigned width0, unsigned height0, + unsigned force_level) +{ + struct si_context *sctx = (struct si_context*)ctx; + struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); + struct r600_texture *tmp = (struct r600_texture*)texture; + unsigned base_level, first_level, last_level; + unsigned char state_swizzle[4]; + unsigned height, depth, width; + unsigned last_layer = state->u.tex.last_layer; + + if (!view) + return NULL; + + /* initialize base object */ + view->base = *state; + view->base.texture = NULL; + view->base.reference.count = 1; + view->base.context = ctx; + + /* NULL resource, obey swizzle (only ZERO and ONE make sense). */ + if (!texture) { + view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) | + S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) | + S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) | + S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) | + S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D); + return &view->base; } + pipe_resource_reference(&view->base.texture, texture); + + if (state->format == PIPE_FORMAT_X24S8_UINT || + state->format == PIPE_FORMAT_S8X24_UINT || + state->format == PIPE_FORMAT_X32_S8X24_UINT || + state->format == PIPE_FORMAT_S8_UINT) + view->is_stencil_sampler = true; + + /* Buffer resource. */ + if (texture->target == PIPE_BUFFER) { + si_make_buffer_descriptor(sctx->screen, + (struct r600_resource *)texture, + state->format, + state->u.buf.first_element, + state->u.buf.last_element, + view->state); + + LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers); + return &view->base; + } + + state_swizzle[0] = state->swizzle_r; + state_swizzle[1] = state->swizzle_g; + state_swizzle[2] = state->swizzle_b; + state_swizzle[3] = state->swizzle_a; + + base_level = 0; + first_level = state->u.tex.first_level; + last_level = state->u.tex.last_level; + width = width0; + height = height0; + depth = texture->depth0; + + if (force_level) { + assert(force_level == first_level && + force_level == last_level); + base_level = force_level; + first_level = 0; + last_level = 0; + width = u_minify(width, force_level); + height = u_minify(height, force_level); + depth = u_minify(depth, force_level); + } + + /* This is not needed if state trackers set last_layer correctly. */ + if (state->target == PIPE_TEXTURE_1D || + state->target == PIPE_TEXTURE_2D || + state->target == PIPE_TEXTURE_RECT || + state->target == PIPE_TEXTURE_CUBE) + last_layer = state->u.tex.first_layer; + + si_make_texture_descriptor(sctx->screen, tmp, state->target, + state->format, state_swizzle, + base_level, first_level, last_level, + state->u.tex.first_layer, last_layer, + width, height, depth, + view->state, view->fmask_state); + return &view->base; } @@ -3144,7 +3180,7 @@ static void si_sampler_view_destroy(struct pipe_context *ctx, { struct si_sampler_view *view = (struct si_sampler_view *)state; - if (view->resource && view->resource->b.b.target == PIPE_BUFFER) + if (state->texture && state->texture->target == PIPE_BUFFER) LIST_DELINIT(&view->list); pipe_resource_reference(&state->texture, NULL); @@ -3522,6 +3558,7 @@ void si_init_state_functions(struct si_context *sctx) sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); + sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS); sctx->b.b.set_clip_state = si_set_clip_state; sctx->b.b.set_scissor_states = si_set_scissor_states; @@ -3564,6 +3601,68 @@ void si_init_state_functions(struct si_context *sctx) si_init_config(sctx); } +static void si_query_opaque_metadata(struct r600_common_screen *rscreen, + struct r600_texture *rtex, + struct radeon_bo_metadata *md) +{ + struct si_screen *sscreen = (struct si_screen*)rscreen; + struct pipe_resource *res = &rtex->resource.b.b; + static const unsigned char swizzle[] = { + PIPE_SWIZZLE_RED, + PIPE_SWIZZLE_GREEN, + PIPE_SWIZZLE_BLUE, + PIPE_SWIZZLE_ALPHA + }; + uint32_t desc[8], i; + bool is_array = util_resource_is_array_texture(res); + + /* DRM 2.x.x doesn't support this. */ + if (rscreen->info.drm_major != 3) + return; + + assert(rtex->fmask.size == 0); + + /* Metadata image format format version 1: + * [0] = 1 (metadata format identifier) + * [1] = (VENDOR_ID << 16) | PCI_ID + * [2:9] = image descriptor for the whole resource + * [2] is always 0, because the base address is cleared + * [9] is the DCC offset bits [39:8] from the beginning of + * the buffer + * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level + */ + + md->metadata[0] = 1; /* metadata image format version 1 */ + + /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ + md->metadata[1] = (ATI_VENDOR_ID << 16) | rscreen->info.pci_id; + + si_make_texture_descriptor(sscreen, rtex, res->target, res->format, + swizzle, 0, 0, res->last_level, 0, + is_array ? res->array_size - 1 : 0, + res->width0, res->height0, res->depth0, + desc, NULL); + + /* Clear the base address and set the relative DCC offset. */ + desc[0] = 0; + desc[1] &= C_008F14_BASE_ADDRESS_HI; + desc[7] = rtex->dcc_offset >> 8; + + /* Dwords [2:9] contain the image descriptor. */ + memcpy(&md->metadata[2], desc, sizeof(desc)); + + /* Dwords [10:..] contain the mipmap level offsets. */ + for (i = 0; i <= res->last_level; i++) + md->metadata[10+i] = rtex->surface.level[i].offset >> 8; + + md->size_metadata = (11 + res->last_level) * 4; +} + +void si_init_screen_state_functions(struct si_screen *sscreen) +{ + sscreen->b.query_opaque_metadata = si_query_opaque_metadata; +} + static void si_write_harvested_raster_configs(struct si_context *sctx, struct si_pm4_state *pm4, diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 40792cbc1d5..60c34f19e55 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -249,6 +249,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx); void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, const uint8_t *ptr, unsigned size, uint32_t *const_offset); void si_shader_change_notify(struct si_context *sctx); +void si_update_compressed_colortex_masks(struct si_context *sctx); void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom); /* si_state.c */ @@ -263,6 +264,7 @@ boolean si_is_format_supported(struct pipe_screen *screen, unsigned sample_count, unsigned usage); void si_init_state_functions(struct si_context *sctx); +void si_init_screen_state_functions(struct si_screen *sscreen); unsigned cik_bank_wh(unsigned bankwh); unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode); unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 91ccd073267..84b850a2992 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -33,21 +33,6 @@ #include "util/u_upload_mgr.h" #include "util/u_prim.h" -static void si_decompress_textures(struct si_context *sctx) -{ - if (!sctx->blitter->running) { - /* Flush depth textures which need to be flushed. */ - for (int i = 0; i < SI_NUM_SHADERS; i++) { - if (sctx->samplers[i].depth_texture_mask) { - si_flush_depth_textures(sctx, &sctx->samplers[i]); - } - if (sctx->samplers[i].compressed_colortex_mask) { - si_decompress_color_textures(sctx, &sctx->samplers[i]); - } - } - } -} - static unsigned si_conv_pipe_prim(unsigned mode) { static const unsigned prim_conv[] = { @@ -763,7 +748,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct si_context *sctx = (struct si_context *)ctx; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct pipe_index_buffer ib = {}; - unsigned mask; + unsigned mask, dirty_fb_counter; if (!info->count && !info->indirect && (info->indexed || !info->count_from_stream_output)) @@ -782,6 +767,16 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) return; } + /* Re-emit the framebuffer state if needed. */ + dirty_fb_counter = p_atomic_read(&sctx->b.screen->dirty_fb_counter); + if (dirty_fb_counter != sctx->b.last_dirty_fb_counter) { + sctx->b.last_dirty_fb_counter = dirty_fb_counter; + sctx->framebuffer.dirty_cbufs |= + ((1 << sctx->framebuffer.state.nr_cbufs) - 1); + sctx->framebuffer.dirty_zsbuf = true; + si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); + } + si_decompress_textures(sctx); /* Set the rasterization primitive type. diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 321b87d80a6..5fe1f7960f3 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1154,6 +1154,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx, break; } + if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) + sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1); + /* Compile the main shader part for use with a prolog and/or epilog. */ if (sel->type != PIPE_SHADER_GEOMETRY && !sscreen->use_monolithic_shaders) { |