From 6529daca391912246c13e6f76e885026d2ce88be Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sat, 24 Oct 2015 01:47:45 +0200 Subject: radeonsi: Implement DCC fast clear. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Uses the DCC buffer instead of the CMASK buffer. The ELIMINATE_FAST_CLEAR still works. Furthermore, with DCC compression we can directly clear to a limited set of colors such that we do not need a postprocessing step. v2 Marek: check dcc_buffer && dirty_level_mask in set_sampler_view Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Marek Olšák --- src/gallium/drivers/radeon/r600_texture.c | 107 +++++++++++++++++++++++--- src/gallium/drivers/radeonsi/si_blit.c | 4 +- src/gallium/drivers/radeonsi/si_descriptors.c | 3 +- 3 files changed, 100 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 03140497629..f7a11a2534c 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -1239,6 +1239,81 @@ static void evergreen_set_clear_color(struct r600_texture *rtex, memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); } +static void vi_get_fast_clear_parameters(enum pipe_format surface_format, + const union pipe_color_union *color, + uint32_t* reset_value, + bool* clear_words_needed) +{ + bool values[4] = {}; + int i; + bool main_value = false; + bool extra_value = false; + int extra_channel; + const struct util_format_description *desc = util_format_description(surface_format); + + *clear_words_needed = true; + *reset_value = 0x20202020U; + + /* If we want to clear without needing a fast clear eliminate step, we + * can set each channel to 0 or 1 (or 0/max for integer formats). We + * have two sets of flags, one for the last or first channel(extra) and + * one for the other channels(main). + */ + + if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT || + surface_format == PIPE_FORMAT_B5G6R5_UNORM || + surface_format == PIPE_FORMAT_B5G6R5_SRGB) { + extra_channel = -1; + } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { + if(r600_translate_colorswap(surface_format) <= 1) + extra_channel = desc->nr_channels - 1; + else + extra_channel = 0; + } else + return; + + for (i = 0; i < 4; ++i) { + int index = desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X; + + if (desc->swizzle[i] < UTIL_FORMAT_SWIZZLE_X || + desc->swizzle[i] > UTIL_FORMAT_SWIZZLE_W) + continue; + + if (util_format_is_pure_sint(surface_format)) { + values[i] = color->i[i] != 0; + if (color->i[i] != 0 && color->i[i] != INT32_MAX) + return; + } else if (util_format_is_pure_uint(surface_format)) { + values[i] = color->ui[i] != 0U; + if (color->ui[i] != 0U && color->ui[i] != UINT32_MAX) + return; + } else { + values[i] = color->f[i] != 0.0F; + if (color->f[i] != 0.0F && color->f[i] != 1.0F) + return; + } + + if (index == extra_channel) + extra_value = values[i]; + else + main_value = values[i]; + } + + for (int i = 0; i < 4; ++i) + if (values[i] != main_value && + desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X != extra_channel && + desc->swizzle[i] >= UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W) + return; + + *clear_words_needed = false; + if (main_value) + *reset_value |= 0x80808080U; + + if (extra_value) + *reset_value |= 0x40404040U; +} + void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct pipe_framebuffer_state *fb, struct r600_atom *fb_state, @@ -1292,23 +1367,33 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, continue; } - /* CMASK clear does not work for DCC compressed textures */ if (tex->surface.dcc_enabled) { - continue; - } + uint32_t reset_value; + bool clear_words_needed; - /* ensure CMASK is enabled */ - r600_texture_alloc_cmask_separate(rctx->screen, tex); - if (tex->cmask.size == 0) { - continue; + vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed); + + rctx->clear_buffer(&rctx->b, &tex->dcc_buffer->b.b, + 0, tex->surface.dcc_size, reset_value, true); + + if (clear_words_needed) + tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; + } else { + /* ensure CMASK is enabled */ + r600_texture_alloc_cmask_separate(rctx->screen, tex); + if (tex->cmask.size == 0) { + continue; + } + + /* Do the fast clear. */ + rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, + tex->cmask.offset, tex->cmask.size, 0, true); + + tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; } - /* Do the fast clear. */ evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); - rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, - tex->cmask.offset, tex->cmask.size, 0, true); - tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; if (dirty_cbufs) *dirty_cbufs |= 1 << i; rctx->set_atom_dirty(rctx, fb_state, true); diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index ab2523a24b7..a2264363db9 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx, assert(view); tex = (struct r600_texture *)view->texture; - assert(tex->cmask.size || tex->fmask.size); + assert(tex->cmask.size || tex->fmask.size || tex->surface.dcc_enabled); si_blit_decompress_color(&sctx->b.b, tex, view->u.tex.first_level, view->u.tex.last_level, @@ -455,7 +455,7 @@ static void si_decompress_subresource(struct pipe_context *ctx, si_blit_decompress_depth_in_place(sctx, rtex, true, level, level, first_layer, last_layer); - } else if (rtex->fmask.size || rtex->cmask.size) { + } else if (rtex->fmask.size || rtex->cmask.size || rtex->surface.dcc_enabled) { si_blit_decompress_color(ctx, rtex, level, level, first_layer, last_layer); } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 5548cba3a24..a8ff6f27319 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -234,7 +234,8 @@ static void si_set_sampler_views(struct pipe_context *ctx, } else { samplers->depth_texture_mask &= ~(1 << slot); } - if (rtex->cmask.size || rtex->fmask.size) { + if (rtex->cmask.size || rtex->fmask.size || + (rtex->dcc_buffer && rtex->dirty_level_mask)) { samplers->compressed_colortex_mask |= 1 << slot; } else { samplers->compressed_colortex_mask &= ~(1 << slot); -- cgit v1.2.3