summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2016-10-11 23:19:46 +0200
committerMarek Olšák <[email protected]>2016-10-13 19:00:51 +0200
commitd4d9ec55c589156df4edc227a86b4a8c41048d58 (patch)
tree646cdd6806f7a311c7e8a1403d5e715a79386af7 /src/gallium/drivers
parenta077185ea9d685967844b68aa09da6bd8aa430da (diff)
radeonsi: implement TC-compatible HTILE
so that decompress blits aren't needed and depth texturing needs less memory bandwidth. Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16. The format promotion is not visible to state trackers. This is part of TC-compatible renderbuffer compression, which has 3 parts: DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now. I don't see a measurable increase in performance though. (I tested Talos Principle and DiRT: Showdown, the latter is improved by 0.5%, which is almost noise, and it originally used layered Z16, so at least we know that Z16 promoted to Z32F isn't slower now) Tested-by: Edmondo Tommasina <[email protected]> Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h3
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c67
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h4
-rw-r--r--src/gallium/drivers/radeonsi/si_blit.c11
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c7
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c18
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c39
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c3
8 files changed, 132 insertions, 20 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 290b228b73f..5cfcad688c4 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -245,6 +245,7 @@ struct r600_htile_info {
unsigned height;
unsigned xalign;
unsigned yalign;
+ unsigned alignment;
};
struct r600_texture {
@@ -252,6 +253,7 @@ struct r600_texture {
uint64_t size;
unsigned num_level0_transfers;
+ enum pipe_format db_render_format;
bool is_depth;
bool db_compatible;
bool can_sample_z;
@@ -273,6 +275,7 @@ struct r600_texture {
/* Depth buffer compression and fast clear. */
struct r600_htile_info htile;
struct r600_resource *htile_buffer;
+ bool tc_compatible_htile;
bool depth_cleared; /* if it was cleared at least once */
float depth_clear_value;
bool stencil_cleared; /* if it was cleared at least once */
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 57cdbcf615d..625d091fe40 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -192,7 +192,8 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
struct radeon_surf *surface,
const struct pipe_resource *ptex,
unsigned array_mode,
- bool is_flushed_depth)
+ bool is_flushed_depth,
+ bool tc_compatible_htile)
{
const struct util_format_description *desc =
util_format_description(ptex->format);
@@ -256,11 +257,22 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
if (!is_flushed_depth && is_depth) {
surface->flags |= RADEON_SURF_ZBUFFER;
+ if (tc_compatible_htile &&
+ array_mode == RADEON_SURF_MODE_2D) {
+ /* TC-compatible HTILE only supports Z32_FLOAT.
+ * Promote Z16 to Z32. DB->CB copies will convert
+ * the format for transfers.
+ */
+ surface->bpe = 4;
+ surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
+ }
+
if (is_stencil) {
surface->flags |= RADEON_SURF_SBUFFER |
RADEON_SURF_HAS_SBUFFER_MIPTREE;
}
}
+
if (rscreen->chip_class >= SI) {
surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
}
@@ -904,6 +916,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
rtex->htile.height = height;
rtex->htile.xalign = cl_width * 8;
rtex->htile.yalign = cl_height * 8;
+ rtex->htile.alignment = base_align;
return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
align(slice_bytes, base_align);
@@ -912,21 +925,34 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
- unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);
+ uint64_t htile_size, alignment;
+ uint32_t clear_value;
+
+ if (rtex->tc_compatible_htile) {
+ htile_size = rtex->surface.htile_size;
+ alignment = rtex->surface.htile_alignment;
+ clear_value = 0x0000030F;
+ } else {
+ htile_size = r600_texture_get_htile_size(rscreen, rtex);
+ alignment = rtex->htile.alignment;
+ clear_value = 0;
+ }
if (!htile_size)
return;
rtex->htile_buffer = (struct r600_resource*)
- pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
- PIPE_USAGE_DEFAULT, htile_size);
+ r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_DEFAULT,
+ htile_size, alignment);
if (rtex->htile_buffer == NULL) {
/* this is not a fatal error as we can still keep rendering
* without htile buffer */
R600_ERR("Failed to create buffer object for htile buffer.\n");
} else {
- r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
- htile_size, 0, R600_COHERENCY_NONE);
+ r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
+ 0, htile_size, clear_value,
+ R600_COHERENCY_NONE);
}
}
@@ -967,10 +993,11 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
if (rtex->htile_buffer)
fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, "
- "xalign=%u, yalign=%u\n",
+ "xalign=%u, yalign=%u, TC_compatible = %u\n",
rtex->htile_buffer->b.b.width0,
rtex->htile_buffer->buf->alignment, rtex->htile.pitch,
- rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign);
+ rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign,
+ rtex->tc_compatible_htile);
if (rtex->dcc_offset) {
fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
@@ -1054,6 +1081,16 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
}
+ rtex->tc_compatible_htile = rtex->surface.htile_size != 0;
+ assert(!!(rtex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ==
+ rtex->tc_compatible_htile);
+
+ /* TC-compatible HTILE only supports Z32_FLOAT. */
+ if (rtex->tc_compatible_htile)
+ rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
+ else
+ rtex->db_render_format = base->format;
+
/* Tiled depth textures utilize the non-displayable tile order.
* This must be done after r600_setup_surface.
* Applies to R600-Cayman. */
@@ -1241,11 +1278,20 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_surf surface = {0};
+ bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
+ bool tc_compatible_htile =
+ rscreen->chip_class >= VI &&
+ (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
+ !(rscreen->debug_flags & DBG_NO_HYPERZ) &&
+ !is_flushed_depth &&
+ templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
+ util_format_is_depth_or_stencil(templ->format);
+
int r;
r = r600_init_surface(rscreen, &surface, templ,
r600_choose_tiling(rscreen, templ),
- templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
+ is_flushed_depth, tc_compatible_htile);
if (r) {
return NULL;
}
@@ -1296,7 +1342,8 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
else
array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
- r = r600_init_surface(rscreen, &surface, templ, array_mode, false);
+ r = r600_init_surface(rscreen, &surface, templ, array_mode,
+ false, false);
if (r) {
return NULL;
}
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 7146737c826..8946209d337 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -278,6 +278,7 @@ enum radeon_feature_id {
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
#define RADEON_SURF_FMASK (1 << 21)
#define RADEON_SURF_DISABLE_DCC (1 << 22)
+#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
@@ -344,6 +345,9 @@ struct radeon_surf {
uint64_t dcc_size;
uint64_t dcc_alignment;
+ /* TC-compatible HTILE only. */
+ uint64_t htile_size;
+ uint64_t htile_alignment;
};
struct radeon_bo_list_item {
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index c143601d55c..db41f565a94 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -332,6 +332,8 @@ si_flush_depth_texture(struct si_context *sctx,
}
}
+ assert(!tex->tc_compatible_htile || levels_z == 0);
+
/* We may have to allocate the flushed texture here when called from
* si_decompress_subresource.
*/
@@ -699,7 +701,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
zsbuf->u.tex.level == 0 &&
zsbuf->u.tex.first_layer == 0 &&
zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
- if (buffers & PIPE_CLEAR_DEPTH) {
+ /* TC-compatible HTILE only supports depth clears to 0 or 1. */
+ if (buffers & PIPE_CLEAR_DEPTH &&
+ (!zstex->tc_compatible_htile ||
+ depth == 0 || depth == 1)) {
/* Need to disable EXPCLEAR temporarily if clearing
* to a new value. */
if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
@@ -713,7 +718,9 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
- if (buffers & PIPE_CLEAR_STENCIL) {
+ /* TC-compatible HTILE only supports stencil clears to 0. */
+ if (buffers & PIPE_CLEAR_STENCIL &&
+ (!zstex->tc_compatible_htile || stencil == 0)) {
stencil &= 0xff;
/* Need to disable EXPCLEAR temporarily if clearing
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 350242aeed5..19cae65e75e 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -399,6 +399,9 @@ void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
state[7] = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
tex->dcc_offset +
base_level_info->dcc_offset) >> 8;
+ } else if (tex->tc_compatible_htile) {
+ state[6] |= S_008F28_COMPRESSION_EN(1);
+ state[7] = tex->htile_buffer->gpu_address >> 8;
}
}
@@ -508,8 +511,10 @@ static void si_set_sampler_views(struct pipe_context *ctx,
if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
struct r600_texture *rtex =
(struct r600_texture*)views[i]->texture;
+ struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
- if (rtex->db_compatible) {
+ if (rtex->db_compatible &&
+ (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) {
samplers->depth_texture_mask |= 1u << slot;
} else {
samplers->depth_texture_mask &= ~(1u << slot);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index fc50205633d..b2d76994996 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4607,12 +4607,26 @@ static void tex_fetch_args(
/* Pack depth comparison value */
if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
+ LLVMValueRef z;
+
if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
+ z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
} else {
assert(ref_pos >= 0);
- address[count++] = coords[ref_pos];
+ z = coords[ref_pos];
}
+
+ /* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+ * so the depth comparison value isn't clamped for Z16 and
+ * Z24 anymore. Do it manually here.
+ *
+ * It's unnecessary if the original texture format was
+ * Z32_FLOAT, but we don't know that here.
+ */
+ if (ctx->screen->b.chip_class == VI)
+ z = radeon_llvm_saturate(bld_base, z);
+
+ address[count++] = z;
}
/* Pack user derivatives */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index ad65fc22f60..b23749c6d89 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -686,6 +686,9 @@ static void si_update_poly_offset_state(struct si_context *sctx)
if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
return;
+ /* Use the user format, not db_render_format, so that the polygon
+ * offset behaves as expected by applications.
+ */
switch (sctx->framebuffer.state.zsbuf->texture->format) {
case PIPE_FORMAT_Z16_UNORM:
si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
@@ -2140,7 +2143,7 @@ static void si_init_depth_surface(struct si_context *sctx,
uint64_t z_offs, s_offs;
uint32_t db_htile_data_base, db_htile_surface;
- format = si_translate_dbformat(rtex->resource.b.b.format);
+ format = si_translate_dbformat(rtex->db_render_format);
if (format == V_028040_Z_INVALID) {
R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
@@ -2151,7 +2154,7 @@ static void si_init_depth_surface(struct si_context *sctx,
z_offs += rtex->surface.level[level].offset;
s_offs += rtex->surface.stencil_level[level].offset;
- db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
+ db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
z_info = S_028040_FORMAT(format);
if (rtex->resource.b.b.nr_samples > 1) {
@@ -2208,13 +2211,37 @@ static void si_init_depth_surface(struct si_context *sctx,
*/
if (rtex->resource.b.b.nr_samples <= 1)
s_info |= S_028044_ALLOW_EXPCLEAR(1);
- } else
- /* Use all of the htile_buffer for depth if there's no stencil. */
+ } else if (!rtex->tc_compatible_htile) {
+ /* Use all of the htile_buffer for depth if there's no stencil.
+ * This must not be set when TC-compatible HTILE is enabled
+ * due to a hw bug.
+ */
s_info |= S_028044_TILE_STENCIL_DISABLE(1);
+ }
uint64_t va = rtex->htile_buffer->gpu_address;
db_htile_data_base = va >> 8;
db_htile_surface = S_028ABC_FULL_CACHE(1);
+
+ if (rtex->tc_compatible_htile) {
+ db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
+
+ switch (rtex->resource.b.b.nr_samples) {
+ case 0:
+ case 1:
+ z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
+ break;
+ case 2:
+ case 4:
+ z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
+ break;
+ case 8:
+ z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
+ break;
+ default:
+ assert(0);
+ }
+ }
} else {
db_htile_data_base = 0;
db_htile_surface = 0;
@@ -2356,6 +2383,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (state->zsbuf) {
surf = (struct r600_surface*)state->zsbuf;
+ rtex = (struct r600_texture*)surf->base.texture;
if (!surf->depth_initialized) {
si_init_depth_surface(sctx, surf);
@@ -3021,6 +3049,9 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
surflevel = tmp->surface.level;
if (tmp->db_compatible) {
+ if (!view->is_stencil_sampler)
+ pipe_format = tmp->db_render_format;
+
switch (pipe_format) {
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
pipe_format = PIPE_FORMAT_Z32_FLOAT;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index c14e852bec2..d18137b6691 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1118,7 +1118,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
- rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+ if (!rtex->tc_compatible_htile)
+ rtex->dirty_level_mask |= 1 << surf->u.tex.level;
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;