summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeon
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2016-10-11 23:19:46 +0200
committerMarek Olšák <[email protected]>2016-10-13 19:00:51 +0200
commitd4d9ec55c589156df4edc227a86b4a8c41048d58 (patch)
tree646cdd6806f7a311c7e8a1403d5e715a79386af7 /src/gallium/drivers/radeon
parenta077185ea9d685967844b68aa09da6bd8aa430da (diff)
radeonsi: implement TC-compatible HTILE
so that decompress blits aren't needed and depth texturing needs less memory bandwidth. Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16. The format promotion is not visible to state trackers. This is part of TC-compatible renderbuffer compression, which has 3 parts: DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now. I don't see a measurable increase in performance though. (I tested Talos Principle and DiRT: Showdown, the latter is improved by 0.5%, which is almost noise, and it originally used layered Z16, so at least we know that Z16 promoted to Z32F isn't slower now) Tested-by: Edmondo Tommasina <[email protected]> Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeon')
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h3
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c67
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h4
3 files changed, 64 insertions, 10 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 290b228b73f..5cfcad688c4 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -245,6 +245,7 @@ struct r600_htile_info {
unsigned height;
unsigned xalign;
unsigned yalign;
+ unsigned alignment;
};
struct r600_texture {
@@ -252,6 +253,7 @@ struct r600_texture {
uint64_t size;
unsigned num_level0_transfers;
+ enum pipe_format db_render_format;
bool is_depth;
bool db_compatible;
bool can_sample_z;
@@ -273,6 +275,7 @@ struct r600_texture {
/* Depth buffer compression and fast clear. */
struct r600_htile_info htile;
struct r600_resource *htile_buffer;
+ bool tc_compatible_htile;
bool depth_cleared; /* if it was cleared at least once */
float depth_clear_value;
bool stencil_cleared; /* if it was cleared at least once */
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 57cdbcf615d..625d091fe40 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -192,7 +192,8 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
struct radeon_surf *surface,
const struct pipe_resource *ptex,
unsigned array_mode,
- bool is_flushed_depth)
+ bool is_flushed_depth,
+ bool tc_compatible_htile)
{
const struct util_format_description *desc =
util_format_description(ptex->format);
@@ -256,11 +257,22 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
if (!is_flushed_depth && is_depth) {
surface->flags |= RADEON_SURF_ZBUFFER;
+ if (tc_compatible_htile &&
+ array_mode == RADEON_SURF_MODE_2D) {
+ /* TC-compatible HTILE only supports Z32_FLOAT.
+ * Promote Z16 to Z32. DB->CB copies will convert
+ * the format for transfers.
+ */
+ surface->bpe = 4;
+ surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
+ }
+
if (is_stencil) {
surface->flags |= RADEON_SURF_SBUFFER |
RADEON_SURF_HAS_SBUFFER_MIPTREE;
}
}
+
if (rscreen->chip_class >= SI) {
surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
}
@@ -904,6 +916,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
rtex->htile.height = height;
rtex->htile.xalign = cl_width * 8;
rtex->htile.yalign = cl_height * 8;
+ rtex->htile.alignment = base_align;
return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
align(slice_bytes, base_align);
@@ -912,21 +925,34 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
- unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);
+ uint64_t htile_size, alignment;
+ uint32_t clear_value;
+
+ if (rtex->tc_compatible_htile) {
+ htile_size = rtex->surface.htile_size;
+ alignment = rtex->surface.htile_alignment;
+ clear_value = 0x0000030F;
+ } else {
+ htile_size = r600_texture_get_htile_size(rscreen, rtex);
+ alignment = rtex->htile.alignment;
+ clear_value = 0;
+ }
if (!htile_size)
return;
rtex->htile_buffer = (struct r600_resource*)
- pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
- PIPE_USAGE_DEFAULT, htile_size);
+ r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
+ PIPE_USAGE_DEFAULT,
+ htile_size, alignment);
if (rtex->htile_buffer == NULL) {
/* this is not a fatal error as we can still keep rendering
* without htile buffer */
R600_ERR("Failed to create buffer object for htile buffer.\n");
} else {
- r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
- htile_size, 0, R600_COHERENCY_NONE);
+ r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
+ 0, htile_size, clear_value,
+ R600_COHERENCY_NONE);
}
}
@@ -967,10 +993,11 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
if (rtex->htile_buffer)
fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, "
- "xalign=%u, yalign=%u\n",
+ "xalign=%u, yalign=%u, TC_compatible = %u\n",
rtex->htile_buffer->b.b.width0,
rtex->htile_buffer->buf->alignment, rtex->htile.pitch,
- rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign);
+ rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign,
+ rtex->tc_compatible_htile);
if (rtex->dcc_offset) {
fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
@@ -1054,6 +1081,16 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
}
+ rtex->tc_compatible_htile = rtex->surface.htile_size != 0;
+ assert(!!(rtex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ==
+ rtex->tc_compatible_htile);
+
+ /* TC-compatible HTILE only supports Z32_FLOAT. */
+ if (rtex->tc_compatible_htile)
+ rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
+ else
+ rtex->db_render_format = base->format;
+
/* Tiled depth textures utilize the non-displayable tile order.
* This must be done after r600_setup_surface.
* Applies to R600-Cayman. */
@@ -1241,11 +1278,20 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_surf surface = {0};
+ bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
+ bool tc_compatible_htile =
+ rscreen->chip_class >= VI &&
+ (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
+ !(rscreen->debug_flags & DBG_NO_HYPERZ) &&
+ !is_flushed_depth &&
+ templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
+ util_format_is_depth_or_stencil(templ->format);
+
int r;
r = r600_init_surface(rscreen, &surface, templ,
r600_choose_tiling(rscreen, templ),
- templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
+ is_flushed_depth, tc_compatible_htile);
if (r) {
return NULL;
}
@@ -1296,7 +1342,8 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
else
array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
- r = r600_init_surface(rscreen, &surface, templ, array_mode, false);
+ r = r600_init_surface(rscreen, &surface, templ, array_mode,
+ false, false);
if (r) {
return NULL;
}
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 7146737c826..8946209d337 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -278,6 +278,7 @@ enum radeon_feature_id {
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
#define RADEON_SURF_FMASK (1 << 21)
#define RADEON_SURF_DISABLE_DCC (1 << 22)
+#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
@@ -344,6 +345,9 @@ struct radeon_surf {
uint64_t dcc_size;
uint64_t dcc_alignment;
+ /* TC-compatible HTILE only. */
+ uint64_t htile_size;
+ uint64_t htile_alignment;
};
struct radeon_bo_list_item {