summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2019-05-03 13:39:45 -0700
committerRob Clark <[email protected]>2019-05-04 11:50:44 -0700
commit11583dc65536f274db68cc5c8cb1a0d7007b0201 (patch)
tree9e3125c18ea624feadbf5601ce4c0ee539dec9a9 /src/gallium
parent857d9f3b02c95713d845c54bc6cc1faf877095bb (diff)
freedreno/a6xx: UBWC support for images
There are still some fallbacks we'll need to handle before we can enable UBWC by default. I think we may need to fall back to uncompressed if image atomic operations are used. And we still need to sort out how to handle image and sampler views of compressed resources if the image/sampler view is using a format that does not support compression. (I think the latter should hopefully be uncommon outside of deqp/piglit.) But at least this gets us to the point where supertuxkart works properly with UBWC enabled ;-) Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/freedreno/a6xx/fd6_image.c63
-rw-r--r--src/gallium/drivers/freedreno/freedreno_resource.c13
2 files changed, 57 insertions, 19 deletions
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c
index 9c1182777c0..a38b78907d3 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c
@@ -47,6 +47,7 @@ struct fd6_image {
uint32_t pitch;
uint32_t array_pitch;
struct fd_bo *bo;
+ uint32_t ubwc_offset;
uint32_t offset;
bool buffer;
};
@@ -77,6 +78,7 @@ static void translate_image(struct fd6_image *img, const struct pipe_image_view
if (prsc->target == PIPE_BUFFER) {
img->buffer = true;
+ img->ubwc_offset = 0; /* not valid for buffers */
img->offset = pimg->u.buf.offset;
img->pitch = 0;
img->array_pitch = 0;
@@ -94,7 +96,8 @@ static void translate_image(struct fd6_image *img, const struct pipe_image_view
unsigned lvl = pimg->u.tex.level;
unsigned layers = pimg->u.tex.last_layer - pimg->u.tex.first_layer + 1;
- img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer);
+ img->ubwc_offset = rsc->ubwc_offset; // TODO helper
+ img->offset = fd_resource_offset(rsc, lvl, pimg->u.tex.first_layer) + rsc->offset;
img->pitch = rsc->slices[lvl].pitch * rsc->cpp;
switch (prsc->target) {
@@ -148,6 +151,7 @@ static void translate_buf(struct fd6_image *img, const struct pipe_shader_buffer
img->bo = rsc->bo;
img->buffer = true;
+ img->ubwc_offset = 0; /* not valid for buffers */
img->offset = pimg->buffer_offset;
img->pitch = 0;
img->array_pitch = 0;
@@ -163,6 +167,10 @@ static void translate_buf(struct fd6_image *img, const struct pipe_shader_buffer
static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img)
{
+ struct fd_resource *rsc = fd_resource(img->prsc);
+ bool ubwc_enabled = rsc->ubwc_size &&
+ !fd_resource_level_linear(img->prsc, img->level);
+
OUT_RING(ring, fd6_tex_const_0(img->prsc, img->level, img->pfmt,
PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
@@ -172,7 +180,8 @@ static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img)
COND(img->buffer, A6XX_TEX_CONST_2_UNK4 | A6XX_TEX_CONST_2_UNK31) |
A6XX_TEX_CONST_2_TYPE(img->type) |
A6XX_TEX_CONST_2_PITCH(img->pitch));
- OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
+ OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch) |
+ COND(ubwc_enabled, A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_UNK27));
if (img->bo) {
OUT_RELOC(ring, img->bo, img->offset,
(uint64_t)A6XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
@@ -180,16 +189,25 @@ static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img)
OUT_RING(ring, 0x00000000);
OUT_RING(ring, A6XX_TEX_CONST_5_DEPTH(img->depth));
}
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+
+ OUT_RING(ring, 0x00000000); /* texconst6 */
+
+ if (ubwc_enabled) {
+ OUT_RELOC(ring, rsc->bo, img->ubwc_offset, 0, 0);
+ OUT_RING(ring, A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->ubwc_size));
+ OUT_RING(ring, A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(rsc->ubwc_pitch));
+ } else {
+ OUT_RING(ring, 0x00000000); /* texconst7 */
+ OUT_RING(ring, 0x00000000); /* texconst8 */
+ OUT_RING(ring, 0x00000000); /* texconst9 */
+ OUT_RING(ring, 0x00000000); /* texconst10 */
+ }
+
+ OUT_RING(ring, 0x00000000); /* texconst11 */
+ OUT_RING(ring, 0x00000000); /* texconst12 */
+ OUT_RING(ring, 0x00000000); /* texconst13 */
+ OUT_RING(ring, 0x00000000); /* texconst14 */
+ OUT_RING(ring, 0x00000000); /* texconst15 */
}
void
@@ -212,6 +230,8 @@ static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img)
{
struct fd_resource *rsc = fd_resource(img->prsc);
enum a6xx_tile_mode tile_mode = TILE6_LINEAR;
+ bool ubwc_enabled = rsc->ubwc_size &&
+ !fd_resource_level_linear(img->prsc, img->level);
if (rsc->tile_mode && !fd_resource_level_linear(img->prsc, img->level)) {
tile_mode = rsc->tile_mode;
@@ -224,7 +244,8 @@ static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img)
OUT_RING(ring, A6XX_IBO_2_PITCH(img->pitch) |
COND(img->buffer, A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31) |
A6XX_IBO_2_TYPE(img->type));
- OUT_RING(ring, A6XX_IBO_3_ARRAY_PITCH(img->array_pitch));
+ OUT_RING(ring, A6XX_IBO_3_ARRAY_PITCH(img->array_pitch) |
+ COND(ubwc_enabled, A6XX_IBO_3_FLAG | A6XX_IBO_3_UNK27));
if (img->bo) {
OUT_RELOCW(ring, img->bo, img->offset,
(uint64_t)A6XX_IBO_5_DEPTH(img->depth) << 32, 0);
@@ -233,10 +254,18 @@ static void emit_image_ssbo(struct fd_ringbuffer *ring, struct fd6_image *img)
OUT_RING(ring, A6XX_IBO_5_DEPTH(img->depth));
}
OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+
+ if (ubwc_enabled) {
+ OUT_RELOCW(ring, rsc->bo, img->ubwc_offset, 0, 0);
+ OUT_RING(ring, A6XX_IBO_9_FLAG_BUFFER_ARRAY_PITCH(rsc->ubwc_size));
+ OUT_RING(ring, A6XX_IBO_10_FLAG_BUFFER_PITCH(rsc->ubwc_pitch));
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index c7436d74da0..1f0bb040c24 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -911,8 +911,17 @@ fd_resource_create_with_modifiers(struct pipe_screen *pscreen,
allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count);
/* TODO turn on UBWC for all internal buffers
- * Manhattan benchmark shows artifacts when enabled. Once this
- * is fixed the following line can be removed.
+ *
+ * There are still some regressions in deqp with UBWC enabled. I
+ * think it is mostly related to sampler/image views using a format
+ * that doesn't support compression with a resource created with
+ * a format that does. We need to track the compression state of
+ * a buffer and do an (in-place, hopefully?) resolve if it is re-
+ * interpreted with a format that does not support compression.
+ *
+ * It is possible (likely?) that we can't do atomic ops on a
+ * compressed buffer as well, so this would also require transition
+ * to an uncompressed state.
*/
allow_ubwc &= !!(fd_mesa_debug & FD_DBG_UBWC);