aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorAlyssa Rosenzweig <[email protected]>2020-07-09 13:42:25 -0400
committerMarge Bot <[email protected]>2020-07-16 15:10:55 +0000
commit293f251871b2fc7fd40d0fcabec4dd8a8324bc47 (patch)
tree628ac4a83448f365ac83e37bd4723c8851144876 /src
parentf611af35948e4d1d56daa94f59d5feb7d44d24ce (diff)
panfrost: Use Midgard-specific reloads
v2: Be more explicit about sampler types. Prefer the term "load" to "resolve" to match VK convention. Generate shaders for MRT 8x. Blit shader generation adds about 6ms to startup cost. We could cache these shaders to disk if we needed to (or indeed, ship binaries). v3: Fall back on u_blitter on Bifrost so Bifrost continues to work. KHR_partial_update support is mostly no-oped on Bifrost now, but that's okay for now - compositors are still functional. v4: Specialize on multisample state as well to enable reloads of MSAA textures. This requires 2x the shader variants, so I assume we're up to 12ms startup cost for generation. Annoying. Also fix interactions with depth- or stencil-only clears of combined depth-stencil surfaces. v5: Cache to the device (screen) instead of the context, reducing duplicated work in apps that create many contexts (e.g. Chromium). v6: Squash in KHR_partial_update cleanup to fix intermediate regressions on a few tests. Signed-off-by: Alyssa Rosenzweig <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5824>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/panfrost/pan_context.c5
-rw-r--r--src/gallium/drivers/panfrost/pan_context.h2
-rw-r--r--src/gallium/drivers/panfrost/pan_job.c240
-rw-r--r--src/gallium/drivers/panfrost/pan_mfbd.c4
-rw-r--r--src/gallium/drivers/panfrost/pan_resource.c69
-rw-r--r--src/gallium/drivers/panfrost/pan_resource.h7
-rw-r--r--src/gallium/drivers/panfrost/pan_screen.c3
-rw-r--r--src/panfrost/Makefile.sources1
-rw-r--r--src/panfrost/encoder/meson.build3
-rw-r--r--src/panfrost/encoder/pan_blit.c370
-rw-r--r--src/panfrost/encoder/pan_device.h18
-rw-r--r--src/panfrost/encoder/pan_pool.h1
-rw-r--r--src/panfrost/encoder/pan_props.c1
-rw-r--r--src/panfrost/encoder/pan_texture.h31
14 files changed, 605 insertions, 150 deletions
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index 4ae8ee31229..e63afd8d9cc 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1567,6 +1567,11 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
panfrost_batch_init(ctx);
panfrost_invalidate_frame(ctx);
+ if (!(dev->quirks & IS_BIFROST)) {
+ for (unsigned c = 0; c < PIPE_MAX_COLOR_BUFS; ++c)
+ ctx->blit_blend.rt[c].shaders = _mesa_hash_table_u64_create(ctx);
+ }
+
/* By default mask everything on */
ctx->sample_mask = ~0;
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index 8e247091c40..7ba1170cdf3 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -165,6 +165,8 @@ struct panfrost_context {
struct pipe_stencil_ref stencil_ref;
unsigned sample_mask;
unsigned min_samples;
+
+ struct panfrost_blend_state blit_blend;
};
/* Corresponds to the CSO */
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 1da37214ab4..365b4ccdb3a 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -35,6 +35,7 @@
#include "util/u_pack_color.h"
#include "util/rounding.h"
#include "pan_util.h"
+#include "pan_blending.h"
#include "pandecode/decode.h"
#include "panfrost-quirks.h"
@@ -795,35 +796,19 @@ panfrost_batch_reserve_framebuffer(struct panfrost_batch *batch)
static void
-panfrost_batch_draw_wallpaper(struct panfrost_batch *batch)
+panfrost_load_surface(struct panfrost_batch *batch, struct pipe_surface *surf, unsigned loc)
{
- /* Color 0 is cleared, no need to draw the wallpaper.
- * TODO: MRT wallpapers.
- */
- if (batch->clear & PIPE_CLEAR_COLOR0)
- return;
-
- /* Nothing to reload? TODO: MRT wallpapers */
- if (batch->key.cbufs[0] == NULL)
- return;
-
- /* No draw calls, and no clear on the depth/stencil bufs.
- * Drawing the wallpaper would be useless.
- */
- if (!batch->scoreboard.tiler_dep &&
- !(batch->clear & PIPE_CLEAR_DEPTHSTENCIL))
+ if (!surf)
return;
- /* Check if the buffer has any content on it worth preserving */
-
- struct pipe_surface *surf = batch->key.cbufs[0];
struct panfrost_resource *rsrc = pan_resource(surf->texture);
unsigned level = surf->u.tex.level;
if (!rsrc->slices[level].initialized)
return;
- batch->ctx->wallpaper_batch = batch;
+ if (!rsrc->damage.inverted_len)
+ return;
/* Clamp the rendering area to the damage extent. The
* KHR_partial_update() spec states that trying to render outside of
@@ -840,73 +825,152 @@ panfrost_batch_draw_wallpaper(struct panfrost_batch *batch)
rsrc->damage.extent.maxy);
}
- /* FIXME: Looks like aligning on a tile is not enough, but
- * aligning on twice the tile size seems to works. We don't
- * know exactly what happens here but this deserves extra
- * investigation to figure it out.
- */
- batch->minx = batch->minx & ~((MALI_TILE_LENGTH * 2) - 1);
- batch->miny = batch->miny & ~((MALI_TILE_LENGTH * 2) - 1);
- batch->maxx = MIN2(ALIGN_POT(batch->maxx, MALI_TILE_LENGTH * 2),
- rsrc->base.width0);
- batch->maxy = MIN2(ALIGN_POT(batch->maxy, MALI_TILE_LENGTH * 2),
- rsrc->base.height0);
-
- struct pipe_scissor_state damage;
- struct pipe_box rects[4];
-
- /* Clamp the damage box to the rendering area. */
- damage.minx = MAX2(batch->minx, rsrc->damage.biggest_rect.x);
- damage.miny = MAX2(batch->miny, rsrc->damage.biggest_rect.y);
- damage.maxx = MIN2(batch->maxx,
- rsrc->damage.biggest_rect.x +
- rsrc->damage.biggest_rect.width);
- damage.maxx = MAX2(damage.maxx, damage.minx);
- damage.maxy = MIN2(batch->maxy,
- rsrc->damage.biggest_rect.y +
- rsrc->damage.biggest_rect.height);
- damage.maxy = MAX2(damage.maxy, damage.miny);
-
- /* One damage rectangle means we can end up with at most 4 reload
- * regions:
- * 1: left region, only exists if damage.x > 0
- * 2: right region, only exists if damage.x + damage.width < fb->width
- * 3: top region, only exists if damage.y > 0. The intersection with
- * the left and right regions are dropped
- * 4: bottom region, only exists if damage.y + damage.height < fb->height.
- * The intersection with the left and right regions are dropped
- *
- * ____________________________
- * | | 3 | |
- * | |___________| |
- * | | damage | |
- * | 1 | rect | 2 |
- * | |___________| |
- * | | 4 | |
- * |_______|___________|______|
- */
- u_box_2d(batch->minx, batch->miny, damage.minx - batch->minx,
- batch->maxy - batch->miny, &rects[0]);
- u_box_2d(damage.maxx, batch->miny, batch->maxx - damage.maxx,
- batch->maxy - batch->miny, &rects[1]);
- u_box_2d(damage.minx, batch->miny, damage.maxx - damage.minx,
- damage.miny - batch->miny, &rects[2]);
- u_box_2d(damage.minx, damage.maxy, damage.maxx - damage.minx,
- batch->maxy - damage.maxy, &rects[3]);
-
- for (unsigned i = 0; i < 4; i++) {
- /* Width and height are always >= 0 even if width is declared as a
- * signed integer: u_box_2d() helper takes unsigned args and
- * panfrost_set_damage_region() is taking care of clamping
- * negative values.
- */
- if (!rects[i].width || !rects[i].height)
- continue;
+ /* XXX: Native blits on Bifrost */
+ if (batch->pool.dev->quirks & IS_BIFROST) {
+ if (loc != FRAG_RESULT_DATA0)
+ return;
+
+ /* XXX: why align on *twice* the tile length? */
+ batch->minx = batch->minx & ~((MALI_TILE_LENGTH * 2) - 1);
+ batch->miny = batch->miny & ~((MALI_TILE_LENGTH * 2) - 1);
+ batch->maxx = MIN2(ALIGN_POT(batch->maxx, MALI_TILE_LENGTH * 2),
+ rsrc->base.width0);
+ batch->maxy = MIN2(ALIGN_POT(batch->maxy, MALI_TILE_LENGTH * 2),
+ rsrc->base.height0);
+
+ struct pipe_box rect;
+ batch->ctx->wallpaper_batch = batch;
+ u_box_2d(batch->minx, batch->miny, batch->maxx - batch->minx,
+ batch->maxy - batch->miny, &rect);
+ panfrost_blit_wallpaper(batch->ctx, &rect);
+ batch->ctx->wallpaper_batch = NULL;
+ return;
+ }
+
+ enum pipe_format format = rsrc->base.format;
+
+ if (loc == FRAG_RESULT_DEPTH) {
+ if (!util_format_has_depth(util_format_description(format)))
+ return;
+
+ format = util_format_get_depth_only(format);
+ } else if (loc == FRAG_RESULT_STENCIL) {
+ if (!util_format_has_stencil(util_format_description(format)))
+ return;
+
+ if (rsrc->separate_stencil) {
+ rsrc = rsrc->separate_stencil;
+ format = rsrc->base.format;
+ }
+
+ format = util_format_stencil_only(format);
+ }
+
+ enum mali_texture_type type =
+ panfrost_translate_texture_type(rsrc->base.target);
+
+ unsigned nr_samples = surf->nr_samples;
+
+ if (!nr_samples)
+ nr_samples = surf->texture->nr_samples;
+
+ struct pan_image img = {
+ .width0 = rsrc->base.width0,
+ .height0 = rsrc->base.height0,
+ .depth0 = rsrc->base.depth0,
+ .format = format,
+ .type = type,
+ .layout = rsrc->layout,
+ .array_size = rsrc->base.array_size,
+ .first_level = level,
+ .last_level = level,
+ .first_layer = surf->u.tex.first_layer,
+ .last_layer = surf->u.tex.last_layer,
+ .nr_samples = nr_samples,
+ .cubemap_stride = rsrc->cubemap_stride,
+ .bo = rsrc->bo,
+ .slices = rsrc->slices
+ };
- /* Blit the wallpaper in */
- panfrost_blit_wallpaper(batch->ctx, &rects[i]);
+ mali_ptr blend_shader = 0;
+
+ if (loc >= FRAG_RESULT_DATA0 && !panfrost_can_fixed_blend(rsrc->base.format)) {
+ struct panfrost_blend_shader *b =
+ panfrost_get_blend_shader(batch->ctx, &batch->ctx->blit_blend, rsrc->base.format, loc - FRAG_RESULT_DATA0);
+
+ struct panfrost_bo *bo = panfrost_batch_create_bo(batch, b->size,
+ PAN_BO_EXECUTE,
+ PAN_BO_ACCESS_PRIVATE |
+ PAN_BO_ACCESS_READ |
+ PAN_BO_ACCESS_FRAGMENT);
+
+ memcpy(bo->cpu, b->buffer, b->size);
+ assert(b->work_count <= 4);
+
+ blend_shader = bo->gpu | b->first_tag;
+ }
+
+ struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
+ 4 * 4 * 6 * rsrc->damage.inverted_len);
+
+ for (unsigned i = 0; i < rsrc->damage.inverted_len; ++i) {
+ float *o = (float *) (transfer.cpu + (4 * 4 * 6 * i));
+ struct pan_rect r = rsrc->damage.inverted_rects[i];
+
+ float rect[] = {
+ r.minx, rsrc->base.height0 - r.miny, 0.0, 1.0,
+ r.maxx, rsrc->base.height0 - r.miny, 0.0, 1.0,
+ r.minx, rsrc->base.height0 - r.maxy, 0.0, 1.0,
+
+ r.maxx, rsrc->base.height0 - r.miny, 0.0, 1.0,
+ r.minx, rsrc->base.height0 - r.maxy, 0.0, 1.0,
+ r.maxx, rsrc->base.height0 - r.maxy, 0.0, 1.0,
+ };
+
+ assert(sizeof(rect) == 4 * 4 * 6);
+ memcpy(o, rect, sizeof(rect));
+ }
+
+ panfrost_load_midg(&batch->pool, &batch->scoreboard,
+ blend_shader,
+ batch->framebuffer.gpu, transfer.gpu,
+ rsrc->damage.inverted_len * 6,
+ &img, loc);
+
+ panfrost_batch_add_bo(batch, batch->pool.dev->blit_shaders.bo,
+ PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | PAN_BO_ACCESS_FRAGMENT);
+}
+
+static void
+panfrost_batch_draw_wallpaper(struct panfrost_batch *batch)
+{
+ panfrost_batch_reserve_framebuffer(batch);
+
+ /* Assume combined. If either depth or stencil is written, they will
+ * both be written so we need to be careful for reloading */
+
+ unsigned draws = batch->draws;
+
+ if (draws & PIPE_CLEAR_DEPTHSTENCIL)
+ draws |= PIPE_CLEAR_DEPTHSTENCIL;
+
+ /* Mask of buffers which need reload since they are not cleared and
+ * they are drawn. (If they are cleared, reload is useless; if they are
+ * not drawn and also not cleared, we can generally omit the attachment
+ * at the framebuffer descriptor level) */
+
+ unsigned reload = ~batch->clear & draws;
+
+ for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
+ if (reload & (PIPE_CLEAR_COLOR0 << i))
+ panfrost_load_surface(batch, batch->key.cbufs[i], FRAG_RESULT_DATA0 + i);
}
- batch->ctx->wallpaper_batch = NULL;
+
+ if (reload & PIPE_CLEAR_DEPTH)
+ panfrost_load_surface(batch, batch->key.zsbuf, FRAG_RESULT_DEPTH);
+
+ if (reload & PIPE_CLEAR_STENCIL)
+ panfrost_load_surface(batch, batch->key.zsbuf, FRAG_RESULT_STENCIL);
}
static void
@@ -1086,13 +1150,11 @@ panfrost_batch_submit(struct panfrost_batch *batch)
* it flushed, the easiest solution is to reload everything.
*/
for (unsigned i = 0; i < batch->key.nr_cbufs; i++) {
- struct panfrost_resource *res;
-
if (!batch->key.cbufs[i])
continue;
- res = pan_resource(batch->key.cbufs[i]->texture);
- panfrost_resource_reset_damage(res);
+ panfrost_resource_set_damage_region(NULL,
+ batch->key.cbufs[i]->texture, 0, NULL);
}
out:
diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c
index 247dfd1ae18..4d48f2dcac3 100644
--- a/src/gallium/drivers/panfrost/pan_mfbd.c
+++ b/src/gallium/drivers/panfrost/pan_mfbd.c
@@ -527,7 +527,7 @@ panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
for (int cb = 0; cb < rt_descriptors; ++cb) {
struct pipe_surface *surf = batch->key.cbufs[cb];
- if (surf) {
+ if (surf && ((batch->clear | batch->draws) & (PIPE_CLEAR_COLOR0 << cb))) {
unsigned nr_samples = surf->nr_samples;
if (!nr_samples)
@@ -564,7 +564,7 @@ panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
rts[cb].format.unk1 |= (cb * 0x400);
}
- if (batch->key.zsbuf) {
+ if (batch->key.zsbuf && ((batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL)) {
panfrost_mfbd_set_zsbuf(&fb, &fbx, batch->key.zsbuf);
}
diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c
index b79ebe72850..9440ef7956a 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -50,17 +50,6 @@
#include "pandecode/decode.h"
#include "panfrost-quirks.h"
-void
-panfrost_resource_reset_damage(struct panfrost_resource *pres)
-{
- /* We set the damage extent to the full resource size but keep the
- * damage box empty so that the FB content is reloaded by default.
- */
- memset(&pres->damage, 0, sizeof(pres->damage));
- pres->damage.extent.maxx = pres->base.width0;
- pres->damage.extent.maxy = pres->base.height0;
-}
-
static struct pipe_resource *
panfrost_resource_from_handle(struct pipe_screen *pscreen,
const struct pipe_resource *templat,
@@ -90,7 +79,7 @@ panfrost_resource_from_handle(struct pipe_screen *pscreen,
rsc->slices[0].stride = whandle->stride;
rsc->slices[0].offset = whandle->offset;
rsc->slices[0].initialized = true;
- panfrost_resource_reset_damage(rsc);
+ panfrost_resource_set_damage_region(NULL, &rsc->base, 0, NULL);
if (dev->quirks & IS_BIFROST &&
templat->bind & PIPE_BIND_RENDER_TARGET) {
@@ -441,57 +430,29 @@ panfrost_resource_set_damage_region(struct pipe_screen *screen,
const struct pipe_box *rects)
{
struct panfrost_resource *pres = pan_resource(res);
- struct pipe_box *damage_rect = &pres->damage.biggest_rect;
struct pipe_scissor_state *damage_extent = &pres->damage.extent;
unsigned int i;
- if (!nrects) {
- panfrost_resource_reset_damage(pres);
- return;
- }
-
- /* We keep track of 2 different things here:
- * 1 the damage extent: the quad including all damage regions. Will be
- * used restrict the rendering area
- * 2 the biggest damage rectangle: when there are more than one damage
- * rect we keep the biggest one and will generate 4 wallpaper quads
- * out of it (see panfrost_draw_wallpaper() for more details). We
- * might want to do something smarter at some point.
- *
- * _________________________________
- * | |
- * | _________________________ |
- * | | rect1| _________| |
- * | |______|_____ | rect 3: | |
- * | | | rect2 | | biggest | |
- * | | |_______| | rect | |
- * | |_______________|_________| |
- * | damage extent |
- * |_______________________________|
- * resource
- */
+ if (pres->damage.inverted_rects)
+ ralloc_free(pres->damage.inverted_rects);
+
memset(&pres->damage, 0, sizeof(pres->damage));
+
+ pres->damage.inverted_rects =
+ pan_subtract_damage(pres,
+ res->width0, res->height0,
+ nrects, rects, &pres->damage.inverted_len);
+
+ /* Track the damage extent: the quad including all damage regions. Will
+ * be used to restrict the rendering area */
+
damage_extent->minx = 0xffff;
damage_extent->miny = 0xffff;
+
for (i = 0; i < nrects; i++) {
int x = rects[i].x, w = rects[i].width, h = rects[i].height;
int y = res->height0 - (rects[i].y + h);
- /* Clamp x,y,w,h to prevent negative values. */
- if (x < 0) {
- h += x;
- x = 0;
- }
- if (y < 0) {
- w += y;
- y = 0;
- }
- w = MAX2(w, 0);
- h = MAX2(h, 0);
-
- if (damage_rect->width * damage_rect->height < w * h)
- u_box_2d(x, y, w, h, damage_rect);
-
damage_extent->minx = MIN2(damage_extent->minx, x);
damage_extent->miny = MIN2(damage_extent->miny, y);
damage_extent->maxx = MAX2(damage_extent->maxx,
@@ -543,7 +504,7 @@ panfrost_resource_create(struct pipe_screen *screen,
util_range_init(&so->valid_buffer_range);
panfrost_resource_create_bo(dev, so);
- panfrost_resource_reset_damage(so);
+ panfrost_resource_set_damage_region(NULL, &so->base, 0, NULL);
if (template->bind & PIPE_BIND_INDEX_BUFFER)
so->index_cache = rzalloc(so, struct panfrost_minmax_cache);
diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h
index baf7604b4f6..8f801473a0b 100644
--- a/src/gallium/drivers/panfrost/pan_resource.h
+++ b/src/gallium/drivers/panfrost/pan_resource.h
@@ -31,6 +31,7 @@
#include "pan_pool.h"
#include "pan_minmax_cache.h"
#include "pan_texture.h"
+#include "pan_partial_update.h"
#include "drm-uapi/drm.h"
#include "util/u_range.h"
@@ -39,8 +40,9 @@
struct panfrost_resource {
struct pipe_resource base;
struct {
- struct pipe_box biggest_rect;
struct pipe_scissor_state extent;
+ struct pan_rect *inverted_rects;
+ unsigned inverted_len;
} damage;
struct panfrost_bo *bo;
@@ -118,9 +120,6 @@ panfrost_blit_wallpaper(struct panfrost_context *ctx,
struct pipe_box *box);
void
-panfrost_resource_reset_damage(struct panfrost_resource *pres);
-
-void
panfrost_resource_set_damage_region(struct pipe_screen *screen,
struct pipe_resource *res,
unsigned int nrects,
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index b705c362594..b6634955ea9 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -776,5 +776,8 @@ panfrost_create_screen(int fd, struct renderonly *ro)
panfrost_resource_screen_init(&screen->base);
+ if (!(dev->quirks & IS_BIFROST))
+ panfrost_init_blit_shaders(dev);
+
return &screen->base;
}
diff --git a/src/panfrost/Makefile.sources b/src/panfrost/Makefile.sources
index 4efd694b3a7..d34e71dfbca 100644
--- a/src/panfrost/Makefile.sources
+++ b/src/panfrost/Makefile.sources
@@ -25,6 +25,7 @@ encoder_FILES := \
encoder/pan_attributes.c \
encoder/pan_bo.c \
encoder/pan_bo.h \
+ encoder/pan_blit.c \
encoder/pan_device.h \
encoder/pan_encoder.h \
encoder/pan_format.c \
diff --git a/src/panfrost/encoder/meson.build b/src/panfrost/encoder/meson.build
index e7b28e16338..754e7ce246c 100644
--- a/src/panfrost/encoder/meson.build
+++ b/src/panfrost/encoder/meson.build
@@ -25,6 +25,7 @@ libpanfrost_encoder_files = files(
'pan_afbc.c',
'pan_attributes.c',
'pan_bo.c',
+ 'pan_blit.c',
'pan_format.c',
'pan_invocation.c',
'pan_sampler.c',
@@ -42,6 +43,6 @@ libpanfrost_encoder = static_library(
include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_panfrost_hw],
c_args : [no_override_init_args],
gnu_symbol_visibility : 'hidden',
- dependencies: [dep_libdrm],
+ dependencies: [dep_libdrm, idep_nir],
build_by_default : false,
)
diff --git a/src/panfrost/encoder/pan_blit.c b/src/panfrost/encoder/pan_blit.c
new file mode 100644
index 00000000000..dcb9f8dd194
--- /dev/null
+++ b/src/panfrost/encoder/pan_blit.c
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2020 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Alyssa Rosenzweig <[email protected]>
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include "pan_encoder.h"
+#include "pan_pool.h"
+#include "pan_scoreboard.h"
+#include "pan_texture.h"
+#include "panfrost-quirks.h"
+#include "../midgard/midgard_compile.h"
+#include "compiler/nir/nir_builder.h"
+#include "util/u_math.h"
+
+/* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
+ * missing in many cases. We instead use software paths as fallbacks to
+ * implement blits, which are done as TILER jobs. No vertex shader is
+ * necessary since we can supply screen-space coordinates directly.
+ *
+ * This is primarily designed as a fallback for preloads but could be extended
+ * for other clears/blits if needed in the future. */
+
+static void
+panfrost_build_blit_shader(panfrost_program *program, unsigned gpu_id, gl_frag_result loc, nir_alu_type T, bool ms)
+{
+ bool is_colour = loc >= FRAG_RESULT_DATA0;
+
+ nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL);
+ nir_function *fn = nir_function_create(shader, "main");
+ nir_function_impl *impl = nir_function_impl_create(fn);
+
+ nir_variable *c_src = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "coord");
+ nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(
+ GLSL_TYPE_FLOAT, is_colour ? 4 : 1), "out");
+
+ c_src->data.location = VARYING_SLOT_TEX0;
+ c_out->data.location = loc;
+
+ nir_builder _b;
+ nir_builder *b = &_b;
+ nir_builder_init(b, impl);
+ b->cursor = nir_before_block(nir_start_block(impl));
+
+ nir_ssa_def *coord = nir_load_var(b, c_src);
+
+ nir_tex_instr *tex = nir_tex_instr_create(shader, ms ? 3 : 1);
+
+ tex->dest_type = T;
+
+ if (ms) {
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_f2i32(b, coord));
+ tex->coord_components = 2;
+
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(b));
+
+ tex->src[2].src_type = nir_tex_src_lod;
+ tex->src[2].src = nir_src_for_ssa(nir_imm_int(b, 0));
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ } else {
+ tex->op = nir_texop_tex;
+
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(coord);
+ tex->coord_components = 2;
+
+ tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ }
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
+ nir_builder_instr_insert(b, &tex->instr);
+
+ if (is_colour)
+ nir_store_var(b, c_out, &tex->dest.ssa, 0xFF);
+ else
+ nir_store_var(b, c_out, nir_channel(b, &tex->dest.ssa, 0), 0xFF);
+
+ midgard_compile_shader_nir(shader, program, false, 0, gpu_id, false);
+}
+
+/* Compile and upload all possible blit shaders ahead-of-time to reduce draw
+ * time overhead. There's only ~30 of them at the moment, so this is fine */
+
+void
+panfrost_init_blit_shaders(struct panfrost_device *dev)
+{
+ static const struct {
+ gl_frag_result loc;
+ unsigned types;
+ } shader_descs[] = {
+ { FRAG_RESULT_DEPTH, 1 << PAN_BLIT_FLOAT },
+ { FRAG_RESULT_STENCIL, 1 << PAN_BLIT_UINT },
+ { FRAG_RESULT_DATA0, ~0 },
+ { FRAG_RESULT_DATA1, ~0 },
+ { FRAG_RESULT_DATA2, ~0 },
+ { FRAG_RESULT_DATA3, ~0 },
+ { FRAG_RESULT_DATA4, ~0 },
+ { FRAG_RESULT_DATA5, ~0 },
+ { FRAG_RESULT_DATA6, ~0 },
+ { FRAG_RESULT_DATA7, ~0 }
+ };
+
+ nir_alu_type nir_types[PAN_BLIT_NUM_TYPES] = {
+ nir_type_float,
+ nir_type_uint,
+ nir_type_int
+ };
+
+ /* Total size = # of shaders * bytes per shader. There are
+ * shaders for each RT (so up to DATA7 -- overestimate is
+ * okay) and up to NUM_TYPES variants of each, * 2 for multisampling
+ * variants. These shaders are simple enough that they should be less
+ * than 8 quadwords each (again, overestimate is fine). */
+
+ unsigned offset = 0;
+ unsigned total_size = (FRAG_RESULT_DATA7 * PAN_BLIT_NUM_TYPES)
+ * (8 * 16) * 2;
+
+ dev->blit_shaders.bo = panfrost_bo_create(dev, total_size, PAN_BO_EXECUTE);
+
+ /* Don't bother generating multisampling variants if we don't actually
+ * support multisampling */
+ bool has_ms = !(dev->quirks & MIDGARD_SFBD);
+
+ for (unsigned ms = 0; ms <= has_ms; ++ms) {
+ for (unsigned i = 0; i < ARRAY_SIZE(shader_descs); ++i) {
+ unsigned loc = shader_descs[i].loc;
+
+ for (enum pan_blit_type T = 0; T < PAN_BLIT_NUM_TYPES; ++T) {
+ if (!(shader_descs[i].types & (1 << T)))
+ continue;
+
+ panfrost_program program;
+ panfrost_build_blit_shader(&program, dev->gpu_id, loc,
+ nir_types[T], ms);
+
+ assert(offset + program.compiled.size < total_size);
+ memcpy(dev->blit_shaders.bo->cpu + offset, program.compiled.data, program.compiled.size);
+
+ dev->blit_shaders.loads[loc][T][ms] = (dev->blit_shaders.bo->gpu + offset) | program.first_tag;
+ offset += ALIGN_POT(program.compiled.size, 64);
+ util_dynarray_fini(&program.compiled);
+ }
+ }
+ }
+}
+
+/* Add a shader-based load on Midgard (draw-time for GL). Shaders are
+ * precached */
+
+void
+panfrost_load_midg(
+ struct pan_pool *pool,
+ struct pan_scoreboard *scoreboard,
+ mali_ptr blend_shader,
+ mali_ptr fbd,
+ mali_ptr coordinates, unsigned vertex_count,
+ struct pan_image *image,
+ unsigned loc)
+{
+ unsigned width = u_minify(image->width0, image->first_level);
+ unsigned height = u_minify(image->height0, image->first_level);
+
+ struct mali_viewport viewport = {
+ .clip_minx = -INFINITY,
+ .clip_miny = -INFINITY,
+ .clip_maxx = INFINITY,
+ .clip_maxy = INFINITY,
+ .clip_minz = 0.0,
+ .clip_maxz = 1.0,
+
+ .viewport0 = { 0, 0 },
+ .viewport1 = { MALI_POSITIVE(width), MALI_POSITIVE(height) }
+ };
+
+ union mali_attr varying = {
+ .elements = coordinates | MALI_ATTR_LINEAR,
+ .stride = 4 * sizeof(float),
+ .size = 4 * sizeof(float) * vertex_count,
+ };
+
+ struct mali_attr_meta varying_meta = {
+ .index = 0,
+ .unknown1 = 2,
+ .swizzle = (MALI_CHANNEL_RED << 0) | (MALI_CHANNEL_GREEN << 3),
+ .format = MALI_RGBA32F
+ };
+
+ struct mali_stencil_test stencil = {
+ .mask = 0xFF,
+ .func = MALI_FUNC_ALWAYS,
+ .sfail = MALI_STENCIL_REPLACE,
+ .dpfail = MALI_STENCIL_REPLACE,
+ .dppass = MALI_STENCIL_REPLACE,
+ };
+
+ union midgard_blend replace = {
+ .equation = {
+ .rgb_mode = 0x122,
+ .alpha_mode = 0x122,
+ .color_mask = MALI_MASK_R | MALI_MASK_G | MALI_MASK_B | MALI_MASK_A,
+ }
+ };
+
+ if (blend_shader)
+ replace.shader = blend_shader;
+
+ /* Determine the sampler type needed. Stencil is always sampled as
+ * UINT. Pure (U)INT is always (U)INT. Everything else is FLOAT. */
+
+ enum pan_blit_type T =
+ (loc == FRAG_RESULT_STENCIL) ? PAN_BLIT_UINT :
+ (util_format_is_pure_uint(image->format)) ? PAN_BLIT_UINT :
+ (util_format_is_pure_sint(image->format)) ? PAN_BLIT_INT :
+ PAN_BLIT_FLOAT;
+
+ bool ms = image->nr_samples > 1;
+
+ struct mali_shader_meta shader_meta = {
+ .shader = pool->dev->blit_shaders.loads[loc][T][ms],
+ .sampler_count = 1,
+ .texture_count = 1,
+ .varying_count = 1,
+ .midgard1 = {
+ .flags_lo = 0x20,
+ .work_count = 4,
+ },
+ .coverage_mask = 0xF,
+ .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x10,
+ .unknown2_4 = 0x4e0,
+ .stencil_mask_front = ~0,
+ .stencil_mask_back = ~0,
+ .stencil_front = stencil,
+ .stencil_back = stencil,
+ .blend = {
+ .shader = blend_shader
+ }
+ };
+
+ if (ms)
+ shader_meta.unknown2_3 |= MALI_HAS_MSAA | MALI_PER_SAMPLE;
+ else
+ shader_meta.unknown2_4 |= MALI_NO_MSAA;
+
+ assert(shader_meta.shader);
+
+ if (pool->dev->quirks & MIDGARD_SFBD) {
+ shader_meta.unknown2_4 |= (0x10 | MALI_NO_DITHER);
+ shader_meta.blend = replace;
+
+ if (loc < FRAG_RESULT_DATA0)
+ shader_meta.blend.equation.color_mask = 0x0;
+ }
+
+ if (loc == FRAG_RESULT_DEPTH) {
+ shader_meta.midgard1.flags_lo |= MALI_WRITES_Z;
+ shader_meta.unknown2_3 |= MALI_DEPTH_WRITEMASK;
+ } else if (loc == FRAG_RESULT_STENCIL) {
+ shader_meta.midgard1.flags_hi |= MALI_WRITES_S;
+ shader_meta.unknown2_4 |= MALI_STENCIL_TEST;
+ } else {
+ shader_meta.midgard1.flags_lo |= MALI_EARLY_Z;
+ }
+
+ /* Create the texture descriptor. We partially compute the base address
+ * ourselves to account for layer, such that the texture descriptor
+ * itself is for a 2D texture with array size 1 even for 3D/array
+ * textures, removing the need to separately key the blit shaders for
+ * 2D and 3D variants */
+
+ struct panfrost_transfer texture_t = panfrost_pool_alloc(pool, sizeof(struct mali_texture_descriptor) + sizeof(mali_ptr) * 2 * MAX2(image->nr_samples, 1));
+
+ panfrost_new_texture(texture_t.cpu,
+ image->width0, image->height0,
+ MAX2(image->nr_samples, 1), 1,
+ image->format, MALI_TEX_2D,
+ image->layout,
+ image->first_level, image->last_level,
+ 0, 0,
+ image->nr_samples,
+ 0,
+ (MALI_CHANNEL_RED << 0) | (MALI_CHANNEL_GREEN << 3) | (MALI_CHANNEL_BLUE << 6) | (MALI_CHANNEL_ALPHA << 9),
+ image->bo->gpu + image->first_layer *
+ panfrost_get_layer_stride(image->slices,
+ image->type == MALI_TEX_3D,
+ image->cubemap_stride, image->first_level),
+ image->slices);
+
+ struct mali_sampler_descriptor sampler = {
+ .filter_mode = MALI_SAMP_MAG_NEAREST | MALI_SAMP_MIN_NEAREST,
+ .wrap_s = MALI_WRAP_CLAMP_TO_EDGE,
+ .wrap_t = MALI_WRAP_CLAMP_TO_EDGE,
+ .wrap_r = MALI_WRAP_CLAMP_TO_EDGE,
+ };
+
+ struct panfrost_transfer shader_meta_t = panfrost_pool_alloc(pool, sizeof(shader_meta) + 8 * sizeof(struct midgard_blend_rt));
+ memcpy(shader_meta_t.cpu, &shader_meta, sizeof(shader_meta));
+
+ for (unsigned i = 0; i < 8; ++i) {
+ void *dest = shader_meta_t.cpu + sizeof(shader_meta) + sizeof(struct midgard_blend_rt) * i;
+
+ if (loc == (FRAG_RESULT_DATA0 + i)) {
+ struct midgard_blend_rt blend_rt = {
+ .flags = 0x200 | MALI_BLEND_NO_DITHER,
+ .blend = replace,
+ };
+
+ if (util_format_is_srgb(image->format))
+ blend_rt.flags |= MALI_BLEND_SRGB;
+
+ if (blend_shader) {
+ blend_rt.flags |= MALI_BLEND_MRT_SHADER;
+ blend_rt.blend.shader = blend_shader;
+ }
+
+ memcpy(dest, &blend_rt, sizeof(struct midgard_blend_rt));
+ } else {
+ memset(dest, 0x0, sizeof(struct midgard_blend_rt));
+ }
+ }
+
+ struct midgard_payload_vertex_tiler payload = {
+ .prefix = {
+ .draw_mode = MALI_TRIANGLES,
+ .unknown_draw = 0x3000,
+ .index_count = MALI_POSITIVE(vertex_count)
+ },
+ .postfix = {
+ .gl_enables = 0x7,
+ .position_varying = coordinates,
+ .textures = panfrost_pool_upload(pool, &texture_t.gpu, sizeof(texture_t.gpu)),
+ .sampler_descriptor = panfrost_pool_upload(pool, &sampler, sizeof(sampler)),
+ .shader = shader_meta_t.gpu,
+ .varyings = panfrost_pool_upload(pool, &varying, sizeof(varying)),
+ .varying_meta = panfrost_pool_upload(pool, &varying_meta, sizeof(varying_meta)),
+ .viewport = panfrost_pool_upload(pool, &viewport, sizeof(viewport)),
+ .shared_memory = fbd
+ }
+ };
+
+ panfrost_pack_work_groups_compute(&payload.prefix, 1, vertex_count, 1, 1, 1, 1, true);
+ payload.prefix.workgroups_x_shift_3 = 6;
+
+ panfrost_new_job(pool, scoreboard, JOB_TYPE_TILER, false, 0, &payload, sizeof(payload), true);
+}
diff --git a/src/panfrost/encoder/pan_device.h b/src/panfrost/encoder/pan_device.h
index e68d0445f08..8638ca554fe 100644
--- a/src/panfrost/encoder/pan_device.h
+++ b/src/panfrost/encoder/pan_device.h
@@ -68,6 +68,22 @@
/* Fencepost problem, hence the off-by-one */
#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)
+/* Cache for blit shaders. Defined here so they can be cached with the device */
+
+enum pan_blit_type {
+ PAN_BLIT_FLOAT = 0,
+ PAN_BLIT_UINT,
+ PAN_BLIT_INT,
+ PAN_BLIT_NUM_TYPES,
+};
+
+#define PAN_BLIT_NUM_TARGETS (12)
+
+struct pan_blit_shaders {
+ struct panfrost_bo *bo;
+ mali_ptr loads[PAN_BLIT_NUM_TARGETS][PAN_BLIT_NUM_TYPES][2];
+};
+
struct panfrost_device {
/* For ralloc */
void *memctx;
@@ -109,6 +125,8 @@ struct panfrost_device {
struct list_head buckets[NR_BO_CACHE_BUCKETS];
} bo_cache;
+
+ struct pan_blit_shaders blit_shaders;
};
void
diff --git a/src/panfrost/encoder/pan_pool.h b/src/panfrost/encoder/pan_pool.h
index 6d7899800ce..a619bd5e6cc 100644
--- a/src/panfrost/encoder/pan_pool.h
+++ b/src/panfrost/encoder/pan_pool.h
@@ -25,6 +25,7 @@
#ifndef __PAN_POOL_H__
#define __PAN_POOL_H__
+#include <stddef.h>
#include <panfrost-misc.h>
/* Represents a pool of memory that can only grow, used to allocate objects
diff --git a/src/panfrost/encoder/pan_props.c b/src/panfrost/encoder/pan_props.c
index df471a73c8d..a4ff28506df 100644
--- a/src/panfrost/encoder/pan_props.c
+++ b/src/panfrost/encoder/pan_props.c
@@ -177,6 +177,7 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
void
panfrost_close_device(struct panfrost_device *dev)
{
+ panfrost_bo_unreference(dev->blit_shaders.bo);
panfrost_bo_cache_evict_all(dev);
pthread_mutex_destroy(&dev->bo_cache.lock);
drmFreeVersion(dev->kernel_version);
diff --git a/src/panfrost/encoder/pan_texture.h b/src/panfrost/encoder/pan_texture.h
index ea1a1ff0fe1..c4a07d15ad2 100644
--- a/src/panfrost/encoder/pan_texture.h
+++ b/src/panfrost/encoder/pan_texture.h
@@ -30,6 +30,7 @@
#include <stdbool.h>
#include "util/format/u_format.h"
+#include "compiler/shader_enums.h"
#include "panfrost-job.h"
#include "pan_bo.h"
@@ -52,6 +53,20 @@ struct panfrost_slice {
bool initialized;
};
+struct pan_image {
+ /* Format and size */
+ uint16_t width0, height0, depth0, array_size;
+ enum pipe_format format;
+ enum mali_texture_type type;
+ unsigned first_level, last_level;
+ unsigned first_layer, last_layer;
+ unsigned nr_samples;
+ struct panfrost_bo *bo;
+ struct panfrost_slice *slices;
+ unsigned cubemap_stride;
+ enum mali_texture_layout layout;
+};
+
unsigned
panfrost_compute_checksum_size(
struct panfrost_slice *slice,
@@ -164,4 +179,20 @@ panfrost_bifrost_swizzle(unsigned components)
enum mali_format
panfrost_format_to_bifrost_blend(const struct util_format_description *desc);
+struct pan_pool;
+struct pan_scoreboard;
+
+void
+panfrost_init_blit_shaders(struct panfrost_device *dev);
+
+void
+panfrost_load_midg(
+ struct pan_pool *pool,
+ struct pan_scoreboard *scoreboard,
+ mali_ptr blend_shader,
+ mali_ptr fbd,
+ mali_ptr coordinates, unsigned vertex_count,
+ struct pan_image *image,
+ unsigned loc);
+
#endif