summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2020-04-25 12:16:35 -0700
committerMarge Bot <[email protected]>2020-04-28 23:31:58 +0000
commitf561e516c8a01993ea83f5d48e0126d0b7b6528b (patch)
tree4a1c3a46089dc9300b2fb6fd2c2fe507fd7adc1b /src
parent99d802ccc77c7897f27d75275d38c702f3db6fd8 (diff)
freedreno/a6xx: pre-calculate expected vsc stream sizes
We should only rely on overflow detection for indirect draws, where we have no other option. This doesn't use quite the worst-possible-case sizes, which in practice seem to be ~20x larger than what is required. But instead uses roughly half of that. Signed-off-by: Rob Clark <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4750>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/freedreno/Makefile.sources2
-rw-r--r--src/gallium/drivers/freedreno/a6xx/fd6_draw.c4
-rw-r--r--src/gallium/drivers/freedreno/a6xx/fd6_gmem.c21
-rw-r--r--src/gallium/drivers/freedreno/a6xx/fd6_vsc.c160
-rw-r--r--src/gallium/drivers/freedreno/a6xx/fd6_vsc.h29
-rw-r--r--src/gallium/drivers/freedreno/freedreno_batch.c3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_batch.h9
-rw-r--r--src/gallium/drivers/freedreno/meson.build2
8 files changed, 229 insertions, 1 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index 0fd7bcf5ddd..0268bb11a55 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -195,6 +195,8 @@ a6xx_SOURCES := \
a6xx/fd6_screen.h \
a6xx/fd6_texture.c \
a6xx/fd6_texture.h \
+ a6xx/fd6_vsc.c \
+ a6xx/fd6_vsc.h \
a6xx/fd6_zsa.c \
a6xx/fd6_zsa.h
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
index 25d81018ccc..f8ad3be75c8 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
@@ -38,6 +38,7 @@
#include "fd6_emit.h"
#include "fd6_program.h"
#include "fd6_format.h"
+#include "fd6_vsc.h"
#include "fd6_zsa.h"
static void
@@ -192,6 +193,9 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
if (emit.key.gs)
emit.key.key.has_gs = true;
+ if (!(emit.key.hs || emit.key.ds || emit.key.gs || info->indirect))
+ fd6_vsc_update_sizes(ctx->batch, info);
+
fixup_shader_state(ctx, &emit.key.key);
if (!(ctx->dirty & FD_DIRTY_PROG)) {
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
index 3ee55f98adf..befe19aadfa 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
@@ -326,6 +326,27 @@ update_vsc_pipe(struct fd_batch *batch)
struct fd_ringbuffer *ring = batch->gmem;
int i;
+ if (batch->draw_strm_bits/8 > fd6_ctx->vsc_draw_strm_pitch) {
+ if (fd6_ctx->vsc_draw_strm)
+ fd_bo_del(fd6_ctx->vsc_draw_strm);
+ fd6_ctx->vsc_draw_strm = NULL;
+ /* Note: probably only need to align to 0x40, but aligning stronger
+ * reduces the odds that we will have to realloc again on the next
+ * frame:
+ */
+ fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits/8, 0x4000);
+ debug_printf("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x\n",
+ fd6_ctx->vsc_draw_strm_pitch);
+ }
+
+ if (batch->prim_strm_bits/8 > fd6_ctx->vsc_prim_strm_pitch) {
+ if (fd6_ctx->vsc_prim_strm)
+ fd_bo_del(fd6_ctx->vsc_prim_strm);
+ fd6_ctx->vsc_prim_strm = NULL;
+ fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits/8, 0x4000);
+ debug_printf("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x\n",
+ fd6_ctx->vsc_prim_strm_pitch);
+ }
if (!fd6_ctx->vsc_draw_strm) {
fd6_ctx->vsc_draw_strm = fd_bo_new(ctx->screen->dev,
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_vsc.c b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.c
new file mode 100644
index 00000000000..daf97fe48ac
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_prim.h"
+
+#include "freedreno_batch.h"
+#include "freedreno_gmem.h"
+
+#include "fd6_vsc.h"
+
+/*
+ * Helper util to update expected vsc draw and primitive stream sizes, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format
+ */
+
+enum {
+ byte = 8,
+ dword = 4 * byte,
+} bits_per;
+
+/**
+ * Determine # of bits required to store a given number, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#numbers
+ */
+static unsigned
+number_size_bits(unsigned nr)
+{
+ unsigned n = util_last_bit(nr);
+ assert(n); /* encoding 0 is not possible */
+ return n + (n - 1);
+}
+
+/**
+ * Determine # of bits required to store a given bitfield, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#bitfields
+ */
+static unsigned
+bitfield_size_bits(unsigned n)
+{
+ return n + 1; /* worst case is always 1 + nr of bits */
+}
+
+static unsigned
+prim_count(const struct pipe_draw_info *info)
+{
+ /* PIPE_PRIM_MAX used internally for RECTLIST blits on 3d pipe: */
+ unsigned vtx_per_prim = (info->mode == PIPE_PRIM_MAX) ? 2 :
+ u_vertices_per_prim(info->mode);
+ return (info->count * info->instance_count) / vtx_per_prim;
+}
+
+/**
+ * The primitive stream uses a run-length encoding, where each packet contains a
+ * bitfield of bins covered and then the number of primitives which have the same
+ * bitfield. Each packet consists of the following, in order:
+ *
+ * - The (compressed) bitfield of bins covered
+ * - The number of primitives with this bitset
+ * - Checksum
+ *
+ * The worst case would be that each primitive has a different bitmask. In practice,
+ * assuming every other primitive has a different bitmask still gets us conservatively
+ * large primitive stream sizes. (I.e. 10x what is needed, vs. 20x)
+ *
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#primitive-streams
+ */
+static unsigned
+primitive_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins)
+{
+ unsigned num_prims = prim_count(info);
+ unsigned nbits =
+ (bitfield_size_bits(num_bins) /* bitfield of bins covered */
+ + number_size_bits(1) /* number of primitives with this bitset */
+ + 1 /* checksum */
+ ) * DIV_ROUND_UP(num_prims, 2);
+ return align(nbits, dword);
+}
+
+/**
+ * Each draw stream packet contains the following:
+ *
+ * - Bin bitfield
+ * - Last instance bit
+ * - If bitfield is empty, the number of draws it is empty for, otherwise
+ * the size of the corresponding primitive stream in DWORD's.
+ * - Checksum
+ *
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#draw-streams
+ */
+static unsigned
+draw_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins,
+ unsigned prim_strm_bits)
+{
+ unsigned ndwords = prim_strm_bits / dword;
+ assert(info->instance_count > 0);
+ return (bitfield_size_bits(num_bins) /* bitfield of bins */
+ + 1 /* last-instance-bit */
+ + number_size_bits(ndwords) /* size of corresponding prim strm */
+ + 1 /* checksum */
+ ) * info->instance_count;
+}
+
+void
+fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info)
+{
+ if (!batch->num_bins_per_pipe) {
+ batch->num_bins_per_pipe = fd_gmem_estimate_bins_per_pipe(batch);
+
+ /* This is a convenient spot to add the size of the final draw-
+ * stream packet:
+ *
+ * If there are N bins, the final packet, after all the draws are
+ * done, consists of a 1 followed by N + 17 0's, plus a final 1.
+ * This uses the otherwise-unused pattern of a non-empty bitfield
+ * (initial 1) that is nonetheless empty (has all 0's)
+ */
+ unsigned final_pkt_sz = 1 + batch->num_bins_per_pipe + 17 + 1;
+ batch->prim_strm_bits = align(final_pkt_sz, dword);
+ }
+
+ unsigned prim_strm_bits =
+ primitive_stream_size_bits(info, batch->num_bins_per_pipe);
+ unsigned draw_strm_bits =
+ draw_stream_size_bits(info, batch->num_bins_per_pipe, prim_strm_bits);
+
+#if 0
+ printf("vsc: prim_strm_bits=%d, draw_strm_bits=%d, nb=%u, ic=%u, c=%u, pc=%u (%s)\n",
+ prim_strm_bits, draw_strm_bits, batch->num_bins_per_pipe,
+ info->instance_count, info->count,
+ (info->count * info->instance_count) /
+ u_vertices_per_prim(info->mode),
+ u_prim_name(info->mode));
+#endif
+
+ batch->prim_strm_bits += prim_strm_bits;
+ batch->draw_strm_bits += draw_strm_bits;
+}
+
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_vsc.h b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.h
new file mode 100644
index 00000000000..50470f683e0
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef FD6_VSC_H_
+#define FD6_VSC_H_
+
+void fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info);
+
+#endif /* FD6_VSC_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index 106959b4fed..082329eb59c 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -84,6 +84,9 @@ batch_init(struct fd_batch *batch)
batch->gmem_reason = 0;
batch->num_draws = 0;
batch->num_vertices = 0;
+ batch->num_bins_per_pipe = 0;
+ batch->prim_strm_bits = 0;
+ batch->draw_strm_bits = 0;
batch->stage = FD_STAGE_NULL;
fd_reset_wfi(batch);
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index 9266790bb80..479d78d5eca 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -129,9 +129,16 @@ struct fd_batch {
*/
const struct fd_gmem_stateobj *gmem_state;
- unsigned num_draws; /* number of draws in current batch */
+ unsigned num_draws; /* number of draws in current batch */
unsigned num_vertices; /* number of vertices in current batch */
+ /* Currently only used on a6xx, to calculate vsc prim/draw stream
+ * sizes:
+ */
+ unsigned num_bins_per_pipe;
+ unsigned prim_strm_bits;
+ unsigned draw_strm_bits;
+
/* Track the maximal bounds of the scissor of all the draws within a
* batch. Used at the tile rendering step (fd_gmem_render_tiles(),
* mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build
index a5c0935e07e..919baf070de 100644
--- a/src/gallium/drivers/freedreno/meson.build
+++ b/src/gallium/drivers/freedreno/meson.build
@@ -205,6 +205,8 @@ files_libfreedreno = files(
'a6xx/fd6_screen.h',
'a6xx/fd6_texture.c',
'a6xx/fd6_texture.h',
+ 'a6xx/fd6_vsc.c',
+ 'a6xx/fd6_vsc.h',
'a6xx/fd6_zsa.c',
'a6xx/fd6_zsa.h',
'ir3/ir3_cache.c',