aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c')
-rw-r--r--src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c551
1 files changed, 551 insertions, 0 deletions
diff --git a/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c b/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c
new file mode 100644
index 00000000000..997ff147659
--- /dev/null
+++ b/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c
@@ -0,0 +1,551 @@
+/*
+ * Copyright (C) 2018 Rob Clark <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <[email protected]>
+ */
+
+#include <assert.h>
+#include <inttypes.h>
+
+#include "util/hash_table.h"
+#include "util/slab.h"
+
+#include "drm/freedreno_ringbuffer.h"
+#include "msm_priv.h"
+
+/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
+ * by avoiding the additional tracking necessary to build cmds/relocs tables
+ * (but still builds a bos table)
+ */
+
+
+#define INIT_SIZE 0x1000
+
+static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
+
+
+struct msm_submit_sp {
+ struct fd_submit base;
+
+ DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
+ DECLARE_ARRAY(struct fd_bo *, bos);
+
+ unsigned seqno;
+
+ /* maps fd_bo to idx in bos table: */
+ struct hash_table *bo_table;
+
+ struct slab_mempool ring_pool;
+
+ struct fd_ringbuffer *primary;
+
+ /* Allow for sub-allocation of stateobj ring buffers (ie. sharing
+ * the same underlying bo)..
+ *
+ * We also rely on previous stateobj having been fully constructed
+ * so we can reclaim extra space at it's end.
+ */
+ struct fd_ringbuffer *suballoc_ring;
+};
+FD_DEFINE_CAST(fd_submit, msm_submit_sp);
+
+/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
+ * and sizes. Ie. a finalized buffer can have no more commands appended to
+ * it.
+ */
+struct msm_cmd_sp {
+ struct fd_bo *ring_bo;
+ unsigned size;
+};
+
+/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to
+ * later copy into the submit when the stateobj rb is later referenced by
+ * a regular rb:
+ */
+struct msm_reloc_bo_sp {
+ struct fd_bo *bo;
+ unsigned flags;
+};
+
+struct msm_ringbuffer_sp {
+ struct fd_ringbuffer base;
+
+ /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
+ unsigned offset;
+
+// TODO check disasm.. hopefully compilers CSE can realize that
+// reloc_bos and cmds are at the same offsets and optimize some
+// divergent cases into single case
+ union {
+ /* for _FD_RINGBUFFER_OBJECT case: */
+ struct {
+ struct fd_pipe *pipe;
+ DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos);
+ };
+ /* for other cases: */
+ struct {
+ struct fd_submit *submit;
+ DECLARE_ARRAY(struct msm_cmd_sp, cmds);
+ };
+ } u;
+
+ struct fd_bo *ring_bo;
+};
+FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);
+
+static void finalize_current_cmd(struct fd_ringbuffer *ring);
+static struct fd_ringbuffer * msm_ringbuffer_sp_init(
+ struct msm_ringbuffer_sp *msm_ring,
+ uint32_t size, enum fd_ringbuffer_flags flags);
+
+/* add (if needed) bo to submit and return index: */
+static uint32_t
+append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
+{
+ struct msm_bo *msm_bo = to_msm_bo(bo);
+ uint32_t idx;
+ pthread_mutex_lock(&idx_lock);
+ if (likely(msm_bo->current_submit_seqno == submit->seqno)) {
+ idx = msm_bo->idx;
+ } else {
+ uint32_t hash = _mesa_hash_pointer(bo);
+ struct hash_entry *entry;
+
+ entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
+ if (entry) {
+ /* found */
+ idx = (uint32_t)(uintptr_t)entry->data;
+ } else {
+ idx = APPEND(submit, submit_bos);
+ idx = APPEND(submit, bos);
+
+ submit->submit_bos[idx].flags = 0;
+ submit->submit_bos[idx].handle = bo->handle;
+ submit->submit_bos[idx].presumed = 0;
+
+ submit->bos[idx] = fd_bo_ref(bo);
+
+ _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
+ (void *)(uintptr_t)idx);
+ }
+ msm_bo->current_submit_seqno = submit->seqno;
+ msm_bo->idx = idx;
+ }
+ pthread_mutex_unlock(&idx_lock);
+ if (flags & FD_RELOC_READ)
+ submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ;
+ if (flags & FD_RELOC_WRITE)
+ submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE;
+ return idx;
+}
+
+static void
+msm_submit_suballoc_ring_bo(struct fd_submit *submit,
+ struct msm_ringbuffer_sp *msm_ring, uint32_t size)
+{
+ struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
+ unsigned suballoc_offset = 0;
+ struct fd_bo *suballoc_bo = NULL;
+
+ if (msm_submit->suballoc_ring) {
+ struct msm_ringbuffer_sp *suballoc_ring =
+ to_msm_ringbuffer_sp(msm_submit->suballoc_ring);
+
+ suballoc_bo = suballoc_ring->ring_bo;
+ suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) +
+ suballoc_ring->offset;
+
+ suballoc_offset = align(suballoc_offset, 0x10);
+
+ if ((size + suballoc_offset) > suballoc_bo->size) {
+ suballoc_bo = NULL;
+ }
+ }
+
+ if (!suballoc_bo) {
+ // TODO possibly larger size for streaming bo?
+ msm_ring->ring_bo = fd_bo_new_ring(
+ submit->pipe->dev, 0x8000, 0);
+ msm_ring->offset = 0;
+ } else {
+ msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
+ msm_ring->offset = suballoc_offset;
+ }
+
+ struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;
+
+ msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);
+
+ if (old_suballoc_ring)
+ fd_ringbuffer_del(old_suballoc_ring);
+}
+
+static struct fd_ringbuffer *
+msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
+ enum fd_ringbuffer_flags flags)
+{
+ struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
+ struct msm_ringbuffer_sp *msm_ring;
+
+ msm_ring = slab_alloc_st(&msm_submit->ring_pool);
+
+ msm_ring->u.submit = submit;
+
+ /* NOTE: needs to be before _suballoc_ring_bo() since it could
+ * increment the refcnt of the current ring
+ */
+ msm_ring->base.refcnt = 1;
+
+ if (flags & FD_RINGBUFFER_STREAMING) {
+ msm_submit_suballoc_ring_bo(submit, msm_ring, size);
+ } else {
+ if (flags & FD_RINGBUFFER_GROWABLE)
+ size = INIT_SIZE;
+
+ msm_ring->offset = 0;
+ msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0);
+ }
+
+ if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
+ return NULL;
+
+ if (flags & FD_RINGBUFFER_PRIMARY) {
+ debug_assert(!msm_submit->primary);
+ msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base);
+ }
+
+ return &msm_ring->base;
+}
+
+static int
+msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
+ int *out_fence_fd, uint32_t *out_fence)
+{
+ struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
+ struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
+ struct drm_msm_gem_submit req = {
+ .flags = msm_pipe->pipe,
+ .queueid = msm_pipe->queue_id,
+ };
+ int ret;
+
+ debug_assert(msm_submit->primary);
+ finalize_current_cmd(msm_submit->primary);
+
+ struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary);
+ struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds];
+
+ for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
+ cmds[i].type = MSM_SUBMIT_CMD_BUF;
+ cmds[i].submit_idx =
+ append_bo(msm_submit, primary->u.cmds[i].ring_bo, FD_RELOC_READ);
+ cmds[i].submit_offset = primary->offset;
+ cmds[i].size = primary->u.cmds[i].size;
+ cmds[i].pad = 0;
+ cmds[i].nr_relocs = 0;
+ }
+
+ if (in_fence_fd != -1) {
+ req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
+ req.fence_fd = in_fence_fd;
+ }
+
+ if (out_fence_fd) {
+ req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
+ }
+
+ /* needs to be after get_cmd() as that could create bos/cmds table: */
+ req.bos = VOID2U64(msm_submit->submit_bos),
+ req.nr_bos = msm_submit->nr_submit_bos;
+ req.cmds = VOID2U64(cmds),
+ req.nr_cmds = primary->u.nr_cmds;
+
+ DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);
+
+ ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT,
+ &req, sizeof(req));
+ if (ret) {
+ ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
+ msm_dump_submit(&req);
+ } else if (!ret) {
+ if (out_fence)
+ *out_fence = req.fence;
+
+ if (out_fence_fd)
+ *out_fence_fd = req.fence_fd;
+ }
+
+ return ret;
+}
+
+static void
+msm_submit_sp_destroy(struct fd_submit *submit)
+{
+ struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
+
+ if (msm_submit->primary)
+ fd_ringbuffer_del(msm_submit->primary);
+ if (msm_submit->suballoc_ring)
+ fd_ringbuffer_del(msm_submit->suballoc_ring);
+
+ _mesa_hash_table_destroy(msm_submit->bo_table, NULL);
+
+ // TODO it would be nice to have a way to debug_assert() if all
+ // rb's haven't been free'd back to the slab, because that is
+ // an indication that we are leaking bo's
+ slab_destroy(&msm_submit->ring_pool);
+
+ for (unsigned i = 0; i < msm_submit->nr_bos; i++)
+ fd_bo_del(msm_submit->bos[i]);
+
+ free(msm_submit->submit_bos);
+ free(msm_submit->bos);
+ free(msm_submit);
+}
+
+static const struct fd_submit_funcs submit_funcs = {
+ .new_ringbuffer = msm_submit_sp_new_ringbuffer,
+ .flush = msm_submit_sp_flush,
+ .destroy = msm_submit_sp_destroy,
+};
+
+struct fd_submit *
+msm_submit_sp_new(struct fd_pipe *pipe)
+{
+ struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
+ struct fd_submit *submit;
+ static unsigned submit_cnt = 0;
+
+ msm_submit->seqno = ++submit_cnt;
+ msm_submit->bo_table = _mesa_hash_table_create(NULL,
+ _mesa_hash_pointer, _mesa_key_pointer_equal);
+ // TODO tune size:
+ slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);
+
+ submit = &msm_submit->base;
+ submit->pipe = pipe;
+ submit->funcs = &submit_funcs;
+
+ return submit;
+}
+
+
+static void
+finalize_current_cmd(struct fd_ringbuffer *ring)
+{
+ debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
+
+ struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+ unsigned idx = APPEND(&msm_ring->u, cmds);
+
+ msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo);
+ msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start);
+}
+
+static void
+msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
+{
+ struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+ struct fd_pipe *pipe = msm_ring->u.submit->pipe;
+
+ debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);
+
+ finalize_current_cmd(ring);
+
+ fd_bo_del(msm_ring->ring_bo);
+ msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0);
+
+ ring->start = fd_bo_map(msm_ring->ring_bo);
+ ring->end = &(ring->start[size/4]);
+ ring->cur = ring->start;
+ ring->size = size;
+}
+
+static void
+msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring,
+ const struct fd_reloc *reloc)
+{
+ struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+ struct fd_pipe *pipe;
+
+ if (ring->flags & _FD_RINGBUFFER_OBJECT) {
+ unsigned idx = APPEND(&msm_ring->u, reloc_bos);
+
+ msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo);
+ msm_ring->u.reloc_bos[idx].flags = reloc->flags;
+
+ pipe = msm_ring->u.pipe;
+ } else {
+ struct msm_submit_sp *msm_submit =
+ to_msm_submit_sp(msm_ring->u.submit);
+
+ append_bo(msm_submit, reloc->bo, reloc->flags);
+
+ pipe = msm_ring->u.submit->pipe;
+ }
+
+ uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset;
+ uint32_t dword = iova;
+ int shift = reloc->shift;
+
+ if (shift < 0)
+ dword >>= -shift;
+ else
+ dword <<= shift;
+
+ (*ring->cur++) = dword | reloc->or;
+
+ if (pipe->gpu_id >= 500) {
+ dword = iova >> 32;
+ shift -= 32;
+
+ if (shift < 0)
+ dword >>= -shift;
+ else
+ dword <<= shift;
+
+ (*ring->cur++) = dword | reloc->orhi;
+ }
+}
+
+static uint32_t
+msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
+ struct fd_ringbuffer *target, uint32_t cmd_idx)
+{
+ struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
+ struct fd_bo *bo;
+ uint32_t size;
+
+ if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
+ (cmd_idx < msm_target->u.nr_cmds)) {
+ bo = msm_target->u.cmds[cmd_idx].ring_bo;
+ size = msm_target->u.cmds[cmd_idx].size;
+ } else {
+ bo = msm_target->ring_bo;
+ size = offset_bytes(target->cur, target->start);
+ }
+
+ msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
+ .bo = bo,
+ .flags = FD_RELOC_READ,
+ .offset = msm_target->offset,
+ });
+
+ if ((target->flags & _FD_RINGBUFFER_OBJECT) &&
+ !(ring->flags & _FD_RINGBUFFER_OBJECT)) {
+ // TODO it would be nice to know whether we have already
+ // seen this target before. But hopefully we hit the
+ // append_bo() fast path enough for this to not matter:
+ struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+ struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);
+
+ for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
+ append_bo(msm_submit, msm_target->u.reloc_bos[i].bo,
+ msm_target->u.reloc_bos[i].flags);
+ }
+ }
+
+ return size;
+}
+
+static uint32_t
+msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
+{
+ if (ring->flags & FD_RINGBUFFER_GROWABLE)
+ return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
+ return 1;
+}
+
+static void
+msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
+{
+ struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+
+ fd_bo_del(msm_ring->ring_bo);
+
+ if (ring->flags & _FD_RINGBUFFER_OBJECT) {
+ for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
+ fd_bo_del(msm_ring->u.reloc_bos[i].bo);
+ }
+
+ free(msm_ring);
+ } else {
+ struct fd_submit *submit = msm_ring->u.submit;
+
+ for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
+ fd_bo_del(msm_ring->u.cmds[i].ring_bo);
+ }
+
+ slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
+ }
+}
+
+static const struct fd_ringbuffer_funcs ring_funcs = {
+ .grow = msm_ringbuffer_sp_grow,
+ .emit_reloc = msm_ringbuffer_sp_emit_reloc,
+ .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
+ .cmd_count = msm_ringbuffer_sp_cmd_count,
+ .destroy = msm_ringbuffer_sp_destroy,
+};
+
+static inline struct fd_ringbuffer *
+msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
+ enum fd_ringbuffer_flags flags)
+{
+ struct fd_ringbuffer *ring = &msm_ring->base;
+
+ debug_assert(msm_ring->ring_bo);
+
+ uint8_t *base = fd_bo_map(msm_ring->ring_bo);
+ ring->start = (void *)(base + msm_ring->offset);
+ ring->end = &(ring->start[size/4]);
+ ring->cur = ring->start;
+
+ ring->size = size;
+ ring->flags = flags;
+
+ ring->funcs = &ring_funcs;
+
+ // TODO initializing these could probably be conditional on flags
+ // since unneed for FD_RINGBUFFER_STAGING case..
+ msm_ring->u.cmds = NULL;
+ msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;
+
+ msm_ring->u.reloc_bos = NULL;
+ msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;
+
+ return ring;
+}
+
+struct fd_ringbuffer *
+msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
+{
+ struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));
+
+ msm_ring->u.pipe = pipe;
+ msm_ring->offset = 0;
+ msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0);
+ msm_ring->base.refcnt = 1;
+
+ return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
+}