diff options
-rw-r--r-- | src/freedreno/drm/freedreno_ringbuffer.h | 33 | ||||
-rw-r--r-- | src/freedreno/drm/msm_ringbuffer.c | 36 | ||||
-rw-r--r-- | src/freedreno/drm/msm_ringbuffer_sp.c | 49 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_pack.h | 1 |
4 files changed, 39 insertions, 80 deletions
diff --git a/src/freedreno/drm/freedreno_ringbuffer.h b/src/freedreno/drm/freedreno_ringbuffer.h index 6cfbf1a69ef..a5dec1462c2 100644 --- a/src/freedreno/drm/freedreno_ringbuffer.h +++ b/src/freedreno/drm/freedreno_ringbuffer.h @@ -165,14 +165,21 @@ struct fd_reloc { #define FD_RELOC_READ 0x0001 #define FD_RELOC_WRITE 0x0002 #define FD_RELOC_DUMP 0x0004 - uint32_t flags; uint32_t offset; uint32_t or; int32_t shift; uint32_t orhi; /* used for a5xx+ */ }; -#define FD_RELOC_FLAGS_INIT FD_RELOC_READ +/* We always mark BOs for write, instead of tracking it across reloc + * sources in userspace. On the kernel side, this means we track a single + * excl fence in the BO instead of a set of read fences, which is cheaper. + * The downside is that a dmabuf-shared device won't be able to read in + * parallel with a read-only access by freedreno, but most other drivers + * have decided that that usecase isn't important enough to do this + * tracking, as well. + */ +#define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE) /* NOTE: relocs are 2 dwords on a5xx+ */ @@ -228,12 +235,11 @@ OUT_RING(struct fd_ringbuffer *ring, uint32_t data) } /* - * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+ + * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+ */ - static inline void -__out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift, uint32_t flags) +OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint64_t or, int32_t shift) { if (LOG_DWORDS) { fprintf(stderr, "ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, @@ -242,7 +248,6 @@ __out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo, debug_assert(offset < fd_bo_size(bo)); fd_ringbuffer_reloc(ring, &(struct fd_reloc){ .bo = bo, - .flags = flags, .offset = offset, .or = or, .shift = shift, @@ -250,19 +255,7 @@ __out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo, }); } -static inline void -OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift) -{ - __out_reloc(ring, bo, offset, or, shift, 0); -} - -static inline void -OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift) -{ - __out_reloc(ring, bo, offset, or, shift, FD_RELOC_WRITE); -} +#define OUT_RELOCW OUT_RELOC static inline void OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) diff --git a/src/freedreno/drm/msm_ringbuffer.c b/src/freedreno/drm/msm_ringbuffer.c index 8a15c559bb6..2ff4a3dfc56 100644 --- a/src/freedreno/drm/msm_ringbuffer.c +++ b/src/freedreno/drm/msm_ringbuffer.c @@ -97,15 +97,6 @@ cmd_free(struct msm_cmd *cmd) free(cmd); } -/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to - * later copy into the submit when the stateobj rb is later referenced by - * a regular rb: - */ -struct msm_reloc_bo { - struct fd_bo *bo; - unsigned flags; -}; - struct msm_ringbuffer { struct fd_ringbuffer base; @@ -116,7 +107,7 @@ struct msm_ringbuffer { /* for _FD_RINGBUFFER_OBJECT case: */ struct { struct fd_pipe *pipe; - DECLARE_ARRAY(struct msm_reloc_bo, reloc_bos); + DECLARE_ARRAY(struct fd_bo *, reloc_bos); struct set *ring_set; }; /* for other cases: */ @@ -138,7 +129,7 @@ static struct fd_ringbuffer * msm_ringbuffer_init( /* add (if needed) bo to submit and return index: */ static uint32_t -append_bo(struct msm_submit *submit, struct fd_bo *bo, uint32_t flags) +append_bo(struct msm_submit *submit, struct fd_bo *bo) { struct msm_bo *msm_bo = to_msm_bo(bo); uint32_t idx; @@ -174,9 +165,6 @@ append_bo(struct msm_submit *submit, struct fd_bo *bo, uint32_t flags) msm_bo->idx = idx; } - if (flags & FD_RELOC_WRITE) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; - return idx; } @@ -278,14 +266,10 @@ handle_stateobj_relocs(struct msm_submit *submit, struct msm_ringbuffer *ring) for (unsigned i = 0; i < cmd->nr_relocs; i++) { unsigned idx = cmd->relocs[i].reloc_idx; - struct fd_bo *bo = ring->u.reloc_bos[idx].bo; - unsigned flags = 0; - - if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_WRITE) - flags |= FD_RELOC_WRITE; + struct fd_bo *bo = ring->u.reloc_bos[idx]; relocs[i] = cmd->relocs[i]; - relocs[i].reloc_idx = append_bo(submit, bo, flags); + relocs[i].reloc_idx = append_bo(submit, bo); } return relocs; @@ -343,7 +327,7 @@ msm_submit_flush(struct fd_submit *submit, int in_fence_fd, cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; cmds[i].submit_idx = - append_bo(msm_submit, msm_ring->ring_bo, 0); + append_bo(msm_submit, msm_ring->ring_bo); cmds[i].submit_offset = msm_ring->offset; cmds[i].size = offset_bytes(ring->cur, ring->start); cmds[i].pad = 0; @@ -359,7 +343,7 @@ msm_submit_flush(struct fd_submit *submit, int in_fence_fd, cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; } cmds[i].submit_idx = append_bo(msm_submit, - msm_ring->u.cmds[j]->ring_bo, 0); + msm_ring->u.cmds[j]->ring_bo); cmds[i].submit_offset = msm_ring->offset; cmds[i].size = msm_ring->u.cmds[j]->size; cmds[i].pad = 0; @@ -518,8 +502,7 @@ msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring, if (ring->flags & _FD_RINGBUFFER_OBJECT) { unsigned idx = APPEND(&msm_ring->u, reloc_bos); - msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); - msm_ring->u.reloc_bos[idx].flags = reloc->flags; + msm_ring->u.reloc_bos[idx] = fd_bo_ref(reloc->bo); /* this gets fixed up at submit->flush() time, since this state- * object rb can be used with many different submits @@ -531,7 +514,7 @@ msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring, struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); - reloc_idx = append_bo(msm_submit, reloc->bo, reloc->flags); + reloc_idx = append_bo(msm_submit, reloc->bo); pipe = msm_ring->u.submit->pipe; } @@ -603,7 +586,6 @@ msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){ .bo = bo, - .flags = 0, .offset = msm_target->offset, }); @@ -646,7 +628,7 @@ msm_ringbuffer_destroy(struct fd_ringbuffer *ring) if (ring->flags & _FD_RINGBUFFER_OBJECT) { for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - fd_bo_del(msm_ring->u.reloc_bos[i].bo); + fd_bo_del(msm_ring->u.reloc_bos[i]); } _mesa_set_destroy(msm_ring->u.ring_set, unref_rings); diff --git a/src/freedreno/drm/msm_ringbuffer_sp.c b/src/freedreno/drm/msm_ringbuffer_sp.c index 65bfb2bde80..6bab90b190a 100644 --- a/src/freedreno/drm/msm_ringbuffer_sp.c +++ b/src/freedreno/drm/msm_ringbuffer_sp.c @@ -74,29 +74,19 @@ struct msm_cmd_sp { unsigned size; }; -/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to - * later copy into the submit when the stateobj rb is later referenced by - * a regular rb: - */ -struct msm_reloc_bo_sp { - struct fd_bo *bo; - unsigned flags; -}; - struct msm_ringbuffer_sp { struct fd_ringbuffer base; /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ unsigned offset; -// TODO check disasm.. hopefully compilers CSE can realize that -// reloc_bos and cmds are at the same offsets and optimize some -// divergent cases into single case union { - /* for _FD_RINGBUFFER_OBJECT case: */ + /* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from + * this one + */ struct { struct fd_pipe *pipe; - DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos); + DECLARE_ARRAY(struct fd_bo *, reloc_bos); }; /* for other cases: */ struct { @@ -116,7 +106,7 @@ static struct fd_ringbuffer * msm_ringbuffer_sp_init( /* add (if needed) bo to submit and return index: */ static uint32_t -msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags) +msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo) { struct msm_bo *msm_bo = to_msm_bo(bo); uint32_t idx; @@ -152,12 +142,6 @@ msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t fl msm_bo->idx = idx; } - STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ); - STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE); - STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP); - submit->submit_bos[idx].flags |= - flags & (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE | MSM_SUBMIT_BO_DUMP); - return idx; } @@ -259,7 +243,7 @@ msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, for (unsigned i = 0; i < primary->u.nr_cmds; i++) { cmds[i].type = MSM_SUBMIT_CMD_BUF; cmds[i].submit_idx = msm_submit_append_bo(msm_submit, - primary->u.cmds[i].ring_bo, 0); + primary->u.cmds[i].ring_bo); cmds[i].submit_offset = primary->offset; cmds[i].size = primary->u.cmds[i].size; cmds[i].pad = 0; @@ -403,15 +387,14 @@ msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring, if (ring->flags & _FD_RINGBUFFER_OBJECT) { unsigned idx = APPEND(&msm_ring->u, reloc_bos); - msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); - msm_ring->u.reloc_bos[idx].flags = reloc->flags; + msm_ring->u.reloc_bos[idx] = fd_bo_ref(reloc->bo); pipe = msm_ring->u.pipe; } else { struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); - msm_submit_append_bo(msm_submit, reloc->bo, reloc->flags); + msm_submit_append_bo(msm_submit, reloc->bo); pipe = msm_ring->u.submit->pipe; } @@ -465,10 +448,8 @@ msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring, for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { unsigned idx = APPEND(&msm_ring->u, reloc_bos); - msm_ring->u.reloc_bos[idx].bo = - fd_bo_ref(msm_target->u.reloc_bos[i].bo); - msm_ring->u.reloc_bos[idx].flags = - msm_target->u.reloc_bos[i].flags; + msm_ring->u.reloc_bos[idx] = + fd_bo_ref(msm_target->u.reloc_bos[i]); } } else { // TODO it would be nice to know whether we have already @@ -477,8 +458,7 @@ msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring, struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { - msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i].bo, - msm_target->u.reloc_bos[i].flags); + msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]); } } @@ -502,7 +482,7 @@ msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring) if (ring->flags & _FD_RINGBUFFER_OBJECT) { for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - fd_bo_del(msm_ring->u.reloc_bos[i].bo); + fd_bo_del(msm_ring->u.reloc_bos[i]); } free(msm_ring->u.reloc_bos); @@ -533,6 +513,11 @@ msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size, { struct fd_ringbuffer *ring = &msm_ring->base; + /* We don't do any translation from internal FD_RELOC flags to MSM flags. */ + STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ); + STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE); + STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP); + debug_assert(msm_ring->ring_bo); uint8_t *base = fd_bo_map(msm_ring->ring_bo); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_pack.h b/src/gallium/drivers/freedreno/a6xx/fd6_pack.h index ed1c0003849..6b71c10f4e0 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_pack.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_pack.h @@ -58,7 +58,6 @@ struct fd_reg_pair { if (regs[i].bo) { \ struct fd_reloc reloc = { \ .bo = regs[i].bo, \ - .flags = (regs[i].bo_write ? FD_RELOC_WRITE : 0), \ .offset = regs[i].bo_offset, \ .or = regs[i].value, \ .shift = regs[i].bo_shift, \ |