diff options
author | Rob Clark <[email protected]> | 2018-10-21 10:22:11 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2018-10-26 18:10:00 -0400 |
commit | f3cc0d2747568a186dba433ac94af607c38fa024 (patch) | |
tree | a6db86fd1cfbb044180d3aba07e047714e6b9f1d /src/gallium/drivers/freedreno/a6xx | |
parent | aa02d7e8781c25ee18b6da97606300808c84973a (diff) |
freedreno: import libdrm_freedreno + redesign submit
In the pursuit of lowering driver overhead, it became clear that some
amount of redesign of how libdrm_freedreno constructs the submit ioctl
would be needed. In particular, as the gallium driver is starting to
make heavier use of CP_SET_DRAW_STATE state groups/objects, the over-
head of tracking cmd buffers and relocs becomes too much. And for
"streaming" state, which isn't ever reused (like uniform uploads) the
overhead of allocating/freeing ringbuffer[1] objects is too high.
This redesign makes two main changes:
1) Introduces a fd_submit object for tracking bos and cmds table
for the submit ioctl, making ringbuffer objects more light-
weight. This was previously done in the ringbuffer. But we
have many ringbuffer instances involved in a submit (gmem +
draw + potentially 1000's of state-group rbs), and only need
a single bos and cmds table. (Reloc table is still per-rb)
The submit is also a convenient place for a slab allocator for
ringbuffer objects. Other options would have required locking
because, while we can guarantee allocations will only happen on
a single thread, frees could happen either on the application
thread or the flush_queue thread. With the slab allocator in
the submit object, any frees that happen on the flush_queue
thread happen after we know that the application thread is done
with the submit.
2) Introduce a new "softpin" msm_ringbuffer_sp implementation that
does not use relocs and only has cmds table entries for IB1 (ie.
the cmdstream buffers that kernel needs to CP_INDIRECT_BUFFER
to from the RB). To do this properly will require some updates
on the kernel side, so whether you get the softpin or legacy
submit/ringbuffer implementation at runtime depends on your
kernel version.
To make all these changes in libdrm would basically require adding a
libdrm_freedreno2, so this is a good point to just pull the libdrm code
into mesa. Plus it allows for using mesa's hashtable, slab allocator,
etc. And it lets us have asserts enabled for debug mesa builds but
omitted for release builds. And it makes life easier if further API
changes become necessary.
At this point I haven't tried to pull in the kgsl backend, although
I left the level of vfunc indirection which would make it possible
to have other backends. (And this was convenient to keep to allow
for the "softpin" ringbuffer to coexist.)
NOTE: if bisecting a build error takes you here, try a clean build.
There are a bunch of ways things can go wrong if you still have
libdrm_freedreno cflags.
[1] "ringbuffer" is probably a bad name: the only level of cmdstream
buffer that is actually a ring is the RB managed by the kernel.
Userspace cmdstream is all IB1/IB2 and state-groups.
Reviewed-by: Kristian H. Kristensen <[email protected]>
Reviewed-by: Eric Engestrom <[email protected]>
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/a6xx')
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_context.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_draw.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 26 |
3 files changed, 11 insertions, 20 deletions
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h index 43a1b1837c4..f3cdd44dec4 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -30,8 +30,6 @@ #include "util/u_upload_mgr.h" -#include "freedreno_drmif.h" - #include "freedreno_context.h" #include "ir3_shader.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index c0670d3a11c..9ccb03990f7 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -297,8 +297,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) // draw if (!batch->lrz_clear) { - batch->lrz_clear = fd_ringbuffer_new(batch->ctx->pipe, 0x1000); - fd_ringbuffer_set_parent(batch->lrz_clear, batch->gmem); + batch->lrz_clear = fd_submit_new_ringbuffer(batch->submit, 0x1000, 0); } ring = batch->lrz_clear; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 8c3336d5ea6..001d69bf1c9 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -359,8 +359,7 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, if (tex->num_samplers > 0) { struct fd_ringbuffer *state = - fd_ringbuffer_new_flags(pipe, tex->num_samplers * 4 * 4, - FD_RINGBUFFER_OBJECT); + fd_ringbuffer_new_object(pipe, tex->num_samplers * 4 * 4); for (unsigned i = 0; i < tex->num_samplers; i++) { static const struct fd6_sampler_stateobj dummy_sampler = {}; const struct fd6_sampler_stateobj *sampler = tex->samplers[i] ? 
@@ -390,8 +389,7 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, if (tex->num_textures > 0) { struct fd_ringbuffer *state = - fd_ringbuffer_new_flags(pipe, tex->num_textures * 16 * 4, - FD_RINGBUFFER_OBJECT); + fd_ringbuffer_new_object(pipe, tex->num_textures * 16 * 4); for (unsigned i = 0; i < tex->num_textures; i++) { static const struct fd6_pipe_sampler_view dummy_view = {}; const struct fd6_pipe_sampler_view *view = tex->textures[i] ? @@ -534,9 +532,8 @@ fd6_build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp) const struct fd_vertex_state *vtx = emit->vtx; int32_t i, j; - struct fd_ringbuffer *ring = - fd_ringbuffer_new_flags(emit->ctx->pipe, 4 * (10 * vp->inputs_count + 2), - FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING); + struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit, + 4 * (10 * vp->inputs_count + 2), FD_RINGBUFFER_STREAMING); for (i = 0, j = 0; i <= vp->inputs_count; i++) { if (vp->inputs[i].sysval) @@ -597,9 +594,8 @@ build_zsa(struct fd6_emit *emit, bool binning_pass) uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl; uint32_t rb_lrz_cntl = zsa->rb_lrz_cntl; - struct fd_ringbuffer *ring = - fd_ringbuffer_new_flags(emit->ctx->pipe, 16, - FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING); + struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit, + 16, FD_RINGBUFFER_STREAMING); if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid) { gras_lrz_cntl = 0; @@ -786,9 +782,8 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE) if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & DIRTY_CONST) { - struct fd_ringbuffer *vsconstobj = - fd_ringbuffer_new_flags(ctx->pipe, 0x1000, - FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING); + struct fd_ringbuffer *vsconstobj = fd_submit_new_ringbuffer( + ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); ir3_emit_vs_consts(vp, vsconstobj, ctx, emit->info); 
fd6_emit_add_group(emit, vsconstobj, FD6_GROUP_VS_CONST, 0x7); @@ -796,9 +791,8 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) } if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & DIRTY_CONST) { - struct fd_ringbuffer *fsconstobj = - fd_ringbuffer_new_flags(ctx->pipe, 0x1000, - FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING); + struct fd_ringbuffer *fsconstobj = fd_submit_new_ringbuffer( + ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); ir3_emit_fs_consts(fp, fsconstobj, ctx); fd6_emit_add_group(emit, fsconstobj, FD6_GROUP_FS_CONST, 0x6); |