diff options
author | Eric Anholt <[email protected]> | 2016-09-08 12:56:11 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2016-09-14 06:25:41 +0100 |
commit | f597ac3966405934e13a9aaa18c73211b5a40c7c (patch) | |
tree | f7f09bead790e4cdd210e1ed0ca4c4b150dbe35e /src/gallium/drivers/vc4 | |
parent | f473348468ae1c68e7ef8eaf29f2cc51d17fbec7 (diff) |
vc4: Implement job shuffling
Track rendering to each FBO independently and flush rendering only when
necessary. This lets us avoid the overhead of storing and loading the
frame when an application momentarily switches to rendering to some other
texture in order to continue rendering the main scene.
Improves glmark -b desktop:effect=shadow:windows=4 by 27%
Improves glmark -b desktop:blur-radius=5:effect=blur:passes=1:separable=true:windows=4 by 17%
While I haven't tested other apps, this should help X rendering a lot, and
I've heard GLBenchmark needed it too.
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_blit.c | 26 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_context.c | 101 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_context.h | 39 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_draw.c | 22 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_job.c | 276 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_resource.c | 17 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_simulator.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_state.c | 36 |
8 files changed, 333 insertions, 194 deletions
diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c index d3fc8e922ad..1e056568acb 100644 --- a/src/gallium/drivers/vc4/vc4_blit.c +++ b/src/gallium/drivers/vc4/vc4_blit.c @@ -51,10 +51,6 @@ static bool vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) { struct vc4_context *vc4 = vc4_context(pctx); - struct vc4_job *job = vc4->job; - bool old_msaa = job->msaa; - int old_tile_width = job->tile_width; - int old_tile_height = job->tile_height; bool msaa = (info->src.resource->nr_samples > 1 || info->dst.resource->nr_samples > 1); int tile_width = msaa ? 32 : 64; @@ -115,8 +111,6 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) if (info->dst.resource->format != info->src.resource->format) return false; - vc4_flush(pctx); - if (false) { fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n", info->src.box.x, @@ -132,11 +126,19 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) struct pipe_surface *src_surf = vc4_get_blit_surface(pctx, info->src.resource, info->src.level); + vc4_flush_jobs_reading_resource(vc4, info->src.resource); + + struct vc4_job *job = vc4_get_job(vc4, dst_surf, NULL); pipe_surface_reference(&job->color_read, src_surf); - if (dst_surf->texture->nr_samples > 1) - pipe_surface_reference(&job->color_write, dst_surf); - else - pipe_surface_reference(&job->msaa_color_write, dst_surf); + + /* If we're resolving from MSAA to single sample, we still need to run + * the engine in MSAA mode for the load. 
+ */ + if (!job->msaa && info->src.resource->nr_samples > 1) { + job->msaa = true; + job->tile_width = 32; + job->tile_height = 32; + } job->draw_min_x = info->dst.box.x; job->draw_min_y = info->dst.box.y; @@ -153,10 +155,6 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) vc4_job_submit(vc4, job); - job->msaa = old_msaa; - job->tile_width = old_tile_width; - job->tile_height = old_tile_height; - pipe_surface_reference(&dst_surf, NULL); pipe_surface_reference(&src_surf, NULL); diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 37b002d5cba..3863e4432a9 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -41,38 +41,12 @@ void vc4_flush(struct pipe_context *pctx) { struct vc4_context *vc4 = vc4_context(pctx); - struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0]; - struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf; - struct vc4_job *job = vc4->job; - - if (cbuf && (job->resolve & PIPE_CLEAR_COLOR0)) { - if (cbuf->texture->nr_samples > 1) { - pipe_surface_reference(&job->msaa_color_write, cbuf); - } else { - pipe_surface_reference(&job->color_write, cbuf); - } - - pipe_surface_reference(&job->color_read, cbuf); - } - - if (zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { - if (zsbuf->texture->nr_samples > 1) { - pipe_surface_reference(&job->msaa_zs_write, zsbuf); - } else { - pipe_surface_reference(&job->zs_write, zsbuf); - } - pipe_surface_reference(&job->zs_read, zsbuf); + struct hash_entry *entry; + hash_table_foreach(vc4->jobs, entry) { + struct vc4_job *job = entry->data; + vc4_job_submit(vc4, job); } - - vc4_job_submit(vc4, job); - - /* We have no hardware context saved between our draw calls, so we - * need to flag the next draw as needing all state emitted. Emitting - * all state at the start of our draws is also what ensures that we - * return to the state we need after a previous tile has finished. 
- */ - vc4->dirty = ~0; } static void @@ -92,64 +66,18 @@ vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, } } -/** - * Flushes the current command lists if they reference the given BO. - * - * This helps avoid flushing the command buffers when unnecessary. - */ -bool -vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo, - bool include_reads) -{ - struct vc4_context *vc4 = vc4_context(pctx); - struct vc4_job *job = vc4->job; - - if (!job->needs_flush) - return false; - - /* Walk all the referenced BOs in the drawing command list to see if - * they match. - */ - if (include_reads) { - struct vc4_bo **referenced_bos = job->bo_pointers.base; - for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) { - if (referenced_bos[i] == bo) { - return true; - } - } - } - - /* Also check for the Z/color buffers, since the references to those - * are only added immediately before submit. - */ - struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); - if (csurf) { - struct vc4_resource *ctex = vc4_resource(csurf->base.texture); - if (ctex->bo == bo) { - return true; - } - } - - struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf); - if (zsurf) { - struct vc4_resource *ztex = - vc4_resource(zsurf->base.texture); - if (ztex->bo == bo) { - return true; - } - } - - return false; -} - static void vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) { struct vc4_context *vc4 = vc4_context(pctx); - struct pipe_surface *zsurf = vc4->framebuffer.zsbuf; - - if (zsurf && zsurf->texture == prsc) - vc4->job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); + struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs, + prsc); + if (!entry) + return; + + struct vc4_job *job = entry->data; + if (job->key.zsbuf && job->key.zsbuf->texture == prsc) + job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); } static void @@ -157,6 +85,8 @@ vc4_context_destroy(struct pipe_context *pctx) { struct 
vc4_context *vc4 = vc4_context(pctx); + vc4_flush(pctx); + if (vc4->blitter) util_blitter_destroy(vc4->blitter); @@ -205,8 +135,7 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) vc4_query_init(pctx); vc4_resource_context_init(pctx); - vc4->job = rzalloc(vc4, struct vc4_job); - vc4_job_init(vc4->job); + vc4_job_init(vc4); vc4->fd = screen->fd; diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 38dc3a5998c..87d8c79241b 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -190,6 +190,12 @@ struct vc4_vertex_stateobj { unsigned num_elements; }; +/* Hash table key for vc4->jobs */ +struct vc4_job_key { + struct pipe_surface *cbuf; + struct pipe_surface *zsbuf; +}; + /** * A complete bin/render job. * @@ -266,6 +272,8 @@ struct vc4_job { * the current job. */ uint32_t draw_calls_queued; + + struct vc4_job_key key; }; struct vc4_context { @@ -274,9 +282,21 @@ struct vc4_context { int fd; struct vc4_screen *screen; - /** The render job for the currently bound FBO. */ + /** The 3D rendering job for the currently bound FBO. */ struct vc4_job *job; + /* Map from struct vc4_job_key to the job for that FBO. + */ + struct hash_table *jobs; + + /** + * Map from vc4_resource to a job writing to that resource. + * + * Primarily for flushing jobs rendering to textures that are now + * being read from. 
+ */ + struct hash_table *write_jobs; + struct slab_mempool transfer_pool; struct blitter_context *blitter; @@ -404,7 +424,8 @@ void vc4_program_fini(struct pipe_context *pctx); void vc4_query_init(struct pipe_context *pctx); void vc4_simulator_init(struct vc4_screen *screen); int vc4_simulator_flush(struct vc4_context *vc4, - struct drm_vc4_submit_cl *args); + struct drm_vc4_submit_cl *args, + struct vc4_job *job); void vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader); void vc4_write_uniforms(struct vc4_context *vc4, @@ -413,11 +434,17 @@ void vc4_write_uniforms(struct vc4_context *vc4, struct vc4_texture_stateobj *texstate); void vc4_flush(struct pipe_context *pctx); -void vc4_job_init(struct vc4_job *job); +void vc4_job_init(struct vc4_context *vc4); +struct vc4_job *vc4_get_job(struct vc4_context *vc4, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf); +struct vc4_job *vc4_get_job_for_fbo(struct vc4_context *vc4); + void vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job); -void vc4_job_reset(struct vc4_job *job); -bool vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo, - bool include_reads); +void vc4_flush_jobs_writing_resource(struct vc4_context *vc4, + struct pipe_resource *prsc); +void vc4_flush_jobs_reading_resource(struct vc4_context *vc4, + struct pipe_resource *prsc); void vc4_emit_state(struct pipe_context *pctx); void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c); struct qpu_reg *vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c); diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 52a53db9a29..bbdb02010f6 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -116,9 +116,11 @@ vc4_start_draw(struct vc4_context *vc4, int vert_count) } static void -vc4_update_shadow_textures(struct pipe_context *pctx, +vc4_predraw_check_textures(struct pipe_context *pctx, struct vc4_texture_stateobj 
*stage_tex) { + struct vc4_context *vc4 = vc4_context(pctx); + for (int i = 0; i < stage_tex->num_textures; i++) { struct pipe_sampler_view *view = stage_tex->textures[i]; if (!view) @@ -126,6 +128,8 @@ vc4_update_shadow_textures(struct pipe_context *pctx, struct vc4_resource *rsc = vc4_resource(view->texture); if (rsc->shadow_parent) vc4_update_shadow_baselevel_texture(pctx, view); + + vc4_flush_jobs_writing_resource(vc4, view->texture); } } @@ -263,12 +267,12 @@ static void vc4_hw_2116_workaround(struct pipe_context *pctx) { struct vc4_context *vc4 = vc4_context(pctx); - struct vc4_job *job = vc4->job; + struct vc4_job *job = vc4_get_job_for_fbo(vc4); if (job->draw_calls_queued == 0x1ef0) { perf_debug("Flushing batch due to HW-2116 workaround " "(too many draw calls per scene\n"); - vc4_flush(pctx); + vc4_job_submit(vc4, job); } } @@ -276,7 +280,6 @@ static void vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) { struct vc4_context *vc4 = vc4_context(pctx); - struct vc4_job *job = vc4->job; if (info->mode >= PIPE_PRIM_QUADS) { util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf); @@ -288,11 +291,13 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } /* Before setting up the draw, do any fixup blits necessary. 
*/ - vc4_update_shadow_textures(pctx, &vc4->verttex); - vc4_update_shadow_textures(pctx, &vc4->fragtex); + vc4_predraw_check_textures(pctx, &vc4->verttex); + vc4_predraw_check_textures(pctx, &vc4->fragtex); vc4_hw_2116_workaround(pctx); + struct vc4_job *job = vc4_get_job_for_fbo(vc4); + vc4_get_draw_cl_space(job, info->count); if (vc4->prim_mode != info->mode) { @@ -466,14 +471,15 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct vc4_context *vc4 = vc4_context(pctx); - struct vc4_job *job = vc4->job; + struct vc4_job *job = vc4_get_job_for_fbo(vc4); /* We can't flag new buffers for clearing once we've queued draws. We * could avoid this by using the 3d engine to clear. */ if (job->draw_calls_queued) { perf_debug("Flushing rendering to process new clear.\n"); - vc4_flush(pctx); + vc4_job_submit(vc4, job); + job = vc4_get_job_for_fbo(vc4); } /* Clearing ZS will clear both Z and stencil, so if we're trying to diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index cb20853fa49..d39472ef131 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -28,49 +28,239 @@ #include <xf86drm.h> #include "vc4_context.h" +#include "util/hash_table.h" -void -vc4_job_init(struct vc4_job *job) +static void +remove_from_ht(struct hash_table *ht, void *key) { - vc4_init_cl(job, &job->bcl); - vc4_init_cl(job, &job->shader_rec); - vc4_init_cl(job, &job->uniforms); - vc4_init_cl(job, &job->bo_handles); - vc4_init_cl(job, &job->bo_pointers); - vc4_job_reset(job); + struct hash_entry *entry = _mesa_hash_table_search(ht, key); + _mesa_hash_table_remove(ht, entry); } -void -vc4_job_reset(struct vc4_job *job) +static void +vc4_job_free(struct vc4_context *vc4, struct vc4_job *job) { struct vc4_bo **referenced_bos = job->bo_pointers.base; for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) { vc4_bo_unreference(&referenced_bos[i]); } - 
vc4_reset_cl(&job->bcl); - vc4_reset_cl(&job->shader_rec); - vc4_reset_cl(&job->uniforms); - vc4_reset_cl(&job->bo_handles); - vc4_reset_cl(&job->bo_pointers); - job->shader_rec_count = 0; - job->needs_flush = false; - job->draw_calls_queued = 0; + remove_from_ht(vc4->jobs, &job->key); - job->resolve = 0; - job->cleared = 0; + if (job->color_write) { + remove_from_ht(vc4->write_jobs, job->color_write->texture); + pipe_surface_reference(&job->color_write, NULL); + } + if (job->msaa_color_write) { + remove_from_ht(vc4->write_jobs, job->msaa_color_write->texture); + pipe_surface_reference(&job->msaa_color_write, NULL); + } + if (job->zs_write) { + remove_from_ht(vc4->write_jobs, job->zs_write->texture); + pipe_surface_reference(&job->zs_write, NULL); + } + if (job->msaa_zs_write) { + remove_from_ht(vc4->write_jobs, job->msaa_zs_write->texture); + pipe_surface_reference(&job->msaa_zs_write, NULL); + } + + pipe_surface_reference(&job->color_read, NULL); + pipe_surface_reference(&job->zs_read, NULL); + + if (vc4->job == job) + vc4->job = NULL; + + ralloc_free(job); +} + +static struct vc4_job * +vc4_job_create(struct vc4_context *vc4) +{ + struct vc4_job *job = rzalloc(vc4, struct vc4_job); + + vc4_init_cl(job, &job->bcl); + vc4_init_cl(job, &job->shader_rec); + vc4_init_cl(job, &job->uniforms); + vc4_init_cl(job, &job->bo_handles); + vc4_init_cl(job, &job->bo_pointers); job->draw_min_x = ~0; job->draw_min_y = ~0; job->draw_max_x = 0; job->draw_max_y = 0; - pipe_surface_reference(&job->color_write, NULL); - pipe_surface_reference(&job->color_read, NULL); - pipe_surface_reference(&job->msaa_color_write, NULL); - pipe_surface_reference(&job->zs_write, NULL); - pipe_surface_reference(&job->zs_read, NULL); - pipe_surface_reference(&job->msaa_zs_write, NULL); + return job; +} + +void +vc4_flush_jobs_writing_resource(struct vc4_context *vc4, + struct pipe_resource *prsc) +{ + struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs, + prsc); + if (entry) { + struct 
vc4_job *job = entry->data; + vc4_job_submit(vc4, job); + } +} + +void +vc4_flush_jobs_reading_resource(struct vc4_context *vc4, + struct pipe_resource *prsc) +{ + struct vc4_resource *rsc = vc4_resource(prsc); + + vc4_flush_jobs_writing_resource(vc4, prsc); + + struct hash_entry *entry; + hash_table_foreach(vc4->jobs, entry) { + struct vc4_job *job = entry->data; + + struct vc4_bo **referenced_bos = job->bo_pointers.base; + for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) { + if (referenced_bos[i] == rsc->bo) { + vc4_job_submit(vc4, job); + continue; + } + } + + /* Also check for the Z/color buffers, since the references to + * those are only added immediately before submit. + */ + if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) { + struct vc4_resource *ctex = + vc4_resource(job->color_read->texture); + if (ctex->bo == rsc->bo) { + vc4_job_submit(vc4, job); + continue; + } + } + + if (job->zs_read && !(job->cleared & + (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + struct vc4_resource *ztex = + vc4_resource(job->zs_read->texture); + if (ztex->bo == rsc->bo) { + vc4_job_submit(vc4, job); + continue; + } + } + } +} + +/** + * Returns a vc4_job struture for tracking V3D rendering to a particular FBO. + * + * If we've already started rendering to this FBO, then return old same job, + * otherwise make a new one. If we're beginning rendering to an FBO, make + * sure that any previous reads of the FBO (or writes to its color/Z surfaces) + * have been flushed. + */ +struct vc4_job * +vc4_get_job(struct vc4_context *vc4, + struct pipe_surface *cbuf, struct pipe_surface *zsbuf) +{ + /* Return the existing job for this FBO if we have one */ + struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf}; + struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs, + &local_key); + if (entry) + return entry->data; + + /* Creating a new job. Make sure that any previous jobs reading or + * writing these buffers are flushed. 
+ */ + if (cbuf) + vc4_flush_jobs_reading_resource(vc4, cbuf->texture); + if (zsbuf) + vc4_flush_jobs_reading_resource(vc4, zsbuf->texture); + + struct vc4_job *job = vc4_job_create(vc4); + + if (cbuf) { + if (cbuf->texture->nr_samples > 1) { + job->msaa = true; + pipe_surface_reference(&job->msaa_color_write, cbuf); + } else { + pipe_surface_reference(&job->color_write, cbuf); + } + } + + if (zsbuf) { + if (zsbuf->texture->nr_samples > 1) { + job->msaa = true; + pipe_surface_reference(&job->msaa_zs_write, zsbuf); + } else { + pipe_surface_reference(&job->zs_write, zsbuf); + } + } + + if (job->msaa) { + job->tile_width = 32; + job->tile_height = 32; + } else { + job->tile_width = 64; + job->tile_height = 64; + } + + if (cbuf) + _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job); + if (zsbuf) + _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job); + + job->key.cbuf = cbuf; + job->key.zsbuf = zsbuf; + _mesa_hash_table_insert(vc4->jobs, &job->key, job); + + return job; +} + +struct vc4_job * +vc4_get_job_for_fbo(struct vc4_context *vc4) +{ + if (vc4->job) + return vc4->job; + + struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0]; + struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf; + struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf); + + /* The dirty flags are tracking what's been updated while vc4->job has + * been bound, so set them all to ~0 when switching between jobs. We + * also need to reset all state at the start of rendering. + */ + vc4->dirty = ~0; + + /* Set up the read surfaces in the job. If they aren't actually + * getting read (due to a clear starting the frame), job->cleared will + * mask out the read. + */ + pipe_surface_reference(&job->color_read, cbuf); + pipe_surface_reference(&job->zs_read, zsbuf); + + /* If we're binding to uninitialized buffers, no need to load their + * contents before drawing. 
+ */ + if (cbuf) { + struct vc4_resource *rsc = vc4_resource(cbuf->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_COLOR0; + } + + if (zsbuf) { + struct vc4_resource *rsc = vc4_resource(zsbuf->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; + } + + job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width, + job->tile_width); + job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height, + job->tile_height); + + vc4->job = job; + + return job; } static void @@ -166,15 +356,14 @@ void vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job) { if (!job->needs_flush) - return; + goto done; /* The RCL setup would choke if the draw bounds cause no drawing, so * just drop the drawing if that's the case. */ if (job->draw_max_x <= job->draw_min_x || job->draw_max_y <= job->draw_min_y) { - vc4_job_reset(job); - return; + goto done; } if (vc4_debug & VC4_DEBUG_CL) { @@ -275,7 +464,7 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job) #ifndef USE_VC4_SIMULATOR ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit); #else - ret = vc4_simulator_flush(vc4, &submit); + ret = vc4_simulator_flush(vc4, &submit, job); #endif static bool warned = false; if (ret && !warned) { @@ -304,5 +493,30 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job) } } - vc4_job_reset(vc4->job); +done: + vc4_job_free(vc4, job); +} + +static bool +vc4_job_compare(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct vc4_job_key)) == 0; +} + +static uint32_t +vc4_job_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct vc4_job_key)); +} + +void +vc4_job_init(struct vc4_context *vc4) +{ + vc4->jobs = _mesa_hash_table_create(vc4, + vc4_job_hash, + vc4_job_compare); + vc4->write_jobs = _mesa_hash_table_create(vc4, + _mesa_hash_pointer, + _mesa_key_pointer_equal); } + diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 12469ea192e..bfa8f40ba13 100644 --- 
a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -115,7 +115,6 @@ vc4_resource_transfer_unmap(struct pipe_context *pctx, blit.filter = PIPE_TEX_FILTER_NEAREST; pctx->blit(pctx, &blit); - vc4_flush(pctx); pipe_resource_reference(&trans->ss_resource, NULL); } @@ -178,20 +177,20 @@ vc4_resource_transfer_map(struct pipe_context *pctx, if (prsc->bind & PIPE_BIND_VERTEX_BUFFER) vc4->dirty |= VC4_DIRTY_VTXBUF; } else { - /* If we failed to reallocate, flush everything so - * that we don't violate any syncing requirements. + /* If we failed to reallocate, flush users so that we + * don't violate any syncing requirements. */ - vc4_flush(pctx); + vc4_flush_jobs_reading_resource(vc4, prsc); } } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { /* If we're writing and the buffer is being used by the CL, we * have to flush the CL first. If we're only reading, we need * to flush if the CL has written our buffer. */ - if (vc4_cl_references_bo(pctx, rsc->bo, - usage & PIPE_TRANSFER_WRITE)) { - vc4_flush(pctx); - } + if (usage & PIPE_TRANSFER_WRITE) + vc4_flush_jobs_reading_resource(vc4, prsc); + else + vc4_flush_jobs_writing_resource(vc4, prsc); } if (usage & PIPE_TRANSFER_WRITE) @@ -245,7 +244,7 @@ vc4_resource_transfer_map(struct pipe_context *pctx, blit.filter = PIPE_TEX_FILTER_NEAREST; pctx->blit(pctx, &blit); - vc4_flush(pctx); + vc4_flush_jobs_writing_resource(vc4, blit.dst.resource); } /* The rest of the mapping process should use our temporary. 
*/ diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 81d34761b41..b802391aa6e 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -74,11 +74,10 @@ drm_gem_cma_create(struct drm_device *dev, size_t size) } static int -vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec) +vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_job *job, + struct vc4_exec_info *exec) { struct drm_vc4_submit_cl *args = exec->args; - struct vc4_context *vc4 = dev->vc4; - struct vc4_job *job = vc4->job; struct vc4_bo **bos = job->bo_pointers.base; exec->bo_count = args->bo_handle_count; @@ -220,7 +219,8 @@ vc4_dump_to_file(struct vc4_exec_info *exec) } int -vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) +vc4_simulator_flush(struct vc4_context *vc4, + struct drm_vc4_submit_cl *args, struct vc4_job *job) { struct vc4_screen *screen = vc4->screen; struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); @@ -257,7 +257,7 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) exec.args = args; - ret = vc4_simulator_pin_bos(dev, &exec); + ret = vc4_simulator_pin_bos(dev, job, &exec); if (ret) return ret; diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index d697f7c87ff..12471589510 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -406,11 +406,10 @@ vc4_set_framebuffer_state(struct pipe_context *pctx, const struct pipe_framebuffer_state *framebuffer) { struct vc4_context *vc4 = vc4_context(pctx); - struct vc4_job *job = vc4->job; struct pipe_framebuffer_state *cso = &vc4->framebuffer; unsigned i; - vc4_flush(pctx); + vc4->job = NULL; for (i = 0; i < framebuffer->nr_cbufs; i++) pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]); @@ -424,23 +423,6 @@ vc4_set_framebuffer_state(struct pipe_context *pctx, cso->width = 
framebuffer->width; cso->height = framebuffer->height; - /* If we're binding to uninitialized buffers, no need to load their - * contents before drawing.. - */ - if (cso->cbufs[0]) { - struct vc4_resource *rsc = - vc4_resource(cso->cbufs[0]->texture); - if (!rsc->writes) - job->cleared |= PIPE_CLEAR_COLOR0; - } - - if (cso->zsbuf) { - struct vc4_resource *rsc = - vc4_resource(cso->zsbuf->texture); - if (!rsc->writes) - job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; - } - /* Nonzero texture mipmap levels are laid out as if they were in * power-of-two-sized spaces. The renderbuffer config infers its * stride from the width parameter, so we need to configure our @@ -461,22 +443,6 @@ vc4_set_framebuffer_state(struct pipe_context *pctx, rsc->cpp); } - job->msaa = false; - if (cso->cbufs[0]) - job->msaa = cso->cbufs[0]->texture->nr_samples > 1; - else if (cso->zsbuf) - job->msaa = cso->zsbuf->texture->nr_samples > 1; - - if (job->msaa) { - job->tile_width = 32; - job->tile_height = 32; - } else { - job->tile_width = 64; - job->tile_height = 64; - } - job->draw_tiles_x = DIV_ROUND_UP(cso->width, job->tile_width); - job->draw_tiles_y = DIV_ROUND_UP(cso->height, job->tile_height); - vc4->dirty |= VC4_DIRTY_FRAMEBUFFER; } |