Diffstat (limited to 'src/gallium')
19 files changed, 288 insertions, 264 deletions
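The diff below moves the hw-query bookkeeping (sample cache, active providers, current render stage, per-batch sample list, and the query result buffer) from fd_context into fd_batch, and changes the sample-provider callbacks to take a struct fd_batch. As a rough, illustrative sketch only (not part of the commit), a provider written against the new interface would look roughly like this; the function name is hypothetical, while the get_sample() signature and fd_hw_sample_init() come from the freedreno_query_hw.h changes below:

/* Illustrative only: a minimal get_sample() callback against the new
 * fd_batch-based interface.  "example_get_sample" is a made-up name.
 */
static struct fd_hw_sample *
example_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
	/* samples are now allocated per-batch; this lazily creates
	 * batch->query_buf and advances batch->next_sample_offset:
	 */
	struct fd_hw_sample *samp = fd_hw_sample_init(batch, sizeof(uint64_t));

	/* ... emit cmdstream into 'ring' that writes the counter value at
	 * samp->offset relative to HW_QUERY_BASE_REG ...
	 */

	return samp;
}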
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index eef5b52f12c..7e83157e38e 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -757,8 +757,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
  * state, there could have been a context switch between ioctls):
  */
 void
-fd3_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
+fd3_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
+	struct fd_context *ctx = batch->ctx;
 	struct fd3_context *fd3_ctx = fd3_context(ctx);
 	int i;
 
@@ -894,7 +895,7 @@ fd3_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
 
 	fd_wfi(ctx, ring);
 
-	fd_hw_query_enable(ctx, ring);
+	fd_hw_query_enable(batch, ring);
 
 	ctx->needs_rb_fbd = true;
 }
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index 110f30e89be..dfe77589542 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -93,7 +93,7 @@ void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit);
 void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		struct fd3_emit *emit);
 
-void fd3_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring);
+void fd3_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);
 
 void fd3_emit_init(struct pipe_context *pctx);
 
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index b9af45683f9..1788c0c7384 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -734,7 +734,7 @@ fd3_emit_sysmem_prep(struct fd_batch *batch)
 		pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch;
 	}
 
-	fd3_emit_restore(batch->ctx, ring);
+	fd3_emit_restore(batch, ring);
 
 	OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
 	OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
@@ -927,7 +927,7 @@ fd3_emit_tile_init(struct fd_batch *batch)
 	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
 	uint32_t rb_render_control;
 
-	fd3_emit_restore(batch->ctx, ring);
+	fd3_emit_restore(batch, ring);
 
 	/* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated
 	 * at the right and bottom edge tiles
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_query.c b/src/gallium/drivers/freedreno/a3xx/fd3_query.c
index 8fc0a0d4229..ec034fc127d 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_query.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_query.c
@@ -46,10 +46,10 @@ struct fd_rb_samp_ctrs {
  */
 
 static struct fd_hw_sample *
-occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
 	struct fd_hw_sample *samp =
-			fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));
+			fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
 
 	/* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
 	 * HW_QUERY_BASE_REG register:
@@ -68,7 +68,7 @@ occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
 			INDEX_SIZE_IGN, USE_VISIBILITY, 0));
 	OUT_RING(ring, 0);             /* NumIndices */
 
-	fd_event_write(ctx, ring, ZPASS_DONE);
+	fd_event_write(batch->ctx, ring, ZPASS_DONE);
 
 	OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1);
 	OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 88e1a40ec90..9ce93f6e33f 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -736,8 +736,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
  * state, there could have been a context switch between ioctls):
  */
 void
-fd4_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
+fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
+	struct fd_context *ctx = batch->ctx;
 	struct fd4_context *fd4_ctx = fd4_context(ctx);
 
 	OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
@@ -885,7 +886,7 @@ fd4_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
 	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
 	OUT_RING(ring, 0x0);
 
-	fd_hw_query_enable(ctx, ring);
+	fd_hw_query_enable(batch, ring);
 
 	ctx->needs_rb_fbd = true;
 }
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
index 89dc51ad1ee..42e0e5e645a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
@@ -102,7 +102,7 @@ void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit);
 void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		struct fd4_emit *emit);
 
-void fd4_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring);
+void fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);
 
 void fd4_emit_init(struct pipe_context *pctx);
 
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
index afd37a88f43..3f3847c2a28 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -527,7 +527,7 @@ fd4_emit_sysmem_prep(struct fd_batch *batch)
 	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
 	struct fd_ringbuffer *ring = batch->gmem;
 
-	fd4_emit_restore(batch->ctx, ring);
+	fd4_emit_restore(batch, ring);
 
 	OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
 	OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
@@ -666,7 +666,7 @@ fd4_emit_tile_init(struct fd_batch *batch)
 	struct fd_ringbuffer *ring = batch->gmem;
 	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
 
-	fd4_emit_restore(batch->ctx, ring);
+	fd4_emit_restore(batch, ring);
 
 	OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
 	OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
index 41e3e6506bd..921384c1911 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_query.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
@@ -48,10 +48,10 @@ struct fd_rb_samp_ctrs {
  */
 
 static struct fd_hw_sample *
-occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+occlusion_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
 	struct fd_hw_sample *samp =
-			fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));
+			fd_hw_sample_init(batch, sizeof(struct fd_rb_samp_ctrs));
 
 	/* low bits of sample addr should be zero (since they are control
 	 * flags in RB_SAMPLE_COUNT_CONTROL):
@@ -73,7 +73,7 @@ occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
 	OUT_RING(ring, 1);             /* NumInstances */
 	OUT_RING(ring, 0);             /* NumIndices */
 
-	fd_event_write(ctx, ring, ZPASS_DONE);
+	fd_event_write(batch->ctx, ring, ZPASS_DONE);
 
 	return samp;
 }
@@ -123,18 +123,18 @@ time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
 }
 
 static struct fd_hw_sample *
-time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+time_elapsed_get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
-	struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t));
+	struct fd_hw_sample *samp = fd_hw_sample_init(batch, sizeof(uint64_t));
 
 	/* use unused part of vsc_size_mem as scratch space, to avoid
 	 * extra allocation:
 	 */
-	struct fd_bo *scratch_bo = fd4_context(ctx)->vsc_size_mem;
+	struct fd_bo *scratch_bo = fd4_context(batch->ctx)->vsc_size_mem;
 	const int sample_off = 128;
 	const int addr_off = sample_off + 8;
 
-	debug_assert(ctx->screen->max_freq > 0);
+	debug_assert(batch->ctx->screen->max_freq > 0);
 
 	/* Basic issue is that we need to read counter value to a relative
 	 * destination (with per-tile offset) rather than absolute dest
@@ -161,7 +161,7 @@ time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
 	 * shot, but that's really just polishing a turd..
 	 */
 
-	fd_wfi(ctx, ring);
+	fd_wfi(batch->ctx, ring);
 
 	/* copy sample counter _LO and _HI to scratch: */
 	OUT_PKT3(ring, CP_REG_TO_MEM, 2);
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index 2dd7eda72ad..5008f5dbe56 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -32,6 +32,7 @@
 #include "freedreno_batch.h"
 #include "freedreno_context.h"
 #include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
 
 static void
 batch_init(struct fd_batch *batch)
@@ -61,6 +62,7 @@ batch_init(struct fd_batch *batch)
 	batch->needs_flush = false;
 	batch->gmem_reason = 0;
 	batch->num_draws = 0;
+	batch->stage = FD_STAGE_NULL;
 
 	/* reset maximal bounds: */
 	batch->max_scissor.minx = batch->max_scissor.miny = ~0;
@@ -72,6 +74,8 @@
 		util_dynarray_init(&batch->rbrc_patches);
 
 	assert(batch->resources->entries == 0);
+
+	util_dynarray_init(&batch->samples);
 }
 
 struct fd_batch *
@@ -98,6 +102,8 @@ fd_batch_create(struct fd_context *ctx)
 static void
 batch_fini(struct fd_batch *batch)
 {
+	pipe_resource_reference(&batch->query_buf, NULL);
+
 	fd_ringbuffer_del(batch->draw);
 	fd_ringbuffer_del(batch->binning);
 	fd_ringbuffer_del(batch->gmem);
@@ -106,6 +112,13 @@ batch_fini(struct fd_batch *batch)
 
 	if (is_a3xx(batch->ctx->screen))
 		util_dynarray_fini(&batch->rbrc_patches);
+
+	while (batch->samples.size > 0) {
+		struct fd_hw_sample *samp =
+			util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
+		fd_hw_sample_reference(batch->ctx, &samp, NULL);
+	}
+	util_dynarray_fini(&batch->samples);
 }
 
 static void
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index 89d1d9fea7b..228a1b72bf6 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -28,6 +28,7 @@
 #define FREEDRENO_BATCH_H_
 
 #include "util/u_inlines.h"
+#include "util/list.h"
 
 #include "freedreno_util.h"
 
@@ -35,6 +36,35 @@ struct fd_context;
 struct fd_resource;
 enum fd_resource_status;
 
+/* Bitmask of stages in rendering that a particular query query is
+ * active.  Queries will be automatically started/stopped (generating
+ * additional fd_hw_sample_period's) on entrance/exit from stages that
+ * are applicable to the query.
+ *
+ * NOTE: set the stage to NULL at end of IB to ensure no query is still
+ * active.  Things aren't going to work out the way you want if a query
+ * is active across IB's (or between tile IB and draw IB)
+ */
+enum fd_render_stage {
+	FD_STAGE_NULL     = 0x01,
+	FD_STAGE_DRAW     = 0x02,
+	FD_STAGE_CLEAR    = 0x04,
+	/* TODO before queries which include MEM2GMEM or GMEM2MEM will
+	 * work we will need to call fd_hw_query_prepare() from somewhere
+	 * appropriate so that queries in the tiling IB get backed with
+	 * memory to write results to.
+	 */
+	FD_STAGE_MEM2GMEM = 0x08,
+	FD_STAGE_GMEM2MEM = 0x10,
+	/* used for driver internal draws (ie. util_blitter_blit()): */
+	FD_STAGE_BLIT     = 0x20,
+	FD_STAGE_ALL      = 0xff,
+};
+
+#define MAX_HW_SAMPLE_PROVIDERS 4
+struct fd_hw_sample_provider;
+struct fd_hw_sample;
+
 /* A batch tracks everything about a cmdstream batch/submit, including the
  * ringbuffers used for binning, draw, and gmem cmds, list of associated
  * fd_resource-s, etc.
@@ -118,6 +148,37 @@ struct fd_batch {
 	/** tiling/gmem (IB0) cmdstream: */
 	struct fd_ringbuffer *gmem;
 
+	/**
+	 * hw query related state:
+	 */
+	/*@{*/
+	/* next sample offset.. incremented for each sample in the batch/
+	 * submit, reset to zero on next submit.
+	 */
+	uint32_t next_sample_offset;
+
+	/* cached samples (in case multiple queries need to reference
+	 * the same sample snapshot)
+	 */
+	struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
+
+	/* which sample providers were active in the current batch: */
+	uint32_t active_providers;
+
+	/* tracking for current stage, to know when to start/stop
+	 * any active queries:
+	 */
+	enum fd_render_stage stage;
+
+	/* list of samples in current batch: */
+	struct util_dynarray samples;
+
+	/* current query result bo and tile stride: */
+	struct pipe_resource *query_buf;
+	uint32_t query_tile_stride;
+	/*@}*/
+
 	/* Set of resources used by currently-unsubmitted batch (read or
 	 * write).. does not hold a reference to the resource.
 	 */
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 13a17e2a78e..1c32cd9ae92 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -168,8 +168,6 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
 	 */
 	ctx->sample_mask = 0xffff;
 
-	ctx->stage = FD_STAGE_NULL;
-
 	pctx = &ctx->base;
 	pctx->screen = pscreen;
 	pctx->priv = priv;
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 74f53ee554f..45876259fd8 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -107,34 +107,6 @@ struct fd_vertex_state {
 	struct fd_vertexbuf_stateobj vertexbuf;
 };
 
-/* Bitmask of stages in rendering that a particular query query is
- * active.  Queries will be automatically started/stopped (generating
- * additional fd_hw_sample_period's) on entrance/exit from stages that
- * are applicable to the query.
- *
- * NOTE: set the stage to NULL at end of IB to ensure no query is still
- * active.  Things aren't going to work out the way you want if a query
- * is active across IB's (or between tile IB and draw IB)
- */
-enum fd_render_stage {
-	FD_STAGE_NULL     = 0x01,
-	FD_STAGE_DRAW     = 0x02,
-	FD_STAGE_CLEAR    = 0x04,
-	/* TODO before queries which include MEM2GMEM or GMEM2MEM will
-	 * work we will need to call fd_hw_query_prepare() from somewhere
-	 * appropriate so that queries in the tiling IB get backed with
-	 * memory to write results to.
-	 */
-	FD_STAGE_MEM2GMEM = 0x08,
-	FD_STAGE_GMEM2MEM = 0x10,
-	/* used for driver internal draws (ie. util_blitter_blit()): */
-	FD_STAGE_BLIT     = 0x20,
-	FD_STAGE_ALL      = 0xff,
-};
-
-#define MAX_HW_SAMPLE_PROVIDERS 4
-struct fd_hw_sample_provider;
-struct fd_hw_sample;
 
 struct fd_context {
 	struct pipe_context base;
@@ -152,39 +124,12 @@ struct fd_context {
 	struct util_slab_mempool sample_pool;
 	struct util_slab_mempool sample_period_pool;
 
-	/* next sample offset.. incremented for each sample in the batch/
-	 * submit, reset to zero on next submit.
-	 */
-	uint32_t next_sample_offset;
-
 	/* sample-providers for hw queries: */
 	const struct fd_hw_sample_provider *sample_providers[MAX_HW_SAMPLE_PROVIDERS];
 
-	/* cached samples (in case multiple queries need to reference
-	 * the same sample snapshot)
-	 */
-	struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
-
-	/* which sample providers were active in the current batch: */
-	uint32_t active_providers;
-
-	/* tracking for current stage, to know when to start/stop
-	 * any active queries:
-	 */
-	enum fd_render_stage stage;
-
 	/* list of active queries: */
 	struct list_head active_queries;
 
-	/* list of queries that are not active, but were active in the
-	 * current submit:
-	 */
-	struct list_head current_queries;
-
-	/* current query result bo and tile stride: */
-	struct pipe_resource *query_buf;
-	uint32_t query_tile_stride;
-
 	/* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to
 	 * DI_PT_x value to use for draw initiator.  There are some
 	 * slight differences between generation:
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index 112bf5cb624..fd3da1f20e5 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -89,6 +89,10 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 		ctx->discard = false;
 	}
 
+	/* NOTE: needs to be before resource_written(batch->query_buf), otherwise
+	 * query_buf may not be created yet.
+	 */
+	fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_DRAW);
 
 	/*
 	 * Figure out the buffers/features we need:
 	 */
@@ -154,6 +158,8 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 		if (ctx->streamout.targets[i])
 			resource_written(batch, ctx->streamout.targets[i]->buffer);
 
+	resource_written(batch, batch->query_buf);
+
 	batch->num_draws++;
 
 	prims = u_reduced_prims_for_vertices(info->mode, info->count);
@@ -180,7 +186,6 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 
-	fd_hw_query_set_stage(ctx, batch->draw, FD_STAGE_DRAW);
 	if (ctx->draw_vbo(ctx, info))
 		batch->needs_flush = true;
 
@@ -253,12 +258,14 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
 		batch->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
 	}
 
+	resource_written(batch, batch->query_buf);
+
 	DBG("%p: %x %ux%u depth=%f, stencil=%u (%s/%s)", batch, buffers,
 		pfb->width, pfb->height, depth, stencil,
 		util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 		util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 
-	fd_hw_query_set_stage(ctx, batch->draw, FD_STAGE_CLEAR);
+	fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_CLEAR);
 
 	ctx->clear(ctx, buffers, color, depth, stencil);
 
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index a075a8b5c95..d57b6a36d8b 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -323,23 +323,23 @@ render_tiles(struct fd_batch *batch)
 		ctx->emit_tile_prep(batch, tile);
 
 		if (batch->restore) {
-			fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_MEM2GMEM);
+			fd_hw_query_set_stage(batch, batch->gmem, FD_STAGE_MEM2GMEM);
 			ctx->emit_tile_mem2gmem(batch, tile);
-			fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_NULL);
+			fd_hw_query_set_stage(batch, batch->gmem, FD_STAGE_NULL);
 		}
 
 		ctx->emit_tile_renderprep(batch, tile);
 
-		fd_hw_query_prepare_tile(ctx, i, batch->gmem);
+		fd_hw_query_prepare_tile(batch, i, batch->gmem);
 
 		/* emit IB to drawcmds: */
 		ctx->emit_ib(batch->gmem, batch->draw);
 		fd_reset_wfi(ctx);
 
 		/* emit gmem2mem to transfer tile back to system memory: */
-		fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_GMEM2MEM);
+		fd_hw_query_set_stage(batch, batch->gmem, FD_STAGE_GMEM2MEM);
 		ctx->emit_tile_gmem2mem(batch, tile);
-		fd_hw_query_set_stage(ctx, batch->gmem, FD_STAGE_NULL);
+		fd_hw_query_set_stage(batch, batch->gmem, FD_STAGE_NULL);
 	}
 }
 
@@ -350,7 +350,7 @@ render_sysmem(struct fd_batch *batch)
 	ctx->emit_sysmem_prep(batch);
 
-	fd_hw_query_prepare_tile(ctx, 0, batch->gmem);
+	fd_hw_query_prepare_tile(batch, 0, batch->gmem);
 
 	/* emit IB to drawcmds: */
 	ctx->emit_ib(batch->gmem, batch->draw);
@@ -376,7 +376,7 @@ fd_gmem_render_tiles(struct fd_batch *batch)
 	/* close out the draw cmds by making sure any active queries are
 	 * paused:
 	 */
-	fd_hw_query_set_stage(ctx, batch->draw, FD_STAGE_NULL);
+	fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_NULL);
 
 	fd_reset_wfi(ctx);
 
@@ -387,7 +387,7 @@ fd_gmem_render_tiles(struct fd_batch *batch)
 				batch, pfb->width, pfb->height,
 				util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 				util_format_short_name(pipe_surface_format(pfb->zsbuf)));
-		fd_hw_query_prepare(ctx, 1);
+		fd_hw_query_prepare(batch, 1);
 		render_sysmem(batch);
 		ctx->stats.batch_sysmem++;
 	} else {
@@ -397,7 +397,7 @@ fd_gmem_render_tiles(struct fd_batch *batch)
 				batch, pfb->width, pfb->height, gmem->nbins_x, gmem->nbins_y,
 				util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 				util_format_short_name(pipe_surface_format(pfb->zsbuf)));
-		fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y);
+		fd_hw_query_prepare(batch, gmem->nbins_x * gmem->nbins_y);
 		render_tiles(batch);
 		ctx->stats.batch_gmem++;
 	}
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c
index 808bcefc2ad..12d40d04cda 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.c
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -61,32 +61,35 @@ static int pidx(unsigned query_type)
 }
 
 static struct fd_hw_sample *
-get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring,
+get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring,
 		unsigned query_type)
 {
+	struct fd_context *ctx = batch->ctx;
 	struct fd_hw_sample *samp = NULL;
 	int idx = pidx(query_type);
 
 	assume(idx >= 0);   /* query never would have been created otherwise */
 
-	if (!ctx->sample_cache[idx]) {
-		ctx->sample_cache[idx] =
-			ctx->sample_providers[idx]->get_sample(ctx, ring);
-		ctx->batch->needs_flush = true;
+	if (!batch->sample_cache[idx]) {
+		struct fd_hw_sample *new_samp =
+			ctx->sample_providers[idx]->get_sample(batch, ring);
+		fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
+		util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
+		batch->needs_flush = true;
 	}
 
-	fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]);
+	fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);
 
 	return samp;
 }
 
 static void
-clear_sample_cache(struct fd_context *ctx)
+clear_sample_cache(struct fd_batch *batch)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(ctx->sample_cache); i++)
-		fd_hw_sample_reference(ctx, &ctx->sample_cache[i], NULL);
+	for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
+		fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
 }
 
 static bool
@@ -97,38 +100,38 @@ is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
 
 static void
-resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
+resume_query(struct fd_batch *batch, struct fd_hw_query *hq,
 		struct fd_ringbuffer *ring)
 {
 	int idx = pidx(hq->provider->query_type);
 
 	assert(idx >= 0);   /* query never would have been created otherwise */
 	assert(!hq->period);
-	ctx->active_providers |= (1 << idx);
-	hq->period = util_slab_alloc(&ctx->sample_period_pool);
+	batch->active_providers |= (1 << idx);
+	hq->period = util_slab_alloc(&batch->ctx->sample_period_pool);
 	list_inithead(&hq->period->list);
-	hq->period->start = get_sample(ctx, ring, hq->base.type);
+	hq->period->start = get_sample(batch, ring, hq->base.type);
 	/* NOTE: util_slab_alloc() does not zero out the buffer: */
 	hq->period->end = NULL;
 }
 
 static void
-pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
+pause_query(struct fd_batch *batch, struct fd_hw_query *hq,
 		struct fd_ringbuffer *ring)
 {
 	int idx = pidx(hq->provider->query_type);
 
 	assert(idx >= 0);   /* query never would have been created otherwise */
 	assert(hq->period && !hq->period->end);
-	assert(ctx->active_providers & (1 << idx));
-	hq->period->end = get_sample(ctx, ring, hq->base.type);
-	list_addtail(&hq->period->list, &hq->current_periods);
+	assert(batch->active_providers & (1 << idx));
+	hq->period->end = get_sample(batch, ring, hq->base.type);
+	list_addtail(&hq->period->list, &hq->periods);
 	hq->period = NULL;
 }
 
 static void
-destroy_periods(struct fd_context *ctx, struct list_head *list)
+destroy_periods(struct fd_context *ctx, struct fd_hw_query *hq)
 {
 	struct fd_hw_sample_period *period, *s;
-	LIST_FOR_EACH_ENTRY_SAFE(period, s, list, list) {
+	LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->periods, list) {
 		fd_hw_sample_reference(ctx, &period->start, NULL);
 		fd_hw_sample_reference(ctx, &period->end, NULL);
 		list_del(&period->list);
@@ -141,8 +144,7 @@ fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
 {
 	struct fd_hw_query *hq = fd_hw_query(q);
 
-	destroy_periods(ctx, &hq->periods);
-	destroy_periods(ctx, &hq->current_periods);
+	destroy_periods(ctx, hq);
 	list_del(&hq->list);
 
 	free(hq);
@@ -151,27 +153,31 @@ fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
 static boolean
 fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
 {
+	struct fd_batch *batch = ctx->batch;
 	struct fd_hw_query *hq = fd_hw_query(q);
+
 	if (q->active)
 		return false;
 
 	/* begin_query() should clear previous results: */
-	destroy_periods(ctx, &hq->periods);
+	destroy_periods(ctx, hq);
 
-	if (is_active(hq, ctx->stage))
-		resume_query(ctx, hq, ctx->batch->draw);
+	if (batch && is_active(hq, batch->stage))
+		resume_query(batch, hq, batch->draw);
 
 	q->active = true;
 
 	/* add to active list: */
-	list_del(&hq->list);
+	assert(list_empty(&hq->list));
 	list_addtail(&hq->list, &ctx->active_queries);
-	return true;
+
+	return true;
 }
 
 static void
 fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
 {
+	struct fd_batch *batch = ctx->batch;
 	struct fd_hw_query *hq = fd_hw_query(q);
 	/* there are a couple special cases, which don't have
 	 * a matching ->begin_query():
@@ -181,12 +187,11 @@ fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
 	}
 	if (!q->active)
 		return;
-	if (is_active(hq, ctx->stage))
-		pause_query(ctx, hq, ctx->batch->draw);
+	if (batch && is_active(hq, batch->stage))
+		pause_query(batch, hq, batch->draw);
 	q->active = false;
-	/* move to current list: */
-	list_del(&hq->list);
-	list_addtail(&hq->list, &ctx->current_queries);
+	/* remove from active list: */
+	list_delinit(&hq->list);
 }
 
 /* helper to get ptr to specified sample: */
@@ -206,27 +211,12 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
 	if (q->active)
 		return false;
 
-	/* if the app tries to read back the query result before the
-	 * batch is submitted, that forces us to flush so that there
-	 * are actually results to wait for:
-	 */
-	if (!LIST_IS_EMPTY(&hq->list)) {
-		/* if app didn't actually trigger any cmdstream, then
-		 * we have nothing to do:
-		 */
-		if (!ctx->batch->needs_flush)
-			return true;
-		DBG("reading query result forces flush!");
-		fd_batch_flush(ctx->batch);
-	}
-
 	util_query_clear_result(result, q->type);
 
 	if (LIST_IS_EMPTY(&hq->periods))
 		return true;
 
 	assert(LIST_IS_EMPTY(&hq->list));
-	assert(LIST_IS_EMPTY(&hq->current_periods));
 	assert(!hq->period);
 
 	/* if !wait, then check the last sample (the one most likely to
@@ -240,6 +230,21 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
 		struct fd_resource *rsc = fd_resource(period->end->prsc);
 
+		if (pending(rsc, false)) {
+			/* piglit spec@arb_occlusion_query@occlusion_query_conform
+			 * test, and silly apps perhaps, get stuck in a loop trying
+			 * to get query result forever with wait==false..  we don't
+			 * wait to flush unnecessarily but we also don't want to
+			 * spin forever:
+			 */
+			if (hq->no_wait_cnt++ > 5)
+				fd_batch_flush(rsc->write_batch);
+			return false;
+		}
+
+		if (!rsc->bo)
+			return false;
+
 		ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe,
 				DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
 		if (ret)
@@ -260,6 +265,13 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
 			struct fd_resource *rsc = fd_resource(start->prsc);
 
+			if (rsc->write_batch)
+				fd_batch_flush(rsc->write_batch);
+
+			/* some piglit tests at least do query with no draws, I guess: */
+			if (!rsc->bo)
+				continue;
+
 			fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, DRM_FREEDRENO_PREP_READ);
 
 			void *ptr = fd_bo_map(rsc->bo);
@@ -299,7 +311,6 @@ fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
 	hq->provider = ctx->sample_providers[idx];
 
 	list_inithead(&hq->periods);
-	list_inithead(&hq->current_periods);
 	list_inithead(&hq->list);
 
 	q = &hq->base;
@@ -310,19 +321,38 @@ fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
 }
 
 struct fd_hw_sample *
-fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
+fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
 {
-	struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
+	struct fd_hw_sample *samp = util_slab_alloc(&batch->ctx->sample_pool);
 	pipe_reference_init(&samp->reference, 1);
 	samp->size = size;
 	debug_assert(util_is_power_of_two(size));
-	ctx->next_sample_offset = align(ctx->next_sample_offset, size);
-	samp->offset = ctx->next_sample_offset;
+	batch->next_sample_offset = align(batch->next_sample_offset, size);
+	samp->offset = batch->next_sample_offset;
 	/* NOTE: util_slab_alloc() does not zero out the buffer: */
 	samp->prsc = NULL;
 	samp->num_tiles = 0;
 	samp->tile_stride = 0;
-	ctx->next_sample_offset += size;
+	batch->next_sample_offset += size;
+
+	if (!batch->query_buf) {
+		struct pipe_screen *pscreen = &batch->ctx->screen->base;
+		struct pipe_resource templ = {
+			.target     = PIPE_BUFFER,
+			.format     = PIPE_FORMAT_R8_UNORM,
+			.bind       = PIPE_BIND_QUERY_BUFFER,
+			.width0     = 0,    /* create initially zero size buffer */
+			.height0    = 1,
+			.depth0     = 1,
+			.array_size = 1,
+			.last_level = 0,
+			.nr_samples = 1,
+		};
+		batch->query_buf = pscreen->resource_create(pscreen, &templ);
+	}
+
+	pipe_resource_reference(&samp->prsc, batch->query_buf);
+
 	return samp;
 }
 
@@ -333,110 +363,49 @@ __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
 	util_slab_free(&ctx->sample_pool, samp);
 }
 
-static void
-prepare_sample(struct fd_hw_sample *samp, struct pipe_resource *prsc,
-		uint32_t num_tiles, uint32_t tile_stride)
-{
-	if (samp->prsc) {
-		assert(samp->prsc == prsc);
-		assert(samp->num_tiles == num_tiles);
-		assert(samp->tile_stride == tile_stride);
-		return;
-	}
-	pipe_resource_reference(&samp->prsc, prsc);
-	samp->num_tiles = num_tiles;
-	samp->tile_stride = tile_stride;
-}
-
-static void
-prepare_query(struct fd_hw_query *hq, struct pipe_resource *prsc,
-		uint32_t num_tiles, uint32_t tile_stride)
-{
-	struct fd_hw_sample_period *period, *s;
-
-	/* prepare all the samples in the query: */
-	LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->current_periods, list) {
-		prepare_sample(period->start, prsc, num_tiles, tile_stride);
-		prepare_sample(period->end, prsc, num_tiles, tile_stride);
-
-		/* move from current_periods list to periods list: */
-		list_del(&period->list);
-		list_addtail(&period->list, &hq->periods);
-	}
-}
-
-static void
-prepare_queries(struct fd_context *ctx, struct pipe_resource *prsc,
-		uint32_t num_tiles, uint32_t tile_stride,
-		struct list_head *list, bool remove)
-{
-	struct fd_hw_query *hq, *s;
-	LIST_FOR_EACH_ENTRY_SAFE(hq, s, list, list) {
-		prepare_query(hq, prsc, num_tiles, tile_stride);
-		if (remove)
-			list_delinit(&hq->list);
-	}
-}
-
 /* called from gmem code once total storage requirements are known (ie.
  * number of samples times number of tiles)
  */
 void
-fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles)
+fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
 {
-	uint32_t tile_stride = ctx->next_sample_offset;
-	struct pipe_resource *prsc;
+	uint32_t tile_stride = batch->next_sample_offset;
 
-	pipe_resource_reference(&ctx->query_buf, NULL);
+	if (tile_stride > 0)
+		fd_resource_resize(batch->query_buf, tile_stride * num_tiles);
 
-	if (tile_stride > 0) {
-		struct pipe_screen *pscreen = &ctx->screen->base;
-		struct pipe_resource templ = {
-			.target     = PIPE_BUFFER,
-			.format     = PIPE_FORMAT_R8_UNORM,
-			.bind       = PIPE_BIND_QUERY_BUFFER,
-			.width0     = tile_stride * num_tiles,
-			.height0    = 1,
-			.depth0     = 1,
-			.array_size = 1,
-			.last_level = 0,
-			.nr_samples = 1,
-		};
-		prsc = pscreen->resource_create(pscreen, &templ);
-	} else {
-		prsc = NULL;
-	}
-
-	ctx->query_buf = prsc;
-	ctx->query_tile_stride = tile_stride;
+	batch->query_tile_stride = tile_stride;
 
-	prepare_queries(ctx, prsc, num_tiles, tile_stride,
-			&ctx->active_queries, false);
-	prepare_queries(ctx, prsc, num_tiles, tile_stride,
-			&ctx->current_queries, true);
+	while (batch->samples.size > 0) {
+		struct fd_hw_sample *samp =
+			util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
+		samp->num_tiles = num_tiles;
+		samp->tile_stride = tile_stride;
+		fd_hw_sample_reference(batch->ctx, &samp, NULL);
+	}
 
 	/* reset things for next batch: */
-	ctx->next_sample_offset = 0;
+	batch->next_sample_offset = 0;
 }
 
 void
-fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
+fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
 		struct fd_ringbuffer *ring)
 {
-	uint32_t tile_stride = ctx->query_tile_stride;
+	uint32_t tile_stride = batch->query_tile_stride;
 	uint32_t offset = tile_stride * n;
 
 	/* bail if no queries: */
 	if (tile_stride == 0)
 		return;
 
-	fd_wfi(ctx, ring);
+	fd_wfi(batch->ctx, ring);
 	OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
-	OUT_RELOCW(ring, fd_resource(ctx->query_buf)->bo, offset, 0, 0);
+	OUT_RELOCW(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
 }
 
 void
-fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
+fd_hw_query_set_stage(struct fd_batch *batch, struct fd_ringbuffer *ring,
 		enum fd_render_stage stage)
 {
 	/* special case: internal blits (like mipmap level generation)
@@ -445,24 +414,24 @@ fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	 * don't enable queries which should be paused during internal
 	 * blits:
 	 */
-	if ((ctx->stage == FD_STAGE_BLIT) &&
+	if ((batch->stage == FD_STAGE_BLIT) &&
 			(stage != FD_STAGE_NULL))
 		return;
 
-	if (stage != ctx->stage) {
+	if (stage != batch->stage) {
 		struct fd_hw_query *hq;
-		LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) {
-			bool was_active = is_active(hq, ctx->stage);
+		LIST_FOR_EACH_ENTRY(hq, &batch->ctx->active_queries, list) {
+			bool was_active = is_active(hq, batch->stage);
 			bool now_active = is_active(hq, stage);
 
 			if (now_active && !was_active)
-				resume_query(ctx, hq, ring);
+				resume_query(batch, hq, ring);
 			else if (was_active && !now_active)
-				pause_query(ctx, hq, ring);
+				pause_query(batch, hq, ring);
 		}
 	}
-	clear_sample_cache(ctx);
-	ctx->stage = stage;
+	clear_sample_cache(batch);
+	batch->stage = stage;
 }
 
 /* call the provider->enable() for all the hw queries that were active
@@ -470,16 +439,17 @@ fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
  * for the duration of the batch.
 */
 void
-fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
+fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring)
 {
+	struct fd_context *ctx = batch->ctx;
 	for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
-		if (ctx->active_providers & (1 << idx)) {
+		if (batch->active_providers & (1 << idx)) {
 			assert(ctx->sample_providers[idx]);
 			if (ctx->sample_providers[idx]->enable)
 				ctx->sample_providers[idx]->enable(ctx, ring);
 		}
 	}
-	ctx->active_providers = 0;  /* clear it for next frame */
+	batch->active_providers = 0;  /* clear it for next frame */
 }
 
 void
@@ -505,7 +475,6 @@ fd_hw_query_init(struct pipe_context *pctx)
 	util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
 			16, UTIL_SLAB_SINGLETHREADED);
 	list_inithead(&ctx->active_queries);
-	list_inithead(&ctx->current_queries);
 }
 
 void
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.h b/src/gallium/drivers/freedreno/freedreno_query_hw.h
index 0afece3495f..abd86682a9f 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.h
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.h
@@ -84,7 +84,7 @@ struct fd_hw_sample_provider {
 	/* when a new sample is required, emit appropriate cmdstream
	 * and return a sample object:
	 */
-	struct fd_hw_sample *(*get_sample)(struct fd_context *ctx,
+	struct fd_hw_sample *(*get_sample)(struct fd_batch *batch,
 			struct fd_ringbuffer *ring);
 
 	/* accumulate the results from specified sample period: */
@@ -119,18 +119,17 @@ struct fd_hw_query {
 
 	const struct fd_hw_sample_provider *provider;
 
-	/* list of fd_hw_sample_period in previous submits: */
+	/* list of fd_hw_sample_periods: */
 	struct list_head periods;
 
-	/* list of fd_hw_sample_period's in current submit: */
-	struct list_head current_periods;
-
 	/* if active and not paused, the current sample period (not
 	 * yet added to current_periods):
 	 */
 	struct fd_hw_sample_period *period;
 
-	struct list_head list;  /* list-node in ctx->active_queries */
+	struct list_head list;  /* list-node in batch->active_queries */
+
+	int no_wait_cnt;        /* see fd_hw_get_query_result */
 };
 
 static inline struct fd_hw_query *
@@ -141,15 +140,15 @@ fd_hw_query(struct fd_query *q)
 
 struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type);
 /* helper for sample providers: */
-struct fd_hw_sample * fd_hw_sample_init(struct fd_context *ctx, uint32_t size);
+struct fd_hw_sample * fd_hw_sample_init(struct fd_batch *batch, uint32_t size);
 /* don't call directly, use fd_hw_sample_reference() */
 void __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp);
-void fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles);
-void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
+void fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles);
+void fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
 		struct fd_ringbuffer *ring);
-void fd_hw_query_set_stage(struct fd_context *ctx,
+void fd_hw_query_set_stage(struct fd_batch *batch,
 		struct fd_ringbuffer *ring, enum fd_render_stage stage);
-void fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring);
+void fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring);
 void fd_hw_query_register_provider(struct pipe_context *pctx,
 		const struct fd_hw_sample_provider *provider);
 void fd_hw_query_init(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index a9b94610e46..b6c9488ec65 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -48,23 +48,6 @@
 /* XXX this should go away, needed for 'struct winsys_handle' */
 #include "state_tracker/drm_driver.h"
 
-static bool
-pending(struct fd_resource *rsc, bool write)
-{
-	/* if we have a pending GPU write, we are busy in any case: */
-	if (rsc->write_batch)
-		return true;
-
-	/* if CPU wants to write, but we are pending a GPU read, we are busy: */
-	if (write && rsc->batch_mask)
-		return true;
-
-	if (rsc->stencil && pending(rsc->stencil, write))
-		return true;
-
-	return false;
-}
-
 static void
 fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
 {
@@ -755,6 +738,20 @@ slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
 	}
 }
 
+/* special case to resize query buf after allocated.. */
+void
+fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
+{
+	struct fd_resource *rsc = fd_resource(prsc);
+
+	debug_assert(prsc->width0 == 0);
+	debug_assert(prsc->target == PIPE_BUFFER);
+	debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
+
+	prsc->width0 = sz;
+	realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
+}
+
 /**
  * Create a new texture object, using the given template info.
 */
@@ -812,6 +809,15 @@ fd_resource_create(struct pipe_screen *pscreen,
 
 	size = setup_slices(rsc, alignment, format);
 
+	/* special case for hw-query buffer, which we need to allocate before we
+	 * know the size:
+	 */
+	if (size == 0) {
+		/* note, semi-intention == instead of & */
+		debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
+		return prsc;
+	}
+
 	if (rsc->layer_first) {
 		rsc->layer_size = align(size, 4096);
 		size = rsc->layer_size * prsc->array_size;
@@ -1048,7 +1054,8 @@ fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard)
 	util_blitter_save_render_condition(ctx->blitter,
 			ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
 
-	fd_hw_query_set_stage(ctx, ctx->batch->draw, FD_STAGE_BLIT);
+	if (ctx->batch)
+		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_BLIT);
 
 	ctx->discard = discard;
 }
@@ -1056,7 +1063,8 @@ fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard)
 static void
 fd_blitter_pipe_end(struct fd_context *ctx)
 {
-	fd_hw_query_set_stage(ctx, ctx->batch->draw, FD_STAGE_NULL);
+	if (ctx->batch)
+		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL);
 }
 
 static void
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index fcdb4c1e364..8caab6b8a5a 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -104,6 +104,23 @@ fd_resource(struct pipe_resource *ptex)
 	return (struct fd_resource *)ptex;
 }
 
+static inline bool
+pending(struct fd_resource *rsc, bool write)
+{
+	/* if we have a pending GPU write, we are busy in any case: */
+	if (rsc->write_batch)
+		return true;
+
+	/* if CPU wants to write, but we are pending a GPU read, we are busy: */
+	if (write && rsc->batch_mask)
+		return true;
+
+	if (rsc->stencil && pending(rsc->stencil, write))
+		return true;
+
+	return false;
+}
+
 struct fd_transfer {
 	struct pipe_transfer base;
 	void *staging;
@@ -140,6 +157,8 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
 void fd_resource_screen_init(struct pipe_screen *pscreen);
 void fd_resource_context_init(struct pipe_context *pctx);
 
+void fd_resource_resize(struct pipe_resource *prsc, uint32_t sz);
+
 bool fd_render_condition_check(struct pipe_context *pctx);
 
 #endif /* FREEDRENO_RESOURCE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 8ac41d29077..849ea08037d 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -37,6 +37,7 @@
 #include "freedreno_resource.h"
 #include "freedreno_texture.h"
 #include "freedreno_gmem.h"
+#include "freedreno_query_hw.h"
 #include "freedreno_util.h"
 
 /* All the generic state handling..  In case of CSO's that are specific
@@ -118,8 +119,10 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
 	struct pipe_framebuffer_state *cso;
 
 	if (ctx->screen->reorder) {
-		struct fd_batch *batch =
-			fd_batch_from_fb(&ctx->screen->batch_cache, ctx, framebuffer);
+		struct fd_batch *batch;
+		if (likely(ctx->batch))
+			fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL);
+		batch = fd_batch_from_fb(&ctx->screen->batch_cache, ctx, framebuffer);
 		fd_batch_reference(&ctx->batch, NULL);
 		ctx->batch = batch;
 		ctx->dirty = ~0;