/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark
 */

#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"

#include "freedreno_query_hw.h"
#include "freedreno_context.h"
#include "freedreno_util.h"

struct fd_hw_sample_period {
	struct fd_hw_sample *start, *end;
	struct list_head list;
};

/* maps query_type to sample provider idx: */
static int
pidx(unsigned query_type)
{
	switch (query_type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
		return 0;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		return 1;
	/* TODO currently queries only emitted in main pass (not in binning pass)..
	 * which is fine for occlusion query, but pretty much not anything else.
	 */
	case PIPE_QUERY_TIME_ELAPSED:
		return 2;
	case PIPE_QUERY_TIMESTAMP:
		return 3;
	default:
		return -1;
	}
}

static struct fd_hw_sample *
get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring,
		unsigned query_type)
{
	struct fd_hw_sample *samp = NULL;
	int idx = pidx(query_type);

	assume(idx >= 0);   /* query never would have been created otherwise */

	if (!ctx->sample_cache[idx]) {
		ctx->sample_cache[idx] =
			ctx->sample_providers[idx]->get_sample(ctx, ring);
		ctx->batch->needs_flush = true;
	}

	fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]);

	return samp;
}

static void
clear_sample_cache(struct fd_context *ctx)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(ctx->sample_cache); i++)
		fd_hw_sample_reference(ctx, &ctx->sample_cache[i], NULL);
}

static bool
is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
{
	return !!(hq->provider->active & stage);
}

static void
resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
		struct fd_ringbuffer *ring)
{
	int idx = pidx(hq->provider->query_type);

	assert(idx >= 0);   /* query never would have been created otherwise */
	assert(!hq->period);
	ctx->active_providers |= (1 << idx);
	hq->period = util_slab_alloc(&ctx->sample_period_pool);
	list_inithead(&hq->period->list);
	hq->period->start = get_sample(ctx, ring, hq->base.type);
	/* NOTE: util_slab_alloc() does not zero out the buffer: */
	hq->period->end = NULL;
}

static void
pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
		struct fd_ringbuffer *ring)
{
	int idx = pidx(hq->provider->query_type);

	assert(idx >= 0);   /* query never would have been created otherwise */
	assert(hq->period && !hq->period->end);
	assert(ctx->active_providers & (1 << idx));
	hq->period->end = get_sample(ctx, ring, hq->base.type);
	list_addtail(&hq->period->list, &hq->current_periods);
	hq->period = NULL;
}

static void
destroy_periods(struct fd_context *ctx, struct list_head *list)
{
	struct fd_hw_sample_period *period, *s;
	LIST_FOR_EACH_ENTRY_SAFE(period, s, list, list) {
		fd_hw_sample_reference(ctx, &period->start, NULL);
		fd_hw_sample_reference(ctx, &period->end, NULL);
		list_del(&period->list);
		util_slab_free(&ctx->sample_period_pool, period);
	}
}

static void
fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
{
	struct fd_hw_query *hq = fd_hw_query(q);

	destroy_periods(ctx, &hq->periods);
	destroy_periods(ctx, &hq->current_periods);
	list_del(&hq->list);

	free(hq);
}

static boolean
fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
{
	struct fd_hw_query *hq = fd_hw_query(q);

	if (q->active)
		return false;

	/* begin_query() should clear previous results: */
	destroy_periods(ctx, &hq->periods);

	if (is_active(hq, ctx->stage))
		resume_query(ctx, hq, ctx->batch->draw);

	q->active = true;

	/* add to active list: */
	list_del(&hq->list);
	list_addtail(&hq->list, &ctx->active_queries);

	return true;
}

static void
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
{
	struct fd_hw_query *hq = fd_hw_query(q);

	/* there are a couple special cases, which don't have
	 * a matching ->begin_query():
	 */
	if (skip_begin_query(q->type) && !q->active) {
		fd_hw_begin_query(ctx, q);
	}

	if (!q->active)
		return;

	if (is_active(hq, ctx->stage))
		pause_query(ctx, hq, ctx->batch->draw);

	q->active = false;

	/* move to current list: */
	list_del(&hq->list);
	list_addtail(&hq->list, &ctx->current_queries);
}

/* helper to get ptr to specified sample: */
static void *
sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
{
	return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
}

static boolean
fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
		boolean wait, union pipe_query_result *result)
{
	struct fd_hw_query *hq = fd_hw_query(q);
	const struct fd_hw_sample_provider *p = hq->provider;
	struct fd_hw_sample_period *period;

	if (q->active)
		return false;

	/* if the app tries to read back the query result before the
	 * batch is submitted, that forces us to flush so that there
	 * are actually results to wait for:
	 */
	if (!LIST_IS_EMPTY(&hq->list)) {
		/* if app didn't actually trigger any cmdstream, then
		 * we have nothing to do:
		 */
		if (!ctx->batch->needs_flush)
			return true;
		DBG("reading query result forces flush!");
		fd_batch_flush(ctx->batch);
	}

	util_query_clear_result(result, q->type);

	if (LIST_IS_EMPTY(&hq->periods))
		return true;

	assert(LIST_IS_EMPTY(&hq->list));
	assert(LIST_IS_EMPTY(&hq->current_periods));
	assert(!hq->period);

	/* if !wait, then check the last sample (the one most likely to
	 * not be ready yet) and bail if it is not ready:
	 */
	if (!wait) {
		int ret;

		period = LIST_ENTRY(struct fd_hw_sample_period,
				hq->periods.prev, list);

		ret = fd_bo_cpu_prep(period->end->bo, ctx->screen->pipe,
				DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
		if (ret)
			return false;

		fd_bo_cpu_fini(period->end->bo);
	}

	/* sum the result across all sample periods: */
	LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
		struct fd_hw_sample *start = period->start;
		struct fd_hw_sample *end = period->end;
		unsigned i;

		/* start and end samples should be from same batch: */
		assert(start->bo == end->bo);
		assert(start->num_tiles == end->num_tiles);

		for (i = 0; i < start->num_tiles; i++) {
			void *ptr;

			fd_bo_cpu_prep(start->bo, ctx->screen->pipe,
					DRM_FREEDRENO_PREP_READ);

			ptr = fd_bo_map(start->bo);

			p->accumulate_result(ctx, sampptr(period->start, i, ptr),
					sampptr(period->end, i, ptr), result);

			fd_bo_cpu_fini(start->bo);
		}
	}

	return true;
}

static const struct fd_query_funcs hw_query_funcs = {
		.destroy_query = fd_hw_destroy_query,
		.begin_query = fd_hw_begin_query,
		.end_query = fd_hw_end_query,
		.get_query_result = fd_hw_get_query_result,
};

struct fd_query *
fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
{
	struct fd_hw_query *hq;
	struct fd_query *q;
	int idx = pidx(query_type);

	if ((idx < 0) || !ctx->sample_providers[idx])
		return NULL;

	hq = CALLOC_STRUCT(fd_hw_query);
	if (!hq)
		return NULL;

	hq->provider = ctx->sample_providers[idx];

	list_inithead(&hq->periods);
	list_inithead(&hq->current_periods);
	list_inithead(&hq->list);

	q = &hq->base;
	q->funcs = &hw_query_funcs;
	q->type = query_type;

	return q;
}

struct fd_hw_sample *
fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
{
	struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
	pipe_reference_init(&samp->reference, 1);
	samp->size = size;
	debug_assert(util_is_power_of_two(size));
	ctx->next_sample_offset = align(ctx->next_sample_offset, size);
	samp->offset = ctx->next_sample_offset;
	/* NOTE: util_slab_alloc() does not zero out the buffer: */
	samp->bo = NULL;
	samp->num_tiles = 0;
	samp->tile_stride = 0;
	ctx->next_sample_offset += size;
	return samp;
}

void
__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
{
	if (samp->bo)
		fd_bo_del(samp->bo);
	util_slab_free(&ctx->sample_pool, samp);
}

static void
prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
		uint32_t num_tiles, uint32_t tile_stride)
{
	if (samp->bo) {
		assert(samp->bo == bo);
		assert(samp->num_tiles == num_tiles);
		assert(samp->tile_stride == tile_stride);
		return;
	}
	samp->bo = fd_bo_ref(bo);
	samp->num_tiles = num_tiles;
	samp->tile_stride = tile_stride;
}

static void
prepare_query(struct fd_hw_query *hq, struct fd_bo *bo,
		uint32_t num_tiles, uint32_t tile_stride)
{
	struct fd_hw_sample_period *period, *s;

	/* prepare all the samples in the query: */
	LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->current_periods, list) {
		prepare_sample(period->start, bo, num_tiles, tile_stride);
		prepare_sample(period->end, bo, num_tiles, tile_stride);

		/* move from current_periods list to periods list: */
		list_del(&period->list);
		list_addtail(&period->list, &hq->periods);
	}
}

static void
prepare_queries(struct fd_context *ctx, struct fd_bo *bo,
		uint32_t num_tiles, uint32_t tile_stride,
		struct list_head *list, bool remove)
{
	struct fd_hw_query *hq, *s;

	LIST_FOR_EACH_ENTRY_SAFE(hq, s, list, list) {
		prepare_query(hq, bo, num_tiles, tile_stride);
		if (remove)
			list_delinit(&hq->list);
	}
}

/* called from gmem code once total storage requirements are known (ie.
 * number of samples times number of tiles)
 */
void
fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles)
{
	uint32_t tile_stride = ctx->next_sample_offset;
	struct fd_bo *bo;

	if (ctx->query_bo)
		fd_bo_del(ctx->query_bo);

	if (tile_stride > 0) {
		bo = fd_bo_new(ctx->dev, tile_stride * num_tiles,
				DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
				DRM_FREEDRENO_GEM_TYPE_KMEM);
	} else {
		bo = NULL;
	}

	ctx->query_bo = bo;
	ctx->query_tile_stride = tile_stride;

	prepare_queries(ctx, bo, num_tiles, tile_stride,
			&ctx->active_queries, false);
	prepare_queries(ctx, bo, num_tiles, tile_stride,
			&ctx->current_queries, true);

	/* reset things for next batch: */
	ctx->next_sample_offset = 0;
}

void
fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
		struct fd_ringbuffer *ring)
{
	uint32_t tile_stride = ctx->query_tile_stride;
	uint32_t offset = tile_stride * n;

	/* bail if no queries: */
	if (tile_stride == 0)
		return;

	fd_wfi(ctx, ring);
	OUT_PKT0(ring, HW_QUERY_BASE_REG, 1);
	OUT_RELOCW(ring, ctx->query_bo, offset, 0, 0);
}

void
fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
		enum fd_render_stage stage)
{
	/* special case: internal blits (like mipmap level generation)
	 * go through normal draw path (via util_blitter_blit()).. but
	 * we need to ignore the FD_STAGE_DRAW which will be set, so we
	 * don't enable queries which should be paused during internal
	 * blits:
	 */
	if ((ctx->stage == FD_STAGE_BLIT) &&
			(stage != FD_STAGE_NULL))
		return;

	if (stage != ctx->stage) {
		struct fd_hw_query *hq;
		LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) {
			bool was_active = is_active(hq, ctx->stage);
			bool now_active = is_active(hq, stage);

			if (now_active && !was_active)
				resume_query(ctx, hq, ring);
			else if (was_active && !now_active)
				pause_query(ctx, hq, ring);
		}
	}
	clear_sample_cache(ctx);
	ctx->stage = stage;
}

/* call the provider->enable() for all the hw queries that were active
 * in the current batch.  This sets up perfctr selector regs statically
 * for the duration of the batch.
 */
void
fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
	for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
		if (ctx->active_providers & (1 << idx)) {
			assert(ctx->sample_providers[idx]);
			if (ctx->sample_providers[idx]->enable)
				ctx->sample_providers[idx]->enable(ctx, ring);
		}
	}
	ctx->active_providers = 0;  /* clear it for next frame */
}

void
fd_hw_query_register_provider(struct pipe_context *pctx,
		const struct fd_hw_sample_provider *provider)
{
	struct fd_context *ctx = fd_context(pctx);
	int idx = pidx(provider->query_type);

	assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
	assert(!ctx->sample_providers[idx]);

	ctx->sample_providers[idx] = provider;
}

void
fd_hw_query_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	util_slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
			16, UTIL_SLAB_SINGLETHREADED);
	util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
			16, UTIL_SLAB_SINGLETHREADED);
	list_inithead(&ctx->active_queries);
	list_inithead(&ctx->current_queries);
}

void
fd_hw_query_fini(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	util_slab_destroy(&ctx->sample_pool);
	util_slab_destroy(&ctx->sample_period_pool);
}
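
/* Illustrative sketch (kept out of the build): roughly how a generation-
 * specific backend could plug into the machinery above.  The provider
 * fields and callback signatures (query_type, active, get_sample,
 * accumulate_result) match how they are used in this file, but the
 * "example_*" names, the sample size, and the per-sample layout are
 * hypothetical -- the per-generation query code registers the real
 * providers.
 */
#if 0
static struct fd_hw_sample *
example_occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
	/* reserve space for one counter value per tile; fd_hw_sample_init()
	 * hands back the per-tile offset, while the bo and tile_stride are
	 * filled in later by fd_hw_query_prepare():
	 */
	struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t));

	/* ...emit cmdstream here that makes the hw write the counter value
	 * at samp->offset relative to the per-tile base programmed by
	 * fd_hw_query_prepare_tile()...
	 */

	return samp;
}

static void
example_occlusion_accumulate_result(struct fd_context *ctx,
		const void *start, const void *end,
		union pipe_query_result *result)
{
	/* fd_hw_get_query_result() calls this once per tile per sample
	 * period, so just add this period's delta into the running total:
	 */
	result->u64 += *(const uint64_t *)end - *(const uint64_t *)start;
}

static const struct fd_hw_sample_provider example_occlusion_provider = {
		.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
		.active = FD_STAGE_DRAW,
		.get_sample = example_occlusion_get_sample,
		.accumulate_result = example_occlusion_accumulate_result,
};

/* registered once at context creation, before any query of that type
 * can be created:
 *
 *    fd_hw_query_register_provider(pctx, &example_occlusion_provider);
 */
#endif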