Diffstat (limited to 'src/gallium/drivers/radeon/r600_query.c')
 src/gallium/drivers/radeon/r600_query.c | 1017 ++++++++++++++++++-----------
 1 file changed, 593 insertions(+), 424 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 8c2b601a96c..b1cfb6e462b 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -22,81 +22,218 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */

+#include "r600_query.h"
 #include "r600_cs.h"
 #include "util/u_memory.h"

+/* Queries without buffer handling or suspend/resume. */
+struct r600_query_sw {
+	struct r600_query b;

-struct r600_query_buffer {
-	/* The buffer where query results are stored. */
-	struct r600_resource *buf;
-	/* Offset of the next free result after current query data */
-	unsigned results_end;
-	/* If a query buffer is full, a new buffer is created and the old one
-	 * is put in here. When we calculate the result, we sum up the samples
-	 * from all buffers. */
-	struct r600_query_buffer *previous;
-};
-
-struct r600_query {
-	/* The query buffer and how many results are in it. */
-	struct r600_query_buffer buffer;
-	/* The type of query */
-	unsigned type;
-	/* Size of the result in memory for both begin_query and end_query,
-	 * this can be one or two numbers, or it could even be a size of a structure. */
-	unsigned result_size;
-	/* The number of dwords for begin_query or end_query. */
-	unsigned num_cs_dw;
-	/* linked list of queries */
-	struct list_head list;
-	/* for custom non-GPU queries */
 	uint64_t begin_result;
 	uint64_t end_result;
+
 	/* Fence for GPU_FINISHED. */
 	struct pipe_fence_handle *fence;
-	/* For transform feedback: which stream the query is for */
-	unsigned stream;
 };

-
-static bool r600_is_timer_query(unsigned type)
+static void r600_query_sw_destroy(struct r600_common_context *rctx,
+				  struct r600_query *rquery)
 {
-	return type == PIPE_QUERY_TIME_ELAPSED ||
-	       type == PIPE_QUERY_TIMESTAMP;
+	struct pipe_screen *screen = rctx->b.screen;
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	screen->fence_reference(screen, &query->fence, NULL);
+	FREE(query);
 }

-static bool r600_query_needs_begin(unsigned type)
+static enum radeon_value_id winsys_id_from_type(unsigned type)
 {
-	return type != PIPE_QUERY_GPU_FINISHED &&
-	       type != PIPE_QUERY_TIMESTAMP;
+	switch (type) {
+	case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
+	case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+	case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
+	case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES;
+	case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
+	case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+	case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
+	case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
+	case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
+	case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+	default: unreachable("query type does not correspond to winsys id");
+	}
 }

-static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type)
+static boolean r600_query_sw_begin(struct r600_common_context *rctx,
+				   struct r600_query *rquery)
 {
-	unsigned j, i, num_results, buf_size = 4096;
-	uint32_t *results;
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;

-	/* Non-GPU queries. */
-	switch (type) {
+	switch(query->b.type) {
 	case PIPE_QUERY_TIMESTAMP_DISJOINT:
 	case PIPE_QUERY_GPU_FINISHED:
+		break;
 	case R600_QUERY_DRAW_CALLS:
+		query->begin_result = rctx->num_draw_calls;
+		break;
 	case R600_QUERY_REQUESTED_VRAM:
 	case R600_QUERY_REQUESTED_GTT:
+	case R600_QUERY_VRAM_USAGE:
+	case R600_QUERY_GTT_USAGE:
+	case R600_QUERY_GPU_TEMPERATURE:
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK:
+		query->begin_result = 0;
+		break;
 	case R600_QUERY_BUFFER_WAIT_TIME:
 	case R600_QUERY_NUM_CS_FLUSHES:
-	case R600_QUERY_NUM_BYTES_MOVED:
+	case R600_QUERY_NUM_BYTES_MOVED: {
+		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+		break;
+	}
+	case R600_QUERY_GPU_LOAD:
+		query->begin_result = r600_gpu_load_begin(rctx->screen);
+		break;
+	case R600_QUERY_NUM_COMPILATIONS:
+		query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+		break;
+	case R600_QUERY_NUM_SHADERS_CREATED:
+		query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+		break;
+	default:
+		unreachable("r600_query_sw_begin: bad query type");
+	}
+
+	return TRUE;
+}
+
+static void r600_query_sw_end(struct r600_common_context *rctx,
+			      struct r600_query *rquery)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	switch(query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+		break;
+	case PIPE_QUERY_GPU_FINISHED:
+		rctx->b.flush(&rctx->b, &query->fence, 0);
+		break;
+	case R600_QUERY_DRAW_CALLS:
+		query->end_result = rctx->num_draw_calls;
+		break;
+	case R600_QUERY_REQUESTED_VRAM:
+	case R600_QUERY_REQUESTED_GTT:
 	case R600_QUERY_VRAM_USAGE:
 	case R600_QUERY_GTT_USAGE:
 	case R600_QUERY_GPU_TEMPERATURE:
 	case R600_QUERY_CURRENT_GPU_SCLK:
 	case R600_QUERY_CURRENT_GPU_MCLK:
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_NUM_CS_FLUSHES:
+	case R600_QUERY_NUM_BYTES_MOVED: {
+		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+		break;
+	}
 	case R600_QUERY_GPU_LOAD:
+		query->end_result = r600_gpu_load_end(rctx->screen,
+						      query->begin_result);
+		query->begin_result = 0;
+		break;
 	case R600_QUERY_NUM_COMPILATIONS:
+		query->end_result = p_atomic_read(&rctx->screen->num_compilations);
+		break;
 	case R600_QUERY_NUM_SHADERS_CREATED:
+		query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
+		break;
+	default:
+		unreachable("r600_query_sw_end: bad query type");
+	}
+}
+
+static boolean r600_query_sw_get_result(struct r600_common_context *rctx,
+					struct r600_query *rquery,
+					boolean wait,
+					union pipe_query_result *result)
+{
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	switch (query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+		/* Convert from cycles per millisecond to cycles per second (Hz). */
+		result->timestamp_disjoint.frequency =
+			(uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000;
+		result->timestamp_disjoint.disjoint = FALSE;
+		return TRUE;
+	case PIPE_QUERY_GPU_FINISHED: {
+		struct pipe_screen *screen = rctx->b.screen;
+		result->b = screen->fence_finish(screen, query->fence,
+						 wait ? PIPE_TIMEOUT_INFINITE : 0);
+		return result->b;
+	}
+	}
+
+	result->u64 = query->end_result - query->begin_result;
+
+	switch (query->b.type) {
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_GPU_TEMPERATURE:
+		result->u64 /= 1000;
+		break;
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK:
+		result->u64 *= 1000000;
+		break;
+	}
+
+	return TRUE;
+}
+
+static struct r600_query_ops sw_query_ops = {
+	.destroy = r600_query_sw_destroy,
+	.begin = r600_query_sw_begin,
+	.end = r600_query_sw_end,
+	.get_result = r600_query_sw_get_result
+};
+
+static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx,
+					       unsigned query_type)
+{
+	struct r600_query_sw *query;
+
+	query = CALLOC_STRUCT(r600_query_sw);
+	if (query == NULL)
+		return NULL;
+
+	query->b.type = query_type;
+	query->b.ops = &sw_query_ops;
+
+	return (struct pipe_query *)query;
+}
+
+void r600_query_hw_destroy(struct r600_common_context *rctx,
+			   struct r600_query *rquery)
+{
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+	struct r600_query_buffer *prev = query->buffer.previous;
+
+	/* Release all query buffers. */
+	while (prev) {
+		struct r600_query_buffer *qbuf = prev;
+		prev = prev->previous;
+		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
+		FREE(qbuf);
 	}

+	pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
+	FREE(rquery);
+}
+
+static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx,
+						   struct r600_query_hw *query)
+{
+	unsigned buf_size = 4096;
+
 	/* Queries are normally read by the CPU after
 	 * being written by the gpu, hence staging is probably a good
 	 * usage pattern.
@@ -105,14 +242,30 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
 		pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
 				   PIPE_USAGE_STAGING, buf_size);

-	switch (type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
-		memset(results, 0, buf_size);
+	if (query->flags & R600_QUERY_HW_FLAG_PREDICATE)
+		query->ops->prepare_buffer(ctx, query, buf);
+
+	return buf;
+}
+
+static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
+					 struct r600_query_hw *query,
+					 struct r600_resource *buffer)
+{
+	/* Callers ensure that the buffer is currently unused by the GPU. */
+	uint32_t *results = ctx->ws->buffer_map(buffer->cs_buf, NULL,
+						PIPE_TRANSFER_WRITE |
+						PIPE_TRANSFER_UNSYNCHRONIZED);
+
+	memset(results, 0, buffer->b.b.width0);
+
+	if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
+	    query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+		unsigned num_results;
+		unsigned i, j;

 		/* Set top bits for unused backends. */
-		num_results = buf_size / (16 * ctx->max_db);
+		num_results = buffer->b.b.width0 / (16 * ctx->max_db);
 		for (j = 0; j < num_results; j++) {
 			for (i = 0; i < ctx->max_db; i++) {
 				if (!(ctx->backend_mask & (1<<i))) {
@@ -122,22 +275,109 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
 			}
 			results += 4 * ctx->max_db;
 		}
+	}
+}
+
+static struct r600_query_ops query_hw_ops = {
+	.destroy = r600_query_hw_destroy,
+	.begin = r600_query_hw_begin,
+	.end = r600_query_hw_end,
+	.get_result = r600_query_hw_get_result,
+};
+
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+					struct r600_query_hw *query,
+					struct r600_resource *buffer,
+					uint64_t va);
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+				       struct r600_query_hw *query,
+				       struct r600_resource *buffer,
+				       uint64_t va);
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+				     struct r600_query_hw *, void *buffer,
+				     union pipe_query_result *result);
+static void r600_query_hw_clear_result(struct r600_query_hw *,
+				       union pipe_query_result *);
+
+static struct r600_query_hw_ops query_hw_default_hw_ops = {
+	.prepare_buffer = r600_query_hw_prepare_buffer,
+	.emit_start = r600_query_hw_do_emit_start,
+	.emit_stop = r600_query_hw_do_emit_stop,
+	.clear_result = r600_query_hw_clear_result,
+	.add_result = r600_query_hw_add_result,
+};
+
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+			   struct r600_query_hw *query)
+{
+	query->buffer.buf = r600_new_query_buffer(rctx, query);
+	if (!query->buffer.buf)
+		return FALSE;
+
+	return TRUE;
+}
+
+static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
+					       unsigned query_type,
+					       unsigned index)
+{
+	struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw);
+	if (!query)
+		return NULL;
+
+	query->b.type = query_type;
+	query->b.ops = &query_hw_ops;
+	query->ops = &query_hw_default_hw_ops;
+
+	switch (query_type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		query->result_size = 16 * rctx->max_db;
+		query->num_cs_dw_begin = 6;
+		query->num_cs_dw_end = 6;
+		query->flags |= R600_QUERY_HW_FLAG_PREDICATE;
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
+		query->result_size = 16;
+		query->num_cs_dw_begin = 8;
+		query->num_cs_dw_end = 8;
+		query->flags = R600_QUERY_HW_FLAG_TIMER;
+		break;
 	case PIPE_QUERY_TIMESTAMP:
+		query->result_size = 8;
+		query->num_cs_dw_end = 8;
+		query->flags = R600_QUERY_HW_FLAG_TIMER |
+			       R600_QUERY_HW_FLAG_NO_START;
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+		query->result_size = 32;
+		query->num_cs_dw_begin = 6;
+		query->num_cs_dw_end = 6;
+		query->stream = index;
+		query->flags |= R600_QUERY_HW_FLAG_PREDICATE;
+		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
-		results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
-		memset(results, 0, buf_size);
+		/* 11 values on EG, 8 on R600. */
+		query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
+		query->num_cs_dw_begin = 6;
+		query->num_cs_dw_end = 6;
 		break;
 	default:
 		assert(0);
+		FREE(query);
+		return NULL;
 	}
-	return buf;
+
+	if (!r600_query_hw_init(rctx, query)) {
+		FREE(query);
+		return NULL;
+	}
+
+	return (struct pipe_query *)query;
 }

@@ -159,7 +399,7 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
 	}
 }

-static unsigned event_type_for_stream(struct r600_query *query)
+static unsigned event_type_for_stream(struct r600_query_hw *query)
 {
 	switch (query->stream) {
 	default:
@@ -170,28 +410,14 @@ static unsigned event_type_for_stream(struct r600_query *query)
 	}
 }

-static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+					struct r600_query_hw *query,
+					struct r600_resource *buffer,
+					uint64_t va)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
-	uint64_t va;
-
-	r600_update_occlusion_query_state(ctx, query->type, 1);
-	r600_update_prims_generated_query_state(ctx, query->type, 1);
-	ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE);
-
-	/* Get a new query buffer if needed. */
-	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
-		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
-		*qbuf = query->buffer;
-		query->buffer.buf = r600_new_query_buffer(ctx, query->type);
-		query->buffer.results_end = 0;
-		query->buffer.previous = qbuf;
-	}
-
-	/* emit begin query */
-	va = query->buffer.buf->gpu_address + query->buffer.results_end;

-	switch (query->type) {
+	switch (query->b.type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
@@ -227,30 +453,50 @@
 	}
 	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
 			RADEON_PRIO_QUERY);
-
-	if (r600_is_timer_query(query->type))
-		ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
-	else
-		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
 }

-static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
+static void r600_query_hw_emit_start(struct r600_common_context *ctx,
+				     struct r600_query_hw *query)
 {
-	struct radeon_winsys_cs *cs = ctx->gfx.cs;
 	uint64_t va;

-	/* The queries which need begin already called this in begin_query. */
-	if (!r600_query_needs_begin(query->type)) {
-		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
+	r600_update_occlusion_query_state(ctx, query->b.type, 1);
+	r600_update_prims_generated_query_state(ctx, query->b.type, 1);
+
+	ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
+			       TRUE);
+
+	/* Get a new query buffer if needed. */
+	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
+		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
+		*qbuf = query->buffer;
+		query->buffer.buf = r600_new_query_buffer(ctx, query);
+		query->buffer.results_end = 0;
+		query->buffer.previous = qbuf;
 	}

-	va = query->buffer.buf->gpu_address;
+	/* emit begin query */
+	va = query->buffer.buf->gpu_address + query->buffer.results_end;
+
+	query->ops->emit_start(ctx, query, query->buffer.buf, va);

-	/* emit end query */
-	switch (query->type) {
+	if (query->flags & R600_QUERY_HW_FLAG_TIMER)
+		ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw_end;
+	else
+		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw_end;
+}
+
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+				       struct r600_query_hw *query,
+				       struct r600_resource *buffer,
+				       uint64_t va)
+{
+	struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+	switch (query->b.type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		va += query->buffer.results_end + 8;
+		va += 8;
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
@@ -260,14 +506,14 @@ static void r600_emit_query_end(struct r600_common_que
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		va += query->buffer.results_end + query->result_size/2;
+		va += query->result_size/2;
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
 		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
-		va += query->buffer.results_end + query->result_size/2;
+		va += query->result_size/2;
 		/* fall through */
 	case PIPE_QUERY_TIMESTAMP:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
@@ -278,7 +524,7 @@ static void r600_emit_query_end(struct r600_common_que
 		radeon_emit(cs, 0);
 		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
-		va += query->buffer.results_end + query->result_size/2;
+		va += query->result_size/2;
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
@@ -289,25 +535,41 @@ static void r600_emit_query_end(struct r600_common_que
 	}
 	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
 			RADEON_PRIO_QUERY);
+}
+
+static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
+				    struct r600_query_hw *query)
+{
+	uint64_t va;
+
+	/* The queries which need begin already called this in begin_query. */
+	if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
+		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, FALSE);
+	}
+
+	/* emit end query */
+	va = query->buffer.buf->gpu_address + query->buffer.results_end;
+
+	query->ops->emit_stop(ctx, query, query->buffer.buf, va);

 	query->buffer.results_end += query->result_size;

-	if (r600_query_needs_begin(query->type)) {
-		if (r600_is_timer_query(query->type))
-			ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw;
+	if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) {
+		if (query->flags & R600_QUERY_HW_FLAG_TIMER)
+			ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw_end;
 		else
-			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
+			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw_end;
 	}

-	r600_update_occlusion_query_state(ctx, query->type, -1);
-	r600_update_prims_generated_query_state(ctx, query->type, -1);
+	r600_update_occlusion_query_state(ctx, query->b.type, -1);
+	r600_update_prims_generated_query_state(ctx, query->b.type, -1);
 }

 static void r600_emit_query_predication(struct r600_common_context *ctx,
 					struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
-	struct r600_query *query = (struct r600_query*)ctx->render_cond;
+	struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
 	struct r600_query_buffer *qbuf;
 	uint32_t op;
 	bool flag_wait;
@@ -318,7 +580,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
 	flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
 		    ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;

-	switch (query->type) {
+	switch (query->b.type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
 		op = PRED_OP(PREDICATION_OP_ZPASS);
@@ -364,94 +626,21 @@
 static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-	struct r600_query *query;
-	bool skip_allocation = false;
-
-	query = CALLOC_STRUCT(r600_query);
-	if (query == NULL)
-		return NULL;
-
-	query->type = query_type;
-
-	switch (query_type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		query->result_size = 16 * rctx->max_db;
-		query->num_cs_dw = 6;
-		break;
-	case PIPE_QUERY_TIME_ELAPSED:
-		query->result_size = 16;
-		query->num_cs_dw = 8;
-		break;
-	case PIPE_QUERY_TIMESTAMP:
-		query->result_size = 8;
-		query->num_cs_dw = 8;
-		break;
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-	case PIPE_QUERY_SO_STATISTICS:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
-		query->result_size = 32;
-		query->num_cs_dw = 6;
-		query->stream = index;
-		break;
-	case PIPE_QUERY_PIPELINE_STATISTICS:
-		/* 11 values on EG, 8 on R600. */
-		query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
-		query->num_cs_dw = 6;
-		break;
-	/* Non-GPU queries and queries not requiring a buffer. */
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-	case PIPE_QUERY_GPU_FINISHED:
-	case R600_QUERY_DRAW_CALLS:
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_BUFFER_WAIT_TIME:
-	case R600_QUERY_NUM_CS_FLUSHES:
-	case R600_QUERY_NUM_BYTES_MOVED:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-	case R600_QUERY_GPU_LOAD:
-	case R600_QUERY_NUM_COMPILATIONS:
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		skip_allocation = true;
-		break;
-	default:
-		assert(0);
-		FREE(query);
-		return NULL;
-	}
+	if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
+	    query_type == PIPE_QUERY_GPU_FINISHED ||
+	    query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
+		return r600_query_sw_create(ctx, query_type);

-	if (!skip_allocation) {
-		query->buffer.buf = r600_new_query_buffer(rctx, query_type);
-		if (!query->buffer.buf) {
-			FREE(query);
-			return NULL;
-		}
-	}
-	return (struct pipe_query*)query;
+	return r600_query_hw_create(rctx, query_type, index);
 }

 static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
 {
-	struct r600_query *rquery = (struct r600_query*)query;
-	struct r600_query_buffer *prev = rquery->buffer.previous;
-
-	/* Release all query buffers. */
-	while (prev) {
-		struct r600_query_buffer *qbuf = prev;
-		prev = prev->previous;
-		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
-		FREE(qbuf);
-	}
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	struct r600_query *rquery = (struct r600_query *)query;

-	pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
-	FREE(query);
+	rquery->ops->destroy(rctx, rquery);
 }

 static boolean r600_begin_query(struct pipe_context *ctx,
@@ -459,48 +648,14 @@
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	struct r600_query *rquery = (struct r600_query *)query;
-	struct r600_query_buffer *prev = rquery->buffer.previous;

-	if (!r600_query_needs_begin(rquery->type)) {
-		assert(0);
-		return false;
-	}
+	return rquery->ops->begin(rctx, rquery);
+}

-	/* Non-GPU queries. */
-	switch (rquery->type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-		return true;
-	case R600_QUERY_DRAW_CALLS:
-		rquery->begin_result = rctx->num_draw_calls;
-		return true;
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-		rquery->begin_result = 0;
-		return true;
-	case R600_QUERY_BUFFER_WAIT_TIME:
-		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
-		return true;
-	case R600_QUERY_NUM_CS_FLUSHES:
-		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
-		return true;
-	case R600_QUERY_NUM_BYTES_MOVED:
-		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
-		return true;
-	case R600_QUERY_GPU_LOAD:
-		rquery->begin_result = r600_gpu_load_begin(rctx->screen);
-		return true;
-	case R600_QUERY_NUM_COMPILATIONS:
-		rquery->begin_result = p_atomic_read(&rctx->screen->num_compilations);
-		return true;
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		rquery->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
-		return true;
-	}
+static void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
+					struct r600_query_hw *query)
+{
+	struct r600_query_buffer *prev = query->buffer.previous;

 	/* Discard the old query buffers. */
 	while (prev) {
@@ -510,22 +665,39 @@ static boolean r600_begin_query(struct pipe_context *ctx,
 		FREE(qbuf);
 	}

-	/* Obtain a new buffer if the current one can't be mapped without a stall. */
-	if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
-	    !rctx->ws->buffer_wait(rquery->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
-		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
-		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
+	if (query->flags & R600_QUERY_HW_FLAG_PREDICATE) {
+		/* Obtain a new buffer if the current one can't be mapped without a stall. */
+		if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
+		    !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
+			pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
+			query->buffer.buf = r600_new_query_buffer(rctx, query);
+		} else {
+			query->ops->prepare_buffer(rctx, query, query->buffer.buf);
+		}
 	}

-	rquery->buffer.results_end = 0;
-	rquery->buffer.previous = NULL;
+	query->buffer.results_end = 0;
+	query->buffer.previous = NULL;
+}

-	r600_emit_query_begin(rctx, rquery);
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+			    struct r600_query *rquery)
+{
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;

-	if (r600_is_timer_query(rquery->type))
-		LIST_ADDTAIL(&rquery->list, &rctx->active_timer_queries);
+	if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
+		assert(0);
+		return false;
+	}
+
+	r600_query_hw_reset_buffers(rctx, query);
+
+	r600_query_hw_emit_start(rctx, query);
+
+	if (query->flags & R600_QUERY_HW_FLAG_TIMER)
+		LIST_ADDTAIL(&query->list, &rctx->active_timer_queries);
 	else
-		LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
+		LIST_ADDTAIL(&query->list, &rctx->active_nontimer_queries);

 	return true;
 }
@@ -534,64 +706,24 @@
 static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	struct r600_query *rquery = (struct r600_query *)query;

-	/* Non-GPU queries. */
-	switch (rquery->type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-		return;
-	case PIPE_QUERY_GPU_FINISHED:
-		ctx->flush(ctx, &rquery->fence, 0);
-		return;
-	case R600_QUERY_DRAW_CALLS:
-		rquery->end_result = rctx->num_draw_calls;
-		return;
-	case R600_QUERY_REQUESTED_VRAM:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY);
-		return;
-	case R600_QUERY_REQUESTED_GTT:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
-		return;
-	case R600_QUERY_BUFFER_WAIT_TIME:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
-		return;
-	case R600_QUERY_NUM_CS_FLUSHES:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
-		return;
-	case R600_QUERY_NUM_BYTES_MOVED:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
-		return;
-	case R600_QUERY_VRAM_USAGE:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_VRAM_USAGE);
-		return;
-	case R600_QUERY_GTT_USAGE:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE);
-		return;
-	case R600_QUERY_GPU_TEMPERATURE:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000;
-		return;
-	case R600_QUERY_CURRENT_GPU_SCLK:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000;
-		return;
-	case R600_QUERY_CURRENT_GPU_MCLK:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000;
-		return;
-	case R600_QUERY_GPU_LOAD:
-		rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result);
-		return;
-	case R600_QUERY_NUM_COMPILATIONS:
-		rquery->end_result = p_atomic_read(&rctx->screen->num_compilations);
-		return;
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		rquery->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
-		return;
-	}
+	rquery->ops->end(rctx, rquery);
+}

-	r600_emit_query_end(rctx, rquery);
+void r600_query_hw_end(struct r600_common_context *rctx,
+		       struct r600_query *rquery)
+{
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+
+	if (query->flags & R600_QUERY_HW_FLAG_NO_START)
+		r600_query_hw_reset_buffers(rctx, query);

-	if (r600_query_needs_begin(rquery->type))
-		LIST_DELINIT(&rquery->list);
+	r600_query_hw_emit_stop(rctx, query);
+
+	if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
+		LIST_DELINIT(&query->list);
 }

-static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
+static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index,
 				       bool test_status_bit)
 {
 	uint32_t *current_result = (uint32_t*)map;
@@ -609,80 +741,36 @@ static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned
 	return 0;
 }

-static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
-					    struct r600_query *query,
-					    struct r600_query_buffer *qbuf,
-					    boolean wait,
-					    union pipe_query_result *result)
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+				     struct r600_query_hw *query,
+				     void *buffer,
+				     union pipe_query_result *result)
 {
-	struct pipe_screen *screen = ctx->b.screen;
-	unsigned results_base = 0;
-	char *map;
-
-	/* Non-GPU queries. */
-	switch (query->type) {
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-		/* Convert from cycles per millisecond to cycles per second (Hz). */
-		result->timestamp_disjoint.frequency =
-			(uint64_t)ctx->screen->info.r600_clock_crystal_freq * 1000;
-		result->timestamp_disjoint.disjoint = FALSE;
-		return TRUE;
-	case PIPE_QUERY_GPU_FINISHED:
-		result->b = screen->fence_finish(screen, query->fence,
-						 wait ? PIPE_TIMEOUT_INFINITE : 0);
-		return result->b;
-	case R600_QUERY_DRAW_CALLS:
-	case R600_QUERY_REQUESTED_VRAM:
-	case R600_QUERY_REQUESTED_GTT:
-	case R600_QUERY_BUFFER_WAIT_TIME:
-	case R600_QUERY_NUM_CS_FLUSHES:
-	case R600_QUERY_NUM_BYTES_MOVED:
-	case R600_QUERY_VRAM_USAGE:
-	case R600_QUERY_GTT_USAGE:
-	case R600_QUERY_GPU_TEMPERATURE:
-	case R600_QUERY_CURRENT_GPU_SCLK:
-	case R600_QUERY_CURRENT_GPU_MCLK:
-	case R600_QUERY_NUM_COMPILATIONS:
-	case R600_QUERY_NUM_SHADERS_CREATED:
-		result->u64 = query->end_result - query->begin_result;
-		return TRUE;
-	case R600_QUERY_GPU_LOAD:
-		result->u64 = query->end_result;
-		return TRUE;
-	}
-
-	map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
-					      PIPE_TRANSFER_READ |
-					      (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
-	if (!map)
-		return FALSE;
-
-	/* count all results across all data blocks */
-	switch (query->type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-		while (results_base != qbuf->results_end) {
+	switch (query->b.type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER: {
+		unsigned results_base = 0;
+		while (results_base != query->result_size) {
 			result->u64 +=
-				r600_query_read_result(map + results_base, 0, 2, true);
+				r600_query_read_result(buffer + results_base, 0, 2, true);
 			results_base += 16;
 		}
 		break;
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		while (results_base != qbuf->results_end) {
+	}
+	case PIPE_QUERY_OCCLUSION_PREDICATE: {
+		unsigned results_base = 0;
		while (results_base != query->result_size) {
 			result->b = result->b ||
-				r600_query_read_result(map + results_base, 0, 2, true) != 0;
+				r600_query_read_result(buffer + results_base, 0, 2, true) != 0;
 			results_base += 16;
 		}
 		break;
+	}
 	case PIPE_QUERY_TIME_ELAPSED:
-		while (results_base != qbuf->results_end) {
-			result->u64 +=
-				r600_query_read_result(map + results_base, 0, 2, false);
-			results_base += query->result_size;
-		}
+		result->u64 += r600_query_read_result(buffer, 0, 2, false);
 		break;
 	case PIPE_QUERY_TIMESTAMP:
 	{
-		uint32_t *current_result = (uint32_t*)map;
+		uint32_t *current_result = (uint32_t*)buffer;
 		result->u64 = (uint64_t)current_result[0] |
 			      (uint64_t)current_result[1] << 32;
 		break;
@@ -694,84 +782,64 @@ static boolean r600_get_query_buffer_result(struct r600_common_co
 		 *    u64 PrimitiveStorageNeeded;
 		 * }
 		 * We only need NumPrimitivesWritten here. */
-		while (results_base != qbuf->results_end) {
-			result->u64 +=
-				r600_query_read_result(map + results_base, 2, 6, true);
-			results_base += query->result_size;
-		}
+		result->u64 += r600_query_read_result(buffer, 2, 6, true);
 		break;
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 		/* Here we read PrimitiveStorageNeeded. */
-		while (results_base != qbuf->results_end) {
-			result->u64 +=
-				r600_query_read_result(map + results_base, 0, 4, true);
-			results_base += query->result_size;
-		}
+		result->u64 += r600_query_read_result(buffer, 0, 4, true);
 		break;
 	case PIPE_QUERY_SO_STATISTICS:
-		while (results_base != qbuf->results_end) {
-			result->so_statistics.num_primitives_written +=
-				r600_query_read_result(map + results_base, 2, 6, true);
-			result->so_statistics.primitives_storage_needed +=
-				r600_query_read_result(map + results_base, 0, 4, true);
-			results_base += query->result_size;
-		}
+		result->so_statistics.num_primitives_written +=
+			r600_query_read_result(buffer, 2, 6, true);
+		result->so_statistics.primitives_storage_needed +=
+			r600_query_read_result(buffer, 0, 4, true);
 		break;
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		while (results_base != qbuf->results_end) {
-			result->b = result->b ||
-				r600_query_read_result(map + results_base, 2, 6, true) !=
-				r600_query_read_result(map + results_base, 0, 4, true);
-			results_base += query->result_size;
-		}
+		result->b = result->b ||
+			r600_query_read_result(buffer, 2, 6, true) !=
+			r600_query_read_result(buffer, 0, 4, true);
 		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		if (ctx->chip_class >= EVERGREEN) {
-			while (results_base != qbuf->results_end) {
-				result->pipeline_statistics.ps_invocations +=
-					r600_query_read_result(map + results_base, 0, 22, false);
-				result->pipeline_statistics.c_primitives +=
-					r600_query_read_result(map + results_base, 2, 24, false);
-				result->pipeline_statistics.c_invocations +=
-					r600_query_read_result(map + results_base, 4, 26, false);
-				result->pipeline_statistics.vs_invocations +=
-					r600_query_read_result(map + results_base, 6, 28, false);
-				result->pipeline_statistics.gs_invocations +=
-					r600_query_read_result(map + results_base, 8, 30, false);
-				result->pipeline_statistics.gs_primitives +=
-					r600_query_read_result(map + results_base, 10, 32, false);
-				result->pipeline_statistics.ia_primitives +=
-					r600_query_read_result(map + results_base, 12, 34, false);
-				result->pipeline_statistics.ia_vertices +=
-					r600_query_read_result(map + results_base, 14, 36, false);
-				result->pipeline_statistics.hs_invocations +=
-					r600_query_read_result(map + results_base, 16, 38, false);
-				result->pipeline_statistics.ds_invocations +=
-					r600_query_read_result(map + results_base, 18, 40, false);
-				result->pipeline_statistics.cs_invocations +=
-					r600_query_read_result(map + results_base, 20, 42, false);
-				results_base += query->result_size;
-			}
+			result->pipeline_statistics.ps_invocations +=
+				r600_query_read_result(buffer, 0, 22, false);
+			result->pipeline_statistics.c_primitives +=
+				r600_query_read_result(buffer, 2, 24, false);
+			result->pipeline_statistics.c_invocations +=
+				r600_query_read_result(buffer, 4, 26, false);
+			result->pipeline_statistics.vs_invocations +=
+				r600_query_read_result(buffer, 6, 28, false);
+			result->pipeline_statistics.gs_invocations +=
+				r600_query_read_result(buffer, 8, 30, false);
+			result->pipeline_statistics.gs_primitives +=
+				r600_query_read_result(buffer, 10, 32, false);
+			result->pipeline_statistics.ia_primitives +=
+				r600_query_read_result(buffer, 12, 34, false);
+			result->pipeline_statistics.ia_vertices +=
+				r600_query_read_result(buffer, 14, 36, false);
+			result->pipeline_statistics.hs_invocations +=
+				r600_query_read_result(buffer, 16, 38, false);
+			result->pipeline_statistics.ds_invocations +=
+				r600_query_read_result(buffer, 18, 40, false);
+			result->pipeline_statistics.cs_invocations +=
+				r600_query_read_result(buffer, 20, 42, false);
 		} else {
-			while (results_base != qbuf->results_end) {
-				result->pipeline_statistics.ps_invocations +=
-					r600_query_read_result(map + results_base, 0, 16, false);
-				result->pipeline_statistics.c_primitives +=
-					r600_query_read_result(map + results_base, 2, 18, false);
-				result->pipeline_statistics.c_invocations +=
-					r600_query_read_result(map + results_base, 4, 20, false);
-				result->pipeline_statistics.vs_invocations +=
-					r600_query_read_result(map + results_base, 6, 22, false);
-				result->pipeline_statistics.gs_invocations +=
-					r600_query_read_result(map + results_base, 8, 24, false);
-				result->pipeline_statistics.gs_primitives +=
-					r600_query_read_result(map + results_base, 10, 26, false);
-				result->pipeline_statistics.ia_primitives +=
-					r600_query_read_result(map + results_base, 12, 28, false);
-				result->pipeline_statistics.ia_vertices +=
-					r600_query_read_result(map + results_base, 14, 30, false);
-				results_base += query->result_size;
-			}
+			result->pipeline_statistics.ps_invocations +=
+				r600_query_read_result(buffer, 0, 16, false);
+			result->pipeline_statistics.c_primitives +=
+				r600_query_read_result(buffer, 2, 18, false);
+			result->pipeline_statistics.c_invocations +=
+				r600_query_read_result(buffer, 4, 20, false);
+			result->pipeline_statistics.vs_invocations +=
+				r600_query_read_result(buffer, 6, 22, false);
+			result->pipeline_statistics.gs_invocations +=
+				r600_query_read_result(buffer, 8, 24, false);
+			result->pipeline_statistics.gs_primitives +=
+				r600_query_read_result(buffer, 10, 26, false);
+			result->pipeline_statistics.ia_primitives +=
+				r600_query_read_result(buffer, 12, 28, false);
+			result->pipeline_statistics.ia_vertices +=
+				r600_query_read_result(buffer, 14, 30, false);
 		}
 #if 0 /* for testing */
 		printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
@@ -793,23 +861,47 @@ static boolean r600_get_query_buffer_result(struct r600_common_co
 	default:
 		assert(0);
 	}
-
-	return TRUE;
 }

 static boolean r600_get_query_result(struct pipe_context *ctx,
-				     struct pipe_query *query,
-				     boolean wait, union pipe_query_result *result)
+				     struct pipe_query *query, boolean wait,
+				     union pipe_query_result *result)
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	struct r600_query *rquery = (struct r600_query *)query;
+
+	return rquery->ops->get_result(rctx, rquery, wait, result);
+}
+
+static void r600_query_hw_clear_result(struct r600_query_hw *query,
+				       union pipe_query_result *result)
+{
+	util_query_clear_result(result, query->b.type);
+}
+
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+				 struct r600_query *rquery,
+				 boolean wait, union pipe_query_result *result)
+{
+	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
 	struct r600_query_buffer *qbuf;

-	util_query_clear_result(result, rquery->type);
+	query->ops->clear_result(query, result);

-	for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
-		if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) {
+	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+		unsigned results_base = 0;
+		void *map;
+
+		map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf,
+						      PIPE_TRANSFER_READ |
+						      (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
+		if (!map)
 			return FALSE;
+
+		while (results_base != qbuf->results_end) {
+			query->ops->add_result(rctx, query, map + results_base,
+					       result);
+			results_base += query->result_size;
 		}
 	}

@@ -827,7 +919,7 @@ static void r600_render_condition(struct pipe_context *ctx,
 				  uint mode)
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-	struct r600_query *rquery = (struct r600_query*)query;
+	struct r600_query_hw *rquery = (struct r600_query_hw *)query;
 	struct r600_query_buffer *qbuf;
 	struct r600_atom *atom = &rctx->render_cond_atom;
@@ -837,8 +929,10 @@ static void r600_render_condition(struct pipe_context *ctx,

 	/* Compute the size of SET_PREDICATION packets. */
 	atom->num_dw = 0;
-	for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
-		atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+	if (query) {
+		for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
+			atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+	}

 	rctx->set_atom_dirty(rctx, atom, query != NULL);
 }
@@ -847,10 +941,10 @@ static void r600_suspend_queries(struct r600_common_context *ctx,
 				 struct list_head *query_list,
 				 unsigned *num_cs_dw_queries_suspend)
 {
-	struct r600_query *query;
+	struct r600_query_hw *query;

 	LIST_FOR_EACH_ENTRY(query, query_list, list) {
-		r600_emit_query_end(ctx, query);
+		r600_query_hw_emit_stop(ctx, query);
 	}
 	assert(*num_cs_dw_queries_suspend == 0);
 }
@@ -870,19 +964,19 @@ void r600_suspend_timer_queries(struct r600_common_context *ctx)
 static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
 						    struct list_head *query_list)
 {
-	struct r600_query *query;
+	struct r600_query_hw *query;
 	unsigned num_dw = 0;

 	LIST_FOR_EACH_ENTRY(query, query_list, list) {
 		/* begin + end */
-		num_dw += query->num_cs_dw * 2;
+		num_dw += query->num_cs_dw_begin + query->num_cs_dw_end;

 		/* Workaround for the fact that
 		 * num_cs_dw_nontimer_queries_suspend is incremented for every
 		 * resumed query, which raises the bar in need_cs_space for
 		 * queries about to be resumed.
 		 */
-		num_dw += query->num_cs_dw;
+		num_dw += query->num_cs_dw_end;
 	}

 	/* primitives generated query */
 	num_dw += ctx->streamout.enable_atom.num_dw;
@@ -896,7 +990,7 @@ static void r600_resume_queries(struct r600_common_context *ctx,
 				struct list_head *query_list,
 				unsigned *num_cs_dw_queries_suspend)
 {
-	struct r600_query *query;
+	struct r600_query_hw *query;
 	unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, query_list);

 	assert(*num_cs_dw_queries_suspend == 0);
@@ -905,7 +999,7 @@ static void r600_resume_queries(struct r600_common_context *ctx,
 	ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, TRUE);

 	LIST_FOR_EACH_ENTRY(query, query_list, list) {
-		r600_emit_query_begin(ctx, query);
+		r600_query_hw_emit_start(ctx, query);
 	}
 }
@@ -1002,6 +1096,76 @@ err:
 	return;
 }

+#define X(name_, query_type_, type_, result_type_) \
+	{ \
+		.name = name_, \
+		.query_type = R600_QUERY_##query_type_, \
+		.type = PIPE_DRIVER_QUERY_TYPE_##type_, \
+		.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
+		.group_id = ~(unsigned)0 \
+	}
+
+static struct pipe_driver_query_info r600_driver_query_list[] = {
+	X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
+	X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
+	X("draw-calls", DRAW_CALLS, UINT64, CUMULATIVE),
+	X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
+	X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
+	X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
+	X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, CUMULATIVE),
+	X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
+	X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
+	X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
+	X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
+	X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
+	X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
+	X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
+};
+
+#undef X
+
+static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
+{
+	if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
+		return Elements(r600_driver_query_list);
+	else if (rscreen->info.drm_major == 3)
+		return Elements(r600_driver_query_list) - 3;
+	else
+		return Elements(r600_driver_query_list) - 4;
+}
+
+static int r600_get_driver_query_info(struct pipe_screen *screen,
+				      unsigned index,
+				      struct pipe_driver_query_info *info)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	unsigned num_queries = r600_get_num_queries(rscreen);
+
+	if (!info)
+		return num_queries;
+
+	if (index >= num_queries)
+		return 0;
+
+	*info = r600_driver_query_list[index];
+
+	switch (info->query_type) {
+	case R600_QUERY_REQUESTED_VRAM:
+	case R600_QUERY_VRAM_USAGE:
+		info->max_value.u64 = rscreen->info.vram_size;
+		break;
+	case R600_QUERY_REQUESTED_GTT:
+	case R600_QUERY_GTT_USAGE:
+		info->max_value.u64 = rscreen->info.gart_size;
+		break;
+	case R600_QUERY_GPU_TEMPERATURE:
+		info->max_value.u64 = 125;
+		break;
+	}
+
+	return 1;
+}
+
 void r600_query_init(struct r600_common_context *rctx)
 {
 	rctx->b.create_query = r600_create_query;
@@ -1017,3 +1181,8 @@ void r600_query_init(struct r600_common_context *rctx)
 	LIST_INITHEAD(&rctx->active_nontimer_queries);
 	LIST_INITHEAD(&rctx->active_timer_queries);
 }
+
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
+{
+	rscreen->b.get_driver_query_info = r600_get_driver_query_info;
+}
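
The core of this commit is the switch from type checks scattered through every entry point to per-query function tables: r600_create_query picks either sw_query_ops or query_hw_ops once, and begin/end/destroy/get_result then forward blindly through the pointer. The standalone C sketch below mirrors only that dispatch pattern; the struct layout and function names are simplified stand-ins, not the driver's actual definitions from r600_query.h.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for the driver types; only the vtable
 * dispatch pattern itself is taken from the patch. */
struct query;

struct query_ops {
	void (*destroy)(struct query *q);
	int  (*begin)(struct query *q);
	void (*end)(struct query *q);
};

struct query {
	const struct query_ops *ops; /* chosen once, at creation */
	unsigned type;
};

/* "Software" queries: no GPU buffer, just CPU-side counters. */
static int  sw_begin(struct query *q)   { printf("sw begin, type %u\n", q->type); return 1; }
static void sw_end(struct query *q)     { printf("sw end, type %u\n", q->type); }
static void sw_destroy(struct query *q) { free(q); }

static const struct query_ops sw_query_ops = {
	.destroy = sw_destroy,
	.begin = sw_begin,
	.end = sw_end,
};

/* create_query() selects the ops table by type; every later entry
 * point forwards through q->ops without re-checking the type. */
static struct query *create_query(unsigned type)
{
	struct query *q = calloc(1, sizeof(*q));
	if (!q)
		return NULL;
	q->type = type;
	q->ops = &sw_query_ops; /* a hw table would be chosen for GPU queries */
	return q;
}

int main(void)
{
	struct query *q = create_query(7);
	if (!q)
		return 1;
	q->ops->begin(q);
	q->ops->end(q);
	q->ops->destroy(q);
	return 0;
}

The payoff is visible in the diff itself: r600_begin_query, r600_end_query, and r600_get_query_result shrink to one-line forwarders, and the hw path can further delegate buffer prep and packet emission through the separate r600_query_hw_ops table.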
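The result decoding in r600_query_hw_add_result rests on the buffer layout the GPU writes: each counter is a begin/end pair of 64-bit snapshots stored as dword pairs, and when test_status_bit is set, the top bit of each snapshot doubles as a "this snapshot has landed" flag (which is why prepare_buffer seeds unused backends with 0x80000000). Since the flag is set on both snapshots, it cancels in the subtraction. A hedged standalone re-creation of that helper's logic (the driver's version returns unsigned and takes a void *map):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Read one begin/end snapshot pair and return the delta, or 0 if
 * either snapshot is still missing its "written" status bit. */
static uint64_t read_result(const uint32_t *map, unsigned start_index,
			    unsigned end_index, bool test_status_bit)
{
	uint64_t start = (uint64_t)map[start_index] |
			 ((uint64_t)map[start_index + 1] << 32);
	uint64_t end = (uint64_t)map[end_index] |
		       ((uint64_t)map[end_index + 1] << 32);

	if (!test_status_bit ||
	    ((start & 0x8000000000000000ull) && (end & 0x8000000000000000ull)))
		return end - start; /* status bits cancel out here */

	return 0; /* one snapshot missing: contribute nothing */
}

int main(void)
{
	/* Fake occlusion sample: begin = 100, end = 250, both valid. */
	uint32_t buf[4] = { 100, 0x80000000u, 250, 0x80000000u };
	printf("delta = %llu\n",
	       (unsigned long long)read_result(buf, 0, 2, true));
	return 0;
}

This is also why the caller in r600_query_hw_get_result walks every buffer in the chain and calls add_result per result slot: each slot accumulates independently, and slots from backends that never report still pass the status check thanks to the seeded top bits while contributing a zero delta.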
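The driver-query table added at the end of the diff uses an X()-macro: X expands one compact line into a designated-initializer entry, the entries are listed, then X is #undef'd so the name cannot leak. A small illustration of the same table-building trick; all type and enum names below are invented for the example, not taken from Gallium:

#include <stdio.h>

struct query_info {
	const char *name;
	unsigned query_type;
	unsigned result_type;
};

enum { Q_DRAW_CALLS, Q_VRAM_USAGE };
enum { RESULT_CUMULATIVE, RESULT_AVERAGE };

/* Expand one compact line into a full designated-initializer entry. */
#define X(name_, id_, result_) \
	{ .name = name_, .query_type = id_, .result_type = result_ }

static const struct query_info query_list[] = {
	X("draw-calls", Q_DRAW_CALLS, RESULT_CUMULATIVE),
	X("VRAM-usage", Q_VRAM_USAGE, RESULT_AVERAGE),
};

#undef X

int main(void)
{
	for (unsigned i = 0; i < sizeof(query_list) / sizeof(query_list[0]); i++)
		printf("%s (type %u)\n", query_list[i].name, query_list[i].query_type);
	return 0;
}

Keeping the repetitive fields (like the patch's group_id = ~0) inside the macro means each new counter costs exactly one line, which is what makes the fourteen-entry r600_driver_query_list readable at a glance.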