From a35269cf446bfad2261dc1e7945cd779fb42208d Mon Sep 17 00:00:00 2001
From: Illia Iorin
Date: Fri, 10 May 2019 00:44:39 +0300
Subject: iris: Implement ARB_indirect_parameters

iris_draw_vbo is divided into two functions to remove unnecessary
operations from the loop. This implementation of ARB_indirect_parameters
takes NV_conditional_render into account by saving MI_PREDICATE_RESULT at
the start of a draw call and restoring it at the end. The result of
NV_conditional_render is also taken into account when computing the
predicates that limit draw calls for ARB_indirect_parameters, in a similar
way to 1952fd8d in ANV.

v2: Optimize indirect draws (suggested by Kenneth Graunke)

v3: (by Kenneth Graunke)
- Fix an issue where indirect draws wouldn't set patch information
  before updating the compiled TCS.
- Move some code back to iris_draw_vbo to avoid duplicating it.
- Fix minor indentation issues.

Signed-off-by: Illia Iorin
Reviewed-by: Kenneth Graunke
---
 src/gallium/drivers/iris/iris_defines.h | 20 ++++++++
 src/gallium/drivers/iris/iris_draw.c    | 61 +++++++++++++++++++++++--
 src/gallium/drivers/iris/iris_query.c   | 15 ------
 src/gallium/drivers/iris/iris_screen.c  |  2 +
 src/gallium/drivers/iris/iris_state.c   | 81 +++++++++++++++++++++++++++++++--
 5 files changed, 156 insertions(+), 23 deletions(-)

(limited to 'src')

diff --git a/src/gallium/drivers/iris/iris_defines.h b/src/gallium/drivers/iris/iris_defines.h
index d36b6452612..119a03a2893 100644
--- a/src/gallium/drivers/iris/iris_defines.h
+++ b/src/gallium/drivers/iris/iris_defines.h
@@ -52,6 +52,26 @@
 
 #define CS_GPR(n) (0x2600 + (n) * 8)
 
+/* MI_MATH registers */
+#define MI_ALU_R0        0x00
+#define MI_ALU_R1        0x01
+#define MI_ALU_R2        0x02
+#define MI_ALU_R3        0x03
+#define MI_ALU_R4        0x04
+
+/* MI_MATH operations */
+#define MI_MATH          (0x1a << 23)
+
+#define _MI_ALU(op, x, y)  (((op) << 20) | ((x) << 10) | (y))
+
+#define _MI_ALU0(op)       _MI_ALU(MI_ALU_##op, 0, 0)
+#define _MI_ALU1(op, x)    _MI_ALU(MI_ALU_##op, x, 0)
+#define _MI_ALU2(op, x,
y) _MI_ALU(MI_ALU_##op, x, y)
+
+#define MI_ALU0(op)        _MI_ALU0(op)
+#define MI_ALU1(op, x)     _MI_ALU1(op, MI_ALU_##x)
+#define MI_ALU2(op, x, y)  _MI_ALU2(op, MI_ALU_##x, MI_ALU_##y)
+
 /* The number of bits in our TIMESTAMP queries. */
 #define TIMESTAMP_BITS 36
 
diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c
index 87399c430ee..17e4eab67d8 100644
--- a/src/gallium/drivers/iris/iris_draw.c
+++ b/src/gallium/drivers/iris/iris_draw.c
@@ -141,6 +141,58 @@ iris_update_draw_parameters(struct iris_context *ice,
    }
 }
 
+static void
+iris_indirect_draw_vbo(struct iris_context *ice,
+                       const struct pipe_draw_info *dinfo)
+{
+   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
+   struct pipe_draw_info info = *dinfo;
+
+   if (info.indirect->indirect_draw_count &&
+       ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
+      /* Save MI_PREDICATE_RESULT in GPR2; it is restored after the loop. */
+      ice->vtbl.load_register_reg64(batch, CS_GPR(2), MI_PREDICATE_RESULT);
+   }
+
+   uint64_t orig_dirty = ice->state.dirty;
+   /* Emit each indirect draw in turn, advancing the offset by the stride. */
+   for (int i = 0; i < info.indirect->draw_count; i++) {
+      info.drawid = i;
+
+      iris_batch_maybe_flush(batch, 1500);
+
+      iris_update_draw_parameters(ice, &info);
+
+      ice->vtbl.upload_render_state(ice, batch, &info);
+
+      ice->state.dirty &= ~IRIS_ALL_DIRTY_FOR_RENDER;
+      /* NOTE(review): shallow copy; this bumps the caller's indirect offset. */
+      info.indirect->offset += info.indirect->stride;
+   }
+
+   if (info.indirect->indirect_draw_count &&
+       ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
+      /* Restore MI_PREDICATE_RESULT from the copy saved in GPR2. */
+      ice->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(2));
+   }
+
+   /* Put this back for post-draw resolves, we'll clear it again after. */
+   ice->state.dirty = orig_dirty;
+}
+
+static void
+iris_simple_draw_vbo(struct iris_context *ice,
+                     const struct pipe_draw_info *draw)
+{
+   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
+
+   iris_batch_maybe_flush(batch, 1500);
+
+   iris_update_draw_parameters(ice, draw);
+
+   ice->vtbl.upload_render_state(ice, batch, draw);
+}
+
 /**
  * The pipe->draw_vbo() driver hook. Performs a draw on the GPU.
  */
@@ -161,10 +213,7 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
    if (unlikely(INTEL_DEBUG & DEBUG_REEMIT))
       ice->state.dirty |= IRIS_ALL_DIRTY_FOR_RENDER & ~IRIS_DIRTY_SO_BUFFERS;
 
-   iris_batch_maybe_flush(batch, 1500);
-
    iris_update_draw_info(ice, info);
-   iris_update_draw_parameters(ice, dinfo);
 
    if (devinfo->gen == 9)
       gen9_toggle_preemption(ice, batch, info);
@@ -184,7 +233,11 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
    iris_binder_reserve_3d(ice);
 
    ice->vtbl.update_surface_base_address(batch, &ice->state.binder);
-   ice->vtbl.upload_render_state(ice, batch, info);
+
+   if (info->indirect)
+      iris_indirect_draw_vbo(ice, info);
+   else
+      iris_simple_draw_vbo(ice, info);
 
    iris_postdraw_update_resolve_tracking(ice, batch);
 
diff --git a/src/gallium/drivers/iris/iris_query.c b/src/gallium/drivers/iris/iris_query.c
index eace45c64ed..6d9659080a7 100644
--- a/src/gallium/drivers/iris/iris_query.c
+++ b/src/gallium/drivers/iris/iris_query.c
@@ -74,27 +74,12 @@
 #define MI_ALU_STORE 0x180
 #define MI_ALU_STOREINV 0x580
 
-#define MI_ALU_R0        0x00
-#define MI_ALU_R1        0x01
-#define MI_ALU_R2        0x02
-#define MI_ALU_R3        0x03
-#define MI_ALU_R4        0x04
 #define MI_ALU_SRCA 0x20
 #define MI_ALU_SRCB 0x21
 #define MI_ALU_ACCU 0x31
 #define MI_ALU_ZF 0x32
 #define MI_ALU_CF 0x33
 
-#define _MI_ALU(op, x, y)  (((op) << 20) | ((x) << 10) | (y))
-
-#define _MI_ALU0(op)       _MI_ALU(MI_ALU_##op, 0, 0)
-#define _MI_ALU1(op, x)    _MI_ALU(MI_ALU_##op, x, 0)
-#define _MI_ALU2(op, x, y) _MI_ALU(MI_ALU_##op, x, y)
-
-#define MI_ALU0(op)        _MI_ALU0(op)
-#define MI_ALU1(op, x)     _MI_ALU1(op, MI_ALU_##x)
-#define MI_ALU2(op, x, y)  _MI_ALU2(op, MI_ALU_##x, MI_ALU_##y)
-
 #define emit_lri32 ice->vtbl.load_register_imm32
 #define emit_lri64 ice->vtbl.load_register_imm64
 #define emit_lrr32 ice->vtbl.load_register_reg32
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index df0c2a81404..4fd3e040a45 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -145,6 +145,8 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_SAMPLE_SHADING:
    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
    case PIPE_CAP_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
    case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
    case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 2d02f631d00..a9af1cd0dc4 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -5288,6 +5288,8 @@ iris_upload_render_state(struct iris_context *ice,
                          struct iris_batch *batch,
                          const struct pipe_draw_info *draw)
 {
+   bool use_predicate = ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
+
    /* Always pin the binder. If we're emitting new binding table pointers,
     * we need it. If not, we're probably inheriting old tables via the
     * context, and need it anyway. Since true zero-bindings cases are
@@ -5344,9 +5346,81 @@ iris_upload_render_state(struct iris_context *ice,
 #define _3DPRIM_BASE_VERTEX 0x2440
 
    if (draw->indirect) {
-      /* We don't support this MultidrawIndirect. */
-      assert(!draw->indirect->indirect_draw_count);
+      if (draw->indirect->indirect_draw_count) {
+         use_predicate = true;
+
+         struct iris_bo *draw_count_bo =
+            iris_resource_bo(draw->indirect->indirect_draw_count);
+         unsigned draw_count_offset =
+            draw->indirect->indirect_draw_count_offset;
+
+         iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE);
+
+         if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
+            static const uint32_t math[] = {
+               MI_MATH | (9 - 2),
+               /* Compute (draw index < draw count).
+                * We do this by subtracting and storing the carry bit.
+                */
+               MI_ALU2(LOAD, SRCA, R0),
+               MI_ALU2(LOAD, SRCB, R1),
+               MI_ALU0(SUB),
+               MI_ALU2(STORE, R3, CF),
+               /* AND that with the MI_PREDICATE_RESULT copy held in GPR2. */
+               MI_ALU2(LOAD, SRCA, R3),
+               MI_ALU2(LOAD, SRCB, R2),
+               MI_ALU0(AND),
+               MI_ALU2(STORE, R3, ACCU),
+            };
+
+            /* Upload the current draw count from the draw parameters
+             * buffer to GPR1.
+             */
+            ice->vtbl.load_register_mem32(batch, CS_GPR(1), draw_count_bo,
+                                          draw_count_offset);
+            /* Zero the top 32-bits of GPR1. */
+            ice->vtbl.load_register_imm32(batch, CS_GPR(1) + 4, 0);
+            /* Upload the index of the current draw (drawid) to GPR0. */
+            ice->vtbl.load_register_imm64(batch, CS_GPR(0), draw->drawid);
+
+            iris_batch_emit(batch, math, sizeof(math));
+
+            /* Store result of MI_MATH computations to MI_PREDICATE_RESULT. */
+            ice->vtbl.load_register_reg64(batch,
+                                          MI_PREDICATE_RESULT, CS_GPR(3));
+         } else {
+            uint32_t mi_predicate;
 
+            /* Upload the index of the current draw to MI_PREDICATE_SRC1. */
+            ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1,
+                                          draw->drawid);
+            /* Upload the current draw count from the draw parameters buffer
+             * to MI_PREDICATE_SRC0.
+             */
+            ice->vtbl.load_register_mem32(batch, MI_PREDICATE_SRC0,
+                                          draw_count_bo, draw_count_offset);
+            /* Zero the top 32-bits of MI_PREDICATE_SRC0. */
+            ice->vtbl.load_register_imm32(batch, MI_PREDICATE_SRC0 + 4, 0);
+
+            if (draw->drawid == 0) {
+               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
+                              MI_PREDICATE_COMBINEOP_SET |
+                              MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
+            } else {
+               /* While draw_index < draw_count the predicate's result will be
+                * (draw_index == draw_count) ^ TRUE = TRUE
+                * When draw_index == draw_count the result is
+                * (TRUE) ^ TRUE = FALSE
+                * After this all results will be:
+                * (FALSE) ^ FALSE = FALSE
+                */
+               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
+                              MI_PREDICATE_COMBINEOP_XOR |
+                              MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
+            }
+            iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
+         }
+      }
       struct iris_bo *bo = iris_resource_bo(draw->indirect->buffer);
       assert(bo);
 
@@ -5406,8 +5480,7 @@ iris_upload_render_state(struct iris_context *ice,
 
    iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
       prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
-      prim.PredicateEnable =
-         ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
+      prim.PredicateEnable = use_predicate;
 
       if (draw->indirect || draw->count_from_stream_output) {
          prim.IndirectParameterEnable = true;
-- 
cgit v1.2.3