summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Illia Iorin <[email protected]> 2019-05-10 00:44:39 +0300
committer: Kenneth Graunke <[email protected]> 2019-05-11 23:56:52 -0700
commit: a35269cf446bfad2261dc1e7945cd779fb42208d (patch)
tree: 240c6920af187e94f6a2202990211e0b04f4fbe3 /src/gallium/drivers
parent: 21a0be4a797e39117d507b970abfa1243fef99b0 (diff)
iris: Implement ARB_indirect_parameters
iris_draw_vbo is divided into two functions to remove unnecessary operations from the loop. This implementation of ARB_indirect_parameters takes NV_conditional_render into account by saving MI_PREDICATE_RESULT at the start of a draw call and restoring it at the end. The result of NV_conditional_render is also taken into account when computing the predicates that limit draw calls for ARB_indirect_parameters, in a similar way to 1952fd8d in ANV. v2: Optimize indirect draws (suggested by Kenneth Graunke). v3 (by Kenneth Graunke): - Fix an issue where indirect draws wouldn't set patch information before updating the compiled TCS. - Move some code back to iris_draw_vbo to avoid duplicating it. - Fix minor indentation issues. Signed-off-by: Illia Iorin <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/iris/iris_defines.h20
-rw-r--r--src/gallium/drivers/iris/iris_draw.c61
-rw-r--r--src/gallium/drivers/iris/iris_query.c15
-rw-r--r--src/gallium/drivers/iris/iris_screen.c2
-rw-r--r--src/gallium/drivers/iris/iris_state.c81
5 files changed, 156 insertions, 23 deletions
diff --git a/src/gallium/drivers/iris/iris_defines.h b/src/gallium/drivers/iris/iris_defines.h
index d36b6452612..119a03a2893 100644
--- a/src/gallium/drivers/iris/iris_defines.h
+++ b/src/gallium/drivers/iris/iris_defines.h
@@ -52,6 +52,26 @@
#define CS_GPR(n) (0x2600 + (n) * 8)
+/* Operand encodings for registers R0-R4 as named in MI_MATH ALU
+ * instructions (they are passed as the x/y fields of _MI_ALU below).
+ * NOTE(review): presumably these select CS_GPR(0)..CS_GPR(4) - confirm.
+ */
+#define MI_ALU_R0 0x00
+#define MI_ALU_R1 0x01
+#define MI_ALU_R2 0x02
+#define MI_ALU_R3 0x03
+#define MI_ALU_R4 0x04
+
+/* MI_MATH command header dword.  The user in iris_state.c ORs (9 - 2) into
+ * it for a nine-dword packet - presumably the usual length-minus-two field;
+ * confirm against the command reference.
+ *
+ * _MI_ALU packs one ALU instruction dword as op << 20 | x << 10 | y.
+ * _MI_ALU0/1/2 paste the MI_ALU_ prefix onto the opcode name and pass 0 for
+ * the operand fields they do not take; MI_ALU0/1/2 additionally paste
+ * MI_ALU_ onto the operand names, so MI_ALU2(LOAD, SRCA, R0) expands to
+ * _MI_ALU(MI_ALU_LOAD, MI_ALU_SRCA, MI_ALU_R0).
+ */
+#define MI_MATH (0x1a << 23)
+
+#define _MI_ALU(op, x, y) (((op) << 20) | ((x) << 10) | (y))
+
+#define _MI_ALU0(op) _MI_ALU(MI_ALU_##op, 0, 0)
+#define _MI_ALU1(op, x) _MI_ALU(MI_ALU_##op, x, 0)
+#define _MI_ALU2(op, x, y) _MI_ALU(MI_ALU_##op, x, y)
+
+#define MI_ALU0(op) _MI_ALU0(op)
+#define MI_ALU1(op, x) _MI_ALU1(op, MI_ALU_##x)
+#define MI_ALU2(op, x, y) _MI_ALU2(op, MI_ALU_##x, MI_ALU_##y)
+
/* The number of bits in our TIMESTAMP queries. */
#define TIMESTAMP_BITS 36
diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c
index 87399c430ee..17e4eab67d8 100644
--- a/src/gallium/drivers/iris/iris_draw.c
+++ b/src/gallium/drivers/iris/iris_draw.c
@@ -141,6 +141,58 @@ iris_update_draw_parameters(struct iris_context *ice,
}
}
+static void
+iris_indirect_draw_vbo(struct iris_context *ice,
+ const struct pipe_draw_info *dinfo)
+{ /* Indirect path: works on a local copy of *dinfo and calls
+   * upload_render_state once per draw, info.indirect->draw_count times,
+   * setting info.drawid to the loop index and advancing the indirect
+   * offset by the stride after each draw.
+   */
+ struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
+ struct pipe_draw_info info = *dinfo;
+
+ if (info.indirect->indirect_draw_count &&
+ ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
+ /* Save MI_PREDICATE_RESULT in GPR2.  It is copied back from GPR2 after
+  * the loop; in this case the per-draw code in iris_upload_render_state
+  * reads R2 in its MI_MATH sequence and rewrites MI_PREDICATE_RESULT.
+  */
+ ice->vtbl.load_register_reg64(batch, CS_GPR(2), MI_PREDICATE_RESULT);
+ }
+
+ uint64_t orig_dirty = ice->state.dirty;
+
+ for (int i = 0; i < info.indirect->draw_count; i++) {
+ info.drawid = i;
+
+ iris_batch_maybe_flush(batch, 1500);
+
+ iris_update_draw_parameters(ice, &info);
+
+ ice->vtbl.upload_render_state(ice, batch, &info);
+
+ ice->state.dirty &= ~IRIS_ALL_DIRTY_FOR_RENDER;
+
+ info.indirect->offset += info.indirect->stride; /* NOTE(review): info is
+  * only a shallow copy of *dinfo, so info.indirect is the caller's pointer
+  * and this write also advances the caller's offset - confirm that no
+  * caller reuses the struct afterwards.
+  */
+ }
+
+ if (info.indirect->indirect_draw_count &&
+ ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
+ /* Restore MI_PREDICATE_RESULT from the copy saved in GPR2 above. */
+ ice->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(2));
+ }
+
+ /* The loop cleared IRIS_ALL_DIRTY_FOR_RENDER after every draw.  Put the
+  * entry value back for post-draw resolves, we'll clear it again after.
+  */
+ ice->state.dirty = orig_dirty;
+}
+
+static void
+iris_simple_draw_vbo(struct iris_context *ice,
+ const struct pipe_draw_info *draw)
+{ /* Non-indirect path: a single iris_batch_maybe_flush /
+   * iris_update_draw_parameters / upload_render_state sequence on the
+   * render batch, with no per-draw loop and no predicate save/restore.
+   */
+ struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
+
+ iris_batch_maybe_flush(batch, 1500);
+
+ iris_update_draw_parameters(ice, draw);
+
+ ice->vtbl.upload_render_state(ice, batch, draw);
+}
+
/**
* The pipe->draw_vbo() driver hook. Performs a draw on the GPU.
*/
@@ -161,10 +213,7 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
if (unlikely(INTEL_DEBUG & DEBUG_REEMIT))
ice->state.dirty |= IRIS_ALL_DIRTY_FOR_RENDER & ~IRIS_DIRTY_SO_BUFFERS;
- iris_batch_maybe_flush(batch, 1500);
-
iris_update_draw_info(ice, info);
- iris_update_draw_parameters(ice, dinfo);
if (devinfo->gen == 9)
gen9_toggle_preemption(ice, batch, info);
@@ -184,7 +233,11 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
iris_binder_reserve_3d(ice);
ice->vtbl.update_surface_base_address(batch, &ice->state.binder);
- ice->vtbl.upload_render_state(ice, batch, info);
+
+ if (info->indirect)
+ iris_indirect_draw_vbo(ice, info);
+ else
+ iris_simple_draw_vbo(ice, info);
iris_postdraw_update_resolve_tracking(ice, batch);
diff --git a/src/gallium/drivers/iris/iris_query.c b/src/gallium/drivers/iris/iris_query.c
index eace45c64ed..6d9659080a7 100644
--- a/src/gallium/drivers/iris/iris_query.c
+++ b/src/gallium/drivers/iris/iris_query.c
@@ -74,27 +74,12 @@
#define MI_ALU_STORE 0x180
#define MI_ALU_STOREINV 0x580
-#define MI_ALU_R0 0x00
-#define MI_ALU_R1 0x01
-#define MI_ALU_R2 0x02
-#define MI_ALU_R3 0x03
-#define MI_ALU_R4 0x04
#define MI_ALU_SRCA 0x20
#define MI_ALU_SRCB 0x21
#define MI_ALU_ACCU 0x31
#define MI_ALU_ZF 0x32
#define MI_ALU_CF 0x33
-#define _MI_ALU(op, x, y) (((op) << 20) | ((x) << 10) | (y))
-
-#define _MI_ALU0(op) _MI_ALU(MI_ALU_##op, 0, 0)
-#define _MI_ALU1(op, x) _MI_ALU(MI_ALU_##op, x, 0)
-#define _MI_ALU2(op, x, y) _MI_ALU(MI_ALU_##op, x, y)
-
-#define MI_ALU0(op) _MI_ALU0(op)
-#define MI_ALU1(op, x) _MI_ALU1(op, MI_ALU_##x)
-#define MI_ALU2(op, x, y) _MI_ALU2(op, MI_ALU_##x, MI_ALU_##y)
-
#define emit_lri32 ice->vtbl.load_register_imm32
#define emit_lri64 ice->vtbl.load_register_imm64
#define emit_lrr32 ice->vtbl.load_register_reg32
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index df0c2a81404..4fd3e040a45 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -145,6 +145,8 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 2d02f631d00..a9af1cd0dc4 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -5288,6 +5288,8 @@ iris_upload_render_state(struct iris_context *ice,
struct iris_batch *batch,
const struct pipe_draw_info *draw)
{
+ bool use_predicate = ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
+
/* Always pin the binder. If we're emitting new binding table pointers,
* we need it. If not, we're probably inheriting old tables via the
* context, and need it anyway. Since true zero-bindings cases are
@@ -5344,9 +5346,81 @@ iris_upload_render_state(struct iris_context *ice,
#define _3DPRIM_BASE_VERTEX 0x2440
if (draw->indirect) {
- /* We don't support this MultidrawIndirect. */
- assert(!draw->indirect->indirect_draw_count);
+ if (draw->indirect->indirect_draw_count) {
+ use_predicate = true;
+
+ struct iris_bo *draw_count_bo =
+ iris_resource_bo(draw->indirect->indirect_draw_count);
+ unsigned draw_count_offset =
+ draw->indirect->indirect_draw_count_offset;
+
+ iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE);
+
+ if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
+ static const uint32_t math[] = {
+ MI_MATH | (9 - 2),
+ /* Compute (draw index < draw count).
+ * We do this by subtracting and storing the carry bit.
+ */
+ MI_ALU2(LOAD, SRCA, R0),
+ MI_ALU2(LOAD, SRCB, R1),
+ MI_ALU0(SUB),
+ MI_ALU2(STORE, R3, CF),
+ /* Compute (subtracting result & MI_PREDICATE). */
+ MI_ALU2(LOAD, SRCA, R3),
+ MI_ALU2(LOAD, SRCB, R2),
+ MI_ALU0(AND),
+ MI_ALU2(STORE, R3, ACCU),
+ };
+
+ /* Upload the current draw count from the draw parameters
+ * buffer to GPR1.
+ */
+ ice->vtbl.load_register_mem32(batch, CS_GPR(1), draw_count_bo,
+ draw_count_offset);
+ /* Zero the top 32-bits of GPR1. */
+ ice->vtbl.load_register_imm32(batch, CS_GPR(1) + 4, 0);
+ /* Upload the id of the current primitive to GPR0. */
+ ice->vtbl.load_register_imm64(batch, CS_GPR(0), draw->drawid);
+
+ iris_batch_emit(batch, math, sizeof(math));
+
+ /* Store result of MI_MATH computations to MI_PREDICATE_RESULT. */
+ ice->vtbl.load_register_reg64(batch,
+ MI_PREDICATE_RESULT, CS_GPR(3));
+ } else {
+ uint32_t mi_predicate;
+ /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
+ ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1,
+ draw->drawid);
+ /* Upload the current draw count from the draw parameters buffer
+ * to MI_PREDICATE_SRC0.
+ */
+ ice->vtbl.load_register_mem32(batch, MI_PREDICATE_SRC0,
+ draw_count_bo, draw_count_offset);
+ /* Zero the top 32-bits of MI_PREDICATE_SRC0 */
+ ice->vtbl.load_register_imm32(batch, MI_PREDICATE_SRC0 + 4, 0);
+
+ if (draw->drawid == 0) {
+ mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
+ MI_PREDICATE_COMBINEOP_SET |
+ MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
+ } else {
+ /* While draw_index < draw_count the predicate's result will be
+ * (draw_index == draw_count) ^ TRUE = TRUE
+ * When draw_index == draw_count the result is
+ * (TRUE) ^ TRUE = FALSE
+ * After this all results will be:
+ * (FALSE) ^ FALSE = FALSE
+ */
+ mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
+ MI_PREDICATE_COMBINEOP_XOR |
+ MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
+ }
+ iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
+ }
+ }
struct iris_bo *bo = iris_resource_bo(draw->indirect->buffer);
assert(bo);
@@ -5406,8 +5480,7 @@ iris_upload_render_state(struct iris_context *ice,
iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
- prim.PredicateEnable =
- ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
+ prim.PredicateEnable = use_predicate;
if (draw->indirect || draw->count_from_stream_output) {
prim.IndirectParameterEnable = true;