iris: Implement ARB_indirect_parameters

iris_draw_vbo is divided into two functions to remove unnecessary operations from the loop. This implementation of ARB_indirect_parameters takes into account NV_conditional_render by saving MI_PREDICATE_RESULT at the start of a draw call and restoring it at the end also the result of NV_conditional_render is taken into account when computing predicates that limit draw calls for ARB_indirect_parameters in a similar way to 1952fd8d in ANV. v2: Optimize indirect draws (suggested by Kenneth Graunke) v3: (by Kenneth Graunke) - Fix an issue where indirect draws wouldn't set patch information before updating the compiled TCS. - Move some code back to iris_draw_vbo to avoid duplicating it. - Fix minor indentation issues. Signed-off-by: Illia Iorin <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]>
author: Illia Iorin <[email protected]> 2019-05-10 00:44:39 +0300
committer: Kenneth Graunke <[email protected]> 2019-05-11 23:56:52 -0700
commit: a35269cf446bfad2261dc1e7945cd779fb42208d (patch)
tree: 240c6920af187e94f6a2202990211e0b04f4fbe3 /src/gallium/drivers
parent: 21a0be4a797e39117d507b970abfa1243fef99b0 (diff)
5 files changed, 156 insertions, 23 deletions
diff --git a/src/gallium/drivers/iris/iris_defines.h b/src/gallium/drivers/iris/iris_defines.h
index d36b6452612..119a03a2893 100644
--- a/src/gallium/drivers/iris/iris_defines.h
+++ b/src/gallium/drivers/iris/iris_defines.h
@@ -52,6 +52,26 @@
 
 #define CS_GPR(n) (0x2600 + (n) * 8)
 
+/* MI_MATH registers */
+#define MI_ALU_R0        0x00
+#define MI_ALU_R1        0x01
+#define MI_ALU_R2        0x02
+#define MI_ALU_R3        0x03
+#define MI_ALU_R4        0x04
+
+/* MI_MATH operations */
+#define MI_MATH (0x1a << 23)
+
+#define _MI_ALU(op, x, y)  (((op) << 20) | ((x) << 10) | (y))
+
+#define _MI_ALU0(op)       _MI_ALU(MI_ALU_##op, 0, 0)
+#define _MI_ALU1(op, x)    _MI_ALU(MI_ALU_##op, x, 0)
+#define _MI_ALU2(op, x, y) _MI_ALU(MI_ALU_##op, x, y)
+
+#define MI_ALU0(op)        _MI_ALU0(op)
+#define MI_ALU1(op, x)     _MI_ALU1(op, MI_ALU_##x)
+#define MI_ALU2(op, x, y)  _MI_ALU2(op, MI_ALU_##x, MI_ALU_##y)
+
 /* The number of bits in our TIMESTAMP queries. */
 #define TIMESTAMP_BITS 36
 
diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c
index 87399c430ee..17e4eab67d8 100644
--- a/src/gallium/drivers/iris/iris_draw.c
+++ b/src/gallium/drivers/iris/iris_draw.c
@@ -141,6 +141,58 @@ iris_update_draw_parameters(struct iris_context *ice,
    }
 }
 
+static void
+iris_indirect_draw_vbo(struct iris_context *ice,
+                       const struct pipe_draw_info *dinfo)
+{
+   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
+   struct pipe_draw_info info = *dinfo;
+
+   if (info.indirect->indirect_draw_count &&
+       ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
+      /* Upload MI_PREDICATE_RESULT to GPR2.*/
+      ice->vtbl.load_register_reg64(batch, CS_GPR(2), MI_PREDICATE_RESULT);
+   }
+
+   uint64_t orig_dirty = ice->state.dirty;
+
+   for (int i = 0; i < info.indirect->draw_count; i++) {
+      info.drawid = i;
+
+      iris_batch_maybe_flush(batch, 1500);
+
+      iris_update_draw_parameters(ice, &info);
+
+      ice->vtbl.upload_render_state(ice, batch, &info);
+
+      ice->state.dirty &= ~IRIS_ALL_DIRTY_FOR_RENDER;
+
+      info.indirect->offset += info.indirect->stride;
+   }
+
+   if (info.indirect->indirect_draw_count &&
+       ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
+      /* Restore MI_PREDICATE_RESULT. */
+      ice->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(2));
+   }
+
+   /* Put this back for post-draw resolves, we'll clear it again after. */
+   ice->state.dirty = orig_dirty;
+}
+
+static void
+iris_simple_draw_vbo(struct iris_context *ice,
+                     const struct pipe_draw_info *draw)
+{
+   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
+
+   iris_batch_maybe_flush(batch, 1500);
+
+   iris_update_draw_parameters(ice, draw);
+
+   ice->vtbl.upload_render_state(ice, batch, draw);
+}
+
 /**
  * The pipe->draw_vbo() driver hook.  Performs a draw on the GPU.
  */
@@ -161,10 +213,7 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
    if (unlikely(INTEL_DEBUG & DEBUG_REEMIT))
       ice->state.dirty |= IRIS_ALL_DIRTY_FOR_RENDER & ~IRIS_DIRTY_SO_BUFFERS;
 
-   iris_batch_maybe_flush(batch, 1500);
-
    iris_update_draw_info(ice, info);
-   iris_update_draw_parameters(ice, dinfo);
 
    if (devinfo->gen == 9)
       gen9_toggle_preemption(ice, batch, info);
@@ -184,7 +233,11 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
    iris_binder_reserve_3d(ice);
 
    ice->vtbl.update_surface_base_address(batch, &ice->state.binder);
-   ice->vtbl.upload_render_state(ice, batch, info);
+
+   if (info->indirect)
+      iris_indirect_draw_vbo(ice, info);
+   else
+      iris_simple_draw_vbo(ice, info);
 
    iris_postdraw_update_resolve_tracking(ice, batch);
 
diff --git a/src/gallium/drivers/iris/iris_query.c b/src/gallium/drivers/iris/iris_query.c
index eace45c64ed..6d9659080a7 100644
--- a/src/gallium/drivers/iris/iris_query.c
+++ b/src/gallium/drivers/iris/iris_query.c
@@ -74,27 +74,12 @@
 #define MI_ALU_STORE     0x180
 #define MI_ALU_STOREINV  0x580
 
-#define MI_ALU_R0        0x00
-#define MI_ALU_R1        0x01
-#define MI_ALU_R2        0x02
-#define MI_ALU_R3        0x03
-#define MI_ALU_R4        0x04
 #define MI_ALU_SRCA      0x20
 #define MI_ALU_SRCB      0x21
 #define MI_ALU_ACCU      0x31
 #define MI_ALU_ZF        0x32
 #define MI_ALU_CF        0x33
 
-#define _MI_ALU(op, x, y)  (((op) << 20) | ((x) << 10) | (y))
-
-#define _MI_ALU0(op)       _MI_ALU(MI_ALU_##op, 0, 0)
-#define _MI_ALU1(op, x)    _MI_ALU(MI_ALU_##op, x, 0)
-#define _MI_ALU2(op, x, y) _MI_ALU(MI_ALU_##op, x, y)
-
-#define MI_ALU0(op)        _MI_ALU0(op)
-#define MI_ALU1(op, x)     _MI_ALU1(op, MI_ALU_##x)
-#define MI_ALU2(op, x, y)  _MI_ALU2(op, MI_ALU_##x, MI_ALU_##y)
-
 #define emit_lri32 ice->vtbl.load_register_imm32
 #define emit_lri64 ice->vtbl.load_register_imm64
 #define emit_lrr32 ice->vtbl.load_register_reg32
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index df0c2a81404..4fd3e040a45 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -145,6 +145,8 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_SAMPLE_SHADING:
    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
    case PIPE_CAP_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
    case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
    case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 2d02f631d00..a9af1cd0dc4 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -5288,6 +5288,8 @@ iris_upload_render_state(struct iris_context *ice,
                          struct iris_batch *batch,
                          const struct pipe_draw_info *draw)
 {
+   bool use_predicate = ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
+
    /* Always pin the binder.  If we're emitting new binding table pointers,
     * we need it.  If not, we're probably inheriting old tables via the
     * context, and need it anyway.  Since true zero-bindings cases are
@@ -5344,9 +5346,81 @@ iris_upload_render_state(struct iris_context *ice,
 #define _3DPRIM_BASE_VERTEX         0x2440
 
    if (draw->indirect) {
-      /* We don't support this MultidrawIndirect. */
-      assert(!draw->indirect->indirect_draw_count);
+      if (draw->indirect->indirect_draw_count) {
+         use_predicate = true;
+
+         struct iris_bo *draw_count_bo =
+            iris_resource_bo(draw->indirect->indirect_draw_count);
+         unsigned draw_count_offset =
+            draw->indirect->indirect_draw_count_offset;
+
+         iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE);
+
+         if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
+            static const uint32_t math[] = {
+               MI_MATH | (9 - 2),
+               /* Compute (draw index < draw count).
+                * We do this by subtracting and storing the carry bit.
+                */
+               MI_ALU2(LOAD, SRCA, R0),
+               MI_ALU2(LOAD, SRCB, R1),
+               MI_ALU0(SUB),
+               MI_ALU2(STORE, R3, CF),
+               /* Compute (subtracting result & MI_PREDICATE). */
+               MI_ALU2(LOAD, SRCA, R3),
+               MI_ALU2(LOAD, SRCB, R2),
+               MI_ALU0(AND),
+               MI_ALU2(STORE, R3, ACCU),
+            };
+
+            /* Upload the current draw count from the draw parameters
+             * buffer to GPR1.
+             */
+            ice->vtbl.load_register_mem32(batch, CS_GPR(1), draw_count_bo,
+                                          draw_count_offset);
+            /* Zero the top 32-bits of GPR1. */
+            ice->vtbl.load_register_imm32(batch, CS_GPR(1) + 4, 0);
+            /* Upload the id of the current primitive to GPR0. */
+            ice->vtbl.load_register_imm64(batch, CS_GPR(0), draw->drawid);
+
+            iris_batch_emit(batch, math, sizeof(math));
+
+            /* Store result of MI_MATH computations to MI_PREDICATE_RESULT. */
+            ice->vtbl.load_register_reg64(batch,
+                                          MI_PREDICATE_RESULT, CS_GPR(3));
+         } else {
+            uint32_t mi_predicate;
 
+            /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
+            ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1,
+                                          draw->drawid);
+            /* Upload the current draw count from the draw parameters buffer
+             * to MI_PREDICATE_SRC0.
+             */
+            ice->vtbl.load_register_mem32(batch, MI_PREDICATE_SRC0,
+                                          draw_count_bo, draw_count_offset);
+            /* Zero the top 32-bits of MI_PREDICATE_SRC0 */
+            ice->vtbl.load_register_imm32(batch, MI_PREDICATE_SRC0 + 4, 0);
+
+            if (draw->drawid == 0) {
+               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
+                              MI_PREDICATE_COMBINEOP_SET |
+                              MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
+            } else {
+               /* While draw_index < draw_count the predicate's result will be
+                *  (draw_index == draw_count) ^ TRUE = TRUE
+                * When draw_index == draw_count the result is
+                *  (TRUE) ^ TRUE = FALSE
+                * After this all results will be:
+                *  (FALSE) ^ FALSE = FALSE
+                */
+               mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
+                              MI_PREDICATE_COMBINEOP_XOR |
+                              MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
+            }
+            iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
+         }
+      }
       struct iris_bo *bo = iris_resource_bo(draw->indirect->buffer);
       assert(bo);
 
@@ -5406,8 +5480,7 @@ iris_upload_render_state(struct iris_context *ice,
 
    iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
       prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
-      prim.PredicateEnable =
-         ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
+      prim.PredicateEnable = use_predicate;
 
       if (draw->indirect || draw->count_from_stream_output) {
          prim.IndirectParameterEnable = true;
author	Illia Iorin <[email protected]>	2019-05-10 00:44:39 +0300
committer	Kenneth Graunke <[email protected]>	2019-05-11 23:56:52 -0700
commit	a35269cf446bfad2261dc1e7945cd779fb42208d (patch)
tree	240c6920af187e94f6a2202990211e0b04f4fbe3 /src/gallium/drivers
parent	21a0be4a797e39117d507b970abfa1243fef99b0 (diff)