6 files changed, 106 insertions, 91 deletions
diff --git a/src/intel/common/gen_mi_builder.h b/src/intel/common/gen_mi_builder.h
index 3dd8fcb739c..22c0aee3343 100644
--- a/src/intel/common/gen_mi_builder.h
+++ b/src/intel/common/gen_mi_builder.h
@@ -450,6 +450,51 @@ gen_mi_store(struct gen_mi_builder *b,
    gen_mi_value_unref(b, dst);
 }
 
+static inline void
+gen_mi_memset(struct gen_mi_builder *b, __gen_address_type dst,
+              uint32_t value, uint32_t size)
+{
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+   assert(b->num_math_dwords == 0);
+#endif
+
+   /* This memset operates in units of dwords. */
+   assert(size % 4 == 0);
+
+   for (uint32_t i = 0; i < size; i += 4) {
+      gen_mi_store(b, gen_mi_mem32(__gen_address_offset(dst, i)),
+                      gen_mi_imm(value));
+   }
+}
+
+/* NOTE: On IVB, this function stomps GEN7_3DPRIM_BASE_VERTEX */
+static inline void
+gen_mi_memcpy(struct gen_mi_builder *b, __gen_address_type dst,
+              __gen_address_type src, uint32_t size)
+{
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+   assert(b->num_math_dwords == 0);
+#endif
+
+   /* This memcpy operates in units of dwords. */
+   assert(size % 4 == 0);
+
+   for (uint32_t i = 0; i < size; i += 4) {
+      struct gen_mi_value dst_val = gen_mi_mem32(__gen_address_offset(dst, i));
+      struct gen_mi_value src_val = gen_mi_mem32(__gen_address_offset(src, i));
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+      gen_mi_store(b, dst_val, src_val);
+#else
+      /* IVB does not have a general purpose register for command streamer
+       * commands. Therefore, we use an alternate temporary register.
+       */
+      struct gen_mi_value tmp_reg = gen_mi_reg32(0x2440); /* GEN7_3DPRIM_BASE_VERTEX */
+      gen_mi_store(b, tmp_reg, src_val);
+      gen_mi_store(b, dst_val, tmp_reg);
+#endif
+   }
+}
+
 /*
  * MI_MATH Section.  Only available on Haswell+
  */
diff --git a/src/intel/common/tests/gen_mi_builder_test.cpp b/src/intel/common/tests/gen_mi_builder_test.cpp
index e68d35c9fb9..d192e063d00 100644
--- a/src/intel/common/tests/gen_mi_builder_test.cpp
+++ b/src/intel/common/tests/gen_mi_builder_test.cpp
@@ -422,6 +422,36 @@ TEST_F(gen_mi_builder_test, mem_reg)
    EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
 }
 
+TEST_F(gen_mi_builder_test, memset)
+{
+   const unsigned memset_size = 256;
+
+   gen_mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);
+
+   submit_batch();
+
+   uint32_t *out_u32 = (uint32_t *)output;
+   for (unsigned i = 0; i <  memset_size / sizeof(*out_u32); i++)
+      EXPECT_EQ(out_u32[i], 0xdeadbeef);
+}
+
+TEST_F(gen_mi_builder_test, memcpy)
+{
+   const unsigned memcpy_size = 256;
+
+   uint8_t *in_u8 = (uint8_t *)input;
+   for (unsigned i = 0; i < memcpy_size; i++)
+      in_u8[i] = i;
+
+   gen_mi_memcpy(&b, out_addr(0), in_addr(0), 256);
+
+   submit_batch();
+
+   uint8_t *out_u8 = (uint8_t *)output;
+   for (unsigned i = 0; i < memcpy_size; i++)
+      EXPECT_EQ(out_u8[i], i);
+}
+
 /* Start of MI_MATH section */
 #if GEN_GEN >= 8 || GEN_IS_HASWELL
 
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 8fd32cabf1e..a5435e566a3 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -78,13 +78,5 @@ void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
                                 struct anv_address dst, struct anv_address src,
                                 uint32_t size);
 
-void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
-                                struct anv_address dst, struct anv_address src,
-                                uint32_t size);
-
-void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
-                                struct anv_address dst, uint32_t value,
-                                uint32_t size);
-
 void genX(blorp_exec)(struct blorp_batch *batch,
                       const struct blorp_params *params);
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index ec72010cb86..7771afe663e 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -822,12 +822,35 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
       anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
    unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size;
 
+#if GEN_GEN == 7
+   /* On gen7, the combination of commands used here(MI_LOAD_REGISTER_MEM
+    * and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
+    * in-flight when they are issued even if the memory touched is not
+    * currently active for rendering.  The weird bit is that it is not the
+    * MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
+    * rendering hangs such that the next stalling command after the
+    * MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
+    *
+    * It is unclear exactly why this hang occurs.  Both MI commands come with
+    * warnings about the 3D pipeline but that doesn't seem to fully explain
+    * it.  My (Jason's) best theory is that it has something to do with the
+    * fact that we're using a GPU state register as our temporary and that
+    * something with reading/writing it is causing problems.
+    *
+    * In order to work around this issue, we emit a PIPE_CONTROL with the
+    * command streamer stall bit set.
+    */
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+#endif
+
+   struct gen_mi_builder b;
+   gen_mi_builder_init(&b, &cmd_buffer->batch);
+
    if (copy_from_surface_state) {
-      genX(cmd_buffer_mi_memcpy)(cmd_buffer, entry_addr,
-                                 ss_clear_addr, copy_size);
+      gen_mi_memcpy(&b, entry_addr, ss_clear_addr, copy_size);
    } else {
-      genX(cmd_buffer_mi_memcpy)(cmd_buffer, ss_clear_addr,
-                                 entry_addr, copy_size);
+      gen_mi_memcpy(&b, ss_clear_addr, entry_addr, copy_size);
 
       /* Updating a surface state object may require that the state cache be
        * invalidated. From the SKL PRM, Shared Functions -> State -> State
diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c
index 7ef14dd14ef..49230c3da0c 100644
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -52,80 +52,6 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
 }
 
 void
-genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
-                           struct anv_address dst, struct anv_address src,
-                           uint32_t size)
-{
-   /* This memcpy operates in units of dwords. */
-   assert(size % 4 == 0);
-   assert(dst.offset % 4 == 0);
-   assert(src.offset % 4 == 0);
-
-#if GEN_GEN == 7
-   /* On gen7, the combination of commands used here(MI_LOAD_REGISTER_MEM
-    * and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
-    * in-flight when they are issued even if the memory touched is not
-    * currently active for rendering.  The weird bit is that it is not the
-    * MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
-    * rendering hangs such that the next stalling command after the
-    * MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
-    *
-    * It is unclear exactly why this hang occurs.  Both MI commands come with
-    * warnings about the 3D pipeline but that doesn't seem to fully explain
-    * it.  My (Jason's) best theory is that it has something to do with the
-    * fact that we're using a GPU state register as our temporary and that
-    * something with reading/writing it is causing problems.
-    *
-    * In order to work around this issue, we emit a PIPE_CONTROL with the
-    * command streamer stall bit set.
-    */
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
-   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
-#endif
-
-   for (uint32_t i = 0; i < size; i += 4) {
-#if GEN_GEN >= 8
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
-         cp.DestinationMemoryAddress = anv_address_add(dst, i);
-         cp.SourceMemoryAddress = anv_address_add(src, i);
-      }
-#else
-      /* IVB does not have a general purpose register for command streamer
-       * commands. Therefore, we use an alternate temporary register.
-       */
-#define TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) {
-         load.RegisterAddress = TEMP_REG;
-         load.MemoryAddress = anv_address_add(src, i);
-      }
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) {
-         store.RegisterAddress = TEMP_REG;
-         store.MemoryAddress = anv_address_add(dst, i);
-      }
-#undef TEMP_REG
-#endif
-   }
-   return;
-}
-
-void
-genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
-                           struct anv_address dst, uint32_t value,
-                           uint32_t size)
-{
-   /* This memset operates in units of dwords. */
-   assert(size % 4 == 0);
-   assert(dst.offset % 4 == 0);
-
-   for (uint32_t i = 0; i < size; i += 4) {
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
-         sdi.Address = anv_address_add(dst, i);
-         sdi.ImmediateData = value;
-      }
-   }
-}
-
-void
 genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
                            struct anv_address dst, struct anv_address src,
                            uint32_t size)
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 3374ba6417e..5fbf92c4ab1 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -363,14 +363,13 @@ emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
  */
 static void
 emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
-                  struct anv_query_pool *pool,
+                  struct gen_mi_builder *b, struct anv_query_pool *pool,
                   uint32_t first_index, uint32_t num_queries)
 {
    for (uint32_t i = 0; i < num_queries; i++) {
       struct anv_address slot_addr =
          anv_query_address(pool, first_index + i);
-      genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8),
-                                 0, pool->stride - 8);
+      gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
       emit_query_availability(cmd_buffer, slot_addr);
    }
 }
@@ -574,7 +573,7 @@ void genX(CmdEndQueryIndexedEXT)(
       const uint32_t num_queries =
          util_bitcount(cmd_buffer->state.subpass->view_mask);
       if (num_queries > 1)
-         emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1);
+         emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
    }
 }
 
@@ -628,7 +627,7 @@ void genX(CmdWriteTimestamp)(
       const uint32_t num_queries =
          util_bitcount(cmd_buffer->state.subpass->view_mask);
       if (num_queries > 1)
-         emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1);
+         emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
    }
 }