iris: Disable VF cache partial address workaround on Gen11+

The vertex cache uses the full 48-bit address on Gen11+. See the documentation for 3DSTATE_VERTEX_BUFFERS, which describes the workaround and lists it as pre-Icelake. Interestingly, the docs don't mention index buffers as needing a workaround at all. So either we've been overzealous in applying it to index buffers, or the docs were never updated to record that requirement. That raises the question of whether the index buffer issue was ever fixed, if there was one... This change cuts 40% of the PIPE_CONTROLs from Civilization VI's benchmark, and it appears to improve performance by about 1-2% on Icelake 8x8 (not frequency locked).
author: Kenneth Graunke <[email protected]> 2019-11-25 10:04:38 -0800
committer: Kenneth Graunke <[email protected]> 2019-11-26 12:13:34 -0800
commit: 3fdf2bb313b7e91f223fc45ad68adea9d5e76407 (patch)
tree: bd500d402f4a979de642a68d05ea9ea143542d6d
parent: 8d9f5a28e3879523fbdd018a2b87223313333379 (diff)
2 files changed, 14 insertions, 0 deletions
diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c
index a1c0dbbcf4e..c2253ac913a 100644
--- a/src/gallium/drivers/iris/iris_blorp.c
+++ b/src/gallium/drivers/iris/iris_blorp.c
@@ -204,6 +204,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,
                                            const struct blorp_address *addrs,
                                            unsigned num_vbs)
 {
+#if GEN_GEN < 11
    struct iris_context *ice = blorp_batch->blorp->driver_ctx;
    struct iris_batch *batch = blorp_batch->driver_batch;
    bool need_invalidate = false;
@@ -224,6 +225,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,
                                    PIPE_CONTROL_VF_CACHE_INVALIDATE |
                                    PIPE_CONTROL_CS_STALL);
    }
+#endif
 }
 
 static struct blorp_address
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index a30aa61b3ce..51dffb45eba 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -5784,6 +5784,15 @@ iris_upload_dirty_render_state(struct iris_context *ice,
       }
 
       if (count) {
+#if GEN_GEN >= 11
+         /* Gen11+ doesn't need the cache workaround below */
+         uint64_t bound = dynamic_bound;
+         while (bound) {
+            const int i = u_bit_scan64(&bound);
+            iris_use_optional_res(batch, genx->vertex_buffers[i].resource,
+                                  false);
+         }
+#else
          /* The VF cache designers cut corners, and made the cache key's
           * <VertexBufferIndex, Memory Address> tuple only consider the bottom
           * 32 bits of the address.  If you have two vertex buffers which get
@@ -5819,6 +5828,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
                                          "workaround: VF cache 32-bit key [VB]",
                                          flush_flags);
          }
+#endif
 
          const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);
 
@@ -6034,6 +6044,7 @@ iris_upload_render_state(struct iris_context *ice,
          iris_use_pinned_bo(batch, bo, false);
       }
 
+#if GEN_GEN < 11
       /* The VF cache key only uses 32-bits, see vertex buffer comment above */
       uint16_t high_bits = bo->gtt_offset >> 32ull;
       if (high_bits != ice->state.last_index_bo_high_bits) {
@@ -6043,6 +6054,7 @@ iris_upload_render_state(struct iris_context *ice,
                                       PIPE_CONTROL_CS_STALL);
          ice->state.last_index_bo_high_bits = high_bits;
       }
+#endif
    }
 
 #define _3DPRIM_END_OFFSET          0x2420
author	Kenneth Graunke <[email protected]>	2019-11-25 10:04:38 -0800
committer	Kenneth Graunke <[email protected]>	2019-11-26 12:13:34 -0800
commit	3fdf2bb313b7e91f223fc45ad68adea9d5e76407 (patch)
tree	bd500d402f4a979de642a68d05ea9ea143542d6d
parent	8d9f5a28e3879523fbdd018a2b87223313333379 (diff)