i965: Use unsynchronized maps for the program cache on LLC platforms.

There's no reason to stall on pwrite - the CPU always appends to the buffer and never modifies existing contents, and the GPU never writes it. Further, the CPU always appends new data before submitting a batch that requires it.

This code predates the unsynchronized mapping feature, so we simply didn't have the option when it was written.

Ideally, we would do this for non-LLC platforms too, but unsynchronized mapping support only exists for LLC systems.

Saves a bunch of stall avoidance copies when uploading shaders.

v2: Rebase on changes to previous patch.

Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Kristian Høgsberg <[email protected]> [v1]
author: Kenneth Graunke <[email protected]> 2014-08-21 14:41:17 -0700
committer: Kenneth Graunke <[email protected]> 2014-10-13 06:55:08 -0700
commit: 02ca66fbc3e2b272afcb9bae66348d3b1900e2c3 (patch)
tree: 96613387f81fee43143b87e1da156c0dccf8b751 /src
parent: 4c766c7959192b455ae752a7759ffc159e0524a6 (diff)
1 files changed, 28 insertions, 7 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index f4cae8f560b..eb7452ec130 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -172,14 +172,23 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
    drm_intel_bo *new_bo;
 
    new_bo = drm_intel_bo_alloc(brw->bufmgr, "program cache", new_size, 64);
+   if (brw->has_llc)
+      drm_intel_gem_bo_map_unsynchronized(new_bo);
 
    /* Copy any existing data that needs to be saved. */
    if (cache->next_offset != 0) {
-      drm_intel_bo_map(cache->bo, false);
-      drm_intel_bo_subdata(new_bo, 0, cache->next_offset, cache->bo->virtual);
-      drm_intel_bo_unmap(cache->bo);
+      if (brw->has_llc) {
+         memcpy(new_bo->virtual, cache->bo->virtual, cache->next_offset);
+      } else {
+         drm_intel_bo_map(cache->bo, false);
+         drm_intel_bo_subdata(new_bo, 0, cache->next_offset,
+                              cache->bo->virtual);
+         drm_intel_bo_unmap(cache->bo);
+      }
    }
 
+   if (brw->has_llc)
+      drm_intel_bo_unmap(cache->bo);
    drm_intel_bo_unreference(cache->bo);
    cache->bo = new_bo;
    cache->bo_used_by_gpu = false;
@@ -200,6 +209,7 @@ brw_try_upload_using_copy(struct brw_cache *cache,
 			  const void *data,
 			  const void *aux)
 {
+   struct brw_context *brw = cache->brw;
    int i;
    struct brw_cache_item *item;
 
@@ -221,9 +231,11 @@ brw_try_upload_using_copy(struct brw_cache *cache,
 	    continue;
 	 }
 
-	 drm_intel_bo_map(cache->bo, false);
+         if (!brw->has_llc)
+            drm_intel_bo_map(cache->bo, false);
 	 ret = memcmp(cache->bo->virtual + item->offset, data, item->size);
-	 drm_intel_bo_unmap(cache->bo);
+         if (!brw->has_llc)
+            drm_intel_bo_unmap(cache->bo);
 	 if (ret)
 	    continue;
 
@@ -256,7 +268,7 @@ brw_upload_item_data(struct brw_cache *cache,
    /* If we would block on writing to an in-use program BO, just
     * recreate it.
     */
-   if (cache->bo_used_by_gpu) {
+   if (!brw->has_llc && cache->bo_used_by_gpu) {
       perf_debug("Copying busy program cache buffer.\n");
       brw_cache_new_bo(cache, cache->bo->size);
    }
@@ -279,6 +291,7 @@ brw_upload_cache(struct brw_cache *cache,
 		 uint32_t *out_offset,
 		 void *out_aux)
 {
+   struct brw_context *brw = cache->brw;
    struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
    GLuint hash;
    void *tmp;
@@ -319,7 +332,11 @@ brw_upload_cache(struct brw_cache *cache,
    cache->n_items++;
 
    /* Copy data to the buffer */
-   drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
+   if (brw->has_llc) {
+      memcpy((char *) cache->bo->virtual + item->offset, data, data_size);
+   } else {
+      drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
+   }
 
    *out_offset = item->offset;
    *(void **)out_aux = (void *)((char *)item->key + item->key_size);
@@ -341,6 +358,8 @@ brw_init_caches(struct brw_context *brw)
    cache->bo = drm_intel_bo_alloc(brw->bufmgr,
 				  "program cache",
 				  4096, 64);
+   if (brw->has_llc)
+      drm_intel_gem_bo_map_unsynchronized(cache->bo);
 
    cache->aux_compare[BRW_VS_PROG] = brw_vs_prog_data_compare;
    cache->aux_compare[BRW_GS_PROG] = brw_gs_prog_data_compare;
@@ -407,6 +426,8 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
 
    DBG("%s\n", __FUNCTION__);
 
+   if (brw->has_llc)
+      drm_intel_bo_unmap(cache->bo);
    drm_intel_bo_unreference(cache->bo);
    cache->bo = NULL;
    brw_clear_cache(brw, cache);
author	Kenneth Graunke <[email protected]>	2014-08-21 14:41:17 -0700
committer	Kenneth Graunke <[email protected]>	2014-10-13 06:55:08 -0700
commit	02ca66fbc3e2b272afcb9bae66348d3b1900e2c3 (patch)
tree	96613387f81fee43143b87e1da156c0dccf8b751 /src
parent	4c766c7959192b455ae752a7759ffc159e0524a6 (diff)