i965: Reuse existing program data when a new compiled program matches.

It's common in applications just before the advent of EXT_separate_shader_objects to have multiple linked shaders with the same VS or FS. While we aren't detecting those at the Mesa level, we can detect when our compiled output happens to match an existing compiled program. This patch was created after noting the incredible amount of compiled program data generated by Heroes of Newerth. It reduces the program data in use at the start menu (replayed by apitrace) from 828kb to 632kb, and reduces CACHE_NEW_WM_PROG state flagging by 3/4. It doesn't impact our rate of hardware state changes yet, because things depending on CACHE_NEW_WM_PROG also depend on BRW_NEW_FRAGMENT_PROGRAM, which is still being flagged.

Reviewed-by: Ian Romanick <[email protected]>
author: Eric Anholt <[email protected]> 2011-02-26 02:01:37 -0800
committer: Eric Anholt <[email protected]> 2011-06-24 10:36:49 -0700
commit: 18d4a44bdc2ed91ec9511d816acddc4a0bd7f9be (patch)
tree: 0c5108ce84f72f3aba6b45f6c2dd5b9fe14a9171 /src/mesa/drivers
parent: d91dc4a356e5509116572770b89d0a7520a55bfc (diff)
2 files changed, 82 insertions, 20 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 16b71f6b1c9..559dc688652 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -365,6 +365,7 @@ struct brw_cache_item {
    /** 32-bit hash of the key data */
    GLuint hash;
    GLuint key_size;		/* for variable-sized keys */
+   GLuint aux_size;
    const void *key;
 
    uint32_t offset;
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index d13711b19b7..3988625ea91 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -187,6 +187,77 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
    brw->state.dirty.brw |= BRW_NEW_PROGRAM_CACHE;
 }
 
+/**
+ * Attempts to find an item already in the cache whose program data and
+ * aux data are byte-identical to those of the item being uploaded, so
+ * that the new item can share the existing copy.
+ *
+ * Only items with the same cache_id, data size and aux size as
+ * result_item are considered.
+ *
+ * \param cache        cache whose hash buckets and BO are searched
+ * \param result_item  item being added; its cache_id, size and aux_size
+ *                     must already be set.  On a match its offset is set
+ *                     to the matching item's offset.
+ * \param data         program data to compare against the BO contents
+ * \param aux          aux data to compare against each item's stored aux
+ *
+ * \return true if a match was found and result_item->offset was set;
+ *         false if nothing matched or the cache BO could not be mapped.
+ */
+static bool
+brw_try_upload_using_copy(struct brw_cache *cache,
+			  struct brw_cache_item *result_item,
+			  const void *data,
+			  const void *aux)
+{
+   int i;
+   struct brw_cache_item *item;
+
+   for (i = 0; i < cache->size; i++) {
+      for (item = cache->items[i]; item; item = item->next) {
+	 /* The aux data sits directly after the key bytes.  Go through
+	  * char pointers: arithmetic on void * is a GNU extension.
+	  */
+	 const char *item_aux = (const char *) item->key + item->key_size;
+	 int ret;
+
+	 if (item->cache_id != result_item->cache_id ||
+	     item->size != result_item->size ||
+	     item->aux_size != result_item->aux_size) {
+	    continue;
+	 }
+
+	 /* Compare the CPU-side aux data first so the BO is only mapped
+	  * for candidates that already look like a match.
+	  */
+	 if (memcmp(item_aux, aux, item->aux_size) != 0) {
+	    continue;
+	 }
+
+	 /* If the BO cannot be mapped there is nothing to compare
+	  * against; report no match so the caller uploads normally.
+	  */
+	 if (drm_intel_bo_map(cache->bo, false) != 0)
+	    return false;
+
+	 ret = memcmp((const char *) cache->bo->virtual + item->offset,
+		      data, item->size);
+	 drm_intel_bo_unmap(cache->bo);
+	 if (ret)
+	    continue;
+
+	 result_item->offset = item->offset;
+
+	 return true;
+      }
+   }
+
+   return false;
+}
+
+/**
+ * Reserves room in the cache BO for item's program data and records the
+ * chosen location in item->offset.
+ *
+ * item->size must be set before calling.  The data pointer is not read
+ * here; this function only sizes the BO and updates offset bookkeeping.
+ */
+static void
+brw_upload_item_data(struct brw_cache *cache,
+		     struct brw_cache_item *item,
+		     const void *data)
+{
+   /* Keep doubling the BO size until the new program fits after
+    * next_offset, then switch to a BO of that size.
+    */
+   if (cache->next_offset + item->size > cache->bo->size) {
+      uint32_t grown_size = cache->bo->size;
+
+      do {
+	 grown_size *= 2;
+      } while (cache->next_offset + item->size > grown_size);
+
+      brw_cache_new_bo(cache, grown_size);
+   }
+
+   /* Writing to a program BO that is still in use would block, so
+    * recreate it at its current size instead.
+    */
+   if (cache->bo_used_by_gpu)
+      brw_cache_new_bo(cache, cache->bo->size);
+
+   /* Place the program at the current tail of the BO, then advance the
+    * tail to the next 64-byte boundary, since programs are always
+    * 64-byte aligned.
+    */
+   item->offset = cache->next_offset;
+   cache->next_offset = ALIGN(item->offset + item->size, 64);
+}
+
 void
 brw_upload_cache(struct brw_cache *cache,
 		 enum brw_cache_id cache_id,
@@ -204,34 +275,24 @@ brw_upload_cache(struct brw_cache *cache,
    void *tmp;
 
    item->cache_id = cache_id;
+   item->size = data_size;
    item->key = key;
    item->key_size = key_size;
+   item->aux_size = aux_size;
    hash = hash_key(item);
    item->hash = hash;
 
-   /* Allocate space in the cache BO for our new program. */
-   if (cache->next_offset + data_size > cache->bo->size) {
-      uint32_t new_size = cache->bo->size * 2;
-
-      while (cache->next_offset + data_size > new_size)
-	 new_size *= 2;
-
-      brw_cache_new_bo(cache, new_size);
-   }
-
-   /* If we would block on writing to an in-use program BO, just
-    * recreate it.
+   /* If we can find a matching prog/prog_data combo in the cache
+    * already, then reuse the existing stuff.  This will mean not
+    * flagging CACHE_NEW_* when transitioning between the two
+    * equivalent hash keys.  This is notably useful for programs
+    * generating shaders at runtime, where multiple shaders may
+    * compile to the same thing in our backend.
     */
-   if (cache->bo_used_by_gpu) {
-      brw_cache_new_bo(cache, cache->bo->size);
+   if (!brw_try_upload_using_copy(cache, item, data, aux)) {
+      brw_upload_item_data(cache, item, data);
    }
 
-   item->offset = cache->next_offset;
-   item->size = data_size;
-
-   /* Programs are always 64-byte aligned, so set up the next one now */
-   cache->next_offset = ALIGN(item->offset + data_size, 64);
-
    /* Set up the memory containing the key and aux_data */
    tmp = malloc(key_size + aux_size);
author	Eric Anholt <[email protected]>	2011-02-26 02:01:37 -0800
committer	Eric Anholt <[email protected]>	2011-06-24 10:36:49 -0700
commit	18d4a44bdc2ed91ec9511d816acddc4a0bd7f9be (patch)
tree	0c5108ce84f72f3aba6b45f6c2dd5b9fe14a9171 /src/mesa/drivers
parent	d91dc4a356e5509116572770b89d0a7520a55bfc (diff)