From 0d897be973343fccfd9481360497d8a8792c8f9c Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 11 Nov 2016 15:01:24 +1100 Subject: i965: rename brw_state_cache.c -> brw_program_cache.c Acked-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 2 +- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/brw_program_cache.c | 446 ++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_state.h | 2 +- src/mesa/drivers/dri/i965/brw_state_cache.c | 446 -------------------------- 5 files changed, 449 insertions(+), 449 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_program_cache.c delete mode 100644 src/mesa/drivers/dri/i965/brw_state_cache.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 30648e313fd..1c33ea55fae 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -139,6 +139,7 @@ i965_FILES = \ brw_pipe_control.c \ brw_program.c \ brw_program.h \ + brw_program_cache.c \ brw_primitive_restart.c \ brw_queryobj.c \ brw_reset.c \ @@ -148,7 +149,6 @@ i965_FILES = \ brw_sf.h \ brw_sf_state.c \ brw_state_batch.c \ - brw_state_cache.c \ brw_state_dump.c \ brw_state.h \ brw_state_upload.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 310372ac82f..3a88e56b51e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -178,7 +178,7 @@ enum brw_cache_id { }; enum brw_state_id { - /* brw_cache_ids must come first - see brw_state_cache.c */ + /* brw_cache_ids must come first - see brw_program_cache.c */ BRW_STATE_URB_FENCE = BRW_MAX_CACHE, BRW_STATE_FRAGMENT_PROGRAM, BRW_STATE_GEOMETRY_PROGRAM, diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c new file mode 100644 index 00000000000..4a67b96718b --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c @@ -0,0 +1,446 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +/** @file brw_program_cache.c + * + * This file implements a simple program cache for 965. The consumers can + * query the hash table of programs using a cache_id and program key, and + * receive the corresponding program buffer object (plus associated auxiliary + * data) in return. Objects in the cache may not have relocations + * (pointers to other BOs) in them. + * + * The inner workings are a simple hash table based on a CRC of the + * key data. + * + * Replacement is not implemented. Instead, when the cache gets too + * big we throw out all of the cache data and let it get regenerated. + */ + +#include "main/imports.h" +#include "intel_batchbuffer.h" +#include "brw_state.h" +#include "brw_vs.h" +#include "brw_wm.h" +#include "brw_gs.h" +#include "brw_cs.h" +#include "brw_program.h" + +#define FILE_DEBUG_FLAG DEBUG_STATE + +static GLuint +hash_key(struct brw_cache_item *item) +{ + GLuint *ikey = (GLuint *)item->key; + GLuint hash = item->cache_id, i; + + assert(item->key_size % 4 == 0); + + /* I'm sure this can be improved on: + */ + for (i = 0; i < item->key_size/4; i++) { + hash ^= ikey[i]; + hash = (hash << 5) | (hash >> 27); + } + + return hash; +} + +static int +brw_cache_item_equals(const struct brw_cache_item *a, + const struct brw_cache_item *b) +{ + return a->cache_id == b->cache_id && + a->hash == b->hash && + a->key_size == b->key_size && + (memcmp(a->key, b->key, a->key_size) == 0); +} + +static struct brw_cache_item * +search_cache(struct brw_cache *cache, GLuint hash, + struct brw_cache_item *lookup) +{ + struct brw_cache_item *c; + +#if 0 + int bucketcount = 0; + + for (c = cache->items[hash % cache->size]; c; c = c->next) + bucketcount++; + + fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size, + cache->size, bucketcount, cache->n_items); +#endif + + for (c = cache->items[hash % cache->size]; c; c = c->next) { + if (brw_cache_item_equals(lookup, c)) + return c; + } + + return NULL; +} + + +static void +rehash(struct brw_cache *cache) +{ + struct brw_cache_item **items; + struct brw_cache_item *c, *next; + GLuint size, i; + + size = cache->size * 3; + items = calloc(size, sizeof(*items)); + + for (i = 0; i < cache->size; i++) + for (c = cache->items[i]; c; c = next) { + next = c->next; + c->next = items[c->hash % size]; + items[c->hash % size] = c; + } + + free(cache->items); + cache->items = items; + cache->size = size; +} + + +/** + * Returns the buffer object matching cache_id and key, or NULL. + */ +bool +brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, GLuint key_size, + uint32_t *inout_offset, void *inout_aux) +{ + struct brw_context *brw = cache->brw; + struct brw_cache_item *item; + struct brw_cache_item lookup; + GLuint hash; + + lookup.cache_id = cache_id; + lookup.key = key; + lookup.key_size = key_size; + hash = hash_key(&lookup); + lookup.hash = hash; + + item = search_cache(cache, hash, &lookup); + + if (item == NULL) + return false; + + void *aux = ((char *) item->key) + item->key_size; + + if (item->offset != *inout_offset || aux != *((void **) inout_aux)) { + brw->ctx.NewDriverState |= (1 << cache_id); + *inout_offset = item->offset; + *((void **) inout_aux) = aux; + } + + return true; +} + +static void +brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) +{ + struct brw_context *brw = cache->brw; + drm_intel_bo *new_bo; + + new_bo = drm_intel_bo_alloc(brw->bufmgr, "program cache", new_size, 64); + if (brw->has_llc) + drm_intel_gem_bo_map_unsynchronized(new_bo); + + /* Copy any existing data that needs to be saved. */ + if (cache->next_offset != 0) { + if (brw->has_llc) { + memcpy(new_bo->virtual, cache->bo->virtual, cache->next_offset); + } else { + drm_intel_bo_map(cache->bo, false); + drm_intel_bo_subdata(new_bo, 0, cache->next_offset, + cache->bo->virtual); + drm_intel_bo_unmap(cache->bo); + } + } + + if (brw->has_llc) + drm_intel_bo_unmap(cache->bo); + drm_intel_bo_unreference(cache->bo); + cache->bo = new_bo; + cache->bo_used_by_gpu = false; + + /* Since we have a new BO in place, we need to signal the units + * that depend on it (state base address on gen5+, or unit state before). + */ + brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE; + brw->batch.state_base_address_emitted = false; +} + +/** + * Attempts to find an item in the cache with identical data. + */ +static const struct brw_cache_item * +brw_lookup_prog(const struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, unsigned data_size) +{ + const struct brw_context *brw = cache->brw; + unsigned i; + const struct brw_cache_item *item; + + for (i = 0; i < cache->size; i++) { + for (item = cache->items[i]; item; item = item->next) { + int ret; + + if (item->cache_id != cache_id || item->size != data_size) + continue; + + if (!brw->has_llc) + drm_intel_bo_map(cache->bo, false); + ret = memcmp(cache->bo->virtual + item->offset, data, item->size); + if (!brw->has_llc) + drm_intel_bo_unmap(cache->bo); + if (ret) + continue; + + return item; + } + } + + return NULL; +} + +static uint32_t +brw_alloc_item_data(struct brw_cache *cache, uint32_t size) +{ + uint32_t offset; + struct brw_context *brw = cache->brw; + + /* Allocate space in the cache BO for our new program. */ + if (cache->next_offset + size > cache->bo->size) { + uint32_t new_size = cache->bo->size * 2; + + while (cache->next_offset + size > new_size) + new_size *= 2; + + brw_cache_new_bo(cache, new_size); + } + + /* If we would block on writing to an in-use program BO, just + * recreate it. + */ + if (!brw->has_llc && cache->bo_used_by_gpu) { + perf_debug("Copying busy program cache buffer.\n"); + brw_cache_new_bo(cache, cache->bo->size); + } + + offset = cache->next_offset; + + /* Programs are always 64-byte aligned, so set up the next one now */ + cache->next_offset = ALIGN(offset + size, 64); + + return offset; +} + +void +brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + const void *data, + GLuint data_size, + const void *aux, + GLuint aux_size, + uint32_t *out_offset, + void *out_aux) +{ + struct brw_context *brw = cache->brw; + struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); + const struct brw_cache_item *matching_data = + brw_lookup_prog(cache, cache_id, data, data_size); + GLuint hash; + void *tmp; + + item->cache_id = cache_id; + item->size = data_size; + item->key = key; + item->key_size = key_size; + item->aux_size = aux_size; + hash = hash_key(item); + item->hash = hash; + + /* If we can find a matching prog in the cache already, then reuse the + * existing stuff without creating new copy into the underlying buffer + * object. This is notably useful for programs generating shaders at + * runtime, where multiple shaders may compile to the same thing in our + * backend. + */ + if (matching_data) { + item->offset = matching_data->offset; + } else { + item->offset = brw_alloc_item_data(cache, data_size); + + /* Copy data to the buffer */ + if (brw->has_llc) { + memcpy((char *)cache->bo->virtual + item->offset, data, data_size); + } else { + drm_intel_bo_subdata(cache->bo, item->offset, data_size, data); + } + } + + /* Set up the memory containing the key and aux_data */ + tmp = malloc(key_size + aux_size); + + memcpy(tmp, key, key_size); + memcpy(tmp + key_size, aux, aux_size); + + item->key = tmp; + + if (cache->n_items > cache->size * 1.5f) + rehash(cache); + + hash %= cache->size; + item->next = cache->items[hash]; + cache->items[hash] = item; + cache->n_items++; + + *out_offset = item->offset; + *(void **)out_aux = (void *)((char *)item->key + item->key_size); + cache->brw->ctx.NewDriverState |= 1 << cache_id; +} + +void +brw_init_caches(struct brw_context *brw) +{ + struct brw_cache *cache = &brw->cache; + + cache->brw = brw; + + cache->size = 7; + cache->n_items = 0; + cache->items = + calloc(cache->size, sizeof(struct brw_cache_item *)); + + cache->bo = drm_intel_bo_alloc(brw->bufmgr, + "program cache", + 4096, 64); + if (brw->has_llc) + drm_intel_gem_bo_map_unsynchronized(cache->bo); +} + +static void +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) +{ + struct brw_cache_item *c, *next; + GLuint i; + + DBG("%s\n", __func__); + + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { + next = c->next; + if (c->cache_id == BRW_CACHE_VS_PROG || + c->cache_id == BRW_CACHE_TCS_PROG || + c->cache_id == BRW_CACHE_TES_PROG || + c->cache_id == BRW_CACHE_GS_PROG || + c->cache_id == BRW_CACHE_FS_PROG || + c->cache_id == BRW_CACHE_CS_PROG) { + const void *item_aux = c->key + c->key_size; + brw_stage_prog_data_free(item_aux); + } + free((void *)c->key); + free(c); + } + cache->items[i] = NULL; + } + + cache->n_items = 0; + + /* Start putting programs into the start of the BO again, since + * we'll never find the old results. + */ + cache->next_offset = 0; + + /* We need to make sure that the programs get regenerated, since + * any offsets leftover in brw_context will no longer be valid. + */ + brw->NewGLState = ~0; + brw->ctx.NewDriverState = ~0ull; + brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0; + brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull; + brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0; + brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull; + + /* Also, NULL out any stale program pointers. */ + brw->vs.base.prog_data = NULL; + brw->tcs.base.prog_data = NULL; + brw->tes.base.prog_data = NULL; + brw->gs.base.prog_data = NULL; + brw->wm.base.prog_data = NULL; + brw->cs.base.prog_data = NULL; + + intel_batchbuffer_flush(brw); +} + +void +brw_state_cache_check_size(struct brw_context *brw) +{ + /* un-tuned guess. Each object is generally a page, so 2000 of them is 8 MB of + * state cache. + */ + if (brw->cache.n_items > 2000) { + perf_debug("Exceeded state cache size limit. Clearing the set " + "of compiled programs, which will trigger recompiles\n"); + brw_clear_cache(brw, &brw->cache); + } +} + + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) +{ + + DBG("%s\n", __func__); + + if (brw->has_llc) + drm_intel_bo_unmap(cache->bo); + drm_intel_bo_unreference(cache->bo); + cache->bo = NULL; + brw_clear_cache(brw, cache); + free(cache->items); + cache->items = NULL; + cache->size = 0; +} + + +void +brw_destroy_caches(struct brw_context *brw) +{ + brw_destroy_cache(brw, &brw->cache); +} diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 841c3dee5a5..38c2cbc3d18 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -217,7 +217,7 @@ brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) } /*********************************************************************** - * brw_state_cache.c + * brw_program_cache.c */ void brw_upload_cache(struct brw_cache *cache, diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c deleted file mode 100644 index e8e71ab229f..00000000000 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -/** @file brw_state_cache.c - * - * This file implements a simple static state cache for 965. The - * consumers can query the hash table of state using a cache_id, - * opaque key data, and receive the corresponding state buffer object - * of state (plus associated auxiliary data) in return. Objects in - * the cache may not have relocations (pointers to other BOs) in them. - * - * The inner workings are a simple hash table based on a CRC of the - * key data. - * - * Replacement is not implemented. Instead, when the cache gets too - * big we throw out all of the cache data and let it get regenerated. - */ - -#include "main/imports.h" -#include "intel_batchbuffer.h" -#include "brw_state.h" -#include "brw_vs.h" -#include "brw_wm.h" -#include "brw_gs.h" -#include "brw_cs.h" -#include "brw_program.h" - -#define FILE_DEBUG_FLAG DEBUG_STATE - -static GLuint -hash_key(struct brw_cache_item *item) -{ - GLuint *ikey = (GLuint *)item->key; - GLuint hash = item->cache_id, i; - - assert(item->key_size % 4 == 0); - - /* I'm sure this can be improved on: - */ - for (i = 0; i < item->key_size/4; i++) { - hash ^= ikey[i]; - hash = (hash << 5) | (hash >> 27); - } - - return hash; -} - -static int -brw_cache_item_equals(const struct brw_cache_item *a, - const struct brw_cache_item *b) -{ - return a->cache_id == b->cache_id && - a->hash == b->hash && - a->key_size == b->key_size && - (memcmp(a->key, b->key, a->key_size) == 0); -} - -static struct brw_cache_item * -search_cache(struct brw_cache *cache, GLuint hash, - struct brw_cache_item *lookup) -{ - struct brw_cache_item *c; - -#if 0 - int bucketcount = 0; - - for (c = cache->items[hash % cache->size]; c; c = c->next) - bucketcount++; - - fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size, - cache->size, bucketcount, cache->n_items); -#endif - - for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (brw_cache_item_equals(lookup, c)) - return c; - } - - return NULL; -} - - -static void -rehash(struct brw_cache *cache) -{ - struct brw_cache_item **items; - struct brw_cache_item *c, *next; - GLuint size, i; - - size = cache->size * 3; - items = calloc(size, sizeof(*items)); - - for (i = 0; i < cache->size; i++) - for (c = cache->items[i]; c; c = next) { - next = c->next; - c->next = items[c->hash % size]; - items[c->hash % size] = c; - } - - free(cache->items); - cache->items = items; - cache->size = size; -} - - -/** - * Returns the buffer object matching cache_id and key, or NULL. - */ -bool -brw_search_cache(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, GLuint key_size, - uint32_t *inout_offset, void *inout_aux) -{ - struct brw_context *brw = cache->brw; - struct brw_cache_item *item; - struct brw_cache_item lookup; - GLuint hash; - - lookup.cache_id = cache_id; - lookup.key = key; - lookup.key_size = key_size; - hash = hash_key(&lookup); - lookup.hash = hash; - - item = search_cache(cache, hash, &lookup); - - if (item == NULL) - return false; - - void *aux = ((char *) item->key) + item->key_size; - - if (item->offset != *inout_offset || aux != *((void **) inout_aux)) { - brw->ctx.NewDriverState |= (1 << cache_id); - *inout_offset = item->offset; - *((void **) inout_aux) = aux; - } - - return true; -} - -static void -brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) -{ - struct brw_context *brw = cache->brw; - drm_intel_bo *new_bo; - - new_bo = drm_intel_bo_alloc(brw->bufmgr, "program cache", new_size, 64); - if (brw->has_llc) - drm_intel_gem_bo_map_unsynchronized(new_bo); - - /* Copy any existing data that needs to be saved. */ - if (cache->next_offset != 0) { - if (brw->has_llc) { - memcpy(new_bo->virtual, cache->bo->virtual, cache->next_offset); - } else { - drm_intel_bo_map(cache->bo, false); - drm_intel_bo_subdata(new_bo, 0, cache->next_offset, - cache->bo->virtual); - drm_intel_bo_unmap(cache->bo); - } - } - - if (brw->has_llc) - drm_intel_bo_unmap(cache->bo); - drm_intel_bo_unreference(cache->bo); - cache->bo = new_bo; - cache->bo_used_by_gpu = false; - - /* Since we have a new BO in place, we need to signal the units - * that depend on it (state base address on gen5+, or unit state before). - */ - brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE; - brw->batch.state_base_address_emitted = false; -} - -/** - * Attempts to find an item in the cache with identical data. - */ -static const struct brw_cache_item * -brw_lookup_prog(const struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, unsigned data_size) -{ - const struct brw_context *brw = cache->brw; - unsigned i; - const struct brw_cache_item *item; - - for (i = 0; i < cache->size; i++) { - for (item = cache->items[i]; item; item = item->next) { - int ret; - - if (item->cache_id != cache_id || item->size != data_size) - continue; - - if (!brw->has_llc) - drm_intel_bo_map(cache->bo, false); - ret = memcmp(cache->bo->virtual + item->offset, data, item->size); - if (!brw->has_llc) - drm_intel_bo_unmap(cache->bo); - if (ret) - continue; - - return item; - } - } - - return NULL; -} - -static uint32_t -brw_alloc_item_data(struct brw_cache *cache, uint32_t size) -{ - uint32_t offset; - struct brw_context *brw = cache->brw; - - /* Allocate space in the cache BO for our new program. */ - if (cache->next_offset + size > cache->bo->size) { - uint32_t new_size = cache->bo->size * 2; - - while (cache->next_offset + size > new_size) - new_size *= 2; - - brw_cache_new_bo(cache, new_size); - } - - /* If we would block on writing to an in-use program BO, just - * recreate it. - */ - if (!brw->has_llc && cache->bo_used_by_gpu) { - perf_debug("Copying busy program cache buffer.\n"); - brw_cache_new_bo(cache, cache->bo->size); - } - - offset = cache->next_offset; - - /* Programs are always 64-byte aligned, so set up the next one now */ - cache->next_offset = ALIGN(offset + size, 64); - - return offset; -} - -void -brw_upload_cache(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - const void *data, - GLuint data_size, - const void *aux, - GLuint aux_size, - uint32_t *out_offset, - void *out_aux) -{ - struct brw_context *brw = cache->brw; - struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); - const struct brw_cache_item *matching_data = - brw_lookup_prog(cache, cache_id, data, data_size); - GLuint hash; - void *tmp; - - item->cache_id = cache_id; - item->size = data_size; - item->key = key; - item->key_size = key_size; - item->aux_size = aux_size; - hash = hash_key(item); - item->hash = hash; - - /* If we can find a matching prog in the cache already, then reuse the - * existing stuff without creating new copy into the underlying buffer - * object. This is notably useful for programs generating shaders at - * runtime, where multiple shaders may compile to the same thing in our - * backend. - */ - if (matching_data) { - item->offset = matching_data->offset; - } else { - item->offset = brw_alloc_item_data(cache, data_size); - - /* Copy data to the buffer */ - if (brw->has_llc) { - memcpy((char *)cache->bo->virtual + item->offset, data, data_size); - } else { - drm_intel_bo_subdata(cache->bo, item->offset, data_size, data); - } - } - - /* Set up the memory containing the key and aux_data */ - tmp = malloc(key_size + aux_size); - - memcpy(tmp, key, key_size); - memcpy(tmp + key_size, aux, aux_size); - - item->key = tmp; - - if (cache->n_items > cache->size * 1.5f) - rehash(cache); - - hash %= cache->size; - item->next = cache->items[hash]; - cache->items[hash] = item; - cache->n_items++; - - *out_offset = item->offset; - *(void **)out_aux = (void *)((char *)item->key + item->key_size); - cache->brw->ctx.NewDriverState |= 1 << cache_id; -} - -void -brw_init_caches(struct brw_context *brw) -{ - struct brw_cache *cache = &brw->cache; - - cache->brw = brw; - - cache->size = 7; - cache->n_items = 0; - cache->items = - calloc(cache->size, sizeof(struct brw_cache_item *)); - - cache->bo = drm_intel_bo_alloc(brw->bufmgr, - "program cache", - 4096, 64); - if (brw->has_llc) - drm_intel_gem_bo_map_unsynchronized(cache->bo); -} - -static void -brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) -{ - struct brw_cache_item *c, *next; - GLuint i; - - DBG("%s\n", __func__); - - for (i = 0; i < cache->size; i++) { - for (c = cache->items[i]; c; c = next) { - next = c->next; - if (c->cache_id == BRW_CACHE_VS_PROG || - c->cache_id == BRW_CACHE_TCS_PROG || - c->cache_id == BRW_CACHE_TES_PROG || - c->cache_id == BRW_CACHE_GS_PROG || - c->cache_id == BRW_CACHE_FS_PROG || - c->cache_id == BRW_CACHE_CS_PROG) { - const void *item_aux = c->key + c->key_size; - brw_stage_prog_data_free(item_aux); - } - free((void *)c->key); - free(c); - } - cache->items[i] = NULL; - } - - cache->n_items = 0; - - /* Start putting programs into the start of the BO again, since - * we'll never find the old results. - */ - cache->next_offset = 0; - - /* We need to make sure that the programs get regenerated, since - * any offsets leftover in brw_context will no longer be valid. - */ - brw->NewGLState = ~0; - brw->ctx.NewDriverState = ~0ull; - brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0; - brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull; - brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0; - brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull; - - /* Also, NULL out any stale program pointers. */ - brw->vs.base.prog_data = NULL; - brw->tcs.base.prog_data = NULL; - brw->tes.base.prog_data = NULL; - brw->gs.base.prog_data = NULL; - brw->wm.base.prog_data = NULL; - brw->cs.base.prog_data = NULL; - - intel_batchbuffer_flush(brw); -} - -void -brw_state_cache_check_size(struct brw_context *brw) -{ - /* un-tuned guess. Each object is generally a page, so 2000 of them is 8 MB of - * state cache. - */ - if (brw->cache.n_items > 2000) { - perf_debug("Exceeded state cache size limit. Clearing the set " - "of compiled programs, which will trigger recompiles\n"); - brw_clear_cache(brw, &brw->cache); - } -} - - -static void -brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) -{ - - DBG("%s\n", __func__); - - if (brw->has_llc) - drm_intel_bo_unmap(cache->bo); - drm_intel_bo_unreference(cache->bo); - cache->bo = NULL; - brw_clear_cache(brw, cache); - free(cache->items); - cache->items = NULL; - cache->size = 0; -} - - -void -brw_destroy_caches(struct brw_context *brw) -{ - brw_destroy_cache(brw, &brw->cache); -} -- cgit v1.2.3