/*
 * Mesa 3-D graphics library
 *
 * Copyright 2003 VMware, Inc.
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/glheader.h"
#include "main/context.h"
#include "main/varray.h"
#include "main/macros.h"
#include "main/sse_minmax.h"
#include "x86/common_x86_asm.h"
#include "util/hash_table.h"


/**
 * Key identifying one scanned index range inside a buffer object.
 *
 * The key is hashed as raw bytes (see vbo_minmax_cache_hash()) and compared
 * field by field (see vbo_minmax_cache_key_equal()).
 */
struct minmax_cache_key {
   GLintptr offset;        /**< offset of the first index in the buffer */
   GLuint count;           /**< number of indices in the range */
   unsigned index_size;    /**< size of one index (1, 2 or 4) */
};


/**
 * One cached result: the key of the scanned range plus the min/max index
 * values that were found in it.  Entries are heap-allocated and owned by the
 * hash table stored in gl_buffer_object::MinMaxCache.
 */
struct minmax_cache_entry {
   struct minmax_cache_key key;
   GLuint min;
   GLuint max;
};


/**
 * Hash callback for the min/max cache: hashes the bytes of the key.
 */
static uint32_t
vbo_minmax_cache_hash(const struct minmax_cache_key *key)
{
   return _mesa_hash_data(key, sizeof(*key));
}


/**
 * Equality callback for the min/max cache.
 *
 * \return true when offset, count and index_size all match.
 */
static bool
vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
                           const struct minmax_cache_key *b)
{
   return (a->offset == b->offset) &&
          (a->count == b->count) &&
          (a->index_size == b->index_size);
}


/**
 * Per-entry destructor handed to the hash table: releases the
 * minmax_cache_entry stored as the entry's data.
 */
static void
vbo_minmax_cache_delete_entry(struct hash_entry *entry)
{
   free(entry->data);
}


/**
 * Decide whether min/max results may be cached for \p bufferObj.
 *
 * \return GL_FALSE if the buffer's usage history contains any of the usage
 *         flags tested below (including USAGE_DISABLE_MINMAX_CACHE), or if
 *         the MAP_USER mapping has both GL_MAP_PERSISTENT_BIT and
 *         GL_MAP_WRITE_BIT set; GL_TRUE otherwise.
 */
static GLboolean
vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
{
   if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
                                  USAGE_ATOMIC_COUNTER_BUFFER |
                                  USAGE_SHADER_STORAGE_BUFFER |
                                  USAGE_TRANSFORM_FEEDBACK_BUFFER |
                                  USAGE_PIXEL_PACK_BUFFER |
                                  USAGE_DISABLE_MINMAX_CACHE))
      return GL_FALSE;

   if ((bufferObj->Mappings[MAP_USER].AccessFlags &
        (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
       (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
      return GL_FALSE;

   return GL_TRUE;
}


/**
 * Destroy the min/max cache of \p bufferObj, freeing every cached entry,
 * and reset the cache pointer to NULL.
 */
void
vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
{
   _mesa_hash_table_destroy(bufferObj->MinMaxCache,
                            vbo_minmax_cache_delete_entry);
   bufferObj->MinMaxCache = NULL;
}


/**
 * Try to fetch a previously computed min/max pair from the buffer's cache.
 *
 * The two early-out checks (no cache table yet, caching not allowed for this
 * buffer) are done before bufferObj->Mutex is taken; everything else runs
 * with the mutex held.
 *
 * If the cache is flagged dirty, it is either disabled permanently for this
 * buffer (see the comment in the body) or cleared; in the latter case the
 * call is accounted as a miss.
 *
 * \param bufferObj   buffer object holding the indices
 * \param index_size  size of one index
 * \param offset      offset of the first index in the buffer
 * \param count       number of indices; also the amount added to the
 *                    hit or miss counter
 * \param min_index   written only on a cache hit
 * \param max_index   written only on a cache hit
 *
 * \return GL_TRUE on a cache hit, GL_FALSE otherwise.
 */
static GLboolean
vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
                      unsigned index_size, GLintptr offset, GLuint count,
                      GLuint *min_index, GLuint *max_index)
{
   GLboolean found = GL_FALSE;
   struct minmax_cache_key key;
   uint32_t hash;
   struct hash_entry *result;

   if (!bufferObj->MinMaxCache)
      return GL_FALSE;
   if (!vbo_use_minmax_cache(bufferObj))
      return GL_FALSE;

   mtx_lock(&bufferObj->Mutex);

   if (bufferObj->MinMaxCacheDirty) {
      /* Disable the cache permanently for this BO if the number of hits
       * is asymptotically less than the number of misses. This happens when
       * applications use the BO for streaming.
       *
       * However, some initial optimism allows applications that interleave
       * draw calls with glBufferSubData during warmup.
       */
      unsigned optimism = bufferObj->Size;
      if (bufferObj->MinMaxCacheMissIndices > optimism &&
          bufferObj->MinMaxCacheHitIndices <
             bufferObj->MinMaxCacheMissIndices - optimism) {
         bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
         vbo_delete_minmax_cache(bufferObj);
         goto out_disable;
      }

      /* Drop all stale entries; this lookup is then counted as a miss. */
      _mesa_hash_table_clear(bufferObj->MinMaxCache,
                             vbo_minmax_cache_delete_entry);
      bufferObj->MinMaxCacheDirty = false;
      goto out_invalidate;
   }

   key.index_size = index_size;
   key.offset = offset;
   key.count = count;
   hash = vbo_minmax_cache_hash(&key);
   result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
                                               hash, &key);
   if (result) {
      struct minmax_cache_entry *entry = result->data;
      *min_index = entry->min;
      *max_index = entry->max;
      found = GL_TRUE;
   }

out_invalidate:
   if (found) {
      /* The hit counter saturates so that we don't accidentally disable the
       * cache in a long-running program.
       */
      unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;

      if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
         bufferObj->MinMaxCacheHitIndices = new_hit_count;
      else
         bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
   }
   else {
      bufferObj->MinMaxCacheMissIndices += count;
   }

out_disable:
   mtx_unlock(&bufferObj->Mutex);
   return found;
}


/**
 * Record a freshly computed min/max pair in the buffer's cache.
 *
 * Does nothing when caching is not allowed for \p bufferObj.  The hash table
 * is created on first use.  Allocation failures (table or entry) are
 * silently ignored: the result is simply not cached.  If an entry with an
 * equal key already exists, the existing entry is kept, the new one is
 * discarded and a debug message is emitted.
 *
 * \param ctx         GL context, used for the debug message
 * \param bufferObj   buffer object holding the indices
 * \param index_size  size of one index
 * \param offset      offset of the first index in the buffer
 * \param count       number of indices in the range
 * \param min         smallest index value found in the range
 * \param max         largest index value found in the range
 */
static void
vbo_minmax_cache_store(struct gl_context *ctx,
                       struct gl_buffer_object *bufferObj,
                       unsigned index_size, GLintptr offset, GLuint count,
                       GLuint min, GLuint max)
{
   struct minmax_cache_entry *entry;
   struct hash_entry *table_entry;
   uint32_t hash;

   if (!vbo_use_minmax_cache(bufferObj))
      return;

   mtx_lock(&bufferObj->Mutex);

   if (!bufferObj->MinMaxCache) {
      bufferObj->MinMaxCache =
         _mesa_hash_table_create(NULL,
                                 (uint32_t (*)(const void *))vbo_minmax_cache_hash,
                                 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
      if (!bufferObj->MinMaxCache)
         goto out;
   }

   entry = MALLOC_STRUCT(minmax_cache_entry);
   if (!entry)
      goto out;

   entry->key.offset = offset;
   entry->key.count = count;
   entry->key.index_size = index_size;
   entry->min = min;
   entry->max = max;
   hash = vbo_minmax_cache_hash(&entry->key);

   table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
                                                    hash, &entry->key);
   if (table_entry) {
      /* It seems like this could happen when two contexts are rendering using
       * the same buffer object from multiple threads.
       */
      _mesa_debug(ctx, "duplicate entry in minmax cache\n");
      free(entry);
      goto out;
   }

   /* The key lives inside the entry, so the table references entry memory
    * for both key and data.
    */
   table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
                                                    hash, &entry->key, entry);
   if (!table_entry)
      free(entry);

out:
   mtx_unlock(&bufferObj->Mutex);
}


/**
 * Compute min and max elements by scanning the index buffer for
 * glDraw[Range]Elements() calls.
 * If primitive restart is enabled, we need to ignore restart
 * indexes when computing min/max.
 *
 * When the indices live in a buffer object, the cache is consulted first;
 * on a miss the range is mapped for reading, scanned, stored in the cache
 * and unmapped again.
 *
 * If no index is taken into account (\p count is 0, or every index equals
 * the restart index while restart is enabled), the outputs are
 * *min_index = ~0U and *max_index = 0.
 *
 * \param ctx        GL context
 * \param prim       primitive whose start selects the first index
 * \param ib         index buffer description (ptr, obj, index_size)
 * \param min_index  returns the smallest index value
 * \param max_index  returns the largest index value
 * \param count      number of indices to scan, starting at prim->start
 */
static void
vbo_get_minmax_index(struct gl_context *ctx,
                     const struct _mesa_prim *prim,
                     const struct _mesa_index_buffer *ib,
                     GLuint *min_index, GLuint *max_index,
                     const GLuint count)
{
   const GLboolean restart = ctx->Array._PrimitiveRestart;
   const GLuint restartIndex =
      _mesa_primitive_restart_index(ctx, ib->index_size);
   const char *indices;
   GLuint i;
   GLintptr offset = 0;

   indices = (char *) ib->ptr + prim->start * ib->index_size;
   if (_mesa_is_bufferobj(ib->obj)) {
      GLsizeiptr size = MIN2(count * ib->index_size, ib->obj->Size);

      /* For a buffer object the pointer value computed above is used as
       * the offset into the buffer, both as cache key and as map offset.
       */
      if (vbo_get_minmax_cached(ib->obj, ib->index_size, (GLintptr) indices,
                                count, min_index, max_index))
         return;

      /* Explicit cast: converting a pointer to GLintptr implicitly is a
       * constraint violation in C.  This is the same value that was used
       * for the cache lookup above, so lookup and store keys agree.
       */
      offset = (GLintptr) indices;

      /* NOTE(review): the mapping result is used without a NULL check. */
      indices = ctx->Driver.MapBufferRange(ctx, offset, size,
                                           GL_MAP_READ_BIT, ib->obj,
                                           MAP_INTERNAL);
   }

   switch (ib->index_size) {
   case 4: {
      const GLuint *ui_indices = (const GLuint *)indices;
      GLuint max_ui = 0;
      GLuint min_ui = ~0U;
      if (restart) {
         for (i = 0; i < count; i++) {
            if (ui_indices[i] != restartIndex) {
               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
            }
         }
      }
      else {
#if defined(USE_SSE41)
         if (cpu_has_sse4_1) {
            _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
         }
         else
#endif
            for (i = 0; i < count; i++) {
               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
            }
      }
      *min_index = min_ui;
      *max_index = max_ui;
      break;
   }
   case 2: {
      const GLushort *us_indices = (const GLushort *)indices;
      GLuint max_us = 0;
      GLuint min_us = ~0U;
      if (restart) {
         for (i = 0; i < count; i++) {
            if (us_indices[i] != restartIndex) {
               if (us_indices[i] > max_us) max_us = us_indices[i];
               if (us_indices[i] < min_us) min_us = us_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < count; i++) {
            if (us_indices[i] > max_us) max_us = us_indices[i];
            if (us_indices[i] < min_us) min_us = us_indices[i];
         }
      }
      *min_index = min_us;
      *max_index = max_us;
      break;
   }
   case 1: {
      const GLubyte *ub_indices = (const GLubyte *)indices;
      GLuint max_ub = 0;
      GLuint min_ub = ~0U;
      if (restart) {
         for (i = 0; i < count; i++) {
            if (ub_indices[i] != restartIndex) {
               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < count; i++) {
            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
         }
      }
      *min_index = min_ub;
      *max_index = max_ub;
      break;
   }
   default:
      unreachable("not reached");
   }

   if (_mesa_is_bufferobj(ib->obj)) {
      vbo_minmax_cache_store(ctx, ib->obj, ib->index_size, offset, count,
                             *min_index, *max_index);
      ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
   }
}


/**
 * Compute min and max elements for nr_prims
 *
 * Primitives whose index ranges are contiguous (start + count of one equals
 * start of the next) are merged and scanned in a single call.  The results
 * of all scans are combined into \p min_index / \p max_index.
 *
 * With \p nr_prims == 0 the outputs are *min_index = ~0 and *max_index = 0.
 *
 * \param ctx        GL context
 * \param prims      array of \p nr_prims primitives
 * \param ib         index buffer description shared by all primitives
 * \param min_index  returns the smallest index value over all primitives
 * \param max_index  returns the largest index value over all primitives
 * \param nr_prims   number of entries in \p prims
 */
void
vbo_get_minmax_indices(struct gl_context *ctx,
                       const struct _mesa_prim *prims,
                       const struct _mesa_index_buffer *ib,
                       GLuint *min_index,
                       GLuint *max_index,
                       GLuint nr_prims)
{
   GLuint tmp_min, tmp_max;
   GLuint i;
   GLuint count;

   *min_index = ~0;
   *max_index = 0;

   for (i = 0; i < nr_prims; i++) {
      const struct _mesa_prim *start_prim;

      start_prim = &prims[i];
      count = start_prim->count;
      /* Do combination if possible to reduce map/unmap count */
      while ((i + 1 < nr_prims) &&
             (prims[i].start + prims[i].count == prims[i+1].start)) {
         count += prims[i+1].count;
         i++;
      }
      vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
      *min_index = MIN2(*min_index, tmp_min);
      *max_index = MAX2(*max_index, tmp_max);
   }
}