summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorRafael Antognolli <[email protected]>2018-11-28 11:43:03 -0800
committerRafael Antognolli <[email protected]>2019-01-17 15:07:34 -0800
commit6a2d5ae305ad34bd26d033b7b564d21004147666 (patch)
treed44939864fce62e57cbce173b2daca8e7f9111c7 /src
parent27478ce00e3619e2d3e6809a6e953295e8d651de (diff)
anv/allocator: Add anv_state_table.
Add a structure to hold anv_states. This table will initially be used to recycle anv_states, instead of relying on a linked list implemented in GPU memory. Later it could be used so that all anv_states just point to the content of this struct, instead of making copies of anv_states everywhere. One has to call anv_state_table_add(), which returns an index for the state in the table, then get a pointer to the entry at that index, and finally fill in the rest of the struct. TODO: 1) There's a lot of common code between this table's backing store memory and the anv_block_pool buffer, due to how we grow it. I think it's possible to refactor this and reuse the code in both places. 2) Add unit tests. v3: - Rename state table memfd (Jason) - Return VK_ERROR_OUT_OF_HOST_MEMORY in more places (Jason) - anv_state_table_grow returns VkResult (Jason) - Rename variables to be more informative (Jason) - Return errors on state table grow. - Rename anv_state_table_push/pop to anv_free_list_push2/pop2. This will be renamed again to remove the trailing "2" later. v4: - Remove exit(-1) from anv_state_table (Jason). - Use uint32_t "next" field in anv_free_entry (Jason). Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/intel/vulkan/anv_allocator.c241
-rw-r--r--src/intel/vulkan/anv_private.h44
2 files changed, 283 insertions, 2 deletions
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index fe26bcbeefd..04ef05f5fae 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -100,6 +100,9 @@
/* Allocations are always at least 64 byte aligned, so 1 is an invalid value.
* We use it to indicate the free list is empty. */
#define EMPTY 1
+/* Head value that marks a union anv_free_list2 as empty:
+ * anv_free_list_pop2() stops as soon as the list offset equals EMPTY2. */
+#define EMPTY2 UINT32_MAX
+
+/* Alignment, in bytes, used when rounding sizes up (for example the used
+ * size computed in anv_state_table_grow()). */
+#define PAGE_SIZE 4096
struct anv_mmap_cleanup {
void *map;
@@ -130,6 +133,242 @@ round_to_power_of_two(uint32_t value)
return 1 << ilog2_round_up(value);
}
+/* One record per mapping of the state table's memfd.  The records live in
+ * anv_state_table::mmap_cleanups and are walked by anv_state_table_finish(),
+ * which unmaps each of them. */
+struct anv_state_table_cleanup {
+ /* Address handed to munmap(); a NULL map is skipped by the finish path. */
+ void *map;
+ /* Length in bytes handed to munmap(). */
+ size_t size;
+};
+
+/* A zero-initialized cleanup record (map == NULL, size == 0). */
+#define ANV_STATE_TABLE_CLEANUP_INIT ((struct anv_state_table_cleanup){0})
+/* Size in bytes of one table slot, i.e. one struct anv_free_entry. */
+#define ANV_STATE_ENTRY_SIZE (sizeof(struct anv_free_entry))
+
+static VkResult
+anv_state_table_expand_range(struct anv_state_table *table, uint32_t size);
+
+/**
+ * Initializes a state table backed by an anonymous memfd.
+ *
+ * Creates the memfd, sizes it to BLOCK_POOL_MEMFD_SIZE, sets up the vector
+ * that records every mapping of it, and maps enough of the file to hold
+ * initial_entries entries.
+ *
+ * \param table            table to initialize
+ * \param device           device stored in the table (used for error reporting)
+ * \param initial_entries  number of struct anv_free_entry slots to map at first
+ *
+ * \return VK_SUCCESS on success.  VK_ERROR_INITIALIZATION_FAILED if the
+ *         memfd, its truncation or the cleanup vector cannot be set up;
+ *         VK_ERROR_OUT_OF_HOST_MEMORY if the requested size does not fit in
+ *         32 bits; otherwise whatever anv_state_table_expand_range() returns.
+ *         On failure everything acquired here is released again.
+ */
+VkResult
+anv_state_table_init(struct anv_state_table *table,
+                     struct anv_device *device,
+                     uint32_t initial_entries)
+{
+   VkResult result;
+
+   table->device = device;
+
+   table->fd = memfd_create("state table", MFD_CLOEXEC);
+   if (table->fd == -1)
+      return vk_error(VK_ERROR_INITIALIZATION_FAILED);
+
+   /* Size the file to its maximum up-front.  The Linux kernel won't actually
+    * back it with pages until we either map and fault on one of them or we
+    * use userptr and send a chunk of it off to the GPU.
+    */
+   if (ftruncate(table->fd, BLOCK_POOL_MEMFD_SIZE) == -1) {
+      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+      goto fail_fd;
+   }
+
+   if (!u_vector_init(&table->mmap_cleanups,
+                      round_to_power_of_two(sizeof(struct anv_state_table_cleanup)),
+                      128)) {
+      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+      goto fail_fd;
+   }
+
+   table->state.next = 0;
+   table->state.end = 0;
+   table->size = 0;
+
+   /* Do the multiplication in 64 bits: the byte size is passed on as a
+    * uint32_t, and a large entry count would otherwise wrap silently and
+    * map far less than the caller asked for.
+    */
+   uint64_t initial_size = (uint64_t)initial_entries * ANV_STATE_ENTRY_SIZE;
+   if (initial_size > UINT32_MAX) {
+      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto fail_mmap_cleanups;
+   }
+
+   result = anv_state_table_expand_range(table, (uint32_t)initial_size);
+   if (result != VK_SUCCESS)
+      goto fail_mmap_cleanups;
+
+   return VK_SUCCESS;
+
+ fail_mmap_cleanups:
+   u_vector_finish(&table->mmap_cleanups);
+ fail_fd:
+   close(table->fd);
+
+   return result;
+}
+
+/**
+ * Maps the first `size` bytes of the table's memfd and makes that mapping
+ * the table's current one.
+ *
+ * A cleanup record is appended to table->mmap_cleanups for the new mapping
+ * so that anv_state_table_finish() can unmap it.  Earlier mappings are left
+ * in place.
+ *
+ * \param table  table whose mapping is replaced
+ * \param size   number of bytes to map, starting at file offset 0
+ *
+ * \return VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY when `size` exceeds
+ *         BLOCK_POOL_MEMFD_SIZE, the cleanup record cannot be allocated, or
+ *         mmap() fails.
+ */
+static VkResult
+anv_state_table_expand_range(struct anv_state_table *table, uint32_t size)
+{
+   void *map;
+   /* Must be the same record type the vector was sized for in
+    * anv_state_table_init() and is read back as in anv_state_table_finish().
+    */
+   struct anv_state_table_cleanup *cleanup;
+
+   /* Assert that we only ever grow the pool */
+   assert(size >= table->state.end);
+
+   /* Make sure that we don't go outside the bounds of the memfd */
+   if (size > BLOCK_POOL_MEMFD_SIZE)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   cleanup = u_vector_add(&table->mmap_cleanups);
+   if (!cleanup)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Zero the record first: if mmap() fails below, the slot stays in the
+    * vector with map == NULL, which the finish path skips.
+    */
+   *cleanup = ANV_STATE_TABLE_CLEANUP_INIT;
+
+   /* Just leak the old map until we destroy the pool.  We can't munmap it
+    * without races or imposing locking on the block allocate fast path. On
+    * the whole the leaked maps adds up to less than the size of the
+    * current map.  MAP_POPULATE seems like the right thing to do, but we
+    * should try to get some numbers.
+    */
+   map = mmap(NULL, size, PROT_READ | PROT_WRITE,
+              MAP_SHARED | MAP_POPULATE, table->fd, 0);
+   if (map == MAP_FAILED) {
+      return vk_errorf(table->device->instance, table->device,
+                       VK_ERROR_OUT_OF_HOST_MEMORY, "mmap failed: %m");
+   }
+
+   cleanup->map = map;
+   cleanup->size = size;
+
+   table->map = map;
+   table->size = size;
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Grows the table's mapping if the used part is more than half of it.
+ *
+ * The used size is table->state.next entries, rounded up to PAGE_SIZE.  When
+ * twice that amount still fits in the current mapping nothing happens;
+ * otherwise the mapping size is doubled until it covers the used size and
+ * the table is remapped through anv_state_table_expand_range().
+ *
+ * \return VK_SUCCESS when no growth was needed, otherwise the result of
+ *         anv_state_table_expand_range().
+ */
+static VkResult
+anv_state_table_grow(struct anv_state_table *table)
+{
+   const uint32_t used_bytes =
+      align_u32(table->state.next * ANV_STATE_ENTRY_SIZE, PAGE_SIZE);
+   const uint32_t current_size = table->size;
+
+   /* The table is always initialized to a nonzero size and this function is
+    * only called after initialization.
+    */
+   assert(current_size > 0);
+
+   const uint32_t needed = MAX2(used_bytes, current_size);
+
+   /* This isn't the first allocation and the current mapping already holds
+    * double what is in use, so there is nothing to do.
+    */
+   if (used_bytes * 2 <= needed)
+      return VK_SUCCESS;
+
+   uint32_t new_size = current_size * 2;
+   while (new_size < needed) {
+      new_size *= 2;
+   }
+
+   assert(new_size > table->size);
+
+   return anv_state_table_expand_range(table, new_size);
+}
+
+/**
+ * Tears down a state table: unmaps every mapping recorded in
+ * table->mmap_cleanups, releases the vector and closes the memfd.
+ */
+void
+anv_state_table_finish(struct anv_state_table *table)
+{
+   struct anv_state_table_cleanup *mapping;
+
+   /* Records whose map pointer is NULL have nothing to unmap. */
+   u_vector_foreach(mapping, &table->mmap_cleanups) {
+      if (mapping->map != NULL) {
+         munmap(mapping->map, mapping->size);
+      }
+   }
+
+   u_vector_finish(&table->mmap_cleanups);
+
+   close(table->fd);
+}
+
+/**
+ * Reserves `count` consecutive entries in the table.
+ *
+ * On success *idx receives the index of the first reserved entry and the
+ * `state.idx` field of every reserved entry is set to its own index; the
+ * caller fills in the rest of each anv_state.
+ *
+ * \param table  table to allocate from
+ * \param idx    out: index of the first reserved entry (must not be NULL)
+ * \param count  number of consecutive entries to reserve
+ *
+ * \return VK_SUCCESS, or the error returned by anv_state_table_grow().
+ */
+VkResult
+anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
+                    uint32_t count)
+{
+   struct anv_block_state state, old, new;
+   VkResult result;
+
+   assert(idx);
+
+   while(1) {
+      /* Atomically claim `count` entries; `state` is the value from before
+       * the add.
+       */
+      state.u64 = __sync_fetch_and_add(&table->state.u64, count);
+      if (state.next + count <= state.end) {
+         /* Fast path: the claimed range lies inside the mapped table. */
+         assert(table->map);
+         struct anv_free_entry *entry = &table->map[state.next];
+         /* Unsigned index to match `count` (no signed/unsigned compare). */
+         for (uint32_t i = 0; i < count; i++) {
+            entry[i].state.idx = state.next + i;
+         }
+         *idx = state.next;
+         return VK_SUCCESS;
+      } else if (state.next <= state.end) {
+         /* We claimed the first range that runs past the end, so we have to
+          * grow the table.  table->state.next acts as a mutex: threads that
+          * try to allocate now will get indexes above the current limit and
+          * hit futex_wait below.
+          *
+          * NOTE(review): on a grow failure we return without restoring
+          * table->state or waking waiters -- confirm callers treat this as
+          * fatal.
+          */
+         new.next = state.next + count;
+         do {
+            result = anv_state_table_grow(table);
+            if (result != VK_SUCCESS)
+               return result;
+            new.end = table->size / ANV_STATE_ENTRY_SIZE;
+         } while (new.end < new.next);
+
+         /* Publish the new limits; wake waiters only if some thread bumped
+          * `next` while we were growing.
+          *
+          * NOTE(review): this branch does not return, so the loop claims a
+          * fresh range and [state.next, state.next + count) is never handed
+          * out -- appears intentional/harmless, confirm.
+          */
+         old.u64 = __sync_lock_test_and_set(&table->state.u64, new.u64);
+         if (old.next != state.next)
+            futex_wake(&table->state.end, INT_MAX);
+      } else {
+         /* Another thread is growing the table: sleep until `end` changes,
+          * then retry.
+          */
+         futex_wait(&table->state.end, state.end, NULL);
+         continue;
+      }
+   }
+}
+
+/* Pushes a run of table entries onto the head of a free list.
+ *
+ * The entries first, first + 1, ... are chained together through their
+ * `next` fields, the last one is pointed at the current list head, and the
+ * head is then switched to `first` with a 64-bit compare-and-swap that is
+ * retried until it succeeds.
+ *
+ * NOTE(review): with count == 0 the linking loop does not run and entry
+ * `first` is still pushed, i.e. 0 behaves like 1 -- confirm callers never
+ * pass 0.
+ */
+void
+anv_free_list_push2(union anv_free_list2 *list,
+ struct anv_state_table *table,
+ uint32_t first, uint32_t count)
+{
+ union anv_free_list2 current, old, new;
+ uint32_t last = first;
+
+ /* Link each entry of the run to its successor; `last` ends up as the
+ * index of the final entry. */
+ for (uint32_t i = 1; i < count; i++, last++)
+ table->map[last].next = last + 1;
+
+ old = *list;
+ do {
+ current = old;
+ /* Hook the tail of the run onto the head we last observed. */
+ table->map[last].next = current.offset;
+ new.offset = first;
+ /* The counter changes on every head update, so the 64-bit value differs
+ * even if the same offset becomes head again. */
+ new.count = current.count + 1;
+ old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
+ } while (old.u64 != current.u64);
+}
+
+/* Pops the head entry off a free list.
+ *
+ * Returns a pointer to the anv_state embedded in the popped table entry, or
+ * NULL when the list head is EMPTY2.
+ */
+struct anv_state *
+anv_free_list_pop2(union anv_free_list2 *list,
+ struct anv_state_table *table)
+{
+ union anv_free_list2 current, new, old;
+
+ current.u64 = list->u64;
+ while (current.offset != EMPTY2) {
+ /* Full memory barrier before reading the head entry's next link. */
+ __sync_synchronize();
+ new.offset = table->map[current.offset].next;
+ /* Bump the counter so the 64-bit head value changes on every update. */
+ new.count = current.count + 1;
+ old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
+ if (old.u64 == current.u64) {
+ /* The swap took effect: the entry at current.offset is ours. */
+ struct anv_free_entry *entry = &table->map[current.offset];
+ return &entry->state;
+ }
+ /* The head changed underneath us; retry with the value the CAS saw. */
+ current = old;
+ }
+
+ return NULL;
+}
+
static bool
anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset)
{
@@ -311,8 +550,6 @@ anv_block_pool_finish(struct anv_block_pool *pool)
close(pool->fd);
}
-#define PAGE_SIZE 4096
-
static VkResult
anv_block_pool_expand_range(struct anv_block_pool *pool,
uint32_t center_bo_offset, uint32_t size)
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 617f4e0f152..d88cb54aaf7 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -616,7 +616,18 @@ union anv_free_list {
uint64_t u64;
};
+/* Head of a free list whose entries live in an anv_state_table.  The two
+ * 32-bit fields overlay a single uint64_t so the head can be read and
+ * replaced with one 64-bit compare-and-swap (see anv_free_list_push2() and
+ * anv_free_list_pop2()). */
+union anv_free_list2 {
+ struct {
+ /* Index into the table's map of the first free entry; UINT32_MAX means
+ * the list is empty. */
+ uint32_t offset;
+
+ /* A simple count that is incremented every time the head changes. */
+ uint32_t count;
+ };
+ /* Both fields viewed as one value for the atomic operations. */
+ uint64_t u64;
+};
+
#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } })
+#define ANV_FREE_LIST2_EMPTY ((union anv_free_list2) { { UINT32_MAX, 0 } })
struct anv_block_state {
union {
@@ -688,6 +699,7 @@ struct anv_state {
int32_t offset;
uint32_t alloc_size;
void *map;
+ uint32_t idx;
};
#define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
@@ -702,6 +714,20 @@ struct anv_fixed_size_state_pool {
#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
+/* One slot of an anv_state_table: an anv_state plus the link used while the
+ * slot sits on an anv_free_list2. */
+struct anv_free_entry {
+ /* Table index of the next entry on the free list. */
+ uint32_t next;
+ /* The state stored in this slot; its idx field is set by
+ * anv_state_table_add(). */
+ struct anv_state state;
+};
+
+/* A growable array of anv_free_entry slots backed by a memfd. */
+struct anv_state_table {
+ /* Device the table belongs to; used when reporting errors. */
+ struct anv_device *device;
+ /* memfd that backs the table. */
+ int fd;
+ /* Current mapping of the memfd, indexed by entry. */
+ struct anv_free_entry *map;
+ /* Size in bytes of the current mapping. */
+ uint32_t size;
+ /* Allocation cursor, counted in entries: `next` is advanced atomically by
+ * anv_state_table_add() and `end` is the number of entries that fit in
+ * the current mapping. */
+ struct anv_block_state state;
+ /* One struct anv_state_table_cleanup per mapping, unmapped at finish. */
+ struct u_vector mmap_cleanups;
+};
+
struct anv_state_pool {
struct anv_block_pool block_pool;
@@ -763,6 +789,24 @@ void anv_state_stream_finish(struct anv_state_stream *stream);
struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
uint32_t size, uint32_t alignment);
+/* Sets up `table` with room for `initial_entries` entries; returns
+ * VK_SUCCESS or an error code. */
+VkResult anv_state_table_init(struct anv_state_table *table,
+ struct anv_device *device,
+ uint32_t initial_entries);
+/* Unmaps the table's mappings and closes its memfd. */
+void anv_state_table_finish(struct anv_state_table *table);
+/* Reserves `count` consecutive entries and stores the index of the first
+ * one in *idx. */
+VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
+ uint32_t count);
+/* Pushes the run of table entries starting at `idx` onto `list`. */
+void anv_free_list_push2(union anv_free_list2 *list,
+ struct anv_state_table *table,
+ uint32_t idx, uint32_t count);
+/* Pops the head of `list`; returns NULL when the list is empty. */
+struct anv_state* anv_free_list_pop2(union anv_free_list2 *list,
+ struct anv_state_table *table);
+
+
+/* Returns a pointer to the anv_state stored in table entry `idx`.  The index
+ * is not range-checked. */
+static inline struct anv_state *
+anv_state_table_get(struct anv_state_table *table, uint32_t idx)
+{
+   struct anv_free_entry *entry = table->map + idx;
+
+   return &entry->state;
+}
/**
* Implements a pool of re-usable BOs. The interface is identical to that
* of block_pool except that each block is its own BO.