summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorKenneth Graunke <[email protected]>2018-04-03 01:40:23 -0700
committerKenneth Graunke <[email protected]>2019-02-21 10:26:05 -0800
commite4aa8338c30de2f99de86bcfb1a3a39a21cea5e9 (patch)
tree82d0289f10cd3c0c1859e4bd609e6717d53876d5 /src/gallium/drivers
parent3693307670b543d65131ef5d7182c416a32a35c0 (diff)
iris: Soft-pin the universe
Breaks everything, woo!
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/iris/iris_batch.c4
-rw-r--r--src/gallium/drivers/iris/iris_bufmgr.c237
-rw-r--r--src/gallium/drivers/iris/iris_bufmgr.h43
-rw-r--r--src/gallium/drivers/iris/iris_resource.c17
-rw-r--r--src/gallium/drivers/iris/iris_screen.c3
-rw-r--r--src/gallium/drivers/iris/iris_screen.h2
6 files changed, 251 insertions, 55 deletions
diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c
index 7a5c3df3a2e..72ede9f53da 100644
--- a/src/gallium/drivers/iris/iris_batch.c
+++ b/src/gallium/drivers/iris/iris_batch.c
@@ -95,7 +95,7 @@ create_batch_buffer(struct iris_bufmgr *bufmgr,
struct iris_batch_buffer *buf,
const char *name, unsigned size)
{
- buf->bo = iris_bo_alloc(bufmgr, name, size);
+ buf->bo = iris_bo_alloc(bufmgr, name, size, IRIS_MEMZONE_OTHER);
buf->bo->kflags |= EXEC_OBJECT_CAPTURE;
buf->map = iris_bo_map(NULL, buf->bo, MAP_READ | MAP_WRITE);
buf->map_next = buf->map;
@@ -299,7 +299,7 @@ grow_buffer(struct iris_batch *batch,
const unsigned existing_bytes = buffer_bytes_used(buf);
struct iris_bo *new_bo =
- iris_bo_alloc(bufmgr, bo->name, new_size);
+ iris_bo_alloc(bufmgr, bo->name, new_size, IRIS_MEMZONE_OTHER);
buf->map = iris_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE);
buf->map_next = buf->map + existing_bytes;
diff --git a/src/gallium/drivers/iris/iris_bufmgr.c b/src/gallium/drivers/iris/iris_bufmgr.c
index 9bc101ee5ab..80e620b19e2 100644
--- a/src/gallium/drivers/iris/iris_bufmgr.c
+++ b/src/gallium/drivers/iris/iris_bufmgr.c
@@ -51,6 +51,8 @@
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
+#include "util/u_dynarray.h"
+#include "util/vma.h"
#include "iris_bufmgr.h"
#include "iris_context.h"
#include "string.h"
@@ -93,8 +95,6 @@ drm_ioctl(int fd, unsigned long request, void *arg)
return ret;
}
-
-
static inline int
atomic_add_unless(int *v, int add, int unless)
{
@@ -105,9 +105,37 @@ atomic_add_unless(int *v, int add, int unless)
return c == unless;
}
+/*
+ * Idea:
+ *
+ * Have a bitmap-allocator for each BO cache bucket size. Because bo_alloc
+ * rounds up allocations to the bucket size anyway, we can make 1 bit in the
+ * bitmap represent N pages of memory, where N = <bucket size / page size>.
+ * Allocations and frees always set/unset a single bit. Because ffsll only
+ * works on uint64_t, use a tree(?) of those.
+ *
+ * Nodes contain a starting address and a uint64_t bitmap. (pair-of-uint64_t)
+ * Bitmap uses 1 for a free block, 0 for in-use.
+ *
+ * Bucket contains...
+ *
+ * Dynamic array of nodes. (pointer, two ints)
+ */
+
+struct vma_bucket_node {
+ uint64_t start_address;
+ uint64_t bitmap;
+};
+
struct bo_cache_bucket {
+ /** List of cached BOs. */
struct list_head head;
+
+ /** Size of this bucket, in bytes. */
uint64_t size;
+
+ /** List of vma_bucket_nodes */
+ struct util_dynarray vma_list[IRIS_MEMZONE_COUNT];
};
struct iris_bufmgr {
@@ -123,6 +151,8 @@ struct iris_bufmgr {
struct hash_table *name_table;
struct hash_table *handle_table;
+ struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT];
+
bool has_llc:1;
bool bo_reuse:1;
};
@@ -132,6 +162,10 @@ static int bo_set_tiling_internal(struct iris_bo *bo, uint32_t tiling_mode,
static void bo_free(struct iris_bo *bo);
+static uint64_t vma_alloc(struct iris_bufmgr *bufmgr,
+ enum iris_memory_zone memzone,
+ uint64_t size, uint64_t alignment);
+
static uint32_t
key_hash_uint(const void *key)
{
@@ -191,6 +225,141 @@ bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size)
&bufmgr->cache_bucket[index] : NULL;
}
+static enum iris_memory_zone
+memzone_for_address(uint64_t address)
+{
+ const uint64_t _4GB = 1ull << 32;
+
+ if (address >= 3 * _4GB)
+ return IRIS_MEMZONE_OTHER;
+
+ if (address >= 2 * _4GB)
+ return IRIS_MEMZONE_DYNAMIC;
+
+ if (address >= 1 * _4GB)
+ return IRIS_MEMZONE_SURFACE;
+
+ return IRIS_MEMZONE_SHADER;
+}
+
+static uint64_t
+bucket_vma_alloc(struct iris_bufmgr *bufmgr,
+ struct bo_cache_bucket *bucket,
+ enum iris_memory_zone memzone)
+{
+ struct util_dynarray *vma_list = &bucket->vma_list[memzone];
+ struct vma_bucket_node *node;
+
+ if (vma_list->size == 0) {
+ /* This bucket allocator is out of space - allocate a new block of
+ * memory from a larger allocator (either another bucket or util_vma).
+ *
+ * Set the first bit used, and return the start address.
+ */
+ node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node));
+      /* Align node blocks to their full size so bucket_vma_free() can
+       * recover the node start with (address / node_bytes) * node_bytes.
+       */
+      node->start_address =
+         vma_alloc(bufmgr, memzone, 64ull * bucket->size, 64ull * bucket->size);
+ node->bitmap = ~1ull;
+ return node->start_address;
+ }
+
+ /* Pick any bit from any node - they're all the right size and free. */
+ node = util_dynarray_top_ptr(vma_list, struct vma_bucket_node);
+ int bit = ffsll(node->bitmap) - 1;
+ assert(bit != -1);
+
+ /* Reserve the memory by clearing the bit. */
+ node->bitmap &= ~(1ull << bit);
+
+ /* If this node is now completely full, remove it from the free list. */
+ if (node->bitmap == 0ull) {
+ (void) util_dynarray_pop(vma_list, struct vma_bucket_node);
+ }
+
+ return node->start_address + bit * bucket->size;
+}
+
+static void
+bucket_vma_free(struct bo_cache_bucket *bucket,
+ uint64_t address,
+ uint64_t size)
+{
+ enum iris_memory_zone memzone = memzone_for_address(address);
+ struct util_dynarray *vma_list = &bucket->vma_list[memzone];
+ const uint64_t node_bytes = 64ull * bucket->size;
+ struct vma_bucket_node *node = NULL;
+
+ uint64_t start = (address / node_bytes) * node_bytes;
+ int bit = (address - start) / bucket->size;
+
+ util_dynarray_foreach(vma_list, struct vma_bucket_node, cur) {
+ if (cur->start_address == start) {
+ node = cur;
+ break;
+ }
+ }
+
+ if (!node) {
+ node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node));
+ node->start_address = start;
+ node->bitmap = 0ull;
+ }
+
+ node->bitmap |= 1ull << bit;
+
+ /* The block might be entirely free now, and if so, we could return it
+ * to the larger allocator. But we may as well hang on to it, in case
+ * we get more allocations at this block size.
+ */
+}
+
+static struct bo_cache_bucket *
+get_bucket_allocator(struct iris_bufmgr *bufmgr, uint64_t size)
+{
+ /* Skip using the bucket allocator for very large sizes, as it allocates
+ * 64 of them and this can balloon rather quickly.
+ */
+ if (size > 1024 * PAGE_SIZE)
+ return NULL;
+
+ struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size);
+
+ if (bucket && bucket->size == size)
+ return bucket;
+
+ return NULL;
+}
+
+static uint64_t
+vma_alloc(struct iris_bufmgr *bufmgr,
+ enum iris_memory_zone memzone,
+ uint64_t size,
+ uint64_t alignment)
+{
+ struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
+
+ if (bucket)
+ return bucket_vma_alloc(bufmgr, bucket, memzone);
+
+ return util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size,
+ alignment);
+}
+
+static void
+vma_free(struct iris_bufmgr *bufmgr,
+ uint64_t address,
+ uint64_t size)
+{
+ struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
+
+ if (bucket) {
+ bucket_vma_free(bucket, address, size);
+ } else {
+ enum iris_memory_zone memzone = memzone_for_address(address);
+ util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
+ }
+}
+
int
iris_bo_busy(struct iris_bo *bo)
{
@@ -237,6 +406,7 @@ static struct iris_bo *
bo_alloc_internal(struct iris_bufmgr *bufmgr,
const char *name,
uint64_t size,
+ enum iris_memory_zone memzone,
unsigned flags,
uint32_t tiling_mode,
uint32_t stride)
@@ -303,7 +473,15 @@ retry:
}
}
- if (!alloc_from_cache) {
+ if (alloc_from_cache) {
+ /* If the cached BO isn't in the right memory zone, free the old
+ * memory and assign it a new address.
+ */
+      if (memzone != memzone_for_address(bo->gtt_offset)) {
+         /* Free with bo->size (the bucket-rounded allocation size), matching
+          * the size used when the VMA was originally allocated.
+          */
+         vma_free(bufmgr, bo->gtt_offset, bo->size);
+ bo->gtt_offset = 0;
+ }
+ } else {
bo = calloc(1, sizeof(*bo));
if (!bo)
goto err;
@@ -325,6 +503,7 @@ retry:
bo->gem_handle = create.handle;
bo->bufmgr = bufmgr;
+ bo->kflags = EXEC_OBJECT_PINNED;
bo->tiling_mode = I915_TILING_NONE;
bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
@@ -347,6 +526,13 @@ retry:
goto err_free;
}
+ if (bo->gtt_offset == 0ull) {
+ bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 1);
+
+ if (bo->gtt_offset == 0ull)
+ goto err_free;
+ }
+
bo->name = name;
p_atomic_set(&bo->refcount, 1);
bo->reusable = true;
@@ -370,17 +556,20 @@ err:
struct iris_bo *
iris_bo_alloc(struct iris_bufmgr *bufmgr,
const char *name,
- uint64_t size)
+ uint64_t size,
+ enum iris_memory_zone memzone)
{
- return bo_alloc_internal(bufmgr, name, size, 0, I915_TILING_NONE, 0);
+ return bo_alloc_internal(bufmgr, name, size, memzone,
+ 0, I915_TILING_NONE, 0);
}
struct iris_bo *
iris_bo_alloc_tiled(struct iris_bufmgr *bufmgr, const char *name,
- uint64_t size, uint32_t tiling_mode, uint32_t pitch,
- unsigned flags)
+ uint64_t size, enum iris_memory_zone memzone,
+ uint32_t tiling_mode, uint32_t pitch, unsigned flags)
{
- return bo_alloc_internal(bufmgr, name, size, flags, tiling_mode, pitch);
+ return bo_alloc_internal(bufmgr, name, size, memzone,
+ flags, tiling_mode, pitch);
}
/**
@@ -435,11 +624,13 @@ iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
bo->size = open_arg.size;
bo->gtt_offset = 0;
bo->bufmgr = bufmgr;
+ bo->kflags = EXEC_OBJECT_PINNED;
bo->gem_handle = open_arg.handle;
bo->name = name;
bo->global_name = handle;
bo->reusable = false;
bo->external = true;
+ bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
_mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
_mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
@@ -494,6 +685,8 @@ bo_free(struct iris_bo *bo)
_mesa_hash_table_remove(bufmgr->handle_table, entry);
}
+ vma_free(bo->bufmgr, bo->gtt_offset, bo->size);
+
/* Close this object */
struct drm_gem_close close = { .handle = bo->gem_handle };
int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
@@ -542,9 +735,7 @@ bo_unreference_final(struct iris_bo *bo, time_t time)
if (bufmgr->bo_reuse && bo->reusable && bucket != NULL &&
iris_bo_madvise(bo, I915_MADV_DONTNEED)) {
bo->free_time = time;
-
bo->name = NULL;
- bo->kflags = 0;
list_addtail(&bo->head, &bucket->head);
} else {
@@ -960,6 +1151,9 @@ iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
bo_free(bo);
}
+
+ for (int i = 0; i < IRIS_MEMZONE_COUNT; i++)
+ util_dynarray_fini(&bucket->vma_list[i]);
}
_mesa_hash_table_destroy(bufmgr->name_table, NULL);
@@ -1052,6 +1246,7 @@ iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd)
bo->size = ret;
bo->bufmgr = bufmgr;
+ bo->kflags = EXEC_OBJECT_PINNED;
bo->gem_handle = handle;
_mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
@@ -1059,6 +1254,7 @@ iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd)
bo->name = "prime";
bo->reusable = false;
bo->external = true;
+ bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
@@ -1164,6 +1360,8 @@ add_bucket(struct iris_bufmgr *bufmgr, int size)
assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
list_inithead(&bufmgr->cache_bucket[i].head);
+   for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
+      util_dynarray_init(&bufmgr->cache_bucket[i].vma_list[z], NULL);
bufmgr->cache_bucket[i].size = size;
bufmgr->num_buckets++;
@@ -1185,12 +1383,12 @@ init_cache_buckets(struct iris_bufmgr *bufmgr)
* width/height alignment and rounding of sizes to pages will
* get us useful cache hit rates anyway)
*/
- add_bucket(bufmgr, 4096);
- add_bucket(bufmgr, 4096 * 2);
- add_bucket(bufmgr, 4096 * 3);
+ add_bucket(bufmgr, PAGE_SIZE);
+ add_bucket(bufmgr, PAGE_SIZE * 2);
+ add_bucket(bufmgr, PAGE_SIZE * 3);
/* Initialize the linked lists for BO reuse cache. */
- for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
+ for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
add_bucket(bufmgr, size);
add_bucket(bufmgr, size + size * 1 / 4);
@@ -1284,6 +1482,17 @@ iris_bufmgr_init(struct gen_device_info *devinfo, int fd)
bufmgr->has_llc = devinfo->has_llc;
+ const uint64_t _4GB = 1ull << 32;
+
+ util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SHADER],
+ PAGE_SIZE, _4GB);
+ util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SURFACE],
+ 1 * _4GB, _4GB);
+ util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_DYNAMIC],
+ 2 * _4GB, _4GB);
+ util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_OTHER],
+ 3 * _4GB, (1ull << 48) - 3 * _4GB);
+
init_cache_buckets(bufmgr);
bufmgr->name_table =
diff --git a/src/gallium/drivers/iris/iris_bufmgr.h b/src/gallium/drivers/iris/iris_bufmgr.h
index fa4df2a53df..3c52c2d8722 100644
--- a/src/gallium/drivers/iris/iris_bufmgr.h
+++ b/src/gallium/drivers/iris/iris_bufmgr.h
@@ -35,6 +35,15 @@
struct gen_device_info;
struct pipe_debug_callback;
+enum iris_memory_zone {
+ IRIS_MEMZONE_DYNAMIC,
+ IRIS_MEMZONE_SURFACE,
+ IRIS_MEMZONE_SHADER,
+ IRIS_MEMZONE_OTHER,
+};
+
+#define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 1)
+
struct iris_bo {
/**
* Size in bytes of the buffer object.
@@ -51,31 +60,11 @@ struct iris_bo {
uint32_t gem_handle;
/**
- * Offset of the buffer inside the Graphics Translation Table.
- *
- * This is effectively our GPU address for the buffer and we use it
- * as our base for all state pointers into the buffer. However, since the
- * kernel may be forced to move it around during the course of the
- * buffer's lifetime, we can only know where the buffer was on the last
- * execbuf. We presume, and are usually right, that the buffer will not
- * move and so we use that last offset for the next batch and by doing
- * so we can avoid having the kernel perform a relocation fixup pass as
- * our pointers inside the batch will be using the correct base offset.
- *
- * Since we do use it as a base address for the next batch of pointers,
- * the kernel treats our offset as a request, and if possible will
- * arrange the buffer to placed at that address (trying to balance
- * the cost of buffer migration versus the cost of performing
- * relocations). Furthermore, we can force the kernel to place the buffer,
- * or report a failure if we specified a conflicting offset, at our chosen
- * offset by specifying EXEC_OBJECT_PINNED.
+ * Virtual address of the buffer inside the PPGTT (Per-Process Graphics
+ * Translation Table).
*
- * Note the GTT may be either per context, or shared globally across the
- * system. On a shared system, our buffers have to contend for address
- * space with both aperture mappings and framebuffers and so are more
- * likely to be moved. On a full ppGTT system, each batch exists in its
- * own GTT, and so each buffer may have their own offset within each
- * context.
+ * Although each hardware context has its own VMA, we assign BO's to the
+ * same address in all contexts, for simplicity.
*/
uint64_t gtt_offset;
@@ -156,7 +145,8 @@ struct iris_bo {
*/
struct iris_bo *iris_bo_alloc(struct iris_bufmgr *bufmgr,
const char *name,
- uint64_t size);
+ uint64_t size,
+ enum iris_memory_zone memzone);
/**
* Allocate a tiled buffer object.
@@ -174,7 +164,8 @@ struct iris_bo *iris_bo_alloc_tiled(struct iris_bufmgr *bufmgr,
uint64_t size,
+ enum iris_memory_zone memzone,
uint32_t tiling_mode,
uint32_t pitch,
- unsigned flags);
+ unsigned flags);
/** Takes a reference on a buffer object */
static inline void
diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c
index 93855741f84..92c9b038a3d 100644
--- a/src/gallium/drivers/iris/iris_resource.c
+++ b/src/gallium/drivers/iris/iris_resource.c
@@ -256,19 +256,20 @@ iris_resource_create_with_modifiers(struct pipe_screen *pscreen,
.usage = usage,
.tiling_flags = 1 << mod_info->tiling);
- res->bo = iris_bo_alloc_tiled(screen->bufmgr, "resource", res->surf.size_B,
+ enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
+ const char *name = "resource";
+ if (templ->flags & IRIS_RESOURCE_FLAG_INSTRUCTION_CACHE) {
+ memzone = IRIS_MEMZONE_SHADER;
+ name = "shader kernels";
+ }
+
+   res->bo = iris_bo_alloc_tiled(screen->bufmgr, name, res->surf.size_B,
+                                 memzone,
isl_tiling_to_i915_tiling(res->surf.tiling),
res->surf.row_pitch_B, 0);
if (!res->bo)
goto fail;
- if (templ->flags & IRIS_RESOURCE_FLAG_INSTRUCTION_CACHE) {
- res->bo->kflags = EXEC_OBJECT_PINNED;
- res->bo->name = "instruction cache";
- // XXX: p_atomic_add is backwards :(
- res->bo->gtt_offset = __atomic_fetch_add(&screen->next_instruction_address, res->bo->size, __ATOMIC_ACQ_REL);
- }
-
return &res->base;
fail:
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index d6d125bb95b..79dba4c507c 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -526,8 +526,5 @@ iris_screen_create(int fd)
pscreen->fence_finish = iris_fence_finish;
pscreen->query_memory_info = iris_query_memory_info;
- /* Put them somewhere non-zero */
- screen->next_instruction_address = 128 * 1024;
-
return pscreen;
}
diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h
index f24f567532f..5484e535ac9 100644
--- a/src/gallium/drivers/iris/iris_screen.h
+++ b/src/gallium/drivers/iris/iris_screen.h
@@ -47,8 +47,6 @@ struct iris_screen {
struct isl_device isl_dev;
struct iris_bufmgr *bufmgr;
struct brw_compiler *compiler;
-
- uint32_t next_instruction_address;
};
struct pipe_screen *iris_screen_create(int fd);