author     Christian König <[email protected]>   2017-08-29 16:45:46 +0200
committer  Christian König <[email protected]>   2017-08-31 14:55:38 +0200
commit     214b565bc28bc4419f3eec29ab7bbe34080459fe (patch)
tree       9c7a6c9a3d48eb377b6bf6cd7180dda9741b65c0
parent     8b3a257851905ff444d981e52938cbf2b36ba830 (diff)
winsys/amdgpu: set AMDGPU_GEM_CREATE_VM_ALWAYS_VALID if possible v2
When the kernel supports it set the local flag and stop adding those
BOs to the BO list.

Can probably be optimized much more.

v2: rename new flag to AMDGPU_GEM_CREATE_VM_ALWAYS_VALID

Reviewed-by: Marek Olšák <[email protected]>
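For context, here is a minimal userspace sketch of what the flag asks for, assuming libdrm's amdgpu_bo_alloc() and a kernel that understands AMDGPU_GEM_CREATE_VM_ALWAYS_VALID; the helper name alloc_local_vram_bo() is hypothetical and only illustrates the idea behind the patch:

#include <stdint.h>
#include <amdgpu.h>       /* libdrm: amdgpu_bo_alloc() */
#include <amdgpu_drm.h>   /* AMDGPU_GEM_DOMAIN_*, AMDGPU_GEM_CREATE_* */

#ifndef AMDGPU_GEM_CREATE_VM_ALWAYS_VALID
#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6)  /* older headers lack it */
#endif

/* Hypothetical helper: allocate a VRAM buffer that is never exported to
 * another process, so the kernel may keep its VM mapping permanently valid
 * instead of validating it against a BO list on every submission. */
static int alloc_local_vram_bo(amdgpu_device_handle dev, uint64_t size,
                               amdgpu_bo_handle *out)
{
   struct amdgpu_bo_alloc_request request = {
      .alloc_size = size,
      .phys_alignment = 4096,
      .preferred_heap = AMDGPU_GEM_DOMAIN_VRAM,
      .flags = AMDGPU_GEM_CREATE_VM_ALWAYS_VALID,
   };

   return amdgpu_bo_alloc(dev, &request, out);
}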
-rw-r--r--  src/gallium/winsys/amdgpu/drm/amdgpu_bo.c  |  8
-rw-r--r--  src/gallium/winsys/amdgpu/drm/amdgpu_bo.h  |  2
-rw-r--r--  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c  | 22
3 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 883a7c18e3e..897b4f05965 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -38,6 +38,10 @@
#include <stdio.h>
#include <inttypes.h>
+#ifndef AMDGPU_GEM_CREATE_VM_ALWAYS_VALID
+#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6)
+#endif
+
/* Set to 1 for verbose output showing committed sparse buffer ranges. */
#define DEBUG_SPARSE_COMMITS 0
@@ -406,6 +410,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (flags & RADEON_FLAG_GTT_WC)
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+ ws->info.drm_minor >= 20)
+ request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
if (r) {
@@ -439,6 +446,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
bo->u.real.va_handle = va_handle;
bo->initial_domain = initial_domain;
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
+ bo->is_local = !!(request.flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);
if (initial_domain & RADEON_DOMAIN_VRAM)
ws->allocated_vram += align64(size, ws->info.gart_page_size);
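The additions above gate the new behavior twice: at compile time the #ifndef fallback lets Mesa build against older kernel headers, and at run time the flag is only set when the buffer is never shared across processes and the amdgpu DRM minor version is at least 20. A hypothetical helper (bo_should_be_local() does not exist in the patch) expressing the same condition:

/* Hypothetical predicate mirroring the check in amdgpu_create_bo(): a BO may
 * skip the per-submission BO list only if it never leaves this process and
 * the kernel is new enough (amdgpu DRM minor >= 20) to honor the flag. */
static bool bo_should_be_local(const struct amdgpu_winsys *ws, unsigned flags)
{
   return (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) &&
          ws->info.drm_minor >= 20;
}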
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 1311344b812..10b095d7a1c 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -115,6 +115,8 @@ struct amdgpu_winsys_bo {
unsigned num_fences;
unsigned max_fences;
struct pipe_fence_handle **fences;
+
+ bool is_local;
};
struct amdgpu_slab {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 5ddde8e7944..f68071abc8f 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1121,6 +1121,8 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
free(handles);
mtx_unlock(&ws->global_bo_list_lock);
} else {
+ unsigned num_handles;
+
if (!amdgpu_add_sparse_backing_buffers(cs)) {
r = -ENOMEM;
goto bo_list_error;
@@ -1140,21 +1142,31 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
}
}
+ num_handles = 0;
for (i = 0; i < cs->num_real_buffers; ++i) {
struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i];
+ if (buffer->bo->is_local)
+ continue;
+
assert(buffer->u.real.priority_usage != 0);
- cs->handles[i] = buffer->bo->bo;
- cs->flags[i] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4;
+ cs->handles[num_handles] = buffer->bo->bo;
+ cs->flags[num_handles] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4;
+ ++num_handles;
}
if (acs->ring_type == RING_GFX)
ws->gfx_bo_list_counter += cs->num_real_buffers;
- r = amdgpu_bo_list_create(ws->dev, cs->num_real_buffers,
- cs->handles, cs->flags,
- &cs->request.resources);
+ if (num_handles) {
+ r = amdgpu_bo_list_create(ws->dev, num_handles,
+ cs->handles, cs->flags,
+ &cs->request.resources);
+ } else {
+ r = 0;
+ cs->request.resources = 0;
+ }
}
bo_list_error:
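To summarize the submission-side change, the new loop can be read as the following standalone sketch; the function name build_bo_list() and the flattened parameter list are hypothetical, but amdgpu_bo_list_create() is the real libdrm call used above:

/* Hypothetical, simplified version of the loop added to amdgpu_cs_submit_ib():
 * copy only non-local BO handles into the list handed to the kernel, and skip
 * amdgpu_bo_list_create() entirely when every BO is always valid. */
static int build_bo_list(amdgpu_device_handle dev,
                         struct amdgpu_cs_buffer *buffers, unsigned count,
                         amdgpu_bo_handle *handles, uint8_t *prios,
                         amdgpu_bo_list_handle *out_list)
{
   unsigned num_handles = 0;

   for (unsigned i = 0; i < count; ++i) {
      if (buffers[i].bo->is_local)
         continue;   /* the kernel already keeps this BO's mapping valid */

      handles[num_handles] = buffers[i].bo->bo;
      prios[num_handles] =
         (util_last_bit64(buffers[i].u.real.priority_usage) - 1) / 4;
      ++num_handles;
   }

   if (!num_handles) {
      *out_list = 0;   /* nothing left to validate per submission */
      return 0;
   }

   return amdgpu_bo_list_create(dev, num_handles, handles, prios, out_list);
}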