aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2018-09-05 23:13:56 -0400
committerMarek Olšák <[email protected]>2018-09-07 16:03:36 -0400
commit25ffb8401638a07d774cfc68ab6afc7d27780dd8 (patch)
treeda1ddae42e57a2151158b6b4adae9d024a224a5b
parent8016639f636f4a0876fb63e508167eab26be9c69 (diff)
radeonsi: pin the winsys thread to the requested L3 cache (v2)
v2: rebase Reviewed-by: Brian Paul <[email protected]>
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h8
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c15
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c10
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_winsys.c13
4 files changed, 46 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 99a793f9028..bb732ab314b 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -257,6 +257,14 @@ struct radeon_winsys {
void (*query_info)(struct radeon_winsys *ws,
struct radeon_info *info);
+ /**
+ * A hint for the winsys that it should pin its execution threads to
+ * a group of cores sharing a specific L3 cache if the CPU has multiple
+ * L3 caches. This is needed for good multithreading performance on
+ * AMD Zen CPUs.
+ */
+ void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache);
+
/**************************************************************************
* Buffer management. Buffer attributes are mostly fixed over its lifetime.
*
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index c259c260550..a5088adcf24 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -346,6 +346,20 @@ static void si_set_log_context(struct pipe_context *ctx,
u_log_add_auto_logger(log, si_auto_log_cs, sctx);
}
+static void si_set_context_param(struct pipe_context *ctx,
+ enum pipe_context_param param,
+ unsigned value)
+{
+ struct radeon_winsys *ws = ((struct si_context *)ctx)->ws;
+
+ switch (param) {
+ case PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE:
+ ws->pin_threads_to_L3_cache(ws, value);
+ break;
+ default:;
+ }
+}
+
static struct pipe_context *si_create_context(struct pipe_screen *screen,
unsigned flags)
{
@@ -366,6 +380,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
sctx->b.emit_string_marker = si_emit_string_marker;
sctx->b.set_debug_callback = si_set_debug_callback;
sctx->b.set_log_context = si_set_log_context;
+ sctx->b.set_context_param = si_set_context_param;
sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index dcbc075e3c5..f32bbd9d086 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -30,6 +30,7 @@
#include "amdgpu_cs.h"
#include "amdgpu_public.h"
+#include "util/u_cpu_detect.h"
#include "util/u_hash_table.h"
#include "util/hash_table.h"
#include "util/xmlconfig.h"
@@ -235,6 +236,14 @@ static const char* amdgpu_get_chip_name(struct radeon_winsys *ws)
return amdgpu_get_marketing_name(dev);
}
+static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws,
+ unsigned cache)
+{
+ struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
+
+ util_pin_thread_to_L3(ws->cs_queue.threads[0], cache,
+ util_cpu_caps.cores_per_L3);
+}
PUBLIC struct radeon_winsys *
amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
@@ -314,6 +323,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
ws->base.query_value = amdgpu_query_value;
ws->base.read_registers = amdgpu_read_registers;
ws->base.get_chip_name = amdgpu_get_chip_name;
+ ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache;
amdgpu_bo_init_functions(ws);
amdgpu_cs_init_functions(ws);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index f8702e7c601..19472a50ce1 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -29,6 +29,7 @@
#include "radeon_drm_cs.h"
#include "radeon_drm_public.h"
+#include "util/u_cpu_detect.h"
#include "util/u_memory.h"
#include "util/u_hash_table.h"
@@ -797,6 +798,17 @@ static int handle_compare(void *key1, void *key2)
return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
}
+static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws,
+ unsigned cache)
+{
+ struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws;
+
+ if (util_queue_is_initialized(&rws->cs_queue)) {
+ util_pin_thread_to_L3(rws->cs_queue.threads[0], cache,
+ util_cpu_caps.cores_per_L3);
+ }
+}
+
PUBLIC struct radeon_winsys *
radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
radeon_screen_create_t screen_create)
@@ -864,6 +876,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
ws->base.unref = radeon_winsys_unref;
ws->base.destroy = radeon_winsys_destroy;
ws->base.query_info = radeon_query_info;
+ ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache;
ws->base.cs_request_feature = radeon_cs_request_feature;
ws->base.query_value = radeon_query_value;
ws->base.read_registers = radeon_read_registers;