summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2016-11-10 17:47:34 -0800
committer: Eric Anholt <[email protected]> 2016-11-16 19:45:01 -0800
commit: 7f27ad55974d0bdac4c94a4523a4d42cc75334d5 (patch)
tree: 33ace14da8827227a06899a4e7f98a3aaabddd54 /src/gallium/drivers/vc4
parent: 45c022f2b06967196516f0616a9e4959ddcd14da (diff)
vc4: Try compiling our FSes in multithreaded mode on new kernels.
Multithreaded fragment shaders let us hide texturing latency by a hyperthreading-style switch to another fragment shader. This gets us up to 20% framerate improvements on glmark2 tests.
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- src/gallium/drivers/vc4/vc4_context.h | 3
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c | 15
-rw-r--r-- src/gallium/drivers/vc4/vc4_screen.c | 2
-rw-r--r-- src/gallium/drivers/vc4/vc4_screen.h | 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_simulator.c | 1
5 files changed, 20 insertions, 2 deletions
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index e59b1d28287..2005ae0e4ea 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -42,6 +42,9 @@
#ifndef DRM_VC4_PARAM_SUPPORTS_ETC1
#define DRM_VC4_PARAM_SUPPORTS_ETC1 4
#endif
+#ifndef DRM_VC4_PARAM_SUPPORTS_THREADED_FS
+#define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5
+#endif
#ifdef USE_VC4_SIMULATOR
#define using_vc4_simulator true
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 3c30f8c477f..1191f1766fe 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2158,7 +2158,7 @@ count_nir_instrs(nir_shader *nir)
static struct vc4_compile *
vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
- struct vc4_key *key)
+ struct vc4_key *key, bool fs_threaded)
{
struct vc4_compile *c = qir_compile_init();
@@ -2168,6 +2168,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
c->program_id = key->shader_state->program_id;
c->variant_id =
p_atomic_inc_return(&key->shader_state->compiled_variant_count);
+ c->fs_threaded = fs_threaded;
c->key = key;
switch (stage) {
@@ -2496,12 +2497,16 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
{
struct hash_table *ht;
uint32_t key_size;
+ bool try_threading;
+
if (stage == QSTAGE_FRAG) {
ht = vc4->fs_cache;
key_size = sizeof(struct vc4_fs_key);
+ try_threading = vc4->screen->has_threaded_fs;
} else {
ht = vc4->vs_cache;
key_size = sizeof(struct vc4_vs_key);
+ try_threading = false;
}
struct vc4_compiled_shader *shader;
@@ -2509,7 +2514,13 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
if (entry)
return entry->data;
- struct vc4_compile *c = vc4_shader_ntq(vc4, stage, key);
+ struct vc4_compile *c = vc4_shader_ntq(vc4, stage, key, try_threading);
+ /* If the FS failed to compile threaded, fall back to single threaded. */
+ if (try_threading && c->failed) {
+ qir_compile_destroy(c);
+ c = vc4_shader_ntq(vc4, stage, key, false);
+ }
+
shader = rzalloc(NULL, struct vc4_compiled_shader);
shader->program_id = vc4->next_compiled_program_id++;
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 9f852f0326d..97510b621d4 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -614,6 +614,8 @@ vc4_screen_create(int fd)
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_BRANCHES);
screen->has_etc1 =
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_ETC1);
+ screen->has_threaded_fs =
+ vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
if (!vc4_get_chip_info(screen))
goto fail;
diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h
index 572d62d6ac1..1f91ad37f88 100644
--- a/src/gallium/drivers/vc4/vc4_screen.h
+++ b/src/gallium/drivers/vc4/vc4_screen.h
@@ -90,6 +90,7 @@ struct vc4_screen {
uint32_t bo_count;
bool has_control_flow;
bool has_etc1;
+ bool has_threaded_fs;
struct vc4_simulator_file *sim_file;
};
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index 815898329b3..9565c49efb7 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -613,6 +613,7 @@ vc4_simulator_get_param_ioctl(int fd, struct drm_vc4_get_param *args)
switch (args->param) {
case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
case DRM_VC4_PARAM_SUPPORTS_ETC1:
+ case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
args->value = true;
return 0;