summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2016-11-10 17:28:20 -0800
committerEric Anholt <[email protected]>2016-11-12 19:21:46 -0800
commitace0d810e56a1e2978fc3ac237158918ebe2a23c (patch)
tree7f82646ddac47d8c3bf232795d2d1cccda29ae7c /src/gallium
parent67f72c5f5d8172be1bdb970e672202f0a47bac88 (diff)
vc4: Flag the last thread switch in the program as the last.
We don't allow the last thread switch to be inside control flow, to be sure that we hit the last state exactly once. If the last texturing was in control flow, fall back to single threaded.
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c11
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h5
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c18
3 files changed, 34 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index ad06d8558fe..d2281ce6bd3 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2285,6 +2285,17 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
switch (stage) {
case QSTAGE_FRAG:
+ /* FS threading requires that the thread execute
+ * QPU_SIG_LAST_THREAD_SWITCH exactly once before terminating
+ * (with no other THRSW afterwards, obviously). If we didn't
+ * fetch a texture at a top level block, this wouldn't be
+ * true.
+ */
+ if (c->fs_threaded && !c->last_thrsw_at_top_level) {
+ c->failed = true;
+ return c;
+ }
+
emit_frag_end(c);
break;
case QSTAGE_VERT:
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 03ac1f50128..eebfdf047df 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -514,6 +514,9 @@ struct vc4_compile {
struct list_head qpu_inst_list;
+ /* Pre-QPU-scheduled instruction containing the last THRSW */
+ uint64_t *last_thrsw;
+
uint64_t *qpu_insts;
uint32_t qpu_inst_count;
uint32_t qpu_inst_size;
@@ -540,6 +543,8 @@ struct vc4_compile {
*/
bool fs_threaded;
+ bool last_thrsw_at_top_level;
+
bool failed;
};
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index cb936243c65..e2f04253855 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -504,6 +504,7 @@ vc4_generate_code_block(struct vc4_compile *c,
queue(block, qpu_NOP());
*last_inst(block) = qpu_set_sig(*last_inst(block),
QPU_SIG_THREAD_SWITCH);
+ c->last_thrsw = last_inst(block);
break;
case QOP_BRANCH:
@@ -591,6 +592,23 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
qir_for_each_block(block, c)
vc4_generate_code_block(c, block, temp_registers);
+ /* Switch the last SIG_THRSW instruction to SIG_LAST_THRSW.
+ *
+ * LAST_THRSW is a new signal in BCM2708B0 (including Raspberry Pi)
+ * that ensures that a later thread doesn't try to lock the scoreboard
+ * and terminate before an earlier-spawned thread on the same QPU, by
+ * delaying switching back to the later shader until earlier has
+ * finished. Otherwise, if the earlier thread was hitting the same
+ * quad, the scoreboard would deadlock.
+ */
+ if (c->last_thrsw) {
+ assert(QPU_GET_FIELD(*c->last_thrsw, QPU_SIG) ==
+ QPU_SIG_THREAD_SWITCH);
+ *c->last_thrsw = ((*c->last_thrsw & ~QPU_SIG_MASK) |
+ QPU_SET_FIELD(QPU_SIG_LAST_THREAD_SWITCH,
+ QPU_SIG));
+ }
+
uint32_t cycles = qpu_schedule_instructions(c);
uint32_t inst_count_at_schedule_time = c->qpu_inst_count;