diff options
author | Eric Anholt <[email protected]> | 2016-11-11 14:15:55 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2016-11-12 18:46:35 -0800 |
commit | 93cdae44defdcc0a758e2f5376226b9944e1c91e (patch) | |
tree | dc619c39d54ff86340b18b19e23364f12eaf3a7d /src/gallium/drivers | |
parent | 977d8b526b983c8d19df00af224033389f8ab7c8 (diff) |
vc4: Add a bit of QPU validation for threaded shaders.
These are both bugs we've run into along the way writing multithreaded FS
support.
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qpu_validate.c | 103 |
1 files changed, 102 insertions, 1 deletions
```diff
diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c b/src/gallium/drivers/vc4/vc4_qpu_validate.c
index 02fadaf6128..f18ba0364d0 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_validate.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c
@@ -109,6 +109,7 @@ void
 vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
 {
         bool scoreboard_locked = false;
+        bool threaded = false;
 
         /* We don't want to do validation in release builds, but we want to
          * keep compiling the validation code to make sure it doesn't get
@@ -120,11 +121,17 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
 
         for (int i = 0; i < num_inst; i++) {
                 uint64_t inst = insts[i];
+                uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
 
-                if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) {
+                if (sig != QPU_SIG_PROG_END) {
                         if (qpu_inst_is_tlb(inst))
                                 scoreboard_locked = true;
 
+                        if (sig == QPU_SIG_THREAD_SWITCH ||
+                            sig == QPU_SIG_LAST_THREAD_SWITCH) {
+                                threaded = true;
+                        }
+
                         continue;
                 }
 
@@ -359,4 +366,98 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
                     waddr_mul == QPU_W_UNIFORMS_ADDRESS)
                         last_unif_pointer_update = i;
         }
+
+        if (threaded) {
+                bool last_thrsw_found = false;
+                bool scoreboard_locked = false;
+                int tex_samples_outstanding = 0;
+                int last_tex_samples_outstanding = 0;
+                int thrsw_ip = -1;
+
+                for (int i = 0; i < num_inst; i++) {
+                        uint64_t inst = insts[i];
+                        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+                        if (i == thrsw_ip) {
+                                /* In order to get texture results back in the
+                                 * correct order, before a new thrsw we have
+                                 * to read all the texture results from before
+                                 * the previous thrsw.
+                                 *
+                                 * FIXME: Is collecting the remaining results
+                                 * during the delay slots OK, or should we do
+                                 * this at THRSW signal time?
+                                 */
+                                if (last_tex_samples_outstanding != 0) {
+                                        fail_instr(inst, "THRSW with texture "
+                                                   "results from the previous "
+                                                   "THRSW still in the FIFO.");
+                                }
+
+                                last_tex_samples_outstanding =
+                                        tex_samples_outstanding;
+                                tex_samples_outstanding = 0;
+                        }
+
+                        if (qpu_inst_is_tlb(inst))
+                                scoreboard_locked = true;
+
+                        switch (sig) {
+                        case QPU_SIG_THREAD_SWITCH:
+                        case QPU_SIG_LAST_THREAD_SWITCH:
+                                /* No thread switching with the scoreboard
+                                 * locked. Doing so means we may deadlock
+                                 * when the other thread tries to lock
+                                 * scoreboard.
+                                 */
+                                if (scoreboard_locked) {
+                                        fail_instr(inst, "THRSW with the "
+                                                   "scoreboard locked.");
+                                }
+
+                                /* No thread switching after lthrsw, since
+                                 * lthrsw means that we get delayed until the
+                                 * other shader is ready for us to terminate.
+                                 */
+                                if (last_thrsw_found) {
+                                        fail_instr(inst, "THRSW after a "
+                                                   "previous LTHRSW");
+                                }
+
+                                if (sig == QPU_SIG_LAST_THREAD_SWITCH)
+                                        last_thrsw_found = true;
+
+                                /* No THRSW while we already have a THRSW
+                                 * queued.
+                                 */
+                                if (i < thrsw_ip) {
+                                        fail_instr(inst,
+                                                   "THRSW with a THRSW queued.");
+                                }
+
+                                thrsw_ip = i + 3;
+                                break;
+
+                        case QPU_SIG_LOAD_TMU0:
+                        case QPU_SIG_LOAD_TMU1:
+                                if (last_tex_samples_outstanding == 0) {
+                                        fail_instr(inst, "TMU load with nothing "
+                                                   "in the results fifo from "
+                                                   "the previous THRSW.");
+                                }
+
+                                last_tex_samples_outstanding--;
+                                break;
+                        }
+
+                        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+                        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+                        if (waddr_add == QPU_W_TMU0_S ||
+                            waddr_add == QPU_W_TMU1_S ||
+                            waddr_mul == QPU_W_TMU0_S ||
+                            waddr_mul == QPU_W_TMU1_S) {
+                                tex_samples_outstanding++;
+                        }
+                }
+        }
 }
```