diff options
author | Eric Anholt <[email protected]> | 2016-11-10 17:16:04 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2016-11-12 19:21:46 -0800 |
commit | 67f72c5f5d8172be1bdb970e672202f0a47bac88 (patch) | |
tree | 2629a06edc3e1169ad414f13c04dfbaae64b61b0 /src/gallium/drivers/vc4/vc4_program.c | |
parent | e3c620e86856aba939d375764c5224cf3b356b17 (diff) |
vc4: Add THRSW nodes after each tex sample setup in multithreaded mode.
This is a suboptimal implementation, but Jonas Pfeil found that it was
still a massive performance gain.
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_program.c')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 25 |
1 files changed, 25 insertions, 0 deletions
```diff
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index abd20a5940b..ad06d8558fe 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -65,6 +65,23 @@ resize_qreg_array(struct vc4_compile *c,
                 (*regs)[i] = c->undef;
 }
 
+static void
+ntq_emit_thrsw(struct vc4_compile *c)
+{
+        if (!c->fs_threaded)
+                return;
+
+        /* Always thread switch after each texture operation for now.
+         *
+         * We could do better by batching a bunch of texture fetches up and
+         * then doing one thread switch and collecting all their results
+         * afterward.
+         */
+        qir_emit_nondef(c, qir_inst(QOP_THRSW, c->undef,
+                                    c->undef, c->undef));
+        c->last_thrsw_at_top_level = (c->execute.file == QFILE_NULL);
+}
+
 static struct qreg
 indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
 {
@@ -105,6 +122,9 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
         qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
 
         c->num_texture_samples++;
+
+        ntq_emit_thrsw(c);
+
         return qir_TEX_RESULT(c);
 }
 
@@ -363,6 +383,8 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
         qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
 
+        ntq_emit_thrsw(c);
+
         struct qreg tex = qir_TEX_RESULT(c);
         c->num_texture_samples++;
 
@@ -483,6 +505,9 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
         qir_TEX_S(c, s, texture_u[next_texture_u++]);
 
         c->num_texture_samples++;
+
+        ntq_emit_thrsw(c);
+
         struct qreg tex = qir_TEX_RESULT(c);
 
         enum pipe_format format = c->key->tex[unit].format;
```