diff options
-rw-r--r-- | src/gallium/drivers/vc4/Makefile.sources | 1 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_reorder_uniforms.c | 84 |
4 files changed, 87 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index bcb4209d0d0..71a28a5da20 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -24,6 +24,7 @@ C_SOURCES := \ vc4_qpu.h \ vc4_qpu_validate.c \ vc4_register_allocate.c \ + vc4_reorder_uniforms.c \ vc4_resource.c \ vc4_resource.h \ vc4_screen.c \ diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 539fede50a1..c6603767b5e 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1440,6 +1440,7 @@ vc4_shader_tgsi_to_qir(struct vc4_compiled_shader *shader, enum qstage stage, fprintf(stderr, "QIR:\n"); qir_dump(c); } + qir_reorder_uniforms(c); vc4_generate_code(c); if (vc4_debug & VC4_DEBUG_SHADERDB) { diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index f3dad5ed67c..2ab30496aad 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -253,6 +253,7 @@ struct qinst *qir_inst4(enum qop op, struct qreg dst, struct qreg c, struct qreg d); void qir_remove_instruction(struct qinst *qinst); +void qir_reorder_uniforms(struct vc4_compile *c); void qir_emit(struct vc4_compile *c, struct qinst *inst); struct qreg qir_get_temp(struct vc4_compile *c); int qir_get_op_nsrc(enum qop qop); diff --git a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c new file mode 100644 index 00000000000..109724369d5 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c @@ -0,0 +1,84 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file vc4_reorder_uniforms.c + * + * After optimization has occurred, rewrites the shader to have uniform reads + * reading from the c->uniform_contents[] in order, exactly once each. + * + * This allows optimization and instruction scheduling to move things around + * without worrying about how the hardware has the "each uniform read bumps + * the uniform read address" property. + */ + +#include "util/ralloc.h" +#include "util/u_math.h" +#include "vc4_qir.h" + +void +qir_reorder_uniforms(struct vc4_compile *c) +{ + uint32_t *uniform_index = NULL; + uint32_t uniform_index_size = 0; + uint32_t next_uniform = 0; + struct simple_node *node; + foreach(node, &c->instructions) { + struct qinst *inst = (struct qinst *)node; + + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + if (inst->src[i].file != QFILE_UNIF) + continue; + + uint32_t new = next_uniform++; + if (uniform_index_size <= new) { + uniform_index_size = + MAX2(uniform_index_size * 2, 16); + uniform_index = + realloc(uniform_index, + uniform_index_size * + sizeof(uint32_t)); + } + + uniform_index[new] = inst->src[i].index; + inst->src[i].index = new; + } + } + + uint32_t *uniform_data = ralloc_array(c, uint32_t, next_uniform); + enum quniform_contents *uniform_contents = + ralloc_array(c, enum quniform_contents, next_uniform); + + for (uint32_t i = 0; i < next_uniform; i++) { + uniform_data[i] = c->uniform_data[uniform_index[i]]; + uniform_contents[i] = c->uniform_contents[uniform_index[i]]; + } + + ralloc_free(c->uniform_data); + c->uniform_data = uniform_data; + ralloc_free(c->uniform_contents); + c->uniform_contents = uniform_contents; + c->num_uniforms = next_uniform; + + free(uniform_index); +} |