summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/vc4/Makefile.sources 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 12
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c 101
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir_schedule.c 16
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c 8
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_schedule.c 21
-rw-r--r-- src/gallium/drivers/vc4/vc4_uniforms.c 6
9 files changed, 167 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 76e46f54089..76e52ce142d 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -31,6 +31,7 @@ C_SOURCES := \
vc4_opt_vpm.c \
vc4_program.c \
vc4_qir.c \
+ vc4_qir_emit_uniform_stream_resets.c \
vc4_qir_live_variables.c \
vc4_qir_lower_uniforms.c \
vc4_qir_schedule.c \
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 465e052053e..521f971272a 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2114,6 +2114,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
qir_lower_uniforms(c);
qir_schedule_instructions(c);
+ qir_emit_uniform_stream_resets(c);
if (vc4_debug & VC4_DEBUG_QIR) {
fprintf(stderr, "%s prog %d/%d QIR:\n",
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index e1d663dd3a7..9ff15611ef9 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -85,6 +85,7 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_LOAD_IMM] = { "load_imm", 0, 1 },
[QOP_BRANCH] = { "branch", 0, 0, true },
+ [QOP_UNIFORMS_RESET] = { "uniforms_reset", 0, 2, true },
};
static const char *
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index e7ddfaa1fcb..88eda225d80 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -162,6 +162,12 @@ enum qop {
* that block->successor[1] may be unset if the condition is ALWAYS.
*/
QOP_BRANCH,
+
+ /* Emits an ADD of src[0] and src[1] into QPU_W_UNIFORMS_ADDRESS; src[0]
+ * must be a QOP_LOAD_IMM result and src[1] a QUNIFORM_UNIFORMS_ADDRESS.
+ * Required by the kernel as part of its branch validation.
+ */
+ QOP_UNIFORMS_RESET,
};
struct queued_qpu_inst {
@@ -260,6 +266,11 @@ enum quniform_contents {
QUNIFORM_ALPHA_REF,
QUNIFORM_SAMPLE_MASK,
+
+ /* Placeholder uniform that will be updated by the kernel when used by
+ * an instruction writing to QPU_W_UNIFORMS_ADDRESS.
+ */
+ QUNIFORM_UNIFORMS_ADDRESS,
};
struct vc4_varying_slot {
@@ -521,6 +532,7 @@ struct qreg qir_uniform(struct vc4_compile *c,
uint32_t data);
void qir_schedule_instructions(struct vc4_compile *c);
void qir_reorder_uniforms(struct vc4_compile *c);
+void qir_emit_uniform_stream_resets(struct vc4_compile *c);
struct qreg qir_emit_def(struct vc4_compile *c, struct qinst *inst);
struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst);
diff --git a/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c
new file mode 100644
index 00000000000..3fd6358e3d3
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_qir_emit_uniform_stream_resets.c
+ *
+ * Adds updates to the uniform stream address at the start of each basic block
+ * that uses uniforms.
+ *
+ * This will be done just before the translation to QPU instructions, once we
+ * have performed optimization and know how many uniforms each block uses.
+ */
+
+#include "vc4_qir.h"
+#include "util/hash_table.h"
+#include "util/u_math.h"
+/* Returns true if inst is a tex op (qir_is_tex) or has a QFILE_UNIF source. */
+static bool
+inst_reads_a_uniform(struct qinst *inst)
+{
+ if (qir_is_tex(inst)) /* NOTE(review): presumably an implicit uniform read -- confirm */
+ return true;
+
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ if (inst->src[i].file == QFILE_UNIF)
+ return true;
+ }
+
+ return false;
+}
+/* Returns true if any instruction in block satisfies inst_reads_a_uniform(). */
+static bool
+block_reads_any_uniform(struct qblock *block)
+{
+ qir_for_each_inst(inst, block) {
+ if (inst_reads_a_uniform(inst))
+ return true;
+ }
+
+ return false;
+}
+/* Prepends a uniform stream address reset to each non-entry block needing one. */
+void
+qir_emit_uniform_stream_resets(struct vc4_compile *c)
+{
+ uint32_t uniform_count = 0; /* uniform-reading insts counted in the blocks visited so far */
+
+ qir_for_each_block(block, c) {
+ if (block != qir_entry_block(c) &&
+ (block_reads_any_uniform(block) ||
+ block == qir_exit_block(c))) {
+ struct qreg t = qir_get_temp(c);
+ struct qreg uni_addr =
+ qir_uniform(c, QUNIFORM_UNIFORMS_ADDRESS, 0);
+
+ /* Load the offset of the next uniform in the stream
+ * after the one we're generating here (hence the + 1).
+ */
+ struct qinst *load_imm =
+ qir_inst(QOP_LOAD_IMM,
+ t,
+ qir_reg(QFILE_LOAD_IMM,
+ (uniform_count + 1) * 4),
+ c->undef);
+ struct qinst *add =
+ qir_inst(QOP_UNIFORMS_RESET, c->undef,
+ t, uni_addr);
+
+ /* list_add() pushes to the top of the block, so insert in
+ * reverse order: load_imm ends up ahead of the reset.
+ */
+ list_add(&add->link, &block->instructions);
+ list_add(&load_imm->link, &block->instructions);
+ }
+
+ qir_for_each_inst(inst, block) { /* accumulate this block's uniform readers */
+ if (inst_reads_a_uniform(inst))
+ uniform_count++;
+ }
+ }
+}
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index 903c6108824..69bd0dd623e 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -138,6 +138,7 @@ struct schedule_setup_state {
struct schedule_node *last_tex_coord;
struct schedule_node *last_tex_result;
struct schedule_node *last_tlb;
+ struct schedule_node *last_uniforms_reset;
enum direction dir;
/**
@@ -280,6 +281,16 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
calculate_deps(&state, n);
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ switch (inst->src[i].file) {
+ case QFILE_UNIF:
+ add_dep(state.dir, state.last_uniforms_reset, n);
+ break;
+ default:
+ break;
+ }
+ }
+
switch (inst->op) {
case QOP_TEX_S:
case QOP_TEX_T:
@@ -324,6 +335,11 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
memset(&state.tex_fifo[state.tex_fifo_pos], 0,
sizeof(state.tex_fifo[0]));
break;
+
+ case QOP_UNIFORMS_RESET:
+ add_write_dep(state.dir, &state.last_uniforms_reset, n);
+ break;
+
default:
assert(!qir_is_tex(inst));
break;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 9001643507e..6a10e1b68de 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -427,6 +427,14 @@ vc4_generate_code_block(struct vc4_compile *c,
handled_qinst_cond = true;
break;
+ case QOP_UNIFORMS_RESET:
+ fixup_raddr_conflict(block, dst, &src[0], &src[1],
+ qinst, &unpack);
+
+ queue(block, qpu_a_ADD(qpu_ra(QPU_W_UNIFORMS_ADDRESS),
+ src[0], src[1]));
+ break;
+
default:
assert(qinst->op < ARRAY_SIZE(translate));
assert(translate[qinst->op].op != 0); /* NOPs */
diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index a55b0351402..1caee51a581 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -92,6 +92,7 @@ struct schedule_state {
struct schedule_node *last_tmu_write;
struct schedule_node *last_tlb;
struct schedule_node *last_vpm;
+ struct schedule_node *last_uniforms_reset;
enum direction dir;
/* Estimated cycle when the current instruction would start. */
uint32_t time;
@@ -184,6 +185,9 @@ process_raddr_deps(struct schedule_state *state, struct schedule_node *n,
break;
case QPU_R_UNIF:
+ add_read_dep(state, state->last_uniforms_reset, n);
+ break;
+
case QPU_R_NOP:
case QPU_R_ELEM_QPU:
case QPU_R_XY_PIXEL_COORD:
@@ -259,6 +263,7 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
}
} else if (is_tmu_write(waddr)) {
add_write_dep(state, &state->last_tmu_write, n);
+ add_read_dep(state, state->last_uniforms_reset, n);
} else if (qpu_waddr_is_tlb(waddr) ||
waddr == QPU_W_MS_FLAGS) {
add_write_dep(state, &state->last_tlb, n);
@@ -305,6 +310,10 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
add_write_dep(state, &state->last_tlb, n);
break;
+ case QPU_W_UNIFORMS_ADDRESS:
+ add_write_dep(state, &state->last_uniforms_reset, n);
+ break;
+
case QPU_W_NOP:
break;
@@ -442,6 +451,7 @@ calculate_reverse_deps(struct vc4_compile *c, struct list_head *schedule_list)
struct choose_scoreboard {
int tick;
int last_sfu_write_tick;
+ int last_uniforms_reset_tick;
uint32_t last_waddr_a, last_waddr_b;
};
@@ -476,6 +486,11 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard, uint64_t inst)
}
}
+ if (reads_uniform(inst) &&
+ scoreboard->tick - scoreboard->last_uniforms_reset_tick <= 2) {
+ return true;
+ }
+
return false;
}
@@ -614,6 +629,11 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
(waddr_mul >= QPU_W_SFU_RECIP && waddr_mul <= QPU_W_SFU_LOG)) {
scoreboard->last_sfu_write_tick = scoreboard->tick;
}
+
+ if (waddr_add == QPU_W_UNIFORMS_ADDRESS ||
+ waddr_mul == QPU_W_UNIFORMS_ADDRESS) {
+ scoreboard->last_uniforms_reset_tick = scoreboard->tick;
+ }
}
static void
@@ -971,6 +991,7 @@ qpu_schedule_instructions(struct vc4_compile *c)
scoreboard.last_waddr_a = ~0;
scoreboard.last_waddr_b = ~0;
scoreboard.last_sfu_write_tick = -10;
+ scoreboard.last_uniforms_reset_tick = -10;
if (debug) {
fprintf(stderr, "Pre-schedule instructions\n");
diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c
index 4715a7fffd5..ee21771dd89 100644
--- a/src/gallium/drivers/vc4/vc4_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -324,6 +324,11 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
case QUNIFORM_SAMPLE_MASK:
cl_aligned_u32(&uniforms, vc4->sample_mask);
break;
+
+ case QUNIFORM_UNIFORMS_ADDRESS:
+ /* This will be filled in by the kernel. */
+ cl_aligned_u32(&uniforms, 0xd0d0d0d0);
+ break;
}
#if 0
uint32_t written_val = *((uint32_t *)uniforms - 1);
@@ -345,6 +350,7 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
for (int i = 0; i < shader->uniforms.count; i++) {
switch (shader->uniforms.contents[i]) {
case QUNIFORM_CONSTANT:
+ case QUNIFORM_UNIFORMS_ADDRESS:
break;
case QUNIFORM_UNIFORM:
case QUNIFORM_UBO_ADDR: