summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/vc4/Makefile.sources1
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_copy_propagation.c15
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c50
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c15
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h2
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c176
6 files changed, 209 insertions, 50 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 95f1a340ab3..c7254ea1473 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -20,6 +20,7 @@ C_SOURCES := \
vc4_packet.h \
vc4_program.c \
vc4_qir.c \
+ vc4_qir_lower_uniforms.c \
vc4_qir.h \
vc4_qpu.c \
vc4_qpu_defines.h \
diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
index 07e1cb14b0d..f8c49a44bd3 100644
--- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
+++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
@@ -49,22 +49,11 @@ qir_opt_copy_propagation(struct vc4_compile *c)
if (inst->dst.file == QFILE_TEMP)
defs[inst->dst.index] = inst;
- /* A single instruction can only read one uniform value. (It
- * could maybe read the same uniform value in two operands,
- * but that doesn't seem important to do).
- */
- bool reads_a_uniform = false;
- for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
- if (inst->src[i].file == QFILE_UNIF)
- reads_a_uniform = true;
- }
-
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
int index = inst->src[i].index;
if (inst->src[i].file == QFILE_TEMP &&
(movs[index].file == QFILE_TEMP ||
- (movs[index].file == QFILE_UNIF &&
- !reads_a_uniform))) {
+ movs[index].file == QFILE_UNIF)) {
if (debug) {
fprintf(stderr, "Copy propagate: ");
qir_dump_inst(c, inst);
@@ -72,8 +61,6 @@ qir_opt_copy_propagation(struct vc4_compile *c)
}
inst->src[i] = movs[index];
- if (movs[index].file == QFILE_UNIF)
- reads_a_uniform = true;
if (debug) {
fprintf(stderr, "to: ");
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index a1d9a7f064c..b904679ef00 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -143,24 +143,15 @@ qir_uniform(struct vc4_compile *c,
}
static struct qreg
-get_temp_for_uniform(struct vc4_compile *c, enum quniform_contents contents,
- uint32_t data)
-{
- struct qreg u = qir_uniform(c, contents, data);
- struct qreg t = qir_MOV(c, u);
- return t;
-}
-
-static struct qreg
qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
{
- return get_temp_for_uniform(c, QUNIFORM_CONSTANT, ui);
+ return qir_uniform(c, QUNIFORM_CONSTANT, ui);
}
static struct qreg
qir_uniform_f(struct vc4_compile *c, float f)
{
- return qir_uniform_ui(c, fui(f));
+ return qir_uniform(c, QUNIFORM_CONSTANT, fui(f));
}
static struct qreg
@@ -232,8 +223,7 @@ get_src(struct vc4_compile *c, unsigned tgsi_op,
if (src->Indirect) {
r = indirect_uniform_load(c, full_src, s);
} else {
- r = get_temp_for_uniform(c, QUNIFORM_UNIFORM,
- src->Index * 4 + s);
+ r = qir_uniform(c, QUNIFORM_UNIFORM, src->Index * 4 + s);
}
break;
case TGSI_FILE_INPUT:
@@ -660,13 +650,9 @@ tgsi_to_qir_tex(struct vc4_compile *c,
if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_RECT ||
tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT) {
s = qir_FMUL(c, s,
- get_temp_for_uniform(c,
- QUNIFORM_TEXRECT_SCALE_X,
- unit));
+ qir_uniform(c, QUNIFORM_TEXRECT_SCALE_X, unit));
t = qir_FMUL(c, t,
- get_temp_for_uniform(c,
- QUNIFORM_TEXRECT_SCALE_Y,
- unit));
+ qir_uniform(c, QUNIFORM_TEXRECT_SCALE_Y, unit));
}
if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
@@ -689,7 +675,7 @@ tgsi_to_qir_tex(struct vc4_compile *c,
c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP ||
c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
- qir_TEX_R(c, get_temp_for_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit),
+ qir_TEX_R(c, qir_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit),
texture_u[next_texture_u++]);
}
@@ -1504,14 +1490,11 @@ vc4_blend_channel(struct vc4_compile *c,
}
case PIPE_BLENDFACTOR_CONST_COLOR:
return qir_FMUL(c, val,
- get_temp_for_uniform(c,
- QUNIFORM_BLEND_CONST_COLOR,
- channel));
+ qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR,
+ channel));
case PIPE_BLENDFACTOR_CONST_ALPHA:
return qir_FMUL(c, val,
- get_temp_for_uniform(c,
- QUNIFORM_BLEND_CONST_COLOR,
- 3));
+ qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR, 3));
case PIPE_BLENDFACTOR_ZERO:
return qir_uniform_f(c, 0.0);
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
@@ -1529,15 +1512,15 @@ vc4_blend_channel(struct vc4_compile *c,
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
return qir_FMUL(c, val,
qir_FSUB(c, qir_uniform_f(c, 1.0),
- get_temp_for_uniform(c,
- QUNIFORM_BLEND_CONST_COLOR,
- channel)));
+ qir_uniform(c,
+ QUNIFORM_BLEND_CONST_COLOR,
+ channel)));
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
return qir_FMUL(c, val,
qir_FSUB(c, qir_uniform_f(c, 1.0),
- get_temp_for_uniform(c,
- QUNIFORM_BLEND_CONST_COLOR,
- 3)));
+ qir_uniform(c,
+ QUNIFORM_BLEND_CONST_COLOR,
+ 3)));
default:
case PIPE_BLENDFACTOR_SRC1_COLOR:
@@ -1661,7 +1644,7 @@ static void
alpha_test_discard(struct vc4_compile *c)
{
struct qreg src_alpha;
- struct qreg alpha_ref = get_temp_for_uniform(c, QUNIFORM_ALPHA_REF, 0);
+ struct qreg alpha_ref = qir_uniform(c, QUNIFORM_ALPHA_REF, 0);
if (!c->fs_key->alpha_test)
return;
@@ -2171,6 +2154,7 @@ vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum qstage stage,
}
qir_optimize(c);
+ qir_lower_uniforms(c);
if (vc4_debug & VC4_DEBUG_QIR) {
fprintf(stderr, "%s prog %d/%d QIR:\n",
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 9e0ee1f0ae5..5c1fdbddfb6 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -174,6 +174,12 @@ qir_is_multi_instruction(struct qinst *inst)
}
bool
+qir_is_tex(struct qinst *inst)
+{
+ return inst->op >= QOP_TEX_S && inst->op <= QOP_TEX_DIRECT;
+}
+
+bool
qir_depends_on_flags(struct qinst *inst)
{
switch (inst->op) {
@@ -420,9 +426,12 @@ qir_get_stage_name(enum qstage stage)
void
qir_SF(struct vc4_compile *c, struct qreg src)
{
- assert(!is_empty_list(&c->instructions));
- struct qinst *last_inst = (struct qinst *)c->instructions.prev;
- if (last_inst->dst.file != src.file ||
+ struct qinst *last_inst = NULL;
+ if (!is_empty_list(&c->instructions))
+ last_inst = (struct qinst *)c->instructions.prev;
+
+ if (!last_inst ||
+ last_inst->dst.file != src.file ||
last_inst->dst.index != src.index ||
qir_is_multi_instruction(last_inst)) {
src = qir_MOV(c, src);
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 6da6ff6542e..a1b55605584 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -376,6 +376,7 @@ bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
bool qir_is_multi_instruction(struct qinst *inst);
+bool qir_is_tex(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
bool qir_reads_r4(struct qinst *inst);
@@ -393,6 +394,7 @@ bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm_writes(struct vc4_compile *c);
+void qir_lower_uniforms(struct vc4_compile *c);
void qpu_schedule_instructions(struct vc4_compile *c);
diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
new file mode 100644
index 00000000000..d527889e76f
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_qir_lower_uniforms.c
+ *
+ * This is the lowering pass for instructions that read more than one
+ * uniform: the extra uniform reads are replaced with reads of temporaries
+ * that are loaded by MOVs inserted at the start of the program.
+ *
+ * The most commonly used uniforms are lowered first, so that a single MOV
+ * can fix up as many instructions as possible.
+ */
+
+#include "vc4_qir.h"
+#include "util/hash_table.h"
+#include "util/u_math.h"
+
+/* Hash callback for the uniform table: the key is an integer stored in a
+ * pointer, so the pointer's value is used directly as the hash.
+ */
+static inline uint32_t
+index_hash(const void *key)
+{
+        const uintptr_t bits = (uintptr_t)key;
+
+        return bits;
+}
+
+/* Key-equality callback for the uniform table: keys are integers stored in
+ * pointers, so two keys match exactly when the pointers are identical.
+ */
+static inline bool
+index_compare(const void *a, const void *b)
+{
+        if (a != b)
+                return false;
+
+        return true;
+}
+
+/**
+ * Encodes a uniform's index as a hash table key.
+ *
+ * The key is biased by one so that uniform index 0 does not turn into a NULL
+ * key: the hash table uses a NULL key to mark unused slots, so an entry
+ * stored under NULL could never be found again.
+ */
+static void *
+uniform_key(struct qreg reg)
+{
+        return (void *)((uintptr_t)reg.index + 1);
+}
+
+/** Recovers the uniform index from a key produced by uniform_key(). */
+static uint32_t
+uniform_index_from_key(const void *key)
+{
+        return (uintptr_t)key - 1;
+}
+
+/** Returns how many sources of \p inst read from the uniform file. */
+static uint32_t
+count_uniform_srcs(struct qinst *inst)
+{
+        uint32_t nsrc = qir_get_op_nsrc(inst->op);
+        uint32_t count = 0;
+
+        for (uint32_t i = 0; i < nsrc; i++) {
+                if (inst->src[i].file == QFILE_UNIF)
+                        count++;
+        }
+
+        return count;
+}
+
+/**
+ * Records one more pending use of the uniform \p reg.
+ *
+ * The table maps a uniform (see uniform_key()) to a use count that is stored
+ * directly in the entry's data pointer.
+ */
+static void
+add_uniform(struct hash_table *ht, struct qreg reg)
+{
+        void *key = uniform_key(reg);
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+
+        if (entry) {
+                /* The count lives in a void *, so do the arithmetic on an
+                 * integer type rather than on the pointer itself.
+                 */
+                entry->data = (void *)((uintptr_t)entry->data + 1);
+        } else {
+                _mesa_hash_table_insert(ht, key, (void *)(uintptr_t)1);
+        }
+}
+
+/**
+ * Drops one pending use of the uniform \p reg, removing its table entry once
+ * the count reaches zero.
+ *
+ * The uniform must currently be present in the table (asserted).
+ */
+static void
+remove_uniform(struct hash_table *ht, struct qreg reg)
+{
+        void *key = uniform_key(reg);
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+
+        assert(entry);
+        entry->data = (void *)((uintptr_t)entry->data - 1);
+        if (entry->data == NULL)
+                _mesa_hash_table_remove(ht, entry);
+}
+
+/**
+ * Returns whether source \p i of \p inst is a uniform that this pass may
+ * replace with a temporary.
+ *
+ * Source 1 of a texture instruction is never lowered.
+ * NOTE(review): presumably that source carries the texture setup uniform
+ * that has to be read by the texture instruction itself -- confirm.
+ */
+static bool
+is_lowerable_uniform(struct qinst *inst, int i)
+{
+        if (inst->src[i].file != QFILE_UNIF)
+                return false;
+        if (qir_is_tex(inst))
+                return i != 1;
+        return true;
+}
+
+/**
+ * Rewrites the program so that no instruction reads more than one uniform.
+ *
+ * Uniforms used by offending instructions are counted, then repeatedly the
+ * most-used one is loaded into a fresh temporary by a MOV placed at the head
+ * of the instruction list, and the offending instructions are switched over
+ * to read that temporary instead.
+ */
+void
+qir_lower_uniforms(struct vc4_compile *c)
+{
+        struct simple_node *node;
+        struct hash_table *ht =
+                _mesa_hash_table_create(c, index_hash, index_compare);
+
+        /* Walk the instruction list, finding which instructions have more
+         * than one uniform referenced, and add those uniform values to the
+         * ht.
+         */
+        foreach(node, &c->instructions) {
+                struct qinst *inst = (struct qinst *)node;
+                uint32_t nsrc = qir_get_op_nsrc(inst->op);
+
+                if (count_uniform_srcs(inst) <= 1)
+                        continue;
+
+                for (uint32_t i = 0; i < nsrc; i++) {
+                        if (is_lowerable_uniform(inst, i))
+                                add_uniform(ht, inst->src[i]);
+                }
+        }
+
+        while (ht->entries) {
+                /* Find the most commonly used uniform in instructions that
+                 * need a uniform lowered.
+                 */
+                uint32_t max_count = 0;
+                uint32_t max_index = 0;
+                struct hash_entry *entry;
+                hash_table_foreach(ht, entry) {
+                        uint32_t count = (uintptr_t)entry->data;
+                        uint32_t index = uniform_index_from_key(entry->key);
+                        if (count > max_count) {
+                                max_count = count;
+                                max_index = index;
+                        }
+                }
+
+                /* Now, find the instructions using this uniform and make them
+                 * reference a temp instead.  The MOV goes at the head of the
+                 * list so the temp is written before any instruction reads
+                 * it.
+                 */
+                struct qreg temp = qir_get_temp(c);
+                struct qreg unif = { QFILE_UNIF, max_index };
+                struct qinst *mov = qir_inst(QOP_MOV, temp, unif, c->undef);
+                insert_at_head(&c->instructions, &mov->link);
+                foreach(node, &c->instructions) {
+                        struct qinst *inst = (struct qinst *)node;
+                        uint32_t nsrc = qir_get_op_nsrc(inst->op);
+                        uint32_t count = count_uniform_srcs(inst);
+
+                        if (count <= 1)
+                                continue;
+
+                        for (uint32_t i = 0; i < nsrc; i++) {
+                                if (is_lowerable_uniform(inst, i) &&
+                                    inst->src[i].index == max_index) {
+                                        inst->src[i] = temp;
+                                        remove_uniform(ht, unif);
+                                        count--;
+                                }
+                        }
+
+                        /* If the instruction doesn't need lowering any more,
+                         * then drop its remaining uniforms from the table so
+                         * they no longer count as pending.
+                         */
+                        if (count <= 1) {
+                                for (uint32_t i = 0; i < nsrc; i++) {
+                                        if (is_lowerable_uniform(inst, i))
+                                                remove_uniform(ht, inst->src[i]);
+                                }
+                        }
+                }
+        }
+
+        _mesa_hash_table_destroy(ht, NULL);
+}