summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2015-02-12 14:17:21 -0800
committerEric Anholt <[email protected]>2015-02-12 16:33:16 -0800
commit3f1e1287fd960966eee8b12a75c8a8f62e11cdd2 (patch)
tree9415b611ac95698b0d383b6565ac36a34178d358
parent4413861dd835cf8b9143f3032b670635bd217bf6 (diff)
vc4: Make SF be a flag on the QIR instructions.
Right now the places that used to emit a mov.sf just put the SF on the previous instruction when it generated the source of the SF value. Even without optimization to push the sf up further (and thus potentially kill more MOVs), this gets us: total uniforms in shared programs: 13455 -> 13457 (0.01%); uniforms in affected programs: 3 -> 5 (66.67%); total instructions in shared programs: 40296 -> 40198 (-0.24%); instructions in affected programs: 12595 -> 12497 (-0.78%)
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_algebraic.c16
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_cse.c23
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_dead_code.c13
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_vpm_writes.c3
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c6
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c19
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h8
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c10
8 files changed, 47 insertions, 51 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index 994fa907f77..1e0b8c9c097 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -149,22 +149,6 @@ qir_opt_algebraic(struct vc4_compile *c)
defs[inst->dst.index] = inst;
switch (inst->op) {
- case QOP_SF:
- /* SF just looks at the sign bit, or whether all the
- * bits are 0. This is preserved across an itof
- * transformation.
- */
- if (inst->src[0].file == QFILE_TEMP &&
- defs[inst->src[0].index]->op == QOP_ITOF) {
- dump_from(c, inst);
- inst->src[0] =
- defs[inst->src[0].index]->src[0];
- progress = true;
- dump_to(c, inst);
- break;
- }
- break;
-
case QOP_SEL_X_Y_ZS:
case QOP_SEL_X_Y_ZC:
case QOP_SEL_X_Y_NS:
diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c
index c11c90efcdc..71794f7d1cf 100644
--- a/src/gallium/drivers/vc4/vc4_opt_cse.c
+++ b/src/gallium/drivers/vc4/vc4_opt_cse.c
@@ -45,7 +45,7 @@ struct inst_key {
enum qop op;
struct qreg src[4];
/**
- * If the instruction depends on the flags, how many QOP_SFs have been
+ * If the instruction depends on the flags, how many SFs have been
* seen before this instruction, or if it depends on r4, how many r4
* writes have been seen.
*/
@@ -122,7 +122,6 @@ qir_opt_cse(struct vc4_compile *c)
{
bool progress = false;
struct simple_node *node, *t;
- struct qinst *last_sf = NULL;
uint32_t sf_count = 0, r4_count = 0;
struct hash_table *ht = _mesa_hash_table_create(NULL, NULL,
@@ -135,27 +134,11 @@ qir_opt_cse(struct vc4_compile *c)
if (qir_has_side_effects(c, inst) ||
qir_has_side_effect_reads(c, inst)) {
- if (inst->op == QOP_TLB_DISCARD_SETUP)
- last_sf = NULL;
continue;
}
- if (inst->op == QOP_SF) {
- if (last_sf &&
- qir_reg_equals(last_sf->src[0], inst->src[0])) {
- if (debug) {
- fprintf(stderr,
- "Removing redundant SF: ");
- qir_dump_inst(c, inst);
- fprintf(stderr, "\n");
- }
- qir_remove_instruction(inst);
- progress = true;
- continue;
- } else {
- last_sf = inst;
- sf_count++;
- }
+ if (inst->sf) {
+ sf_count++;
} else {
struct qinst *cse = vc4_find_cse(c, ht, inst,
sf_count, r4_count);
diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
index 94ab382500d..dd1561d68d4 100644
--- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c
+++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
@@ -43,6 +43,7 @@ dce(struct vc4_compile *c, struct qinst *inst)
qir_dump_inst(c, inst);
fprintf(stderr, "\n");
}
+ assert(!inst->sf);
qir_remove_instruction(inst);
}
@@ -93,6 +94,7 @@ qir_opt_dead_code(struct vc4_compile *c)
if (inst->dst.file == QFILE_TEMP &&
!used[inst->dst.index] &&
+ !inst->sf &&
(!qir_has_side_effects(c, inst) ||
inst->op == QOP_TEX_RESULT) &&
!has_nonremovable_reads(c, inst)) {
@@ -120,11 +122,16 @@ qir_opt_dead_code(struct vc4_compile *c)
if (qir_depends_on_flags(inst))
sf_used = true;
- if (inst->op == QOP_SF) {
+ if (inst->sf) {
if (!sf_used) {
- dce(c, inst);
+ if (debug) {
+ fprintf(stderr, "Removing SF on: ");
+ qir_dump_inst(c, inst);
+ fprintf(stderr, "\n");
+ }
+
+ inst->sf = false;
progress = true;
- continue;
}
sf_used = false;
}
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
index 0269e32494a..ba322b6421c 100644
--- a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
@@ -79,7 +79,7 @@ qir_opt_vpm_writes(struct vc4_compile *c)
if (qir_is_multi_instruction(inst))
continue;
- if (qir_depends_on_flags(inst))
+ if (qir_depends_on_flags(inst) || inst->sf)
continue;
if (qir_has_side_effects(c, inst) ||
@@ -106,6 +106,7 @@ qir_opt_vpm_writes(struct vc4_compile *c)
/* Move the generating instruction to the end of the program
* to maintain the order of the VPM writes.
*/
+ assert(!vpm_writes[i]->sf);
move_to_tail(&vpm_writes[i]->link, &inst->link);
qir_remove_instruction(vpm_writes[i]);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 3f0de2caee1..9d3d868ac90 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2163,6 +2163,12 @@ vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum qstage stage,
}
tgsi_parse_free(&c->parser);
+ if (vc4_debug & VC4_DEBUG_QIR) {
+ fprintf(stderr, "%s prog %d/%d pre-opt QIR:\n",
+ qir_get_stage_name(c->stage),
+ c->program_id, c->variant_id);
+ qir_dump(c);
+ }
qir_optimize(c);
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index feb585d69ae..9e0ee1f0ae5 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -59,7 +59,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_XOR] = { "xor", 1, 2 },
[QOP_NOT] = { "not", 1, 1 },
- [QOP_SF] = { "sf", 0, 1 },
[QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1, false, true },
[QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
[QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
@@ -282,7 +281,9 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
void
qir_dump_inst(struct vc4_compile *c, struct qinst *inst)
{
- fprintf(stderr, "%s ", qir_get_op_name(inst->op));
+ fprintf(stderr, "%s%s ",
+ qir_get_op_name(inst->op),
+ inst->sf ? ".sf" : "");
qir_print_reg(c, inst->dst, true);
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
@@ -416,6 +417,20 @@ qir_get_stage_name(enum qstage stage)
return names[stage];
}
+void
+qir_SF(struct vc4_compile *c, struct qreg src)
+{
+ assert(!is_empty_list(&c->instructions));
+ struct qinst *last_inst = (struct qinst *)c->instructions.prev;
+ if (last_inst->dst.file != src.file ||
+ last_inst->dst.index != src.index ||
+ qir_is_multi_instruction(last_inst)) {
+ src = qir_MOV(c, src);
+ last_inst = (struct qinst *)c->instructions.prev;
+ }
+ last_inst->sf = true;
+}
+
#define OPTPASS(func) \
do { \
bool stage_progress = func(c); \
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index ee869940954..6da6ff6542e 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -24,6 +24,7 @@
#ifndef VC4_QIR_H
#define VC4_QIR_H
+#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
@@ -76,9 +77,6 @@ enum qop {
QOP_XOR,
QOP_NOT,
- /* Sets the flag register according to src. */
- QOP_SF,
-
/* Note: Orderings of these compares must be the same as in
* qpu_defines.h. Selects the src[0] if the ns flag bit is set,
* otherwise 0. */
@@ -173,6 +171,7 @@ struct qinst {
enum qop op;
struct qreg dst;
struct qreg *src;
+ bool sf;
};
enum qstage {
@@ -397,6 +396,8 @@ bool qir_opt_vpm_writes(struct vc4_compile *c);
void qpu_schedule_instructions(struct vc4_compile *c);
+void qir_SF(struct vc4_compile *c, struct qreg src);
+
#define QIR_ALU0(name) \
static inline struct qreg \
qir_##name(struct vc4_compile *c) \
@@ -443,7 +444,6 @@ QIR_ALU2(FADD)
QIR_ALU2(FSUB)
QIR_ALU2(FMUL)
QIR_ALU2(MUL24)
-QIR_NODST_1(SF)
QIR_ALU1(SEL_X_0_ZS)
QIR_ALU1(SEL_X_0_ZC)
QIR_ALU1(SEL_X_0_NS)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 7531be5cf89..eeb8d3a21ff 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -270,11 +270,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
}
break;
- case QOP_SF:
- queue(c, qpu_a_MOV(qpu_ra(QPU_W_NOP), src[0]));
- *last_inst(c) |= QPU_SF;
- break;
-
case QOP_SEL_X_0_ZS:
case QOP_SEL_X_0_ZC:
case QOP_SEL_X_0_NS:
@@ -548,6 +543,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
}
+
+ if (qinst->sf) {
+ assert(!qir_is_multi_instruction(qinst));
+ *last_inst(c) |= QPU_SF;
+ }
}
qpu_schedule_instructions(c);