summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2015-10-24 17:35:03 -0700
committerEric Anholt <[email protected]>2015-10-26 16:48:34 -0700
commit99a9a5a345fab8bbf36ab4e42581f8ee04a59a63 (patch)
treeaecc944d68f8f1fa6a32a12bf213d0550c91dac4 /src/gallium/drivers/vc4
parent548b05d53f3c89630aa77fc92ff174f5d8162ab2 (diff)
vc4: Switch the unpack ops to being unpack flags on a mov.
This paves the way for copy propagating our unpacks. We end up with a small change on shader-db: total instructions in shared programs: 89390 -> 89251 (-0.16%) instructions in affected programs: 19041 -> 18902 (-0.73%) which appears to be because we no longer convert MOVs for an FMAX dst, r4.unpack, r4.unpack (instead of the previous MOV dst, r4.unpack), and this ends up with a slightly better schedule.
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_algebraic.c1
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_small_immediates.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c45
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h34
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c57
-rw-r--r--src/gallium/drivers/vc4/vc4_register_allocate.c24
6 files changed, 42 insertions, 123 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index 5b435832b92..f1bab810eff 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -64,6 +64,7 @@ is_constant_value(struct vc4_compile *c, struct qreg reg,
uint32_t val)
{
if (reg.file == QFILE_UNIF &&
+ !reg.pack &&
c->uniform_contents[reg.index] == QUNIFORM_CONSTANT &&
c->uniform_data[reg.index] == val) {
return true;
diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
index d6e98f0aebf..e61562171aa 100644
--- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
+++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
@@ -56,6 +56,7 @@ qir_opt_small_immediates(struct vc4_compile *c)
struct qreg src = qir_follow_movs(c, inst->src[i]);
if (src.file != QFILE_UNIF ||
+ src.pack ||
c->uniform_contents[src.index] !=
QUNIFORM_CONSTANT) {
continue;
@@ -72,9 +73,6 @@ qir_opt_small_immediates(struct vc4_compile *c)
continue;
}
- if (qir_src_needs_a_file(inst))
- continue;
-
uint32_t imm = c->uniform_data[src.index];
uint32_t small_imm = qpu_encode_small_immediate(imm);
if (small_imm == ~0)
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index ce6618fe7d0..bba4f6d82ac 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -36,6 +36,7 @@ struct qir_op_info {
static const struct qir_op_info qir_op_info[] = {
[QOP_MOV] = { "mov", 1, 1 },
+ [QOP_FMOV] = { "fmov", 1, 1 },
[QOP_FADD] = { "fadd", 1, 2 },
[QOP_FSUB] = { "fsub", 1, 2 },
[QOP_FMUL] = { "fmul", 1, 2 },
@@ -100,18 +101,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_TEX_B] = { "tex_b", 0, 2 },
[QOP_TEX_DIRECT] = { "tex_direct", 0, 2 },
[QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
- [QOP_UNPACK_8A_F] = { "unpack_8a_f", 1, 1 },
- [QOP_UNPACK_8B_F] = { "unpack_8b_f", 1, 1 },
- [QOP_UNPACK_8C_F] = { "unpack_8c_f", 1, 1 },
- [QOP_UNPACK_8D_F] = { "unpack_8d_f", 1, 1 },
- [QOP_UNPACK_16A_F] = { "unpack_16a_f", 1, 1 },
- [QOP_UNPACK_16B_F] = { "unpack_16b_f", 1, 1 },
- [QOP_UNPACK_8A_I] = { "unpack_8a_i", 1, 1 },
- [QOP_UNPACK_8B_I] = { "unpack_8b_i", 1, 1 },
- [QOP_UNPACK_8C_I] = { "unpack_8c_i", 1, 1 },
- [QOP_UNPACK_8D_I] = { "unpack_8d_i", 1, 1 },
- [QOP_UNPACK_16A_I] = { "unpack_16a_i", 1, 1 },
- [QOP_UNPACK_16B_I] = { "unpack_16b_i", 1, 1 },
};
static const char *
@@ -193,6 +182,7 @@ bool
qir_is_float_input(struct qinst *inst)
{
switch (inst->op) {
+ case QOP_FMOV:
case QOP_FMUL:
case QOP_FADD:
case QOP_FSUB:
@@ -201,12 +191,6 @@ qir_is_float_input(struct qinst *inst)
case QOP_FMINABS:
case QOP_FMAXABS:
case QOP_FTOI:
- case QOP_UNPACK_8A_F:
- case QOP_UNPACK_8B_F:
- case QOP_UNPACK_8C_F:
- case QOP_UNPACK_8D_F:
- case QOP_UNPACK_16A_F:
- case QOP_UNPACK_16B_F:
return true;
default:
return false;
@@ -216,7 +200,8 @@ qir_is_float_input(struct qinst *inst)
bool
qir_is_raw_mov(struct qinst *inst)
{
- return (inst->op == QOP_MOV &&
+ return ((inst->op == QOP_MOV ||
+ inst->op == QOP_FMOV) &&
!inst->dst.pack &&
!inst->src[0].pack);
}
@@ -246,28 +231,6 @@ qir_depends_on_flags(struct qinst *inst)
}
bool
-qir_src_needs_a_file(struct qinst *inst)
-{
- switch (inst->op) {
- case QOP_UNPACK_8A_F:
- case QOP_UNPACK_8B_F:
- case QOP_UNPACK_8C_F:
- case QOP_UNPACK_8D_F:
- case QOP_UNPACK_16A_F:
- case QOP_UNPACK_16B_F:
- case QOP_UNPACK_8A_I:
- case QOP_UNPACK_8B_I:
- case QOP_UNPACK_8C_I:
- case QOP_UNPACK_8D_I:
- case QOP_UNPACK_16A_I:
- case QOP_UNPACK_16B_I:
- return true;
- default:
- return false;
- }
-}
-
-bool
qir_writes_r4(struct qinst *inst)
{
switch (inst->op) {
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 1a1e0f32386..393749b2f60 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -37,6 +37,7 @@
#include "util/u_math.h"
#include "vc4_screen.h"
+#include "vc4_qpu_defines.h"
#include "pipe/p_state.h"
struct nir_builder;
@@ -64,6 +65,7 @@ struct qreg {
enum qop {
QOP_UNDEF,
QOP_MOV,
+ QOP_FMOV,
QOP_FADD,
QOP_FSUB,
QOP_FMUL,
@@ -128,20 +130,6 @@ enum qop {
QOP_FRAG_W,
QOP_FRAG_REV_FLAG,
- QOP_UNPACK_8A_F,
- QOP_UNPACK_8B_F,
- QOP_UNPACK_8C_F,
- QOP_UNPACK_8D_F,
- QOP_UNPACK_16A_F,
- QOP_UNPACK_16B_F,
-
- QOP_UNPACK_8A_I,
- QOP_UNPACK_8B_I,
- QOP_UNPACK_8C_I,
- QOP_UNPACK_8D_I,
- QOP_UNPACK_16A_I,
- QOP_UNPACK_16B_I,
-
/** Texture x coordinate parameter write */
QOP_TEX_S,
/** Texture y coordinate parameter write */
@@ -468,7 +456,6 @@ bool qir_is_tex(struct qinst *inst);
bool qir_is_float_input(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
-bool qir_src_needs_a_file(struct qinst *inst);
struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);
void qir_dump(struct vc4_compile *c);
@@ -569,6 +556,7 @@ qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a) \
}
QIR_ALU1(MOV)
+QIR_ALU1(FMOV)
QIR_ALU2(FADD)
QIR_ALU2(FSUB)
QIR_ALU2(FMUL)
@@ -635,32 +623,32 @@ QIR_NODST_1(TLB_STENCIL_SETUP)
static inline struct qreg
qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef));
+ struct qreg t = qir_FMOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
return t;
}
static inline struct qreg
qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef));
+ struct qreg t = qir_MOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
return t;
}
static inline struct qreg
qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef));
+ struct qreg t = qir_FMOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
return t;
}
static inline struct qreg
qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef));
+ struct qreg t = qir_MOV(c, src);
+ c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
return t;
}
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 94fd187dc0a..a3eff844137 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -134,15 +134,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
uint32_t vpm_read_fifo_count = 0;
uint32_t vpm_read_offset = 0;
int last_vpm_read_index = -1;
- /* Map from the QIR ops enum order to QPU unpack bits. */
- static const uint32_t unpack_map[] = {
- QPU_UNPACK_8A,
- QPU_UNPACK_8B,
- QPU_UNPACK_8C,
- QPU_UNPACK_8D,
- QPU_UNPACK_16A,
- QPU_UNPACK_16B,
- };
list_inithead(&c->qpu_inst_list);
@@ -214,8 +205,10 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
* out the same as a MOV.
*/
[QOP_MOV] = { QPU_A_OR },
+ [QOP_FMOV] = { QPU_A_FMAX },
};
+ uint64_t unpack = 0;
struct qpu_reg src[4];
for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
int index = qinst->src[i].index;
@@ -225,6 +218,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
case QFILE_TEMP:
src[i] = temp_registers[index];
+ if (qinst->src[i].pack) {
+ assert(!unpack ||
+ unpack == qinst->src[i].pack);
+ unpack = QPU_SET_FIELD(qinst->src[i].pack,
+ QPU_UNPACK);
+ if (src[i].mux == QPU_MUX_R4)
+ unpack |= QPU_PM;
+ }
break;
case QFILE_UNIF:
src[i] = qpu_unif();
@@ -426,44 +427,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
- case QOP_UNPACK_8A_F:
- case QOP_UNPACK_8B_F:
- case QOP_UNPACK_8C_F:
- case QOP_UNPACK_8D_F:
- case QOP_UNPACK_16A_F:
- case QOP_UNPACK_16B_F:
- if (src[0].mux == QPU_MUX_R4) {
- queue(c, qpu_a_MOV(dst, src[0]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
- (qinst->op -
- QOP_UNPACK_8A_F),
- QPU_UNPACK);
- } else {
- assert(src[0].mux == QPU_MUX_A);
-
- queue(c, qpu_a_FMAX(dst, src[0], src[0]));
- *last_inst(c) |=
- QPU_SET_FIELD(unpack_map[qinst->op -
- QOP_UNPACK_8A_F],
- QPU_UNPACK);
- }
- break;
-
- case QOP_UNPACK_8A_I:
- case QOP_UNPACK_8B_I:
- case QOP_UNPACK_8C_I:
- case QOP_UNPACK_8D_I:
- case QOP_UNPACK_16A_I:
- case QOP_UNPACK_16B_I:
- assert(src[0].mux == QPU_MUX_A);
-
- queue(c, qpu_a_MOV(dst, src[0]));
- *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
- QOP_UNPACK_8A_I],
- QPU_UNPACK);
- break;
-
default:
assert(qinst->op < ARRAY_SIZE(translate));
assert(translate[qinst->op].op != 0); /* NOPs */
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 2a1b6c35d69..bca36c3e7f4 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -282,17 +282,23 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
class_bits[inst->dst.index] &= CLASS_BIT_A;
}
- if (qir_src_needs_a_file(inst)) {
- if (qir_is_float_input(inst)) {
- /* Special case: these can be done as R4
- * unpacks, as well.
- */
- class_bits[inst->src[0].index] &= (CLASS_BIT_A |
- CLASS_BIT_R4);
- } else {
- class_bits[inst->src[0].index] &= CLASS_BIT_A;
+ /* Apply restrictions for src unpacks. The integer unpacks
+ * can only be done from regfile A, while float unpacks can be
+ * either A or R4.
+ */
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ if (inst->src[i].file == QFILE_TEMP &&
+ inst->src[i].pack) {
+ if (qir_is_float_input(inst)) {
+ class_bits[inst->src[i].index] &=
+ CLASS_BIT_A | CLASS_BIT_R4;
+ } else {
+ class_bits[inst->src[i].index] &=
+ CLASS_BIT_A;
+ }
}
}
+
ip++;
}