summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_algebraic.c15
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_small_immediates.c7
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c34
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c72
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h40
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_schedule.c50
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_validate.c24
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c39
9 files changed, 194 insertions, 89 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index 01ad05d2759..5e7d26923de 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -94,14 +94,17 @@ static void
replace_with_mov(struct vc4_compile *c, struct qinst *inst, struct qreg arg)
{
dump_from(c, inst);
+
+ inst->src[0] = arg;
+ if (qir_has_implicit_tex_uniform(inst))
+ inst->src[1] = inst->src[qir_get_tex_uniform_src(inst)];
+
if (qir_is_mul(inst))
inst->op = QOP_MMOV;
else if (qir_is_float_input(inst))
inst->op = QOP_FMOV;
else
inst->op = QOP_MOV;
- inst->src[0] = arg;
- inst->src[1] = c->undef;
dump_to(c, inst);
}
@@ -172,8 +175,12 @@ qir_opt_algebraic(struct vc4_compile *c)
break;
case QOP_ADD:
- if (replace_x_0_with_x(c, inst, 0) ||
- replace_x_0_with_x(c, inst, 1)) {
+ /* Kernel validation requires that we use an actual
+ * add instruction.
+ */
+ if (inst->dst.file != QFILE_TEX_S_DIRECT &&
+ (replace_x_0_with_x(c, inst, 0) ||
+ replace_x_0_with_x(c, inst, 1))) {
progress = true;
break;
}
diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
index 15cbd12773f..89c48578021 100644
--- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
+++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
@@ -62,11 +62,8 @@ qir_opt_small_immediates(struct vc4_compile *c)
continue;
}
- if (i == 1 &&
- (inst->op == QOP_TEX_S ||
- inst->op == QOP_TEX_T ||
- inst->op == QOP_TEX_R ||
- inst->op == QOP_TEX_B)) {
+ if (qir_is_tex(inst) &&
+ i == qir_get_tex_uniform_src(inst)) {
/* No turning the implicit uniform read into
* an immediate.
*/
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 97cbabbd511..66fd902b695 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -120,7 +120,10 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
qir_uniform_ui(c, (range->dst_offset +
range->size - 4)));
- qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
+ qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0),
+ indirect_offset,
+ qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
+
c->num_texture_samples++;
ntq_emit_thrsw(c);
@@ -381,7 +384,8 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
addr = qir_MAX(c, addr, qir_uniform_ui(c, 0));
addr = qir_MIN(c, addr, qir_uniform_ui(c, size - 4));
- qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
+ qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0),
+ addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
ntq_emit_thrsw(c);
@@ -479,14 +483,20 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
unit | (is_txl << 16));
}
+ struct qinst *tmu;
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
- qir_TEX_R(c, r, texture_u[next_texture_u++]);
+ tmu = qir_MOV_dest(c, qir_reg(QFILE_TEX_R, 0), r);
+ tmu->src[qir_get_tex_uniform_src(tmu)] =
+ texture_u[next_texture_u++];
} else if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP ||
c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
- qir_TEX_R(c, qir_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit),
- texture_u[next_texture_u++]);
+ tmu = qir_MOV_dest(c, qir_reg(QFILE_TEX_R, 0),
+ qir_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR,
+ unit));
+ tmu->src[qir_get_tex_uniform_src(tmu)] =
+ texture_u[next_texture_u++];
}
if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP) {
@@ -497,12 +507,18 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
t = qir_SAT(c, t);
}
- qir_TEX_T(c, t, texture_u[next_texture_u++]);
+ tmu = qir_MOV_dest(c, qir_reg(QFILE_TEX_T, 0), t);
+ tmu->src[qir_get_tex_uniform_src(tmu)] =
+ texture_u[next_texture_u++];
- if (is_txl || is_txb)
- qir_TEX_B(c, lod, texture_u[next_texture_u++]);
+ if (is_txl || is_txb) {
+ tmu = qir_MOV_dest(c, qir_reg(QFILE_TEX_B, 0), lod);
+ tmu->src[qir_get_tex_uniform_src(tmu)] =
+ texture_u[next_texture_u++];
+ }
- qir_TEX_S(c, s, texture_u[next_texture_u++]);
+ tmu = qir_MOV_dest(c, qir_reg(QFILE_TEX_S, 0), s);
+ tmu->src[qir_get_tex_uniform_src(tmu)] = texture_u[next_texture_u++];
c->num_texture_samples++;
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 2c9119d9ccf..7c556a98ea2 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -75,11 +75,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_FRAG_Z] = { "frag_z", 1, 0 },
[QOP_FRAG_W] = { "frag_w", 1, 0 },
- [QOP_TEX_S] = { "tex_s", 0, 2, true },
- [QOP_TEX_T] = { "tex_t", 0, 2, true },
- [QOP_TEX_R] = { "tex_r", 0, 2, true },
- [QOP_TEX_B] = { "tex_b", 0, 2, true },
- [QOP_TEX_DIRECT] = { "tex_direct", 0, 2, true },
[QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
[QOP_THRSW] = { "thrsw", 0, 0, true },
@@ -104,12 +99,37 @@ qir_get_op_name(enum qop qop)
}
int
-qir_get_nsrc(struct qinst *inst)
+qir_get_non_sideband_nsrc(struct qinst *inst)
{
assert(qir_op_info[inst->op].name);
return qir_op_info[inst->op].nsrc;
}
+int
+qir_get_nsrc(struct qinst *inst)
+{
+ assert(qir_op_info[inst->op].name);
+
+ int nsrc = qir_get_non_sideband_nsrc(inst);
+
+ /* Normal (non-direct) texture coordinate writes also implicitly load
+ * a uniform for the texture parameters.
+ */
+ if (qir_is_tex(inst) && inst->dst.file != QFILE_TEX_S_DIRECT)
+ nsrc++;
+
+ return nsrc;
+}
+
+/* The sideband uniform for textures gets stored after the normal ALU
+ * arguments.
+ */
+int
+qir_get_tex_uniform_src(struct qinst *inst)
+{
+ return qir_get_nsrc(inst) - 1;
+}
+
/**
* Returns whether the instruction has any side effects that must be
* preserved.
@@ -122,6 +142,11 @@ qir_has_side_effects(struct vc4_compile *c, struct qinst *inst)
case QFILE_TLB_COLOR_WRITE:
case QFILE_TLB_COLOR_WRITE_MS:
case QFILE_TLB_STENCIL_SETUP:
+ case QFILE_TEX_S_DIRECT:
+ case QFILE_TEX_S:
+ case QFILE_TEX_T:
+ case QFILE_TEX_R:
+ case QFILE_TEX_B:
return true;
default:
break;
@@ -206,7 +231,30 @@ qir_is_raw_mov(struct qinst *inst)
bool
qir_is_tex(struct qinst *inst)
{
- return inst->op >= QOP_TEX_S && inst->op <= QOP_TEX_DIRECT;
+ switch (inst->dst.file) {
+ case QFILE_TEX_S_DIRECT:
+ case QFILE_TEX_S:
+ case QFILE_TEX_T:
+ case QFILE_TEX_R:
+ case QFILE_TEX_B:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool
+qir_has_implicit_tex_uniform(struct qinst *inst)
+{
+ switch (inst->dst.file) {
+ case QFILE_TEX_S:
+ case QFILE_TEX_T:
+ case QFILE_TEX_R:
+ case QFILE_TEX_B:
+ return true;
+ default:
+ return false;
+ }
}
bool
@@ -298,6 +346,11 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
[QFILE_FRAG_Y] = "frag_y",
[QFILE_FRAG_REV_FLAG] = "frag_rev_flag",
[QFILE_QPU_ELEMENT] = "elem",
+ [QFILE_TEX_S_DIRECT] = "tex_s_direct",
+ [QFILE_TEX_S] = "tex_s",
+ [QFILE_TEX_T] = "tex_t",
+ [QFILE_TEX_R] = "tex_r",
+ [QFILE_TEX_B] = "tex_b",
};
switch (reg.file) {
@@ -330,6 +383,11 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
case QFILE_TLB_COLOR_WRITE_MS:
case QFILE_TLB_Z_WRITE:
case QFILE_TLB_STENCIL_SETUP:
+ case QFILE_TEX_S_DIRECT:
+ case QFILE_TEX_S:
+ case QFILE_TEX_T:
+ case QFILE_TEX_R:
+ case QFILE_TEX_B:
fprintf(stderr, "%s", files[reg.file]);
break;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index a3b8762951d..99cc957853a 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -55,6 +55,18 @@ enum qfile {
QFILE_TLB_Z_WRITE,
QFILE_TLB_STENCIL_SETUP,
+ /* If tex_s is written on its own without preceding t/r/b setup, it's
+ * a direct memory access using the input value, without the sideband
+ * uniform load. We represent these in QIR as a separate write
+ * destination so we can tell if the sideband uniform is present.
+ */
+ QFILE_TEX_S_DIRECT,
+
+ QFILE_TEX_S,
+ QFILE_TEX_T,
+ QFILE_TEX_R,
+ QFILE_TEX_B,
+
/* Payload registers that aren't in the physical register file, so we
* can just use the corresponding qpu_reg at qpu_emit time.
*/
@@ -132,24 +144,6 @@ enum qop {
QOP_FRAG_Z,
QOP_FRAG_W,
- /** Texture x coordinate parameter write */
- QOP_TEX_S,
- /** Texture y coordinate parameter write */
- QOP_TEX_T,
- /** Texture border color parameter or cube map z coordinate write */
- QOP_TEX_R,
- /** Texture LOD bias parameter write */
- QOP_TEX_B,
-
- /**
- * Texture-unit 4-byte read with address provided direct in S
- * cooordinate.
- *
- * The first operand is the offset from the start of the UBO, and the
- * second is the uniform that has the UBO's base pointer.
- */
- QOP_TEX_DIRECT,
-
/**
* Signal of texture read being necessary and then reading r4 into
* the destination
@@ -203,7 +197,7 @@ struct qinst {
enum qop op;
struct qreg dst;
- struct qreg src[2];
+ struct qreg src[3];
bool sf;
bool cond_is_exec_mask;
uint8_t cond;
@@ -578,12 +572,15 @@ struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst);
struct qreg qir_get_temp(struct vc4_compile *c);
void qir_calculate_live_intervals(struct vc4_compile *c);
int qir_get_nsrc(struct qinst *inst);
+int qir_get_non_sideband_nsrc(struct qinst *inst);
+int qir_get_tex_uniform_src(struct qinst *inst);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
bool qir_is_mul(struct qinst *inst);
bool qir_is_raw_mov(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
+bool qir_has_implicit_tex_uniform(struct qinst *inst);
bool qir_is_float_input(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
@@ -737,11 +734,6 @@ QIR_ALU1(RSQ)
QIR_ALU1(EXP2)
QIR_ALU1(LOG2)
QIR_ALU1(VARY_ADD_C)
-QIR_NODST_2(TEX_S)
-QIR_NODST_2(TEX_T)
-QIR_NODST_2(TEX_R)
-QIR_NODST_2(TEX_B)
-QIR_NODST_2(TEX_DIRECT)
QIR_PAYLOAD(FRAG_Z)
QIR_PAYLOAD(FRAG_W)
QIR_ALU0(TEX_RESULT)
diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index 1884cfa5b78..9ecfe65211e 100644
--- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -77,7 +77,7 @@ is_lowerable_uniform(struct qinst *inst, int i)
if (inst->src[i].file != QFILE_UNIF)
return false;
if (qir_is_tex(inst))
- return i != 1;
+ return i != qir_get_tex_uniform_src(inst);
return true;
}
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index c1a2db5e3c5..a8ef189e583 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -212,18 +212,6 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
add_dep(dir, state->last_vary_read, n);
break;
- case QOP_TEX_S:
- case QOP_TEX_T:
- case QOP_TEX_R:
- case QOP_TEX_B:
- case QOP_TEX_DIRECT:
- /* Texturing setup gets scheduled in order, because
- * the uniforms referenced by them have to land in a
- * specific order.
- */
- add_write_dep(dir, &state->last_tex_coord, n);
- break;
-
case QOP_TEX_RESULT:
/* Results have to be fetched in order. */
add_write_dep(dir, &state->last_tex_result, n);
@@ -278,6 +266,18 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
add_write_dep(dir, &state->last_tlb, n);
break;
+ case QFILE_TEX_S_DIRECT:
+ case QFILE_TEX_S:
+ case QFILE_TEX_T:
+ case QFILE_TEX_R:
+ case QFILE_TEX_B:
+ /* Texturing setup gets scheduled in order, because
+ * the uniforms referenced by them have to land in a
+ * specific order.
+ */
+ add_write_dep(dir, &state->last_tex_coord, n);
+ break;
+
default:
break;
}
@@ -315,12 +315,12 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
}
}
- switch (inst->op) {
- case QOP_TEX_S:
- case QOP_TEX_T:
- case QOP_TEX_R:
- case QOP_TEX_B:
- case QOP_TEX_DIRECT:
+ switch (inst->dst.file) {
+ case QFILE_TEX_S_DIRECT:
+ case QFILE_TEX_S:
+ case QFILE_TEX_T:
+ case QFILE_TEX_R:
+ case QFILE_TEX_B:
/* From the VC4 spec:
*
* "The TFREQ input FIFO holds two full lots of s,
@@ -364,8 +364,8 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
* If the texture result fifo is full, block adding
* any more to it until the last QOP_TEX_RESULT.
*/
- if (inst->op == QOP_TEX_S ||
- inst->op == QOP_TEX_DIRECT) {
+ if (inst->dst.file == QFILE_TEX_S ||
+ inst->dst.file == QFILE_TEX_S_DIRECT) {
if (state.tfrcv_count ==
(c->fs_threaded ? 2 : 4))
block_until_tex_result(&state, n);
@@ -376,6 +376,11 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
state.tfreq_count++;
break;
+ default:
+ break;
+ }
+
+ switch (inst->op) {
case QOP_TEX_RESULT:
/* Results have to be fetched after the
* coordinate setup. Note that we're assuming
@@ -398,7 +403,6 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
break;
default:
- assert(!qir_is_tex(inst));
break;
}
}
@@ -560,8 +564,8 @@ dump_state(struct vc4_compile *c, struct schedule_state *state)
static uint32_t
latency_between(struct schedule_node *before, struct schedule_node *after)
{
- if ((before->inst->op == QOP_TEX_S ||
- before->inst->op == QOP_TEX_DIRECT) &&
+ if ((before->inst->dst.file == QFILE_TEX_S ||
+ before->inst->dst.file == QFILE_TEX_S_DIRECT) &&
after->inst->op == QOP_TEX_RESULT)
return 100;
diff --git a/src/gallium/drivers/vc4/vc4_qir_validate.c b/src/gallium/drivers/vc4/vc4_qir_validate.c
index 9579f7a15cb..302eb48265c 100644
--- a/src/gallium/drivers/vc4/vc4_qir_validate.c
+++ b/src/gallium/drivers/vc4/vc4_qir_validate.c
@@ -84,6 +84,25 @@ void qir_validate(struct vc4_compile *c)
case QFILE_LOAD_IMM:
fail_instr(c, inst, "Bad dest file");
break;
+
+ case QFILE_TEX_S:
+ case QFILE_TEX_T:
+ case QFILE_TEX_R:
+ case QFILE_TEX_B:
+ if (inst->src[qir_get_tex_uniform_src(inst)].file !=
+ QFILE_UNIF) {
+ fail_instr(c, inst,
+ "tex op missing implicit uniform");
+ }
+ break;
+
+ case QFILE_TEX_S_DIRECT:
+ if (inst->op != QOP_ADD) {
+ fail_instr(c, inst,
+ "kernel validation requires that "
+ "direct texture lookups use an ADD");
+ }
+ break;
}
for (int i = 0; i < qir_get_nsrc(inst); i++) {
@@ -119,6 +138,11 @@ void qir_validate(struct vc4_compile *c)
case QFILE_TLB_COLOR_WRITE_MS:
case QFILE_TLB_Z_WRITE:
case QFILE_TLB_STENCIL_SETUP:
+ case QFILE_TEX_S_DIRECT:
+ case QFILE_TEX_S:
+ case QFILE_TEX_T:
+ case QFILE_TEX_R:
+ case QFILE_TEX_B:
fail_instr(c, inst, "Bad src file");
break;
}
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 9d9e5d84ecd..47fc0b0928b 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -347,6 +347,11 @@ vc4_generate_code_block(struct vc4_compile *c,
case QFILE_TLB_COLOR_WRITE_MS:
case QFILE_TLB_Z_WRITE:
case QFILE_TLB_STENCIL_SETUP:
+ case QFILE_TEX_S:
+ case QFILE_TEX_S_DIRECT:
+ case QFILE_TEX_T:
+ case QFILE_TEX_R:
+ case QFILE_TEX_B:
unreachable("bad qir src file");
}
}
@@ -379,6 +384,23 @@ vc4_generate_code_block(struct vc4_compile *c,
dst = qpu_ra(QPU_W_TLB_STENCIL_SETUP);
break;
+ case QFILE_TEX_S:
+ case QFILE_TEX_S_DIRECT:
+ dst = qpu_rb(QPU_W_TMU0_S);
+ break;
+
+ case QFILE_TEX_T:
+ dst = qpu_rb(QPU_W_TMU0_T);
+ break;
+
+ case QFILE_TEX_R:
+ dst = qpu_rb(QPU_W_TMU0_R);
+ break;
+
+ case QFILE_TEX_B:
+ dst = qpu_rb(QPU_W_TMU0_B);
+ break;
+
case QFILE_VARY:
case QFILE_UNIF:
case QFILE_SMALL_IMM:
@@ -477,21 +499,6 @@ vc4_generate_code_block(struct vc4_compile *c,
queue(block, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack);
break;
- case QOP_TEX_S:
- case QOP_TEX_T:
- case QOP_TEX_R:
- case QOP_TEX_B:
- queue(block, qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
- (qinst->op - QOP_TEX_S)),
- src[0]) | unpack);
- break;
-
- case QOP_TEX_DIRECT:
- fixup_raddr_conflict(block, dst, &src[0], &src[1],
- qinst, &unpack);
- queue(block, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S),
- src[0], src[1]) | unpack);
- break;
case QOP_TEX_RESULT:
queue(block, qpu_NOP());
@@ -538,7 +545,7 @@ vc4_generate_code_block(struct vc4_compile *c,
* argument slot as well so that we don't take up
* another raddr just to get unused data.
*/
- if (qir_get_nsrc(qinst) == 1)
+ if (qir_get_non_sideband_nsrc(qinst) == 1)
src[1] = src[0];
fixup_raddr_conflict(block, dst, &src[0], &src[1],