diff options
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- | src/gallium/drivers/vc4/Automake.inc | 4 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/Makefile.am | 1 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_nir_lower_io.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_opt_algebraic.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c | 56 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qpu_emit.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_reorder_uniforms.c | 26 |
11 files changed, 86 insertions, 40 deletions
diff --git a/src/gallium/drivers/vc4/Automake.inc b/src/gallium/drivers/vc4/Automake.inc index 6fa3e190cac..5664c2ab14e 100644 --- a/src/gallium/drivers/vc4/Automake.inc +++ b/src/gallium/drivers/vc4/Automake.inc @@ -6,8 +6,4 @@ TARGET_LIB_DEPS += \ $(top_builddir)/src/gallium/winsys/vc4/drm/libvc4drm.la \ $(top_builddir)/src/gallium/drivers/vc4/libvc4.la -if USE_VC4_SIMULATOR -TARGET_CPPFLAGS += -DUSE_VC4_SIMULATOR -endif - endif diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am index f4a57ba3404..a3bf72fc72a 100644 --- a/src/gallium/drivers/vc4/Makefile.am +++ b/src/gallium/drivers/vc4/Makefile.am @@ -23,7 +23,6 @@ include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc if USE_VC4_SIMULATOR -SIM_CFLAGS = -DUSE_VC4_SIMULATOR=1 SIM_LDFLAGS = -lsimpenrose endif diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index 373c9e12d11..0672a92226f 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -509,8 +509,8 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color); nir_ssa_def *src_color[4], *unpacked_dst_color[4]; for (unsigned i = 0; i < 4; i++) { - src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false); - unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false); + src_color[i] = nir_channel(b, intr->src[0].ssa, i); + unpacked_dst_color[i] = nir_channel(b, dst_vec4, i); } vc4_nir_emit_alpha_test_discard(c, b, src_color[3]); diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index 7ea263afb68..1afe52a63f4 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -84,7 +84,7 @@ vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan) static nir_ssa_def * vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan) { - return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false); + return nir_channel(b, nir_unpack_unorm_4x8(b, src), chan); } static nir_ssa_def * @@ -326,9 +326,8 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, intr_comp->const_index[0] = intr->const_index[0] * 4 + i; assert(intr->src[0].is_ssa); - intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b, - intr->src[0].ssa, - &i, 1, false)); + intr_comp->src[0] = + nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i)); nir_builder_instr_insert(b, &intr_comp->instr); } diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index f1bab810eff..07a92266dd2 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -144,6 +144,8 @@ qir_opt_algebraic(struct vc4_compile *c) case QOP_SEL_X_Y_ZC: case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: + case QOP_SEL_X_Y_CS: + case QOP_SEL_X_Y_CC: if (is_zero(c, inst->src[1])) { /* Replace references to a 0 uniform value * with the SEL_X_0 equivalent. diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index a48dad804e2..197577b6c20 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -987,6 +987,10 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) qir_SF(c, qir_SUB(c, src[0], src[1])); *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0)); break; + case nir_op_uge: + qir_SF(c, qir_SUB(c, src[0], src[1])); + *dest = qir_SEL_X_0_CC(c, qir_uniform_ui(c, ~0)); + break; case nir_op_ilt: qir_SF(c, qir_SUB(c, src[0], src[1])); *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0)); @@ -1167,7 +1171,7 @@ emit_point_size_write(struct vc4_compile *c) struct qreg point_size; if (c->output_point_size_index != -1) - point_size = c->outputs[c->output_point_size_index + 3]; + point_size = c->outputs[c->output_point_size_index]; else point_size = qir_uniform_f(c, 1.0); diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 7894b081b19..f2855e159fc 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -69,10 +69,14 @@ static const struct qir_op_info qir_op_info[] = { [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true }, [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true }, [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true }, + [QOP_SEL_X_0_CS] = { "fsel_x_0_cs", 1, 1, false, true }, + [QOP_SEL_X_0_CC] = { "fsel_x_0_cc", 1, 1, false, true }, [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true }, [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true }, [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true }, [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true }, + [QOP_SEL_X_Y_CS] = { "fsel_x_y_cs", 1, 2, false, true }, + [QOP_SEL_X_Y_CC] = { "fsel_x_y_cc", 1, 2, false, true }, [QOP_RCP] = { "rcp", 1, 1, false, true }, [QOP_RSQ] = { "rsq", 1, 1, false, true }, @@ -218,10 +222,14 @@ qir_depends_on_flags(struct qinst *inst) case QOP_SEL_X_0_NC: case QOP_SEL_X_0_ZS: case QOP_SEL_X_0_ZC: + case QOP_SEL_X_0_CS: + case QOP_SEL_X_0_CC: case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: case QOP_SEL_X_Y_ZS: case QOP_SEL_X_Y_ZC: + case QOP_SEL_X_Y_CS: + case QOP_SEL_X_Y_CC: return true; default: return false; diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index a92ad93ee07..ddb35e41fcf 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -99,11 +99,15 @@ enum qop { QOP_SEL_X_0_ZC, QOP_SEL_X_0_NS, QOP_SEL_X_0_NC, + QOP_SEL_X_0_CS, + QOP_SEL_X_0_CC, /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ QOP_SEL_X_Y_ZS, QOP_SEL_X_Y_ZC, QOP_SEL_X_Y_NS, QOP_SEL_X_Y_NC, + QOP_SEL_X_Y_CS, + QOP_SEL_X_Y_CC, QOP_FTOI, QOP_ITOF, @@ -567,10 +571,14 @@ QIR_ALU1(SEL_X_0_ZS) QIR_ALU1(SEL_X_0_ZC) QIR_ALU1(SEL_X_0_NS) QIR_ALU1(SEL_X_0_NC) +QIR_ALU1(SEL_X_0_CS) +QIR_ALU1(SEL_X_0_CC) QIR_ALU2(SEL_X_Y_ZS) QIR_ALU2(SEL_X_Y_ZC) QIR_ALU2(SEL_X_Y_NS) QIR_ALU2(SEL_X_Y_NC) +QIR_ALU2(SEL_X_Y_CS) +QIR_ALU2(SEL_X_Y_CC) QIR_ALU2(FMIN) QIR_ALU2(FMAX) QIR_ALU2(FMINABS) diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c index f087c3b81b5..a57e100593c 100644 --- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c @@ -22,14 +22,10 @@ */ /** - * @file vc4_opt_algebraic.c + * @file vc4_qir_lower_uniforms.c * - * This is the optimization pass for miscellaneous changes to instructions - * where we can simplify the operation by some knowledge about the specific - * operations. - * - * Mostly this will be a matter of turning things into MOVs so that they can - * later be copy-propagated out. + * This is the pre-code-generation pass for fixing up instructions that try to + * read from multiple uniform values. */ #include "vc4_qir.h" @@ -85,6 +81,33 @@ is_lowerable_uniform(struct qinst *inst, int i) return true; } +/* Returns the number of different uniform values referenced by the + * instruction. + */ +static uint32_t +qir_get_instruction_uniform_count(struct qinst *inst) +{ + uint32_t count = 0; + + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + if (inst->src[i].file != QFILE_UNIF) + continue; + + bool is_duplicate = false; + for (int j = 0; j < i; j++) { + if (inst->src[j].file == QFILE_UNIF && + inst->src[j].index == inst->src[i].index) { + is_duplicate = true; + break; + } + } + if (!is_duplicate) + count++; + } + + return count; +} + void qir_lower_uniforms(struct vc4_compile *c) { @@ -98,13 +121,7 @@ qir_lower_uniforms(struct vc4_compile *c) list_for_each_entry(struct qinst, inst, &c->instructions, link) { uint32_t nsrc = qir_get_op_nsrc(inst->op); - uint32_t count = 0; - for (int i = 0; i < nsrc; i++) { - if (inst->src[i].file == QFILE_UNIF) - count++; - } - - if (count <= 1) + if (qir_get_instruction_uniform_count(inst) <= 1) continue; for (int i = 0; i < nsrc; i++) { @@ -140,23 +157,22 @@ qir_lower_uniforms(struct vc4_compile *c) list_for_each_entry(struct qinst, inst, &c->instructions, link) { uint32_t nsrc = qir_get_op_nsrc(inst->op); - uint32_t count = 0; - for (int i = 0; i < nsrc; i++) { - if (inst->src[i].file == QFILE_UNIF) - count++; - } + uint32_t count = qir_get_instruction_uniform_count(inst); if (count <= 1) continue; + bool removed = false; for (int i = 0; i < nsrc; i++) { if (is_lowerable_uniform(inst, i) && inst->src[i].index == max_index) { inst->src[i] = temp; remove_uniform(ht, unif); - count--; + removed = true; } } + if (removed) + count--; /* If the instruction doesn't need lowering any more, * then drop it from the list. diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 133e1385178..e0d3633da42 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -311,6 +311,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_SEL_X_0_ZC: case QOP_SEL_X_0_NS: case QOP_SEL_X_0_NC: + case QOP_SEL_X_0_CS: + case QOP_SEL_X_0_CC: queue(c, qpu_a_MOV(dst, src[0]) | unpack); set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS + QPU_COND_ZS); @@ -324,6 +326,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_SEL_X_Y_ZC: case QOP_SEL_X_Y_NS: case QOP_SEL_X_Y_NC: + case QOP_SEL_X_Y_CS: + case QOP_SEL_X_Y_CC: queue(c, qpu_a_MOV(dst, src[0])); if (qinst->src[0].pack) *(last_inst(c)) |= unpack; diff --git a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c index 7f11fba2340..85a0c95e851 100644 --- a/src/gallium/drivers/vc4/vc4_reorder_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_reorder_uniforms.c @@ -44,18 +44,28 @@ qir_reorder_uniforms(struct vc4_compile *c) uint32_t next_uniform = 0; list_for_each_entry(struct qinst, inst, &c->instructions, link) { + uint32_t new = ~0; + for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { if (inst->src[i].file != QFILE_UNIF) continue; - uint32_t new = next_uniform++; - if (uniform_index_size <= new) { - uniform_index_size = - MAX2(uniform_index_size * 2, 16); - uniform_index = - realloc(uniform_index, - uniform_index_size * - sizeof(uint32_t)); + if (new == ~0) { + new = next_uniform++; + if (uniform_index_size <= new) { + uniform_index_size = + MAX2(uniform_index_size * 2, 16); + uniform_index = + realloc(uniform_index, + uniform_index_size * + sizeof(uint32_t)); + } + } else { + /* If we've got two uniform references in this + * instruction, they need to be the same + * uniform value. + */ + assert(inst->src[i].index == uniform_index[new]); } uniform_index[new] = inst->src[i].index; |