summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_blend.c132
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_io.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c48
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c1
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h12
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu.h11
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c7
7 files changed, 191 insertions, 24 deletions
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 0672a92226f..38676cff6b7 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -29,6 +29,10 @@
* from the tile buffer after having waited for the scoreboard (which is
* handled by vc4_qpu_emit.c), then do math using your output color and that
* destination value, and update the output color appropriately.
+ *
+ * Once this pass is done, the color write will either have one component (for
+ * single sample) with packed argb8888, or 4 components with the per-sample
+ * argb8888 result.
*/
/**
@@ -40,15 +44,23 @@
#include "glsl/nir/nir_builder.h"
#include "vc4_context.h"
+/**
+ * Reports whether the fragment shader key requests any operation that
+ * combines the shader's color with what is already stored: blending
+ * enabled, a colormask other than 0xf, or a logic op other than
+ * PIPE_LOGICOP_COPY.
+ */
+static bool
+blend_depends_on_dst_color(struct vc4_compile *c)
+{
+        if (c->fs_key->blend.blend_enable)
+                return true;
+
+        if (c->fs_key->blend.colormask != 0xf)
+                return true;
+
+        return c->fs_key->logicop_func != PIPE_LOGICOP_COPY;
+}
+
/** Emits a load of the previous fragment color from the tile buffer. */
static nir_ssa_def *
-vc4_nir_get_dst_color(nir_builder *b)
+vc4_nir_get_dst_color(nir_builder *b, int sample)
{
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(b->shader,
nir_intrinsic_load_input);
load->num_components = 1;
- load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT;
+ load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT + sample;
nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
nir_builder_instr_insert(b, &load->instr);
return &load->dest.ssa;
@@ -496,23 +508,26 @@ vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
}
-static void
-vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
- nir_intrinsic_instr *intr)
+static nir_ssa_def *
+vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
+ int sample)
{
enum pipe_format color_format = c->fs_key->color_format;
const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
bool srgb = util_format_is_srgb(color_format);
/* Pull out the float src/dst color components. */
- nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
+ nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
nir_ssa_def *src_color[4], *unpacked_dst_color[4];
for (unsigned i = 0; i < 4; i++) {
- src_color[i] = nir_channel(b, intr->src[0].ssa, i);
+ src_color[i] = nir_channel(b, src, i);
unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
}
+ if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
+ src_color[3] = nir_imm_float(b, 1.0);
+
vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
nir_ssa_def *packed_color;
@@ -560,16 +575,101 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
colormask &= ~(0xff << (i * 8));
}
}
- packed_color = nir_ior(b,
- nir_iand(b, packed_color,
- nir_imm_int(b, colormask)),
- nir_iand(b, packed_dst_color,
- nir_imm_int(b, ~colormask)));
- /* Turn the old vec4 output into a store of the packed color. */
- nir_instr_rewrite_src(&intr->instr, &intr->src[0],
- nir_src_for_ssa(packed_color));
+ return nir_ior(b,
+ nir_iand(b, packed_color,
+ nir_imm_int(b, colormask)),
+ nir_iand(b, packed_dst_color,
+ nir_imm_int(b, ~colormask)));
+}
+
+/**
+ * Returns the first driver_location not yet used by any output variable
+ * of the shader: one past the highest location found, or 0 when the
+ * shader has no outputs.
+ */
+static int
+vc4_nir_next_output_driver_location(nir_shader *s)
+{
+        int maxloc = -1;
+
+        /* Walk the outputs list (the new variable is an output), and cast
+         * to int so the comparison against the -1 sentinel is signed; with
+         * an unsigned field, -1 would compare as the largest value and
+         * maxloc would never advance.
+         */
+        nir_foreach_variable(var, &s->outputs)
+                maxloc = MAX2(maxloc, (int)var->data.driver_location);
+
+        return maxloc + 1;
+}
+
+/**
+ * Creates a "sample_mask" uint fragment output variable tagged
+ * FRAG_RESULT_SAMPLE_MASK and emits a one-component store_output of val
+ * to it through the builder.
+ *
+ * \param c    compile state owning the shader the variable is added to.
+ * \param b    builder used to insert the store instruction.
+ * \param val  SSA value holding the sample mask to write.
+ */
+static void
+vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
+                          nir_ssa_def *val)
+{
+        nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
+                                                        glsl_uint_type(),
+                                                        "sample_mask");
+        sample_mask->data.driver_location =
+                vc4_nir_next_output_driver_location(c->s);
+        sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
+        /* NOTE(review): nir_variable_create() may already link the variable
+         * into the shader's outputs list; confirm this push does not insert
+         * the node twice.
+         */
+        exec_list_push_tail(&c->s->outputs, &sample_mask->node);
+
+        nir_intrinsic_instr *intr =
+                nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
         intr->num_components = 1;
+        /* The store_output consumer indexes c->outputs[] with const_index[0],
+         * so it must carry the driver-assigned slot of the variable, not the
+         * FRAG_RESULT_* semantic enum.
+         */
+        intr->const_index[0] = sample_mask->data.driver_location;
+
+        intr->src[0] = nir_src_for_ssa(val);
+        nir_builder_instr_insert(b, &intr->instr);
+}
+/**
+ * Lowers one color store_output intrinsic.
+ *
+ * First, optionally emits a sample-mask store: from the incoming sample
+ * mask when the key has sample_coverage set, otherwise from the fragment
+ * alpha when sample_alpha_to_coverage is set (the two are mutually
+ * exclusive here because of the else-if).  Then runs the blend pipeline on
+ * the stored color, once for sample 0 or once per sample, and rewrites the
+ * store's source and num_components to the blended result.
+ *
+ * \param c     compile state; msaa_per_sample_output is set on it when the
+ *              per-sample path is taken.
+ * \param b     builder used to emit the new instructions.
+ * \param intr  the store intrinsic whose src[0] is the fragment color.
+ */
+static void
+vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ nir_ssa_def *frag_color = intr->src[0].ssa;
+
+ if (c->fs_key->sample_coverage) {
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_load_sample_mask_in);
+ load->num_components = 1;
+ nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
+ nir_builder_instr_insert(b, &load->instr);
+
+ nir_ssa_def *bitmask = &load->dest.ssa;
+
+ vc4_nir_store_sample_mask(c, b, bitmask);
+ } else if (c->fs_key->sample_alpha_to_coverage) {
+ nir_ssa_def *a = nir_channel(b, frag_color, 3);
+
+ /* XXX: We should do a nice dither based on the fragment
+ * coordinate, instead.
+ *
+ * The mask emitted below is
+ * (1 << (int)(alpha * VC4_MAX_SAMPLES)) - 1, so the number of
+ * low bits set grows with alpha.
+ * NOTE(review): alpha is not clamped before the conversion;
+ * confirm the shift count cannot go out of range.
+ */
+ nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
+ nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples));
+ nir_ssa_def *bitmask = nir_isub(b,
+ nir_ishl(b,
+ nir_imm_int(b, 1),
+ num_bits),
+ nir_imm_int(b, 1));
+ vc4_nir_store_sample_mask(c, b, bitmask);
+ }
+
+ /* The TLB color read returns each sample in turn, so if our blending
+ * depends on the destination color, we're going to have to run the
+ * blending function separately for each destination sample value, and
+ * then output the per-sample color using TLB_COLOR_MS.
+ *
+ * In that case the per-sample results are gathered into a vec4, one
+ * component per sample; otherwise a single blended value for sample 0
+ * is produced.
+ */
+ nir_ssa_def *blend_output;
+ if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
+ c->msaa_per_sample_output = true;
+
+ nir_ssa_def *samples[4];
+ for (int i = 0; i < VC4_MAX_SAMPLES; i++)
+ samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
+ blend_output = nir_vec4(b,
+ samples[0], samples[1],
+ samples[2], samples[3]);
+ } else {
+ blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
+ }
+
+ nir_instr_rewrite_src(&intr->instr, &intr->src[0],
+ nir_src_for_ssa(blend_output));
+ intr->num_components = blend_output->num_components;
}
static bool
@@ -577,7 +677,7 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
{
struct vc4_compile *c = state;
- nir_foreach_instr(block, instr) {
+ nir_foreach_instr_safe(block, instr) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index 1afe52a63f4..72a514756fd 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -226,7 +226,9 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
{
b->cursor = nir_before_instr(&intr->instr);
- if (intr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
+ if (intr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT &&
+ intr->const_index[0] < (VC4_NIR_TLB_COLOR_READ_INPUT +
+ VC4_MAX_SAMPLES)) {
/* This doesn't need any lowering. */
return;
}
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 081adfd185c..dda2d84b5b3 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1124,7 +1124,12 @@ emit_frag_end(struct vc4_compile *c)
qir_TLB_Z_WRITE(c, z);
}
- qir_TLB_COLOR_WRITE(c, color);
+ if (!c->msaa_per_sample_output) {
+ qir_TLB_COLOR_WRITE(c, color);
+ } else {
+ for (int i = 0; i < VC4_MAX_SAMPLES; i++)
+ qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+ }
}
static void
@@ -1475,18 +1480,42 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_load_input:
assert(instr->num_components == 1);
- if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
- *dest = qir_TLB_COLOR_READ(c);
+ if (instr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT) {
+ /* Reads of the per-sample color need to be done in
+ * order.
+ */
+ int sample_index = (instr->const_index[0] -
+ VC4_NIR_TLB_COLOR_READ_INPUT);
+ for (int i = 0; i <= sample_index; i++) {
+ if (c->color_reads[i].file == QFILE_NULL) {
+ c->color_reads[i] =
+ qir_TLB_COLOR_READ(c);
+ }
+ }
+ *dest = c->color_reads[sample_index];
} else {
*dest = c->inputs[instr->const_index[0]];
}
break;
case nir_intrinsic_store_output:
- assert(instr->num_components == 1);
- c->outputs[instr->const_index[0]] =
- qir_MOV(c, ntq_get_src(c, instr->src[0], 0));
- c->num_outputs = MAX2(c->num_outputs, instr->const_index[0] + 1);
+ /* MSAA color outputs are the only case where we have an
+ * output that's not lowered to being a store of a single 32
+ * bit value.
+ */
+ if (c->stage == QSTAGE_FRAG && instr->num_components == 4) {
+ assert(instr->const_index[0] == c->output_color_index);
+ for (int i = 0; i < 4; i++) {
+ c->sample_colors[i] =
+ qir_MOV(c, ntq_get_src(c, instr->src[0],
+ i));
+ }
+ } else {
+ assert(instr->num_components == 1);
+ c->outputs[instr->const_index[0]] =
+ qir_MOV(c, ntq_get_src(c, instr->src[0], 0));
+ c->num_outputs = MAX2(c->num_outputs, instr->const_index[0] + 1);
+ }
break;
case nir_intrinsic_discard:
@@ -1963,6 +1992,11 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
} else {
key->logicop_func = PIPE_LOGICOP_COPY;
}
+ key->msaa = vc4->rasterizer->base.multisample;
+ key->sample_coverage = (vc4->rasterizer->base.multisample &&
+ vc4->sample_mask != (1 << VC4_MAX_SAMPLES) - 1);
+ key->sample_alpha_to_coverage = vc4->blend->alpha_to_coverage;
+ key->sample_alpha_to_one = vc4->blend->alpha_to_one;
if (vc4->framebuffer.cbufs[0])
key->color_format = vc4->framebuffer.cbufs[0]->format;
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 4c6667a9d9f..4ec25310b67 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -86,6 +86,7 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
[QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
[QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
+ [QOP_TLB_COLOR_WRITE_MS] = { "tlb_color_ms", 0, 1, true },
[QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0 },
[QOP_MS_MASK] = { "ms_mask", 0, 1, true },
[QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 97a23df10c6..4e406d60d72 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -38,6 +38,7 @@
#include "vc4_screen.h"
#include "vc4_qpu_defines.h"
+#include "kernel/vc4_packet.h"
#include "pipe/p_state.h"
struct nir_builder;
@@ -121,6 +122,7 @@ enum qop {
QOP_TLB_STENCIL_SETUP,
QOP_TLB_Z_WRITE,
QOP_TLB_COLOR_WRITE,
+ QOP_TLB_COLOR_WRITE_MS,
QOP_TLB_COLOR_READ,
QOP_MS_MASK,
QOP_VARY_ADD_C,
@@ -306,6 +308,10 @@ struct vc4_fs_key {
bool alpha_test;
bool point_coord_upper_left;
bool light_twoside;
+ bool msaa;
+ bool sample_coverage;
+ bool sample_alpha_to_coverage;
+ bool sample_alpha_to_one;
uint8_t alpha_test_func;
uint8_t logicop_func;
uint32_t point_sprite_mask;
@@ -350,6 +356,9 @@ struct vc4_compile {
*/
struct qreg *inputs;
struct qreg *outputs;
+ bool msaa_per_sample_output;
+ struct qreg color_reads[VC4_MAX_SAMPLES];
+ struct qreg sample_colors[VC4_MAX_SAMPLES];
uint32_t inputs_array_size;
uint32_t outputs_array_size;
uint32_t uniforms_array_size;
@@ -421,6 +430,8 @@ struct vc4_compile {
*/
#define VC4_NIR_TLB_COLOR_READ_INPUT 2000000000
+#define VC4_NIR_MS_MASK_OUTPUT 2000000000
+
/* Special offset for nir_load_uniform values to get a QUNIFORM_*
* state-dependent value.
*/
@@ -619,6 +630,7 @@ QIR_ALU0(FRAG_REV_FLAG)
QIR_ALU0(TEX_RESULT)
QIR_ALU0(TLB_COLOR_READ)
QIR_NODST_1(TLB_COLOR_WRITE)
+QIR_NODST_1(TLB_COLOR_WRITE_MS)
QIR_NODST_1(TLB_Z_WRITE)
QIR_NODST_1(TLB_DISCARD_SETUP)
QIR_NODST_1(TLB_STENCIL_SETUP)
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h
index 866ca5c1300..7c4ff1701ca 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -116,6 +116,17 @@ qpu_tlbc()
return r;
}
+/**
+ * Builds the qpu_reg that names the QPU_W_TLB_COLOR_MS write address on
+ * mux A, for use as the destination of a per-sample TLB color write.
+ */
+static inline struct qpu_reg
+qpu_tlbc_ms(void)
+{
+        struct qpu_reg r = {
+                QPU_MUX_A,
+                QPU_W_TLB_COLOR_MS,
+        };
+
+        return r;
+}
+
static inline struct qpu_reg qpu_r0(void) { return qpu_rn(0); }
static inline struct qpu_reg qpu_r1(void) { return qpu_rn(1); }
static inline struct qpu_reg qpu_r2(void) { return qpu_rn(2); }
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index a3d1627156f..5800e520068 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -438,6 +438,13 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
}
break;
+ case QOP_TLB_COLOR_WRITE_MS:
+ queue(c, qpu_a_MOV(qpu_tlbc_ms(), src[0]));
+ if (discard) {
+ set_last_cond_add(c, QPU_COND_ZS);
+ }
+ break;
+
case QOP_VARY_ADD_C:
queue(c, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack);
break;