diff options
-rw-r--r-- | src/gallium/drivers/vc4/vc4_context.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 286 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_state.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_uniforms.c | 30 |
5 files changed, 276 insertions, 51 deletions
vc4: blend in packed 8888 integer space for non-sRGB render targets

Summary of what the hunks below do (descriptive text only; patch tools
skip everything before the first "diff --git" line):

  * vc4_context.h, vc4_state.c: blend_color becomes a struct holding the
    original float pipe_blend_color (.f) plus a per-channel uint8_t copy
    (.ub[4]) filled in by vc4_set_blend_color() via float_to_ubyte().
  * vc4_nir_lower_blend.c: the existing float helpers gain an _f suffix,
    and _i variants are added that operate on a whole packed 32-bit pixel
    using the *_4x8 NIR ops.  vc4_nir_lower_blend_instr() keeps the float
    path for sRGB formats and uses vc4_do_blending_i() otherwise.
  * vc4_qir.h, vc4_uniforms.c: two new uniforms supply the blend constant
    as a packed word: _RGBA (bytes ordered by the colour buffer's format
    swizzle) and _AAAA (alpha replicated into all four bytes).

Review changes made to the patch text (single-line, in-place edits, so
all hunk line counts are unchanged):

  * vc4_nir_set_packed_chan(): the channel mask was "0xff << (chan * 8)"
    on a signed int.  chan == 3 overflows int, and chan == 4 (what
    vc4_do_blending_i() passes down when the format has no alpha channel,
    reachable through PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) shifts by the
    full width of the type; both are undefined behaviour in C.  The mask
    is now built from an unsigned constant and is 0 for chan >= 4, so src0
    passes through untouched when there is no channel to replace.
  * vc4_write_uniforms(): the uint8_t blend-constant bytes were promoted
    to int before being shifted into bits 24..31.  They are now widened to
    uint32_t first.

NOTE(review): this text was recovered from a copy with line breaks and
indentation collapsed.  The 8-column space indentation below is a
reconstruction; if context lines fail to match, apply with a
whitespace-tolerant option.

diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 7a758f8545f..86f2ce5e608 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -291,7 +291,10 @@ struct vc4_context {
 
         struct vc4_vertex_stateobj *vtx;
 
-        struct pipe_blend_color blend_color;
+        struct {
+                struct pipe_blend_color f;
+                uint8_t ub[4];
+        } blend_color;
         struct pipe_stencil_ref stencil_ref;
         unsigned sample_mask;
         struct pipe_framebuffer_state framebuffer;
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index 17b524653bb..373c9e12d11 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -86,11 +86,11 @@ vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
 }
 
 static nir_ssa_def *
-vc4_blend_channel(nir_builder *b,
-                  nir_ssa_def **src,
-                  nir_ssa_def **dst,
-                  unsigned factor,
-                  int channel)
+vc4_blend_channel_f(nir_builder *b,
+                    nir_ssa_def **src,
+                    nir_ssa_def **dst,
+                    unsigned factor,
+                    int channel)
 {
         switch(factor) {
         case PIPE_BLENDFACTOR_ONE:
@@ -146,8 +146,75 @@ vc4_blend_channel(nir_builder *b,
 }
 
 static nir_ssa_def *
-vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
-               unsigned func)
+vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
+                        int chan)
+{
+        unsigned chan_mask = chan < 4 ? 0xffu << (chan * 8) : 0;
+        return nir_ior(b,
+                       nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
+                       nir_iand(b, src1, nir_imm_int(b, chan_mask)));
+}
+
+static nir_ssa_def *
+vc4_blend_channel_i(nir_builder *b,
+                    nir_ssa_def *src,
+                    nir_ssa_def *dst,
+                    nir_ssa_def *src_a,
+                    nir_ssa_def *dst_a,
+                    unsigned factor,
+                    int a_chan)
+{
+        switch (factor) {
+        case PIPE_BLENDFACTOR_ONE:
+                return nir_imm_int(b, ~0);
+        case PIPE_BLENDFACTOR_SRC_COLOR:
+                return src;
+        case PIPE_BLENDFACTOR_SRC_ALPHA:
+                return src_a;
+        case PIPE_BLENDFACTOR_DST_ALPHA:
+                return dst_a;
+        case PIPE_BLENDFACTOR_DST_COLOR:
+                return dst;
+        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+                return vc4_nir_set_packed_chan(b,
+                                               nir_umin_4x8(b,
+                                                            src_a,
+                                                            nir_inot(b, dst_a)),
+                                               nir_imm_int(b, ~0),
+                                               a_chan);
+        case PIPE_BLENDFACTOR_CONST_COLOR:
+                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA);
+        case PIPE_BLENDFACTOR_CONST_ALPHA:
+                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA);
+        case PIPE_BLENDFACTOR_ZERO:
+                return nir_imm_int(b, 0);
+        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+                return nir_inot(b, src);
+        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+                return nir_inot(b, src_a);
+        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+                return nir_inot(b, dst_a);
+        case PIPE_BLENDFACTOR_INV_DST_COLOR:
+                return nir_inot(b, dst);
+        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA));
+        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA));
+
+        default:
+        case PIPE_BLENDFACTOR_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_SRC1_ALPHA:
+        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+                /* Unsupported. */
+                fprintf(stderr, "Unknown blend factor %d\n", factor);
+                return nir_imm_int(b, ~0);
+        }
+}
+
+static nir_ssa_def *
+vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+                 unsigned func)
 {
         switch (func) {
         case PIPE_BLEND_ADD:
@@ -169,9 +236,33 @@ vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
         }
 }
 
+static nir_ssa_def *
+vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+                 unsigned func)
+{
+        switch (func) {
+        case PIPE_BLEND_ADD:
+                return nir_usadd_4x8(b, src, dst);
+        case PIPE_BLEND_SUBTRACT:
+                return nir_ussub_4x8(b, src, dst);
+        case PIPE_BLEND_REVERSE_SUBTRACT:
+                return nir_ussub_4x8(b, dst, src);
+        case PIPE_BLEND_MIN:
+                return nir_umin_4x8(b, src, dst);
+        case PIPE_BLEND_MAX:
+                return nir_umax_4x8(b, src, dst);
+
+        default:
+                /* Unsupported. */
+                fprintf(stderr, "Unknown blend func %d\n", func);
+                return src;
+
+        }
+}
+
 static void
-vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
-                nir_ssa_def **src_color, nir_ssa_def **dst_color)
+vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
+                  nir_ssa_def **src_color, nir_ssa_def **dst_color)
 {
         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
 
@@ -192,20 +283,106 @@ vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
                 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
                                   blend->alpha_dst_factor);
                 src_blend[i] = nir_fmul(b, src_color[i],
-                                        vc4_blend_channel(b,
-                                                          src_color, dst_color,
-                                                          src_factor, i));
+                                        vc4_blend_channel_f(b,
+                                                            src_color, dst_color,
+                                                            src_factor, i));
                 dst_blend[i] = nir_fmul(b, dst_color[i],
-                                        vc4_blend_channel(b,
-                                                          src_color, dst_color,
-                                                          dst_factor, i));
+                                        vc4_blend_channel_f(b,
+                                                            src_color, dst_color,
+                                                            dst_factor, i));
         }
 
         for (int i = 0; i < 4; i++) {
-                result[i] = vc4_blend_func(b, src_blend[i], dst_blend[i],
-                                           ((i != 3) ? blend->rgb_func :
-                                            blend->alpha_func));
+                result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
+                                             ((i != 3) ? blend->rgb_func :
+                                              blend->alpha_func));
+        }
+}
+
+static nir_ssa_def *
+vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
+{
+        nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
+        return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
+}
+
+static nir_ssa_def *
+vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
+                  nir_ssa_def *src_color, nir_ssa_def *dst_color,
+                  nir_ssa_def *src_float_a)
+{
+        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
+
+        if (!blend->blend_enable)
+                return src_color;
+
+        enum pipe_format color_format = c->fs_key->color_format;
+        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+        nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
+        nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
+        nir_ssa_def *dst_a;
+        int alpha_chan;
+        for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
+                if (format_swiz[alpha_chan] == 3)
+                        break;
+        }
+        if (alpha_chan != 4) {
+                nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
+                dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
+                                                              shift), imm_0xff));
+        } else {
+                dst_a = nir_imm_int(b, ~0);
+        }
+
+        nir_ssa_def *src_factor = vc4_blend_channel_i(b,
+                                                      src_color, dst_color,
+                                                      src_a, dst_a,
+                                                      blend->rgb_src_factor,
+                                                      alpha_chan);
+        nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
+                                                      src_color, dst_color,
+                                                      src_a, dst_a,
+                                                      blend->rgb_dst_factor,
+                                                      alpha_chan);
+
+        if (alpha_chan != 4 &&
+            blend->alpha_src_factor != blend->rgb_src_factor) {
+                nir_ssa_def *src_alpha_factor =
+                        vc4_blend_channel_i(b,
+                                            src_color, dst_color,
+                                            src_a, dst_a,
+                                            blend->alpha_src_factor,
+                                            alpha_chan);
+                src_factor = vc4_nir_set_packed_chan(b, src_factor,
+                                                     src_alpha_factor,
+                                                     alpha_chan);
+        }
+        if (alpha_chan != 4 &&
+            blend->alpha_dst_factor != blend->rgb_dst_factor) {
+                nir_ssa_def *dst_alpha_factor =
+                        vc4_blend_channel_i(b,
+                                            src_color, dst_color,
+                                            src_a, dst_a,
+                                            blend->alpha_dst_factor,
+                                            alpha_chan);
+                dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
+                                                     dst_alpha_factor,
+                                                     alpha_chan);
+        }
+        nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
+        nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
+
+        nir_ssa_def *result =
+                vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
+        if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
+                nir_ssa_def *result_a = vc4_blend_func_i(b,
+                                                         src_blend,
+                                                         dst_blend,
+                                                         blend->alpha_func);
+                result = vc4_nir_set_packed_chan(b, result, result_a,
+                                                 alpha_chan);
         }
+        return result;
 }
 
 static nir_ssa_def *
@@ -299,12 +476,33 @@ vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
         nir_builder_instr_insert(b, &discard->instr);
 }
 
+static nir_ssa_def *
+vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
+                         nir_ssa_def **colors)
+{
+        enum pipe_format color_format = c->fs_key->color_format;
+        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+
+        nir_ssa_def *swizzled[4];
+        for (int i = 0; i < 4; i++) {
+                swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
+                                                           format_swiz[i]);
+        }
+
+        return nir_pack_unorm_4x8(b,
+                                  nir_vec4(b,
+                                           swizzled[0], swizzled[1],
+                                           swizzled[2], swizzled[3]));
+
+}
+
 static void
 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
                           nir_intrinsic_instr *intr)
 {
         enum pipe_format color_format = c->fs_key->color_format;
         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+        bool srgb = util_format_is_srgb(color_format);
 
         /* Pull out the float src/dst color components. */
         nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
@@ -315,45 +513,39 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
                 unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
         }
 
-        /* Unswizzle the destination color. */
-        nir_ssa_def *dst_color[4];
-        for (unsigned i = 0; i < 4; i++) {
-                dst_color[i] = vc4_nir_get_swizzled_channel(b,
-                                                            unpacked_dst_color,
-                                                            format_swiz[i]);
-        }
-
         vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
 
-        /* Turn dst color to linear. */
-        if (util_format_is_srgb(color_format)) {
+        nir_ssa_def *packed_color;
+        if (srgb) {
+                /* Unswizzle the destination color. */
+                nir_ssa_def *dst_color[4];
+                for (unsigned i = 0; i < 4; i++) {
+                        dst_color[i] = vc4_nir_get_swizzled_channel(b,
+                                                                    unpacked_dst_color,
+                                                                    format_swiz[i]);
+                }
+
+                /* Turn dst color to linear. */
                 for (int i = 0; i < 3; i++)
                         dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
-        }
 
-        nir_ssa_def *blend_color[4];
-        vc4_do_blending(c, b, blend_color, src_color, dst_color);
+                nir_ssa_def *blend_color[4];
+                vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
 
-        /* sRGB encode the output color */
-        if (util_format_is_srgb(color_format)) {
+                /* sRGB encode the output color */
                 for (int i = 0; i < 3; i++)
                         blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
-        }
 
-        nir_ssa_def *swizzled_outputs[4];
-        for (int i = 0; i < 4; i++) {
-                swizzled_outputs[i] =
-                        vc4_nir_get_swizzled_channel(b, blend_color,
-                                                     format_swiz[i]);
-        }
+                packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
+        } else {
+                nir_ssa_def *packed_src_color =
+                        vc4_nir_swizzle_and_pack(c, b, src_color);
 
-        nir_ssa_def *packed_color =
-                nir_pack_unorm_4x8(b,
-                                   nir_vec4(b,
-                                            swizzled_outputs[0],
-                                            swizzled_outputs[1],
-                                            swizzled_outputs[2],
-                                            swizzled_outputs[3]));
+                packed_color =
+                        vc4_do_blending_i(c, b,
+                                          packed_src_color, packed_dst_color,
+                                          src_color[3]);
+        }
 
         packed_color = vc4_logicop(b, c->fs_key->logicop_func,
                                    packed_color, packed_dst_color);
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index fce24cd2330..fa1b50f3d10 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -253,6 +253,8 @@ enum quniform_contents {
         QUNIFORM_BLEND_CONST_COLOR_Y,
         QUNIFORM_BLEND_CONST_COLOR_Z,
         QUNIFORM_BLEND_CONST_COLOR_W,
+        QUNIFORM_BLEND_CONST_COLOR_RGBA,
+        QUNIFORM_BLEND_CONST_COLOR_AAAA,
 
         QUNIFORM_STENCIL,
 
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 8a759c2ca4c..147694644f0 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -51,7 +51,9 @@ vc4_set_blend_color(struct pipe_context *pctx,
                     const struct pipe_blend_color *blend_color)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        vc4->blend_color = *blend_color;
+        vc4->blend_color.f = *blend_color;
+        for (int i = 0; i < 4; i++)
+                vc4->blend_color.ub[i] = float_to_ubyte(blend_color->color[i]);
         vc4->dirty |= VC4_DIRTY_BLEND_COLOR;
 }
 
diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c
index 85d6998205e..f5ad481f186 100644
--- a/src/gallium/drivers/vc4/vc4_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -262,11 +262,35 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                 case QUNIFORM_BLEND_CONST_COLOR_Z:
                 case QUNIFORM_BLEND_CONST_COLOR_W:
                         cl_aligned_f(&uniforms,
-                                     CLAMP(vc4->blend_color.color[uinfo->contents[i] -
-                                                                  QUNIFORM_BLEND_CONST_COLOR_X],
+                                     CLAMP(vc4->blend_color.f.color[uinfo->contents[i] -
+                                                                    QUNIFORM_BLEND_CONST_COLOR_X],
                                            0, 1));
                         break;
 
+                case QUNIFORM_BLEND_CONST_COLOR_RGBA: {
+                        const uint8_t *format_swiz =
+                                vc4_get_format_swizzle(vc4->framebuffer.cbufs[0]->format);
+                        uint32_t color = 0;
+                        for (int i = 0; i < 4; i++) {
+                                if (format_swiz[i] >= 4)
+                                        continue;
+
+                                color |= ((uint32_t)vc4->blend_color.ub[format_swiz[i]] <<
+                                          (i * 8));
+                        }
+                        cl_aligned_u32(&uniforms, color);
+                        break;
+                }
+
+                case QUNIFORM_BLEND_CONST_COLOR_AAAA: {
+                        uint32_t a = vc4->blend_color.ub[3];
+                        cl_aligned_u32(&uniforms, ((a) |
+                                                   (a << 8) |
+                                                   (a << 16) |
+                                                   (a << 24)));
+                        break;
+                }
+
                 case QUNIFORM_STENCIL:
                         cl_aligned_u32(&uniforms,
                                        vc4->zsa->stencil_uniforms[uinfo->data[i]] |
@@ -330,6 +354,8 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
                 case QUNIFORM_BLEND_CONST_COLOR_Y:
                 case QUNIFORM_BLEND_CONST_COLOR_Z:
                 case QUNIFORM_BLEND_CONST_COLOR_W:
+                case QUNIFORM_BLEND_CONST_COLOR_RGBA:
+                case QUNIFORM_BLEND_CONST_COLOR_AAAA:
                         dirty |= VC4_DIRTY_BLEND_COLOR;
                         break;
 