v3d: add lowering for OpenGL logic operations

This implements support for OpenGL logic operations by emitting code to read from the TLB if needed and blending the fragment output accordingly. It is similar to VC4's blend lowering pass, but exclusive to logic operations, since blending is otherwise supported in hardware. The pass doesn't handle MSAA targets yet.

Fixes the following piglit tests:
  spec/!opengl 1.0/gl-1.0-logicop/*
  spec/!opengl 1.1/gl-1.1-xor
  spec/!opengl 1.1/gl-1.1-xor-copypixels

It also fixes text cursor rendering in LibreOffice with the GTK+2 theme, which is rendered via glamor using the XOR logic operation.

v2: fix checks for allowed variable location and maximum render target (Eric)

Reviewed-by: Eric Anholt <[email protected]>
author: Iago Toral Quiroga <[email protected]> 2019-07-03 09:38:39 +0200
committer: Iago Toral Quiroga <[email protected]> 2019-07-12 09:16:38 +0200
commit: e540775f0cb189ca81565815968323edc935cc74 (patch)
tree: eaa01f6468102fa1af9711601d75c9dd7f7899b3 /src/broadcom
parent: 7c1d70891150c9960b1bb2464b53a95f4645037c (diff)
4 files changed, 279 insertions, 0 deletions
diff --git a/src/broadcom/compiler/meson.build b/src/broadcom/compiler/meson.build
index 1249af5e8ab..d7af999c321 100644
--- a/src/broadcom/compiler/meson.build
+++ b/src/broadcom/compiler/meson.build
@@ -37,6 +37,7 @@ libbroadcom_compiler_files = files(
   'v3d_compiler.h',
   'v3d_nir_lower_io.c',
   'v3d_nir_lower_image_load_store.c',
+  'v3d_nir_lower_logic_ops.c',
   'v3d_nir_lower_scratch.c',
   'v3d_nir_lower_txf_ms.c',
 )
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 67c7dd48d8c..288273aac14 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -812,6 +812,7 @@ bool vir_opt_small_immediates(struct v3d_compile *c);
 bool vir_opt_vpm(struct v3d_compile *c);
 void v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c);
 void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c);
+void v3d_nir_lower_logic_ops(nir_shader *s, struct v3d_compile *c);
 void v3d_nir_lower_scratch(nir_shader *s);
 void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c);
 void v3d_nir_lower_image_load_store(nir_shader *s);
diff --git a/src/broadcom/compiler/v3d_nir_lower_logic_ops.c b/src/broadcom/compiler/v3d_nir_lower_logic_ops.c
new file mode 100644
index 00000000000..849e554e8b7
--- /dev/null
+++ b/src/broadcom/compiler/v3d_nir_lower_logic_ops.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright © 2019 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Implements lowering for logical operations.
+ *
+ * V3D doesn't have any hardware support for logic ops.  Instead, you read the
+ * current contents of the destination from the tile buffer, then do math using
+ * your output color and that destination value, and update the output color
+ * appropriately.
+ */
+
+#include "util/u_format.h"
+#include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_format_convert.h"
+#include "v3d_compiler.h"
+
+/* Emit NIR computing logicop_func(src, dst); unknown ops warn and copy src. */
+static nir_ssa_def *
+v3d_logicop(nir_builder *b, int logicop_func,
+            nir_ssa_def *src, nir_ssa_def *dst)
+{
+        switch (logicop_func) {
+        case PIPE_LOGICOP_SET:
+                return nir_imm_int(b, ~0);
+        case PIPE_LOGICOP_CLEAR:
+                return nir_imm_int(b, 0);
+        case PIPE_LOGICOP_NOOP:
+                return dst;
+        case PIPE_LOGICOP_INVERT:
+                return nir_inot(b, dst);
+        case PIPE_LOGICOP_COPY_INVERTED:
+                return nir_inot(b, src);
+        case PIPE_LOGICOP_AND:
+                return nir_iand(b, src, dst);
+        case PIPE_LOGICOP_AND_REVERSE:
+                return nir_iand(b, src, nir_inot(b, dst));
+        case PIPE_LOGICOP_AND_INVERTED:
+                return nir_iand(b, nir_inot(b, src), dst);
+        case PIPE_LOGICOP_NAND:
+                return nir_inot(b, nir_iand(b, src, dst));
+        case PIPE_LOGICOP_OR:
+                return nir_ior(b, src, dst);
+        case PIPE_LOGICOP_OR_REVERSE:
+                return nir_ior(b, src, nir_inot(b, dst));
+        case PIPE_LOGICOP_OR_INVERTED:
+                return nir_ior(b, nir_inot(b, src), dst);
+        case PIPE_LOGICOP_NOR:
+                return nir_inot(b, nir_ior(b, src, dst));
+        case PIPE_LOGICOP_XOR:
+                return nir_ixor(b, src, dst);
+        case PIPE_LOGICOP_EQUIV:
+                return nir_inot(b, nir_ixor(b, src, dst));
+        default:
+                if (logicop_func != PIPE_LOGICOP_COPY)
+                        fprintf(stderr, "Unknown logic op %d\n", logicop_func);
+                return src;
+        }
+}
+
+/* Pick srcs[swiz] for the X..W selectors, or a 0.0 / 1.0 float constant.
+ * NONE and unrecognized selectors warn on stderr and yield 0.0. */
+static nir_ssa_def *
+v3d_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
+{
+        switch (swiz) {
+        case PIPE_SWIZZLE_X:
+        case PIPE_SWIZZLE_Y:
+        case PIPE_SWIZZLE_Z:
+        case PIPE_SWIZZLE_W:
+                return srcs[swiz];
+        case PIPE_SWIZZLE_1:
+                return nir_imm_float(b, 1.0);
+        default:
+                if (swiz != PIPE_SWIZZLE_0)
+                        fprintf(stderr, "warning: unknown swizzle\n");
+                return nir_imm_float(b, 0.0);
+        }
+}
+
+/* Reorder the four float channels by swiz[] and pack them as unorm 4x8. */
+static nir_ssa_def *
+v3d_nir_swizzle_and_pack(nir_builder *b, nir_ssa_def **chans,
+                         const uint8_t *swiz)
+{
+        nir_ssa_def *sw[4];
+        for (int ch = 0; ch < 4; ch++)
+                sw[ch] = v3d_nir_get_swizzled_channel(b, chans, swiz[ch]);
+        return nir_pack_unorm_4x8(b, nir_vec4(b, sw[0], sw[1], sw[2], sw[3]));
+}
+
+/* Unpack a unorm 4x8 value into four float channels, then build a vec4 whose
+ * components are picked from those channels according to swiz[]. */
+static nir_ssa_def *
+v3d_nir_unpack_and_swizzle(nir_builder *b, nir_ssa_def *packed,
+                           const uint8_t *swiz)
+{
+        nir_ssa_def *vec = nir_unpack_unorm_4x8(b, packed);
+        nir_ssa_def *chan[4], *out[4];
+
+        for (int i = 0; i < 4; i++)
+                chan[i] = nir_channel(b, vec, i);
+        for (int i = 0; i < 4; i++)
+                out[i] = v3d_nir_get_swizzled_channel(b, chan, swiz[i]);
+
+        return nir_vec4(b, out[0], out[1], out[2], out[3]);
+}
+
+/* Return the channel swizzle to use for render target rt.
+ *
+ * R and B channels are swapped automatically for BGRA formats on tile loads
+ * and stores (see 'swap_rb' field in v3d_resource), so such surfaces are
+ * treated as regular RGBA formats and get the identity swizzle.
+ */
+static const uint8_t *
+v3d_get_format_swizzle_for_rt(struct v3d_compile *c, int rt)
+{
+        static const uint8_t ident[4] = { 0, 1, 2, 3 };
+        const bool treat_as_rgba =
+                c->fs_key->color_fmt[rt].swizzle[0] == 2 &&
+                c->fs_key->color_fmt[rt].format != PIPE_FORMAT_B5G6R5_UNORM;
+
+        return treat_as_rgba ? ident : c->fs_key->color_fmt[rt].swizzle;
+}
+
+/* Emit four single-component load_tlb_color_v3d intrinsics (rt as the source,
+ * sample as the base, one per component) and gather the results in a vec4. */
+static nir_ssa_def *
+v3d_nir_get_tlb_color(nir_builder *b, int rt, int sample)
+{
+        nir_ssa_def *comps[4];
+        for (int comp = 0; comp < 4; comp++) {
+                nir_intrinsic_instr *ld = nir_intrinsic_instr_create(
+                        b->shader, nir_intrinsic_load_tlb_color_v3d);
+                ld->num_components = 1;
+                nir_intrinsic_set_base(ld, sample);
+                nir_intrinsic_set_component(ld, comp);
+                ld->src[0] = nir_src_for_ssa(nir_imm_int(b, rt));
+                nir_ssa_dest_init(&ld->instr, &ld->dest, 1, 32, NULL);
+                nir_builder_instr_insert(b, &ld->instr);
+                comps[comp] = &ld->dest.ssa;
+        }
+        return nir_vec4(b, comps[0], comps[1], comps[2], comps[3]);
+}
+
+/* Read the current color of rt from the TLB, apply the key's logic op to the
+ * unorm8-packed source and destination colors and return the unpacked result.
+ */
+static nir_ssa_def *
+v3d_nir_emit_logic_op(struct v3d_compile *c, nir_builder *b,
+                      nir_ssa_def *src, int rt, int sample)
+{
+        const uint8_t identity[4] = { 0, 1, 2, 3 };
+        const uint8_t *rt_swz = v3d_get_format_swizzle_for_rt(c, rt);
+        nir_ssa_def *dst = v3d_nir_get_tlb_color(b, rt, sample);
+        nir_ssa_def *src_ch[4], *dst_ch[4];
+
+        for (unsigned ch = 0; ch < 4; ch++) {
+                src_ch[ch] = nir_channel(b, src, ch);
+                dst_ch[ch] = nir_channel(b, dst, ch);
+        }
+
+        /* Source is packed as-is; the TLB color goes through the rt swizzle. */
+        nir_ssa_def *src_bits = v3d_nir_swizzle_and_pack(b, src_ch, identity);
+        nir_ssa_def *dst_bits = v3d_nir_swizzle_and_pack(b, dst_ch, rt_swz);
+        nir_ssa_def *res_bits =
+                v3d_logicop(b, c->fs_key->logicop_func, src_bits, dst_bits);
+
+        return v3d_nir_unpack_and_swizzle(b, res_bits, rt_swz);
+}
+
+/* Make the output store 'intr' write the logic-op result for render target rt
+ * instead of the original color. Only sample 0 of the TLB is read back.
+ */
+static void
+v3d_nir_lower_logic_op_instr(struct v3d_compile *c, nir_builder *b,
+                             nir_intrinsic_instr *intr, int rt)
+{
+        /* XXX: this is not correct for MSAA render targets */
+        nir_ssa_def *lowered =
+                v3d_nir_emit_logic_op(c, b, intr->src[0].ssa, rt, 0);
+
+        nir_src new_src = nir_src_for_ssa(lowered);
+        nir_instr_rewrite_src(&intr->instr, &intr->src[0], new_src);
+        intr->num_components = lowered->num_components;
+}
+
+/* Lower the color store_output intrinsics in 'block' whose render target is
+ * subject to logic ops. Each store is lowered at most once. Returns true. */
+static bool
+v3d_nir_lower_logic_ops_block(nir_block *block, struct v3d_compile *c)
+{
+        nir_builder b;
+        nir_builder_init(&b, nir_cf_node_get_function(&block->cf_node));
+        nir_foreach_instr_safe(instr, block) {
+                if (instr->type != nir_instr_type_intrinsic)
+                        continue;
+
+                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+                if (intr->intrinsic != nir_intrinsic_store_output)
+                        continue;
+
+                nir_foreach_variable(var, &c->s->outputs) {
+                        const int rt = var->data.driver_location;
+                        if (rt != nir_intrinsic_base(intr))
+                                continue;
+
+                        const int loc = var->data.location;
+                        if (loc != FRAG_RESULT_COLOR &&
+                            (loc < FRAG_RESULT_DATA0 ||
+                             loc >= FRAG_RESULT_DATA0 + V3D_MAX_DRAW_BUFFERS)) {
+                                continue;
+                        }
+                        assert(rt < V3D_MAX_DRAW_BUFFERS);
+
+                        /* Logic operations do not apply on floating point or
+                         * sRGB enabled render targets.
+                         */
+                        const enum pipe_format format =
+                                c->fs_key->color_fmt[rt].format;
+                        if (util_format_is_float(format) ||
+                            util_format_is_srgb(format)) {
+                                continue;
+                        }
+
+                        b.cursor = nir_before_instr(&intr->instr);
+                        v3d_nir_lower_logic_op_instr(c, &b, intr, rt);
+                        /* Lowered: never apply the logic op to it twice. */
+                        break;
+                }
+        }
+
+        return true;
+}
+
+/* Pass entry point: lower logic ops on the color outputs of shader s. */
+void
+v3d_nir_lower_logic_ops(nir_shader *s, struct v3d_compile *c)
+{
+        /* COPY just writes the source color, and is also what the key holds
+         * when logic ops are disabled, so there is nothing to lower. */
+        if (c->fs_key->logicop_func == PIPE_LOGICOP_COPY)
+                return;
+
+        nir_foreach_function(func, s) {
+                if (!func->impl)
+                        continue;
+                nir_foreach_block(block, func->impl)
+                        v3d_nir_lower_logic_ops_block(block, c);
+
+                nir_metadata_preserve(func->impl,
+                                      nir_metadata_block_index |
+                                      nir_metadata_dominance);
+        }
+}
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 4f1ee605214..c18318a6295 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -799,6 +799,8 @@ v3d_nir_lower_fs_early(struct v3d_compile *c)
         if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
                 v3d_fixup_fs_output_types(c);
 
+        NIR_PASS_V(c->s, v3d_nir_lower_logic_ops, c);
+
         /* If the shader has no non-TLB side effects, we can promote it to
          * enabling early_fragment_tests even if the user didn't.
          */
author	Iago Toral Quiroga <[email protected]>	2019-07-03 09:38:39 +0200
committer	Iago Toral Quiroga <[email protected]>	2019-07-12 09:16:38 +0200
commit	e540775f0cb189ca81565815968323edc935cc74 (patch)
tree	eaa01f6468102fa1af9711601d75c9dd7f7899b3 /src/broadcom
parent	7c1d70891150c9960b1bb2464b53a95f4645037c (diff)