From c2c8983cf4029a3b427e67f7046efda50689df60 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 10 Jul 2019 09:51:32 -0700 Subject: panfrost: Move midgard/nir* to nir folder The reason for doing this is two-fold: 1. These passes are likely to be shared with the Bifrost compiler Therefore, we don't want to restrict them to Midgard 2. The coding style is different (NIR-style vs Panfrost-style) The NIR passes are candidates for moving upstream into compiler/nir, so don't block that off for stylistic reasons Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/meson.build | 8 +- .../drivers/panfrost/midgard/nir_lower_blend.c | 270 ---------------- .../drivers/panfrost/midgard/nir_lower_blend.h | 60 ---- .../panfrost/midgard/nir_lower_framebuffer.c | 345 --------------------- .../drivers/panfrost/midgard/nir_undef_to_zero.c | 87 ------ src/gallium/drivers/panfrost/nir/nir_lower_blend.c | 270 ++++++++++++++++ src/gallium/drivers/panfrost/nir/nir_lower_blend.h | 60 ++++ .../drivers/panfrost/nir/nir_lower_framebuffer.c | 345 +++++++++++++++++++++ .../drivers/panfrost/nir/nir_undef_to_zero.c | 87 ++++++ src/gallium/drivers/panfrost/pan_blend_shaders.c | 2 +- 10 files changed, 767 insertions(+), 767 deletions(-) delete mode 100644 src/gallium/drivers/panfrost/midgard/nir_lower_blend.c delete mode 100644 src/gallium/drivers/panfrost/midgard/nir_lower_blend.h delete mode 100644 src/gallium/drivers/panfrost/midgard/nir_lower_framebuffer.c delete mode 100644 src/gallium/drivers/panfrost/midgard/nir_undef_to_zero.c create mode 100644 src/gallium/drivers/panfrost/nir/nir_lower_blend.c create mode 100644 src/gallium/drivers/panfrost/nir/nir_lower_blend.h create mode 100644 src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c create mode 100644 src/gallium/drivers/panfrost/nir/nir_undef_to_zero.c diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index e1faa104f6b..f3067c6a784 100644 --- 
a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -35,13 +35,13 @@ files_panfrost = files( 'midgard/midgard_ra_pipeline.c', 'midgard/midgard_liveness.c', 'midgard/midgard_ops.c', - - 'midgard/nir_undef_to_zero.c', - 'midgard/nir_lower_blend.c', - 'midgard/nir_lower_framebuffer.c', 'midgard/cppwrap.cpp', 'midgard/disassemble.c', + 'nir/nir_undef_to_zero.c', + 'nir/nir_lower_blend.c', + 'nir/nir_lower_framebuffer.c', + 'bifrost/disassemble.c', 'pandecode/common.c', diff --git a/src/gallium/drivers/panfrost/midgard/nir_lower_blend.c b/src/gallium/drivers/panfrost/midgard/nir_lower_blend.c deleted file mode 100644 index 0fadeba6674..00000000000 --- a/src/gallium/drivers/panfrost/midgard/nir_lower_blend.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (C) 2019 Alyssa Rosenzweig - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -/** - * @file - * - * Implements the fragment pipeline (blending and writeout) in software, to be - * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment - * shader variant on typical GPUs. This pass is useful if hardware lacks - * fixed-function blending in part or in full. - */ - -#include "compiler/nir/nir.h" -#include "compiler/nir/nir_builder.h" -#include "nir_lower_blend.h" - -/* Given processed factors, combine them per a blend function */ - -static nir_ssa_def * -nir_blend_func( - nir_builder *b, - enum blend_func func, - nir_ssa_def *src, nir_ssa_def *dst) -{ - switch (func) { - case BLEND_FUNC_ADD: - return nir_fadd(b, src, dst); - case BLEND_FUNC_SUBTRACT: - return nir_fsub(b, src, dst); - case BLEND_FUNC_REVERSE_SUBTRACT: - return nir_fsub(b, dst, src); - case BLEND_FUNC_MIN: - return nir_fmin(b, src, dst); - case BLEND_FUNC_MAX: - return nir_fmax(b, src, dst); - } - - unreachable("Invalid blend function"); -} - -/* Does this blend function multiply by a blend factor? */ - -static bool -nir_blend_factored(enum blend_func func) -{ - switch (func) { - case BLEND_FUNC_ADD: - case BLEND_FUNC_SUBTRACT: - case BLEND_FUNC_REVERSE_SUBTRACT: - return true; - default: - return false; - } -} - -/* Compute a src_alpha_saturate factor */ -static nir_ssa_def * -nir_alpha_saturate( - nir_builder *b, - nir_ssa_def *src, nir_ssa_def *dst, - unsigned chan) -{ - nir_ssa_def *Asrc = nir_channel(b, src, 3); - nir_ssa_def *Adst = nir_channel(b, dst, 3); - nir_ssa_def *one = nir_imm_float16(b, 1.0); - nir_ssa_def *Adsti = nir_fsub(b, one, Adst); - - return (chan < 3) ? 
nir_fmin(b, Asrc, Adsti) : one; -} - -/* Returns a scalar single factor, unmultiplied */ - -static nir_ssa_def * -nir_blend_factor_value( - nir_builder *b, - nir_ssa_def *src, nir_ssa_def *dst, nir_ssa_def *bconst, - unsigned chan, - enum blend_factor factor) -{ - switch (factor) { - case BLEND_FACTOR_ZERO: - return nir_imm_float16(b, 0.0); - case BLEND_FACTOR_SRC_COLOR: - return nir_channel(b, src, chan); - case BLEND_FACTOR_DST_COLOR: - return nir_channel(b, dst, chan); - case BLEND_FACTOR_SRC_ALPHA: - return nir_channel(b, src, 3); - case BLEND_FACTOR_DST_ALPHA: - return nir_channel(b, dst, 3); - case BLEND_FACTOR_CONSTANT_COLOR: - return nir_channel(b, bconst, chan); - case BLEND_FACTOR_CONSTANT_ALPHA: - return nir_channel(b, bconst, 3); - case BLEND_FACTOR_SRC_ALPHA_SATURATE: - return nir_alpha_saturate(b, src, dst, chan); - } - - unreachable("Invalid blend factor"); -} - -static nir_ssa_def * -nir_blend_factor( - nir_builder *b, - nir_ssa_def *raw_scalar, - nir_ssa_def *src, nir_ssa_def *dst, nir_ssa_def *bconst, - unsigned chan, - enum blend_factor factor, - bool inverted) -{ - nir_ssa_def *f = - nir_blend_factor_value(b, src, dst, bconst, chan, factor); - - if (inverted) - f = nir_fsub(b, nir_imm_float16(b, 1.0), f); - - return nir_fmul(b, raw_scalar, f); -} - -/* Given a colormask, "blend" with the destination */ - -static nir_ssa_def * -nir_color_mask( - nir_builder *b, - unsigned mask, - nir_ssa_def *src, - nir_ssa_def *dst) -{ - nir_ssa_def *masked[4]; - - for (unsigned c = 0; c < 4; ++c) { - bool enab = (mask & (1 << c)); - masked[c] = enab ? 
nir_channel(b, src, c) : nir_channel(b, dst, c); - } - - return nir_vec(b, masked, 4); -} - -/* Given a blend state, the source color, and the destination color, - * return the blended color - */ - -static nir_ssa_def * -nir_blend( - nir_builder *b, - nir_lower_blend_options options, - nir_ssa_def *src, nir_ssa_def *dst) -{ - /* Grab the blend constant ahead of time */ - nir_ssa_def *bconst = nir_f2f16(b, nir_load_blend_const_color_rgba(b)); - - /* We blend per channel and recombine later */ - nir_ssa_def *channels[4]; - - for (unsigned c = 0; c < 4; ++c) { - /* Decide properties based on channel */ - nir_lower_blend_channel chan = - (c < 3) ? options.rt[0].rgb : options.rt[0].alpha; - - nir_ssa_def *psrc = nir_channel(b, src, c); - nir_ssa_def *pdst = nir_channel(b, dst, c); - - if (nir_blend_factored(chan.func)) { - psrc = nir_blend_factor( - b, psrc, - src, dst, bconst, c, - chan.src_factor, chan.invert_src_factor); - - pdst = nir_blend_factor( - b, pdst, - src, dst, bconst, c, - chan.dst_factor, chan.invert_dst_factor); - } - - channels[c] = nir_blend_func(b, chan.func, psrc, pdst); - } - - /* Then just recombine with an applied colormask */ - nir_ssa_def *blended = nir_vec(b, channels, 4); - return nir_color_mask(b, options.rt[0].colormask, blended, dst); -} - -static bool -nir_is_blend_channel_replace(nir_lower_blend_channel chan) -{ - return - (chan.src_factor == BLEND_FACTOR_ZERO) && - (chan.dst_factor == BLEND_FACTOR_ZERO) && - (chan.invert_src_factor && !chan.invert_dst_factor) && - (chan.func == BLEND_FUNC_ADD || chan.func == BLEND_FUNC_SUBTRACT || chan.func == BLEND_FUNC_MAX); -} - -static bool -nir_is_blend_replace(nir_lower_blend_options options) -{ - return - nir_is_blend_channel_replace(options.rt[0].rgb) && - nir_is_blend_channel_replace(options.rt[0].alpha); -} - -void -nir_lower_blend(nir_shader *shader, nir_lower_blend_options options) -{ - /* Blend shaders are represented as special fragment shaders */ - assert(shader->info.stage == 
MESA_SHADER_FRAGMENT); - - /* Special case replace, since there's nothing to do and we don't want to - * degrade intermediate precision (e.g. for non-blendable R32F targets) */ - if (nir_is_blend_replace(options)) - return; - - nir_foreach_function(func, shader) { - nir_foreach_block(block, func->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_store_deref) - continue; - - /* TODO: Extending to MRT */ - nir_variable *var = nir_intrinsic_get_var(intr, 0); - if (var->data.location != FRAG_RESULT_COLOR) - continue; - - nir_builder b; - nir_builder_init(&b, func->impl); - b.cursor = nir_before_instr(instr); - - /* Grab the input color */ - nir_ssa_def *src = nir_f2f16(&b, nir_ssa_for_src(&b, intr->src[1], 4)); - - /* Grab the tilebuffer color - io lowered to load_output */ - nir_ssa_def *dst = nir_f2f16(&b, nir_load_var(&b, var)); - - /* Blend the two colors per the passed options */ - nir_ssa_def *blended = nir_f2f32(&b, nir_blend(&b, options, src, dst)); - - /* Write out the final color instead of the input */ - nir_instr_rewrite_src(instr, &intr->src[1], - nir_src_for_ssa(blended)); - - } - } - - nir_metadata_preserve(func->impl, nir_metadata_block_index | - nir_metadata_dominance); - } -} diff --git a/src/gallium/drivers/panfrost/midgard/nir_lower_blend.h b/src/gallium/drivers/panfrost/midgard/nir_lower_blend.h deleted file mode 100644 index 7a2df6e5e29..00000000000 --- a/src/gallium/drivers/panfrost/midgard/nir_lower_blend.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright  2019 Alyssa Rosenzweig - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - 
* and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef NIR_BLEND_H -#define NIR_BLEND_H - -#include "compiler/nir/nir.h" -#include "pipe/p_format.h" - -/* These structs encapsulates the blend state such that it can be lowered - * cleanly - */ - -typedef struct { - enum blend_func func; - - enum blend_factor src_factor; - bool invert_src_factor; - - enum blend_factor dst_factor; - bool invert_dst_factor; -} nir_lower_blend_channel; - -typedef struct { - struct { - nir_lower_blend_channel rgb; - nir_lower_blend_channel alpha; - - /* 4-bit colormask. 0x0 for none, 0xF for RGBA, 0x1 for R */ - unsigned colormask; - } rt[8]; -} nir_lower_blend_options; - -void nir_lower_blend(nir_shader *shader, nir_lower_blend_options options); - -void -nir_lower_framebuffer(nir_shader *shader, enum pipe_format format); - -#endif diff --git a/src/gallium/drivers/panfrost/midgard/nir_lower_framebuffer.c b/src/gallium/drivers/panfrost/midgard/nir_lower_framebuffer.c deleted file mode 100644 index fe6ae121a4c..00000000000 --- a/src/gallium/drivers/panfrost/midgard/nir_lower_framebuffer.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright (C) 2019 Collabora, Ltd. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors (Collabora): - * Alyssa Rosenzweig - */ - -/** - * @file - * - * Implements framebuffer format conversions in software, specifically for - * blend shaders on Midgard/Bifrost. load_output/store_output (derefs more - * correctly -- pre I/O lowering) normally for the fragment stage within the - * blend shader will operate with purely vec4 float ("nir") encodings. This - * lowering stage, to be run before I/O is lowered, converts the native - * framebuffer format to a NIR encoding after loads and vice versa before - * stores. This pass is designed for a single render target; Midgard duplicates - * blend shaders for MRT to simplify everything. 
- */ - -#include "compiler/nir/nir.h" -#include "compiler/nir/nir_builder.h" -#include "compiler/nir/nir_format_convert.h" -#include "nir_lower_blend.h" -#include "util/u_format.h" - -/* Converters for UNORM8 formats, e.g. R8G8B8A8_UNORM */ - -static nir_ssa_def * -nir_float_to_unorm8(nir_builder *b, nir_ssa_def *c_float) -{ - /* First, we degrade quality to fp16; we don't need the extra bits */ - nir_ssa_def *degraded = nir_f2f16(b, c_float); - - /* Scale from [0, 1] to [0, 255.0] */ - nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 255.0); - - /* Next, we type convert */ - nir_ssa_def *converted = nir_u2u8(b, nir_f2u16(b, - nir_fround_even(b, scaled))); - - return converted; -} - -static nir_ssa_def * -nir_unorm8_to_float(nir_builder *b, nir_ssa_def *c_native) -{ - /* First, we convert up from u8 to f16 */ - nir_ssa_def *converted = nir_u2f16(b, nir_u2u16(b, c_native)); - - /* Next, we scale down from [0, 255.0] to [0, 1] */ - nir_ssa_def *scaled = nir_fsat(b, nir_fmul_imm(b, converted, 1.0/255.0)); - - return scaled; -} - -/* Converters for UNORM4 formats, packing the final result into 16-bit */ - -static nir_ssa_def * -nir_float_to_unorm4(nir_builder *b, nir_ssa_def *c_float) -{ - /* First, we degrade quality to fp16; we don't need the extra bits */ - nir_ssa_def *degraded = nir_f2f16(b, c_float); - - /* Scale from [0, 1] to [0, 15.0] */ - nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 15.0); - - /* Next, we type convert to u16 */ - nir_ssa_def *converted = nir_f2u16(b, - nir_fround_even(b, scaled)); - - /* In u16 land, we now need to pack */ - nir_ssa_def *cr = nir_channel(b, converted, 0); - nir_ssa_def *cg = nir_channel(b, converted, 1); - nir_ssa_def *cb = nir_channel(b, converted, 2); - nir_ssa_def *ca = nir_channel(b, converted, 3); - - nir_ssa_def *pack = - nir_ior(b, - nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 4))), - nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 8)), nir_ishl(b, ca, nir_imm_int(b, 12)))); - - return pack; -} 
- -static nir_ssa_def * -nir_float_to_rgb10a2(nir_builder *b, nir_ssa_def *c_float, bool normalize) -{ - nir_ssa_def *converted = c_float; - - if (normalize) { - nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, c_float), - nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0)); - - converted = nir_f2u32(b, - nir_fround_even(b, scaled)); - } - - nir_ssa_def *cr = nir_channel(b, converted, 0); - nir_ssa_def *cg = nir_channel(b, converted, 1); - nir_ssa_def *cb = nir_channel(b, converted, 2); - nir_ssa_def *ca = nir_channel(b, converted, 3); - - nir_ssa_def *pack = - nir_ior(b, - nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 10))), - nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 20)), nir_ishl(b, ca, nir_imm_int(b, 30)))); - - return pack; -} - -static nir_ssa_def * -nir_float_to_rgb5a1(nir_builder *b, nir_ssa_def *c_float) -{ - nir_ssa_def *degraded = nir_f2f16(b, c_float); - - nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, degraded), - nir_imm_vec4_16(b, 31.0, 31.0, 31.0, 1.0)); - - nir_ssa_def *converted = nir_f2u16(b, - nir_fround_even(b, scaled)); - - nir_ssa_def *cr = nir_channel(b, converted, 0); - nir_ssa_def *cg = nir_channel(b, converted, 1); - nir_ssa_def *cb = nir_channel(b, converted, 2); - nir_ssa_def *ca = nir_channel(b, converted, 3); - - nir_ssa_def *pack = - nir_ior(b, - nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 5))), - nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 10)), nir_ishl(b, ca, nir_imm_int(b, 15)))); - - return pack; -} - -static nir_ssa_def * -nir_shader_to_native(nir_builder *b, - nir_ssa_def *c_shader, - const struct util_format_description *desc, - unsigned bits, - bool homogenous_bits) -{ - bool float_or_pure_int = - util_format_is_float(desc->format) || - util_format_is_pure_integer(desc->format); - - if (util_format_is_unorm8(desc)) - return nir_float_to_unorm8(b, c_shader); - else if (homogenous_bits && float_or_pure_int) - return c_shader; /* type is already correct */ - - //unsigned bgra[4] = { 2, 1, 0, 3 }; /* BGRA */ - //c_shader = nir_swizzle(b, 
c_shader, swiz, 4); - - /* Special formats */ - switch (desc->format) { - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B4G4R4X4_UNORM: - case PIPE_FORMAT_A4R4_UNORM: - case PIPE_FORMAT_R4A4_UNORM: - case PIPE_FORMAT_A4B4G4R4_UNORM: - return nir_float_to_unorm4(b, c_shader); - - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R10G10B10X2_UNORM: - case PIPE_FORMAT_B10G10R10X2_UNORM: - return nir_float_to_rgb10a2(b, c_shader, true); - - case PIPE_FORMAT_R10G10B10A2_UINT: - return nir_float_to_rgb10a2(b, c_shader, false); - - case PIPE_FORMAT_B5G5R5A1_UNORM: - return nir_float_to_rgb5a1(b, c_shader); - - case PIPE_FORMAT_R11G11B10_FLOAT: - return nir_format_pack_11f11f10f(b, c_shader); - - default: - printf("%s\n", desc->name); - unreachable("Unknown format name"); - } -} - -static nir_ssa_def * -nir_native_to_shader(nir_builder *b, - nir_ssa_def *c_native, - const struct util_format_description *desc, - unsigned bits, - bool homogenous_bits) -{ - bool float_or_pure_int = - util_format_is_float(desc->format) || - util_format_is_pure_integer(desc->format); - - if (util_format_is_unorm8(desc)) - return nir_unorm8_to_float(b, c_native); - else if (homogenous_bits && float_or_pure_int) - return c_native; /* type is already correct */ - else { - printf("%s\n", desc->name); - unreachable("Unknown format name"); - } -} - -void -nir_lower_framebuffer(nir_shader *shader, enum pipe_format format) -{ - /* Blend shaders are represented as special fragment shaders */ - assert(shader->info.stage == MESA_SHADER_FRAGMENT); - - const struct util_format_description *format_desc = - util_format_description(format); - - unsigned nr_channels = format_desc->nr_channels; - unsigned bits = format_desc->channel[0].size; - - /* Do all channels have the same bit count? 
*/ - bool homogenous_bits = true; - - for (unsigned c = 1; c < nr_channels; ++c) - homogenous_bits &= (format_desc->channel[c].size == bits); - - if (format == PIPE_FORMAT_R11G11B10_FLOAT) - homogenous_bits = false; - - /* Figure out the formats for the raw */ - unsigned raw_bitsize_in = bits; - unsigned raw_bitsize_out = bits; - unsigned raw_out_components = 4; - - /* We pack a 4-bit vec4 as 16-bit vec1 */ - if ((homogenous_bits && bits == 4 && util_format_is_unorm(format)) || format == PIPE_FORMAT_B5G5R5A1_UNORM) { - raw_bitsize_out = 16; - raw_out_components = 1; - } else if (format == PIPE_FORMAT_R10G10B10A2_UNORM || format == PIPE_FORMAT_B10G10R10A2_UNORM || format == PIPE_FORMAT_R10G10B10A2_UINT || format == PIPE_FORMAT_R11G11B10_FLOAT) { - raw_bitsize_out = 32; - raw_out_components = 1; - } - - nir_foreach_function(func, shader) { - nir_foreach_block(block, func->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - - bool is_load = intr->intrinsic == nir_intrinsic_load_deref; - bool is_store = intr->intrinsic == nir_intrinsic_store_deref; - - if (!(is_load || is_store)) - continue; - - /* Don't worry about MRT */ - nir_variable *var = nir_intrinsic_get_var(intr, 0); - - if (var->data.location != FRAG_RESULT_COLOR) - continue; - - nir_builder b; - nir_builder_init(&b, func->impl); - - if (is_store) { - /* For stores, add conversion before */ - b.cursor = nir_before_instr(instr); - - /* Grab the input color */ - nir_ssa_def *c_nir = nir_ssa_for_src(&b, intr->src[1], 4); - - /* Format convert */ - nir_ssa_def *converted = nir_shader_to_native(&b, c_nir, format_desc, bits, homogenous_bits); - - if (util_format_is_float(format)) { - if (raw_bitsize_out == 16) - converted = nir_f2f16(&b, converted); - else if (raw_bitsize_out == 32) - converted = nir_f2f32(&b, converted); - } else { - converted = nir_i2i(&b, converted, raw_bitsize_out); - } - - 
/* Rewrite to use a native store by creating a new intrinsic */ - nir_intrinsic_instr *new = - nir_intrinsic_instr_create(shader, nir_intrinsic_store_raw_output_pan); - new->src[0] = nir_src_for_ssa(converted); - - new->num_components = raw_out_components; - - nir_builder_instr_insert(&b, &new->instr); - - /* (And finally removing the old) */ - nir_instr_remove(instr); - } else { - /* For loads, add conversion after */ - b.cursor = nir_after_instr(instr); - - /* Rewrite to use a native load by creating a new intrinsic */ - - nir_intrinsic_instr *new = - nir_intrinsic_instr_create(shader, nir_intrinsic_load_raw_output_pan); - - new->num_components = 4; - - unsigned bitsize = raw_bitsize_in; - nir_ssa_dest_init(&new->instr, &new->dest, 4, bitsize, NULL); - nir_builder_instr_insert(&b, &new->instr); - - /* Convert the raw value */ - nir_ssa_def *raw = &new->dest.ssa; - nir_ssa_def *converted = nir_native_to_shader(&b, raw, format_desc, bits, homogenous_bits); - - /* Rewrite to use the converted value */ - nir_src rewritten = nir_src_for_ssa(converted); - nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, instr); - - /* Finally, remove the old load */ - nir_instr_remove(instr); - } - } - } - - nir_metadata_preserve(func->impl, nir_metadata_block_index | - nir_metadata_dominance); - } -} diff --git a/src/gallium/drivers/panfrost/midgard/nir_undef_to_zero.c b/src/gallium/drivers/panfrost/midgard/nir_undef_to_zero.c deleted file mode 100644 index 447aaebae5c..00000000000 --- a/src/gallium/drivers/panfrost/midgard/nir_undef_to_zero.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (C) 2019 Collabora, Ltd. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors (Collabora): - * Alyssa Rosenzweig - */ - -/** - * @file - * - * Flushes undefined SSA values to a zero vector fo the appropriate component - * count, to avoid undefined behaviour in the resulting shader. Not required - * for conformance as use of uninitialized variables is explicitly left - * undefined by the spec. Works around buggy apps, however. - * - * Call immediately after nir_opt_undef. If called before, larger optimization - * opportunities from the former pass will be missed. If called outside of an - * optimization loop, constant propagation and algebraic optimizations won't be - * able to kick in to reduce stuff consuming the zero. 
- */ - -#include "compiler/nir/nir.h" -#include "compiler/nir/nir_builder.h" - -bool nir_undef_to_zero(nir_shader *shader); - -bool -nir_undef_to_zero(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(function, shader) { - if (!function->impl) continue; - - nir_builder b; - nir_builder_init(&b, function->impl); - - nir_foreach_block(block, function->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_ssa_undef) continue; - - nir_ssa_undef_instr *und = nir_instr_as_ssa_undef(instr); - - /* Get the required size */ - unsigned c = und->def.num_components; - unsigned s = und->def.bit_size; - - nir_const_value v[NIR_MAX_VEC_COMPONENTS]; - memset(v, 0, sizeof(v)); - - b.cursor = nir_before_instr(instr); - nir_ssa_def *zero = nir_build_imm(&b, c, s, v); - nir_src zerosrc = nir_src_for_ssa(zero); - - nir_ssa_def_rewrite_uses(&und->def, zerosrc); - - progress |= true; - } - } - - nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance); - - } - - return progress; -} - - diff --git a/src/gallium/drivers/panfrost/nir/nir_lower_blend.c b/src/gallium/drivers/panfrost/nir/nir_lower_blend.c new file mode 100644 index 00000000000..0fadeba6674 --- /dev/null +++ b/src/gallium/drivers/panfrost/nir/nir_lower_blend.c @@ -0,0 +1,270 @@ +/* + * Copyright (C) 2019 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file + * + * Implements the fragment pipeline (blending and writeout) in software, to be + * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment + * shader variant on typical GPUs. This pass is useful if hardware lacks + * fixed-function blending in part or in full. + */ + +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "nir_lower_blend.h" + +/* Given processed factors, combine them per a blend function */ + +static nir_ssa_def * +nir_blend_func( + nir_builder *b, + enum blend_func func, + nir_ssa_def *src, nir_ssa_def *dst) +{ + switch (func) { + case BLEND_FUNC_ADD: + return nir_fadd(b, src, dst); + case BLEND_FUNC_SUBTRACT: + return nir_fsub(b, src, dst); + case BLEND_FUNC_REVERSE_SUBTRACT: + return nir_fsub(b, dst, src); + case BLEND_FUNC_MIN: + return nir_fmin(b, src, dst); + case BLEND_FUNC_MAX: + return nir_fmax(b, src, dst); + } + + unreachable("Invalid blend function"); +} + +/* Does this blend function multiply by a blend factor? 
*/ + +static bool +nir_blend_factored(enum blend_func func) +{ + switch (func) { + case BLEND_FUNC_ADD: + case BLEND_FUNC_SUBTRACT: + case BLEND_FUNC_REVERSE_SUBTRACT: + return true; + default: + return false; + } +} + +/* Compute a src_alpha_saturate factor */ +static nir_ssa_def * +nir_alpha_saturate( + nir_builder *b, + nir_ssa_def *src, nir_ssa_def *dst, + unsigned chan) +{ + nir_ssa_def *Asrc = nir_channel(b, src, 3); + nir_ssa_def *Adst = nir_channel(b, dst, 3); + nir_ssa_def *one = nir_imm_float16(b, 1.0); + nir_ssa_def *Adsti = nir_fsub(b, one, Adst); + + return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one; +} + +/* Returns a scalar single factor, unmultiplied */ + +static nir_ssa_def * +nir_blend_factor_value( + nir_builder *b, + nir_ssa_def *src, nir_ssa_def *dst, nir_ssa_def *bconst, + unsigned chan, + enum blend_factor factor) +{ + switch (factor) { + case BLEND_FACTOR_ZERO: + return nir_imm_float16(b, 0.0); + case BLEND_FACTOR_SRC_COLOR: + return nir_channel(b, src, chan); + case BLEND_FACTOR_DST_COLOR: + return nir_channel(b, dst, chan); + case BLEND_FACTOR_SRC_ALPHA: + return nir_channel(b, src, 3); + case BLEND_FACTOR_DST_ALPHA: + return nir_channel(b, dst, 3); + case BLEND_FACTOR_CONSTANT_COLOR: + return nir_channel(b, bconst, chan); + case BLEND_FACTOR_CONSTANT_ALPHA: + return nir_channel(b, bconst, 3); + case BLEND_FACTOR_SRC_ALPHA_SATURATE: + return nir_alpha_saturate(b, src, dst, chan); + } + + unreachable("Invalid blend factor"); +} + +static nir_ssa_def * +nir_blend_factor( + nir_builder *b, + nir_ssa_def *raw_scalar, + nir_ssa_def *src, nir_ssa_def *dst, nir_ssa_def *bconst, + unsigned chan, + enum blend_factor factor, + bool inverted) +{ + nir_ssa_def *f = + nir_blend_factor_value(b, src, dst, bconst, chan, factor); + + if (inverted) + f = nir_fsub(b, nir_imm_float16(b, 1.0), f); + + return nir_fmul(b, raw_scalar, f); +} + +/* Given a colormask, "blend" with the destination */ + +static nir_ssa_def * +nir_color_mask( + nir_builder *b, + 
unsigned mask, + nir_ssa_def *src, + nir_ssa_def *dst) +{ + nir_ssa_def *masked[4]; + + for (unsigned c = 0; c < 4; ++c) { + bool enab = (mask & (1 << c)); + masked[c] = enab ? nir_channel(b, src, c) : nir_channel(b, dst, c); + } + + return nir_vec(b, masked, 4); +} + +/* Given a blend state, the source color, and the destination color, + * return the blended color. Only options.rt[0] is consulted. Factors are + * applied for ADD / SUBTRACT / REVERSE_SUBTRACT only; MIN and MAX combine the + * raw channels. The colormask is applied last, keeping dst where a bit is 0. + */ + +static nir_ssa_def * +nir_blend( + nir_builder *b, + nir_lower_blend_options options, + nir_ssa_def *src, nir_ssa_def *dst) +{ + /* Grab the blend constant ahead of time */ + nir_ssa_def *bconst = nir_f2f16(b, nir_load_blend_const_color_rgba(b)); + + /* We blend per channel and recombine later */ + nir_ssa_def *channels[4]; + + for (unsigned c = 0; c < 4; ++c) { + /* Decide properties based on channel */ + nir_lower_blend_channel chan = + (c < 3) ? options.rt[0].rgb : options.rt[0].alpha; + + nir_ssa_def *psrc = nir_channel(b, src, c); + nir_ssa_def *pdst = nir_channel(b, dst, c); + + if (nir_blend_factored(chan.func)) { + psrc = nir_blend_factor( + b, psrc, + src, dst, bconst, c, + chan.src_factor, chan.invert_src_factor); + + pdst = nir_blend_factor( + b, pdst, + src, dst, bconst, c, + chan.dst_factor, chan.invert_dst_factor); + } + + channels[c] = nir_blend_func(b, chan.func, psrc, pdst); + } + + /* Then just recombine with an applied colormask */ + nir_ssa_def *blended = nir_vec(b, channels, 4); + return nir_color_mask(b, options.rt[0].colormask, blended, dst); +} +/* True for the state src * (1 - 0) combined with dst * 0, i.e. when the + * result is expected to equal the source. NOTE(review): BLEND_FUNC_MAX is + * accepted here, yet nir_blend() ignores the factors for MAX and emits + * fmax(src, dst) -- confirm this is intended. */ +static bool +nir_is_blend_channel_replace(nir_lower_blend_channel chan) +{ + return + (chan.src_factor == BLEND_FACTOR_ZERO) && + (chan.dst_factor == BLEND_FACTOR_ZERO) && + (chan.invert_src_factor && !chan.invert_dst_factor) && + (chan.func == BLEND_FUNC_ADD || chan.func == BLEND_FUNC_SUBTRACT || chan.func == BLEND_FUNC_MAX); +} +/* Replace must hold for both the RGB and the alpha state of rt[0] */ +static bool +nir_is_blend_replace(nir_lower_blend_options options) +{ + return + nir_is_blend_channel_replace(options.rt[0].rgb) && + nir_is_blend_channel_replace(options.rt[0].alpha); +} + +void
+nir_lower_blend(nir_shader *shader, nir_lower_blend_options options)
+{
+   /* Pass entry point. Every store_deref to FRAG_RESULT_COLOR has its value
+    * replaced by the result of blending that value against the current
+    * contents of the same variable, as described by options.rt[0]. Blending
+    * runs at 16-bit float precision and the result is widened back to 32-bit
+    * before it is stored.
+    */
+
+   /* Blend shaders are represented as special fragment shaders */
+   assert(shader->info.stage == MESA_SHADER_FRAGMENT);
+
+   /* Special case replace, since there's nothing to do and we don't want to
+    * degrade intermediate precision (e.g. for non-blendable R32F targets) */
+   if (nir_is_blend_replace(options))
+      return;
+
+   nir_foreach_function(func, shader) {
+      /* A function may be a bare declaration without an implementation;
+       * there are no blocks to walk and no metadata to preserve then */
+      if (!func->impl)
+         continue;
+
+      /* One builder per implementation; only the cursor moves per store */
+      nir_builder b;
+      nir_builder_init(&b, func->impl);
+
+      nir_foreach_block(block, func->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            if (intr->intrinsic != nir_intrinsic_store_deref)
+               continue;
+
+            /* TODO: Extending to MRT */
+            nir_variable *var = nir_intrinsic_get_var(intr, 0);
+            if (var->data.location != FRAG_RESULT_COLOR)
+               continue;
+
+            b.cursor = nir_before_instr(instr);
+
+            /* Grab the input color */
+            nir_ssa_def *src = nir_f2f16(&b, nir_ssa_for_src(&b, intr->src[1], 4));
+
+            /* Grab the tilebuffer color - io lowered to load_output */
+            nir_ssa_def *dst = nir_f2f16(&b, nir_load_var(&b, var));
+
+            /* Blend the two colors per the passed options */
+            nir_ssa_def *blended = nir_f2f32(&b, nir_blend(&b, options, src, dst));
+
+            /* Write out the final color instead of the input */
+            nir_instr_rewrite_src(instr, &intr->src[1],
+                                  nir_src_for_ssa(blended));
+         }
+      }
+
+      nir_metadata_preserve(func->impl, nir_metadata_block_index |
+                            nir_metadata_dominance);
+   }
+}
diff --git a/src/gallium/drivers/panfrost/nir/nir_lower_blend.h b/src/gallium/drivers/panfrost/nir/nir_lower_blend.h
new file mode 100644
index 00000000000..7a2df6e5e29
--- /dev/null
+++ b/src/gallium/drivers/panfrost/nir/nir_lower_blend.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2019 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+
* to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef NIR_BLEND_H +#define NIR_BLEND_H + +#include "compiler/nir/nir.h" +#include "pipe/p_format.h" + +/* These structs encapsulate the blend state such that it can be lowered + * cleanly. Each channel group carries a blend function plus a source and a + * destination factor; an invert flag selects (1 - factor) instead of factor. + */ + +typedef struct { + enum blend_func func; + + enum blend_factor src_factor; + bool invert_src_factor; + + enum blend_factor dst_factor; + bool invert_dst_factor; +} nir_lower_blend_channel; + +typedef struct { + struct { + nir_lower_blend_channel rgb; + nir_lower_blend_channel alpha; + + /* 4-bit colormask.
0x0 for none, 0xF for RGBA, 0x1 for R */ + unsigned colormask; + } rt[8]; +} nir_lower_blend_options; + +void nir_lower_blend(nir_shader *shader, nir_lower_blend_options options); + +void +nir_lower_framebuffer(nir_shader *shader, enum pipe_format format); + +#endif diff --git a/src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c b/src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c new file mode 100644 index 00000000000..fe6ae121a4c --- /dev/null +++ b/src/gallium/drivers/panfrost/nir/nir_lower_framebuffer.c @@ -0,0 +1,345 @@ +/* + * Copyright (C) 2019 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors (Collabora): + * Alyssa Rosenzweig + */ + +/** + * @file + * + * Implements framebuffer format conversions in software, specifically for + * blend shaders on Midgard/Bifrost. 
load_output/store_output (or, more + * correctly, their deref forms before I/O lowering) in the fragment stage of + * the blend shader normally operate on pure vec4 float ("nir") encodings. This + * lowering stage, to be run before I/O is lowered, converts the native + * framebuffer format to a NIR encoding after loads and vice versa before + * stores. This pass is designed for a single render target; Midgard duplicates + * blend shaders for MRT to simplify everything. + */ + +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_format_convert.h" +#include "nir_lower_blend.h" +#include "util/u_format.h" + +/* Converters for UNORM8 formats, e.g. R8G8B8A8_UNORM */ + +static nir_ssa_def * +nir_float_to_unorm8(nir_builder *b, nir_ssa_def *c_float) +{ + /* First, we degrade quality to fp16; we don't need the extra bits */ + nir_ssa_def *degraded = nir_f2f16(b, c_float); + + /* Scale from [0, 1] to [0, 255.0] */ + nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 255.0); + + /* Next, we round to nearest-even and type convert f16 -> u16 -> u8 */ + nir_ssa_def *converted = nir_u2u8(b, nir_f2u16(b, + nir_fround_even(b, scaled))); + + return converted; +} +/* Inverse of nir_float_to_unorm8: widen to f16 and rescale to [0, 1] */ +static nir_ssa_def * +nir_unorm8_to_float(nir_builder *b, nir_ssa_def *c_native) +{ + /* First, we convert up from u8 to f16 */ + nir_ssa_def *converted = nir_u2f16(b, nir_u2u16(b, c_native)); + + /* Next, we scale down from [0, 255.0] to [0, 1] */ + nir_ssa_def *scaled = nir_fsat(b, nir_fmul_imm(b, converted, 1.0/255.0)); + + return scaled; +} + +/* Converters for UNORM4 formats, packing the final result into 16-bit */ + +static nir_ssa_def * +nir_float_to_unorm4(nir_builder *b, nir_ssa_def *c_float) +{ + /* First, we degrade quality to fp16; we don't need the extra bits */ + nir_ssa_def *degraded = nir_f2f16(b, c_float); + + /* Scale from [0, 1] to [0, 15.0] */ + nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 15.0); + + /* Next, we type convert to u16 */ + nir_ssa_def *converted = nir_f2u16(b, +
nir_fround_even(b, scaled)); + + /* In u16 land, we now need to pack: component 0 goes to bits 0-3, + * component 1 to bits 4-7, component 2 to bits 8-11, component 3 to + * bits 12-15 */ + nir_ssa_def *cr = nir_channel(b, converted, 0); + nir_ssa_def *cg = nir_channel(b, converted, 1); + nir_ssa_def *cb = nir_channel(b, converted, 2); + nir_ssa_def *ca = nir_channel(b, converted, 3); + + nir_ssa_def *pack = + nir_ior(b, + nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 4))), + nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 8)), nir_ishl(b, ca, nir_imm_int(b, 12)))); + + return pack; +} +/* Pack a vec4 as 10:10:10:2 with component 0 in the low bits. With + * normalize set, the input is saturated, scaled by (1023, 1023, 1023, 3), + * rounded and converted to u32 first; otherwise it is packed as given. */ +static nir_ssa_def * +nir_float_to_rgb10a2(nir_builder *b, nir_ssa_def *c_float, bool normalize) +{ + nir_ssa_def *converted = c_float; + + if (normalize) { + nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, c_float), + nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0)); + + converted = nir_f2u32(b, + nir_fround_even(b, scaled)); + } + + nir_ssa_def *cr = nir_channel(b, converted, 0); + nir_ssa_def *cg = nir_channel(b, converted, 1); + nir_ssa_def *cb = nir_channel(b, converted, 2); + nir_ssa_def *ca = nir_channel(b, converted, 3); + + nir_ssa_def *pack = + nir_ior(b, + nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 10))), + nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 20)), nir_ishl(b, ca, nir_imm_int(b, 30)))); + + return pack; +} +/* Pack a vec4 as 5:5:5:1 with component 0 in the low bits, after + * saturating and scaling by (31, 31, 31, 1) at fp16 precision */ +static nir_ssa_def * +nir_float_to_rgb5a1(nir_builder *b, nir_ssa_def *c_float) +{ + nir_ssa_def *degraded = nir_f2f16(b, c_float); + + nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, degraded), + nir_imm_vec4_16(b, 31.0, 31.0, 31.0, 1.0)); + + nir_ssa_def *converted = nir_f2u16(b, + nir_fround_even(b, scaled)); + + nir_ssa_def *cr = nir_channel(b, converted, 0); + nir_ssa_def *cg = nir_channel(b, converted, 1); + nir_ssa_def *cb = nir_channel(b, converted, 2); + nir_ssa_def *ca = nir_channel(b, converted, 3); + + nir_ssa_def *pack = + nir_ior(b, + nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 5))), + nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 10)), nir_ishl(b, ca, nir_imm_int(b, 15)))); + + return pack; +} + +static nir_ssa_def * +nir_shader_to_native(nir_builder *b, + nir_ssa_def
*c_shader,
+                     const struct util_format_description *desc,
+                     unsigned bits,
+                     bool homogenous_bits)
+{
+   /* Converts a shader-side colour into the value handed to the raw
+    * framebuffer store. UNORM8 formats are quantised per component; float
+    * and pure-integer formats whose channels all share one width are passed
+    * through; the remaining special formats are packed into one scalar.
+    * `bits` is currently unused.
+    */
+   bool float_or_pure_int =
+      util_format_is_float(desc->format) ||
+      util_format_is_pure_integer(desc->format);
+
+   if (util_format_is_unorm8(desc))
+      return nir_float_to_unorm8(b, c_shader);
+   else if (homogenous_bits && float_or_pure_int)
+      return c_shader; /* type is already correct */
+
+   /* NOTE(review): a component swizzle (e.g. for BGRA orderings) was stubbed
+    * out here and never enabled, so the packers below receive the components
+    * in shader order -- confirm that is what the hardware expects. */
+
+   /* Special formats */
+   switch (desc->format) {
+   case PIPE_FORMAT_B4G4R4A4_UNORM:
+   case PIPE_FORMAT_B4G4R4X4_UNORM:
+   case PIPE_FORMAT_A4R4_UNORM:
+   case PIPE_FORMAT_R4A4_UNORM:
+   case PIPE_FORMAT_A4B4G4R4_UNORM:
+      return nir_float_to_unorm4(b, c_shader);
+
+   case PIPE_FORMAT_R10G10B10A2_UNORM:
+   case PIPE_FORMAT_B10G10R10A2_UNORM:
+   case PIPE_FORMAT_R10G10B10X2_UNORM:
+   case PIPE_FORMAT_B10G10R10X2_UNORM:
+      return nir_float_to_rgb10a2(b, c_shader, true);
+
+   case PIPE_FORMAT_R10G10B10A2_UINT:
+      return nir_float_to_rgb10a2(b, c_shader, false);
+
+   case PIPE_FORMAT_B5G5R5A1_UNORM:
+      return nir_float_to_rgb5a1(b, c_shader);
+
+   case PIPE_FORMAT_R11G11B10_FLOAT:
+      return nir_format_pack_11f11f10f(b, c_shader);
+
+   default:
+      /* Diagnostics belong on stderr: stdout is the application's stream
+       * and may still be buffered when unreachable() fires */
+      fprintf(stderr, "%s\n", desc->name);
+      unreachable("Unknown format name");
+   }
+}
+
+/* Converts a raw framebuffer value into the shader-side encoding after a raw
+ * load. Only UNORM8 formats and homogenous float / pure-integer formats are
+ * handled; every other format is reported and hits unreachable(). `bits` is
+ * currently unused.
+ */
+static nir_ssa_def *
+nir_native_to_shader(nir_builder *b,
+                     nir_ssa_def *c_native,
+                     const struct util_format_description *desc,
+                     unsigned bits,
+                     bool homogenous_bits)
+{
+   bool float_or_pure_int =
+      util_format_is_float(desc->format) ||
+      util_format_is_pure_integer(desc->format);
+
+   if (util_format_is_unorm8(desc))
+      return nir_unorm8_to_float(b, c_native);
+   else if (homogenous_bits && float_or_pure_int)
+      return c_native; /* type is already correct */
+   else {
+      fprintf(stderr, "%s\n", desc->name);
+      unreachable("Unknown format name");
+   }
+}
+
+/* Rewrites the colour I/O of a blend shader to use the framebuffer's native
+ * encoding: each store_deref to FRAG_RESULT_COLOR becomes a
+ * store_raw_output_pan of the converted value, and each load_deref of it
+ * becomes a load_raw_output_pan followed by a conversion back to the shader
+ * encoding. The original deref instruction is removed in both cases.
+ */
+void
+nir_lower_framebuffer(nir_shader *shader, enum pipe_format format)
+{
+   /* Blend shaders are represented as special fragment shaders */
+   assert(shader->info.stage == MESA_SHADER_FRAGMENT);
+
+   const struct util_format_description *format_desc =
+      util_format_description(format);
+
+   unsigned nr_channels = format_desc->nr_channels;
+   unsigned bits = format_desc->channel[0].size;
+
+   /* Do all channels have the same bit count? */
+   bool homogenous_bits = true;
+
+   for (unsigned c = 1; c < nr_channels; ++c)
+      homogenous_bits &= (format_desc->channel[c].size == bits);
+
+   if (format == PIPE_FORMAT_R11G11B10_FLOAT)
+      homogenous_bits = false;
+
+   /* Figure out the formats for the raw load (in) and raw store (out);
+    * by default both are a vec4 at the width of channel 0 */
+   unsigned raw_bitsize_in = bits;
+   unsigned raw_bitsize_out = bits;
+   unsigned raw_out_components = 4;
+
+   /* We pack a 4-bit vec4 as 16-bit vec1 */
+   if ((homogenous_bits && bits == 4 && util_format_is_unorm(format)) ||
+       format == PIPE_FORMAT_B5G5R5A1_UNORM) {
+      raw_bitsize_out = 16;
+      raw_out_components = 1;
+   } else if (format == PIPE_FORMAT_R10G10B10A2_UNORM ||
+              format == PIPE_FORMAT_B10G10R10A2_UNORM ||
+              format == PIPE_FORMAT_R10G10B10A2_UINT ||
+              format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      /* These are stored as a single 32-bit word.
+       * NOTE(review): nir_shader_to_native() also packs the R10G10B10X2 and
+       * B10G10R10X2 UNORM formats into one word, but they are not listed
+       * here -- confirm whether they should be. */
+      raw_bitsize_out = 32;
+      raw_out_components = 1;
+   }
+
+   nir_foreach_function(func, shader) {
+      /* A function may be a bare declaration without an implementation;
+       * there are no blocks to walk and no metadata to preserve then */
+      if (!func->impl)
+         continue;
+
+      /* One builder per implementation; only the cursor moves per access */
+      nir_builder b;
+      nir_builder_init(&b, func->impl);
+
+      nir_foreach_block(block, func->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+            bool is_load = intr->intrinsic == nir_intrinsic_load_deref;
+            bool is_store = intr->intrinsic == nir_intrinsic_store_deref;
+
+            if (!(is_load || is_store))
+               continue;
+
+            /* Don't worry about MRT */
+            nir_variable *var = nir_intrinsic_get_var(intr, 0);
+
+            if (var->data.location != FRAG_RESULT_COLOR)
+               continue;
+
+            if (is_store) {
+               /* For stores, add conversion before */
+               b.cursor = nir_before_instr(instr);
+
+               /* Grab the input color */
+               nir_ssa_def *c_nir = nir_ssa_for_src(&b, intr->src[1], 4);
+
+               /* Format convert */
+               nir_ssa_def *converted = nir_shader_to_native(&b, c_nir, format_desc, bits, homogenous_bits);
+
+               /* Bring the converted value to the raw store's bit size */
+               if (util_format_is_float(format)) {
+                  if (raw_bitsize_out == 16)
+                     converted = nir_f2f16(&b, converted);
+                  else if (raw_bitsize_out == 32)
+                     converted = nir_f2f32(&b, converted);
+               } else {
+                  converted = nir_i2i(&b, converted, raw_bitsize_out);
+               }
+
+               /* Rewrite to use a native store by creating a new intrinsic */
+               nir_intrinsic_instr *new =
+                  nir_intrinsic_instr_create(shader, nir_intrinsic_store_raw_output_pan);
+               new->src[0] = nir_src_for_ssa(converted);
+
+               new->num_components = raw_out_components;
+
+               nir_builder_instr_insert(&b, &new->instr);
+
+               /* (And finally removing the old) */
+               nir_instr_remove(instr);
+            } else {
+               /* For loads, add conversion after */
+               b.cursor = nir_after_instr(instr);
+
+               /* Rewrite to use a native load by creating a new intrinsic */
+               nir_intrinsic_instr *new =
+                  nir_intrinsic_instr_create(shader, nir_intrinsic_load_raw_output_pan);
+
+               new->num_components = 4;
+
+               unsigned bitsize = raw_bitsize_in;
+               nir_ssa_dest_init(&new->instr, &new->dest, 4, bitsize, NULL);
+               nir_builder_instr_insert(&b, &new->instr);
+
+               /* Convert the raw value */
+               nir_ssa_def *raw = &new->dest.ssa;
+               nir_ssa_def *converted = nir_native_to_shader(&b, raw, format_desc, bits, homogenous_bits);
+
+               /* Rewrite to use the converted value */
+               nir_src rewritten = nir_src_for_ssa(converted);
+               nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, instr);
+
+               /* Finally, remove the old load */
+               nir_instr_remove(instr);
+            }
+         }
+      }
+
+      nir_metadata_preserve(func->impl, nir_metadata_block_index |
+                            nir_metadata_dominance);
+   }
+}
diff --git a/src/gallium/drivers/panfrost/nir/nir_undef_to_zero.c b/src/gallium/drivers/panfrost/nir/nir_undef_to_zero.c
new file mode 100644
index 00000000000..447aaebae5c
--- /dev/null
+++ b/src/gallium/drivers/panfrost/nir/nir_undef_to_zero.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2019 Collabora, Ltd.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors (Collabora): + * Alyssa Rosenzweig + */ + +/** + * @file + * + * Flushes undefined SSA values to a zero vector of the appropriate component + * count, to avoid undefined behaviour in the resulting shader. Not required + * for conformance as use of uninitialized variables is explicitly left + * undefined by the spec. Works around buggy apps, however. + * + * Call immediately after nir_opt_undef. If called before, larger optimization + * opportunities from the former pass will be missed. If called outside of an + * optimization loop, constant propagation and algebraic optimizations won't be + * able to kick in to reduce stuff consuming the zero.
+ */
+
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
+
+bool nir_undef_to_zero(nir_shader *shader);
+
+/* Replaces every SSA undef in the shader with a zero immediate of the same
+ * component count and bit size, rewrites all uses to that immediate and
+ * removes the undef. Returns true if at least one undef was replaced.
+ */
+bool
+nir_undef_to_zero(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (!function->impl) continue;
+
+      nir_builder b;
+      nir_builder_init(&b, function->impl);
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_ssa_undef) continue;
+
+            nir_ssa_undef_instr *und = nir_instr_as_ssa_undef(instr);
+
+            /* Get the required size */
+            unsigned c = und->def.num_components;
+            unsigned s = und->def.bit_size;
+
+            /* All-zero constant, large enough for any vector width */
+            nir_const_value v[NIR_MAX_VEC_COMPONENTS];
+            memset(v, 0, sizeof(v));
+
+            b.cursor = nir_before_instr(instr);
+            nir_ssa_def *zero = nir_build_imm(&b, c, s, v);
+            nir_src zerosrc = nir_src_for_ssa(zero);
+
+            nir_ssa_def_rewrite_uses(&und->def, zerosrc);
+
+            /* Drop the now-unused undef. If it were left behind, every later
+             * run of this pass would find it again, emit another zero and
+             * report progress, which keeps an optimization loop spinning
+             * until something else deletes it. Removal is safe under the
+             * _safe iterator. */
+            nir_instr_remove(instr);
+
+            progress = true;
+         }
+      }
+
+      nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
+
+   }
+
+   return progress;
+}
+
+
diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c
index 993b612ee50..7e29e1eb430 100644
--- a/src/gallium/drivers/panfrost/pan_blend_shaders.c
+++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c
@@ -27,7 +27,7 @@
 #include "pan_util.h"
 #include "midgard/midgard_compile.h"
 #include "compiler/nir/nir_builder.h"
-#include "midgard/nir_lower_blend.h"
+#include "nir/nir_lower_blend.h"
 #include "gallium/auxiliary/util/u_blend.h"
 #include "util/u_memory.h"
--
cgit v1.2.3