gallivm: add image load/store/atomic support

Reviewed-by: Roland Scheidegger <[email protected]>
author: Dave Airlie <[email protected]> 2019-07-19 19:06:48 +1000
committer: Dave Airlie <[email protected]> 2019-08-27 12:29:58 +1000
commit: ceb8d0ac5ac665a1cbb2537586d192374ab1e327 (patch)
tree: 473720976b5cd1a98f4c0c130f3f216c9aeb3e9f /src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
parent: 15f7688ac9d75dcc10a07f103881c062bf9fdb9a (diff)
1 files changed, 230 insertions, 1 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 22c19b10dbd..a6ec91c4a3f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -42,7 +42,9 @@
 #include "lp_bld_format.h"
 #include "lp_bld_arit.h"
 #include "lp_bld_pack.h"
-
+#include "lp_bld_flow.h"
+#include "lp_bld_printf.h"
+#include "lp_bld_intr.h"
 
 static void
 convert_to_soa(struct gallivm_state *gallivm,
@@ -858,3 +860,230 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
       convert_to_soa(gallivm, aos_fetch, rgba_out, type);
    }
 }
+
+static void
+lp_build_insert_soa_chan(struct lp_build_context *bld,
+                         unsigned blockbits,
+                         struct util_format_channel_description chan_desc,
+                         LLVMValueRef *output,
+                         LLVMValueRef rgba)
+{
+    struct gallivm_state *gallivm = bld->gallivm;
+    LLVMBuilderRef builder = gallivm->builder;
+    struct lp_type type = bld->type;
+    const unsigned width = chan_desc.size;
+    const unsigned start = chan_desc.shift;
+    const unsigned stop = start + width;
+    LLVMValueRef chan;
+    switch(chan_desc.type) {
+    case UTIL_FORMAT_TYPE_UNSIGNED:
+
+       if (chan_desc.pure_integer)
+          chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
+       else if (type.floating) {
+          if (chan_desc.normalized)
+             chan = lp_build_clamped_float_to_unsigned_norm(gallivm, type, width, rgba);
+          else
+             chan = LLVMBuildFPToSI(builder, rgba, bld->vec_type, "");
+       }
+       if (start)
+          chan = LLVMBuildShl(builder, chan,
+                              lp_build_const_int_vec(gallivm, type, start), "");
+       if (!*output)
+          *output = chan;
+       else
+          *output = LLVMBuildOr(builder, *output, chan, "");
+       break;
+    case UTIL_FORMAT_TYPE_SIGNED:
+       if (chan_desc.pure_integer)
+          chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
+       else if (type.floating) {
+          uint32_t mask_val = (1UL << chan_desc.size) - 1;
+          if (chan_desc.normalized) {
+             char intrin[32];
+             double scale = ((1 << (chan_desc.size - 1)) - 1);
+             LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
+             rgba = lp_build_clamp(bld, rgba, lp_build_negate(bld, bld->one), bld->one);
+             rgba = LLVMBuildFMul(builder, rgba, scale_val, "");
+             lp_format_intrinsic(intrin, sizeof intrin, "llvm.rint", bld->vec_type);
+             rgba = lp_build_intrinsic_unary(builder, intrin, bld->vec_type, rgba);
+          }
+          chan = LLVMBuildFPToSI(builder, rgba, bld->int_vec_type, "");
+          chan = LLVMBuildAnd(builder, chan, lp_build_const_int_vec(gallivm, type, mask_val), "");
+       }
+       if (start)
+          chan = LLVMBuildShl(builder, chan,
+                              lp_build_const_int_vec(gallivm, type, start), "");
+       if (!*output)
+          *output = chan;
+       else
+          *output = LLVMBuildOr(builder, *output, chan, "");
+       break;
+    case UTIL_FORMAT_TYPE_FLOAT:
+       if (type.floating) {
+          if (chan_desc.size == 16) {
+             chan = lp_build_float_to_half(gallivm, rgba);
+             chan = LLVMBuildZExt(builder, chan, bld->int_vec_type, "");
+             if (start)
+                chan = LLVMBuildShl(builder, chan,
+                                    lp_build_const_int_vec(gallivm, type, start), "");
+             if (!*output)
+                *output = chan;
+             else
+                *output = LLVMBuildOr(builder, *output, chan, "");
+          } else {
+             assert(start == 0);
+             assert(stop == 32);
+             assert(type.width == 32);
+             *output = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
+          }
+       } else
+          assert(0);
+       break;
+    default:
+       assert(0);
+       *output = bld->undef;
+    }
+}
+
+static void
+lp_build_pack_rgba_soa(struct gallivm_state *gallivm,
+                       const struct util_format_description *format_desc,
+                       struct lp_type type,
+                       const LLVMValueRef rgba_in[4],
+                       LLVMValueRef *packed)
+{
+   unsigned chan;
+   struct lp_build_context bld;
+   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
+   assert(format_desc->block.width == 1);
+   assert(format_desc->block.height == 1);
+   assert(format_desc->block.bits <= type.width);
+   /* FIXME: Support more output types */
+   assert(type.width == 32);
+
+   lp_build_context_init(&bld, gallivm, type);
+   for (chan = 0; chan < format_desc->nr_channels; ++chan) {
+      struct util_format_channel_description chan_desc = format_desc->channel[chan];
+
+      lp_build_insert_soa_chan(&bld, format_desc->block.bits,
+                               chan_desc,
+                               packed,
+                               rgba_in[chan]);
+   }
+}
+
+void
+lp_build_store_rgba_soa(struct gallivm_state *gallivm,
+                        const struct util_format_description *format_desc,
+                        struct lp_type type,
+                        LLVMValueRef exec_mask,
+                        LLVMValueRef base_ptr,
+                        LLVMValueRef offset,
+                        LLVMValueRef out_of_bounds,
+                        const LLVMValueRef rgba_in[4])
+{
+   enum pipe_format format = format_desc->format;
+   LLVMValueRef packed[4] = {};
+   unsigned num_stores;
+
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+       format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
+       format_desc->block.width == 1 &&
+       format_desc->block.height == 1 &&
+       format_desc->block.bits <= type.width &&
+       (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
+        format_desc->channel[0].size == 32 ||
+        format_desc->channel[0].size == 16))
+   {
+      lp_build_pack_rgba_soa(gallivm, format_desc, type, rgba_in, &packed[0]);
+
+      num_stores = 1;
+   } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) &&
+       format_desc->block.width == 1 &&
+       format_desc->block.height == 1 &&
+       format_desc->block.bits > type.width &&
+       ((format_desc->block.bits <= type.width * type.length &&
+         format_desc->channel[0].size <= type.width) ||
+        (format_desc->channel[0].size == 64 &&
+         format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
+         type.floating)))
+   {
+      /*
+       * Similar to above, but the packed pixel is larger than what fits
+       * into an element of the destination format. The packed pixels will be
+       * shuffled into SoA vectors appropriately, and then the extraction will
+       * be done in parallel as much as possible.
+       * Good for 16xn (n > 2) and 32xn (n > 1) formats, care is taken so
+       * the gathered vectors can be shuffled easily (even with avx).
+       * 64xn float -> 32xn float is handled too but it's a bit special as
+       * it does the conversion pre-shuffle.
+       */
+      struct lp_build_context bld;
+
+      lp_build_context_init(&bld, gallivm, type);
+      assert(type.width == 32);
+      assert(format_desc->block.bits > type.width);
+
+      unsigned store_width = util_next_power_of_two(format_desc->block.bits);
+      num_stores = store_width / type.width;
+      for (unsigned i = 0; i < format_desc->nr_channels; i++) {
+            struct util_format_channel_description chan_desc = format_desc->channel[i];
+            unsigned blockbits = type.width;
+            unsigned vec_nr;
+
+            vec_nr = chan_desc.shift / type.width;
+            chan_desc.shift %= type.width;
+
+            lp_build_insert_soa_chan(&bld, blockbits,
+                                     chan_desc,
+                                     &packed[vec_nr],
+                                     rgba_in[i]);
+      }
+
+      assert(num_stores == 4 || num_stores == 2);
+      /* we can transpose and store at the same time */
+   } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
+      packed[0] = lp_build_float_to_r11g11b10(gallivm, rgba_in);
+      num_stores = 1;
+   } else
+      assert(0);
+
+   assert(exec_mask);
+
+   LLVMTypeRef int32_ptr_type = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0);
+   LLVMTypeRef int16_ptr_type = LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0);
+   LLVMTypeRef int8_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
+
+   LLVMValueRef should_store_mask = LLVMBuildAnd(gallivm->builder, exec_mask, LLVMBuildNot(gallivm->builder, out_of_bounds, ""), "store_mask");
+   should_store_mask = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask, lp_build_const_int_vec(gallivm, type, 0), "");
+   for (unsigned i = 0; i < num_stores; i++) {
+      struct lp_build_loop_state loop_state;
+
+      LLVMValueRef store_offset = LLVMBuildAdd(gallivm->builder, offset, lp_build_const_int_vec(gallivm, type, i * 4), "");
+      store_offset = LLVMBuildGEP(gallivm->builder, base_ptr, &store_offset, 1, "");
+
+      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+      struct lp_build_if_state ifthen;
+      LLVMValueRef cond = LLVMBuildExtractElement(gallivm->builder, should_store_mask, loop_state.counter, "");
+      lp_build_if(&ifthen, gallivm, cond);
+
+      LLVMValueRef data = LLVMBuildExtractElement(gallivm->builder, packed[i], loop_state.counter, "");
+      LLVMValueRef this_offset = LLVMBuildExtractElement(gallivm->builder, store_offset, loop_state.counter, "");
+
+      if (format_desc->block.bits == 8) {
+         this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int8_ptr_type, "");
+         data = LLVMBuildTrunc(gallivm->builder, data, LLVMInt8TypeInContext(gallivm->context), "");
+      } else if (format_desc->block.bits == 16) {
+         this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int16_ptr_type, "");
+         data = LLVMBuildTrunc(gallivm->builder, data, LLVMInt16TypeInContext(gallivm->context), "");
+      } else
+         this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int32_ptr_type, "");
+      LLVMBuildStore(gallivm->builder, data, this_offset);
+      lp_build_endif(&ifthen);
+      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length),
+                             NULL, LLVMIntUGE);
+   }
+}
author	Dave Airlie <[email protected]>	2019-07-19 19:06:48 +1000
committer	Dave Airlie <[email protected]>	2019-08-27 12:29:58 +1000
commit	ceb8d0ac5ac665a1cbb2537586d192374ab1e327 (patch)
tree	473720976b5cd1a98f4c0c130f3f216c9aeb3e9f /src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
parent	15f7688ac9d75dcc10a07f103881c062bf9fdb9a (diff)