diff options
author | Dave Airlie <[email protected]> | 2019-07-19 19:06:48 +1000 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2019-08-27 12:29:58 +1000 |
commit | ceb8d0ac5ac665a1cbb2537586d192374ab1e327 (patch) | |
tree | 473720976b5cd1a98f4c0c130f3f216c9aeb3e9f /src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | |
parent | 15f7688ac9d75dcc10a07f103881c062bf9fdb9a (diff) |
gallivm: add image load/store/atomic support
Reviewed-by: Roland Scheidegger <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_format_soa.c')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 231 |
1 files changed, 230 insertions, 1 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 22c19b10dbd..a6ec91c4a3f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -42,7 +42,9 @@ #include "lp_bld_format.h" #include "lp_bld_arit.h" #include "lp_bld_pack.h" - +#include "lp_bld_flow.h" +#include "lp_bld_printf.h" +#include "lp_bld_intr.h" static void convert_to_soa(struct gallivm_state *gallivm, @@ -858,3 +860,230 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, convert_to_soa(gallivm, aos_fetch, rgba_out, type); } } + +static void +lp_build_insert_soa_chan(struct lp_build_context *bld, + unsigned blockbits, + struct util_format_channel_description chan_desc, + LLVMValueRef *output, + LLVMValueRef rgba) +{ + struct gallivm_state *gallivm = bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_type type = bld->type; + const unsigned width = chan_desc.size; + const unsigned start = chan_desc.shift; + const unsigned stop = start + width; + LLVMValueRef chan; + switch(chan_desc.type) { + case UTIL_FORMAT_TYPE_UNSIGNED: + + if (chan_desc.pure_integer) + chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, ""); + else if (type.floating) { + if (chan_desc.normalized) + chan = lp_build_clamped_float_to_unsigned_norm(gallivm, type, width, rgba); + else + chan = LLVMBuildFPToSI(builder, rgba, bld->vec_type, ""); + } + if (start) + chan = LLVMBuildShl(builder, chan, + lp_build_const_int_vec(gallivm, type, start), ""); + if (!*output) + *output = chan; + else + *output = LLVMBuildOr(builder, *output, chan, ""); + break; + case UTIL_FORMAT_TYPE_SIGNED: + if (chan_desc.pure_integer) + chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, ""); + else if (type.floating) { + uint32_t mask_val = (1UL << chan_desc.size) - 1; + if (chan_desc.normalized) { + char intrin[32]; + double scale = ((1 << (chan_desc.size - 1)) - 1); + LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); + rgba = lp_build_clamp(bld, rgba, lp_build_negate(bld, bld->one), bld->one); + rgba = LLVMBuildFMul(builder, rgba, scale_val, ""); + lp_format_intrinsic(intrin, sizeof intrin, "llvm.rint", bld->vec_type); + rgba = lp_build_intrinsic_unary(builder, intrin, bld->vec_type, rgba); + } + chan = LLVMBuildFPToSI(builder, rgba, bld->int_vec_type, ""); + chan = LLVMBuildAnd(builder, chan, lp_build_const_int_vec(gallivm, type, mask_val), ""); + } + if (start) + chan = LLVMBuildShl(builder, chan, + lp_build_const_int_vec(gallivm, type, start), ""); + if (!*output) + *output = chan; + else + *output = LLVMBuildOr(builder, *output, chan, ""); + break; + case UTIL_FORMAT_TYPE_FLOAT: + if (type.floating) { + if (chan_desc.size == 16) { + chan = lp_build_float_to_half(gallivm, rgba); + chan = LLVMBuildZExt(builder, chan, bld->int_vec_type, ""); + if (start) + chan = LLVMBuildShl(builder, chan, + lp_build_const_int_vec(gallivm, type, start), ""); + if (!*output) + *output = chan; + else + *output = LLVMBuildOr(builder, *output, chan, ""); + } else { + assert(start == 0); + assert(stop == 32); + assert(type.width == 32); + *output = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, ""); + } + } else + assert(0); + break; + default: + assert(0); + *output = bld->undef; + } +} + +static void +lp_build_pack_rgba_soa(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef rgba_in[4], + LLVMValueRef *packed) +{ + unsigned chan; + struct lp_build_context bld; + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= type.width); + /* FIXME: Support more output types */ + assert(type.width == 32); + + lp_build_context_init(&bld, gallivm, type); + for (chan = 0; chan < format_desc->nr_channels; ++chan) { + struct util_format_channel_description chan_desc = format_desc->channel[chan]; + + lp_build_insert_soa_chan(&bld, format_desc->block.bits, + chan_desc, + packed, + rgba_in[chan]); + } +} + +void +lp_build_store_rgba_soa(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef exec_mask, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef out_of_bounds, + const LLVMValueRef rgba_in[4]) +{ + enum pipe_format format = format_desc->format; + LLVMValueRef packed[4] = {}; + unsigned num_stores; + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && + format_desc->block.width == 1 && + format_desc->block.height == 1 && + format_desc->block.bits <= type.width && + (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT || + format_desc->channel[0].size == 32 || + format_desc->channel[0].size == 16)) + { + lp_build_pack_rgba_soa(gallivm, format_desc, type, rgba_in, &packed[0]); + + num_stores = 1; + } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && + (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) && + format_desc->block.width == 1 && + format_desc->block.height == 1 && + format_desc->block.bits > type.width && + ((format_desc->block.bits <= type.width * type.length && + format_desc->channel[0].size <= type.width) || + (format_desc->channel[0].size == 64 && + format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && + type.floating))) + { + /* + * Similar to above, but the packed pixel is larger than what fits + * into an element of the destination format. The packed pixels will be + * shuffled into SoA vectors appropriately, and then the extraction will + * be done in parallel as much as possible. + * Good for 16xn (n > 2) and 32xn (n > 1) formats, care is taken so + * the gathered vectors can be shuffled easily (even with avx). + * 64xn float -> 32xn float is handled too but it's a bit special as + * it does the conversion pre-shuffle. + */ + struct lp_build_context bld; + + lp_build_context_init(&bld, gallivm, type); + assert(type.width == 32); + assert(format_desc->block.bits > type.width); + + unsigned store_width = util_next_power_of_two(format_desc->block.bits); + num_stores = store_width / type.width; + for (unsigned i = 0; i < format_desc->nr_channels; i++) { + struct util_format_channel_description chan_desc = format_desc->channel[i]; + unsigned blockbits = type.width; + unsigned vec_nr; + + vec_nr = chan_desc.shift / type.width; + chan_desc.shift %= type.width; + + lp_build_insert_soa_chan(&bld, blockbits, + chan_desc, + &packed[vec_nr], + rgba_in[i]); + } + + assert(num_stores == 4 || num_stores == 2); + /* we can transpose and store at the same time */ + } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { + packed[0] = lp_build_float_to_r11g11b10(gallivm, rgba_in); + num_stores = 1; + } else + assert(0); + + assert(exec_mask); + + LLVMTypeRef int32_ptr_type = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0); + LLVMTypeRef int16_ptr_type = LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0); + LLVMTypeRef int8_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); + + LLVMValueRef should_store_mask = LLVMBuildAnd(gallivm->builder, exec_mask, LLVMBuildNot(gallivm->builder, out_of_bounds, ""), "store_mask"); + should_store_mask = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask, lp_build_const_int_vec(gallivm, type, 0), ""); + for (unsigned i = 0; i < num_stores; i++) { + struct lp_build_loop_state loop_state; + + LLVMValueRef store_offset = LLVMBuildAdd(gallivm->builder, offset, lp_build_const_int_vec(gallivm, type, i * 4), ""); + store_offset = LLVMBuildGEP(gallivm->builder, base_ptr, &store_offset, 1, ""); + + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + + struct lp_build_if_state ifthen; + LLVMValueRef cond = LLVMBuildExtractElement(gallivm->builder, should_store_mask, loop_state.counter, ""); + lp_build_if(&ifthen, gallivm, cond); + + LLVMValueRef data = LLVMBuildExtractElement(gallivm->builder, packed[i], loop_state.counter, ""); + LLVMValueRef this_offset = LLVMBuildExtractElement(gallivm->builder, store_offset, loop_state.counter, ""); + + if (format_desc->block.bits == 8) { + this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int8_ptr_type, ""); + data = LLVMBuildTrunc(gallivm->builder, data, LLVMInt8TypeInContext(gallivm->context), ""); + } else if (format_desc->block.bits == 16) { + this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int16_ptr_type, ""); + data = LLVMBuildTrunc(gallivm->builder, data, LLVMInt16TypeInContext(gallivm->context), ""); + } else + this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int32_ptr_type, ""); + LLVMBuildStore(gallivm->builder, data, this_offset); + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length), + NULL, LLVMIntUGE); + } +} |