diff options
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_conv.c | 16 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_pack.c | 26 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_pack.h | 5 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 73 |
5 files changed, 119 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index 7854142f736..7cf0deece81 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -130,6 +130,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm, * * Convert float32 to half floats, preserving Infs and NaNs, * with rounding towards zero (trunc). + * XXX: For GL, would prefer rounding towards nearest(-even). */ LLVMValueRef lp_build_float_to_half(struct gallivm_state *gallivm, @@ -143,6 +144,15 @@ lp_build_float_to_half(struct gallivm_state *gallivm, struct lp_type i16_type = lp_type_int_vec(16, 16 * length); LLVMValueRef result; + /* + * Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits + * directly, without any (x86 or generic) intrinsics. + * Albeit the rounding mode cannot be specified (and is undefined, + * though in practice on x86 seems to do nearest-even but it may + * be dependent on instruction set support), so is essentially + * useless. + */ + if (util_cpu_caps.has_f16c && (length == 4 || length == 8)) { struct lp_type i168_type = lp_type_int_vec(16, 16 * 8); @@ -187,7 +197,11 @@ lp_build_float_to_half(struct gallivm_state *gallivm, LLVMValueRef index = LLVMConstInt(i32t, i, 0); LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, ""); #if 0 - /* XXX: not really supported by backends */ + /* + * XXX: not really supported by backends. + * Even if they would now, rounding mode cannot be specified and + * is undefined. + */ LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32); #else LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 0b0f7f0147c..daa2043ee28 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -257,6 +257,32 @@ lp_build_concat_n(struct gallivm_state *gallivm, /** + * Un-interleave vector. + * This will return a vector consisting of every second element + * (depending on lo_hi, beginning at 0 or 1). + * The returned vector size (elems and width) will only be half + * that of the source vector. + */ +LLVMValueRef +lp_build_uninterleave1(struct gallivm_state *gallivm, + unsigned num_elems, + LLVMValueRef a, + unsigned lo_hi) +{ + LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + assert(num_elems <= LP_MAX_VECTOR_LENGTH); + + for(i = 0; i < num_elems / 2; ++i) + elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi); + + shuffle = LLVMConstVector(elems, num_elems / 2); + + return LLVMBuildShuffleVector(gallivm->builder, a, a, shuffle, ""); +} + + +/** * Interleave vector elements. * * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index 7cede35bbde..367fba1fd21 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -58,6 +58,11 @@ lp_build_interleave2(struct gallivm_state *gallivm, LLVMValueRef b, unsigned lo_hi); +LLVMValueRef +lp_build_uninterleave1(struct gallivm_state *gallivm, + unsigned num_elems, + LLVMValueRef a, + unsigned lo_hi); void lp_build_unpack2(struct gallivm_state *gallivm, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index c88dfbf974a..1cbe47ca91f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -248,7 +248,6 @@ lp_build_tgsi_inst_llvm( /* Ignore deprecated instructions */ switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_UP2H: case TGSI_OPCODE_UP2US: case TGSI_OPCODE_UP4B: case TGSI_OPCODE_UP4UB: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index 6f75bec5005..f6b42eead1e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -45,8 +45,10 @@ #include "lp_bld_arit.h" #include "lp_bld_bitarit.h" #include "lp_bld_const.h" +#include "lp_bld_conv.h" #include "lp_bld_gather.h" #include "lp_bld_logic.h" +#include "lp_bld_pack.h" #include "tgsi/tgsi_exec.h" @@ -530,6 +532,75 @@ static struct lp_build_tgsi_action log_action = { log_emit /* emit */ }; +/* TGSI_OPCODE_PK2H */ + +static void +pk2h_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.x */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_X); + /* src0.y */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_Y); +} + +static void +pk2h_emit(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_type f16i_t; + LLVMValueRef lo, hi, res; + + f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32); + lo = lp_build_float_to_half(gallivm, emit_data->args[0]); + hi = lp_build_float_to_half(gallivm, emit_data->args[1]); + /* maybe some interleave doubling vector width would be useful... */ + lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2); + hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2); + res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0); + + emit_data->output[emit_data->chan] = res; +} + +static struct lp_build_tgsi_action pk2h_action = { + pk2h_fetch_args, /* fetch_args */ + pk2h_emit /* emit */ +}; + +/* TGSI_OPCODE_UP2H */ + +static void +up2h_emit(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMContextRef context = gallivm->context; + LLVMValueRef lo, hi, res[2], arg; + unsigned nr = bld_base->base.type.length; + LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2); + + arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, ""); + lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0); + hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1); + res[0] = lp_build_half_to_float(gallivm, lo); + res[1] = lp_build_half_to_float(gallivm, hi); + + emit_data->output[0] = emit_data->output[2] = res[0]; + emit_data->output[1] = emit_data->output[3] = res[1]; +} + +static struct lp_build_tgsi_action up2h_action = { + scalar_unary_fetch_args, /* fetch_args */ + up2h_emit /* emit */ +}; + /* TGSI_OPCODE_LRP */ static void @@ -1032,10 +1103,12 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base) bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action; bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action; bld_base->op_actions[TGSI_OPCODE_LOG] = log_action; + bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action; bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action; bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action; bld_base->op_actions[TGSI_OPCODE_POW] = pow_action; bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action; + bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action; bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action; bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args; |