diff options
author | Marek Olšák <[email protected]> | 2018-01-02 04:34:53 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2018-02-02 16:46:22 +0100 |
commit | 847d0a393d7f0f967f39302900d5330f32b804c8 (patch) | |
tree | 2af9f54291ce78fa52ccb4477760ec00dc8e0742 /src/amd | |
parent | b3a1d9588eaa486a01c3a2964d69540a4bf45818 (diff) |
radeonsi: use pknorm_i16/u16 and pk_i16/u16 LLVM intrinsics
Reviewed-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 164 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 13 |
2 files changed, 177 insertions, 0 deletions
```diff
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 762351f2807..6ede60afe71 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -67,6 +67,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
 	ctx->f16 = LLVMHalfTypeInContext(ctx->context);
 	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
 	ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
+	ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
 	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
 	ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
 	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
@@ -1281,6 +1282,20 @@ LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
 				  AC_FUNC_ATTR_READNONE);
 }
 
+LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
+			   LLVMValueRef b)
+{
+	LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, "");
+	return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+}
+
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
+			   LLVMValueRef b)
+{
+	LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, "");
+	return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+}
+
 LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
 			   LLVMValueRef b)
 {
@@ -1506,6 +1521,155 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
 				  AC_FUNC_ATTR_LEGACY);
 }
 
+/* Upper 16 bits must be zero. */
+static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx,
+					   LLVMValueRef val[2])
+{
+	return LLVMBuildOr(ctx->builder, val[0],
+			   LLVMBuildShl(ctx->builder, val[1],
+					LLVMConstInt(ctx->i32, 16, 0),
+					""), "");
+}
+
+/* Upper 16 bits are ignored and will be dropped. */
+static LLVMValueRef ac_llvm_pack_two_int32_as_int16(struct ac_llvm_context *ctx,
+						    LLVMValueRef val[2])
+{
+	LLVMValueRef v[2] = {
+		LLVMBuildAnd(ctx->builder, val[0],
+			     LLVMConstInt(ctx->i32, 0xffff, 0), ""),
+		val[1],
+	};
+	return ac_llvm_pack_two_int16(ctx, v);
+}
+
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
+				     LLVMValueRef args[2])
+{
+	if (HAVE_LLVM >= 0x0600) {
+		LLVMValueRef res =
+			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16",
+					   ctx->v2i16, args, 2,
+					   AC_FUNC_ATTR_READNONE);
+		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+	}
+
+	LLVMValueRef val[2];
+
+	for (int chan = 0; chan < 2; chan++) {
+		/* Clamp between [-1, 1]. */
+		val[chan] = ac_build_fmin(ctx, args[chan], ctx->f32_1);
+		val[chan] = ac_build_fmax(ctx, val[chan], LLVMConstReal(ctx->f32, -1));
+		/* Convert to a signed integer in [-32767, 32767]. */
+		val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
+					  LLVMConstReal(ctx->f32, 32767), "");
+		/* If positive, add 0.5, else add -0.5. */
+		val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
+				LLVMBuildSelect(ctx->builder,
+					LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
+						      val[chan], ctx->f32_0, ""),
+					LLVMConstReal(ctx->f32, 0.5),
+					LLVMConstReal(ctx->f32, -0.5), ""), "");
+		val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
+	}
+	return ac_llvm_pack_two_int32_as_int16(ctx, val);
+}
+
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
+				     LLVMValueRef args[2])
+{
+	if (HAVE_LLVM >= 0x0600) {
+		LLVMValueRef res =
+			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16",
+					   ctx->v2i16, args, 2,
+					   AC_FUNC_ATTR_READNONE);
+		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+	}
+
+	LLVMValueRef val[2];
+
+	for (int chan = 0; chan < 2; chan++) {
+		val[chan] = ac_build_clamp(ctx, args[chan]);
+		val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
+					  LLVMConstReal(ctx->f32, 65535), "");
+		val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
+					  LLVMConstReal(ctx->f32, 0.5), "");
+		val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
+					    ctx->i32, "");
+	}
+	return ac_llvm_pack_two_int32_as_int16(ctx, val);
+}
+
+/* The 8-bit and 10-bit clamping is for HW workarounds. */
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
+				 LLVMValueRef args[2], unsigned bits, bool hi)
+{
+	assert(bits == 8 || bits == 10 || bits == 16);
+
+	LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+		bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
+	LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
+		bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
+	LLVMValueRef max_alpha =
+		bits != 10 ? max_rgb : ctx->i32_1;
+	LLVMValueRef min_alpha =
+		bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
+	bool has_intrinsic = HAVE_LLVM >= 0x0600;
+
+	/* Clamp. */
+	if (!has_intrinsic || bits != 16) {
+		for (int i = 0; i < 2; i++) {
+			bool alpha = hi && i == 1;
+			args[i] = ac_build_imin(ctx, args[i],
+						alpha ? max_alpha : max_rgb);
+			args[i] = ac_build_imax(ctx, args[i],
+						alpha ? min_alpha : min_rgb);
+		}
+	}
+
+	if (has_intrinsic) {
+		LLVMValueRef res =
+			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
+					   ctx->v2i16, args, 2,
+					   AC_FUNC_ATTR_READNONE);
+		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+	}
+
+	return ac_llvm_pack_two_int32_as_int16(ctx, args);
+}
+
+/* The 8-bit and 10-bit clamping is for HW workarounds. */
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
+				 LLVMValueRef args[2], unsigned bits, bool hi)
+{
+	assert(bits == 8 || bits == 10 || bits == 16);
+
+	LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+		bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
+	LLVMValueRef max_alpha =
+		bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
+	bool has_intrinsic = HAVE_LLVM >= 0x0600;
+
+	/* Clamp. */
+	if (!has_intrinsic || bits != 16) {
+		for (int i = 0; i < 2; i++) {
+			bool alpha = hi && i == 1;
+			args[i] = ac_build_umin(ctx, args[i],
+						alpha ? max_alpha : max_rgb);
+		}
+	}
+
+	if (has_intrinsic) {
+		LLVMValueRef res =
+			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
+					   ctx->v2i16, args, 2,
+					   AC_FUNC_ATTR_READNONE);
+		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+	}
+
+	return ac_llvm_pack_two_int16(ctx, args);
+}
+
 LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
 {
 	assert(HAVE_LLVM >= 0x0600);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index f95ad6a27c1..35f849a9869 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -53,6 +53,7 @@ struct ac_llvm_context {
 	LLVMTypeRef f16;
 	LLVMTypeRef f32;
 	LLVMTypeRef f64;
+	LLVMTypeRef v2i16;
 	LLVMTypeRef v2i32;
 	LLVMTypeRef v3i32;
 	LLVMTypeRef v4i32;
@@ -258,6 +259,10 @@ LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
 			   LLVMValueRef b);
 LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
 			   LLVMValueRef b);
+LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
+			   LLVMValueRef b);
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
+			   LLVMValueRef b);
 LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
 			   LLVMValueRef b);
 LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
@@ -302,6 +307,14 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 				   struct ac_image_args *a);
 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
 				    LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
+				     LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
+				     LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
+				 LLVMValueRef args[2], unsigned bits, bool hi);
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
+				 LLVMValueRef args[2], unsigned bits, bool hi);
 LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
 void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
```