summaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2018-01-02 04:34:53 +0100
committerMarek Olšák <[email protected]>2018-02-02 16:46:22 +0100
commit847d0a393d7f0f967f39302900d5330f32b804c8 (patch)
tree2af9f54291ce78fa52ccb4477760ec00dc8e0742 /src/amd
parentb3a1d9588eaa486a01c3a2964d69540a4bf45818 (diff)
radeonsi: use pknorm_i16/u16 and pk_i16/u16 LLVM intrinsics
Reviewed-by: Samuel Pitoiset <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r--src/amd/common/ac_llvm_build.c164
-rw-r--r--src/amd/common/ac_llvm_build.h13
2 files changed, 177 insertions, 0 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 762351f2807..6ede60afe71 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -67,6 +67,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
ctx->f16 = LLVMHalfTypeInContext(ctx->context);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
+ ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
@@ -1281,6 +1282,20 @@ LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
AC_FUNC_ATTR_READNONE);
}
+/* Signed integer minimum: emits a signed "a <= b" compare and selects a
+ * when it holds, b otherwise. */
+LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
+			   LLVMValueRef b)
+{
+	LLVMBuilderRef builder = ctx->builder;
+	LLVMValueRef a_is_le = LLVMBuildICmp(builder, LLVMIntSLE, a, b, "");
+
+	return LLVMBuildSelect(builder, a_is_le, a, b, "");
+}
+
+/* Signed integer maximum: emits a signed "a > b" compare and selects a
+ * when it holds, b otherwise. */
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
+			   LLVMValueRef b)
+{
+	LLVMBuilderRef builder = ctx->builder;
+	LLVMValueRef a_is_gt = LLVMBuildICmp(builder, LLVMIntSGT, a, b, "");
+
+	return LLVMBuildSelect(builder, a_is_gt, a, b, "");
+}
+
LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b)
{
@@ -1506,6 +1521,155 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
AC_FUNC_ATTR_LEGACY);
}
+/* Pack two values into one: val[0] is kept in place and val[1] is shifted
+ * left by 16 before both are OR'ed together.
+ * Upper 16 bits must be zero. */
+static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx,
+					   LLVMValueRef val[2])
+{
+	LLVMValueRef shift = LLVMConstInt(ctx->i32, 16, 0);
+	LLVMValueRef high_half = LLVMBuildShl(ctx->builder, val[1], shift, "");
+
+	return LLVMBuildOr(ctx->builder, val[0], high_half, "");
+}
+
+/* Pack two values via ac_llvm_pack_two_int16 after masking val[0] with
+ * 0xffff; val[1] is passed through unchanged.
+ * Upper 16 bits are ignored and will be dropped. */
+static LLVMValueRef ac_llvm_pack_two_int32_as_int16(struct ac_llvm_context *ctx,
+						    LLVMValueRef val[2])
+{
+	LLVMValueRef low_mask = LLVMConstInt(ctx->i32, 0xffff, 0);
+	LLVMValueRef halves[2];
+
+	halves[0] = LLVMBuildAnd(ctx->builder, val[0], low_mask, "");
+	halves[1] = val[1];
+
+	return ac_llvm_pack_two_int16(ctx, halves);
+}
+
+/* Convert two floats to signed normalized 16-bit integers and pack them
+ * into a single i32.
+ *
+ * With LLVM >= 6.0 this is a single llvm.amdgcn.cvt.pknorm.i16 call whose
+ * v2i16 result is bitcast to i32. Otherwise the conversion is open-coded:
+ * clamp to [-1, 1], scale by 32767, round half away from zero, convert to
+ * a signed integer and pack the low 16 bits of each channel. */
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
+				     LLVMValueRef args[2])
+{
+	LLVMBuilderRef builder = ctx->builder;
+	LLVMValueRef packed[2];
+	int chan;
+
+	if (HAVE_LLVM >= 0x0600) {
+		LLVMValueRef pair =
+			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16",
+					   ctx->v2i16, args, 2,
+					   AC_FUNC_ATTR_READNONE);
+
+		return LLVMBuildBitCast(builder, pair, ctx->i32, "");
+	}
+
+	for (chan = 0; chan < 2; chan++) {
+		LLVMValueRef v, is_non_negative, bias;
+
+		/* Clamp between [-1, 1]. */
+		v = ac_build_fmin(ctx, args[chan], ctx->f32_1);
+		v = ac_build_fmax(ctx, v, LLVMConstReal(ctx->f32, -1));
+
+		/* Scale to a signed integer range of [-32767, 32767]. */
+		v = LLVMBuildFMul(builder, v,
+				  LLVMConstReal(ctx->f32, 32767), "");
+
+		/* Round: add 0.5 if non-negative, else add -0.5. */
+		is_non_negative = LLVMBuildFCmp(builder, LLVMRealOGE,
+						v, ctx->f32_0, "");
+		bias = LLVMBuildSelect(builder, is_non_negative,
+				       LLVMConstReal(ctx->f32, 0.5),
+				       LLVMConstReal(ctx->f32, -0.5), "");
+		v = LLVMBuildFAdd(builder, v, bias, "");
+
+		packed[chan] = LLVMBuildFPToSI(builder, v, ctx->i32, "");
+	}
+
+	return ac_llvm_pack_two_int32_as_int16(ctx, packed);
+}
+
+/* Convert two floats to unsigned normalized 16-bit integers and pack them
+ * into a single i32.
+ *
+ * With LLVM >= 6.0 this is a single llvm.amdgcn.cvt.pknorm.u16 call whose
+ * v2i16 result is bitcast to i32. Otherwise each channel goes through
+ * ac_build_clamp, is scaled by 65535, biased by 0.5, converted to an
+ * unsigned integer, and the low 16 bits of both channels are packed. */
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
+				     LLVMValueRef args[2])
+{
+	LLVMBuilderRef builder = ctx->builder;
+	LLVMValueRef packed[2];
+	int chan;
+
+	if (HAVE_LLVM >= 0x0600) {
+		LLVMValueRef pair =
+			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16",
+					   ctx->v2i16, args, 2,
+					   AC_FUNC_ATTR_READNONE);
+
+		return LLVMBuildBitCast(builder, pair, ctx->i32, "");
+	}
+
+	for (chan = 0; chan < 2; chan++) {
+		LLVMValueRef v = ac_build_clamp(ctx, args[chan]);
+
+		v = LLVMBuildFMul(builder, v,
+				  LLVMConstReal(ctx->f32, 65535), "");
+		/* Add 0.5 before the float-to-unsigned conversion. */
+		v = LLVMBuildFAdd(builder, v,
+				  LLVMConstReal(ctx->f32, 0.5), "");
+		packed[chan] = LLVMBuildFPToUI(builder, v, ctx->i32, "");
+	}
+
+	return ac_llvm_pack_two_int32_as_int16(ctx, packed);
+}
+
+/* Pack two signed integers into the 16-bit halves of a single i32.
+ *
+ * bits selects the clamping range and must be 8, 10 or 16. The 8-bit and
+ * 10-bit clamping is for HW workarounds. When hi is true, args[1] is
+ * treated as the alpha channel; for 10-bit it is clamped to [-2, 1]
+ * instead of the RGB range.
+ *
+ * The clamped values are written back into args[]. With LLVM >= 6.0 the
+ * packing is done by llvm.amdgcn.cvt.pk.i16 and the 16-bit clamp is
+ * skipped (presumably the intrinsic saturates by itself - TODO confirm).
+ * Otherwise the low 16 bits of each clamped value are packed manually. */
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
+				 LLVMValueRef args[2], unsigned bits, bool hi)
+{
+	assert(bits == 8 || bits == 10 || bits == 16);
+
+	LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+		bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
+	/* Negative bounds must be sign-extended: LLVMConstInt takes a 64-bit
+	 * unsigned value, so without SignExtend the constant would be an
+	 * out-of-range unsigned number that only works by truncation. */
+	LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
+		bits == 8 ? -128 : bits == 10 ? -512 : -32768, 1);
+	LLVMValueRef max_alpha =
+		bits != 10 ? max_rgb : ctx->i32_1;
+	LLVMValueRef min_alpha =
+		bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 1);
+	bool has_intrinsic = HAVE_LLVM >= 0x0600;
+
+	/* Clamp. */
+	if (!has_intrinsic || bits != 16) {
+		for (int i = 0; i < 2; i++) {
+			bool alpha = hi && i == 1;
+			args[i] = ac_build_imin(ctx, args[i],
+						alpha ? max_alpha : max_rgb);
+			args[i] = ac_build_imax(ctx, args[i],
+						alpha ? min_alpha : min_rgb);
+		}
+	}
+
+	if (has_intrinsic) {
+		LLVMValueRef res =
+			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
+					   ctx->v2i16, args, 2,
+					   AC_FUNC_ATTR_READNONE);
+		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+	}
+
+	return ac_llvm_pack_two_int32_as_int16(ctx, args);
+}
+
+/* Pack two unsigned integers into the 16-bit halves of a single i32.
+ *
+ * bits selects the clamping limit and must be 8, 10 or 16. The 8-bit and
+ * 10-bit clamping is for HW workarounds. When hi is true, args[1] is
+ * treated as the alpha channel; for 10-bit it is clamped to 3 instead of
+ * the RGB limit.
+ *
+ * The clamped values are written back into args[]. With LLVM >= 6.0 the
+ * packing is done by llvm.amdgcn.cvt.pk.u16 and the 16-bit clamp is
+ * skipped; otherwise the clamped values are packed manually. */
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
+				 LLVMValueRef args[2], unsigned bits, bool hi)
+{
+	assert(bits == 8 || bits == 10 || bits == 16);
+
+	unsigned rgb_limit = 65535;
+
+	if (bits == 8)
+		rgb_limit = 255;
+	else if (bits == 10)
+		rgb_limit = 1023;
+
+	LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, rgb_limit, 0);
+	LLVMValueRef max_alpha = max_rgb;
+
+	if (bits == 10)
+		max_alpha = LLVMConstInt(ctx->i32, 3, 0);
+
+	bool has_intrinsic = HAVE_LLVM >= 0x0600;
+
+	/* Clamp, unless the intrinsic handles the full 16-bit case. */
+	if (!(has_intrinsic && bits == 16)) {
+		args[0] = ac_build_umin(ctx, args[0], max_rgb);
+		/* Only the second element can be alpha, and only if hi. */
+		args[1] = ac_build_umin(ctx, args[1],
+					hi ? max_alpha : max_rgb);
+	}
+
+	if (!has_intrinsic)
+		return ac_llvm_pack_two_int16(ctx, args);
+
+	LLVMValueRef pair =
+		ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
+				   ctx->v2i16, args, 2,
+				   AC_FUNC_ATTR_READNONE);
+
+	return LLVMBuildBitCast(ctx->builder, pair, ctx->i32, "");
+}
+
LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
{
assert(HAVE_LLVM >= 0x0600);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index f95ad6a27c1..35f849a9869 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -53,6 +53,7 @@ struct ac_llvm_context {
LLVMTypeRef f16;
LLVMTypeRef f32;
LLVMTypeRef f64;
+ LLVMTypeRef v2i16;
LLVMTypeRef v2i32;
LLVMTypeRef v3i32;
LLVMTypeRef v4i32;
@@ -258,6 +259,10 @@ LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b);
LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b);
+LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b);
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b);
LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
@@ -302,6 +307,14 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
struct ac_image_args *a);
LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2], unsigned bits, bool hi);
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2], unsigned bits, bool hi);
LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,