summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/auxiliary/gallivm
diff options
context:
space:
mode:
author Jose Fonseca <[email protected]> 2016-04-04 00:05:33 +0100
committer Jose Fonseca <[email protected]> 2016-06-10 13:47:35 +0100
commit 320d1191c61a0a82444605c12e5c4b2ee0b241eb (patch)
tree 3d230eb992c8160e289aec139935cb7629ce2468 /src/gallium/auxiliary/gallivm
parent 9e8edfa19034ae69139ef10b88f958b4f58d57ea (diff)
gallivm: Use llvm.fmuladd.*.
Reviewed-by: Roland Scheidegger <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_arit.c 90
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_arit.h 15
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c 3
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample.c 12
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 14
5 files changed, 87 insertions, 47 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 11a1e7d002a..5d6a0335654 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -50,7 +50,6 @@
#include "util/u_memory.h"
#include "util/u_debug.h"
#include "util/u_math.h"
-#include "util/u_string.h"
#include "util/u_cpu_detect.h"
#include "lp_bld_type.h"
@@ -262,6 +261,22 @@ lp_build_min_simple(struct lp_build_context *bld,
}
+LLVMValueRef
+lp_build_fmuladd(LLVMBuilderRef builder,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ LLVMValueRef c)
+{ /* Emit a call to the llvm.fmuladd.* intrinsic with operands (a, b, c) and return the call's value. */
+ LLVMTypeRef type = LLVMTypeOf(a); /* LLVM type of a; b and c are asserted to have the same type */
+ assert(type == LLVMTypeOf(b));
+ assert(type == LLVMTypeOf(c));
+ char intrinsic[32]; /* buffer that receives the formatted intrinsic name */
+ lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.fmuladd", type); /* presumably appends a suffix derived from type to the base name - confirm in lp_format_intrinsic */
+ LLVMValueRef args[] = { a, b, c };
+ return lp_build_intrinsic(builder, intrinsic, type, args, 3, 0); /* 3 == number of entries in args */
+}
+
+
/**
* Generate max(a, b)
* No checks for special case values of a or b = 1 or 0 are done.
@@ -1023,6 +1038,22 @@ lp_build_mul(struct lp_build_context *bld,
}
+/* a * b + c
+ *
+ * Multiply-add in the type of the build context:
+ * - when bld->type.floating is set, the result comes from
+ *   lp_build_fmuladd() (the llvm.fmuladd intrinsic) on bld->gallivm->builder;
+ * - otherwise it is built as lp_build_mul(bld, a, b) followed by
+ *   lp_build_add(bld, ..., c).
+ */
+LLVMValueRef
+lp_build_mad(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ LLVMValueRef c)
+{
+ const struct lp_type type = bld->type;
+ if (type.floating) {
+ return lp_build_fmuladd(bld->gallivm->builder, a, b, c);
+ } else {
+ return lp_build_add(bld, lp_build_mul(bld, a, b), c);
+ }
+}
+
+
/**
* Small vector x scale multiplication optimization.
*/
@@ -1153,6 +1184,11 @@ lp_build_lerp_simple(struct lp_build_context *bld,
delta = lp_build_sub(bld, v1, v0);
+ if (bld->type.floating) {
+ assert(flags == 0);
+ return lp_build_mad(bld, x, delta, v0);
+ }
+
if (flags & LP_BLD_LERP_WIDE_NORMALIZED) {
if (!bld->type.sign) {
if (!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)) {
@@ -2717,23 +2753,10 @@ lp_build_sin_or_cos(struct lp_build_context *bld,
/*
* The magic pass: "Extended precision modular arithmetic"
* x = ((x - y * DP1) - y * DP2) - y * DP3;
- * xmm1 = _mm_mul_ps(y, xmm1);
- * xmm2 = _mm_mul_ps(y, xmm2);
- * xmm3 = _mm_mul_ps(y, xmm3);
- */
- LLVMValueRef xmm1 = LLVMBuildFMul(b, y_2, DP1, "xmm1");
- LLVMValueRef xmm2 = LLVMBuildFMul(b, y_2, DP2, "xmm2");
- LLVMValueRef xmm3 = LLVMBuildFMul(b, y_2, DP3, "xmm3");
-
- /*
- * x = _mm_add_ps(x, xmm1);
- * x = _mm_add_ps(x, xmm2);
- * x = _mm_add_ps(x, xmm3);
*/
-
- LLVMValueRef x_1 = LLVMBuildFAdd(b, x_abs, xmm1, "x_1");
- LLVMValueRef x_2 = LLVMBuildFAdd(b, x_1, xmm2, "x_2");
- LLVMValueRef x_3 = LLVMBuildFAdd(b, x_2, xmm3, "x_3");
+ LLVMValueRef x_1 = lp_build_fmuladd(b, y_2, DP1, x_abs);
+ LLVMValueRef x_2 = lp_build_fmuladd(b, y_2, DP2, x_1);
+ LLVMValueRef x_3 = lp_build_fmuladd(b, y_2, DP3, x_2);
/*
* Evaluate the first polynom (0 <= x <= Pi/4)
@@ -2755,10 +2778,8 @@ lp_build_sin_or_cos(struct lp_build_context *bld,
* y = *(v4sf*)_ps_coscof_p0;
* y = _mm_mul_ps(y, z);
*/
- LLVMValueRef y_3 = LLVMBuildFMul(b, z, coscof_p0, "y_3");
- LLVMValueRef y_4 = LLVMBuildFAdd(b, y_3, coscof_p1, "y_4");
- LLVMValueRef y_5 = LLVMBuildFMul(b, y_4, z, "y_5");
- LLVMValueRef y_6 = LLVMBuildFAdd(b, y_5, coscof_p2, "y_6");
+ LLVMValueRef y_4 = lp_build_fmuladd(b, z, coscof_p0, coscof_p1);
+ LLVMValueRef y_6 = lp_build_fmuladd(b, y_4, z, coscof_p2);
LLVMValueRef y_7 = LLVMBuildFMul(b, y_6, z, "y_7");
LLVMValueRef y_8 = LLVMBuildFMul(b, y_7, z, "y_8");
@@ -2796,13 +2817,10 @@ lp_build_sin_or_cos(struct lp_build_context *bld,
* y2 = _mm_add_ps(y2, x);
*/
- LLVMValueRef y2_3 = LLVMBuildFMul(b, z, sincof_p0, "y2_3");
- LLVMValueRef y2_4 = LLVMBuildFAdd(b, y2_3, sincof_p1, "y2_4");
- LLVMValueRef y2_5 = LLVMBuildFMul(b, y2_4, z, "y2_5");
- LLVMValueRef y2_6 = LLVMBuildFAdd(b, y2_5, sincof_p2, "y2_6");
+ LLVMValueRef y2_4 = lp_build_fmuladd(b, z, sincof_p0, sincof_p1);
+ LLVMValueRef y2_6 = lp_build_fmuladd(b, y2_4, z, sincof_p2);
LLVMValueRef y2_7 = LLVMBuildFMul(b, y2_6, z, "y2_7");
- LLVMValueRef y2_8 = LLVMBuildFMul(b, y2_7, x_3, "y2_8");
- LLVMValueRef y2_9 = LLVMBuildFAdd(b, y2_8, x_3, "y2_9");
+ LLVMValueRef y2_9 = lp_build_fmuladd(b, y2_7, x_3, x_3);
/*
* select the correct result from the two polynoms
@@ -2969,19 +2987,19 @@ lp_build_polynomial(struct lp_build_context *bld,
if (i % 2 == 0) {
if (even)
- even = lp_build_add(bld, coeff, lp_build_mul(bld, x2, even));
+ even = lp_build_mad(bld, x2, even, coeff);
else
even = coeff;
} else {
if (odd)
- odd = lp_build_add(bld, coeff, lp_build_mul(bld, x2, odd));
+ odd = lp_build_mad(bld, x2, odd, coeff);
else
odd = coeff;
}
}
if (odd)
- return lp_build_add(bld, lp_build_mul(bld, odd, x), even);
+ return lp_build_mad(bld, odd, x, even);
else if (even)
return even;
else
@@ -3212,7 +3230,7 @@ lp_build_log2_approx(struct lp_build_context *bld,
LLVMValueRef exp = NULL;
LLVMValueRef mant = NULL;
LLVMValueRef logexp = NULL;
- LLVMValueRef logmant = NULL;
+ LLVMValueRef p_z = NULL;
LLVMValueRef res = NULL;
assert(lp_check_value(bld->type, x));
@@ -3261,13 +3279,11 @@ lp_build_log2_approx(struct lp_build_context *bld,
z = lp_build_mul(bld, y, y);
/* compute P(z) */
- logmant = lp_build_polynomial(bld, z, lp_build_log2_polynomial,
- ARRAY_SIZE(lp_build_log2_polynomial));
-
- /* logmant = y * P(z) */
- logmant = lp_build_mul(bld, y, logmant);
+ p_z = lp_build_polynomial(bld, z, lp_build_log2_polynomial,
+ ARRAY_SIZE(lp_build_log2_polynomial));
- res = lp_build_add(bld, logmant, logexp);
+ /* y * P(z) + logexp */
+ res = lp_build_mad(bld, y, p_z, logexp);
if (type.floating && handle_edge_cases) {
LLVMValueRef negmask, infmask, zmask;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index e76977cfb0b..622b930a937 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -87,6 +87,21 @@ lp_build_div(struct lp_build_context *bld,
LLVMValueRef b);
+/* llvm.fmuladd.* intrinsic
+ *
+ * Emits a call to the intrinsic with operands (a, b, c) using the given
+ * builder and returns the resulting value. The definition in lp_bld_arit.c
+ * asserts that a, b and c all have the same LLVM type.
+ */
+LLVMValueRef
+lp_build_fmuladd(LLVMBuilderRef builder,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ LLVMValueRef c);
+
+/* a * b + c
+ *
+ * For floating-point build contexts the definition in lp_bld_arit.c uses
+ * lp_build_fmuladd(); for all other contexts it uses lp_build_mul()
+ * followed by lp_build_add().
+ */
+LLVMValueRef
+lp_build_mad(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ LLVMValueRef c);
+
+
/**
* Set when the weights for normalized are prescaled, that is, in range
* 0..2**n, as opposed to range 0..2**(n-1).
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
index a6311a1ce84..d5cf698b060 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
@@ -289,8 +289,7 @@ lp_build_linear_to_srgb(struct gallivm_state *gallivm,
c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f);
tmp = lp_build_mul(&f32_bld, a_const, x0375);
- tmp2 = lp_build_mul(&f32_bld, b_const, x05);
- tmp2 = lp_build_add(&f32_bld, tmp2, c_const);
+ tmp2 = lp_build_mad(&f32_bld, b_const, x05, c_const);
pow_final = lp_build_add(&f32_bld, tmp, tmp2);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 4befb3a1c80..a4b3a7b8348 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -580,10 +580,8 @@ lp_build_brilinear_lod(struct lp_build_context *bld,
lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
- lod_fpart = lp_build_mul(bld, lod_fpart,
- lp_build_const_vec(bld->gallivm, bld->type, factor));
-
- lod_fpart = lp_build_add(bld, lod_fpart,
+ lod_fpart = lp_build_mad(bld, lod_fpart,
+ lp_build_const_vec(bld->gallivm, bld->type, factor),
lp_build_const_vec(bld->gallivm, bld->type, post_offset));
/*
@@ -639,10 +637,8 @@ lp_build_brilinear_rho(struct lp_build_context *bld,
/* fpart = rho / 2**ipart */
lod_fpart = lp_build_extract_mantissa(bld, rho);
- lod_fpart = lp_build_mul(bld, lod_fpart,
- lp_build_const_vec(bld->gallivm, bld->type, factor));
-
- lod_fpart = lp_build_add(bld, lod_fpart,
+ lod_fpart = lp_build_mad(bld, lod_fpart,
+ lp_build_const_vec(bld->gallivm, bld->type, factor),
lp_build_const_vec(bld->gallivm, bld->type, post_offset));
/*
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 43af6b4ea0d..1ee97049235 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -1577,6 +1577,19 @@ log_emit_cpu(
}
+/* TGSI_OPCODE_MAD (CPU Only)
+ *
+ * Writes lp_build_mad(args[0], args[1], args[2]), i.e. the multiply-add
+ * args[0] x args[1] + args[2], built in the bld_base->base context, to
+ * emit_data->output[emit_data->chan]. The action parameter is not used.
+ */
+
+static void
+mad_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] =
+ lp_build_mad(&bld_base->base,
+ emit_data->args[0], emit_data->args[1], emit_data->args[2]);
+}
+
/* TGSI_OPCODE_MAX (CPU Only) */
static void
@@ -2162,6 +2175,7 @@ lp_set_default_actions_cpu(
bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit_cpu;
bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu;
bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu;
bld_base->op_actions[TGSI_OPCODE_MOD].emit = mod_emit_cpu;