summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJosé Fonseca <[email protected]>2009-08-09 12:39:38 +0100
committerJosé Fonseca <[email protected]>2009-08-29 09:21:27 +0100
commit2529ed5616b1b152766a3355444260b88184cd6e (patch)
tree4b234e210edefacdf8bedfebfeb201d9281655af
parent20f50b845b375c2fde9910c51b46cec23c95dd45 (diff)
llvmpipe: SoA blending.
Throughput seems to be 4x higher.
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld.h11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend.h94
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c (renamed from src/gallium/drivers/llvmpipe/lp_bld_blend.c)88
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c237
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c347
6 files changed, 635 insertions, 145 deletions
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index aca4f21b9ed..0a8e6e8fad6 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -11,6 +11,8 @@ llvmpipe = env.ConvenienceLibrary(
'lp_fs_sse.c',
'lp_fs_llvm.c',
'lp_bld_arit.c',
+ 'lp_bld_blend_aos.c',
+ 'lp_bld_blend_soa.c',
'lp_bld_const.c',
'lp_bld_conv.c',
'lp_bld_intr.c',
@@ -20,7 +22,6 @@ llvmpipe = env.ConvenienceLibrary(
'lp_bld_store.c',
'lp_bld_loop.c',
'lp_bld_logicop.c',
- 'lp_bld_blend.c',
'lp_bld_swizzle.c',
'lp_bld_type.c',
'lp_clear.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld.h b/src/gallium/drivers/llvmpipe/lp_bld.h
index e9d9c25a800..a725cbb4740 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld.h
@@ -45,7 +45,6 @@
#include "pipe/p_format.h"
-struct pipe_blend_state;
union lp_type;
@@ -132,14 +131,4 @@ lp_build_logicop(LLVMBuilderRef builder,
LLVMValueRef dst);
-LLVMValueRef
-lp_build_blend(LLVMBuilderRef builder,
- const struct pipe_blend_state *blend,
- union lp_type type,
- LLVMValueRef src,
- LLVMValueRef dst,
- LLVMValueRef const_,
- unsigned alpha_swizzle);
-
-
#endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
new file mode 100644
index 00000000000..36f53dae935
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -0,0 +1,94 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_BLEND_H
+#define LP_BLD_BLEND_H
+
+
+/**
+ * @file
+ * LLVM IR building helper interfaces for blending.
+ *
+ * We use LLVM-C bindings for now. They are not documented, but follow the C++
+ * interfaces very closely, and appear to be complete enough for code
+ * generation. See
+ * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
+ * for a standalone example.
+ */
+
+#include <llvm-c/Core.h>
+
+#include "pipe/p_format.h"
+
+
+struct pipe_blend_state;
+union lp_type;
+struct lp_build_context;
+
+
+/**
+ * Whether the blending function is commutative or not.
+ *
+ * ADD, MIN and MAX are reported as commutative; SUBTRACT and
+ * REVERSE_SUBTRACT are not.
+ *
+ * @param func  one of the PIPE_BLEND_x blend functions
+ */
+boolean
+lp_build_blend_func_commutative(unsigned func);
+
+
+/**
+ * Whether the blending functions are the reverse of each other.
+ *
+ * This is only the case when one of them is PIPE_BLEND_SUBTRACT and the
+ * other one is PIPE_BLEND_REVERSE_SUBTRACT.
+ *
+ * @param rgb_func    blend function used for the RGB channels
+ * @param alpha_func  blend function used for the alpha channel
+ */
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func);
+
+
+/**
+ * Combine two blend terms with the given blend function.
+ *
+ * PIPE_BLEND_SUBTRACT passes (term1, term2) to lp_build_sub(), while
+ * PIPE_BLEND_REVERSE_SUBTRACT passes them swapped.
+ *
+ * @param bld    build context used to emit the operation
+ * @param func   one of the PIPE_BLEND_x blend functions
+ * @param term1  first term
+ * @param term2  second term
+ * @return the combined value
+ */
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
+ unsigned func,
+ LLVMValueRef term1,
+ LLVMValueRef term2);
+
+
+/**
+ * Build the blending code for values in AoS form.
+ *
+ * @param builder        LLVM builder the instructions are emitted with
+ * @param blend          blend state providing the functions and factors
+ * @param type           type of the src/dst/const_ values
+ * @param src            source color
+ * @param dst            destination color
+ * @param const_         constant blend color
+ * @param alpha_swizzle  presumably the position of the alpha channel inside
+ *                       each pixel (the blend test passes 3) -- TODO confirm
+ * @return the blended value
+ */
+LLVMValueRef
+lp_build_blend_aos(LLVMBuilderRef builder,
+ const struct pipe_blend_state *blend,
+ union lp_type type,
+ LLVMValueRef src,
+ LLVMValueRef dst,
+ LLVMValueRef const_,
+ unsigned alpha_swizzle);
+
+
+/**
+ * Build the blending code for values in SoA form.
+ *
+ * Every array holds one value per color channel. Channels 0-2 are blended
+ * with the RGB function and factors of the blend state, channel 3 with the
+ * alpha ones.
+ *
+ * @param builder  LLVM builder the instructions are emitted with
+ * @param blend    blend state providing the functions and factors
+ * @param type     type of each per-channel value
+ * @param src      source color channels
+ * @param dst      destination color channels
+ * @param const_   constant blend color channels
+ * @param res      receives the blended channels
+ */
+void
+lp_build_blend_soa(LLVMBuilderRef builder,
+ const struct pipe_blend_state *blend,
+ union lp_type type,
+ LLVMValueRef src[4],
+ LLVMValueRef dst[4],
+ LLVMValueRef const_[4],
+ LLVMValueRef res[4]);
+
+
+#endif /* !LP_BLD_BLEND_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index a144469b354..e4a57af94c7 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -28,10 +28,7 @@
/**
* @file
- * Blend LLVM IR generation.
- *
- * This code is generic -- it should be able to cope both with floating point
- * and integer inputs in AOS form.
+ * Blend LLVM IR generation -- AOS form.
*
* @author Jose Fonseca <[email protected]>
*/
@@ -39,11 +36,11 @@
#include "pipe/p_state.h"
-#include "lp_bld.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_swizzle.h"
+#include "lp_bld_blend.h"
/**
@@ -51,7 +48,7 @@
* recomputing them. Also reusing the values allows us to do simplifications
* that LLVM optimization passes wouldn't normally be able to do.
*/
-struct lp_build_blend_context
+struct lp_build_blend_aos_context
{
struct lp_build_context base;
@@ -72,7 +69,7 @@ struct lp_build_blend_context
static LLVMValueRef
-lp_build_blend_factor_unswizzled(struct lp_build_blend_context *bld,
+lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
unsigned factor,
boolean alpha)
{
@@ -174,7 +171,7 @@ lp_build_blend_factor_swizzle(unsigned factor)
static LLVMValueRef
-lp_build_blend_swizzle(struct lp_build_blend_context *bld,
+lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
LLVMValueRef rgb,
LLVMValueRef alpha,
enum lp_build_blend_swizzle rgb_swizzle,
@@ -211,7 +208,7 @@ lp_build_blend_swizzle(struct lp_build_blend_context *bld,
* @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
*/
static LLVMValueRef
-lp_build_blend_factor(struct lp_build_blend_context *bld,
+lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
LLVMValueRef factor1,
unsigned rgb_factor,
unsigned alpha_factor,
@@ -233,44 +230,75 @@ lp_build_blend_factor(struct lp_build_blend_context *bld,
}
+/**
+ * Tell whether swapping the two terms of a blend function leaves its result
+ * unchanged.
+ *
+ * @param func  one of the PIPE_BLEND_x blend functions
+ * @return FALSE for the two subtract variants, TRUE otherwise
+ */
+boolean
+lp_build_blend_func_commutative(unsigned func)
+{
+   /* Subtraction, in either direction, is the only order sensitive function */
+   if (func == PIPE_BLEND_SUBTRACT ||
+       func == PIPE_BLEND_REVERSE_SUBTRACT)
+      return FALSE;
+
+   /* Anything that is not ADD, MIN or MAX at this point is an unknown function */
+   if (func != PIPE_BLEND_ADD &&
+       func != PIPE_BLEND_MIN &&
+       func != PIPE_BLEND_MAX)
+      assert(0);
+
+   return TRUE;
+}
+
+
+/**
+ * Tell whether the RGB and alpha blend functions are the reverse of each
+ * other, i.e. one is SUBTRACT and the other one REVERSE_SUBTRACT.
+ *
+ * @param rgb_func    blend function used for the RGB channels
+ * @param alpha_func  blend function used for the alpha channel
+ * @return TRUE for a SUBTRACT / REVERSE_SUBTRACT pairing, FALSE otherwise
+ *         (which includes two identical functions)
+ */
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
+{
+   switch (rgb_func) {
+   case PIPE_BLEND_SUBTRACT:
+      return alpha_func == PIPE_BLEND_REVERSE_SUBTRACT ? TRUE : FALSE;
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      return alpha_func == PIPE_BLEND_SUBTRACT ? TRUE : FALSE;
+   default:
+      /* No other function has a reversed counterpart */
+      return FALSE;
+   }
+}
+
+
/**
* @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
*/
-static LLVMValueRef
-lp_build_blend_func(struct lp_build_blend_context *bld,
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
unsigned func,
LLVMValueRef term1,
LLVMValueRef term2)
{
switch (func) {
case PIPE_BLEND_ADD:
- return lp_build_add(&bld->base, term1, term2);
+ return lp_build_add(bld, term1, term2);
break;
case PIPE_BLEND_SUBTRACT:
- return lp_build_sub(&bld->base, term1, term2);
+ return lp_build_sub(bld, term1, term2);
case PIPE_BLEND_REVERSE_SUBTRACT:
- return lp_build_sub(&bld->base, term2, term1);
+ return lp_build_sub(bld, term2, term1);
case PIPE_BLEND_MIN:
- return lp_build_min(&bld->base, term1, term2);
+ return lp_build_min(bld, term1, term2);
case PIPE_BLEND_MAX:
- return lp_build_max(&bld->base, term1, term2);
+ return lp_build_max(bld, term1, term2);
default:
assert(0);
- return bld->base.zero;
+ return bld->zero;
}
}
LLVMValueRef
-lp_build_blend(LLVMBuilderRef builder,
- const struct pipe_blend_state *blend,
- union lp_type type,
- LLVMValueRef src,
- LLVMValueRef dst,
- LLVMValueRef const_,
- unsigned alpha_swizzle)
+lp_build_blend_aos(LLVMBuilderRef builder,
+ const struct pipe_blend_state *blend,
+ union lp_type type,
+ LLVMValueRef src,
+ LLVMValueRef dst,
+ LLVMValueRef const_,
+ unsigned alpha_swizzle)
{
- struct lp_build_blend_context bld;
+ struct lp_build_blend_aos_context bld;
LLVMValueRef src_term;
LLVMValueRef dst_term;
@@ -284,8 +312,8 @@ lp_build_blend(LLVMBuilderRef builder,
bld.dst = dst;
bld.const_ = const_;
- /* TODO: There are still a few optimization oportunities here. For certain
- * combinations it is possible to reorder the operations and therefor saving
+ /* TODO: There are still a few optimization opportunities here. For certain
+ * combinations it is possible to reorder the operations and therefore saving
* some instructions. */
src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle);
@@ -297,7 +325,7 @@ lp_build_blend(LLVMBuilderRef builder,
#endif
if(blend->rgb_func == blend->alpha_func) {
- return lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
+ return lp_build_blend_func(&bld.base, blend->rgb_func, src_term, dst_term);
}
else {
/* Seperate RGB / A functions */
@@ -305,8 +333,8 @@ lp_build_blend(LLVMBuilderRef builder,
LLVMValueRef rgb;
LLVMValueRef alpha;
- rgb = lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
- alpha = lp_build_blend_func(&bld, blend->alpha_func, src_term, dst_term);
+ rgb = lp_build_blend_func(&bld.base, blend->rgb_func, src_term, dst_term);
+ alpha = lp_build_blend_func(&bld.base, blend->alpha_func, src_term, dst_term);
return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
new file mode 100644
index 00000000000..1ef1718cfda
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
@@ -0,0 +1,237 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation -- SoA.
+ *
+ * @author Jose Fonseca <[email protected]>
+ */
+
+
+#include "pipe/p_state.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_blend.h"
+
+
+/**
+ * State shared by the SoA blend code generation.
+ *
+ * We may use the same values several times, so we keep them here to avoid
+ * recomputing them. Also reusing the values allows us to do simplifications
+ * that LLVM optimization passes wouldn't normally be able to do.
+ */
+struct lp_build_blend_soa_context
+{
+ struct lp_build_context base;
+
+ /* Inputs, one value per color channel; index 3 is used as alpha */
+ LLVMValueRef src[4];
+ LLVMValueRef dst[4];
+ LLVMValueRef con[4];
+
+ /*
+  * Results of lp_build_comp() on the inputs above. An entry stays NULL until
+  * a blend factor needs it for the first time.
+  */
+ LLVMValueRef inv_src[4];
+ LLVMValueRef inv_dst[4];
+ LLVMValueRef inv_con[4];
+
+ /* Cached lp_build_min() of src[3] and inv_dst[3]; NULL until first needed */
+ LLVMValueRef src_alpha_saturate;
+
+ /**
+ * We store all factors in a table in order to eliminate redundant
+ * multiplications later.
+ *
+ * Row 0 holds the colors and row 1 the blend factors they are multiplied
+ * by. Columns 0-3 are the source channels, columns 4-7 the destination
+ * channels.
+ */
+ LLVMValueRef factor[2][8];
+
+ /**
+ * Table with all terms.
+ *
+ * Entry i is the product of factor[0][i] and factor[1][i], so entries 0-3
+ * are the source terms and entries 4-7 the destination terms.
+ */
+ LLVMValueRef term[8];
+};
+
+
+/**
+ * Return the value stored in a cache slot, building it with lp_build_comp()
+ * from the given value if the slot is still empty.
+ */
+static LLVMValueRef
+lp_build_blend_soa_comp(struct lp_build_blend_soa_context *bld,
+                        LLVMValueRef *slot,
+                        LLVMValueRef value)
+{
+   if(!*slot)
+      *slot = lp_build_comp(&bld->base, value);
+   return *slot;
+}
+
+
+/**
+ * Return the value of a blend factor for one channel.
+ *
+ * Complements and the alpha saturate value are built on first use and kept in
+ * the context, so asking for the same factor again emits no new instructions.
+ *
+ * @param bld     SoA blend context holding the inputs and the cached values
+ * @param factor  one of the PIPE_BLENDFACTOR_x factors
+ * @param i       channel the factor is wanted for; 3 is the alpha channel
+ * @return the factor value; zero for unsupported or unknown factors
+ */
+static LLVMValueRef
+lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
+                          unsigned factor, unsigned i)
+{
+   switch (factor) {
+   /* Constants */
+   case PIPE_BLENDFACTOR_ZERO:
+      return bld->base.zero;
+   case PIPE_BLENDFACTOR_ONE:
+      return bld->base.one;
+
+   /* Plain inputs */
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      return bld->src[i];
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      return bld->src[3];
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      return bld->dst[i];
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      return bld->dst[3];
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      return bld->con[i];
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      return bld->con[3];
+
+   /* Complemented inputs */
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      return lp_build_blend_soa_comp(bld, &bld->inv_src[i], bld->src[i]);
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      return lp_build_blend_soa_comp(bld, &bld->inv_src[3], bld->src[3]);
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      return lp_build_blend_soa_comp(bld, &bld->inv_dst[i], bld->dst[i]);
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      return lp_build_blend_soa_comp(bld, &bld->inv_dst[3], bld->dst[3]);
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      return lp_build_blend_soa_comp(bld, &bld->inv_con[i], bld->con[i]);
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      return lp_build_blend_soa_comp(bld, &bld->inv_con[3], bld->con[3]);
+
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      /* The alpha channel always gets a factor of one */
+      if(i == 3)
+         return bld->base.one;
+
+      /* The color channels share a single min(src alpha, inv dst alpha) */
+      lp_build_blend_soa_comp(bld, &bld->inv_dst[3], bld->dst[3]);
+      if(!bld->src_alpha_saturate)
+         bld->src_alpha_saturate = lp_build_min(&bld->base, bld->src[3], bld->inv_dst[3]);
+      return bld->src_alpha_saturate;
+
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      /* TODO */
+   default:
+      assert(0);
+      return bld->base.zero;
+   }
+}
+
+
+/**
+ * Generate the LLVM IR for blending in SoA form.
+ *
+ * Every array holds one value per color channel. Channels 0-2 are blended
+ * with the RGB function and factors of the blend state, channel 3 with the
+ * alpha ones. Multiplications and function applications whose operands were
+ * already seen for an earlier channel are reused instead of emitted again.
+ *
+ * @param builder  LLVM builder the instructions are emitted with
+ * @param blend    blend state providing the functions and factors
+ * @param type     type of each per-channel value
+ * @param src      source color channels
+ * @param dst      destination color channels
+ * @param con      constant blend color channels
+ * @param res      receives the blended channels
+ */
+void
+lp_build_blend_soa(LLVMBuilderRef builder,
+                   const struct pipe_blend_state *blend,
+                   union lp_type type,
+                   LLVMValueRef src[4],
+                   LLVMValueRef dst[4],
+                   LLVMValueRef con[4],
+                   LLVMValueRef res[4])
+{
+   struct lp_build_blend_soa_context bld;
+   unsigned i, j;
+
+   /* Setup build context; zeroing it marks every cached value as not built yet */
+   memset(&bld, 0, sizeof bld);
+   lp_build_context_init(&bld.base, builder, type);
+   for (i = 0; i < 4; ++i) {
+      bld.src[i] = src[i];
+      bld.dst[i] = dst[i];
+      bld.con[i] = con[i];
+   }
+
+   /*
+    * Compute src/dst factors.
+    *
+    * Row 0 holds the colors and row 1 the factors they get multiplied by;
+    * columns 0-3 are the source channels, columns 4-7 the destination ones.
+    */
+   for (i = 0; i < 4; ++i) {
+      unsigned src_factor = i < 3 ? blend->rgb_src_factor : blend->alpha_src_factor;
+      unsigned dst_factor = i < 3 ? blend->rgb_dst_factor : blend->alpha_dst_factor;
+      bld.factor[0][0 + i] = src[i];
+      bld.factor[1][0 + i] = lp_build_blend_soa_factor(&bld, src_factor, i);
+      bld.factor[0][4 + i] = dst[i];
+      bld.factor[1][4 + i] = lp_build_blend_soa_factor(&bld, dst_factor, i);
+   }
+
+   /*
+    * Compute src/dst terms
+    */
+   for (i = 0; i < 8; ++i) {
+
+      /* See if this multiplication has been previously computed, in either
+       * operand order */
+      for(j = 0; j < i; ++j) {
+         if((bld.factor[0][j] == bld.factor[0][i] &&
+             bld.factor[1][j] == bld.factor[1][i]) ||
+            (bld.factor[0][j] == bld.factor[1][i] &&
+             bld.factor[1][j] == bld.factor[0][i]))
+            break;
+      }
+
+      if(j < i)
+         bld.term[i] = bld.term[j];
+      else
+         bld.term[i] = lp_build_mul(&bld.base, bld.factor[0][i], bld.factor[1][i]);
+   }
+
+   /*
+    * Combine terms
+    */
+   for (i = 0; i < 4; ++i) {
+      unsigned func = i < 3 ? blend->rgb_func : blend->alpha_func;
+      boolean func_commutative = lp_build_blend_func_commutative(func);
+
+      /*
+       * See if this function has been previously applied.
+       *
+       * The operands of the function are the terms, so those are what must
+       * be compared: term[0..3] are the source terms and term[4..7] the
+       * destination terms. (factor[] is a two dimensional table; indexing it
+       * with a single subscript would compare row addresses, never values.)
+       */
+      for(j = 0; j < i; ++j) {
+         unsigned prev_func = j < 3 ? blend->rgb_func : blend->alpha_func;
+         boolean func_same = func == prev_func;
+         boolean func_reverse = lp_build_blend_func_reverse(func, prev_func);
+
+         /* Same operands in the same order: only the same function gives the
+          * same result */
+         if(func_same &&
+            bld.term[0 + j] == bld.term[0 + i] &&
+            bld.term[4 + j] == bld.term[4 + i])
+            break;
+
+         /* Same operands in swapped order: the result matches if the shared
+          * function is commutative, or if the functions are the reverse of
+          * each other */
+         if(((func_same && func_commutative) || func_reverse) &&
+            bld.term[0 + j] == bld.term[4 + i] &&
+            bld.term[4 + j] == bld.term[0 + i])
+            break;
+      }
+
+      if(j < i)
+         res[i] = res[j];
+      else
+         res[i] = lp_build_blend_func(&bld.base, func, bld.term[i + 0], bld.term[i + 4]);
+   }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index c8901fea984..8bf5508bd4e 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -37,12 +37,19 @@
*/
-#include "lp_bld.h"
#include "lp_bld_type.h"
#include "lp_bld_arit.h"
+#include "lp_bld_blend.h"
#include "lp_test.h"
+/**
+ * Data layout the blend code is generated and tested for.
+ *
+ * The numeric values matter: the tests iterate over the modes with an integer
+ * loop from 0 to 1 and treat any non-zero mode as SoA.
+ */
+enum vector_mode
+{
+ AoS = 0,
+ SoA = 1
+};
+
+
typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);
@@ -52,6 +59,7 @@ write_tsv_header(FILE *fp)
fprintf(fp,
"result\t"
"cycles_per_channel\t"
+ "mode\t"
"type\t"
"sep_func\t"
"sep_src_factor\t"
@@ -70,13 +78,22 @@ write_tsv_header(FILE *fp)
static void
write_tsv_row(FILE *fp,
const struct pipe_blend_state *blend,
+ enum vector_mode mode,
union lp_type type,
double cycles,
boolean success)
{
fprintf(fp, "%s\t", success ? "pass" : "fail");
- fprintf(fp, "%.1f\t", cycles / type.length);
+ if (mode == AoS) {
+ fprintf(fp, "%.1f\t", cycles / type.length);
+ fprintf(fp, "aos\t");
+ }
+
+ if (mode == SoA) {
+ fprintf(fp, "%.1f\t", cycles / (4 * type.length));
+ fprintf(fp, "soa\t");
+ }
fprintf(fp, "%s%u%sx%u\t",
type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
@@ -106,10 +123,19 @@ write_tsv_row(FILE *fp,
static void
dump_blend_type(FILE *fp,
const struct pipe_blend_state *blend,
+ enum vector_mode mode,
union lp_type type)
{
+ fprintf(fp, "%s", mode ? "soa" : "aos");
+
+ fprintf(fp, " type=%s%u%sx%u",
+ type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
+ type.width,
+ type.norm ? "n" : "",
+ type.length);
+
fprintf(fp,
- "%s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
+ " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
"rgb_func", debug_dump_blend_func(blend->rgb_func, TRUE),
"rgb_src_factor", debug_dump_blend_factor(blend->rgb_src_factor, TRUE),
"rgb_dst_factor", debug_dump_blend_factor(blend->rgb_dst_factor, TRUE),
@@ -117,12 +143,6 @@ dump_blend_type(FILE *fp,
"alpha_src_factor", debug_dump_blend_factor(blend->alpha_src_factor, TRUE),
"alpha_dst_factor", debug_dump_blend_factor(blend->alpha_dst_factor, TRUE));
- fprintf(fp, " type=%s%u%sx%u",
- type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
- type.width,
- type.norm ? "n" : "",
- type.length);
-
fprintf(fp, " ...\n");
fflush(fp);
}
@@ -131,6 +151,7 @@ dump_blend_type(FILE *fp,
static LLVMValueRef
add_blend_test(LLVMModuleRef module,
const struct pipe_blend_state *blend,
+ enum vector_mode mode,
union lp_type type)
{
LLVMTypeRef ret_type;
@@ -143,10 +164,6 @@ add_blend_test(LLVMModuleRef module,
LLVMValueRef res_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
- LLVMValueRef src;
- LLVMValueRef dst;
- LLVMValueRef con;
- LLVMValueRef res;
ret_type = LLVMInt64Type();
vec_type = lp_build_vec_type(type);
@@ -163,15 +180,51 @@ add_blend_test(LLVMModuleRef module,
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- src = LLVMBuildLoad(builder, src_ptr, "src");
- dst = LLVMBuildLoad(builder, dst_ptr, "dst");
- con = LLVMBuildLoad(builder, const_ptr, "const");
+ if (mode == AoS) {
+ LLVMValueRef src;
+ LLVMValueRef dst;
+ LLVMValueRef con;
+ LLVMValueRef res;
+
+ src = LLVMBuildLoad(builder, src_ptr, "src");
+ dst = LLVMBuildLoad(builder, dst_ptr, "dst");
+ con = LLVMBuildLoad(builder, const_ptr, "const");
- res = lp_build_blend(builder, blend, type, src, dst, con, 3);
+ res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);
- LLVMSetValueName(res, "res");
+ LLVMSetValueName(res, "res");
- LLVMBuildStore(builder, res, res_ptr);
+ LLVMBuildStore(builder, res, res_ptr);
+ }
+
+ if (mode == SoA) {
+ LLVMValueRef src[4];
+ LLVMValueRef dst[4];
+ LLVMValueRef con[4];
+ LLVMValueRef res[4];
+ char src_name[5] = "src?";
+ char dst_name[5] = "dst?";
+ char con_name[5] = "con?";
+ char res_name[5] = "res?";
+ unsigned i;
+
+ for(i = 0; i < 4; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ con_name[3] = dst_name[3] = src_name[3] = "rgba"[i];
+ src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), src_name);
+ dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), dst_name);
+ con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), con_name);
+ }
+
+ lp_build_blend_soa(builder, blend, type, src, dst, con, res);
+
+ for(i = 0; i < 4; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ res_name[3] = "rgba"[i];
+ LLVMSetValueName(res[i], res_name);
+ LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
+ }
+ }
LLVMBuildRetVoid(builder);;
@@ -415,6 +468,7 @@ static boolean
test_one(unsigned verbose,
FILE *fp,
const struct pipe_blend_state *blend,
+ enum vector_mode mode,
union lp_type type)
{
LLVMModuleRef module = NULL;
@@ -431,11 +485,11 @@ test_one(unsigned verbose,
unsigned i, j;
if(verbose >= 1)
- dump_blend_type(stdout, blend, type);
+ dump_blend_type(stdout, blend, mode, type);
module = LLVMModuleCreateWithName("test");
- func = add_blend_test(module, blend, type);
+ func = add_blend_test(module, blend, mode, type);
if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
LLVMDumpModule(module);
@@ -446,7 +500,7 @@ test_one(unsigned verbose,
provider = LLVMCreateModuleProviderForExistingModule(module);
if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
if(verbose < 1)
- dump_blend_type(stderr, blend, type);
+ dump_blend_type(stderr, blend, mode, type);
fprintf(stderr, "%s\n", error);
LLVMDisposeMessage(error);
abort();
@@ -474,66 +528,148 @@ test_one(unsigned verbose,
success = TRUE;
for(i = 0; i < n && success; ++i) {
- uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- int64_t start_counter = 0;
- int64_t end_counter = 0;
-
- random_vec(type, src);
- random_vec(type, dst);
- random_vec(type, con);
-
- {
- double fsrc[LP_MAX_VECTOR_LENGTH];
- double fdst[LP_MAX_VECTOR_LENGTH];
- double fcon[LP_MAX_VECTOR_LENGTH];
- double fref[LP_MAX_VECTOR_LENGTH];
-
- read_vec(type, src, fsrc);
- read_vec(type, dst, fdst);
- read_vec(type, con, fcon);
-
- for(j = 0; j < type.length; j += 4)
- compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
-
- write_vec(type, ref, fref);
+ if(mode == AoS) {
+ uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ int64_t start_counter = 0;
+ int64_t end_counter = 0;
+
+ random_vec(type, src);
+ random_vec(type, dst);
+ random_vec(type, con);
+
+ {
+ double fsrc[LP_MAX_VECTOR_LENGTH];
+ double fdst[LP_MAX_VECTOR_LENGTH];
+ double fcon[LP_MAX_VECTOR_LENGTH];
+ double fref[LP_MAX_VECTOR_LENGTH];
+
+ read_vec(type, src, fsrc);
+ read_vec(type, dst, fdst);
+ read_vec(type, con, fcon);
+
+ for(j = 0; j < type.length; j += 4)
+ compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
+
+ write_vec(type, ref, fref);
+ }
+
+ start_counter = rdtsc();
+ blend_test_ptr(src, dst, con, res);
+ end_counter = rdtsc();
+
+ cycles[i] = end_counter - start_counter;
+
+ if(!compare_vec(type, res, ref)) {
+ success = FALSE;
+
+ if(verbose < 1)
+ dump_blend_type(stderr, blend, mode, type);
+ fprintf(stderr, "MISMATCH\n");
+
+ fprintf(stderr, " Src: ");
+ dump_vec(stderr, type, src);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Dst: ");
+ dump_vec(stderr, type, dst);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Con: ");
+ dump_vec(stderr, type, con);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Res: ");
+ dump_vec(stderr, type, res);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Ref: ");
+ dump_vec(stderr, type, ref);
+ fprintf(stderr, "\n");
+ }
}
- start_counter = rdtsc();
- blend_test_ptr(src, dst, con, res);
- end_counter = rdtsc();
+ if(mode == SoA) {
+ const unsigned stride = type.length*type.width/8;
+ uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ int64_t start_counter = 0;
+ int64_t end_counter = 0;
+ boolean mismatch;
+
+ for(j = 0; j < 4; ++j) {
+ random_vec(type, src + j*stride);
+ random_vec(type, dst + j*stride);
+ random_vec(type, con + j*stride);
+ }
- cycles[i] = end_counter - start_counter;
+ {
+ double fsrc[4];
+ double fdst[4];
+ double fcon[4];
+ double fref[4];
+ unsigned k;
+
+ for(k = 0; k < type.length; ++k) {
+ for(j = 0; j < 4; ++j) {
+ fsrc[j] = read_elem(type, src + j*stride, k);
+ fdst[j] = read_elem(type, dst + j*stride, k);
+ fcon[j] = read_elem(type, con + j*stride, k);
+ }
- success = compare_vec(type, res, ref);
+ compute_blend_ref(blend, fsrc, fdst, fcon, fref);
- if (!success) {
- if(verbose < 1)
- dump_blend_type(stderr, blend, type);
- fprintf(stderr, "MISMATCH\n");
+ for(j = 0; j < 4; ++j)
+ write_elem(type, ref + j*stride, k, fref[j]);
+ }
+ }
+
+ start_counter = rdtsc();
+ blend_test_ptr(src, dst, con, res);
+ end_counter = rdtsc();
+
+ cycles[i] = end_counter - start_counter;
+
+ mismatch = FALSE;
+ for (j = 0; j < 4; ++j)
+ if(!compare_vec(type, res + j*stride, ref + j*stride))
+ mismatch = TRUE;
- fprintf(stderr, " Src: ");
- dump_vec(stderr, type, src);
- fprintf(stderr, "\n");
+ if (mismatch) {
+ success = FALSE;
- fprintf(stderr, " Dst: ");
- dump_vec(stderr, type, dst);
- fprintf(stderr, "\n");
+ if(verbose < 1)
+ dump_blend_type(stderr, blend, mode, type);
+ fprintf(stderr, "MISMATCH\n");
+ for(j = 0; j < 4; ++j) {
+ char channel = "RGBA"[j];
+ fprintf(stderr, " Src%c: ", channel);
+ dump_vec(stderr, type, src + j*stride);
+ fprintf(stderr, "\n");
- fprintf(stderr, " Con: ");
- dump_vec(stderr, type, con);
- fprintf(stderr, "\n");
+ fprintf(stderr, " Dst%c: ", channel);
+ dump_vec(stderr, type, dst + j*stride);
+ fprintf(stderr, "\n");
- fprintf(stderr, " Res: ");
- dump_vec(stderr, type, res);
- fprintf(stderr, "\n");
+ fprintf(stderr, " Con%c: ", channel);
+ dump_vec(stderr, type, con + j*stride);
+ fprintf(stderr, "\n");
- fprintf(stderr, " Ref: ");
- dump_vec(stderr, type, ref);
- fprintf(stderr, "\n");
+ fprintf(stderr, " Res%c: ", channel);
+ dump_vec(stderr, type, res + j*stride);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Ref%c: ", channel);
+ dump_vec(stderr, type, ref + j*stride);
+ fprintf(stderr, "\n");
+ }
+ }
}
}
@@ -569,7 +705,7 @@ test_one(unsigned verbose,
}
if(fp)
- write_tsv_row(fp, blend, type, cycles_avg, success);
+ write_tsv_row(fp, blend, mode, type, cycles_avg, success);
if (!success) {
if(verbose < 2)
@@ -650,6 +786,7 @@ test_all(unsigned verbose, FILE *fp)
const unsigned *alpha_src_factor;
const unsigned *alpha_dst_factor;
struct pipe_blend_state blend;
+ enum vector_mode mode;
const union lp_type *type;
bool success = TRUE;
@@ -659,24 +796,26 @@ test_all(unsigned verbose, FILE *fp)
for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
- for(type = blend_types; type < &blend_types[num_types]; ++type) {
-
- if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
- *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
- continue;
-
- memset(&blend, 0, sizeof blend);
- blend.blend_enable = 1;
- blend.rgb_func = *rgb_func;
- blend.rgb_src_factor = *rgb_src_factor;
- blend.rgb_dst_factor = *rgb_dst_factor;
- blend.alpha_func = *alpha_func;
- blend.alpha_src_factor = *alpha_src_factor;
- blend.alpha_dst_factor = *alpha_dst_factor;
-
- if(!test_one(verbose, fp, &blend, *type))
- success = FALSE;
-
+ for(mode = 0; mode < 2; ++mode) {
+ for(type = blend_types; type < &blend_types[num_types]; ++type) {
+
+ if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
+ continue;
+
+ memset(&blend, 0, sizeof blend);
+ blend.blend_enable = 1;
+ blend.rgb_func = *rgb_func;
+ blend.rgb_src_factor = *rgb_src_factor;
+ blend.rgb_dst_factor = *rgb_dst_factor;
+ blend.alpha_func = *alpha_func;
+ blend.alpha_src_factor = *alpha_src_factor;
+ blend.alpha_dst_factor = *alpha_dst_factor;
+
+ if(!test_one(verbose, fp, &blend, mode, *type))
+ success = FALSE;
+
+ }
}
}
}
@@ -699,6 +838,7 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
const unsigned *alpha_src_factor;
const unsigned *alpha_dst_factor;
struct pipe_blend_state blend;
+ enum vector_mode mode;
const union lp_type *type;
unsigned long i;
bool success = TRUE;
@@ -717,20 +857,21 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
alpha_dst_factor = &blend_factors[random() % num_factors];
} while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
- for(type = blend_types; type < &blend_types[num_types]; ++type) {
+ mode = random() & 1;
- memset(&blend, 0, sizeof blend);
- blend.blend_enable = 1;
- blend.rgb_func = *rgb_func;
- blend.rgb_src_factor = *rgb_src_factor;
- blend.rgb_dst_factor = *rgb_dst_factor;
- blend.alpha_func = *alpha_func;
- blend.alpha_src_factor = *alpha_src_factor;
- blend.alpha_dst_factor = *alpha_dst_factor;
+ type = &blend_types[random() % num_types];
- if(!test_one(verbose, fp, &blend, *type))
- success = FALSE;
- }
+ memset(&blend, 0, sizeof blend);
+ blend.blend_enable = 1;
+ blend.rgb_func = *rgb_func;
+ blend.rgb_src_factor = *rgb_src_factor;
+ blend.rgb_dst_factor = *rgb_dst_factor;
+ blend.alpha_func = *alpha_func;
+ blend.alpha_src_factor = *alpha_src_factor;
+ blend.alpha_dst_factor = *alpha_dst_factor;
+
+ if(!test_one(verbose, fp, &blend, mode, *type))
+ success = FALSE;
}
return success;