summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/swr/rasterizer/jitter
diff options
context:
space:
mode:
authorTim Rowley <[email protected]>2016-03-03 18:19:45 -0600
committerTim Rowley <[email protected]>2016-03-25 14:43:13 -0500
commitaca55131843dec6da27f76308b2b4a145fc9e152 (patch)
tree02fd9ccf197a4cfe53962986791269d9ebf78324 /src/gallium/drivers/swr/rasterizer/jitter
parentbfb954189e166cee8b748edc29f5751d0c97c608 (diff)
swr: [rasterizer jitter] vpermps support
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/jitter')
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp51
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h1
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py1
3 files changed, 52 insertions, 1 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index f18a9902c8c..b55752c1025 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -776,8 +776,57 @@ Value *Builder::PERMD(Value* a, Value* idx)
}
else
{
- res = VSHUFFLE(a, a, idx);
+ if (isa<Constant>(idx))
+ {
+ res = VSHUFFLE(a, a, idx);
+ }
+ else
+ {
+ res = VUNDEF_I();
+ for (uint32_t l = 0; l < JM()->mVWidth; ++l)
+ {
+ Value* pIndex = VEXTRACT(idx, C(l));
+ Value* pVal = VEXTRACT(a, pIndex);
+ res = VINSERT(res, pVal, C(l));
+ }
+ }
+ }
+ return res;
+}
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Generate a VPERMPS operation (shuffle 32 bit float values
+/// across 128 bit lanes) in LLVM IR. If not supported on the underlying
+/// platform, emulate it
+/// @param a - 256bit SIMD lane(8x32bit) of float values.
+/// @param idx - 256bit SIMD lane(8x32bit) of 3 bit lane index values
+Value *Builder::PERMPS(Value* a, Value* idx)
+{
+ Value* res;
+ // use avx2 permute instruction if available
+ if (JM()->mArch.AVX2())
+ {
+ // llvm 3.6.0 swapped the order of the args to vpermd
+ res = VPERMPS(idx, a);
+ }
+ else
+ {
+ if (isa<Constant>(idx))
+ {
+ res = VSHUFFLE(a, a, idx);
+ }
+ else
+ {
+ res = VUNDEF_F();
+ for (uint32_t l = 0; l < JM()->mVWidth; ++l)
+ {
+ Value* pIndex = VEXTRACT(idx, C(l));
+ Value* pVal = VEXTRACT(a, pIndex);
+ res = VINSERT(res, pVal, C(l));
+ }
+ }
}
+
return res;
}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index 172550e28b1..18c30a2891f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -115,6 +115,7 @@ Value *PSHUFB(Value* a, Value* b);
Value *PMOVSXBD(Value* a);
Value *PMOVSXWD(Value* a);
Value *PERMD(Value* a, Value* idx);
+Value *PERMPS(Value* a, Value* idx);
Value *CVTPH2PS(Value* a);
Value *CVTPS2PH(Value* a, Value* rounding);
Value *PMAXSD(Value* a, Value* b);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py
index 1814b7c8d5f..c78c9784b3d 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py
@@ -103,6 +103,7 @@ intrinsics = [
["VPMOVSXBD", "x86_avx2_pmovsxbd", ["a"]], # sign extend packed 8bit components
["VPMOVSXWD", "x86_avx2_pmovsxwd", ["a"]], # sign extend packed 16bit components
["VPERMD", "x86_avx2_permd", ["idx", "a"]],
+ ["VPERMPS", "x86_avx2_permps", ["idx", "a"]],
["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]],
["VCVTPS2PH", "x86_vcvtps2ph_256", ["a", "round"]],
["VHSUBPS", "x86_avx_hsub_ps_256", ["a", "b"]],