diff options
author | Tim Rowley <[email protected]> | 2016-03-03 18:19:45 -0600 |
---|---|---|
committer | Tim Rowley <[email protected]> | 2016-03-25 14:43:13 -0500 |
commit | aca55131843dec6da27f76308b2b4a145fc9e152 (patch) | |
tree | 02fd9ccf197a4cfe53962986791269d9ebf78324 /src/gallium/drivers/swr/rasterizer/jitter | |
parent | bfb954189e166cee8b748edc29f5751d0c97c608 (diff) |
swr: [rasterizer jitter] vpermps support
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/jitter')
3 files changed, 52 insertions, 1 deletion
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index f18a9902c8c..b55752c1025 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -776,8 +776,57 @@ Value *Builder::PERMD(Value* a, Value* idx)
     }
     else
     {
-        res = VSHUFFLE(a, a, idx);
+        if (isa<Constant>(idx))
+        {
+            res = VSHUFFLE(a, a, idx);
+        }
+        else
+        {
+            res = VUNDEF_I();
+            for (uint32_t l = 0; l < JM()->mVWidth; ++l)
+            {
+                Value* pIndex = VEXTRACT(idx, C(l));
+                Value* pVal = VEXTRACT(a, pIndex);
+                res = VINSERT(res, pVal, C(l));
+            }
+        }
+    }
+    return res;
+}
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief Generate a VPERMPS operation (shuffle 32 bit float values
+/// across 128 bit lanes) in LLVM IR. If not supported on the underlying
+/// platform, emulate it
+/// @param a - 256bit SIMD lane(8x32bit) of float values.
+/// @param idx - 256bit SIMD lane(8x32bit) of 3 bit lane index values
+Value *Builder::PERMPS(Value* a, Value* idx)
+{
+    Value* res;
+    // use avx2 permute instruction if available
+    if (JM()->mArch.AVX2())
+    {
+        // llvm 3.6.0 swapped the order of the args to vpermd
+        res = VPERMPS(idx, a);
+    }
+    else
+    {
+        if (isa<Constant>(idx))
+        {
+            res = VSHUFFLE(a, a, idx);
+        }
+        else
+        {
+            res = VUNDEF_F();
+            for (uint32_t l = 0; l < JM()->mVWidth; ++l)
+            {
+                Value* pIndex = VEXTRACT(idx, C(l));
+                Value* pVal = VEXTRACT(a, pIndex);
+                res = VINSERT(res, pVal, C(l));
+            }
+        }
     }
+
     return res;
 }
 
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index 172550e28b1..18c30a2891f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -115,6 +115,7 @@ Value *PSHUFB(Value* a, Value* b);
 Value *PMOVSXBD(Value* a);
 Value *PMOVSXWD(Value* a);
 Value *PERMD(Value* a, Value* idx);
+Value *PERMPS(Value* a, Value* idx);
 Value *CVTPH2PS(Value* a);
 Value *CVTPS2PH(Value* a, Value* rounding);
 Value *PMAXSD(Value* a, Value* b);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py
index 1814b7c8d5f..c78c9784b3d 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py
@@ -103,6 +103,7 @@ intrinsics = [
         ["VPMOVSXBD", "x86_avx2_pmovsxbd", ["a"]], # sign extend packed 8bit components
         ["VPMOVSXWD", "x86_avx2_pmovsxwd", ["a"]], # sign extend packed 16bit components
         ["VPERMD", "x86_avx2_permd", ["idx", "a"]],
+        ["VPERMPS", "x86_avx2_permps", ["idx", "a"]],
         ["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]],
         ["VCVTPS2PH", "x86_vcvtps2ph_256", ["a", "round"]],
         ["VHSUBPS", "x86_avx_hsub_ps_256", ["a", "b"]],