diff options
author | Tim Rowley <[email protected]> | 2017-12-12 14:23:50 -0600 |
---|---|---|
committer | Tim Rowley <[email protected]> | 2017-12-15 10:56:54 -0600 |
commit | c68b2d5c79239e721d8825e373a02fc843d15f6a (patch) | |
tree | d3b41c37f3226644487960b99c4b4ebc2ce69f69 /src/gallium/drivers/swr | |
parent | 20f9006603139a479b756c593c04a540041e3471 (diff) |
swr/rast: Replace VPSRL with LSHR
Replace use of the x86-specific VPSRLI intrinsic with the general LLVM IR LSHR instruction.
Generates the same final assembly.
Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers/swr')
4 files changed, 4 insertions, 41 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index 8bbf36d9b83..9544353eb97 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -47,8 +47,6 @@ intrinsics = [ ['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 'mask', 'scale']], - ['VPSRLI', 'x86_avx2_psrli_d', ['src', 'imm']], - ['VPSRLI_16', 'x86_avx512_psrli_d_512', ['src', 'imm']], ['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']], ['VRSQRTPS', 'x86_avx_rsqrt_ps_256', ['a']], ['VRCPPS', 'x86_avx_rcp_ps_256', ['a']], diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 684c9fac549..bdcafd28a39 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -809,36 +809,6 @@ namespace SwrJit } #if USE_SIMD16_BUILDER - Value *Builder::PSRLI(Value *a, Value *imm) - { - return VPSRLI(a, imm); - } - - Value *Builder::PSRLI_16(Value *a, Value *imm) - { - Value *result = VUNDEF2_I(); - - // use avx512 shift right instruction if available - if (JM()->mArch.AVX512F()) - { - result = VPSRLI_16(a, imm); - } - else - { - Value *a0 = EXTRACT2_I(a, 0); - Value *a1 = EXTRACT2_I(a, 1); - - Value *result0 = PSRLI(a0, imm); - Value *result1 = PSRLI(a1, imm); - - result = JOIN2(result0, result1); - } - - return result; - } - -#endif -#if USE_SIMD16_BUILDER ////////////////////////////////////////////////////////////////////////// /// @brief Value *Builder::EXTRACT2_F(Value *a2, uint32_t imm) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h index 6c883d8f52b..98bc5633512 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h @@ -143,11 +143,6 @@ void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets, Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1); -#if USE_SIMD16_BUILDER -Value *PSRLI(Value *a, Value *imm); -Value *PSRLI_16(Value *a, Value *imm); - -#endif void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask); void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index 1312ac00093..8d97ddfdc98 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -1422,12 +1422,12 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, // But, we know that elements must be aligned for FETCH. :) // Right shift the offset by a bit and then scale by 2 to remove the sign extension. 
#if USE_SIMD16_BUILDER - Value *shiftedOffsets = VPSRLI_16(vOffsets16, C(1)); + Value *shiftedOffsets = LSHR(vOffsets16, 1); pVtxSrc2[currentVertexElement] = GATHERPS_16(gatherSrc16, pStreamBase, shiftedOffsets, vGatherMask16, 2); #else - Value *vShiftedOffsets = VPSRLI(vOffsets, C(1)); - Value *vShiftedOffsets2 = VPSRLI(vOffsets2, C(1)); + Value *vShiftedOffsets = LSHR(vOffsets, 1); + Value *vShiftedOffsets2 = LSHR(vOffsets2, 1); vVertexElements[currentVertexElement] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vGatherMask, 2); vVertexElements2[currentVertexElement] = GATHERPS(gatherSrc2, pStreamBase, vShiftedOffsets2, vGatherMask2, 2); @@ -1492,7 +1492,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :( // But, we know that elements must be aligned for FETCH. :) // Right shift the offset by a bit and then scale by 2 to remove the sign extension. - Value* vShiftedOffsets = VPSRLI(vOffsets, C(1)); + Value* vShiftedOffsets = LSHR(vOffsets, 1); vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vGatherMask, 2); } else |