diff options
author | Tim Rowley <[email protected]> | 2017-08-09 17:32:28 -0500 |
---|---|---|
committer | Tim Rowley <[email protected]> | 2017-09-06 11:01:39 -0500 |
commit | 4475583f5ea44c3585e0ffea6118ba3a32fddd72 (patch) | |
tree | 7b0a8dc3970e043993b3652eedad1f43d821dd0f /src/gallium | |
parent | 5c9af800cbce71f818b5d5e8ce9bfc5b56611360 (diff) |
swr/rast: Allow gather of floats from fetch shader with 2-4GB offsets
Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py | 1 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp | 7 |
2 files changed, 7 insertions, 1 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index 2ed2b2f61eb..025d38ab336 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -45,6 +45,7 @@ intrinsics = [ ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale']], ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']], + ['VPSRLI', 'x86_avx2_psrli_d', ['src', 'imm']], ['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']], ['VRSQRTPS', 'x86_avx_rsqrt_ps_256', ['a']], ['VRCPPS', 'x86_avx_rcp_ps_256', ['a']], diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index dcfe8970f5c..761c58ca27e 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -1005,7 +1005,12 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value *vMask = vGatherMask; // Gather a SIMD of vertices - vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1)); + // APIs allow a 4GB range for offsets + // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :( + // But, we know that elements must be aligned for FETCH. :) + // Right shift the offset by a bit and then scale by 2 to remove the sign extension. + Value* vShiftedOffsets = VPSRLI(vOffsets, C(1)); + vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vMask, C((char)2)); } else { |