summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorTim Rowley <[email protected]>2017-08-09 17:32:28 -0500
committerTim Rowley <[email protected]>2017-09-06 11:01:39 -0500
commit4475583f5ea44c3585e0ffea6118ba3a32fddd72 (patch)
tree7b0a8dc3970e043993b3652eedad1f43d821dd0f /src
parent5c9af800cbce71f818b5d5e8ce9bfc5b56611360 (diff)
swr/rast: Allow gather of floats from fetch shader with 2-4GB offsets
Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py1
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp7
2 files changed, 7 insertions, 1 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 2ed2b2f61eb..025d38ab336 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -45,6 +45,7 @@ intrinsics = [
['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
+ ['VPSRLI', 'x86_avx2_psrli_d', ['src', 'imm']],
['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']],
['VRSQRTPS', 'x86_avx_rsqrt_ps_256', ['a']],
['VRCPPS', 'x86_avx_rcp_ps_256', ['a']],
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index dcfe8970f5c..761c58ca27e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -1005,7 +1005,12 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
Value *vMask = vGatherMask;
// Gather a SIMD of vertices
- vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1));
+ // APIs allow a 4GB range for offsets
+ // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :(
+ // But, we know that elements must be aligned for FETCH. :)
+ // Right shift the offset by a bit and then scale by 2 to remove the sign extension.
+ Value* vShiftedOffsets = VPSRLI(vOffsets, C(1));
+ vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vMask, C((char)2));
}
else
{