summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/swr
diff options
context:
space:
mode:
authorGeorge Kyriazis <george.kyriazis@intel.com>2018-01-19 15:47:12 -0600
committerGeorge Kyriazis <george.kyriazis@intel.com>2018-01-19 16:52:42 -0600
commit8c83d2d3713a6de5830228c40cebc77291277d95 (patch)
treeae71c83c143ec8e68ba1a1f78a5a05142a8005f2 /src/gallium/drivers/swr
parent1874d95a8e820bbae37008c66cdc891ce4922722 (diff)
swr: Support simd16 vertex shaders
Supporting simd16 vertex shaders involves packing the output of the fetch shader appropriately, especially the vertexID buffers that have to be formatted in one simd16 register, needed by the VS. As part of this support, we needed to remove the 2nd JitManager, since it was not accounting for vector width correctly. USE_SIMD16_SHADERS is also split into two defines. The additional one (USE_SIMD16_VS) controls the width of the vertex shader (VS), while the original one (USE_SIMD16_SHADERS) controls overall front end width. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
Diffstat (limited to 'src/gallium/drivers/swr')
-rw-r--r--src/gallium/drivers/swr/swr_screen.cpp6
-rw-r--r--src/gallium/drivers/swr/swr_screen.h3
-rw-r--r--src/gallium/drivers/swr/swr_shader.cpp42
3 files changed, 30 insertions, 21 deletions
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
index 949816aa4b8..b67ac25ac89 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -1064,9 +1064,6 @@ swr_destroy_screen(struct pipe_screen *p_screen)
swr_fence_reference(p_screen, &screen->flush_fence, NULL);
JitDestroyContext(screen->hJitMgr);
-#if USE_SIMD16_SHADERS
- JitDestroyContext(screen->hJitMgr16);
-#endif
if (winsys->destroy)
winsys->destroy(winsys);
@@ -1150,9 +1147,6 @@ swr_create_screen_internal(struct sw_winsys *winsys)
// Pass in "" for architecture for run-time determination
screen->hJitMgr = JitCreateContext(KNOB_SIMD_WIDTH, "", "swr");
-#if USE_SIMD16_SHADERS
- screen->hJitMgr16 = JitCreateContext(16, "", "swr");
-#endif
swr_fence_init(&screen->base);
diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h
index 81b1a18b028..89faab182c5 100644
--- a/src/gallium/drivers/swr/swr_screen.h
+++ b/src/gallium/drivers/swr/swr_screen.h
@@ -49,9 +49,6 @@ struct swr_screen {
uint32_t client_copy_limit;
HANDLE hJitMgr;
-#if USE_SIMD16_SHADERS
- HANDLE hJitMgr16;
-#endif
PFNSwrGetInterface pfnSwrGetInterface;
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
index 257a6aae307..e5fb679f8b3 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -724,7 +724,7 @@ swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
void
BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
{
-#if USE_SIMD16_FRONTEND && !USE_SIMD16_SHADERS
+#if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
// interleave the simdvertex components into the dest simd16vertex
// slot16offset = slot8offset * 2
// comp16offset = comp8offset * 2 + alternateOffset
@@ -787,7 +787,7 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
const_sizes_ptr->setName("num_vs_constants");
Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
-#if USE_SIMD16_SHADERS
+#if USE_SIMD16_VS
vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0));
#endif
@@ -807,11 +807,22 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
struct lp_bld_tgsi_system_values system_values;
memset(&system_values, 0, sizeof(system_values));
system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
+
+#if USE_SIMD16_VS
+ system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID16}));
+#else
system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
+#endif
+
+#if USE_SIMD16_VS
+ uint32_t vectorWidth = mVWidth16;
+#else
+ uint32_t vectorWidth = mVWidth;
+#endif
lp_build_tgsi_soa(gallivm,
swr_vs->pipe.tokens,
- lp_type_float_vec(32, 32 * mVWidth),
+ lp_type_float_vec(32, 32 * vectorWidth),
NULL, // mask
wrap(consts_ptr),
wrap(const_sizes_ptr),
@@ -829,7 +840,7 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
-#if USE_SIMD16_SHADERS
+#if USE_SIMD16_VS
vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0));
#endif
@@ -905,10 +916,21 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
- Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)),
- FADD(FMUL(unwrap(cy), VBROADCAST(py)),
- FADD(FMUL(unwrap(cz), VBROADCAST(pz)),
- FMUL(unwrap(cw), VBROADCAST(pw)))));
+#if USE_SIMD16_VS
+ Value *bpx = VBROADCAST_16(px);
+ Value *bpy = VBROADCAST_16(py);
+ Value *bpz = VBROADCAST_16(pz);
+ Value *bpw = VBROADCAST_16(pw);
+#else
+ Value *bpx = VBROADCAST(px);
+ Value *bpy = VBROADCAST(py);
+ Value *bpz = VBROADCAST(pz);
+ Value *bpw = VBROADCAST(pw);
+#endif
+ Value *dist = FADD(FMUL(unwrap(cx), bpx),
+ FADD(FMUL(unwrap(cy), bpy),
+ FADD(FMUL(unwrap(cz), bpz),
+ FMUL(unwrap(cw), bpw))));
if (val < 4)
WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
@@ -942,11 +964,7 @@ swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
return NULL;
BuilderSWR builder(
-#if USE_SIMD16_SHADERS
- reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr16),
-#else
reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
-#endif
"VS");
PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);