aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorGeorge Kyriazis <[email protected]>2018-04-10 01:05:19 -0500
committerGeorge Kyriazis <[email protected]>2018-04-18 10:51:38 -0500
commit0899122c03f06eba89889090b1fb1ab1d4d3ddff (patch)
tree9a992cc0ade7025256d9bd492f5298ca2961dfb6 /src/gallium/drivers
parentec7154abc057debdb7e439d2b2f42ae2b9b0d888 (diff)
swr/rast: Optimize late/bindless JIT of samplers
Add per-worker thread private data to all shader calls Add per-worker sampler cache and jit context Add late LoadTexel JIT support Add per-worker-thread Sampler / LoadTexel JIT Reviewed-by: Bruce Cherniak <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp7
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.h47
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.cpp9
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.h4
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp19
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend_impl.h7
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp5
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp6
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/binner.cpp2
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/context.h3
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp29
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp4
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h15
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/state.h18
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.cpp68
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.h5
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp21
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/tilemgr.h4
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp16
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp8
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp1
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp1
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp1
-rw-r--r--src/gallium/drivers/swr/swr_memory.h9
-rw-r--r--src/gallium/drivers/swr/swr_shader.cpp9
25 files changed, 213 insertions, 105 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 3141db69ef1..e37e2e4a538 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -122,6 +122,11 @@ HANDLE SwrCreateContext(
pContext->apiThreadInfo.numAPIThreadsPerCore = 1;
}
+ if (pCreateInfo->pWorkerPrivateState)
+ {
+ pContext->workerPrivateState = *pCreateInfo->pWorkerPrivateState;
+ }
+
memset(&pContext->WaitLock, 0, sizeof(pContext->WaitLock));
memset(&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty));
new (&pContext->WaitLock) std::mutex();
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index 7247fa4215f..b171188c927 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -115,7 +115,8 @@ struct SWR_RECT
/// @param x - destination x coordinate
/// @param y - destination y coordinate
/// @param pDstHotTile - pointer to the hot tile surface
-typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, SWR_FORMAT dstFormat,
+typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
+ SWR_FORMAT dstFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pDstHotTile);
@@ -127,7 +128,8 @@ typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, SWR_FORMAT dstForma
/// @param x - destination x coordinate
/// @param y - destination y coordinate
/// @param pSrcHotTile - pointer to the hot tile surface
-typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT srcFormat,
+typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
+ SWR_FORMAT srcFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pSrcHotTile);
@@ -139,7 +141,7 @@ typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT srcForm
/// @param y - destination y coordinate
/// @param renderTargetArrayIndex - render target array offset from arrayIndex
/// @param pClearColor - pointer to the hot tile's clear value
-typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext,
+typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
SWR_RENDERTARGET_ATTACHMENT rtIndex,
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, const float* pClearColor);
@@ -208,6 +210,21 @@ struct SWR_API_THREADING_INFO
// Independent of KNOB_MAX_THREADS_PER_CORE.
};
+//////////////////////////////////////////////////////////////////////////
+/// SWR_WORKER_PRIVATE_STATE
+/// Data used to allocate per-worker thread private data. A pointer
+/// to this data will be passed in to each shader function.
+/////////////////////////////////////////////////////////////////////////
+struct SWR_WORKER_PRIVATE_STATE
+{
+ typedef void (SWR_API *PFN_WORKER_DATA)(HANDLE hWorkerPrivateData, uint32_t iWorkerNum);
+
+ size_t perWorkerPrivateStateSize; ///< Amount of data to allocate per-worker
+ PFN_WORKER_DATA pfnInitWorkerData; ///< Init function for worker data. If null
+ ///< worker data will be initialized to 0.
+ PFN_WORKER_DATA pfnFinishWorkerData; ///< Finish / destroy function for worker data.
+ ///< Can be null.
+};
//////////////////////////////////////////////////////////////////////////
/// SWR_CREATECONTEXT_INFO
@@ -216,7 +233,10 @@ struct SWR_CREATECONTEXT_INFO
{
// External functions (e.g. sampler) need per draw context state.
// Use SwrGetPrivateContextState() to access private state.
- uint32_t privateStateSize;
+ size_t privateStateSize;
+
+ // Optional per-worker state, can be NULL for no worker-private data
+ SWR_WORKER_PRIVATE_STATE* pWorkerPrivateState;
// Callback functions
PFN_LOAD_TILE pfnLoadTile;
@@ -229,23 +249,23 @@ struct SWR_CREATECONTEXT_INFO
// Pointer to rdtsc buckets mgr returned to the caller.
// Only populated when KNOB_ENABLE_RDTSC is set
- BucketManager* pBucketMgr;
+ BucketManager* pBucketMgr;
// Output: size required memory passed to for SwrSaveState / SwrRestoreState
- size_t contextSaveSize;
+ size_t contextSaveSize;
// ArchRast event manager.
- HANDLE hArEventManager;
+ HANDLE hArEventManager;
// Input (optional): Threading info that overrides any set KNOB values.
- SWR_THREADING_INFO* pThreadInfo;
+ SWR_THREADING_INFO* pThreadInfo;
- // Input (optional}: Info for reserving API threads
- SWR_API_THREADING_INFO* pApiThreadInfo;
+ // Input (optional): Info for reserving API threads
+ SWR_API_THREADING_INFO* pApiThreadInfo;
// Input: if set to non-zero value, overrides KNOB value for maximum
// number of draws in flight
- uint32_t MAX_DRAWS_IN_FLIGHT;
+ uint32_t MAX_DRAWS_IN_FLIGHT;
};
//////////////////////////////////////////////////////////////////////////
@@ -714,6 +734,7 @@ SWR_FUNC(void, SwrInit);
/// @param x, y - Coordinates to raster tile.
/// @param pDstHotTile - Pointer to Hot Tile
SWR_FUNC(void, SwrLoadHotTile,
+ HANDLE hWorkerPrivateData,
const SWR_SURFACE_STATE *pSrcSurface,
SWR_FORMAT dstFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
@@ -728,6 +749,7 @@ SWR_FUNC(void, SwrLoadHotTile,
/// @param x, y - Coordinates to raster tile.
/// @param pSrcHotTile - Pointer to Hot Tile
SWR_FUNC(void, SwrStoreHotTileToSurface,
+ HANDLE hWorkerPrivateData,
SWR_SURFACE_STATE *pDstSurface,
SWR_FORMAT srcFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
@@ -741,6 +763,7 @@ SWR_FUNC(void, SwrStoreHotTileToSurface,
/// @param x, y - Coordinates to raster tile.
/// @param pClearColor - Pointer to clear color
SWR_FUNC(void, SwrStoreHotTileClear,
+ HANDLE hWorkerPrivateData,
SWR_SURFACE_STATE *pDstSurface,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
uint32_t x,
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 1e0769ae574..5ac9ceb165e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -78,7 +78,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
csContext.pScratchSpace = (uint8_t*)pScratchSpace;
csContext.scratchSpacePerSimd = pDC->pState->state.scratchSpaceSize;
- state.pfnCsFunc(GetPrivateState(pDC), &csContext);
+ state.pfnCsFunc(GetPrivateState(pDC), pContext->threadPool.pThreadData[workerId].pWorkerPrivateData, &csContext);
UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
AR_EVENT(CSStats(csContext.stats.numInstExecuted));
@@ -107,6 +107,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
SWR_RENDERTARGET_ATTACHMENT attachment)
{
SWR_CONTEXT *pContext = pDC->pContext;
+ HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
RDTSC_BEGIN(BEStoreTiles, pDC->drawId);
@@ -139,7 +140,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[srcFormat];
SWR_ASSERT(pfnClearTiles != nullptr);
- pfnClearTiles(pDC, attachment, macroTile, pHotTile->renderTargetArrayIndex, pHotTile->clearData, pDesc->rect);
+ pfnClearTiles(pDC, hWorkerPrivateData, attachment, macroTile, pHotTile->renderTargetArrayIndex, pHotTile->clearData, pDesc->rect);
}
if (pHotTile->state == HOTTILE_DIRTY || pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
@@ -147,7 +148,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
int32_t destX = KNOB_MACROTILE_X_DIM * x;
int32_t destY = KNOB_MACROTILE_Y_DIM * y;
- pContext->pfnStoreTile(GetPrivateState(pDC), srcFormat,
+ pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, srcFormat,
attachment, destX, destY, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index c8c37e65257..7a842fe0e20 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -41,7 +41,7 @@ void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTil
void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
void ProcessShutdownBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
-typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect);
+typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, HANDLE hWorkerData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect);
extern PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS];
extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp
index baaa7e61f75..af031f9f9d7 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -76,7 +76,7 @@ void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
#endif
template<SWR_FORMAT format>
-INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
+INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
{
// convert clear color to hottile format
// clear color is in RGBA float/uint32
@@ -146,7 +146,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
- HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples, renderTargetArrayIndex);
+ HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, hWorkerPrivateData, macroTile, rt, true, numSamples, renderTargetArrayIndex);
uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
@@ -172,6 +172,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
{
SWR_CONTEXT *pContext = pDC->pContext;
+ HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
if (KNOB_FAST_CLEAR)
{
@@ -191,7 +192,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
{
mask &= ~(1 << rt);
- HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
+ HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
// All we want to do here is to mark the hot tile as being in a "needs clear" state.
pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
@@ -204,14 +205,14 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
{
- HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
+ HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth;
pHotTile->state = HOTTILE_CLEAR;
}
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
{
- HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
+ HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
pHotTile->clearData[0] = pClear->clearStencil;
pHotTile->state = HOTTILE_CLEAR;
@@ -242,7 +243,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
{
mask &= ~(1 << rt);
- pfnClearTiles(pDC, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
+ pfnClearTiles(pDC, hWorkerPrivateData, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
}
}
@@ -253,7 +254,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
SWR_ASSERT(pfnClearTiles != nullptr);
- pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
+ pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
}
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
@@ -262,7 +263,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
clearData[0] = pClear->clearStencil;
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
- pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
+ pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
}
RDTSC_END(BEClear, 1);
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
index 20b2ec58287..05234c21822 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -884,6 +884,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
BarycentricCoeffs coeffs;
SetupBarycentricCoeffs(&coeffs, work);
+ SWR_CONTEXT *pContext = pDC->pContext;
+ void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+
SWR_PS_CONTEXT psContext;
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
SetupPixelShaderContext<T>(&psContext, samplePos, work);
@@ -964,7 +967,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
- state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
+ state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
RDTSC_END(BEPixelShader, 0);
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
index c7c6c533e37..5940aa7ba45 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -43,6 +43,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
RDTSC_BEGIN(BESampleRateBackend, pDC->drawId);
RDTSC_BEGIN(BESetup, pDC->drawId);
+ void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
const API_STATE &state = GetApiState(pDC);
BarycentricCoeffs coeffs;
@@ -163,7 +164,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
- state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
+ state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
RDTSC_END(BEPixelShader, 0);
// update stats
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
index 26d5a75bd12..aaaba636ed3 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -43,6 +43,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId);
RDTSC_BEGIN(BESetup, pDC->drawId);
+ void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+
const API_STATE &state = GetApiState(pDC);
BarycentricCoeffs coeffs;
@@ -146,7 +148,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
- state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
+ state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
RDTSC_END(BEPixelShader, 0);
// update stats
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index d31fd37095d..9f8dc887aa6 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 7bc69f50723..af8f4b8db4f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -483,6 +483,7 @@ struct SWR_CONTEXT
THREAD_POOL threadPool; // Thread pool associated with this context
SWR_THREADING_INFO threadInfo;
SWR_API_THREADING_INFO apiThreadInfo;
+ SWR_WORKER_PRIVATE_STATE workerPrivateState;
uint32_t MAX_DRAWS_IN_FLIGHT;
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 30c2e7bab51..9630afa036d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -799,6 +799,8 @@ static void GeometryShaderStage(
{
RDTSC_BEGIN(FEGeometryShader, pDC->drawId);
+ void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+
const API_STATE& state = GetApiState(pDC);
const SWR_GS_STATE* pState = &state.gsState;
SWR_GS_CONTEXT gsContext;
@@ -850,7 +852,7 @@ static void GeometryShaderStage(
gsContext.mask = GenerateMask(numInputPrims);
// execute the geometry shader
- state.pfnGsFunc(GetPrivateState(pDC), &gsContext);
+ state.pfnGsFunc(GetPrivateState(pDC), pWorkerData, &gsContext);
AR_EVENT(GSStats(gsContext.stats.numInstExecuted));
for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
@@ -1169,6 +1171,7 @@ static void TessellationStages(
{
const API_STATE& state = GetApiState(pDC);
const SWR_TS_STATE& tsState = state.tsState;
+ void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
SWR_ASSERT(gt_pTessellationThreadData);
@@ -1250,7 +1253,7 @@ static void TessellationStages(
// Run the HS
RDTSC_BEGIN(FEHullShader, pDC->drawId);
- state.pfnHsFunc(GetPrivateState(pDC), &hsContext);
+ state.pfnHsFunc(GetPrivateState(pDC), pWorkerData, &hsContext);
RDTSC_END(FEHullShader, 0);
UPDATE_STAT_FE(HsInvocations, numPrims);
@@ -1315,7 +1318,7 @@ static void TessellationStages(
dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations);
RDTSC_BEGIN(FEDomainShader, pDC->drawId);
- state.pfnDsFunc(GetPrivateState(pDC), &dsContext);
+ state.pfnDsFunc(GetPrivateState(pDC), pWorkerData, &dsContext);
RDTSC_END(FEDomainShader, 0);
AR_EVENT(DSStats(dsContext.stats.numInstExecuted));
@@ -1521,6 +1524,8 @@ void ProcessDraw(
RDTSC_BEGIN(FEProcessDraw, pDC->drawId);
+ void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+
DRAW_WORK& work = *(DRAW_WORK*)pUserData;
const API_STATE& state = GetApiState(pDC);
@@ -1738,13 +1743,13 @@ void ProcessDraw(
// 1. Execute FS/VS for a single SIMD.
RDTSC_BEGIN(FEFetchShader, pDC->drawId);
#if USE_SIMD16_SHADERS
- state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_lo, vin);
+ state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin);
#else
- state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_lo, vin_lo);
+ state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin_lo);
if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
{
- state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_hi, vin_hi);
+ state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_hi, vin_hi);
}
#endif
RDTSC_END(FEFetchShader, 0);
@@ -1793,15 +1798,15 @@ void ProcessDraw(
{
RDTSC_BEGIN(FEVertexShader, pDC->drawId);
#if USE_SIMD16_VS
- state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo);
+ state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
#else
- state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo);
+ state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
{
- state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_hi);
+ state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_hi);
AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted));
}
#endif
@@ -1994,7 +1999,7 @@ void ProcessDraw(
// 1. Execute FS/VS for a single SIMD.
RDTSC_BEGIN(FEFetchShader, pDC->drawId);
- state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo, vout);
+ state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo, vout);
RDTSC_END(FEFetchShader, 0);
// forward fetch generated vertex IDs to the vertex shader
@@ -2016,7 +2021,7 @@ void ProcessDraw(
#endif
{
RDTSC_BEGIN(FEVertexShader, pDC->drawId);
- state.pfnVertexFunc(GetPrivateState(pDC), &vsContext);
+ state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext);
RDTSC_END(FEVertexShader, 0);
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
index 08dd51bf731..67c28ad97c4 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -302,7 +302,7 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi
triDesc.Z[0] = triDesc.Z[1] = triDesc.Z[2] = z;
RenderOutputBuffers renderBuffers;
- GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
+ GetRenderHotTiles(pDC, workerId, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
index 7f9b3788c7f..ca39d7c38f8 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -40,7 +40,7 @@
extern PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT][STATE_VALID_TRI_EDGE_COUNT][2];
template <uint32_t numSamples = 1>
-void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex);
+void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex);
template <typename RT>
void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers &buffers);
template <typename RT>
@@ -1145,7 +1145,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
uint32_t maxX = maxTileX;
RenderOutputBuffers renderBuffers, currentRenderBufferRow;
- GetRenderHotTiles<RT::MT::numSamples>(pDC, macroTile, minTileX, minTileY, renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
+ GetRenderHotTiles<RT::MT::numSamples>(pDC, workerId, macroTile, minTileX, minTileY, renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
currentRenderBufferRow = renderBuffers;
// rasterize and generate coverage masks per sample
@@ -1297,10 +1297,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
// Get pointers to hot tile memory for color RT, depth, stencil
template <uint32_t numSamples>
-void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex)
+void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex)
{
const API_STATE& state = GetApiState(pDC);
SWR_CONTEXT *pContext = pDC->pContext;
+ HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
uint32_t mx, my;
MacroTileMgr::getTileIndices(macroID, mx, my);
@@ -1316,7 +1317,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint
uint32_t colorHottileEnableMask = state.colorHottileEnable;
while(_BitScanForward(&rtSlot, colorHottileEnableMask))
{
- HOTTILE *pColor = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true,
+ HOTTILE *pColor = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true,
numSamples, renderTargetArrayIndex);
pColor->state = HOTTILE_DIRTY;
renderBuffers.pColor[rtSlot] = pColor->pBuffer + offset;
@@ -1328,7 +1329,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint
const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
offset*=numSamples;
- HOTTILE *pDepth = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true,
+ HOTTILE *pDepth = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true,
numSamples, renderTargetArrayIndex);
pDepth->state = HOTTILE_DIRTY;
SWR_ASSERT(pDepth->pBuffer != nullptr);
@@ -1339,7 +1340,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint
const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
offset*=numSamples;
- HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true,
+ HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true,
numSamples, renderTargetArrayIndex);
pStencil->state = HOTTILE_DIRTY;
SWR_ASSERT(pStencil->pBuffer != nullptr);
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index cdb30f60fdf..217cf44c58f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -911,18 +911,18 @@ struct SWR_BLEND_CONTEXT
/// FUNCTION POINTERS FOR SHADERS
#if USE_SIMD16_SHADERS
-typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
+typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
#else
-typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
+typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
#endif
-typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
-typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
-typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
-typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
-typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
+typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_VS_CONTEXT* pVsContext);
+typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_HS_CONTEXT* pHsContext);
+typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_DS_CONTEXT* pDsContext);
+typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext);
+typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext);
typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
-typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
-typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
+typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext);
+typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 3eb20abcbfc..9e16246c3f4 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -42,6 +42,7 @@
#endif
#include "common/os.h"
+#include "core/api.h"
#include "context.h"
#include "frontend.h"
#include "backend.h"
@@ -1128,7 +1129,8 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
if (pContext->threadInfo.SINGLE_THREADED)
{
- return;
+ numAPIReservedThreads = 0;
+ numThreads = 1;
}
if (numAPIReservedThreads)
@@ -1139,6 +1141,10 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
{
numAPIReservedThreads = 0;
}
+ else
+ {
+ memset(pPool->pApiThreadData, 0, sizeof(THREAD_DATA) * numAPIReservedThreads);
+ }
}
pPool->numReservedThreads = numAPIReservedThreads;
@@ -1147,8 +1153,37 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
pPool->pThreadData = new (std::nothrow) THREAD_DATA[pPool->numThreads];
SWR_ASSERT(pPool->pThreadData);
+ memset(pPool->pThreadData, 0, sizeof(THREAD_DATA) * pPool->numThreads);
pPool->numaMask = 0;
+ // Allocate worker private data
+ pPool->pWorkerPrivateDataArray = nullptr;
+ if (pContext->workerPrivateState.perWorkerPrivateStateSize)
+ {
+ size_t perWorkerSize = AlignUpPow2(pContext->workerPrivateState.perWorkerPrivateStateSize, 64);
+ size_t totalSize = perWorkerSize * pPool->numThreads;
+ if (totalSize)
+ {
+ pPool->pWorkerPrivateDataArray = AlignedMalloc(totalSize, 64);
+ SWR_ASSERT(pPool->pWorkerPrivateDataArray);
+
+ void* pWorkerData = pPool->pWorkerPrivateDataArray;
+ for (uint32_t i = 0; i < pPool->numThreads; ++i)
+ {
+ pPool->pThreadData[i].pWorkerPrivateData = pWorkerData;
+ if (pContext->workerPrivateState.pfnInitWorkerData)
+ {
+ pContext->workerPrivateState.pfnInitWorkerData(pWorkerData, i);
+ }
+ pWorkerData = PtrAdd(pWorkerData, perWorkerSize);
+ }
+ }
+ }
+
+ if (pContext->threadInfo.SINGLE_THREADED)
+ {
+ return;
+ }
pPool->pThreads = new (std::nothrow) THREAD_PTR[pPool->numThreads];
SWR_ASSERT(pPool->pThreads);
@@ -1293,13 +1328,13 @@ void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
/// @param pPool - pointer to thread pool object.
void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
{
- if (!pContext->threadInfo.SINGLE_THREADED)
- {
- // Wait for all threads to finish
- SwrWaitForIdle(pContext);
+ // Wait for all threads to finish
+ SwrWaitForIdle(pContext);
- // Wait for threads to finish and destroy them
- for (uint32_t t = 0; t < pPool->numThreads; ++t)
+ // Wait for threads to finish and destroy them
+ for (uint32_t t = 0; t < pPool->numThreads; ++t)
+ {
+ if (!pContext->threadInfo.SINGLE_THREADED)
{
// Detach from thread. Cannot join() due to possibility (in Windows) of code
// in some DLLMain(THREAD_DETATCH case) blocking the thread until after this returns.
@@ -1307,10 +1342,17 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
delete(pPool->pThreads[t]);
}
- delete[] pPool->pThreads;
-
- // Clean up data used by threads
- delete[] pPool->pThreadData;
- delete[] pPool->pApiThreadData;
+ if (pContext->workerPrivateState.pfnFinishWorkerData)
+ {
+ pContext->workerPrivateState.pfnFinishWorkerData(pPool->pThreadData[t].pWorkerPrivateData, t);
+ }
}
+
+ delete[] pPool->pThreads;
+
+ // Clean up data used by threads
+ delete[] pPool->pThreadData;
+ delete[] pPool->pApiThreadData;
+
+ AlignedFree(pPool->pWorkerPrivateDataArray);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h
index 2e53265f424..cb918ddb60d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.h
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.h
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -35,9 +35,11 @@ typedef std::thread* THREAD_PTR;
struct SWR_CONTEXT;
struct DRAW_CONTEXT;
+struct SWR_WORKER_PRIVATE_STATE;
struct THREAD_DATA
{
+ void* pWorkerPrivateData;// Pointer to per-worker private data
uint32_t procGroupId; // Will always be 0 for non-Windows OS
uint32_t threadId; // within the procGroup for Windows
uint32_t numaId; // NUMA node id
@@ -55,6 +57,7 @@ struct THREAD_POOL
uint32_t numThreads;
uint32_t numaMask;
THREAD_DATA *pThreadData;
+ void* pWorkerPrivateDataArray; // All memory for worker private data
uint32_t numReservedThreads; // Number of threads reserved for API use
THREAD_DATA *pApiThreadData;
};
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
index f4686703291..28fa7877114 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -83,7 +83,7 @@ void MacroTileMgr::markTileComplete(uint32_t id)
tile.mWorkItemsBE = 0;
}
-HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples,
+HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerPrivateData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples,
uint32_t renderTargetArrayIndex)
{
uint32_t x, y;
@@ -163,11 +163,11 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32
if (hotTile.state == HOTTILE_DIRTY)
{
- pContext->pfnStoreTile(GetPrivateState(pDC), format, attachment,
+ pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment,
x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, hotTile.renderTargetArrayIndex, hotTile.pBuffer);
}
- pContext->pfnLoadTile(GetPrivateState(pDC), format, attachment,
+ pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment,
x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, renderTargetArrayIndex, hotTile.pBuffer);
hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
@@ -379,6 +379,7 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID)
{
const API_STATE& state = GetApiState(pDC);
+ HANDLE hWorkerPrivateData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
uint32_t x, y;
MacroTileMgr::getTileIndices(macroID, x, y);
@@ -392,13 +393,13 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
uint32_t colorHottileEnableMask = state.colorHottileEnable;
while (_BitScanForward(&rtSlot, colorHottileEnableMask))
{
- HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
+ HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
- pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
+ pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
}
@@ -416,12 +417,12 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
// check depth if enabled
if (state.depthHottileEnable)
{
- HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
+ HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
- pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
+ pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
}
@@ -438,12 +439,12 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
// check stencil if enabled
if (state.stencilHottileEnable)
{
- HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
+ HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
- pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
+ pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
index 8ef74a7f4b9..2831010b12f 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -305,7 +305,7 @@ public:
void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);
- HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
+ HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
uint32_t renderTargetArrayIndex = 0);
HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
index 7f9c9dd9d7b..284eb27a7d3 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -19,13 +19,13 @@
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
-*
+*
* @file JitManager.cpp
-*
+*
* @brief Implementation if the Jit Manager.
-*
+*
* Notes:
-*
+*
******************************************************************************/
#include "jit_pch.hpp"
@@ -66,7 +66,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
InitializeNativeTargetAsmPrinter();
InitializeNativeTargetDisassembler();
-
+
TargetOptions tOpts;
tOpts.AllowFPOpFusion = FPOpFusion::Fast;
tOpts.NoInfsFPMath = false;
@@ -125,6 +125,8 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
// llvm5 is picky and does not take a void * type
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
+ fsArgs.push_back(Type::getInt8PtrTy(mContext));
+
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
#if USE_SIMD16_SHADERS
fsArgs.push_back(PointerType::get(Gen_simd16vertex(this), 0));
@@ -158,7 +160,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
void JitManager::SetupNewModule()
{
SWR_ASSERT(mIsModuleFinalized == true && "Current module is not finalized!");
-
+
std::unique_ptr<Module> newModule(new Module("", mContext));
mpCurrentModule = newModule.get();
#if defined(_WIN32)
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index af97b83cb2d..09590b71047 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -91,6 +91,7 @@ struct FetchJit : public BuilderGfxMem
void CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]);
void ConvertFormat(SWR_FORMAT format, Value *texels[4]);
+ Value* mpWorkerData;
Value* mpFetchInfo;
};
@@ -113,6 +114,8 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
privateContext->setName("privateContext");
SetPrivateContext(privateContext);
+ mpWorkerData = &*argitr; ++argitr;
+ mpWorkerData->setName("pWorkerData");
mpFetchInfo = &*argitr; ++argitr;
mpFetchInfo->setName("fetchInfo");
Value* pVtxOut = &*argitr;
@@ -1097,8 +1100,7 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
Value* vIndexMask = ICMP_SGT(vMaxIndex, vIndexOffsets);
// Load the indices; OOB loads 0
- pIndices = BITCAST(pIndices, PointerType::get(mSimdInt32Ty, 0));
- return MASKED_LOAD(pIndices, 4, vIndexMask, VIMMED1(0));
+ return MASKED_LOAD(pIndices, 4, vIndexMask, VIMMED1(0), "vIndices", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH);
}
//////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp b/src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp
index 98bf28b21d1..6a528b6a0f2 100644
--- a/src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp
+++ b/src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp
@@ -153,6 +153,7 @@ struct StoreMacroTileClear
/// @param x, y - Coordinates to raster tile.
/// @param pClearColor - Pointer to clear color
void SwrStoreHotTileClear(
+ HANDLE hWorkerPrivateData,
SWR_SURFACE_STATE *pDstSurface,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
UINT x,
diff --git a/src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp b/src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp
index 9dbc16ad9ae..8033304d20c 100644
--- a/src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp
+++ b/src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp
@@ -54,6 +54,7 @@ static std::mutex sBucketMutex;
/// @param x, y - Coordinates to raster tile.
/// @param pDstHotTile - Pointer to Hot Tile
void SwrLoadHotTile(
+ HANDLE hWorkerPrivateData,
const SWR_SURFACE_STATE *pSrcSurface,
SWR_FORMAT dstFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
index 9c20669f77b..53b82c4c12c 100644
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
+++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
@@ -59,6 +59,7 @@ static std::vector<int32_t> sBuckets(NUM_SWR_FORMATS, -1);
/// @param x, y - Coordinates to raster tile.
/// @param pSrcHotTile - Pointer to Hot Tile
void SwrStoreHotTileToSurface(
+ HANDLE hWorkerPrivateData,
SWR_SURFACE_STATE *pDstSurface,
SWR_FORMAT srcFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
diff --git a/src/gallium/drivers/swr/swr_memory.h b/src/gallium/drivers/swr/swr_memory.h
index fc5561680cb..bab7800c604 100644
--- a/src/gallium/drivers/swr/swr_memory.h
+++ b/src/gallium/drivers/swr/swr_memory.h
@@ -25,6 +25,7 @@
INLINE void
swr_LoadHotTile(HANDLE hPrivateContext,
+ HANDLE hWorkerPrivateData,
SWR_FORMAT dstFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
UINT x, UINT y,
@@ -34,11 +35,12 @@ swr_LoadHotTile(HANDLE hPrivateContext,
swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
SWR_SURFACE_STATE *pSrcSurface = &pDC->renderTargets[renderTargetIndex];
- pDC->pAPI->pfnSwrLoadHotTile(pSrcSurface, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile);
+ pDC->pAPI->pfnSwrLoadHotTile(hWorkerPrivateData, pSrcSurface, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile);
}
INLINE void
swr_StoreHotTile(HANDLE hPrivateContext,
+ HANDLE hWorkerPrivateData,
SWR_FORMAT srcFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
UINT x, UINT y,
@@ -48,11 +50,12 @@ swr_StoreHotTile(HANDLE hPrivateContext,
swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex];
- pDC->pAPI->pfnSwrStoreHotTileToSurface(pDstSurface, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile);
+ pDC->pAPI->pfnSwrStoreHotTileToSurface(hWorkerPrivateData, pDstSurface, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile);
}
INLINE void
swr_StoreHotTileClear(HANDLE hPrivateContext,
+ HANDLE hWorkerPrivateData,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
UINT x,
UINT y,
@@ -63,5 +66,5 @@ swr_StoreHotTileClear(HANDLE hPrivateContext,
swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex];
- pDC->pAPI->pfnSwrStoreHotTileClear(pDstSurface, renderTargetIndex, x, y, renderTargetArrayIndex, pClearColor);
+ pDC->pAPI->pfnSwrStoreHotTileClear(hWorkerPrivateData, pDstSurface, renderTargetIndex, x, y, renderTargetArrayIndex, pClearColor);
}
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
index 6ea021a987e..13d89863fd7 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -586,6 +586,7 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
+ PointerType::get(mInt8Ty, 0),
PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
FunctionType *vsFuncType =
FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
@@ -610,6 +611,8 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
auto argitr = pFunction->arg_begin();
Value *hPrivateData = &*argitr++;
hPrivateData->setName("hPrivateData");
+ Value *pWorkerData = &*argitr++;
+ pWorkerData->setName("pWorkerData");
Value *pGsCtx = &*argitr++;
pGsCtx->setName("gsCtx");
@@ -754,6 +757,7 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
+ PointerType::get(mInt8Ty, 0),
PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
FunctionType *vsFuncType =
FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
@@ -778,6 +782,8 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
auto argitr = pFunction->arg_begin();
Value *hPrivateData = &*argitr++;
hPrivateData->setName("hPrivateData");
+ Value *pWorkerData = &*argitr++;
+ pWorkerData->setName("pWorkerData");
Value *pVsCtx = &*argitr++;
pVsCtx->setName("vsCtx");
@@ -1037,6 +1043,7 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
+ PointerType::get(mInt8Ty, 0),
PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
FunctionType *funcType =
FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
@@ -1060,6 +1067,8 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
auto args = pFunction->arg_begin();
Value *hPrivateData = &*args++;
hPrivateData->setName("hPrivateData");
+ Value *pWorkerData = &*args++;
+ pWorkerData->setName("pWorkerData");
Value *pPS = &*args++;
pPS->setName("psCtx");