summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tim Rowley <[email protected]> 2016-08-03 17:59:37 -0600
committer Tim Rowley <[email protected]> 2016-08-10 11:08:05 -0500
commit 29e1c4a8a9f26ce41aa53dc9bf39852a8530adc6 (patch)
tree 9c46c034c9947a1134acad0e6d84c9fff7a14d97
parent e0c10306f58fa3e1a1cb6a23b8942701d8529cce (diff)
swr: [rasterizer core] allow override of KNOB thread settings
- Remove HYPERTHREADED_FE support
- Add threading info as optional data passed to SwrCreateContext.
  If supplied this data will override any KNOB thread settings.

Signed-off-by: Tim Rowley <[email protected]>
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/api.cpp 18
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/api.h 15
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/context.h 1
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/threads.cpp 73
-rw-r--r-- src/gallium/drivers/swr/rasterizer/core/threads.h 4
-rw-r--r-- src/gallium/drivers/swr/rasterizer/scripts/knob_defs.py 12
6 files changed, 53 insertions, 70 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index a4856ee2aed..3922606b3a6 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -75,6 +75,17 @@ HANDLE SwrCreateContext(
pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
+ pContext->threadInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS;
+ pContext->threadInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES;
+ pContext->threadInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE;
+ pContext->threadInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE;
+ pContext->threadInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED;
+
+ if (pCreateInfo->pThreadInfo)
+ {
+ pContext->threadInfo = *pCreateInfo->pThreadInfo;
+ }
+
for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
{
pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
@@ -84,7 +95,7 @@ HANDLE SwrCreateContext(
pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
}
- if (!KNOB_SINGLE_THREADED)
+ if (!pContext->threadInfo.SINGLE_THREADED)
{
memset(&pContext->WaitLock, 0, sizeof(pContext->WaitLock));
memset(&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty));
@@ -95,9 +106,8 @@ HANDLE SwrCreateContext(
}
// Calling createThreadPool() above can set SINGLE_THREADED
- if (KNOB_SINGLE_THREADED)
+ if (pContext->threadInfo.SINGLE_THREADED)
{
- SET_KNOB(HYPERTHREADED_FE, false);
pContext->NumWorkerThreads = 1;
pContext->NumFEThreads = 1;
pContext->NumBEThreads = 1;
@@ -218,7 +228,7 @@ void QueueWork(SWR_CONTEXT *pContext)
pContext->dcRing.Enqueue();
}
- if (KNOB_SINGLE_THREADED)
+ if (pContext->threadInfo.SINGLE_THREADED)
{
// flush denormals to 0
uint32_t mxcsr = _mm_getcsr();
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index b45d4498ea0..d7621d54877 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -91,6 +91,18 @@ typedef void(SWR_API *PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext,
class BucketManager;
//////////////////////////////////////////////////////////////////////////
+/// SWR_THREADING_INFO
+/////////////////////////////////////////////////////////////////////////
+struct SWR_THREADING_INFO
+{
+ uint32_t MAX_WORKER_THREADS;
+ uint32_t MAX_NUMA_NODES;
+ uint32_t MAX_CORES_PER_NUMA_NODE;
+ uint32_t MAX_THREADS_PER_CORE;
+ bool SINGLE_THREADED;
+};
+
+//////////////////////////////////////////////////////////////////////////
/// SWR_CREATECONTEXT_INFO
/////////////////////////////////////////////////////////////////////////
struct SWR_CREATECONTEXT_INFO
@@ -113,6 +125,9 @@ struct SWR_CREATECONTEXT_INFO
// Output: size required memory passed to for SwrSaveState / SwrRestoreState
size_t contextSaveSize;
+
+ // Input (optional): Threading info that overrides any set KNOB values.
+ SWR_THREADING_INFO* pThreadInfo;
};
//////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 7e6a1673211..47fea162acc 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -464,6 +464,7 @@ struct SWR_CONTEXT
uint32_t NumBEThreads;
THREAD_POOL threadPool; // Thread pool associated with this context
+ SWR_THREADING_INFO threadInfo;
std::condition_variable FifosNotEmpty;
std::mutex WaitLock;
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index b207ebd1731..143a77fb4dc 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -239,10 +239,10 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
}
-void bindThread(uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false)
+void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false)
{
// Only bind threads when MAX_WORKER_THREADS isn't set.
- if (KNOB_MAX_WORKER_THREADS && bindProcGroup == false)
+ if (pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false)
{
return;
}
@@ -267,9 +267,9 @@ void bindThread(uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=
else
#endif
{
- // If KNOB_MAX_WORKER_THREADS is set, only bind to the proc group,
+ // If MAX_WORKER_THREADS is set, only bind to the proc group,
// Not the individual HW thread.
- if (!KNOB_MAX_WORKER_THREADS)
+ if (!pContext->threadInfo.MAX_WORKER_THREADS)
{
affinity.Mask = KAFFINITY(1) << threadId;
}
@@ -648,7 +648,7 @@ DWORD workerThreadMain(LPVOID pData)
uint32_t threadId = pThreadData->threadId;
uint32_t workerId = pThreadData->workerId;
- bindThread(threadId, pThreadData->procGroupId, pThreadData->forceBindProcGroup);
+ bindThread(pContext, threadId, pThreadData->procGroupId, pThreadData->forceBindProcGroup);
RDTSC_INIT(threadId);
@@ -771,7 +771,7 @@ template<> DWORD workerThreadInit<false, false>(LPVOID pData) = delete;
void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
{
- bindThread(0);
+ bindThread(pContext, 0);
CPUNumaNodes nodes;
uint32_t numThreadsPerProcGroup = 0;
@@ -796,33 +796,23 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
uint32_t numCoresPerNode = numHWCoresPerNode;
uint32_t numHyperThreads = numHWHyperThreads;
- if (KNOB_MAX_WORKER_THREADS)
+ if (pContext->threadInfo.MAX_NUMA_NODES)
{
- SET_KNOB(HYPERTHREADED_FE, false);
+ numNodes = std::min(numNodes, pContext->threadInfo.MAX_NUMA_NODES);
}
- if (KNOB_HYPERTHREADED_FE)
+ if (pContext->threadInfo.MAX_CORES_PER_NUMA_NODE)
{
- SET_KNOB(MAX_THREADS_PER_CORE, 0);
+ numCoresPerNode = std::min(numCoresPerNode, pContext->threadInfo.MAX_CORES_PER_NUMA_NODE);
}
- if (KNOB_MAX_NUMA_NODES)
+ if (pContext->threadInfo.MAX_THREADS_PER_CORE)
{
- numNodes = std::min(numNodes, KNOB_MAX_NUMA_NODES);
- }
-
- if (KNOB_MAX_CORES_PER_NUMA_NODE)
- {
- numCoresPerNode = std::min(numCoresPerNode, KNOB_MAX_CORES_PER_NUMA_NODE);
- }
-
- if (KNOB_MAX_THREADS_PER_CORE)
- {
- numHyperThreads = std::min(numHyperThreads, KNOB_MAX_THREADS_PER_CORE);
+ numHyperThreads = std::min(numHyperThreads, pContext->threadInfo.MAX_THREADS_PER_CORE);
}
#if defined(_WIN32) && !defined(_WIN64)
- if (!KNOB_MAX_WORKER_THREADS)
+ if (!pContext->threadInfo.MAX_WORKER_THREADS)
{
// Limit 32-bit windows to bindable HW threads only
if ((numCoresPerNode * numHWHyperThreads) > 32)
@@ -832,19 +822,14 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
}
#endif
- if (numHyperThreads < 2)
- {
- SET_KNOB(HYPERTHREADED_FE, false);
- }
-
// Calculate numThreads
uint32_t numThreads = numNodes * numCoresPerNode * numHyperThreads;
numThreads = std::min(numThreads, numHWThreads);
- if (KNOB_MAX_WORKER_THREADS)
+ if (pContext->threadInfo.MAX_WORKER_THREADS)
{
uint32_t maxHWThreads = numHWNodes * numHWCoresPerNode * numHWHyperThreads;
- numThreads = std::min(KNOB_MAX_WORKER_THREADS, maxHWThreads);
+ numThreads = std::min(pContext->threadInfo.MAX_WORKER_THREADS, maxHWThreads);
}
if (numThreads > KNOB_MAX_NUM_THREADS)
@@ -900,7 +885,7 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
pPool->pThreadData = (THREAD_DATA *)malloc(pPool->numThreads * sizeof(THREAD_DATA));
pPool->numaMask = 0;
- if (KNOB_MAX_WORKER_THREADS)
+ if (pContext->threadInfo.MAX_WORKER_THREADS)
{
bool bForceBindProcGroup = (numThreads > numThreadsPerProcGroup);
uint32_t numProcGroups = (numThreads + numThreadsPerProcGroup - 1) / numThreadsPerProcGroup;
@@ -962,25 +947,9 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
pPool->pThreadData[workerId].htId = t;
pPool->pThreadData[workerId].pContext = pContext;
- if (KNOB_HYPERTHREADED_FE)
- {
- if (t == 0)
- {
- pContext->NumBEThreads++;
- pPool->threads[workerId] = new std::thread(workerThreadInit<false, true>, &pPool->pThreadData[workerId]);
- }
- else
- {
- pContext->NumFEThreads++;
- pPool->threads[workerId] = new std::thread(workerThreadInit<true, false>, &pPool->pThreadData[workerId]);
- }
- }
- else
- {
- pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
- pContext->NumBEThreads++;
- pContext->NumFEThreads++;
- }
+ pPool->threads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+ pContext->NumBEThreads++;
+ pContext->NumFEThreads++;
++workerId;
}
@@ -991,7 +960,7 @@ void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
{
- if (!KNOB_SINGLE_THREADED)
+ if (!pContext->threadInfo.SINGLE_THREADED)
{
// Inform threads to finish up
std::unique_lock<std::mutex> lock(pContext->WaitLock);
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h
index 215c699a9d3..157f46aff70 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.h
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.h
@@ -1,5 +1,5 @@
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -45,7 +45,7 @@ struct THREAD_DATA
uint32_t htId; // Hyperthread id
uint32_t workerId;
SWR_CONTEXT *pContext;
- bool forceBindProcGroup; // Only useful when KNOB_MAX_WORKER_THREADS is set.
+ bool forceBindProcGroup; // Only useful when MAX_WORKER_THREADS is set.
};
diff --git a/src/gallium/drivers/swr/rasterizer/scripts/knob_defs.py b/src/gallium/drivers/swr/rasterizer/scripts/knob_defs.py
index 56c3144bfa6..f93147c38da 100644
--- a/src/gallium/drivers/swr/rasterizer/scripts/knob_defs.py
+++ b/src/gallium/drivers/swr/rasterizer/scripts/knob_defs.py
@@ -30,18 +30,6 @@ KNOBS = [
'category' : 'debug',
}],
- ['HYPERTHREADED_FE', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['EXPERIMENTAL!!',
- 'If enabled will attempt to use secondary threads per core to perform',
- 'front-end (VS/GS) work.',
- '',
- 'Note: Setting this will cause KNOB_MAX_THREADS_PER_CORE to be ignored.'],
- 'category' : 'perf',
- 'advanced' : 'true',
- }],
-
['DUMP_SHADER_IR', {
'type' : 'bool',
'default' : 'false',