summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp94
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.h119
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.cpp69
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/context.h9
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp288
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.h10
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp122
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/utils.h50
-rw-r--r--src/gallium/drivers/swr/swr_clear.cpp12
-rw-r--r--src/gallium/drivers/swr/swr_context.h1
-rw-r--r--src/gallium/drivers/swr/swr_draw.cpp32
-rw-r--r--src/gallium/drivers/swr/swr_state.cpp9
12 files changed, 400 insertions, 415 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index e447bf6fbe3..d53a6cbedda 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -46,6 +46,8 @@
#include "common/simdintrin.h"
#include "common/os.h"
+static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y };
+
void SetupDefaultState(SWR_CONTEXT *pContext);
static INLINE SWR_CONTEXT* GetContext(HANDLE hContext)
@@ -713,56 +715,46 @@ void SwrSetViewports(
void SwrSetScissorRects(
HANDLE hContext,
uint32_t numScissors,
- const BBOX* pScissors)
+ const SWR_RECT* pScissors)
{
SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS,
"Invalid number of scissor rects.");
API_STATE* pState = GetDrawState(GetContext(hContext));
- memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(BBOX));
+ memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(pScissors[0]));
};
void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
{
API_STATE *pState = &pDC->pState->state;
- uint32_t left, right, top, bottom;
// Set up scissor dimensions based on scissor or viewport
if (pState->rastState.scissorEnable)
{
- // scissor rect right/bottom edge are exclusive, core expects scissor dimensions to be inclusive, so subtract one pixel from right/bottom edges
- left = pState->scissorRects[0].left;
- right = pState->scissorRects[0].right;
- top = pState->scissorRects[0].top;
- bottom = pState->scissorRects[0].bottom;
+ pState->scissorInFixedPoint = pState->scissorRects[0];
}
else
{
// the vp width and height must be added to origin un-rounded then the result round to -inf.
// The cast to int works for rounding assuming all [left, right, top, bottom] are positive.
- left = (int32_t)pState->vp[0].x;
- right = (int32_t)(pState->vp[0].x + pState->vp[0].width);
- top = (int32_t)pState->vp[0].y;
- bottom = (int32_t)(pState->vp[0].y + pState->vp[0].height);
+ pState->scissorInFixedPoint.xmin = (int32_t)pState->vp[0].x;
+ pState->scissorInFixedPoint.xmax = (int32_t)(pState->vp[0].x + pState->vp[0].width);
+ pState->scissorInFixedPoint.ymin = (int32_t)pState->vp[0].y;
+ pState->scissorInFixedPoint.ymax = (int32_t)(pState->vp[0].y + pState->vp[0].height);
}
- right = std::min<uint32_t>(right, KNOB_MAX_SCISSOR_X);
- bottom = std::min<uint32_t>(bottom, KNOB_MAX_SCISSOR_Y);
+ // Clamp to max rect
+ pState->scissorInFixedPoint &= g_MaxScissorRect;
- if (left > KNOB_MAX_SCISSOR_X || top > KNOB_MAX_SCISSOR_Y)
- {
- pState->scissorInFixedPoint.left = 0;
- pState->scissorInFixedPoint.right = 0;
- pState->scissorInFixedPoint.top = 0;
- pState->scissorInFixedPoint.bottom = 0;
- }
- else
- {
- pState->scissorInFixedPoint.left = left * FIXED_POINT_SCALE;
- pState->scissorInFixedPoint.right = right * FIXED_POINT_SCALE - 1;
- pState->scissorInFixedPoint.top = top * FIXED_POINT_SCALE;
- pState->scissorInFixedPoint.bottom = bottom * FIXED_POINT_SCALE - 1;
- }
+ // Scale to fixed point
+ pState->scissorInFixedPoint.xmin *= FIXED_POINT_SCALE;
+ pState->scissorInFixedPoint.xmax *= FIXED_POINT_SCALE;
+ pState->scissorInFixedPoint.ymin *= FIXED_POINT_SCALE;
+ pState->scissorInFixedPoint.ymax *= FIXED_POINT_SCALE;
+
+ // Make scissor inclusive
+ pState->scissorInFixedPoint.xmax -= 1;
+ pState->scissorInFixedPoint.ymax -= 1;
}
// templated backend function tables
@@ -1303,9 +1295,12 @@ void SwrDrawIndexedInstanced(
/// @brief SwrInvalidateTiles
/// @param hContext - Handle passed back from SwrCreateContext
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
-void SwrInvalidateTiles(
+/// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to
+/// be hottile size-aligned.
+void SWR_API SwrInvalidateTiles(
HANDLE hContext,
- uint32_t attachmentMask)
+ uint32_t attachmentMask,
+ const SWR_RECT& invalidateRect)
{
if (KNOB_TOSS_DRAW)
{
@@ -1318,7 +1313,8 @@ void SwrInvalidateTiles(
pDC->FeWork.type = DISCARDINVALIDATETILES;
pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
- memset(&pDC->FeWork.desc.discardInvalidateTiles.rect, 0, sizeof(SWR_RECT));
+ pDC->FeWork.desc.discardInvalidateTiles.rect = invalidateRect;
+ pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID;
pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false;
pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false;
@@ -1331,11 +1327,12 @@ void SwrInvalidateTiles(
/// @brief SwrDiscardRect
/// @param hContext - Handle passed back from SwrCreateContext
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
-/// @param rect - if rect is all zeros, the entire attachment surface will be discarded
-void SwrDiscardRect(
+/// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be
+/// discarded.
+void SWR_API SwrDiscardRect(
HANDLE hContext,
uint32_t attachmentMask,
- SWR_RECT rect)
+ const SWR_RECT& rect)
{
if (KNOB_TOSS_DRAW)
{
@@ -1350,6 +1347,7 @@ void SwrDiscardRect(
pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
pDC->FeWork.desc.discardInvalidateTiles.rect = rect;
+ pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED;
pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true;
pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true;
@@ -1398,10 +1396,11 @@ void SwrDispatch(
// Deswizzles, converts and stores current contents of the hot tiles to surface
// described by pState
-void SwrStoreTiles(
+void SWR_API SwrStoreTiles(
HANDLE hContext,
SWR_RENDERTARGET_ATTACHMENT attachment,
- SWR_TILE_STATE postStoreTileState)
+ SWR_TILE_STATE postStoreTileState,
+ const SWR_RECT& storeRect)
{
if (KNOB_TOSS_DRAW)
{
@@ -1413,12 +1412,12 @@ void SwrStoreTiles(
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- SetupMacroTileScissors(pDC);
-
pDC->FeWork.type = STORETILES;
pDC->FeWork.pfnWork = ProcessStoreTiles;
pDC->FeWork.desc.storeTiles.attachment = attachment;
pDC->FeWork.desc.storeTiles.postStoreTileState = postStoreTileState;
+ pDC->FeWork.desc.storeTiles.rect = storeRect;
+ pDC->FeWork.desc.storeTiles.rect &= g_MaxScissorRect;
//enqueue
QueueDraw(pContext);
@@ -1426,12 +1425,21 @@ void SwrStoreTiles(
RDTSC_STOP(APIStoreTiles, 0, 0);
}
-void SwrClearRenderTarget(
+//////////////////////////////////////////////////////////////////////////
+/// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
+/// @param hContext - Handle passed back from SwrCreateContext
+/// @param clearMask - combination of SWR_CLEAR_COLOR / SWR_CLEAR_DEPTH / SWR_CLEAR_STENCIL flags (or SWR_CLEAR_NONE)
+/// @param clearColor - color use for clearing render targets
+/// @param z - depth value use for clearing depth buffer
+/// @param stencil - stencil value used for clearing stencil buffer
+/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
+void SWR_API SwrClearRenderTarget(
HANDLE hContext,
uint32_t clearMask,
const float clearColor[4],
float z,
- uint8_t stencil)
+ uint8_t stencil,
+ const SWR_RECT& clearRect)
{
if (KNOB_TOSS_DRAW)
{
@@ -1441,16 +1449,16 @@ void SwrClearRenderTarget(
RDTSC_START(APIClearRenderTarget);
SWR_CONTEXT *pContext = GetContext(hContext);
-
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- SetupMacroTileScissors(pDC);
-
CLEAR_FLAGS flags;
+ flags.bits = 0;
flags.mask = clearMask;
pDC->FeWork.type = CLEAR;
pDC->FeWork.pfnWork = ProcessClear;
+ pDC->FeWork.desc.clear.rect = clearRect;
+ pDC->FeWork.desc.clear.rect &= g_MaxScissorRect;
pDC->FeWork.desc.clear.flags = flags;
pDC->FeWork.desc.clear.clearDepth = z;
pDC->FeWork.desc.clear.clearRTColor[0] = clearColor[0];
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index ed18fe01010..9ca235d821e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -32,18 +32,82 @@
#include "common/os.h"
#include <assert.h>
-#include <vector>
+#include <algorithm>
#include "common/simdintrin.h"
#include "common/formats.h"
-#include "core/utils.h"
#include "core/state.h"
-///@todo place all the API functions into the 'swr' namespace.
-
typedef void(SWR_API *PFN_CALLBACK_FUNC)(uint64_t data, uint64_t data2, uint64_t data3);
//////////////////////////////////////////////////////////////////////////
+/// @brief Rectangle structure
+struct SWR_RECT
+{
+ int32_t xmin; ///< inclusive
+ int32_t ymin; ///< inclusive
+ int32_t xmax; ///< exclusive
+ int32_t ymax; ///< exclusive
+
+ bool operator == (const SWR_RECT& rhs)
+ {
+ return (this->ymin == rhs.ymin &&
+ this->ymax == rhs.ymax &&
+ this->xmin == rhs.xmin &&
+ this->xmax == rhs.xmax);
+ }
+
+ bool operator != (const SWR_RECT& rhs)
+ {
+ return !(*this == rhs);
+ }
+
+ SWR_RECT& Intersect(const SWR_RECT& other)
+ {
+ this->xmin = std::max(this->xmin, other.xmin);
+ this->ymin = std::max(this->ymin, other.ymin);
+ this->xmax = std::min(this->xmax, other.xmax);
+ this->ymax = std::min(this->ymax, other.ymax);
+
+ if (xmax - xmin < 0 ||
+ ymax - ymin < 0)
+ {
+ // Zero area
+ ymin = ymax = xmin = xmax = 0;
+ }
+
+ return *this;
+ }
+ SWR_RECT& operator &= (const SWR_RECT& other)
+ {
+ return Intersect(other);
+ }
+
+ SWR_RECT& Union(const SWR_RECT& other)
+ {
+ this->xmin = std::min(this->xmin, other.xmin);
+ this->ymin = std::min(this->ymin, other.ymin);
+ this->xmax = std::max(this->xmax, other.xmax);
+ this->ymax = std::max(this->ymax, other.ymax);
+
+ return *this;
+ }
+
+ SWR_RECT& operator |= (const SWR_RECT& other)
+ {
+ return Union(other);
+ }
+
+ void Translate(int32_t x, int32_t y)
+ {
+ xmin += x;
+ ymin += y;
+ xmax += x;
+ ymax += y;
+ }
+};
+
+//////////////////////////////////////////////////////////////////////////
/// @brief Function signature for load hot tiles
/// @param hPrivateContext - handle to private data
/// @param dstFormat - format of the hot tile
@@ -105,6 +169,10 @@ typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext,
typedef void(SWR_API *PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext,
const SWR_STATS_FE* pStats);
+//////////////////////////////////////////////////////////////////////////
+/// BucketManager
+/// Forward Declaration (see rdtsc_buckets.h for full definition)
+/////////////////////////////////////////////////////////////////////////
class BucketManager;
//////////////////////////////////////////////////////////////////////////
@@ -150,17 +218,6 @@ struct SWR_CREATECONTEXT_INFO
};
//////////////////////////////////////////////////////////////////////////
-/// SWR_RECT
-/////////////////////////////////////////////////////////////////////////
-struct SWR_RECT
-{
- uint32_t left;
- uint32_t right;
- uint32_t top;
- uint32_t bottom;
-};
-
-//////////////////////////////////////////////////////////////////////////
/// @brief Create SWR Context.
/// @param pCreateInfo - pointer to creation info.
HANDLE SWR_API SwrCreateContext(
@@ -445,19 +502,23 @@ void SWR_API SwrDrawIndexedInstanced(
/// @brief SwrInvalidateTiles
/// @param hContext - Handle passed back from SwrCreateContext
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
+/// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to
+/// be hottile size-aligned.
void SWR_API SwrInvalidateTiles(
HANDLE hContext,
- uint32_t attachmentMask);
+ uint32_t attachmentMask,
+ const SWR_RECT& invalidateRect);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDiscardRect
/// @param hContext - Handle passed back from SwrCreateContext
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
-/// @param rect - if rect is all zeros, the entire attachment surface will be discarded
+/// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be
+/// discarded.
void SWR_API SwrDiscardRect(
HANDLE hContext,
uint32_t attachmentMask,
- SWR_RECT rect);
+ const SWR_RECT& rect);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDispatch
@@ -483,15 +544,30 @@ enum SWR_TILE_STATE
void SWR_API SwrStoreTiles(
HANDLE hContext,
SWR_RENDERTARGET_ATTACHMENT attachment,
- SWR_TILE_STATE postStoreTileState);
+ SWR_TILE_STATE postStoreTileState,
+ const SWR_RECT& storeRect);
+
+//////////////////////////////////////////////////////////////////////////
+/// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
+/// @param hContext - Handle passed back from SwrCreateContext
+/// @param clearMask - combination of SWR_CLEAR_COLOR / SWR_CLEAR_DEPTH / SWR_CLEAR_STENCIL flags (or SWR_CLEAR_NONE)
+/// @param clearColor - color use for clearing render targets
+/// @param z - depth value use for clearing depth buffer
+/// @param stencil - stencil value used for clearing stencil buffer
+/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
void SWR_API SwrClearRenderTarget(
HANDLE hContext,
uint32_t clearMask,
const float clearColor[4],
float z,
- uint8_t stencil);
+ uint8_t stencil,
+ const SWR_RECT& clearRect);
+//////////////////////////////////////////////////////////////////////////
+/// @brief SwrSetRastyState
+/// @param hContext - Handle passed back from SwrCreateContext
+/// @param pRastState - New SWR_RASTSTATE used for SwrDraw* commands
void SWR_API SwrSetRastState(
HANDLE hContext,
const SWR_RASTSTATE *pRastState);
@@ -516,7 +592,7 @@ void SWR_API SwrSetViewports(
void SWR_API SwrSetScissorRects(
HANDLE hContext,
uint32_t numScissors,
- const BBOX* pScissors);
+ const SWR_RECT* pScissors);
//////////////////////////////////////////////////////////////////////////
/// @brief Returns a pointer to the private context state for the current
@@ -555,4 +631,5 @@ void SWR_API SwrEnableStats(
/// @param hContext - Handle passed back from SwrCreateContext
void SWR_API SwrEndFrame(
HANDLE hContext);
+
#endif//__SWR_API_H__
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index dff86b3ee76..1e4dca2fe25 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -37,7 +37,7 @@
#include <algorithm>
-typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, DWORD[4]);
+typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, DWORD[4], const SWR_RECT& rect);
static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
//////////////////////////////////////////////////////////////////////////
@@ -88,7 +88,7 @@ void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
template<SWR_FORMAT format>
void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
{
- auto lambda = [&](int comp)
+ auto lambda = [&](int32_t comp)
{
FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
@@ -102,7 +102,7 @@ void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
}
template<SWR_FORMAT format>
-INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, DWORD clear[4])
+INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, DWORD clear[4], const SWR_RECT& rect)
{
// convert clear color to hottile format
// clear color is in RGBA float/uint32
@@ -122,32 +122,33 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
uint32_t tileX, tileY;
MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
- const API_STATE& state = GetApiState(pDC);
-
- int top = KNOB_MACROTILE_Y_DIM_FIXED * tileY;
- int bottom = top + KNOB_MACROTILE_Y_DIM_FIXED - 1;
- int left = KNOB_MACROTILE_X_DIM_FIXED * tileX;
- int right = left + KNOB_MACROTILE_X_DIM_FIXED - 1;
- // intersect with scissor
- top = std::max(top, state.scissorInFixedPoint.top);
- left = std::max(left, state.scissorInFixedPoint.left);
- bottom = std::min(bottom, state.scissorInFixedPoint.bottom);
- right = std::min(right, state.scissorInFixedPoint.right);
+ // Init to full macrotile
+ SWR_RECT clearTile =
+ {
+ KNOB_MACROTILE_X_DIM * int32_t(tileX),
+ KNOB_MACROTILE_Y_DIM * int32_t(tileY),
+ KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
+ KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1),
+ };
+
+ // intersect with clear rect
+ clearTile &= rect;
// translate to local hottile origin
- top -= KNOB_MACROTILE_Y_DIM_FIXED * tileY;
- bottom -= KNOB_MACROTILE_Y_DIM_FIXED * tileY;
- left -= KNOB_MACROTILE_X_DIM_FIXED * tileX;
- right -= KNOB_MACROTILE_X_DIM_FIXED * tileX;
+ clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
+
+ // Make maximums inclusive (needed for convert to raster tiles)
+ clearTile.xmax -= 1;
+ clearTile.ymax -= 1;
// convert to raster tiles
- top >>= (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
- bottom >>= (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
- left >>= (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
- right >>= (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
+ clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT);
+ clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT);
+ clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT);
+ clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT);
- const int numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
+ const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
// compute steps between raster tile samples / raster tiles / macro tile rows
const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
@@ -155,16 +156,16 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples);
- uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, left, top)) * numSamples;
+ uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
// loop over all raster tiles in the current hot tile
- for (int y = top; y <= bottom; ++y)
+ for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
{
uint8_t* pRasterTile = pRasterTileRow;
- for (int x = left; x <= right; ++x)
+ for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
{
- for( int sampleNum = 0; sampleNum < numSamples; sampleNum++)
+ for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
{
ClearRasterTile<format>(pRasterTile, vClear);
pRasterTile += rasterTileSampleStep;
@@ -241,7 +242,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
SWR_ASSERT(pfnClearTiles != nullptr);
- pfnClearTiles(pDC, SWR_ATTACHMENT_COLOR0, macroTile, clearData);
+ pfnClearTiles(pDC, SWR_ATTACHMENT_COLOR0, macroTile, clearData, pClear->rect);
}
if (pClear->flags.mask & SWR_CLEAR_DEPTH)
@@ -251,7 +252,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
SWR_ASSERT(pfnClearTiles != nullptr);
- pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, clearData);
+ pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, clearData, pClear->rect);
}
if (pClear->flags.mask & SWR_CLEAR_STENCIL)
@@ -261,7 +262,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
clearData[0] = *(DWORD*)&value;
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
- pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData);
+ pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData, pClear->rect);
}
RDTSC_STOP(BEClear, 0, 0);
@@ -307,13 +308,13 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[srcFormat];
SWR_ASSERT(pfnClearTiles != nullptr);
- pfnClearTiles(pDC, pDesc->attachment, macroTile, pHotTile->clearData);
+ pfnClearTiles(pDC, pDesc->attachment, macroTile, pHotTile->clearData, pDesc->rect);
}
if (pHotTile->state == HOTTILE_DIRTY || pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
{
- int destX = KNOB_MACROTILE_X_DIM * x;
- int destY = KNOB_MACROTILE_Y_DIM * y;
+ int32_t destX = KNOB_MACROTILE_X_DIM * x;
+ int32_t destY = KNOB_MACROTILE_Y_DIM * y;
pContext->pfnStoreTile(GetPrivateState(pDC), srcFormat,
pDesc->attachment, destX, destY, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
@@ -334,7 +335,7 @@ void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint3
DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC *)pData;
SWR_CONTEXT *pContext = pDC->pContext;
- const int numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
+ const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; ++i)
{
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 0a85ebe30a5..81820530024 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -109,6 +109,7 @@ union CLEAR_FLAGS
struct CLEAR_DESC
{
+ SWR_RECT rect;
CLEAR_FLAGS flags;
float clearRTColor[4]; // RGBA_32F
float clearDepth; // [0..1]
@@ -136,6 +137,7 @@ struct STORE_TILES_DESC
{
SWR_RENDERTARGET_ATTACHMENT attachment;
SWR_TILE_STATE postStoreTileState;
+ SWR_RECT rect;
};
struct COMPUTE_DESC
@@ -271,8 +273,8 @@ OSALIGNLINE(struct) API_STATE
SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
SWR_VIEWPORT_MATRICES vpMatrices;
- BBOX scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
- BBOX scissorInFixedPoint;
+ SWR_RECT scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
+ SWR_RECT scissorInFixedPoint;
// Backend state
SWR_BACKEND_STATE backendState;
@@ -494,8 +496,5 @@ struct SWR_CONTEXT
TileSet singleThreadLockedTiles;
};
-void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
-void WakeAllThreads(SWR_CONTEXT *pContext);
-
#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; }
#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 2809502ee34..04c62adbc5a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -93,26 +93,24 @@ void ProcessClear(
uint32_t workerId,
void *pUserData)
{
- CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
+ CLEAR_DESC *pDesc = (CLEAR_DESC*)pUserData;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
- const API_STATE& state = GetApiState(pDC);
-
// queue a clear to each macro tile
- // compute macro tile bounds for the current scissor/viewport
- uint32_t macroTileLeft = state.scissorInFixedPoint.left / KNOB_MACROTILE_X_DIM_FIXED;
- uint32_t macroTileRight = state.scissorInFixedPoint.right / KNOB_MACROTILE_X_DIM_FIXED;
- uint32_t macroTileTop = state.scissorInFixedPoint.top / KNOB_MACROTILE_Y_DIM_FIXED;
- uint32_t macroTileBottom = state.scissorInFixedPoint.bottom / KNOB_MACROTILE_Y_DIM_FIXED;
+ // compute macro tile bounds for the specified rect
+ uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
+ uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
+ uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
+ uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
BE_WORK work;
work.type = CLEAR;
work.pfnWork = ProcessClearBE;
- work.desc.clear = *pClear;
+ work.desc.clear = *pDesc;
- for (uint32_t y = macroTileTop; y <= macroTileBottom; ++y)
+ for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
{
- for (uint32_t x = macroTileLeft; x <= macroTileRight; ++x)
+ for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
{
pTileMgr->enqueue(x, y, &work);
}
@@ -133,28 +131,25 @@ void ProcessStoreTiles(
void *pUserData)
{
RDTSC_START(FEProcessStoreTiles);
- STORE_TILES_DESC *pStore = (STORE_TILES_DESC*)pUserData;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
-
- const API_STATE& state = GetApiState(pDC);
+ STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
// queue a store to each macro tile
- // compute macro tile bounds for the current render target
- const uint32_t macroWidth = KNOB_MACROTILE_X_DIM;
- const uint32_t macroHeight = KNOB_MACROTILE_Y_DIM;
-
- uint32_t numMacroTilesX = ((uint32_t)state.vp[0].width + (uint32_t)state.vp[0].x + (macroWidth - 1)) / macroWidth;
- uint32_t numMacroTilesY = ((uint32_t)state.vp[0].height + (uint32_t)state.vp[0].y + (macroHeight - 1)) / macroHeight;
+ // compute macro tile bounds for the specified rect
+ uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
+ uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
+ uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
+ uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
// store tiles
BE_WORK work;
work.type = STORETILES;
work.pfnWork = ProcessStoreTileBE;
- work.desc.storeTiles = *pStore;
+ work.desc.storeTiles = *pDesc;
- for (uint32_t x = 0; x < numMacroTilesX; ++x)
+ for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
{
- for (uint32_t y = 0; y < numMacroTilesY; ++y)
+ for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
{
pTileMgr->enqueue(x, y, &work);
}
@@ -177,64 +172,39 @@ void ProcessDiscardInvalidateTiles(
void *pUserData)
{
RDTSC_START(FEProcessInvalidateTiles);
- DISCARD_INVALIDATE_TILES_DESC *pInv = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
+ DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
- SWR_RECT rect;
+ // compute macro tile bounds for the specified rect
+ uint32_t macroTileXMin = (pDesc->rect.xmin + KNOB_MACROTILE_X_DIM - 1) / KNOB_MACROTILE_X_DIM;
+ uint32_t macroTileXMax = (pDesc->rect.xmax / KNOB_MACROTILE_X_DIM) - 1;
+ uint32_t macroTileYMin = (pDesc->rect.ymin + KNOB_MACROTILE_Y_DIM - 1) / KNOB_MACROTILE_Y_DIM;
+ uint32_t macroTileYMax = (pDesc->rect.ymax / KNOB_MACROTILE_Y_DIM) - 1;
- if (pInv->rect.top | pInv->rect.bottom | pInv->rect.right | pInv->rect.left)
- {
- // Valid rect
- rect = pInv->rect;
- }
- else
- {
- // Use viewport dimensions
- const API_STATE& state = GetApiState(pDC);
-
- rect.left = (uint32_t)state.vp[0].x;
- rect.right = (uint32_t)(state.vp[0].x + state.vp[0].width);
- rect.top = (uint32_t)state.vp[0].y;
- rect.bottom = (uint32_t)(state.vp[0].y + state.vp[0].height);
- }
-
- // queue a store to each macro tile
- // compute macro tile bounds for the current render target
- uint32_t macroWidth = KNOB_MACROTILE_X_DIM;
- uint32_t macroHeight = KNOB_MACROTILE_Y_DIM;
-
- // Setup region assuming full tiles
- uint32_t macroTileStartX = (rect.left + (macroWidth - 1)) / macroWidth;
- uint32_t macroTileStartY = (rect.top + (macroHeight - 1)) / macroHeight;
-
- uint32_t macroTileEndX = rect.right / macroWidth;
- uint32_t macroTileEndY = rect.bottom / macroHeight;
-
- if (pInv->fullTilesOnly == false)
+ if (pDesc->fullTilesOnly == false)
{
// include partial tiles
- macroTileStartX = rect.left / macroWidth;
- macroTileStartY = rect.top / macroHeight;
-
- macroTileEndX = (rect.right + macroWidth - 1) / macroWidth;
- macroTileEndY = (rect.bottom + macroHeight - 1) / macroHeight;
+ macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
+ macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
+ macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
+ macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
}
- SWR_ASSERT(macroTileEndX <= KNOB_NUM_HOT_TILES_X);
- SWR_ASSERT(macroTileEndY <= KNOB_NUM_HOT_TILES_Y);
+ SWR_ASSERT(macroTileXMax <= KNOB_NUM_HOT_TILES_X);
+ SWR_ASSERT(macroTileYMax <= KNOB_NUM_HOT_TILES_Y);
- macroTileEndX = std::min<uint32_t>(macroTileEndX, KNOB_NUM_HOT_TILES_X);
- macroTileEndY = std::min<uint32_t>(macroTileEndY, KNOB_NUM_HOT_TILES_Y);
+ macroTileXMax = std::min<int32_t>(macroTileXMax, KNOB_NUM_HOT_TILES_X);
+ macroTileYMax = std::min<int32_t>(macroTileYMax, KNOB_NUM_HOT_TILES_Y);
// load tiles
BE_WORK work;
work.type = DISCARDINVALIDATETILES;
work.pfnWork = ProcessDiscardInvalidateTilesBE;
- work.desc.discardInvalidateTiles = *pInv;
+ work.desc.discardInvalidateTiles = *pDesc;
- for (uint32_t x = macroTileStartX; x < macroTileEndX; ++x)
+ for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
{
- for (uint32_t y = macroTileStartY; y < macroTileEndY; ++y)
+ for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
{
pTileMgr->enqueue(x, y, &work);
}
@@ -587,7 +557,7 @@ static void StreamOut(
//////////////////////////////////////////////////////////////////////////
/// @brief Computes number of invocations. The current index represents
/// the start of the SIMD. The max index represents how much work
-/// items are remaining. If there is less then a SIMD's left of work
+/// items are remaining. If there is less then a SIMD's xmin of work
/// then return the remaining amount of work.
/// @param curIndex - The start index for the SIMD.
/// @param maxIndex - The last index for all work items.
@@ -1694,10 +1664,10 @@ INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari
vMaxY = _simd_max_epi32(vMaxY, vY[1]);
vMaxY = _simd_max_epi32(vMaxY, vY[2]);
- bbox.left = vMinX;
- bbox.right = vMaxX;
- bbox.top = vMinY;
- bbox.bottom = vMaxY;
+ bbox.xmin = vMinX;
+ bbox.xmax = vMaxX;
+ bbox.ymin = vMinY;
+ bbox.ymax = vMaxY;
}
//////////////////////////////////////////////////////////////////////////
@@ -1727,10 +1697,10 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c
/// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
/// expand bbox by 1/256; coverage will be correctly handled in the rasterizer.
- bbox.left = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
- bbox.right = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
- bbox.top = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
- bbox.bottom = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
+ bbox.xmin = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
+ bbox.xmax = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
+ bbox.ymin = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
+ bbox.ymax = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
}
//////////////////////////////////////////////////////////////////////////
@@ -1940,27 +1910,27 @@ void BinTriangles(
// determine if triangle falls between pixel centers and discard
// only discard for non-MSAA case and when conservative rast is disabled
- // (left + 127) & ~255
- // (right + 128) & ~255
+ // (xmin + 127) & ~255
+ // (xmax + 128) & ~255
if(rastState.sampleCount == SWR_MULTISAMPLE_1X && (!CT::IsConservativeT::value))
{
origTriMask = triMask;
int cullCenterMask;
{
- simdscalari left = _simd_add_epi32(bbox.left, _simd_set1_epi32(127));
- left = _simd_and_si(left, _simd_set1_epi32(~255));
- simdscalari right = _simd_add_epi32(bbox.right, _simd_set1_epi32(128));
- right = _simd_and_si(right, _simd_set1_epi32(~255));
+ simdscalari xmin = _simd_add_epi32(bbox.xmin, _simd_set1_epi32(127));
+ xmin = _simd_and_si(xmin, _simd_set1_epi32(~255));
+ simdscalari xmax = _simd_add_epi32(bbox.xmax, _simd_set1_epi32(128));
+ xmax = _simd_and_si(xmax, _simd_set1_epi32(~255));
- simdscalari vMaskH = _simd_cmpeq_epi32(left, right);
+ simdscalari vMaskH = _simd_cmpeq_epi32(xmin, xmax);
- simdscalari top = _simd_add_epi32(bbox.top, _simd_set1_epi32(127));
- top = _simd_and_si(top, _simd_set1_epi32(~255));
- simdscalari bottom = _simd_add_epi32(bbox.bottom, _simd_set1_epi32(128));
- bottom = _simd_and_si(bottom, _simd_set1_epi32(~255));
+ simdscalari ymin = _simd_add_epi32(bbox.ymin, _simd_set1_epi32(127));
+ ymin = _simd_and_si(ymin, _simd_set1_epi32(~255));
+ simdscalari ymax = _simd_add_epi32(bbox.ymax, _simd_set1_epi32(128));
+ ymax = _simd_and_si(ymax, _simd_set1_epi32(~255));
- simdscalari vMaskV = _simd_cmpeq_epi32(top, bottom);
+ simdscalari vMaskV = _simd_cmpeq_epi32(ymin, ymax);
vMaskV = _simd_or_si(vMaskH, vMaskV);
cullCenterMask = _simd_movemask_ps(_simd_castsi_ps(vMaskV));
}
@@ -1973,26 +1943,26 @@ void BinTriangles(
}
}
- // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive.
- bbox.left = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left));
- bbox.top = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top));
- bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
- bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));
+ // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
+ bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
+ bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
+ bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
+ bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
if(CT::IsConservativeT::value)
{
// in the case where a degenerate triangle is on a scissor edge, we need to make sure the primitive bbox has
- // some area. Bump the right/bottom edges out
- simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.top, bbox.bottom);
- bbox.bottom = _simd_blendv_epi32(bbox.bottom, _simd_add_epi32(bbox.bottom, _simd_set1_epi32(1)), topEqualsBottom);
- simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.left, bbox.right);
- bbox.right = _simd_blendv_epi32(bbox.right, _simd_add_epi32(bbox.right, _simd_set1_epi32(1)), leftEqualsRight);
+ // some area. Bump the xmax/ymax edges out
+ simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.ymin, bbox.ymax);
+ bbox.ymax = _simd_blendv_epi32(bbox.ymax, _simd_add_epi32(bbox.ymax, _simd_set1_epi32(1)), topEqualsBottom);
+ simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.xmin, bbox.xmax);
+ bbox.xmax = _simd_blendv_epi32(bbox.xmax, _simd_add_epi32(bbox.xmax, _simd_set1_epi32(1)), leftEqualsRight);
}
// Cull tris completely outside scissor
{
- simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
- simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom);
+ simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax);
+ simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax);
simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY);
uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY));
triMask = triMask & ~maskOutsideScissor;
@@ -2004,16 +1974,16 @@ void BinTriangles(
}
// Convert triangle bbox to macrotile units.
- bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
- bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
- bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
- bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+ bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+ bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+ bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+ bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH];
- _simd_store_si((simdscalari*)aMTLeft, bbox.left);
- _simd_store_si((simdscalari*)aMTRight, bbox.right);
- _simd_store_si((simdscalari*)aMTTop, bbox.top);
- _simd_store_si((simdscalari*)aMTBottom, bbox.bottom);
+ _simd_store_si((simdscalari*)aMTLeft, bbox.xmin);
+ _simd_store_si((simdscalari*)aMTRight, bbox.xmax);
+ _simd_store_si((simdscalari*)aMTTop, bbox.ymin);
+ _simd_store_si((simdscalari*)aMTBottom, bbox.ymax);
// transpose verts needed for backend
/// @todo modify BE to take non-transformed verts
@@ -2196,11 +2166,11 @@ void BinPoints(
if (CanUseSimplePoints(pDC))
{
- // adjust for top-left rule
+ // adjust for ymin-xmin rule
vXi = _simd_sub_epi32(vXi, _simd_set1_epi32(1));
vYi = _simd_sub_epi32(vYi, _simd_set1_epi32(1));
- // cull points off the top-left edge of the viewport
+ // cull points off the ymin-xmin edge of the viewport
primMask &= ~_simd_movemask_ps(_simd_castsi_ps(vXi));
primMask &= ~_simd_movemask_ps(_simd_castsi_ps(vYi));
@@ -2325,40 +2295,40 @@ void BinPoints(
// bloat point to bbox
simdBBox bbox;
- bbox.left = bbox.right = vXi;
- bbox.top = bbox.bottom = vYi;
+ bbox.xmin = bbox.xmax = vXi;
+ bbox.ymin = bbox.ymax = vYi;
simdscalar vHalfWidth = _simd_mul_ps(vPointSize, _simd_set1_ps(0.5f));
simdscalari vHalfWidthi = fpToFixedPointVertical(vHalfWidth);
- bbox.left = _simd_sub_epi32(bbox.left, vHalfWidthi);
- bbox.right = _simd_add_epi32(bbox.right, vHalfWidthi);
- bbox.top = _simd_sub_epi32(bbox.top, vHalfWidthi);
- bbox.bottom = _simd_add_epi32(bbox.bottom, vHalfWidthi);
+ bbox.xmin = _simd_sub_epi32(bbox.xmin, vHalfWidthi);
+ bbox.xmax = _simd_add_epi32(bbox.xmax, vHalfWidthi);
+ bbox.ymin = _simd_sub_epi32(bbox.ymin, vHalfWidthi);
+ bbox.ymax = _simd_add_epi32(bbox.ymax, vHalfWidthi);
- // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive.
- bbox.left = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left));
- bbox.top = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top));
- bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
- bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));
+ // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
+ bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
+ bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
+ bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
+ bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
// Cull bloated points completely outside scissor
- simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
- simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom);
+ simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax);
+ simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax);
simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY);
uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY));
primMask = primMask & ~maskOutsideScissor;
// Convert bbox to macrotile units.
- bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
- bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
- bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
- bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+ bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+ bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+ bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+ bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH];
- _simd_store_si((simdscalari*)aMTLeft, bbox.left);
- _simd_store_si((simdscalari*)aMTRight, bbox.right);
- _simd_store_si((simdscalari*)aMTTop, bbox.top);
- _simd_store_si((simdscalari*)aMTBottom, bbox.bottom);
+ _simd_store_si((simdscalari*)aMTLeft, bbox.xmin);
+ _simd_store_si((simdscalari*)aMTRight, bbox.xmax);
+ _simd_store_si((simdscalari*)aMTTop, bbox.ymin);
+ _simd_store_si((simdscalari*)aMTBottom, bbox.ymax);
// store render target array index
OSALIGNSIMD(uint32_t) aRTAI[KNOB_SIMD_WIDTH];
@@ -2543,35 +2513,35 @@ void BinLines(
// Calc bounding box of lines
simdBBox bbox;
- bbox.left = _simd_min_epi32(vXi[0], vXi[1]);
- bbox.right = _simd_max_epi32(vXi[0], vXi[1]);
- bbox.top = _simd_min_epi32(vYi[0], vYi[1]);
- bbox.bottom = _simd_max_epi32(vYi[0], vYi[1]);
+ bbox.xmin = _simd_min_epi32(vXi[0], vXi[1]);
+ bbox.xmax = _simd_max_epi32(vXi[0], vXi[1]);
+ bbox.ymin = _simd_min_epi32(vYi[0], vYi[1]);
+ bbox.ymax = _simd_max_epi32(vYi[0], vYi[1]);
// bloat bbox by line width along minor axis
simdscalar vHalfWidth = _simd_set1_ps(rastState.lineWidth / 2.0f);
simdscalari vHalfWidthi = fpToFixedPointVertical(vHalfWidth);
simdBBox bloatBox;
- bloatBox.left = _simd_sub_epi32(bbox.left, vHalfWidthi);
- bloatBox.right = _simd_add_epi32(bbox.right, vHalfWidthi);
- bloatBox.top = _simd_sub_epi32(bbox.top, vHalfWidthi);
- bloatBox.bottom = _simd_add_epi32(bbox.bottom, vHalfWidthi);
-
- bbox.left = _simd_blendv_epi32(bbox.left, bloatBox.left, vYmajorMask);
- bbox.right = _simd_blendv_epi32(bbox.right, bloatBox.right, vYmajorMask);
- bbox.top = _simd_blendv_epi32(bloatBox.top, bbox.top, vYmajorMask);
- bbox.bottom = _simd_blendv_epi32(bloatBox.bottom, bbox.bottom, vYmajorMask);
-
- // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive.
- bbox.left = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left));
- bbox.top = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top));
- bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
- bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));
+ bloatBox.xmin = _simd_sub_epi32(bbox.xmin, vHalfWidthi);
+ bloatBox.xmax = _simd_add_epi32(bbox.xmax, vHalfWidthi);
+ bloatBox.ymin = _simd_sub_epi32(bbox.ymin, vHalfWidthi);
+ bloatBox.ymax = _simd_add_epi32(bbox.ymax, vHalfWidthi);
+
+ bbox.xmin = _simd_blendv_epi32(bbox.xmin, bloatBox.xmin, vYmajorMask);
+ bbox.xmax = _simd_blendv_epi32(bbox.xmax, bloatBox.xmax, vYmajorMask);
+ bbox.ymin = _simd_blendv_epi32(bloatBox.ymin, bbox.ymin, vYmajorMask);
+ bbox.ymax = _simd_blendv_epi32(bloatBox.ymax, bbox.ymax, vYmajorMask);
+
+ // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
+ bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
+ bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
+ bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
+ bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
// Cull prims completely outside scissor
{
- simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
- simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom);
+ simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax);
+ simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax);
simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY);
uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY));
primMask = primMask & ~maskOutsideScissor;
@@ -2583,16 +2553,16 @@ void BinLines(
}
// Convert triangle bbox to macrotile units.
- bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
- bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
- bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
- bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+ bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+ bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+ bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+ bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH];
- _simd_store_si((simdscalari*)aMTLeft, bbox.left);
- _simd_store_si((simdscalari*)aMTRight, bbox.right);
- _simd_store_si((simdscalari*)aMTTop, bbox.top);
- _simd_store_si((simdscalari*)aMTBottom, bbox.bottom);
+ _simd_store_si((simdscalari*)aMTLeft, bbox.xmin);
+ _simd_store_si((simdscalari*)aMTRight, bbox.xmax);
+ _simd_store_si((simdscalari*)aMTTop, bbox.ymin);
+ _simd_store_si((simdscalari*)aMTBottom, bbox.ymax);
// transpose verts needed for backend
/// @todo modify BE to take non-transformed verts
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
index 367d1998b91..6316156bfd0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -240,7 +240,7 @@ void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices,
}
INLINE
-void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox)
+void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, SWR_RECT &bbox)
{
// Need horizontal fp min here
__m128i vX1 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 2, 0, 1));
@@ -262,10 +262,10 @@ void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox)
__m128i vMaxY = _mm_max_epi32(vY, vY1);
vMaxY = _mm_max_epi32(vMaxY, vY2);
- bbox.left = _mm_extract_epi32(vMinX, 0);
- bbox.right = _mm_extract_epi32(vMaxX, 0);
- bbox.top = _mm_extract_epi32(vMinY, 0);
- bbox.bottom = _mm_extract_epi32(vMaxY, 0);
+ bbox.xmin = _mm_extract_epi32(vMinX, 0);
+ bbox.xmax = _mm_extract_epi32(vMaxX, 0);
+ bbox.ymin = _mm_extract_epi32(vMinY, 0);
+ bbox.ymax = _mm_extract_epi32(vMaxY, 0);
}
INLINE
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
index 768b58a1d97..9a8d062818d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
@@ -591,7 +591,7 @@ INLINE void UpdateEdgeMasks<SingleSampleT>(const __m256d(&)[3], const __m256d* v
template <typename RasterScissorEdgesT, typename IsConservativeT, typename RT>
struct ComputeScissorEdges
{
- INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y,
+ INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7]){};
};
@@ -604,20 +604,20 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT>
//////////////////////////////////////////////////////////////////////////
/// @brief Intersect tri bbox with scissor, compute scissor edge vectors,
/// evaluate edge equations and offset them away from pixel center.
- INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y,
+ INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
{
// if conservative rasterizing, triangle bbox intersected with scissor bbox is used
- BBOX scissor;
- scissor.left = std::max(triBBox.left, scissorBBox.left);
- scissor.right = std::min(triBBox.right, scissorBBox.right);
- scissor.top = std::max(triBBox.top, scissorBBox.top);
- scissor.bottom = std::min(triBBox.bottom, scissorBBox.bottom);
+ SWR_RECT scissor;
+ scissor.xmin = std::max(triBBox.xmin, scissorBBox.xmin);
+ scissor.xmax = std::min(triBBox.xmax, scissorBBox.xmax);
+ scissor.ymin = std::max(triBBox.ymin, scissorBBox.ymin);
+ scissor.ymax = std::min(triBBox.ymax, scissorBBox.ymax);
- POS topLeft{scissor.left, scissor.top};
- POS bottomLeft{scissor.left, scissor.bottom};
- POS topRight{scissor.right, scissor.top};
- POS bottomRight{scissor.right, scissor.bottom};
+ POS topLeft{scissor.xmin, scissor.ymin};
+ POS bottomLeft{scissor.xmin, scissor.ymax};
+ POS topRight{scissor.xmax, scissor.ymin};
+ POS bottomRight{scissor.xmax, scissor.ymax};
// construct 4 scissor edges in ccw direction
ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
@@ -625,10 +625,10 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT>
ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
ComputeEdgeData(topRight, topLeft, rastEdges[6]);
- vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.left)) + (rastEdges[3].b * (y - scissor.top)));
- vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.left)) + (rastEdges[4].b * (y - scissor.bottom)));
- vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
- vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));
+ vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin)));
+ vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax)));
+ vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax)));
+ vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin)));
// if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]);
@@ -647,14 +647,14 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT>
{
//////////////////////////////////////////////////////////////////////////
/// @brief Compute scissor edge vectors and evaluate edge equations
- INLINE ComputeScissorEdges(const BBOX &, const BBOX &scissorBBox, const int32_t x, const int32_t y,
+ INLINE ComputeScissorEdges(const SWR_RECT &, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
{
- const BBOX &scissor = scissorBBox;
- POS topLeft{scissor.left, scissor.top};
- POS bottomLeft{scissor.left, scissor.bottom};
- POS topRight{scissor.right, scissor.top};
- POS bottomRight{scissor.right, scissor.bottom};
+ const SWR_RECT &scissor = scissorBBox;
+ POS topLeft{scissor.xmin, scissor.ymin};
+ POS bottomLeft{scissor.xmin, scissor.ymax};
+ POS topRight{scissor.xmax, scissor.ymin};
+ POS bottomRight{scissor.xmax, scissor.ymax};
// construct 4 scissor edges in ccw direction
ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
@@ -662,10 +662,10 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT>
ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
ComputeEdgeData(topRight, topLeft, rastEdges[6]);
- vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.left)) + (rastEdges[3].b * (y - scissor.top)));
- vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.left)) + (rastEdges[4].b * (y - scissor.bottom)));
- vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
- vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));
+ vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin)));
+ vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax)));
+ vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax)));
+ vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin)));
}
};
@@ -964,23 +964,23 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
triDesc.Z[2] += ComputeDepthBias(&rastState, &triDesc, workDesc.pTriBuffer + 8);
// Calc bounding box of triangle
- OSALIGNSIMD(BBOX) bbox;
+ OSALIGNSIMD(SWR_RECT) bbox;
calcBoundingBoxInt(vXi, vYi, bbox);
if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
{
// If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid
- bbox.left--; bbox.right++; bbox.top--; bbox.bottom++;
- SWR_ASSERT(state.scissorInFixedPoint.left >= 0 && state.scissorInFixedPoint.top >= 0,
+ bbox.xmin--; bbox.xmax++; bbox.ymin--; bbox.ymax++;
+ SWR_ASSERT(state.scissorInFixedPoint.xmin >= 0 && state.scissorInFixedPoint.ymin >= 0,
"Conservative rast degenerate handling requires a valid scissor rect");
}
// Intersect with scissor/viewport
- OSALIGNSIMD(BBOX) intersect;
- intersect.left = std::max(bbox.left, state.scissorInFixedPoint.left);
- intersect.right = std::min(bbox.right - 1, state.scissorInFixedPoint.right);
- intersect.top = std::max(bbox.top, state.scissorInFixedPoint.top);
- intersect.bottom = std::min(bbox.bottom - 1, state.scissorInFixedPoint.bottom);
+ OSALIGNSIMD(SWR_RECT) intersect;
+ intersect.xmin = std::max(bbox.xmin, state.scissorInFixedPoint.xmin);
+ intersect.xmax = std::min(bbox.xmax - 1, state.scissorInFixedPoint.xmax);
+ intersect.ymin = std::max(bbox.ymin, state.scissorInFixedPoint.ymin);
+ intersect.ymax = std::min(bbox.ymax - 1, state.scissorInFixedPoint.ymax);
triDesc.triFlags = workDesc.triFlags;
@@ -992,20 +992,20 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
- intersect.left = std::max(intersect.left, macroBoxLeft);
- intersect.top = std::max(intersect.top, macroBoxTop);
- intersect.right = std::min(intersect.right, macroBoxRight);
- intersect.bottom = std::min(intersect.bottom, macroBoxBottom);
+ intersect.xmin = std::max(intersect.xmin, macroBoxLeft);
+ intersect.ymin = std::max(intersect.ymin, macroBoxTop);
+ intersect.xmax = std::min(intersect.xmax, macroBoxRight);
+ intersect.ymax = std::min(intersect.ymax, macroBoxBottom);
- SWR_ASSERT(intersect.left <= intersect.right && intersect.top <= intersect.bottom && intersect.left >= 0 && intersect.right >= 0 && intersect.top >= 0 && intersect.bottom >= 0);
+ SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0);
RDTSC_STOP(BETriangleSetup, 0, pDC->drawId);
// update triangle desc
- uint32_t minTileX = intersect.left >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
- uint32_t minTileY = intersect.top >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
- uint32_t maxTileX = intersect.right >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
- uint32_t maxTileY = intersect.bottom >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
+ uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
+ uint32_t minTileY = intersect.ymin >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
+ uint32_t maxTileX = intersect.xmax >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
+ uint32_t maxTileY = intersect.ymax >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
uint32_t numTilesX = maxTileX - minTileX + 1;
uint32_t numTilesY = maxTileY - minTileY + 1;
@@ -1020,8 +1020,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
// Step to pixel center of top-left pixel of the triangle bbox
// Align intersect bbox (top/left) to raster tile's (top/left).
- int32_t x = AlignDown(intersect.left, (FIXED_POINT_SCALE * KNOB_TILE_X_DIM));
- int32_t y = AlignDown(intersect.top, (FIXED_POINT_SCALE * KNOB_TILE_Y_DIM));
+ int32_t x = AlignDown(intersect.xmin, (FIXED_POINT_SCALE * KNOB_TILE_X_DIM));
+ int32_t y = AlignDown(intersect.ymin, (FIXED_POINT_SCALE * KNOB_TILE_Y_DIM));
// convenience typedef
typedef typename RT::NumRasterSamplesT NumRasterSamplesT;
@@ -1663,17 +1663,17 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
// make sure this macrotile intersects the triangle
__m128i vXai = fpToFixedPoint(vXa);
__m128i vYai = fpToFixedPoint(vYa);
- OSALIGNSIMD(BBOX) bboxA;
+ OSALIGNSIMD(SWR_RECT) bboxA;
calcBoundingBoxInt(vXai, vYai, bboxA);
- if (!(bboxA.left > macroBoxRight ||
- bboxA.left > state.scissorInFixedPoint.right ||
- bboxA.right - 1 < macroBoxLeft ||
- bboxA.right - 1 < state.scissorInFixedPoint.left ||
- bboxA.top > macroBoxBottom ||
- bboxA.top > state.scissorInFixedPoint.bottom ||
- bboxA.bottom - 1 < macroBoxTop ||
- bboxA.bottom - 1 < state.scissorInFixedPoint.top)) {
+ if (!(bboxA.xmin > macroBoxRight ||
+ bboxA.xmin > state.scissorInFixedPoint.xmax ||
+ bboxA.xmax - 1 < macroBoxLeft ||
+ bboxA.xmax - 1 < state.scissorInFixedPoint.xmin ||
+ bboxA.ymin > macroBoxBottom ||
+ bboxA.ymin > state.scissorInFixedPoint.ymax ||
+ bboxA.ymax - 1 < macroBoxTop ||
+ bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) {
// rasterize triangle
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}
@@ -1739,14 +1739,14 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
vYai = fpToFixedPoint(vYa);
calcBoundingBoxInt(vXai, vYai, bboxA);
- if (!(bboxA.left > macroBoxRight ||
- bboxA.left > state.scissorInFixedPoint.right ||
- bboxA.right - 1 < macroBoxLeft ||
- bboxA.right - 1 < state.scissorInFixedPoint.left ||
- bboxA.top > macroBoxBottom ||
- bboxA.top > state.scissorInFixedPoint.bottom ||
- bboxA.bottom - 1 < macroBoxTop ||
- bboxA.bottom - 1 < state.scissorInFixedPoint.top)) {
+ if (!(bboxA.xmin > macroBoxRight ||
+ bboxA.xmin > state.scissorInFixedPoint.xmax ||
+ bboxA.xmax - 1 < macroBoxLeft ||
+ bboxA.xmax - 1 < state.scissorInFixedPoint.xmin ||
+ bboxA.ymin > macroBoxBottom ||
+ bboxA.ymin > state.scissorInFixedPoint.ymax ||
+ bboxA.ymax - 1 < macroBoxTop ||
+ bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) {
// rasterize triangle
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h
index 0a9430af630..00878925359 100644
--- a/src/gallium/drivers/swr/rasterizer/core/utils.h
+++ b/src/gallium/drivers/swr/rasterizer/core/utils.h
@@ -33,6 +33,7 @@
#include "common/os.h"
#include "common/simdintrin.h"
#include "common/swr_assert.h"
+#include "core/api.h"
#if defined(_WIN64) || defined(__x86_64__)
#define _MM_INSERT_EPI64 _mm_insert_epi64
@@ -74,53 +75,12 @@ INLINE __m128i _MM_INSERT_EPI64(__m128i a, int64_t b, const int32_t ndx)
}
#endif
-OSALIGNLINE(struct) BBOX
-{
- int top{ 0 };
- int bottom{ 0 };
- int left{ 0 };
- int right{ 0 };
-
- BBOX() {}
- BBOX(int t, int b, int l, int r) : top(t), bottom(b), left(l), right(r) {}
-
- bool operator==(const BBOX& rhs)
- {
- return (this->top == rhs.top &&
- this->bottom == rhs.bottom &&
- this->left == rhs.left &&
- this->right == rhs.right);
- }
-
- bool operator!=(const BBOX& rhs)
- {
- return !(*this == rhs);
- }
-
- BBOX& Intersect(const BBOX& other)
- {
- this->top = std::max(this->top, other.top);
- this->bottom = std::min(this->bottom, other.bottom);
- this->left = std::max(this->left, other.left);
- this->right = std::min(this->right, other.right);
-
- if (right - left < 0 ||
- bottom - top < 0)
- {
- // Zero area
- top = bottom = left = right = 0;
- }
-
- return *this;
- }
-};
-
struct simdBBox
{
- simdscalari top;
- simdscalari bottom;
- simdscalari left;
- simdscalari right;
+ simdscalari ymin;
+ simdscalari ymax;
+ simdscalari xmin;
+ simdscalari xmax;
};
INLINE
diff --git a/src/gallium/drivers/swr/swr_clear.cpp b/src/gallium/drivers/swr/swr_clear.cpp
index 4b02dd1fb12..a65f8f4918d 100644
--- a/src/gallium/drivers/swr/swr_clear.cpp
+++ b/src/gallium/drivers/swr/swr_clear.cpp
@@ -67,17 +67,9 @@ swr_clear(struct pipe_context *pipe,
((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your const'd-ness */
#endif
- /* Reset viewport to full framebuffer width/height before clear, then
- * restore it */
- /* Scissor affects clear, viewport should not */
- ctx->dirty |= SWR_NEW_VIEWPORT;
- SWR_VIEWPORT vp = {0};
- vp.width = ctx->framebuffer.width;
- vp.height = ctx->framebuffer.height;
- SwrSetViewports(ctx->swrContext, 1, &vp, NULL);
-
swr_update_draw_context(ctx);
- SwrClearRenderTarget(ctx->swrContext, clearMask, color->f, depth, stencil);
+ SwrClearRenderTarget(ctx->swrContext, clearMask, color->f, depth, stencil,
+ ctx->swr_scissor);
}
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
index b4553fb171b..6854d697db9 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -121,6 +121,7 @@ struct swr_context {
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
+ SWR_RECT swr_scissor;
struct pipe_sampler_view *
sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
index 0f6a8c6d88c..a46aef924ab 100644
--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -258,37 +258,13 @@ swr_store_render_target(struct pipe_context *pipe,
/* Only proceed if there's a valid surface to store to */
if (renderTarget->pBaseAddress) {
- /* Set viewport to full renderTarget width/height and disable scissor
- * before StoreTiles */
- boolean change_viewport =
- (ctx->derived.vp.x != 0.0f || ctx->derived.vp.y != 0.0f
- || ctx->derived.vp.width != renderTarget->width
- || ctx->derived.vp.height != renderTarget->height);
- if (change_viewport) {
- SWR_VIEWPORT vp = {0};
- vp.width = renderTarget->width;
- vp.height = renderTarget->height;
- SwrSetViewports(ctx->swrContext, 1, &vp, NULL);
- }
-
- boolean scissor_enable = ctx->derived.rastState.scissorEnable;
- if (scissor_enable) {
- ctx->derived.rastState.scissorEnable = FALSE;
- SwrSetRastState(ctx->swrContext, &ctx->derived.rastState);
- }
-
swr_update_draw_context(ctx);
+ SWR_RECT full_rect =
+ {0, 0, (int32_t)renderTarget->width, (int32_t)renderTarget->height};
SwrStoreTiles(ctx->swrContext,
(enum SWR_RENDERTARGET_ATTACHMENT)attachment,
- post_tile_state);
-
- /* Restore viewport and scissor enable */
- if (change_viewport)
- SwrSetViewports(ctx->swrContext, 1, &ctx->derived.vp, &ctx->derived.vpm);
- if (scissor_enable) {
- ctx->derived.rastState.scissorEnable = scissor_enable;
- SwrSetRastState(ctx->swrContext, &ctx->derived.rastState);
- }
+ post_tile_state,
+ full_rect);
}
}
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index de41ddc704d..4c9a4327e29 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -575,6 +575,10 @@ swr_set_scissor_states(struct pipe_context *pipe,
struct swr_context *ctx = swr_context(pipe);
ctx->scissor = *scissor;
+ ctx->swr_scissor.xmin = scissor->minx;
+ ctx->swr_scissor.xmax = scissor->maxx;
+ ctx->swr_scissor.ymin = scissor->miny;
+ ctx->swr_scissor.ymax = scissor->maxy;
ctx->dirty |= SWR_NEW_SCISSOR;
}
@@ -930,10 +934,7 @@ swr_update_derived(struct pipe_context *pipe,
/* Scissor */
if (ctx->dirty & SWR_NEW_SCISSOR) {
- pipe_scissor_state *scissor = &ctx->scissor;
- BBOX bbox(scissor->miny, scissor->maxy,
- scissor->minx, scissor->maxx);
- SwrSetScissorRects(ctx->swrContext, 1, &bbox);
+ SwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
}
/* Viewport */