diff options
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/api.cpp | 20 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/backend.cpp | 31 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/context.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/depthstencil.h | 40 | ||||
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/core/state.h | 1 |
5 files changed, 81 insertions, 13 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 442cdd420f4..9e13ee142a5 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -33,6 +33,7 @@ #include "core/api.h" #include "core/backend.h" #include "core/context.h" +#include "core/depthstencil.h" #include "core/frontend.h" #include "core/rasterizer.h" #include "core/rdtsc_core.h" @@ -884,6 +885,25 @@ void SetupPipeline(DRAW_CONTEXT *pDC) !pState->state.blendState.renderTarget[rt].writeDisableBlue) ? (1 << rt) : 0; } } + + // Setup depth quantization function + if (pState->state.depthHottileEnable) + { + switch (pState->state.rastState.depthFormat) + { + case R32_FLOAT_X8X24_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT_X8X24_TYPELESS > ; break; + case R32_FLOAT: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ; break; + case R24_UNORM_X8_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R24_UNORM_X8_TYPELESS > ; break; + case R16_UNORM: pState->state.pfnQuantizeDepth = QuantizeDepth < R16_UNORM > ; break; + default: SWR_ASSERT(false, "Unsupported depth format for depth quantiztion."); + pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ; + } + } + else + { + // set up pass-through quantize if depth isn't enabled + pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ; + } } ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index ad0a5a07032..842ea326e68 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -772,8 +772,10 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 psContext.vOneOverW.centroid = psContext.vOneOverW.center; } - // interpolate z + // interpolate and quantize z psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center); + psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); + RDTSC_STOP(BEBarycentric, 0, 0); simdmask clipCoverageMask = coverageMask & MASK; @@ -793,7 +795,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 if(CanEarlyZ(pPSState)) { RDTSC_START(BEEarlyDepthTest); - depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, + depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask); RDTSC_STOP(BEEarlyDepthTest, 0, 0); @@ -825,7 +827,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 if(!CanEarlyZ(pPSState)) { RDTSC_START(BELateDepthTest); - depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, + depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask); RDTSC_STOP(BELateDepthTest, 0, 0); @@ -977,8 +979,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext); - // interpolate z + // interpolate and quantize z psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample); + psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); RDTSC_STOP(BEBarycentric, 0, 0); @@ -1000,7 +1003,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ if (CanEarlyZ(pPSState)) { RDTSC_START(BEEarlyDepthTest); - depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, + depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); RDTSC_STOP(BEEarlyDepthTest, 0, 0); @@ -1033,7 +1036,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ if (!CanEarlyZ(pPSState)) { RDTSC_START(BELateDepthTest); - depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, + depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); RDTSC_STOP(BELateDepthTest, 0, 0); @@ -1200,8 +1203,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t RDTSC_START(BEBarycentric); backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext); - // interpolate z + // interpolate and quantize z psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center); + psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); RDTSC_STOP(BEBarycentric, 0, 0); // execute pixel shader @@ -1263,10 +1267,11 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // calc I & J per sample backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext); - // interpolate z + // interpolate and quantize z if (!pPSState->writesODepth) { vZ[sample] = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample); + vZ[sample] = state.pfnQuantizeDepth(vZ[sample]); } ///@todo: perspective correct vs non-perspective correct clipping? @@ -1292,7 +1297,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // ZTest for this sample RDTSC_START(BEEarlyDepthTest); stencilPassMask[sample] = vCoverageMask[sample]; - depthPassMask[sample] = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, + depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, vZ[sample], pDepthSample, vCoverageMask[sample], pStencilSample, &stencilPassMask[sample]); RDTSC_STOP(BEEarlyDepthTest, 0, 0); @@ -1308,8 +1313,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t { RDTSC_START(BEBarycentric); backendFuncs.pfnCalcPixelBarycentrics(coeffs, psContext); - // interpolate z + // interpolate and quantize z psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center); + psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); RDTSC_STOP(BEBarycentric, 0, 0); // execute pixel shader @@ -1463,8 +1469,9 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, backendFuncs.pfnCalcSampleBarycentrics(coeffs, psContext); - // interpolate z + // interpolate and quantize z psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample); + psContext.vZ = state.pfnQuantizeDepth(psContext.vZ); RDTSC_STOP(BEBarycentric, 0, 0); @@ -1483,7 +1490,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, uint8_t *pStencilSample = pStencilBase + MultisampleTraits<sampleCount>::RasterTileStencilOffset(sample); RDTSC_START(BEEarlyDepthTest); - simdscalar depthPassMask = DepthStencilTest(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, + simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask); DepthStencilWrite(&state.vp[0], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ, pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask); diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 39f23372a18..27abe437718 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -308,6 +308,8 @@ OSALIGNLINE(struct) API_STATE uint32_t depthHottileEnable: 1; uint32_t stencilHottileEnable : 1; }; + + PFN_QUANTIZE_DEPTH pfnQuantizeDepth; }; class MacroTileMgr; diff --git a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h b/src/gallium/drivers/swr/rasterizer/core/depthstencil.h index 2cc9d4054ac..7b55580bf0a 100644 --- a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h +++ b/src/gallium/drivers/swr/rasterizer/core/depthstencil.h @@ -80,14 +80,52 @@ void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simds } +template<SWR_FORMAT depthFormatT> +simdscalar QuantizeDepth(simdscalar depth) +{ + SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0); + uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0); + + if (depthType == SWR_TYPE_FLOAT) + { + // assume only 32bit float depth supported + SWR_ASSERT(depthBpc == 32); + + // matches shader precision, no quantizing needed + return depth; + } + + // should be unorm depth if not float + SWR_ASSERT(depthType == SWR_TYPE_UNORM); + + float quantize = (float)((1 << depthBpc) - 1); + simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize)); + result = _simd_add_ps(result, _simd_set1_ps(0.5f)); + result = _simd_round_ps(result, _MM_FROUND_TO_ZERO); + + if (depthBpc > 16) + { + result = _simd_div_ps(result, _simd_set1_ps(quantize)); + } + else + { + result = _simd_mul_ps(result, _simd_set1_ps(1.0f / quantize)); + } + + return result; +} + INLINE -simdscalar DepthStencilTest(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState, +simdscalar DepthStencilTest(const API_STATE* pState, bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask, uint8_t *pStencilBase, simdscalar* pStencilMask) { static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format"); static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format"); + const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState; + const SWR_VIEWPORT* pViewport = &pState->vp[0]; + simdscalar depthResult = _simd_set1_ps(-1.0f); simdscalar zbuf; diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 5752094ca10..50361068025 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -790,6 +790,7 @@ typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext); typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext); typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext); typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, uint8_t*, simdvector&, simdscalari*, simdscalari*); +typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar); ////////////////////////////////////////////////////////////////////////// /// FRONTEND_STATE |