summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTomasz Pyra <[email protected]>2019-12-12 15:38:43 +0100
committerJan Zielinski <[email protected]>2019-12-13 10:58:36 +0000
commitb62217780a1a5a7cb7eb940a2b9bf265af1a91a0 (patch)
tree6acaef12a398a89cbdaf51955e2d1fa41e24ceb8
parentb37c91c12eb8fcdf763dbd98b17c33f98c63cae3 (diff)
gallium/swr: Fix arb_transform_feedback2
Added support for pause/resume transform feedback. Fixed DrawTransformFeedback. Reviewed-by: Jan Zielinski <[email protected]> Reviewed-by: Krzysztof Raszkowski <[email protected]>
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp13
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.h7
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/context.h3
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp2
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.cpp3
-rw-r--r--src/gallium/drivers/swr/swr_context.cpp13
-rw-r--r--src/gallium/drivers/swr/swr_context.h3
-rw-r--r--src/gallium/drivers/swr/swr_draw.cpp10
-rw-r--r--src/gallium/drivers/swr/swr_screen.cpp4
-rw-r--r--src/gallium/drivers/swr/swr_state.cpp22
10 files changed, 66 insertions, 14 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index a6f86b36f98..5405bf2d8ba 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -82,6 +82,7 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
+ pContext->pfnUpdateStreamOut = pCreateInfo->pfnUpdateStreamOut;
pContext->hExternalMemory = pCreateInfo->hExternalMemory;
@@ -616,9 +617,17 @@ void SwrSetSoBuffers(HANDLE hContext, SWR_STREAMOUT_BUFFER* pSoBuffer, uint32_t
{
API_STATE* pState = GetDrawState(GetContext(hContext));
- SWR_ASSERT((slot < 4), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot);
+ SWR_ASSERT((slot < MAX_SO_STREAMS), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot);
- pState->soBuffer[slot] = *pSoBuffer;
+ // pause: the slot is being unbound, so save its state to allow StreamOut to resume later
+ if ((pState->soBuffer[slot].pBuffer != 0) && (pSoBuffer->pBuffer == 0))
+ pState->soPausedBuffer[slot] = pState->soBuffer[slot];
+
+ // resume
+ if (pState->soPausedBuffer[slot].pBuffer == pSoBuffer->pBuffer)
+ pState->soBuffer[slot] = pState->soPausedBuffer[slot];
+ else
+ pState->soBuffer[slot] = *pSoBuffer;
}
void SwrSetVertexFunc(HANDLE hContext, PFN_VERTEX_FUNC pfnVertexFunc)
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index 93ea0d42535..29651c9beed 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -188,6 +188,12 @@ typedef void(SWR_API* PFN_UPDATE_STATS)(HANDLE hPrivateContext, const SWR_STATS*
typedef void(SWR_API* PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext, const SWR_STATS_FE* pStats);
//////////////////////////////////////////////////////////////////////////
+/// @brief Callback to allow driver to update StreamOut status
+/// @param hPrivateContext - handle to private data
+/// @param numPrims - number of primitives written to StreamOut buffer
+typedef void(SWR_API* PFN_UPDATE_STREAMOUT)(HANDLE hPrivateContext, uint64_t numPrims);
+
+//////////////////////////////////////////////////////////////////////////
/// BucketManager
/// Forward Declaration (see rdtsc_buckets.h for full definition)
/////////////////////////////////////////////////////////////////////////
@@ -272,6 +278,7 @@ struct SWR_CREATECONTEXT_INFO
PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
PFN_UPDATE_STATS pfnUpdateStats;
PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
+ PFN_UPDATE_STREAMOUT pfnUpdateStreamOut;
// Pointer to rdtsc buckets mgr returned to the caller.
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 13cb7c8b856..8f74f135b63 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -276,6 +276,7 @@ OSALIGNLINE(struct) API_STATE
// Streamout state
SWR_STREAMOUT_STATE soState;
mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS];
+ mutable SWR_STREAMOUT_BUFFER soPausedBuffer[MAX_SO_STREAMS];
// Tessellation State
PFN_HS_FUNC pfnHsFunc;
@@ -422,6 +423,7 @@ struct DRAW_DYNAMIC_STATE
SWR_STATS_FE statsFE; // Only one FE thread per DC.
SWR_STATS* pStats;
+ uint64_t soPrims; // number of primitives written to StreamOut buffer
};
// Draw Context
@@ -540,6 +542,7 @@ struct SWR_CONTEXT
PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
PFN_UPDATE_STATS pfnUpdateStats;
PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
+ PFN_UPDATE_STREAMOUT pfnUpdateStreamOut;
// Global Stats
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index ab079ab4aa0..45bc545b164 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -589,6 +589,8 @@ static void StreamOut(
}
}
+ pDC->dynState.soPrims += soContext.numPrimsWritten;
+
UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index 987469340d2..113a31ee0c6 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -714,6 +714,9 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEX
}
}
+ if (pContext->pfnUpdateStreamOut)
+ pContext->pfnUpdateStreamOut(GetPrivateState(pDC), pDC->dynState.soPrims);
+
// Ensure all streaming writes are globally visible before marking this FE done
_mm_mfence();
pDC->doneFE = true;
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
index 74516c80f9b..dbc4487fbda 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -472,6 +472,18 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)
}
}
+static void
+swr_UpdateStreamOut(HANDLE hPrivateContext, uint64_t numPrims) // StreamOut callback: accumulates numPrims into the counter referenced by the draw context
+{
+ swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; // the private handle is the driver's swr_draw_context
+
+ if (!pDC) // no private draw context supplied: nothing to update
+ return;
+
+ if (pDC->soPrims) // counter pointer may be unset; skip the accumulation in that case
+ *pDC->soPrims += numPrims; // add to the running total rather than overwrite it
+}
+
struct pipe_context *
swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
{
@@ -496,6 +508,7 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
createInfo.pfnStoreTile = swr_StoreHotTile;
createInfo.pfnUpdateStats = swr_UpdateStats;
createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;
+ createInfo.pfnUpdateStreamOut = swr_UpdateStreamOut;
createInfo.pfnMakeGfxPtr = swr_MakeGfxPtr;
SWR_THREADING_INFO threadingInfo {0};
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
index 55de8e04663..82e6a6692f7 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -107,6 +107,8 @@ struct swr_draw_context {
struct swr_query_result *pStats; // @llvm_struct
SWR_INTERFACE *pAPI; // @llvm_struct - Needed for the swr_memory callbacks
SWR_TILE_INTERFACE *pTileAPI; // @llvm_struct - Needed for the swr_memory callbacks
+
+ uint64_t* soPrims; //number of primitives written to StreamOut buffer
};
/* gen_llvm_types FINI */
@@ -160,6 +162,7 @@ struct swr_context {
// streamout
pipe_stream_output_target *so_targets[MAX_SO_STREAMS];
uint32_t num_so_targets;
+ uint64_t so_primCounter; // number of primitives written to StreamOut buffer
/* Temp storage for user_buffer constants */
struct swr_scratch_buffers *scratch;
diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
index a94cdd6da0b..0377861b7a4 100644
--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -62,6 +62,16 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
swr_update_draw_context(ctx);
+ struct pipe_draw_info resolved_info;
+ /* DrawTransformFeedback */
+ if (info->count_from_stream_output) {
+ // trick copied from softpipe to modify const struct *info
+ memcpy(&resolved_info, (void*)info, sizeof(struct pipe_draw_info));
+ resolved_info.count = ctx->so_primCounter * resolved_info.vertices_per_patch;
+ resolved_info.max_index = resolved_info.count - 1;
+ info = &resolved_info;
+ }
+
if (ctx->vs->pipe.stream_output.num_outputs) {
if (!ctx->vs->soFunc[info->mode]) {
STREAMOUT_COMPILE_STATE state = {0};
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
index ac53fc518f0..e54be2dc35b 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -277,6 +277,8 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
return 1;
/* MSAA support
@@ -347,7 +349,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
- case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_NATIVE_FENCE_FD:
case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
case PIPE_CAP_FBFETCH:
@@ -365,7 +366,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
- case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_MEMOBJ:
case PIPE_CAP_LOAD_CONSTBUF:
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index 345fef4c856..3a007db4c1c 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1763,22 +1763,23 @@ swr_update_derived(struct pipe_context *pipe,
pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
- for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
+ for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
SWR_STREAMOUT_BUFFER buffer = {0};
- if (!ctx->so_targets[i])
- continue;
- buffer.enable = true;
- buffer.pBuffer =
- (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) +
- ctx->so_targets[i]->buffer_offset);
- buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
- buffer.pitch = stream_output->stride[i];
- buffer.streamOffset = 0;
+ if (ctx->so_targets[i]) {
+ buffer.enable = true;
+ buffer.pBuffer =
+ (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) +
+ ctx->so_targets[i]->buffer_offset);
+ buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
+ buffer.pitch = stream_output->stride[i];
+ buffer.streamOffset = 0;
+ }
ctx->api.pfnSwrSetSoBuffers(ctx->swrContext, &buffer, i);
}
}
+
if (ctx->dirty & (SWR_NEW_CLIP | SWR_NEW_RASTERIZER | SWR_NEW_VS)) {
// shader exporting clip distances overrides all user clip planes
if (ctx->rasterizer->clip_plane_enable &&
@@ -1902,6 +1903,7 @@ swr_set_so_targets(struct pipe_context *pipe,
}
swr->num_so_targets = num_targets;
+ swr->swrDC.soPrims = &swr->so_primCounter;
swr->dirty |= SWR_NEW_SO;
}