diff options
author | Neha Bhende <[email protected]> | 2020-05-26 21:29:50 +0530 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-06-05 06:36:54 +0000 |
commit | ba37d408da30d87b6848d76242d9d797dbef80a0 (patch) | |
tree | 87447cb12f6f5b7ca80c69308ae636639842b4c0 /src/gallium/drivers/svga | |
parent | ccb4ea5a43e89fcc93fff98c881639223f1538e5 (diff) |
svga: Performance fixes
This is a squash commit of in-house performance fixes and misc bug fixes
for GL4.1 support.
Performance fixes:
* started using system memory for constant buffers to gain a 3X performance boost with Metro Redux
Misc bug fixes:
* fixed usage of vertexid in shader
* added an empty control point phase in the hull shader for zero output control points
* misc shader signature fixes
* fixed clip_distance input declaration
* clearing the dirty bit for the surface while using direct map if surface is already flushed
and there is no pending primitive
This patch also uses the SVGA_RETRY macro for command retries. Part of this was
already done in a previous patch.
Reviewed-by: Brian Paul <[email protected]>
Reviewed-by: Charmaine Lee <[email protected]>
Signed-off-by: Neha Bhende <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5317>
Diffstat (limited to 'src/gallium/drivers/svga')
37 files changed, 1072 insertions, 717 deletions
diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h index 22a40cf05cb..9247298733a 100644 --- a/src/gallium/drivers/svga/svga_cmd.h +++ b/src/gallium/drivers/svga/svga_cmd.h @@ -657,6 +657,12 @@ SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc, uint32 sizeInBytes); enum pipe_error +SVGA3D_vgpu10_SetConstantBufferOffset(struct svga_winsys_context *swc, + unsigned command, + unsigned slot, + uint32 offsetInBytes); + +enum pipe_error SVGA3D_vgpu10_UpdateSubResource(struct svga_winsys_context *swc, struct svga_winsys_surface *surface, const SVGA3dBox *box, diff --git a/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/src/gallium/drivers/svga/svga_cmd_vgpu10.c index eb5a482d9ba..30e174a754a 100644 --- a/src/gallium/drivers/svga/svga_cmd_vgpu10.c +++ b/src/gallium/drivers/svga/svga_cmd_vgpu10.c @@ -1265,6 +1265,31 @@ SVGA3D_vgpu10_SetSingleConstantBuffer(struct svga_winsys_context *swc, enum pipe_error +SVGA3D_vgpu10_SetConstantBufferOffset(struct svga_winsys_context *swc, + unsigned command, + unsigned slot, + uint32 offsetInBytes) +{ + SVGA3dCmdDXSetConstantBufferOffset *cmd; + + assert(offsetInBytes % 256 == 0); + + cmd = SVGA3D_FIFOReserve(swc, command, + sizeof(SVGA3dCmdDXSetConstantBufferOffset), + 0); /* one relocation */ + if (!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->slot = slot; + cmd->offsetInBytes = offsetInBytes; + + swc->commit(swc); + + return PIPE_OK; +} + + +enum pipe_error SVGA3D_vgpu10_ReadbackSubResource(struct svga_winsys_context *swc, struct svga_winsys_surface *surface, unsigned subResource) diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 4ef99efe989..d80336cf785 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -75,7 +75,9 @@ svga_destroy(struct pipe_context *pipe) /* free HW constant buffers */ for (shader = 0; shader < ARRAY_SIZE(svga->state.hw_draw.constbuf); shader++) { - 
pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], NULL); + for (i = 0; i < ARRAY_SIZE(svga->state.hw_draw.constbuf[0]); i++) { + pipe_resource_reference(&svga->state.hw_draw.constbuf[shader][i], NULL); + } } pipe->delete_blend_state(pipe, svga->noop_blend); diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index c0c315119f6..4d3a9f101dc 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -258,6 +258,10 @@ struct svga_velems_state { SVGA3dElementLayoutId id; /**< VGPU10 */ }; +struct svga_constant_buffer { + struct svga_winsys_surface *handle; + unsigned size; +}; /* Use to calculate differences between state emitted to hardware and * current driver-calculated state. @@ -308,6 +312,8 @@ struct svga_state unsigned num_vertex_buffers; enum pipe_prim_type reduced_prim; + unsigned vertex_id_bias; + struct { unsigned flag_1d; unsigned flag_srgb; @@ -391,7 +397,8 @@ struct svga_hw_draw_state struct svga_shader_variant *cs; /** Currently bound constant buffer, per shader stage */ - struct pipe_resource *constbuf[PIPE_SHADER_TYPES]; + struct pipe_resource *constbuf[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS]; + struct svga_constant_buffer constbufoffsets[PIPE_SHADER_TYPES][SVGA_MAX_CONST_BUFS]; /** Bitmask of enabled constant buffers */ unsigned enabled_constbufs[PIPE_SHADER_TYPES]; @@ -699,8 +706,18 @@ struct svga_context #define SVGA_NEW_TCS_CONST_BUFFER ((uint64_t) 0x1000000000) #define SVGA_NEW_TES_CONST_BUFFER ((uint64_t) 0x2000000000) #define SVGA_NEW_TCS_PARAM ((uint64_t) 0x4000000000) +#define SVGA_NEW_FS_CONSTS ((uint64_t) 0x8000000000) +#define SVGA_NEW_VS_CONSTS ((uint64_t) 0x10000000000) +#define SVGA_NEW_GS_CONSTS ((uint64_t) 0x20000000000) +#define SVGA_NEW_TCS_CONSTS ((uint64_t) 0x40000000000) +#define SVGA_NEW_TES_CONSTS ((uint64_t) 0x800000000000) #define SVGA_NEW_ALL ((uint64_t) 0xFFFFFFFFFFFFFFFF) +#define SVGA_NEW_CONST_BUFFER \ + 
(SVGA_NEW_FS_CONST_BUFFER | SVGA_NEW_VS_CONST_BUFFER | \ + SVGA_NEW_GS_CONST_BUFFER | \ + SVGA_NEW_TCS_CONST_BUFFER | SVGA_NEW_TES_CONST_BUFFER) + void svga_init_state_functions( struct svga_context *svga ); void svga_init_flush_functions( struct svga_context *svga ); @@ -739,6 +756,7 @@ void svga_context_finish(struct svga_context *svga); void svga_hwtnl_flush_retry( struct svga_context *svga ); void svga_hwtnl_flush_buffer( struct svga_context *svga, struct pipe_resource *buffer ); +boolean svga_hwtnl_has_pending_prim(struct svga_hwtnl *); void svga_surfaces_flush(struct svga_context *svga); diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index f8db818b3d0..d17653723c4 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -419,12 +419,10 @@ validate_constant_buffers(struct svga_context *svga) for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) { enum pipe_error ret; struct svga_buffer *buffer; - struct svga_winsys_surface *handle; - unsigned enabled_constbufs; /* Rebind the default constant buffer if needed */ if (svga->rebind.flags.constbufs) { - buffer = svga_buffer(svga->state.hw_draw.constbuf[shader]); + buffer = svga_buffer(svga->state.hw_draw.constbuf[shader][0]); if (buffer) { ret = svga->swc->resource_rebind(svga->swc, buffer->handle, @@ -435,6 +433,9 @@ validate_constant_buffers(struct svga_context *svga) } } + struct svga_winsys_surface *handle; + unsigned enabled_constbufs; + /* * Reference other bound constant buffers to ensure pending updates are * noticed by the device. @@ -443,18 +444,23 @@ validate_constant_buffers(struct svga_context *svga) while (enabled_constbufs) { unsigned i = u_bit_scan(&enabled_constbufs); buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer); - if (buffer) { + + /* If the constant buffer has hw storage, get the buffer winsys handle. + * Rebind the resource if needed. 
+ */ + if (buffer && !buffer->use_swbuf) handle = svga_buffer_handle(svga, &buffer->b.b, PIPE_BIND_CONSTANT_BUFFER); + else + handle = svga->state.hw_draw.constbufoffsets[shader][i].handle; - if (svga->rebind.flags.constbufs) { - ret = svga->swc->resource_rebind(svga->swc, - handle, - NULL, - SVGA_RELOC_READ); - if (ret != PIPE_OK) - return ret; - } + if (svga->rebind.flags.constbufs && handle) { + ret = svga->swc->resource_rebind(svga->swc, + handle, + NULL, + SVGA_RELOC_READ); + if (ret != PIPE_OK) + return ret; } } } @@ -1189,3 +1195,13 @@ done: SVGA_STATS_TIME_POP(svga_screen(hwtnl->svga->pipe.screen)->sws); return ret; } + + +/** + * Return TRUE if there are pending primitives. + */ +boolean +svga_hwtnl_has_pending_prim(struct svga_hwtnl *hwtnl) +{ + return hwtnl->cmd.prim_count > 0; +} diff --git a/src/gallium/drivers/svga/svga_link.c b/src/gallium/drivers/svga/svga_link.c index c9861a7e481..394040d9303 100644 --- a/src/gallium/drivers/svga/svga_link.c +++ b/src/gallium/drivers/svga/svga_link.c @@ -50,6 +50,10 @@ svga_link_shaders(const struct tgsi_shader_info *outshader_info, linkage->input_map[i] = INVALID_INDEX; } + for (i = 0; i < ARRAY_SIZE(linkage->prevShader.output_map); i++) { + linkage->prevShader.output_map[i] = INVALID_INDEX; + } + /* Assign input slots for input shader inputs. * Basically, we want to use the same index for the output shader's outputs * and the input shader's inputs that should be linked together. 
@@ -65,26 +69,31 @@ svga_link_shaders(const struct tgsi_shader_info *outshader_info, enum tgsi_semantic sem_name = inshader_info->input_semantic_name[i]; unsigned sem_index = inshader_info->input_semantic_index[i]; unsigned j; + unsigned out_index; + + /* search output shader outputs for same item */ + for (j = 0; j < outshader_info->num_outputs; j++) { + assert(j < ARRAY_SIZE(outshader_info->output_semantic_name)); + if (outshader_info->output_semantic_name[j] == sem_name && + outshader_info->output_semantic_index[j] == sem_index) { + linkage->input_map[i] = j; + linkage->prevShader.output_map[j] = i; + break; + } + } + /** - * Get the clip distance inputs from the output shader's - * clip distance shadow copy. + * The clip distance inputs come from the output shader's + * clip distance shadow copy, so mark the input index to match + * the index of the shadow copy. */ if (sem_name == TGSI_SEMANTIC_CLIPDIST) { - linkage->input_map[i] = outshader_info->num_outputs + 1 + sem_index; + out_index = outshader_info->num_outputs + 1 + sem_index; + linkage->input_map[i] = out_index; + linkage->prevShader.output_map[out_index] = i; /* make sure free_slot includes this extra output */ free_slot = MAX2(free_slot, linkage->input_map[i] + 1); } - else { - /* search output shader outputs for same item */ - for (j = 0; j < outshader_info->num_outputs; j++) { - assert(j < ARRAY_SIZE(outshader_info->output_semantic_name)); - if (outshader_info->output_semantic_name[j] == sem_name && - outshader_info->output_semantic_index[j] == sem_index) { - linkage->input_map[i] = j; - break; - } - } - } } /* Find the index for position */ @@ -97,12 +106,14 @@ svga_link_shaders(const struct tgsi_shader_info *outshader_info, } linkage->num_inputs = inshader_info->num_inputs; + linkage->prevShader.num_outputs = outshader_info->num_outputs; /* Things like the front-face register are handled here */ for (i = 0; i < inshader_info->num_inputs; i++) { if (linkage->input_map[i] == INVALID_INDEX) { 
unsigned j = free_slot++; linkage->input_map[i] = j; + linkage->prevShader.output_map[j] = i; } } linkage->input_map_max = free_slot - 1; @@ -111,8 +122,11 @@ svga_link_shaders(const struct tgsi_shader_info *outshader_info, if (SVGA_DEBUG & DEBUG_TGSI) { uint64_t reg = 0; uint64_t one = 1; - debug_printf("### linkage info: num_inputs=%d input_map_max=%d\n", - linkage->num_inputs, linkage->input_map_max); + + debug_printf( + "### linkage info: num_inputs=%d input_map_max=%d prevShader.num_outputs=%d\n", + linkage->num_inputs, linkage->input_map_max, + linkage->prevShader.num_outputs); for (i = 0; i < linkage->num_inputs; i++) { diff --git a/src/gallium/drivers/svga/svga_link.h b/src/gallium/drivers/svga/svga_link.h index 8d3517ea28a..c48be279651 100644 --- a/src/gallium/drivers/svga/svga_link.h +++ b/src/gallium/drivers/svga/svga_link.h @@ -8,10 +8,15 @@ struct svga_context; struct shader_linkage { - unsigned num_inputs; + unsigned num_inputs; /* number of inputs in the current shader */ unsigned position_index; /* position register index */ unsigned input_map_max; /* highest index of mapped inputs */ ubyte input_map[PIPE_MAX_SHADER_INPUTS]; + + struct { + unsigned num_outputs; + ubyte output_map[PIPE_MAX_SHADER_OUTPUTS]; + } prevShader; }; void diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c index 4ef8fb73796..388fc959e3a 100644 --- a/src/gallium/drivers/svga/svga_pipe_constants.c +++ b/src/gallium/drivers/svga/svga_pipe_constants.c @@ -77,15 +77,32 @@ svga_set_constant_buffer(struct pipe_context *pipe, svga->curr.constbufs[shader][index].buffer_offset = cb ? 
cb->buffer_offset : 0; svga->curr.constbufs[shader][index].user_buffer = NULL; /* not used */ - if (shader == PIPE_SHADER_FRAGMENT) - svga->dirty |= SVGA_NEW_FS_CONST_BUFFER; - else if (shader == PIPE_SHADER_VERTEX) - svga->dirty |= SVGA_NEW_VS_CONST_BUFFER; - else - svga->dirty |= SVGA_NEW_GS_CONST_BUFFER; - - /* update bitmask of dirty const buffers */ - svga->state.dirty_constbufs[shader] |= (1 << index); + if (index == 0) { + if (shader == PIPE_SHADER_FRAGMENT) + svga->dirty |= SVGA_NEW_FS_CONSTS; + else if (shader == PIPE_SHADER_VERTEX) + svga->dirty |= SVGA_NEW_VS_CONSTS; + else if (shader == PIPE_SHADER_GEOMETRY) + svga->dirty |= SVGA_NEW_GS_CONSTS; + else if (shader == PIPE_SHADER_TESS_CTRL) + svga->dirty |= SVGA_NEW_TCS_CONSTS; + else if (shader == PIPE_SHADER_TESS_EVAL) + svga->dirty |= SVGA_NEW_TES_CONSTS; + } else { + if (shader == PIPE_SHADER_FRAGMENT) + svga->dirty |= SVGA_NEW_FS_CONST_BUFFER; + else if (shader == PIPE_SHADER_VERTEX) + svga->dirty |= SVGA_NEW_VS_CONST_BUFFER; + else if (shader == PIPE_SHADER_GEOMETRY) + svga->dirty |= SVGA_NEW_GS_CONST_BUFFER; + else if (shader == PIPE_SHADER_TESS_CTRL) + svga->dirty |= SVGA_NEW_TCS_CONST_BUFFER; + else if (shader == PIPE_SHADER_TESS_EVAL) + svga->dirty |= SVGA_NEW_TES_CONST_BUFFER; + + /* update bitmask of dirty const buffers */ + svga->state.dirty_constbufs[shader] |= (1 << index); + } if (cb && cb->user_buffer) { pipe_resource_reference(&buf, NULL); diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c index 9f06a4b2692..55878c89532 100644 --- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c +++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c @@ -79,8 +79,6 @@ static void define_depth_stencil_state_object(struct svga_context *svga, struct svga_depth_stencil_state *ds) { - unsigned try; - assert(svga_have_vgpu10(svga)); ds->id = util_bitmask_add(svga->ds_object_id_bm); @@ -90,40 +88,33 @@ define_depth_stencil_state_object(struct 
svga_context *svga, STATIC_ASSERT(SVGA3D_COMPARISON_LESS == SVGA3D_CMP_LESS); STATIC_ASSERT(SVGA3D_COMPARISON_NOT_EQUAL == SVGA3D_CMP_NOTEQUAL); - /* Loop in case command buffer is full and we need to flush and retry */ - for (try = 0; try < 2; try++) { - enum pipe_error ret; - - /* Note: we use the ds->stencil[0].enabled value for both the front - * and back-face enables. If single-side stencil is used, we'll have - * set the back state the same as the front state. - */ - ret = SVGA3D_vgpu10_DefineDepthStencilState(svga->swc, - ds->id, - /* depth/Z */ - ds->zenable, - ds->zwriteenable, - ds->zfunc, - /* Stencil */ - ds->stencil[0].enabled, /*f|b*/ - ds->stencil[0].enabled, /*f*/ - ds->stencil[0].enabled, /*b*/ - ds->stencil_mask, - ds->stencil_writemask, - /* front stencil */ - ds->stencil[0].fail, - ds->stencil[0].zfail, - ds->stencil[0].pass, - ds->stencil[0].func, - /* back stencil */ - ds->stencil[1].fail, - ds->stencil[1].zfail, - ds->stencil[1].pass, - ds->stencil[1].func); - if (ret == PIPE_OK) - return; - svga_context_flush(svga, NULL); - } + /* Note: we use the ds->stencil[0].enabled value for both the front + * and back-face enables. If single-side stencil is used, we'll have + * set the back state the same as the front state. 
+ */ + SVGA_RETRY(svga, SVGA3D_vgpu10_DefineDepthStencilState + (svga->swc, + ds->id, + /* depth/Z */ + ds->zenable, + ds->zwriteenable, + ds->zfunc, + /* Stencil */ + ds->stencil[0].enabled, /*f|b*/ + ds->stencil[0].enabled, /*f*/ + ds->stencil[0].enabled, /*b*/ + ds->stencil_mask, + ds->stencil_writemask, + /* front stencil */ + ds->stencil[0].fail, + ds->stencil[0].zfail, + ds->stencil[0].pass, + ds->stencil[0].func, + /* back stencil */ + ds->stencil[1].fail, + ds->stencil[1].zfail, + ds->stencil[1].pass, + ds->stencil[1].func)); } @@ -251,18 +242,12 @@ svga_delete_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) (struct svga_depth_stencil_state *) depth_stencil; if (svga_have_vgpu10(svga)) { - enum pipe_error ret; - svga_hwtnl_flush_retry(svga); assert(ds->id != SVGA3D_INVALID_ID); - ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, ds->id); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyDepthStencilState(svga->swc, + ds->id)); if (ds->id == svga->state.hw_draw.depth_stencil_id) svga->state.hw_draw.depth_stencil_id = SVGA3D_INVALID_ID; diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index e6fabfc995e..06540e46dcf 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -244,6 +244,15 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE; } + /* We need to adjust the vertexID in the vertex shader since SV_VertexID + * always start from 0 for DrawArrays and does not include baseVertex for + * DrawIndexed. 
+ */ + if (svga->curr.vertex_id_bias != (info->start + info->index_bias)) { + svga->curr.vertex_id_bias = info->start + info->index_bias; + svga->dirty |= SVGA_NEW_VS_CONSTS; + } + if (svga->curr.vertices_per_patch != info->vertices_per_patch) { svga->curr.vertices_per_patch = info->vertices_per_patch; diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c index 7795afbfe1f..45621749106 100644 --- a/src/gallium/drivers/svga/svga_pipe_fs.c +++ b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -87,7 +87,6 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader) struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader; struct svga_fragment_shader *next_fs; struct svga_shader_variant *variant, *tmp; - enum pipe_error ret; svga_hwtnl_flush_retry(svga); @@ -103,12 +102,7 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader) /* Check if deleting currently bound shader */ if (variant == svga->state.hw_draw.fs) { - ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL)); svga->state.hw_draw.fs = NULL; } diff --git a/src/gallium/drivers/svga/svga_pipe_gs.c b/src/gallium/drivers/svga/svga_pipe_gs.c index edc03f6b6e1..333ca3d6bba 100644 --- a/src/gallium/drivers/svga/svga_pipe_gs.c +++ b/src/gallium/drivers/svga/svga_pipe_gs.c @@ -89,7 +89,6 @@ svga_delete_gs_state(struct pipe_context *pipe, void *shader) struct svga_geometry_shader *gs = (struct svga_geometry_shader *)shader; struct svga_geometry_shader *next_gs; struct svga_shader_variant *variant, *tmp; - enum pipe_error ret; svga_hwtnl_flush_retry(svga); @@ -111,12 +110,7 @@ svga_delete_gs_state(struct pipe_context *pipe, void *shader) /* Check if deleting currently bound shader */ if (variant == svga->state.hw_draw.gs) { - ret = 
svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL)); svga->state.hw_draw.gs = NULL; } diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c index 38874deb414..77be3692ba0 100644 --- a/src/gallium/drivers/svga/svga_pipe_query.c +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -119,11 +119,10 @@ define_query_vgpu9(struct svga_context *svga, return PIPE_OK; } -static enum pipe_error +static void begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq) { struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - enum pipe_error ret = PIPE_OK; if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) { /* The application doesn't care for the pending query result. @@ -141,28 +140,16 @@ begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq) sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; sws->fence_reference(sws, &sq->fence, NULL); - ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); - } - return ret; + SVGA_RETRY(svga, SVGA3D_BeginQuery(svga->swc, sq->svga_type)); } -static enum pipe_error +static void end_query_vgpu9(struct svga_context *svga, struct svga_query *sq) { - enum pipe_error ret = PIPE_OK; - /* Set to PENDING before sending EndQuery. 
*/ sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING; - ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf); - } - return ret; + SVGA_RETRY(svga, SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf)); } static bool @@ -170,7 +157,6 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, bool wait, uint64_t *result) { struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - enum pipe_error ret; SVGA3dQueryState state; if (!sq->fence) { @@ -178,12 +164,8 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq, * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause * a synchronous wait on the host. */ - ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf); - } - assert (ret == PIPE_OK); + SVGA_RETRY(svga, SVGA3D_WaitForQuery(svga->swc, sq->svga_type, + sq->hwbuf)); svga_context_flush(svga, &sq->fence); assert(sq->fence); } @@ -510,12 +492,8 @@ define_query_vgpu10(struct svga_context *svga, svga->gb_query_alloc_mask = util_bitmask_create(); /* Bind the query object to the context */ - if (svga->swc->query_bind(svga->swc, svga->gb_query, - SVGA_QUERY_FLAG_SET) != PIPE_OK) { - svga_context_flush(svga, NULL); - svga->swc->query_bind(svga->swc, svga->gb_query, - SVGA_QUERY_FLAG_SET); - } + SVGA_RETRY(svga, svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_SET)); } sq->gb_query = svga->gb_query; @@ -536,42 +514,26 @@ define_query_vgpu10(struct svga_context *svga, /** * Send SVGA3D commands to define the query */ - ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags); - } + 
SVGA_RETRY_OOM(svga, ret, SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, + sq->svga_type, + sq->flags)); if (ret != PIPE_OK) return PIPE_ERROR_OUT_OF_MEMORY; - ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id); - } - assert(ret == PIPE_OK); - - ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset); - } - assert(ret == PIPE_OK); + SVGA_RETRY(svga, SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id)); + SVGA_RETRY(svga, SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, + sq->offset)); return PIPE_OK; } -static enum pipe_error +static void destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq) { - enum pipe_error ret; - - ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id); + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id)); /* Deallocate the memory slot allocated for this query */ deallocate_query(svga, sq); - - return ret; } @@ -581,13 +543,8 @@ destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq) static void rebind_vgpu10_query(struct svga_context *svga) { - if (svga->swc->query_bind(svga->swc, svga->gb_query, - SVGA_QUERY_FLAG_REF) != PIPE_OK) { - svga_context_flush(svga, NULL); - svga->swc->query_bind(svga->swc, svga->gb_query, - SVGA_QUERY_FLAG_REF); - } - + SVGA_RETRY(svga, svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_REF)); svga->rebind.flags.query = FALSE; } @@ -596,7 +553,6 @@ static enum pipe_error begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq) { struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - enum pipe_error ret = PIPE_OK; int status = 0; sws->fence_reference(sws, &sq->fence, NULL); @@ -611,30 +567,18 @@ begin_query_vgpu10(struct svga_context *svga, struct svga_query 
*sq) } /* Send the BeginQuery command to the device */ - ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id); - } - return ret; + SVGA_RETRY(svga, SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id)); + return PIPE_OK; } -static enum pipe_error +static void end_query_vgpu10(struct svga_context *svga, struct svga_query *sq) { - enum pipe_error ret = PIPE_OK; - if (svga->rebind.flags.query) { rebind_vgpu10_query(svga); } - ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id); - } - - return ret; + SVGA_RETRY(svga, SVGA3D_vgpu10_EndQuery(svga->swc, sq->id)); } static bool @@ -775,6 +719,7 @@ svga_create_query(struct pipe_context *pipe, case SVGA_QUERY_NUM_BUFFERS_MAPPED: case SVGA_QUERY_NUM_TEXTURES_MAPPED: case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_NUM_COMMAND_BUFFERS: case SVGA_QUERY_COMMAND_BUFFER_SIZE: case SVGA_QUERY_SURFACE_WRITE_FLUSHES: case SVGA_QUERY_MEMORY_USED: @@ -790,12 +735,16 @@ svga_create_query(struct pipe_context *pipe, case SVGA_QUERY_NUM_CONST_UPDATES: case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + case SVGA_QUERY_NUM_SHADER_RELOCATIONS: + case SVGA_QUERY_NUM_SURFACE_RELOCATIONS: + case SVGA_QUERY_SHADER_MEM_USED: break; case SVGA_QUERY_FLUSH_TIME: case SVGA_QUERY_MAP_BUFFER_TIME: /* These queries need os_time_get() */ svga->hud.uses_time = TRUE; break; + default: assert(!"unexpected query type in svga_create_query()"); } @@ -856,6 +805,7 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_BUFFERS_MAPPED: case SVGA_QUERY_NUM_TEXTURES_MAPPED: case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_NUM_COMMAND_BUFFERS: case SVGA_QUERY_COMMAND_BUFFER_SIZE: case SVGA_QUERY_FLUSH_TIME: case SVGA_QUERY_SURFACE_WRITE_FLUSHES: @@ -872,6 +822,9 @@ svga_destroy_query(struct 
pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_CONST_UPDATES: case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + case SVGA_QUERY_NUM_SHADER_RELOCATIONS: + case SVGA_QUERY_NUM_SURFACE_RELOCATIONS: + case SVGA_QUERY_SHADER_MEM_USED: /* nothing */ break; default: @@ -890,14 +843,11 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) { struct svga_context *svga = svga_context(pipe); struct svga_query *sq = svga_query(q); - enum pipe_error ret; + enum pipe_error ret = PIPE_OK; assert(sq); assert(sq->type < SVGA_QUERY_MAX); - SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, - sq, sq->id); - /* Need to flush out buffered drawing commands so that they don't * get counted in the query results. */ @@ -917,7 +867,7 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) (void) status; } } else { - ret = begin_query_vgpu9(svga, sq); + begin_query_vgpu9(svga, sq); } assert(ret == PIPE_OK); (void) ret; @@ -954,6 +904,9 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_BYTES_UPLOADED: sq->begin_count = svga->hud.num_bytes_uploaded; break; + case SVGA_QUERY_NUM_COMMAND_BUFFERS: + sq->begin_count = svga->swc->num_command_buffers; + break; case SVGA_QUERY_COMMAND_BUFFER_SIZE: sq->begin_count = svga->hud.command_buffer_size; break; @@ -978,6 +931,12 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_CONST_UPDATES: sq->begin_count = svga->hud.num_const_updates; break; + case SVGA_QUERY_NUM_SHADER_RELOCATIONS: + sq->begin_count = svga->swc->num_shader_reloc; + break; + case SVGA_QUERY_NUM_SURFACE_RELOCATIONS: + sq->begin_count = svga->swc->num_surf_reloc; + break; case SVGA_QUERY_MEMORY_USED: case SVGA_QUERY_NUM_SHADERS: case SVGA_QUERY_NUM_RESOURCES: @@ -986,6 +945,7 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_GENERATE_MIPMAP: case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: case 
SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + case SVGA_QUERY_SHADER_MEM_USED: /* nothing */ break; default: @@ -1006,17 +966,19 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) { struct svga_context *svga = svga_context(pipe); struct svga_query *sq = svga_query(q); - enum pipe_error ret; assert(sq); assert(sq->type < SVGA_QUERY_MAX); - SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, - sq, sq->id); + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x type=%d\n", + __FUNCTION__, sq, sq->type); if (sq->type == PIPE_QUERY_TIMESTAMP && !sq->active) svga_begin_query(pipe, q); + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d type=%d svga_type=%d\n", + __FUNCTION__, sq, sq->id, sq->type, sq->svga_type); + svga_hwtnl_flush_retry(svga); assert(sq->active); @@ -1026,27 +988,21 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: if (svga_have_vgpu10(svga)) { - ret = end_query_vgpu10(svga, sq); + end_query_vgpu10(svga, sq); /* also need to end the associated occlusion predicate query */ if (sq->predicate) { - enum pipe_error status; - status = end_query_vgpu10(svga, svga_query(sq->predicate)); - assert(status == PIPE_OK); - (void) status; + end_query_vgpu10(svga, svga_query(sq->predicate)); } } else { - ret = end_query_vgpu9(svga, sq); + end_query_vgpu9(svga, sq); } - assert(ret == PIPE_OK); - (void) ret; break; case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_TIMESTAMP: assert(svga_have_vgpu10(svga)); - ret = end_query_vgpu10(svga, sq); - assert(ret == PIPE_OK); + end_query_vgpu10(svga, sq); break; case SVGA_QUERY_NUM_DRAW_CALLS: sq->end_count = svga->hud.num_draw_calls; @@ -1072,6 +1028,9 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_BYTES_UPLOADED: sq->end_count = svga->hud.num_bytes_uploaded; break; + case SVGA_QUERY_NUM_COMMAND_BUFFERS: + sq->end_count = 
svga->swc->num_command_buffers; + break; case SVGA_QUERY_COMMAND_BUFFER_SIZE: sq->end_count = svga->hud.command_buffer_size; break; @@ -1096,6 +1055,12 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_CONST_UPDATES: sq->end_count = svga->hud.num_const_updates; break; + case SVGA_QUERY_NUM_SHADER_RELOCATIONS: + sq->end_count = svga->swc->num_shader_reloc; + break; + case SVGA_QUERY_NUM_SURFACE_RELOCATIONS: + sq->end_count = svga->swc->num_surf_reloc; + break; case SVGA_QUERY_MEMORY_USED: case SVGA_QUERY_NUM_SHADERS: case SVGA_QUERY_NUM_RESOURCES: @@ -1104,6 +1069,7 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_GENERATE_MIPMAP: case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + case SVGA_QUERY_SHADER_MEM_USED: /* nothing */ break; default: @@ -1204,6 +1170,7 @@ svga_get_query_result(struct pipe_context *pipe, case SVGA_QUERY_NUM_BUFFERS_MAPPED: case SVGA_QUERY_NUM_TEXTURES_MAPPED: case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_NUM_COMMAND_BUFFERS: case SVGA_QUERY_COMMAND_BUFFER_SIZE: case SVGA_QUERY_FLUSH_TIME: case SVGA_QUERY_SURFACE_WRITE_FLUSHES: @@ -1212,6 +1179,8 @@ svga_get_query_result(struct pipe_context *pipe, case SVGA_QUERY_NUM_BUFFER_UPLOADS: case SVGA_QUERY_NUM_CONST_BUF_UPDATES: case SVGA_QUERY_NUM_CONST_UPDATES: + case SVGA_QUERY_NUM_SHADER_RELOCATIONS: + case SVGA_QUERY_NUM_SURFACE_RELOCATIONS: vresult->u64 = sq->end_count - sq->begin_count; break; /* These are running total counters */ @@ -1245,6 +1214,9 @@ svga_get_query_result(struct pipe_context *pipe, vresult->f = (float) svga->swc->num_commands / (float) svga->swc->num_draw_commands; break; + case SVGA_QUERY_SHADER_MEM_USED: + vresult->u64 = svga->hud.shader_mem_used; + break; default: assert(!"unexpected query type in svga_get_query_result"); } @@ -1262,7 +1234,6 @@ svga_render_condition(struct pipe_context *pipe, struct pipe_query *q, struct svga_winsys_screen *sws = 
svga_screen(svga->pipe.screen)->sws; struct svga_query *sq = svga_query(q); SVGA3dQueryId queryId; - enum pipe_error ret; SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); @@ -1296,13 +1267,8 @@ svga_render_condition(struct pipe_context *pipe, struct pipe_query *q, * This is probably acceptable for the typical case of occlusion culling. */ if (sws->have_set_predication_cmd) { - ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId, - (uint32) condition); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId, - (uint32) condition); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_SetPredication(svga->swc, queryId, + (uint32) condition)); svga->pred.query_id = queryId; svga->pred.cond = condition; } @@ -1350,7 +1316,6 @@ svga_toggle_render_condition(struct svga_context *svga, boolean on) { SVGA3dQueryId query_id; - enum pipe_error ret; if (render_condition_enabled || svga->pred.query_id == SVGA3D_INVALID_ID) { @@ -1365,14 +1330,8 @@ svga_toggle_render_condition(struct svga_context *svga, */ query_id = on ? 
svga->pred.query_id : SVGA3D_INVALID_ID; - ret = SVGA3D_vgpu10_SetPredication(svga->swc, query_id, - (uint32) svga->pred.cond); - if (ret == PIPE_ERROR_OUT_OF_MEMORY) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_SetPredication(svga->swc, query_id, - (uint32) svga->pred.cond); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_SetPredication(svga->swc, query_id, + (uint32) svga->pred.cond)); } diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index 7764a855391..1b823d64ec0 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -121,8 +121,6 @@ define_rasterizer_object(struct svga_context *svga, const uint8 pv_last = !rast->templ.flatshade_first && svgascreen->haveProvokingVertex; - unsigned try; - rast->id = util_bitmask_add(svga->rast_object_id_bm); if (rast->templ.fill_front != rast->templ.fill_back) { @@ -133,31 +131,24 @@ define_rasterizer_object(struct svga_context *svga, fill_mode = SVGA3D_FILLMODE_FILL; } - for (try = 0; try < 2; try++) { - const uint8 pv_last = !rast->templ.flatshade_first && - svgascreen->haveProvokingVertex; - enum pipe_error ret = - SVGA3D_vgpu10_DefineRasterizerState(svga->swc, - rast->id, - fill_mode, - cull_mode, - rast->templ.front_ccw, - depth_bias, - depth_bias_clamp, - slope_scaled_depth_bias, - rast->templ.depth_clip_near, - rast->templ.scissor, - rast->templ.multisample, - rast->templ.line_smooth, - line_width, - rast->templ.line_stipple_enable, - line_factor, - line_pattern, - pv_last); - if (ret == PIPE_OK) - return; - svga_context_flush(svga, NULL); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DefineRasterizerState + (svga->swc, + rast->id, + fill_mode, + cull_mode, + rast->templ.front_ccw, + depth_bias, + depth_bias_clamp, + slope_scaled_depth_bias, + rast->templ.depth_clip_near, + rast->templ.scissor, + rast->templ.multisample, + rast->templ.line_smooth, + line_width, + 
rast->templ.line_stipple_enable, + line_factor, + line_pattern, + pv_last)); } @@ -418,12 +409,8 @@ svga_delete_rasterizer_state(struct pipe_context *pipe, void *state) (struct svga_rasterizer_state *) state; if (svga_have_vgpu10(svga)) { - enum pipe_error ret = - SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, raster->id); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyRasterizerState(svga->swc, + raster->id)); if (raster->id == svga->state.hw_draw.rasterizer_id) svga->state.hw_draw.rasterizer_id = SVGA3D_INVALID_ID; diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index 2f8fedb7aa0..d73a5816be1 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -166,7 +166,6 @@ define_sampler_state_object(struct svga_context *svga, uint8 compare_func; SVGA3dFilter filter; SVGA3dRGBAFloat bcolor; - unsigned try; float min_lod, max_lod; assert(svga_have_vgpu10(svga)); @@ -207,25 +206,19 @@ define_sampler_state_object(struct svga_context *svga, for (i = 0; i <= ss->compare_mode; i++) { ss->id[i] = util_bitmask_add(svga->sampler_object_id_bm); - /* Loop in case command buffer is full and we need to flush and retry */ - for (try = 0; try < 2; try++) { - enum pipe_error ret = - SVGA3D_vgpu10_DefineSamplerState(svga->swc, - ss->id[i], - filter, - ss->addressu, - ss->addressv, - ss->addressw, - ss->lod_bias, /* float */ - max_aniso, - compare_func, - bcolor, - min_lod, /* float */ - max_lod); /* float */ - if (ret == PIPE_OK) - break; - svga_context_flush(svga, NULL); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DefineSamplerState + (svga->swc, + ss->id[i], + filter, + ss->addressu, + ss->addressv, + ss->addressw, + ss->lod_bias, /* float */ + max_aniso, + compare_func, + bcolor, + min_lod, /* float */ + max_lod)); /* float */ /* turn off the shadow compare 
option for second iteration */ filter &= ~SVGA3D_FILTER_COMPARE; @@ -349,16 +342,11 @@ svga_delete_sampler_state(struct pipe_context *pipe, void *sampler) if (svga_have_vgpu10(svga)) { unsigned i; for (i = 0; i < 2; i++) { - enum pipe_error ret; - if (ss->id[i] != SVGA3D_INVALID_ID) { svga_hwtnl_flush_retry(svga); - ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id[i]); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id[i]); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroySamplerState(svga->swc, + ss->id[i])); util_bitmask_clear(svga->sampler_object_id_bm, ss->id[i]); } } @@ -405,17 +393,12 @@ svga_sampler_view_destroy(struct pipe_context *pipe, struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(view); if (svga_have_vgpu10(svga) && sv->id != SVGA3D_INVALID_ID) { - enum pipe_error ret; - assert(view->context == pipe); svga_hwtnl_flush_retry(svga); - ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, + sv->id)); util_bitmask_clear(svga->sampler_view_id_bm, sv->id); } @@ -534,7 +517,7 @@ svga_cleanup_sampler_state(struct svga_context *svga) { enum pipe_shader_type shader; - for (shader = 0; shader <= PIPE_SHADER_GEOMETRY; shader++) { + for (shader = 0; shader <= PIPE_SHADER_TESS_EVAL; shader++) { unsigned i; for (i = 0; i < svga->state.hw_draw.num_sampler_views[shader]; i++) { diff --git a/src/gallium/drivers/svga/svga_pipe_streamout.c b/src/gallium/drivers/svga/svga_pipe_streamout.c index 380ceaa3aa7..f20f58d49d1 100644 --- a/src/gallium/drivers/svga/svga_pipe_streamout.c +++ b/src/gallium/drivers/svga/svga_pipe_streamout.c @@ -354,6 +354,15 @@ svga_delete_stream_output(struct svga_context *svga, sws->buffer_destroy(sws, streamout->declBuf); } + /* Before deleting the current 
streamout, make sure to stop any pending + * SO queries. + */ + if (svga->current_so == streamout) { + if (svga->in_streamout) + svga_end_stream_output_queries(svga, svga->current_so->streammask); + svga->current_so = NULL; + } + /* Release the ID */ util_bitmask_clear(svga->stream_output_id_bm, streamout->id); diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 346a13105b7..cd38dab6ca5 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -120,7 +120,6 @@ define_input_element_object(struct svga_context *svga, struct svga_velems_state *velems) { SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS]; - enum pipe_error ret; unsigned i; assert(velems->count <= PIPE_MAX_ATTRIBS); @@ -186,14 +185,8 @@ define_input_element_object(struct svga_context *svga, velems->id = util_bitmask_add(svga->input_element_object_id_bm); - ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count, - velems->id, elements); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count, - velems->id, elements); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DefineElementLayout(svga->swc, velems->count, + velems->id, elements)); } @@ -293,16 +286,10 @@ svga_delete_vertex_elements_state(struct pipe_context *pipe, void *state) struct svga_velems_state *velems = (struct svga_velems_state *) state; if (svga_have_vgpu10(svga)) { - enum pipe_error ret; - svga_hwtnl_flush_retry(svga); - ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc, velems->id); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyElementLayout(svga->swc, + velems->id)); if (velems->id == svga->state.hw_draw.layout_id) svga->state.hw_draw.layout_id = SVGA3D_INVALID_ID; diff --git 
a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index aa7396c2c6b..b94576f78d6 100644 --- a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -168,7 +168,6 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader) struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader; struct svga_vertex_shader *next_vs; struct svga_shader_variant *variant, *tmp; - enum pipe_error ret; svga_hwtnl_flush_retry(svga); @@ -194,12 +193,7 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader) /* Check if deleting currently bound shader */ if (variant == svga->state.hw_draw.vs) { - ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL)); svga->state.hw_draw.vs = NULL; } diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index 4f19b8ca035..80f91a9ef65 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -62,14 +62,13 @@ svga_buffer_needs_hw_storage(const struct svga_screen *ss, * tagged with PIPE_BIND_CUSTOM */ bind_mask |= PIPE_BIND_CUSTOM; - /* Uniform buffer objects. - * Make sure we don't create hardware storage for gallium frontend - * const0 buffers, because we frequently map them for reading. - * They are distinguished by having PIPE_USAGE_STREAM, but not - * PIPE_BIND_CUSTOM. + /** + * Uniform buffer objects. + * Don't create hardware storage for state-tracker constant buffers, + * because we frequently map them for reading and writing, and + * the length of those buffers are always small, so it is better + * to just use system memory. 
*/ - if (template->usage != PIPE_USAGE_STREAM) - bind_mask |= PIPE_BIND_CONSTANT_BUFFER; } if (template->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) @@ -132,7 +131,6 @@ svga_buffer_transfer_map(struct pipe_context *pipe, if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty && !sbuf->key.coherent && !svga->swc->force_coherent) { - enum pipe_error ret; /* Host-side buffers can only be dirtied with vgpu10 features * (streamout and buffer copy). @@ -150,13 +148,8 @@ svga_buffer_transfer_map(struct pipe_context *pipe, assert(sbuf->handle); - ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0); - assert(ret == PIPE_OK); - } - + SVGA_RETRY(svga, SVGA3D_vgpu10_ReadbackSubResource(svga->swc, + sbuf->handle, 0)); svga->hud.num_readbacks++; svga_context_finish(svga); @@ -278,15 +271,18 @@ svga_buffer_transfer_map(struct pipe_context *pipe, else if (svga_buffer_has_hw_storage(sbuf)) { boolean retry; - map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry); + map = SVGA_TRY_MAP(svga_buffer_hw_storage_map + (svga, sbuf, transfer->usage, &retry), retry); if (map == NULL && retry) { /* * At this point, svga_buffer_get_transfer() has already * hit the DISCARD_WHOLE_RESOURCE path and flushed HWTNL * for this buffer. */ + svga_retry_enter(svga); svga_context_flush(svga, NULL); map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry); + svga_retry_exit(svga); } } else { @@ -349,6 +345,7 @@ svga_buffer_transfer_unmap(struct pipe_context *pipe, } if (svga_buffer_has_hw_storage(sbuf)) { + /* Note: we may wind up flushing here and unmapping other buffers * which leads to recursively locking ss->swc_mutex. 
*/ @@ -370,6 +367,19 @@ svga_buffer_transfer_unmap(struct pipe_context *pipe, if (!(svga->swc->force_coherent || sbuf->key.coherent) || sbuf->swbuf) svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0); } + + if (sbuf->swbuf && + (!sbuf->bind_flags || (sbuf->bind_flags & PIPE_BIND_CONSTANT_BUFFER))) { + /* + * Since the constant buffer is in system buffer, we need + * to set the constant buffer dirty bits, so that the context + * can update the changes in the device. + * According to the GL spec, buffer bound to other contexts will + * have to be explicitly rebound by the user to have the changes take + * into effect. + */ + svga->dirty |= SVGA_NEW_CONST_BUFFER; + } } mtx_unlock(&ss->swc_mutex); @@ -491,6 +501,13 @@ svga_buffer_create(struct pipe_screen *screen, sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64); if (!sbuf->swbuf) goto error2; + + /* Since constant buffer is usually small, it is much cheaper to + * use system memory for the data just as it is being done for + * the default constant buffer. + */ + if ((bind_flags & PIPE_BIND_CONSTANT_BUFFER) || !bind_flags) + sbuf->use_swbuf = TRUE; } debug_reference(&sbuf->b.b.reference, diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h index bd219742e68..09648d2ec09 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/src/gallium/drivers/svga/svga_resource_buffer.h @@ -94,6 +94,11 @@ struct svga_buffer boolean user; /** + * Whether swbuf is used for this buffer. + */ + boolean use_swbuf; + + /** * Creation key for the host surface handle. 
* * This structure describes all the host surface characteristics so that it @@ -323,16 +328,10 @@ svga_buffer_hw_storage_unmap(struct svga_context *svga, if (sws->have_gb_objects) { struct svga_winsys_context *swc = svga->swc; boolean rebind; + swc->surface_unmap(swc, sbuf->handle, &rebind); if (rebind) { - enum pipe_error ret; - ret = SVGA3D_BindGBSurface(swc, sbuf->handle); - if (ret != PIPE_OK) { - /* flush and retry */ - svga_context_flush(svga, NULL); - ret = SVGA3D_BindGBSurface(swc, sbuf->handle); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_BindGBSurface(swc, sbuf->handle)); } } else sws->buffer_unmap(sws, sbuf->hwbuf); diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 5d2b934e7c1..78535643563 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -72,14 +72,16 @@ svga_winsys_buffer_create( struct svga_context *svga, struct svga_winsys_buffer *buf; /* Just try */ - buf = sws->buffer_create(sws, alignment, usage, size); + buf = SVGA_TRY_PTR(sws->buffer_create(sws, alignment, usage, size)); if (!buf) { SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "flushing context to find %d bytes GMR\n", size); /* Try flushing all pending DMAs */ + svga_retry_enter(svga); svga_context_flush(svga, NULL); buf = sws->buffer_create(sws, alignment, usage, size); + svga_retry_exit(svga); } return buf; @@ -229,6 +231,12 @@ svga_buffer_create_host_surface(struct svga_screen *ss, bind_flags); } + if (ss->sws->have_gb_objects) { + /* Initialize the surface with zero */ + ss->sws->surface_init(ss->sws, sbuf->handle, svga_surface_size(&sbuf->key), + sbuf->key.flags); + } + return ret; } @@ -255,14 +263,9 @@ svga_buffer_recreate_host_surface(struct svga_context *svga, if (ret == PIPE_OK) { /* Copy the surface data */ assert(sbuf->handle); - ret = SVGA3D_vgpu10_BufferCopy(svga->swc, old_handle, sbuf->handle, - 0, 0, sbuf->b.b.width0); 
- if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_BufferCopy(svga->swc, old_handle, sbuf->handle, - 0, 0, sbuf->b.b.width0); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_BufferCopy(svga->swc, old_handle, + sbuf->handle, + 0, 0, sbuf->b.b.width0)); } /* Set the new bind flags for this buffer resource */ @@ -344,8 +347,6 @@ svga_buffer_bind_host_surface(struct svga_context *svga, struct svga_buffer *sbuf, struct svga_buffer_surface *bufsurf) { - enum pipe_error ret; - /* Update the to-bind surface */ assert(bufsurf->handle); assert(sbuf->handle); @@ -354,14 +355,9 @@ svga_buffer_bind_host_surface(struct svga_context *svga, * make sure to copy the buffer content. */ if (sbuf->bind_flags & PIPE_BIND_STREAM_OUTPUT) { - ret = SVGA3D_vgpu10_BufferCopy(svga->swc, sbuf->handle, bufsurf->handle, - 0, 0, sbuf->b.b.width0); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_BufferCopy(svga->swc, sbuf->handle, bufsurf->handle, - 0, 0, sbuf->b.b.width0); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_BufferCopy(svga->swc, sbuf->handle, + bufsurf->handle, + 0, 0, sbuf->b.b.width0)); } /* Set this surface as the current one */ @@ -943,7 +939,6 @@ svga_buffer_upload_piecewise(struct svga_screen *ss, while (offset < range->end) { struct svga_winsys_buffer *hwbuf; uint8_t *map; - enum pipe_error ret; if (offset + size > range->end) size = range->end - offset; @@ -968,19 +963,10 @@ svga_buffer_upload_piecewise(struct svga_screen *ss, sws->buffer_unmap(sws, hwbuf); } - ret = SVGA3D_BufferDMA(svga->swc, - hwbuf, sbuf->handle, - SVGA3D_WRITE_HOST_VRAM, - size, 0, offset, sbuf->dma.flags); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_BufferDMA(svga->swc, - hwbuf, sbuf->handle, - SVGA3D_WRITE_HOST_VRAM, - size, 0, offset, sbuf->dma.flags); - assert(ret == PIPE_OK); - } - + SVGA_RETRY(svga, SVGA3D_BufferDMA(svga->swc, + hwbuf, sbuf->handle, + SVGA3D_WRITE_HOST_VRAM, + 
size, 0, offset, sbuf->dma.flags)); sbuf->dma.flags.discard = FALSE; sws->buffer_destroy(sws, hwbuf); @@ -1061,12 +1047,7 @@ svga_buffer_handle(struct svga_context *svga, struct pipe_resource *buf, ret = svga_buffer_update_hw(svga, sbuf, sbuf->bind_flags); if (ret == PIPE_OK) { /* Emit DMA or UpdateGBImage commands */ - ret = svga_buffer_upload_command(svga, sbuf); - if (ret == PIPE_ERROR_OUT_OF_MEMORY) { - svga_context_flush(svga, NULL); - ret = svga_buffer_upload_command(svga, sbuf); - assert(ret == PIPE_OK); - } + SVGA_RETRY_OOM(svga, ret, svga_buffer_upload_command(svga, sbuf)); if (ret == PIPE_OK) { sbuf->dma.pending = TRUE; assert(!sbuf->head.prev && !sbuf->head.next); diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 1bae8c39595..137d15bcb4f 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -58,7 +58,6 @@ svga_transfer_dma_band(struct svga_context *svga, { struct svga_texture *texture = svga_texture(st->base.resource); SVGA3dCopyBox box; - enum pipe_error ret; assert(!st->use_direct_map); @@ -87,12 +86,7 @@ svga_transfer_dma_band(struct svga_context *svga, (util_format_get_blockwidth(texture->b.b.format) * util_format_get_blockheight(texture->b.b.format))); - ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1, flags)); } @@ -274,40 +268,28 @@ need_tex_readback(struct svga_transfer *st) } -static enum pipe_error +static void readback_image_vgpu9(struct svga_context *svga, struct svga_winsys_surface *surf, unsigned slice, unsigned level) { - enum pipe_error ret; - - ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - 
ret = SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level); - } - return ret; + SVGA_RETRY(svga, SVGA3D_ReadbackGBImage(svga->swc, surf, slice, level)); } -static enum pipe_error +static void readback_image_vgpu10(struct svga_context *svga, struct svga_winsys_surface *surf, unsigned slice, unsigned level, unsigned numMipLevels) { - enum pipe_error ret; unsigned subResource; subResource = slice * numMipLevels + level; - ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, subResource); - } - return ret; + SVGA_RETRY(svga, SVGA3D_vgpu10_ReadbackSubResource(svga->swc, surf, + subResource)); } @@ -397,25 +379,20 @@ svga_texture_transfer_map_direct(struct svga_context *svga, unsigned usage = st->base.usage; if (need_tex_readback(st)) { - enum pipe_error ret; - svga_surfaces_flush(svga); if (!svga->swc->force_coherent || tex->imported) { for (i = 0; i < st->box.d; i++) { if (svga_have_vgpu10(svga)) { - ret = readback_image_vgpu10(svga, surf, st->slice + i, level, - tex->b.b.last_level + 1); + readback_image_vgpu10(svga, surf, st->slice + i, level, + tex->b.b.last_level + 1); } else { - ret = readback_image_vgpu9(svga, surf, st->slice + i, level); + readback_image_vgpu9(svga, surf, st->slice + i, level); } } svga->hud.num_readbacks++; SVGA_STATS_COUNT_INC(sws, SVGA_STATS_COUNT_TEXREADBACK); - assert(ret == PIPE_OK); - (void) ret; - svga_context_flush(svga, NULL); } /* @@ -465,26 +442,19 @@ svga_texture_transfer_map_direct(struct svga_context *svga, usage |= PIPE_TRANSFER_PERSISTENT | PIPE_TRANSFER_COHERENT; } - map = swc->surface_map(swc, surf, usage, &retry, &rebind); + map = SVGA_TRY_MAP(svga->swc->surface_map + (svga->swc, surf, usage, &retry, &rebind), retry); + if (map == NULL && retry) { /* * At this point, the svga_surfaces_flush() should already have * called in svga_texture_get_transfer(). 
*/ svga->hud.surface_write_flushes++; + svga_retry_enter(svga); svga_context_flush(svga, NULL); - map = swc->surface_map(swc, surf, usage, &retry, &rebind); - } - if (map && rebind) { - enum pipe_error ret; - - ret = SVGA3D_BindGBSurface(swc, surf); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_BindGBSurface(swc, surf); - assert(ret == PIPE_OK); - } - svga_context_flush(svga, NULL); + map = svga->swc->surface_map(svga->swc, surf, usage, &retry, &rebind); + svga_retry_exit(svga); } if (map && rebind) { @@ -626,9 +596,12 @@ svga_texture_transfer_map(struct pipe_context *pipe, pipe_resource_reference(&st->base.resource, texture); /* If this is the first time mapping to the surface in this - * command buffer, clear the dirty masks of this surface. + * command buffer and there is no pending primitives, clear + * the dirty masks of this surface. */ - if (sws->surface_is_flushed(sws, surf)) { + if (sws->surface_is_flushed(sws, surf) && + (svga_have_vgpu10(svga) || + !svga_hwtnl_has_pending_prim(svga->hwtnl))) { svga_clear_texture_dirty(tex); } @@ -716,37 +689,23 @@ svga_texture_surface_unmap(struct svga_context *svga, swc->surface_unmap(swc, surf, &rebind); if (rebind) { - enum pipe_error ret; - ret = SVGA3D_BindGBSurface(swc, surf); - if (ret != PIPE_OK) { - /* flush and retry */ - svga_context_flush(svga, NULL); - ret = SVGA3D_BindGBSurface(swc, surf); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_BindGBSurface(swc, surf)); } } -static enum pipe_error +static void update_image_vgpu9(struct svga_context *svga, struct svga_winsys_surface *surf, const SVGA3dBox *box, unsigned slice, unsigned level) { - enum pipe_error ret; - - ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level); - } - return ret; + SVGA_RETRY(svga, SVGA3D_UpdateGBImage(svga->swc, surf, box, slice, level)); } -static enum pipe_error 
+static void update_image_vgpu10(struct svga_context *svga, struct svga_winsys_surface *surf, const SVGA3dBox *box, @@ -754,17 +713,12 @@ update_image_vgpu10(struct svga_context *svga, unsigned level, unsigned numMipLevels) { - enum pipe_error ret; unsigned subResource; subResource = slice * numMipLevels + level; - ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, subResource); - } - return ret; + SVGA_RETRY(svga, SVGA3D_vgpu10_UpdateSubResource(svga->swc, surf, box, + subResource)); } @@ -819,7 +773,6 @@ svga_texture_transfer_unmap_direct(struct svga_context *svga, /* Now send an update command to update the content in the backend. */ if (st->base.usage & PIPE_TRANSFER_WRITE) { struct svga_winsys_surface *surf = tex->handle; - enum pipe_error ret; assert(svga_have_gb_objects(svga)); @@ -851,19 +804,16 @@ svga_texture_transfer_unmap_direct(struct svga_context *svga, unsigned i; for (i = 0; i < nlayers; i++) { - ret = update_image_vgpu10(svga, surf, &box, - st->slice + i, transfer->level, - tex->b.b.last_level + 1); - assert(ret == PIPE_OK); + update_image_vgpu10(svga, surf, &box, + st->slice + i, transfer->level, + tex->b.b.last_level + 1); } } else { assert(nlayers == 1); - ret = update_image_vgpu9(svga, surf, &box, st->slice, - transfer->level); - assert(ret == PIPE_OK); + update_image_vgpu9(svga, surf, &box, st->slice, + transfer->level); } } - (void) ret; } } @@ -1311,7 +1261,6 @@ svga_texture_generate_mipmap(struct pipe_context *pipe, struct svga_pipe_sampler_view *sv; struct svga_context *svga = svga_context(pipe); struct svga_texture *tex = svga_texture(pt); - enum pipe_error ret; assert(svga_have_vgpu10(svga)); @@ -1342,18 +1291,9 @@ svga_texture_generate_mipmap(struct pipe_context *pipe, return false; sv = svga_pipe_sampler_view(psv); - ret = svga_validate_pipe_sampler_view(svga, sv); - if (ret != PIPE_OK) { - 
svga_context_flush(svga, NULL); - ret = svga_validate_pipe_sampler_view(svga, sv); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, svga_validate_pipe_sampler_view(svga, sv)); - ret = SVGA3D_vgpu10_GenMips(svga->swc, sv->id, tex->handle); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_GenMips(svga->swc, sv->id, tex->handle); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_GenMips(svga->swc, sv->id, tex->handle)); pipe_sampler_view_reference(&psv, NULL); svga->hud.num_generate_mipmap++; @@ -1521,7 +1461,6 @@ svga_texture_transfer_unmap_upload(struct svga_context *svga, struct svga_winsys_surface *dstsurf; struct pipe_resource *texture = st->base.resource; struct svga_texture *tex = svga_texture(texture); - enum pipe_error ret; unsigned subResource; unsigned numMipLevels; unsigned i, layer; @@ -1545,22 +1484,12 @@ svga_texture_transfer_unmap_upload(struct svga_context *svga, /* send a transferFromBuffer command to update the host texture surface */ assert((offset & 15) == 0); - ret = SVGA3D_vgpu10_TransferFromBuffer(svga->swc, srcsurf, - offset, - st->base.stride, - st->base.layer_stride, - dstsurf, subResource, - &st->upload.box); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_TransferFromBuffer(svga->swc, srcsurf, - offset, - st->base.stride, - st->base.layer_stride, - dstsurf, subResource, - &st->upload.box); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_vgpu10_TransferFromBuffer(svga->swc, srcsurf, + offset, + st->base.stride, + st->base.layer_stride, + dstsurf, subResource, + &st->upload.box)); offset += st->base.layer_stride; /* Set rendered-to flag */ diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index f7e3a900290..1dc5319f572 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -723,7 +723,7 @@ vgpu10_get_shader_param(struct pipe_screen *screen, else if (shader == PIPE_SHADER_GEOMETRY) return 
VGPU10_MAX_GS_INPUTS; else if (shader == PIPE_SHADER_TESS_CTRL) - return VGPU11_MAX_HS_INPUTS; + return VGPU11_MAX_HS_INPUT_CONTROL_POINTS; else if (shader == PIPE_SHADER_TESS_EVAL) return VGPU11_MAX_DS_INPUT_CONTROL_POINTS; else diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c index a0e5f5ff2b9..aba6e304fbd 100644 --- a/src/gallium/drivers/svga/svga_screen_cache.c +++ b/src/gallium/drivers/svga/svga_screen_cache.c @@ -41,8 +41,8 @@ /** * Return the size of the surface described by the key (in bytes). */ -static unsigned -surface_size(const struct svga_host_surface_cache_key *key) +unsigned +svga_surface_size(const struct svga_host_surface_cache_key *key) { unsigned bw, bh, bpb, total_size, i; @@ -142,7 +142,7 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen, list_add(&entry->head, &cache->empty); /* update the cache size */ - surf_size = surface_size(&entry->key); + surf_size = svga_surface_size(&entry->key); assert(surf_size <= cache->total_size); if (surf_size > cache->total_size) cache->total_size = 0; /* should never happen, but be safe */ @@ -187,7 +187,7 @@ svga_screen_cache_shrink(struct svga_screen *svgascreen, if (entry->key.format != SVGA3D_BUFFER) { /* we don't want to discard vertex/index buffers */ - cache->total_size -= surface_size(&entry->key); + cache->total_size -= svga_surface_size(&entry->key); assert(entry->handle); sws->surface_reference(sws, &entry->handle, NULL); @@ -225,7 +225,7 @@ svga_screen_cache_add(struct svga_screen *svgascreen, if (!handle) return; - surf_size = surface_size(key); + surf_size = svga_surface_size(key); *p_handle = NULL; mtx_lock(&cache->mutex); @@ -273,7 +273,7 @@ svga_screen_cache_add(struct svga_screen *svgascreen, SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, "unref sid %p (make space)\n", entry->handle); - cache->total_size -= surface_size(&entry->key); + cache->total_size -= svga_surface_size(&entry->key); sws->surface_reference(sws, &entry->handle, NULL); @@ 
-373,7 +373,8 @@ svga_screen_cache_flush(struct svga_screen *svgascreen, /* It is now safe to invalidate the surface content. * It will be done using the current context. */ - if (SVGA3D_InvalidateGBSurface(svga->swc, entry->handle) != PIPE_OK) { + if (SVGA_TRY(SVGA3D_InvalidateGBSurface(svga->swc, entry->handle)) + != PIPE_OK) { ASSERTED enum pipe_error ret; /* Even though surface invalidation here is done after the command @@ -384,13 +385,16 @@ svga_screen_cache_flush(struct svga_screen *svgascreen, * Note, we don't want to call svga_context_flush() here because * this function itself is called inside svga_context_flush(). */ + svga_retry_enter(svga); svga->swc->flush(svga->swc, NULL); nsurf = 0; ret = SVGA3D_InvalidateGBSurface(svga->swc, entry->handle); + svga_retry_exit(svga); assert(ret == PIPE_OK); } /* add the entry to the invalidated list */ + list_add(&entry->head, &cache->invalidated); nsurf++; } @@ -430,7 +434,7 @@ svga_screen_cache_cleanup(struct svga_screen *svgascreen) "unref sid %p (shutdown)\n", cache->entries[i].handle); sws->surface_reference(sws, &cache->entries[i].handle, NULL); - cache->total_size -= surface_size(&cache->entries[i].key); + cache->total_size -= svga_surface_size(&cache->entries[i].key); } if (cache->entries[i].fence) diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h index 055a267c189..c2bfc076ffa 100644 --- a/src/gallium/drivers/svga/svga_screen_cache.h +++ b/src/gallium/drivers/svga/svga_screen_cache.h @@ -159,5 +159,8 @@ svga_screen_surface_destroy(struct svga_screen *svgascreen, void svga_screen_cache_dump(const struct svga_screen *svgascreen); +unsigned +svga_surface_size(const struct svga_host_surface_cache_key *key); + #endif /* SVGA_SCREEN_CACHE_H_ */ diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h index 31ccf97d39a..4fccde40535 100644 --- a/src/gallium/drivers/svga/svga_shader.h +++ 
b/src/gallium/drivers/svga/svga_shader.h @@ -50,6 +50,8 @@ struct svga_compile_key unsigned need_prescale:1; unsigned undo_viewport:1; unsigned allow_psiz:1; + unsigned need_vertex_id_bias:1; + /** The following are all 32-bit bitmasks (per VS input) */ unsigned adjust_attrib_range; unsigned attrib_is_pure_int; @@ -93,6 +95,7 @@ struct svga_compile_key /* tessellation control shader */ struct { unsigned vertices_per_patch:8; + unsigned vertices_out:8; enum pipe_prim_type prim_mode:8; enum pipe_tess_spacing spacing:3; unsigned vertices_order_cw:1; diff --git a/src/gallium/drivers/svga/svga_state.c b/src/gallium/drivers/svga/svga_state.c index ad647d8784c..4f6af8052b6 100644 --- a/src/gallium/drivers/svga/svga_state.c +++ b/src/gallium/drivers/svga/svga_state.c @@ -97,8 +97,11 @@ static const struct svga_tracked_state *hw_draw_state_vgpu10[] = &svga_hw_clip_planes, &svga_hw_vdecl, &svga_hw_fs_constants, + &svga_hw_fs_constbufs, &svga_hw_gs_constants, + &svga_hw_gs_constbufs, &svga_hw_vs_constants, + &svga_hw_vs_constbufs, NULL }; @@ -122,10 +125,15 @@ static const struct svga_tracked_state *hw_draw_state_sm5[] = &svga_hw_clip_planes, &svga_hw_vdecl, &svga_hw_fs_constants, + &svga_hw_fs_constbufs, &svga_hw_gs_constants, + &svga_hw_gs_constbufs, &svga_hw_tes_constants, + &svga_hw_tes_constbufs, &svga_hw_tcs_constants, + &svga_hw_tcs_constbufs, &svga_hw_vs_constants, + &svga_hw_vs_constbufs, NULL }; diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h index 76befebe4a3..853f867b70f 100644 --- a/src/gallium/drivers/svga/svga_state.h +++ b/src/gallium/drivers/svga/svga_state.h @@ -76,6 +76,11 @@ extern struct svga_tracked_state svga_hw_gs_constants; extern struct svga_tracked_state svga_hw_vs_constants; extern struct svga_tracked_state svga_hw_tes_constants; extern struct svga_tracked_state svga_hw_tcs_constants; +extern struct svga_tracked_state svga_hw_fs_constbufs; +extern struct svga_tracked_state svga_hw_vs_constbufs; +extern 
struct svga_tracked_state svga_hw_gs_constbufs; +extern struct svga_tracked_state svga_hw_tcs_constbufs; +extern struct svga_tracked_state svga_hw_tes_constbufs; /* SWTNL_DRAW */ diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index 9d9f8934ec4..7e245baf22c 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -219,6 +219,17 @@ svga_get_extra_vs_constants(const struct svga_context *svga, float *dest) count += 1; } + /* Bias to be added to VertexID */ + if (variant->key.vs.need_vertex_id_bias) { + uint32_t *dest_u = (uint32_t *) dest; // uint version of dest + dest_u[0] = svga->curr.vertex_id_bias; + dest_u[1] = 1; + dest_u[2] = 1; + dest_u[3] = 1; + dest+=4; + count++; + } + /* SVGA_NEW_CLIP */ count += svga_get_clip_plane_constants(svga, variant, &dest); @@ -560,22 +571,170 @@ emit_consts_vgpu9(struct svga_context *svga, enum pipe_shader_type shader) } - +/** + * A helper function to emit a constant buffer binding at the + * specified slot for the specified shader type + */ static enum pipe_error -emit_constbuf_vgpu10(struct svga_context *svga, enum pipe_shader_type shader) +emit_constbuf(struct svga_context *svga, + unsigned slot, + enum pipe_shader_type shader, + unsigned buffer_offset, + unsigned buffer_size, + const void *buffer, + unsigned extra_buffer_offset, + unsigned extra_buffer_size, + const void *extra_buffer) { - const struct pipe_constant_buffer *cbuf; + struct svga_buffer *sbuf = svga_buffer((struct pipe_resource *)buffer); struct pipe_resource *dst_buffer = NULL; enum pipe_error ret = PIPE_OK; struct pipe_transfer *src_transfer; - struct svga_winsys_surface *dst_handle; + struct svga_winsys_surface *dst_handle = NULL; + unsigned new_buf_size = 0; + unsigned alloc_buf_size; + unsigned offset = 0;; + void *src_map = NULL, *dst_map; + + if ((sbuf && sbuf->swbuf) || extra_buffer) { + + /* buffer here is a user-space buffer so 
mapping it is really cheap. */ + if (buffer_size > 0) { + src_map = pipe_buffer_map_range(&svga->pipe, + (struct pipe_resource *)buffer, + buffer_offset, buffer_size, + PIPE_TRANSFER_READ, &src_transfer); + assert(src_map); + if (!src_map) { + return PIPE_ERROR_OUT_OF_MEMORY; + } + } + + new_buf_size = MAX2(buffer_size, extra_buffer_offset) + extra_buffer_size; + + /* According to the DX10 spec, the constant buffer size must be + * in multiples of 16. + */ + new_buf_size = align(new_buf_size, 16); + + /* Constant buffer size in the upload buffer must be in multiples of 256. + * In order to maximize the chance of merging the upload buffer chunks + * when svga_buffer_add_range() is called, + * the allocate buffer size needs to be in multiples of 256 as well. + * Otherwise, since there is gap between each dirty range of the upload buffer, + * each dirty range will end up in its own UPDATE_GB_IMAGE command. + */ + alloc_buf_size = align(new_buf_size, CONST0_UPLOAD_ALIGNMENT); + + u_upload_alloc(svga->const0_upload, 0, alloc_buf_size, + CONST0_UPLOAD_ALIGNMENT, &offset, + &dst_buffer, &dst_map); + + if (!dst_map) { + if (src_map) + pipe_buffer_unmap(&svga->pipe, src_transfer); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + /* Initialize the allocated buffer slot to 0 to ensure the padding is + * filled with 0. 
+ */ + memset(dst_map, 0, alloc_buf_size); + + if (src_map) { + memcpy(dst_map, src_map, buffer_size); + pipe_buffer_unmap(&svga->pipe, src_transfer); + } + + if (extra_buffer_size) { + assert(extra_buffer_offset + extra_buffer_size <= new_buf_size); + memcpy((char *) dst_map + extra_buffer_offset, extra_buffer, + extra_buffer_size); + } + + /* Get winsys handle for the constant buffer */ + if (svga->state.hw_draw.const0_buffer == dst_buffer && + svga->state.hw_draw.const0_handle) { + /* re-reference already mapped buffer */ + dst_handle = svga->state.hw_draw.const0_handle; + } + else { + /* we must unmap the buffer before getting the winsys handle */ + u_upload_unmap(svga->const0_upload); + + dst_handle = svga_buffer_handle(svga, dst_buffer, + PIPE_BIND_CONSTANT_BUFFER); + if (!dst_handle) { + pipe_resource_reference(&dst_buffer, NULL); + return PIPE_ERROR_OUT_OF_MEMORY; + } + } + } + else if (sbuf) { + dst_handle = svga_buffer_handle(svga, &sbuf->b.b, PIPE_BIND_CONSTANT_BUFFER); + new_buf_size = align(buffer_size, 16); + offset = buffer_offset; + } + + assert(new_buf_size % 16 == 0); + + const struct svga_screen *screen = svga_screen(svga->pipe.screen); + const struct svga_winsys_screen *sws = screen->sws; + + /* Issue the SetSingleConstantBuffer command */ + if (!sws->have_constant_buffer_offset_cmd || + svga->state.hw_draw.constbufoffsets[shader][slot].handle != dst_handle || + svga->state.hw_draw.constbufoffsets[shader][slot].size != new_buf_size) { + ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc, + slot, /* index */ + svga_shader_type(shader), + dst_handle, + offset, + new_buf_size); + } + else if (dst_handle){ + unsigned command = SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET + shader; + ret = SVGA3D_vgpu10_SetConstantBufferOffset(svga->swc, + command, + slot, /* index */ + offset); + } + + if (ret != PIPE_OK) { + pipe_resource_reference(&dst_buffer, NULL); + return ret; + } + + /* save the upload buffer / handle for next time */ + if (dst_buffer != 
buffer && dst_buffer) { + pipe_resource_reference(&svga->state.hw_draw.const0_buffer, dst_buffer); + svga->state.hw_draw.const0_handle = dst_handle; + } + + /* Save this const buffer until it's replaced in the future. + * Otherwise, all references to the buffer will go away after the + * command buffer is submitted, it'll get recycled and we will have + * incorrect constant buffer bindings. + */ + pipe_resource_reference(&svga->state.hw_draw.constbuf[shader][slot], dst_buffer); + svga->state.hw_draw.constbufoffsets[shader][slot].handle = dst_handle; + svga->state.hw_draw.constbufoffsets[shader][slot].size = new_buf_size; + + pipe_resource_reference(&dst_buffer, NULL); + + return PIPE_OK; +} + + +/* For constbuf 0 */ +static enum pipe_error +emit_consts_vgpu10(struct svga_context *svga, enum pipe_shader_type shader) +{ + const struct pipe_constant_buffer *cbuf; + enum pipe_error ret = PIPE_OK; float extras[MAX_EXTRA_CONSTS][4]; unsigned extra_count, extra_size, extra_offset; - unsigned new_buf_size; - void *src_map = NULL, *dst_map; - unsigned offset; const struct svga_shader_variant *variant; - unsigned alloc_buf_size; assert(shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY || @@ -630,131 +789,30 @@ emit_constbuf_vgpu10(struct svga_context *svga, enum pipe_shader_type shader) if (cbuf->buffer_size + extra_size == 0) return PIPE_OK; /* nothing to do */ - /* Typically, the cbuf->buffer here is a user-space buffer so mapping - * it is really cheap. If we ever get real HW buffers for constants - * we should void mapping and instead use a ResourceCopy command. - */ - if (cbuf->buffer_size > 0) { - src_map = pipe_buffer_map_range(&svga->pipe, cbuf->buffer, - cbuf->buffer_offset, cbuf->buffer_size, - PIPE_TRANSFER_READ, &src_transfer); - assert(src_map); - if (!src_map) { - return PIPE_ERROR_OUT_OF_MEMORY; - } - } - - /* The new/dest buffer's size must be large enough to hold the original, - * user-specified constants, plus the extra constants. 
- * The size of the original constant buffer _should_ agree with what the - * shader is expecting, but it might not (it's not enforced anywhere by - * gallium). - */ - new_buf_size = MAX2(cbuf->buffer_size, extra_offset) + extra_size; - - /* According to the DX10 spec, the constant buffer size must be - * in multiples of 16. - */ - new_buf_size = align(new_buf_size, 16); - - /* Constant buffer size in the upload buffer must be in multiples of 256. - * In order to maximize the chance of merging the upload buffer chunks - * when svga_buffer_add_range() is called, - * the allocate buffer size needs to be in multiples of 256 as well. - * Otherwise, since there is gap between each dirty range of the upload buffer, - * each dirty range will end up in its own UPDATE_GB_IMAGE command. - */ - alloc_buf_size = align(new_buf_size, CONST0_UPLOAD_ALIGNMENT); - - u_upload_alloc(svga->const0_upload, 0, alloc_buf_size, - CONST0_UPLOAD_ALIGNMENT, &offset, - &dst_buffer, &dst_map); - if (!dst_map) { - if (src_map) - pipe_buffer_unmap(&svga->pipe, src_transfer); - return PIPE_ERROR_OUT_OF_MEMORY; - } - - if (src_map) { - memcpy(dst_map, src_map, cbuf->buffer_size); - pipe_buffer_unmap(&svga->pipe, src_transfer); - } - - if (extra_size) { - assert(extra_offset + extra_size <= new_buf_size); - memcpy((char *) dst_map + extra_offset, extras, extra_size); - } - - /* Get winsys handle for the constant buffer */ - if (svga->state.hw_draw.const0_buffer == dst_buffer && - svga->state.hw_draw.const0_handle) { - /* re-reference already mapped buffer */ - dst_handle = svga->state.hw_draw.const0_handle; - } - else { - /* we must unmap the buffer before getting the winsys handle */ - u_upload_unmap(svga->const0_upload); - - dst_handle = svga_buffer_handle(svga, dst_buffer, - PIPE_BIND_CONSTANT_BUFFER); - if (!dst_handle) { - pipe_resource_reference(&dst_buffer, NULL); - return PIPE_ERROR_OUT_OF_MEMORY; - } - - /* save the buffer / handle for next time */ - 
pipe_resource_reference(&svga->state.hw_draw.const0_buffer, dst_buffer); - svga->state.hw_draw.const0_handle = dst_handle; - } - - /* Issue the SetSingleConstantBuffer command */ - assert(new_buf_size % 16 == 0); - ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc, - 0, /* index */ - svga_shader_type(shader), - dst_handle, - offset, - new_buf_size); - - if (ret != PIPE_OK) { - pipe_resource_reference(&dst_buffer, NULL); + ret = emit_constbuf(svga, 0, shader, + cbuf->buffer_offset, cbuf->buffer_size, cbuf->buffer, + extra_offset, extra_size, extras); + if (ret != PIPE_OK) return ret; - } - - /* Save this const buffer until it's replaced in the future. - * Otherwise, all references to the buffer will go away after the - * command buffer is submitted, it'll get recycled and we will have - * incorrect constant buffer bindings. - */ - pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], dst_buffer); - svga->state.hw_draw.default_constbuf_size[shader] = new_buf_size; + svga->state.hw_draw.default_constbuf_size[shader] = + svga->state.hw_draw.constbufoffsets[shader][0].size; - pipe_resource_reference(&dst_buffer, NULL); - - svga->hud.num_const_buf_updates++; + svga->hud.num_const_updates++; return ret; } static enum pipe_error -emit_consts_vgpu10(struct svga_context *svga, enum pipe_shader_type shader) +emit_constbuf_vgpu10(struct svga_context *svga, enum pipe_shader_type shader) { - enum pipe_error ret; + enum pipe_error ret = PIPE_OK; unsigned dirty_constbufs; unsigned enabled_constbufs; - /* Emit 0th constant buffer (with extra constants) */ - ret = emit_constbuf_vgpu10(svga, shader); - if (ret != PIPE_OK) { - return ret; - } - enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u; - - /* Emit other constant buffers (UBOs) */ - dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u; + dirty_constbufs = (svga->state.dirty_constbufs[shader]|enabled_constbufs) & ~1u; while (dirty_constbufs) { unsigned index = 
u_bit_scan(&dirty_constbufs); @@ -762,15 +820,11 @@ emit_consts_vgpu10(struct svga_context *svga, enum pipe_shader_type shader) unsigned size = svga->curr.constbufs[shader][index].buffer_size; struct svga_buffer *buffer = svga_buffer(svga->curr.constbufs[shader][index].buffer); - struct svga_winsys_surface *handle; if (buffer) { - handle = svga_buffer_handle(svga, &buffer->b.b, - PIPE_BIND_CONSTANT_BUFFER); enabled_constbufs |= 1 << index; } else { - handle = NULL; enabled_constbufs &= ~(1 << index); assert(offset == 0); assert(size == 0); @@ -795,12 +849,9 @@ emit_consts_vgpu10(struct svga_context *svga, enum pipe_shader_type shader) } assert(size % 16 == 0); - ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc, - index, - svga_shader_type(shader), - handle, - offset, - size); + + ret = emit_constbuf(svga, index, shader, offset, size, buffer, + 0, 0, NULL); if (ret != PIPE_OK) return ret; @@ -824,7 +875,7 @@ emit_fs_consts(struct svga_context *svga, uint64_t dirty) if (!variant) return PIPE_OK; - /* SVGA_NEW_FS_CONST_BUFFER + /* SVGA_NEW_FS_CONSTS */ if (svga_have_vgpu10(svga)) { ret = emit_consts_vgpu10(svga, PIPE_SHADER_FRAGMENT); @@ -836,17 +887,42 @@ emit_fs_consts(struct svga_context *svga, uint64_t dirty) return ret; } +static enum pipe_error +emit_fs_constbuf(struct svga_context *svga, uint64_t dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.fs; + enum pipe_error ret = PIPE_OK; + + /* SVGA_NEW_FS_VARIANT + */ + if (!variant) + return PIPE_OK; + + /* SVGA_NEW_FS_CONSTBUF + */ + assert(svga_have_vgpu10(svga)); + ret = emit_constbuf_vgpu10(svga, PIPE_SHADER_FRAGMENT); + + return ret; +} struct svga_tracked_state svga_hw_fs_constants = { "hw fs params", - (SVGA_NEW_FS_CONST_BUFFER | + (SVGA_NEW_FS_CONSTS | SVGA_NEW_FS_VARIANT | SVGA_NEW_TEXTURE_CONSTS), emit_fs_consts }; +struct svga_tracked_state svga_hw_fs_constbufs = +{ + "hw fs params", + SVGA_NEW_FS_CONST_BUFFER, + emit_fs_constbuf +}; + static enum pipe_error 
emit_vs_consts(struct svga_context *svga, uint64_t dirty) @@ -872,17 +948,45 @@ emit_vs_consts(struct svga_context *svga, uint64_t dirty) } +static enum pipe_error +emit_vs_constbuf(struct svga_context *svga, uint64_t dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.vs; + enum pipe_error ret = PIPE_OK; + + /* SVGA_NEW_VS_VARIANT + */ + if (!variant) + return PIPE_OK; + + /* SVGA_NEW_VS_CONSTBUF + */ + assert(svga_have_vgpu10(svga)); + ret = emit_constbuf_vgpu10(svga, PIPE_SHADER_VERTEX); + + return ret; +} + + struct svga_tracked_state svga_hw_vs_constants = { "hw vs params", (SVGA_NEW_PRESCALE | - SVGA_NEW_VS_CONST_BUFFER | + SVGA_NEW_VS_CONSTS | SVGA_NEW_VS_VARIANT | SVGA_NEW_TEXTURE_CONSTS), emit_vs_consts }; +struct svga_tracked_state svga_hw_vs_constbufs = +{ + "hw vs params", + SVGA_NEW_VS_CONST_BUFFER, + emit_vs_constbuf +}; + + static enum pipe_error emit_gs_consts(struct svga_context *svga, uint64_t dirty) { @@ -912,11 +1016,31 @@ emit_gs_consts(struct svga_context *svga, uint64_t dirty) } +static enum pipe_error +emit_gs_constbuf(struct svga_context *svga, uint64_t dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.gs; + enum pipe_error ret = PIPE_OK; + + /* SVGA_NEW_GS_VARIANT + */ + if (!variant) + return PIPE_OK; + + /* SVGA_NEW_GS_CONSTBUF + */ + assert(svga_have_vgpu10(svga)); + ret = emit_constbuf_vgpu10(svga, PIPE_SHADER_GEOMETRY); + + return ret; +} + + struct svga_tracked_state svga_hw_gs_constants = { "hw gs params", (SVGA_NEW_PRESCALE | - SVGA_NEW_GS_CONST_BUFFER | + SVGA_NEW_GS_CONSTS | SVGA_NEW_RAST | SVGA_NEW_GS_VARIANT | SVGA_NEW_TEXTURE_CONSTS), @@ -924,6 +1048,14 @@ struct svga_tracked_state svga_hw_gs_constants = }; +struct svga_tracked_state svga_hw_gs_constbufs = +{ + "hw gs params", + SVGA_NEW_GS_CONST_BUFFER, + emit_gs_constbuf +}; + + /** * Emit constant buffer for tessellation control shader */ @@ -947,15 +1079,43 @@ emit_tcs_consts(struct svga_context *svga, uint64_t dirty) } 
+static enum pipe_error +emit_tcs_constbuf(struct svga_context *svga, uint64_t dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.tcs; + enum pipe_error ret = PIPE_OK; + + /* SVGA_NEW_TCS_VARIANT + */ + if (!variant) + return PIPE_OK; + + /* SVGA_NEW_TCS_CONSTBUF + */ + assert(svga_have_vgpu10(svga)); + ret = emit_constbuf_vgpu10(svga, PIPE_SHADER_TESS_CTRL); + + return ret; +} + + struct svga_tracked_state svga_hw_tcs_constants = { "hw tcs params", - (SVGA_NEW_TCS_CONST_BUFFER | + (SVGA_NEW_TCS_CONSTS | SVGA_NEW_TCS_VARIANT), emit_tcs_consts }; +struct svga_tracked_state svga_hw_tcs_constbufs = +{ + "hw tcs params", + SVGA_NEW_TCS_CONST_BUFFER, + emit_tcs_constbuf +}; + + /** * Emit constant buffer for tessellation evaluation shader */ @@ -977,11 +1137,39 @@ emit_tes_consts(struct svga_context *svga, uint64_t dirty) } +static enum pipe_error +emit_tes_constbuf(struct svga_context *svga, uint64_t dirty) +{ + const struct svga_shader_variant *variant = svga->state.hw_draw.tes; + enum pipe_error ret = PIPE_OK; + + /* SVGA_NEW_TES_VARIANT + */ + if (!variant) + return PIPE_OK; + + /* SVGA_NEW_TES_CONSTBUF + */ + assert(svga_have_vgpu10(svga)); + ret = emit_constbuf_vgpu10(svga, PIPE_SHADER_TESS_EVAL); + + return ret; +} + + struct svga_tracked_state svga_hw_tes_constants = { "hw tes params", (SVGA_NEW_PRESCALE | - SVGA_NEW_TES_CONST_BUFFER | + SVGA_NEW_TES_CONSTS | SVGA_NEW_TES_VARIANT), emit_tes_consts }; + + +struct svga_tracked_state svga_hw_tes_constbufs = +{ + "hw tes params", + SVGA_NEW_TES_CONST_BUFFER, + emit_tes_constbuf +}; diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index 675fec96cf8..5f3df6a2ba6 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -210,16 +210,34 @@ make_fs_key(const struct svga_context *svga, if (!svga->state.sw.need_swtnl) { /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE */ + enum pipe_prim_type prim_mode; + 
struct svga_shader *shader; + + /* Find the last shader in the vertex pipeline and the output primitive mode + * from that shader. + */ + if (svga->curr.tes) { + shader = &svga->curr.tes->base; + prim_mode = shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; + } else if (svga->curr.gs) { + shader = &svga->curr.gs->base; + prim_mode = shader->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; + } else { + shader = &svga->curr.vs->base; + prim_mode = svga->curr.reduced_prim; + } + key->fs.light_twoside = svga->curr.rast->templ.light_twoside; key->fs.front_ccw = svga->curr.rast->templ.front_ccw; key->fs.pstipple = (svga->curr.rast->templ.poly_stipple_enable && - svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES); + prim_mode == PIPE_PRIM_TRIANGLES); + key->fs.aa_point = (svga->curr.rast->templ.point_smooth && - svga->curr.reduced_prim == PIPE_PRIM_POINTS && + prim_mode == PIPE_PRIM_POINTS && (svga->curr.rast->pointsize > 1.0 || - svga->curr.vs->base.info.writes_psize)); - if (key->fs.aa_point) { - assert(svga->curr.gs != NULL); + shader->info.writes_psize)); + + if (key->fs.aa_point && svga->curr.gs) { assert(svga->curr.gs->aa_point_coord_index != -1); key->fs.aa_point_coord_index = svga->curr.gs->aa_point_coord_index; } diff --git a/src/gallium/drivers/svga/svga_state_ts.c b/src/gallium/drivers/svga/svga_state_ts.c index 890d153c7d6..28f2ae403dd 100644 --- a/src/gallium/drivers/svga/svga_state_ts.c +++ b/src/gallium/drivers/svga/svga_state_ts.c @@ -89,6 +89,14 @@ make_tcs_key(struct svga_context *svga, struct svga_compile_key *key) key->tcs.vertices_order_cw = tes->vertices_order_cw; key->tcs.point_mode = tes->point_mode; + /* The number of control point output from tcs is determined by the + * number of control point input expected in tes. If tes does not expect + * any control point input, then vertices_per_patch in the tes key will + * be 0, otherwise it will contain the number of vertices out as specified + * in the tcs property. 
+ */ + key->tcs.vertices_out = tes->base.key.tes.vertices_per_patch; + if (svga->tcs.passthrough) key->tcs.passthrough = 1; @@ -208,6 +216,7 @@ static void make_tes_key(struct svga_context *svga, struct svga_compile_key *key) { struct svga_tes_shader *tes = svga->curr.tes; + boolean has_control_point_inputs = FALSE; memset(key, 0, sizeof *key); @@ -217,8 +226,23 @@ make_tes_key(struct svga_context *svga, struct svga_compile_key *key) svga_init_shader_key_common(svga, PIPE_SHADER_TESS_EVAL, &tes->base, key); assert(svga->curr.tcs); - key->tes.vertices_per_patch = - svga->curr.tcs->base.info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; + + /* + * Check if this tes expects any output control points from tcs. + */ + for (unsigned i = 0; i < tes->base.info.num_inputs; i++) { + switch (tes->base.info.input_semantic_name[i]) { + case TGSI_SEMANTIC_PATCH: + case TGSI_SEMANTIC_TESSOUTER: + case TGSI_SEMANTIC_TESSINNER: + break; + default: + has_control_point_inputs = TRUE; + } + } + + key->tes.vertices_per_patch = has_control_point_inputs ? 
+ svga->curr.tcs->base.info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 0; key->tes.need_prescale = svga->state.hw_clear.prescale[0].enabled && (svga->curr.gs == NULL); @@ -239,7 +263,7 @@ make_tes_key(struct svga_context *svga, struct svga_compile_key *key) key->tes.need_tessinner = 0; key->tes.need_tessouter = 0; - for (int i = 0; i < svga->curr.tcs->base.info.num_outputs; i++) { + for (unsigned i = 0; i < svga->curr.tcs->base.info.num_outputs; i++) { switch (svga->curr.tcs->base.info.output_semantic_name[i]) { case TGSI_SEMANTIC_TESSOUTER: key->tes.need_tessouter = 1; diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 147b07aaeb1..492a929bd8f 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -175,6 +175,10 @@ make_vs_key(struct svga_context *svga, struct svga_compile_key *key) return; } + if (svga_have_vgpu10(svga)) { + key->vs.need_vertex_id_bias = 1; + } + /* SVGA_NEW_PRESCALE */ key->vs.need_prescale = svga->state.hw_clear.prescale[0].enabled && (svga->curr.tes == NULL) && diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c index d3dd23d2d81..68edf1c21db 100644 --- a/src/gallium/drivers/svga/svga_surface.c +++ b/src/gallium/drivers/svga/svga_surface.c @@ -54,7 +54,6 @@ svga_texture_copy_region(struct svga_context *svga, unsigned dst_x, unsigned dst_y, unsigned dst_z, unsigned width, unsigned height, unsigned depth) { - enum pipe_error ret; SVGA3dCopyBox box; assert(svga_have_vgpu10(svga)); @@ -69,16 +68,9 @@ svga_texture_copy_region(struct svga_context *svga, box.srcy = src_y; box.srcz = src_z; - ret = SVGA3D_vgpu10_PredCopyRegion(svga->swc, - dst_handle, dstSubResource, - src_handle, srcSubResource, &box); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_vgpu10_PredCopyRegion(svga->swc, - dst_handle, dstSubResource, - src_handle, srcSubResource, &box); - assert(ret == PIPE_OK); - } + 
SVGA_RETRY(svga, SVGA3D_vgpu10_PredCopyRegion + (svga->swc, dst_handle, dstSubResource, + src_handle, srcSubResource, &box)); } @@ -93,7 +85,6 @@ svga_texture_copy_handle(struct svga_context *svga, unsigned width, unsigned height, unsigned depth) { struct svga_surface dst, src; - enum pipe_error ret; SVGA3dCopyBox box, *boxes; assert(svga); @@ -124,18 +115,11 @@ svga_texture_copy_handle(struct svga_context *svga, dst_handle, dst_level, dst_x, dst_y, dst_z); */ - ret = SVGA3D_BeginSurfaceCopy(svga->swc, - &src.base, - &dst.base, - &boxes, 1); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_BeginSurfaceCopy(svga->swc, - &src.base, - &dst.base, - &boxes, 1); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_BeginSurfaceCopy(svga->swc, + &src.base, + &dst.base, + &boxes, 1)); + *boxes = box; SVGA_FIFOCommitAll(svga->swc); } @@ -563,7 +547,7 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s) * associated resource. We will then use the cloned surface view for * render target. */ - for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { + for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_TESS_EVAL; shader++) { if (svga_check_sampler_view_resource_collision(svga, s->handle, shader)) { SVGA_DBG(DEBUG_VIEWS, "same resource used in shaderResource and renderTarget 0x%x\n", @@ -601,11 +585,7 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s) * need to update the host-side copy with the invalid * content when the associated mob is first bound to the surface. 
*/ - if (SVGA3D_InvalidateGBSurface(svga->swc, stex->handle) != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_InvalidateGBSurface(svga->swc, stex->handle); - assert(ret == PIPE_OK); - } + SVGA_RETRY(svga, SVGA3D_InvalidateGBSurface(svga->swc, stex->handle)); stex->validated = TRUE; } @@ -670,7 +650,6 @@ svga_surface_destroy(struct pipe_context *pipe, struct svga_surface *s = svga_surface(surf); struct svga_texture *t = svga_texture(surf->texture); struct svga_screen *ss = svga_screen(surf->texture->screen); - enum pipe_error ret = PIPE_OK; SVGA_STATS_TIME_PUSH(ss->sws, SVGA_STATS_TIME_DESTROYSURFACE); @@ -689,8 +668,6 @@ svga_surface_destroy(struct pipe_context *pipe, } if (s->view_id != SVGA3D_INVALID_ID) { - unsigned try; - /* The SVGA3D device will generate a device error if the * render target view or depth stencil view is destroyed from * a context other than the one it was created with. @@ -702,18 +679,14 @@ svga_surface_destroy(struct pipe_context *pipe, } else { assert(svga_have_vgpu10(svga)); - for (try = 0; try < 2; try++) { - if (util_format_is_depth_or_stencil(s->base.format)) { - ret = SVGA3D_vgpu10_DestroyDepthStencilView(svga->swc, s->view_id); - } - else { - ret = SVGA3D_vgpu10_DestroyRenderTargetView(svga->swc, s->view_id); - } - if (ret == PIPE_OK) - break; - svga_context_flush(svga, NULL); + if (util_format_is_depth_or_stencil(s->base.format)) { + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyDepthStencilView(svga->swc, + s->view_id)); + } + else { + SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyRenderTargetView(svga->swc, + s->view_id)); } - assert(ret == PIPE_OK); util_bitmask_clear(svga->surface_view_id_bm, s->view_id); } } diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c index 3e8c90d8e1e..5887a9ad7d7 100644 --- a/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -216,9 +216,9 @@ svga_vbuf_submit_state(struct svga_vbuf_render 
*svga_render) { struct svga_context *svga = svga_render->svga; SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; - enum pipe_error ret; unsigned i; static const unsigned zero[PIPE_MAX_ATTRIBS] = {0}; + boolean retried; /* if the vdecl or vbuf hasn't changed do nothing */ if (!svga->swtnl.new_vdecl) @@ -230,13 +230,10 @@ svga_vbuf_submit_state(struct svga_vbuf_render *svga_render) memcpy(vdecl, svga_render->vdecl, sizeof(vdecl)); /* flush the hw state */ - ret = svga_hwtnl_flush(svga->hwtnl); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = svga_hwtnl_flush(svga->hwtnl); + SVGA_RETRY_CHECK(svga, svga_hwtnl_flush(svga->hwtnl), retried); + if (retried) { /* if we hit this path we might become synced with hw */ svga->swtnl.new_vbuf = TRUE; - assert(ret == PIPE_OK); } for (i = 0; i < svga_render->vdecl_count; i++) { diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 96d8a52eb62..b719dd400c6 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -47,7 +47,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga, ASSERTED unsigned old_num_vertex_buffers; unsigned i; const void *map; - enum pipe_error ret; + boolean retried; SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_SWTNLDRAWVBO); @@ -58,12 +58,9 @@ svga_swtnl_draw_vbo(struct svga_context *svga, /* Make sure that the need_swtnl flag does not go away */ svga->state.sw.in_swtnl_draw = TRUE; - ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW); - if (ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW); + SVGA_RETRY_CHECK(svga, svga_update_state(svga, SVGA_STATE_SWTNL_DRAW), retried); + if (retried) { svga->swtnl.new_vbuf = TRUE; - assert(ret == PIPE_OK); } /* @@ -148,7 +145,7 @@ svga_swtnl_draw_vbo(struct svga_context *svga, svga->dirty |= SVGA_NEW_NEED_PIPELINE | SVGA_NEW_NEED_SWVFETCH; SVGA_STATS_TIME_POP(svga_sws(svga)); - return ret; + return 
PIPE_OK; } diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index 6e607cd0616..0c07985519c 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -279,6 +279,10 @@ struct svga_shader_emitter_v10 /* viewport constant */ unsigned viewport_index; + unsigned vertex_id_bias_index; + unsigned vertex_id_sys_index; + unsigned vertex_id_tmp_index; + /* temp index of adjusted vertex attributes */ unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS]; } vs; @@ -333,7 +337,6 @@ struct svga_shader_emitter_v10 struct { unsigned vertices_per_patch_index; /**< vertices_per_patch system value index */ unsigned imm_index; /**< immediate for tcs */ - unsigned vertices_out; unsigned invocation_id_sys_index; /**< invocation id */ unsigned invocation_id_tmp_index; unsigned instruction_token_pos; /* token pos for the first instruction */ @@ -343,6 +346,7 @@ struct svga_shader_emitter_v10 unsigned control_point_tmp_index; /* control point temporary register */ unsigned control_point_out_count; /* control point output count */ boolean control_point_phase; /* true if in control point phase */ + boolean fork_phase_add_signature; /* true if needs to add signature in fork phase */ unsigned patch_generic_out_count; /* per-patch generic output count */ unsigned patch_generic_out_index; /* per-patch generic output register index*/ unsigned patch_generic_tmp_index; /* per-patch generic temporary register index*/ @@ -408,6 +412,7 @@ struct svga_shader_emitter_v10 /* VS/TCS/TES/GS/FS Linkage info */ struct shader_linkage linkage; + struct tgsi_shader_info *prevShaderInfo; /* Shader signature */ struct svga_shader_signature signature; @@ -603,7 +608,7 @@ check_register_index(struct svga_shader_emitter_v10 *emit, (emit->unit == PIPE_SHADER_FRAGMENT && index >= VGPU10_MAX_FS_INPUTS) || (emit->unit == PIPE_SHADER_TESS_CTRL && - index >= VGPU11_MAX_HS_INPUTS) || + index >= 
VGPU11_MAX_HS_INPUT_CONTROL_POINTS) || (emit->unit == PIPE_SHADER_TESS_EVAL && index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) { emit->register_overflow = TRUE; @@ -1445,7 +1450,7 @@ static boolean need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit, unsigned index) { - if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY)) + if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY)) && emit->current_loop_depth == 0) { if (!emit->temp_map[index].initialized && emit->temp_map[index].index < emit->num_shader_temps) { @@ -1575,10 +1580,18 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, } } else if (file == TGSI_FILE_SYSTEM_VALUE) { - /* Map the TGSI system value to a VGPU10 input register */ - assert(index < ARRAY_SIZE(emit->system_value_indexes)); - file = TGSI_FILE_INPUT; - index = emit->system_value_indexes[index]; + if (index == emit->vs.vertex_id_sys_index && + emit->vs.vertex_id_tmp_index != INVALID_INDEX) { + file = TGSI_FILE_TEMPORARY; + index = emit->vs.vertex_id_tmp_index; + swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; + } + else { + /* Map the TGSI system value to a VGPU10 input register */ + assert(index < ARRAY_SIZE(emit->system_value_indexes)); + file = TGSI_FILE_INPUT; + index = emit->system_value_indexes[index]; + } } } else if (emit->unit == PIPE_SHADER_TESS_CTRL) { @@ -1600,7 +1613,10 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, */ operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID; - index = 0; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + operand0.mask = 0; + emit_dword(emit, operand0.value); + return; } else { /* There is no control point ID input declaration in @@ -1718,6 +1734,8 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, * to align with the tcs output index. 
*/ index = emit->linkage.input_map[index]; + + assert(index2 < emit->key.tes.vertices_per_patch); } else { if (index < emit->key.tes.tessfactor_index) @@ -2824,7 +2842,7 @@ emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, break; case TGSI_PROPERTY_TCS_VERTICES_OUT: - emit->tcs.vertices_out = prop->u[0].Data; + /* This info is already captured in the shader key */ break; case TGSI_PROPERTY_TES_PRIM_MODE: @@ -2935,7 +2953,7 @@ emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit) assert(emit->unit == PIPE_SHADER_TESS_EVAL); /* Emit the input control point count */ - assert(emit->key.tes.vertices_per_patch > 0 && + assert(emit->key.tes.vertices_per_patch >= 0 && emit->key.tes.vertices_per_patch <= 32); opcode0.value = 0; @@ -3066,11 +3084,11 @@ emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit) end_emit_instruction(emit); /* Emit the output control point count */ - assert(emit->tcs.vertices_out >= 0 && emit->tcs.vertices_out <= 32); + assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32); opcode0.value = 0; opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT; - opcode0.controlPointCount = emit->tcs.vertices_out; + opcode0.controlPointCount = emit->key.tcs.vertices_out; begin_emit_instruction(emit); emit_dword(emit, opcode0.value); end_emit_instruction(emit); @@ -3157,7 +3175,8 @@ needs_control_point_phase(struct svga_shader_emitter_v10 *emit) * we need a control point phase to explicitly set the output control * points. */ - if (emit->key.tcs.vertices_per_patch != emit->tcs.vertices_out) + if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) && + emit->key.tcs.vertices_out) return TRUE; for (i = 0; i < emit->info.num_outputs; i++) { @@ -3175,23 +3194,93 @@ needs_control_point_phase(struct svga_shader_emitter_v10 *emit) /** - * Start the hull shader control point phase + * A helper function to add shader signature for passthrough control point + * phase. 
This signature is also generated for passthrough control point + * phase from HLSL compiler and is needed by Metal Renderer. */ -static boolean -emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) +static void +emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit) { - VGPU10OpcodeToken0 opcode0; + struct svga_shader_signature *sgn = &emit->signature; + SVGA3dDXShaderSignatureEntry *sgnEntry; + unsigned i; - /* If there is no control point output, skip the control point phase. */ - if (!needs_control_point_phase(emit)) - return FALSE; + for (i = 0; i < emit->info.num_inputs; i++) { + unsigned index = emit->linkage.input_map[i]; + enum tgsi_semantic sem_name = emit->info.input_semantic_name[i]; + + sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++]; + + set_shader_signature_entry(sgnEntry, index, + tgsi_semantic_to_sgn_name[sem_name], + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, + SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); + + sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++]; + + set_shader_signature_entry(sgnEntry, i, + tgsi_semantic_to_sgn_name[sem_name], + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, + SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); + } +} + + +/** + * A helper function to emit an instruction to start the control point phase + * in the hull shader. 
+ */ +static void +emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; - /* Start the control point phase in the hull shader */ opcode0.value = 0; opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE; begin_emit_instruction(emit); emit_dword(emit, opcode0.value); end_emit_instruction(emit); +} + + +/** + * Start the hull shader control point phase + */ +static boolean +emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) +{ + /* If there is no control point output, skip the control point phase. */ + if (!needs_control_point_phase(emit)) { + if (!emit->key.tcs.vertices_out) { + /** + * If the tcs does not explicitly generate any control point output + * and the tes does not use any input control point, then + * emit an empty control point phase with zero output control + * point count. + */ + emit_control_point_phase_instruction(emit); + + /** + * Since this is an empty control point phase, we will need to + * add input signatures when we parse the tcs again in the + * patch constant phase. + */ + emit->tcs.fork_phase_add_signature = TRUE; + } + else { + /** + * Before skipping the control point phase, add the signature for + * the passthrough control point. 
+ */ + emit_passthrough_control_point_signature(emit); + } + return FALSE; + } + + /* Start the control point phase in the hull shader */ + emit_control_point_phase_instruction(emit); /* Declare the output control point ID */ if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) { @@ -3799,9 +3888,6 @@ emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit) emit->key.fs.write_color0_to_n_cbufs; } } - else { - assert(!emit->key.fs.write_color0_to_n_cbufs); - } } else if (semantic_name == TGSI_SEMANTIC_POSITION) { /* Fragment depth output */ @@ -4064,7 +4150,7 @@ emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit, SVGA3dDXShaderSignatureEntry *sgnEntry = &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; set_shader_signature_entry(sgnEntry, index, - sgnName, SVGA3DWRITEMASK_0, + sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X, SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); } @@ -4324,6 +4410,7 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, map_tgsi_semantic_to_sgn_name(semantic_name)); break; case TGSI_SEMANTIC_VERTEXID: + emit->vs.vertex_id_sys_index = index; index = alloc_system_value_index(emit, index); emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, VGPU10_OPERAND_TYPE_INPUT, @@ -4786,6 +4873,10 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) unsigned i; unsigned size = emit->key.tcs.vertices_per_patch; unsigned indicesMask = 0; + boolean addSignature = TRUE; + + if (!emit->tcs.control_point_phase) + addSignature = emit->tcs.fork_phase_add_signature; for (i = 0; i < emit->info.num_inputs; i++) { unsigned usage_mask = emit->info.input_usage_mask[i]; @@ -4793,7 +4884,8 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED; VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT; - boolean addSignature = 
TRUE; + SVGA3dDXSignatureSemanticName sgn_name = + map_tgsi_semantic_to_sgn_name(semantic_name); /* indices that are declared */ indicesMask |= 1 << index; @@ -4806,13 +4898,18 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) else if (usage_mask == 0) { continue; /* register is not actually used */ } + else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) { + /* The shadow copy is being used here. So set the signature name + * to UNDEFINED. + */ + sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED; + } /* input control points in the patch constant phase are emitted in the * vicp register rather than the v register. */ if (!emit->tcs.control_point_phase) { operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; - addSignature = emit->tcs.control_point_out_count == 0; } /* Tessellation control shader inputs are two dimensional. @@ -4826,9 +4923,7 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_OPERAND_4_COMPONENT_MASK_MODE, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, VGPU10_INTERPOLATION_UNDEFINED, - addSignature, - map_tgsi_semantic_to_sgn_name(semantic_name)); - + addSignature, sgn_name); } if (emit->tcs.control_point_phase) { @@ -4983,6 +5078,54 @@ emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit) } emit_tessfactor_input_declarations(emit); + + /* DX spec requires DS input controlpoint/patch-constant signatures to match + * the HS output controlpoint/patch-constant signatures exactly. + * Add missing input declarations even if they are not used in the shader. + */ + if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) { + struct tgsi_shader_info *prevInfo = emit->prevShaderInfo; + for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) { + + /* If a tcs output does not have a corresponding input register in + * tes, add one. 
+ */ + if (emit->linkage.prevShader.output_map[i] > + emit->linkage.input_map_max) { + const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i]; + + if (sem_name == TGSI_SEMANTIC_PATCH) { + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, + VGPU10_OPERAND_INDEX_1D, + i, 1, VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED, + TRUE, + map_tgsi_semantic_to_sgn_name(sem_name)); + + } else if (sem_name != TGSI_SEMANTIC_TESSINNER && + sem_name != TGSI_SEMANTIC_TESSOUTER) { + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT, + VGPU10_OPERAND_INDEX_2D, + i, emit->key.tes.vertices_per_patch, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED, + TRUE, + map_tgsi_semantic_to_sgn_name(sem_name)); + } + /* tessellation factors are taken care of in + * emit_tessfactor_input_declarations(). 
+ */ + } + } + } } @@ -5088,7 +5231,7 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_NAME_UNDEFINED, emit->output_usage_mask[emit->clip_dist_out_index], TRUE, - SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); if (emit->info.num_written_clipdistance > 4) { /* for the second clip distance register, each handles 4 planes */ @@ -5097,7 +5240,7 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_NAME_UNDEFINED, emit->output_usage_mask[emit->clip_dist_out_index+1], TRUE, - SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); + SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); } } @@ -5184,6 +5327,11 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) total_temps++; } + if (emit->info.uses_vertexid) { + assert(emit->unit == PIPE_SHADER_VERTEX); + emit->vs.vertex_id_tmp_index = total_temps++; + } + if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || emit->key.clip_plane_enable || @@ -5440,6 +5588,9 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit) if (emit->key.vs.undo_viewport) { emit->vs.viewport_index = total_consts++; } + if (emit->key.vs.need_vertex_id_bias) { + emit->vs.vertex_id_bias_index = total_consts++; + } } /* user-defined clip planes */ @@ -9986,6 +10137,33 @@ emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit) /** + * A helper function to emit an instruction in a vertex shader to add a bias + * to the VertexID system value. This patches the VertexID in the SVGA vertex + * shader to include the base vertex of an indexed primitive or the start index + * of a non-indexed primitive. 
+ */ +static void +emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit) +{ + struct tgsi_full_src_register vertex_id_bias_index = + make_src_const_reg(emit->vs.vertex_id_bias_index); + struct tgsi_full_src_register vertex_id_sys_src = + make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index); + struct tgsi_full_src_register vertex_id_sys_src_x = + scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register vertex_id_tmp_dst = + make_dst_temp_reg(emit->vs.vertex_id_tmp_index); + + /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */ + unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index; + emit->vs.vertex_id_tmp_index = INVALID_INDEX; + emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst, + &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE, + FALSE); + emit->vs.vertex_id_tmp_index = vertex_id_tmp_index; +} + +/** * Hull Shader must have control point outputs. But tessellation * control shader can return without writing to control point output. * In this case, the control point output is assumed to be passthrough @@ -10155,6 +10333,7 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) * do a second pass of the instructions for the patch constant phase. 
*/ emit->tcs.instruction_token_pos = emit->cur_tgsi_token; + emit->tcs.fork_phase_add_signature = FALSE; if (!emit_hull_shader_control_point_phase(emit)) { emit->skip_instruction = TRUE; @@ -10230,6 +10409,9 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) } else if (emit->unit == PIPE_SHADER_VERTEX) { emit_vertex_attrib_instructions(emit); + + if (emit->info.uses_vertexid) + emit_vertex_id_nobase_instruction(emit); } else if (emit->unit == PIPE_SHADER_TESS_EVAL) { emit_temp_tessfactor_instructions(emit); @@ -10707,6 +10889,7 @@ compute_input_mapping(struct svga_context *svga, if (prevShader != NULL) { svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage); + emit->prevShaderInfo = &prevShader->info; } else { /** @@ -10830,6 +11013,10 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, emit->vposition.so_index = INVALID_INDEX; emit->vposition.out_index = INVALID_INDEX; + emit->vs.vertex_id_sys_index = INVALID_INDEX; + emit->vs.vertex_id_tmp_index = INVALID_INDEX; + emit->vs.vertex_id_bias_index = INVALID_INDEX; + emit->fs.color_tmp_index = INVALID_INDEX; emit->fs.face_input_index = INVALID_INDEX; emit->fs.fragcoord_input_index = INVALID_INDEX; diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index 717e56caccf..55534953d0c 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -132,6 +132,8 @@ enum svga_stats_time { SVGA_STATS_TIME_CREATEGS, SVGA_STATS_TIME_CREATESURFACE, SVGA_STATS_TIME_CREATESURFACEVIEW, + SVGA_STATS_TIME_CREATETCS, + SVGA_STATS_TIME_CREATETES, SVGA_STATS_TIME_CREATETEXTURE, SVGA_STATS_TIME_CREATEVS, SVGA_STATS_TIME_DEFINESHADER, @@ -141,6 +143,8 @@ enum svga_stats_time { SVGA_STATS_TIME_DRAWELEMENTS, SVGA_STATS_TIME_EMITFS, SVGA_STATS_TIME_EMITGS, + SVGA_STATS_TIME_EMITTCS, + SVGA_STATS_TIME_EMITTES, SVGA_STATS_TIME_EMITVS, SVGA_STATS_TIME_EMULATESURFACEVIEW, SVGA_STATS_TIME_FENCEFINISH, @@ -201,6 +205,8 @@ enum svga_stats_time { 
SVGA_STATS_PREFIX "CreateGS", \ SVGA_STATS_PREFIX "CreateSurface", \ SVGA_STATS_PREFIX "CreateSurfaceView", \ + SVGA_STATS_PREFIX "CreateTCS", \ + SVGA_STATS_PREFIX "CreateTES", \ SVGA_STATS_PREFIX "CreateTexture", \ SVGA_STATS_PREFIX "CreateVS", \ SVGA_STATS_PREFIX "DefineShader", \ @@ -210,6 +216,8 @@ enum svga_stats_time { SVGA_STATS_PREFIX "DrawElements", \ SVGA_STATS_PREFIX "EmitFS", \ SVGA_STATS_PREFIX "EmitGS", \ + SVGA_STATS_PREFIX "EmitTCS", \ + SVGA_STATS_PREFIX "EmitTES", \ SVGA_STATS_PREFIX "EmitVS", \ SVGA_STATS_PREFIX "EmulateSurfaceView", \ SVGA_STATS_PREFIX "FenceFinish", \ @@ -594,6 +602,11 @@ struct svga_winsys_screen uint32 numMipLevels, uint32 numSamples); + void + (*surface_init)(struct svga_winsys_screen *sws, + struct svga_winsys_surface *surface, + unsigned surf_size, SVGA3dSurfaceAllFlags flags); + /** * Buffer management. Buffer attributes are mostly fixed over its lifetime. * @@ -786,6 +799,7 @@ struct svga_winsys_screen boolean have_transfer_from_buffer_cmd; boolean have_fence_fd; boolean have_intra_surface_copy; + boolean have_constant_buffer_offset_cmd; }; |