aboutsummaryrefslogtreecommitdiffstats
path: root/src/freedreno
diff options
context:
space:
mode:
authorBrian Ho <[email protected]>2020-05-15 10:52:43 -0700
committerMarge Bot <[email protected]>2020-06-22 14:35:45 +0000
commitf08a80dcd493c64922c04a2563025bddabcac230 (patch)
tree13b3e71e8696801445da81f83f210897a5173dd7 /src/freedreno
parenteefdca2e2f5a558e02102c1f6e1736b61acc67b2 (diff)
turnip: Allocate tess BOs as a function of draw size
To store tess outputs, the HS stg's into two buffers, one for per-vertex/per-patch output variables (tess_param) and one for TessLevelInner/Outer (tess_factor). The addresses of these buffers are uploaded as consts to the HS/DS and the tess_factor iova is written to REG_A6XX_PC_TESSFACTOR_ADDR. While the sizes of these buffers are a function of vertex count and patch count, allocation is relatively straightforward on freedreno: just keep track of the max required buffer size for the entire batch and allocate before batch submit. In Vulkan, however, a given pipeline can be bound multiple times across any number of command buffers, each drawing with a different number of vertices. One solution is to track the max buffer size for the entire command buffer (similar to fd_batch) and on vkEndCommandBuffer, allocate appropriately sized tess BOs. Since the tess BO addresses are emitted as part of the pipeline state setup (e.g. PKT4 to REG_A6XX_PC_TESSFACTOR_ADDR), we need to create a new state group independent of a specific pipeline and parameterize its IB with the command-buffer-specific tess BO iovas. Without a larger refactor, the simplest way to do this is just to emit per-draw call consts and leverage scratch_bo to re-use buffers. This way we won't have to store and rewrite earlier packets in the command stream on vkEndCommandBuffer. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5059>
Diffstat (limited to 'src/freedreno')
-rw-r--r--src/freedreno/vulkan/tu_cmd_buffer.c130
-rw-r--r--src/freedreno/vulkan/tu_pipeline.c113
-rw-r--r--src/freedreno/vulkan/tu_private.h10
3 files changed, 235 insertions, 18 deletions
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 731d361712e..a18f19ea0a0 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -3027,6 +3027,121 @@ tu6_emit_streamout(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
}
}
+/* Computes the size of the tess param BO required by one draw.
+ *
+ * For a direct draw the result is
+ *    draw_info->count * per_vertex_output_size +
+ *    (number of whole patches) * per_patch_output_size
+ * where the number of patches is draw_info->count divided by the number of
+ * vertices per patch taken from the pipeline's primitive type.
+ *
+ * For an indirect draw a fixed fallback size is returned (see TODO below).
+ *
+ * NOTE(review): the result is later used as part of a scratch BO size;
+ * confirm that per_vertex_output_size / per_patch_output_size are expressed
+ * in the unit the allocator expects.
+ */
+static uint64_t
+get_tess_param_bo_size(const struct tu_pipeline *pipeline,
+                       const struct tu_draw_info *draw_info)
+{
+   /* TODO: For indirect draws, we can't compute the BO size ahead of time.
+    * Still not sure what to do here, so just allocate a reasonably large
+    * BO and hope for the best for now.
+    * (maxTessellationControlPerVertexOutputComponents * 2048 vertices +
+    * maxTessellationControlPerPatchOutputComponents * 512 patches) */
+   if (draw_info->indirect) {
+      return ((128 * 2048) + (128 * 512)) * 4;
+   }
+
+   /* For each patch, adreno lays out the tess param BO in memory as:
+    * (v_input[0][0])...(v_input[i][j])(p_input[0])...(p_input[k])
+    * where i = # vertices per patch, j = # per-vertex outputs, and
+    * k = # per-patch outputs. */
+   uint32_t verts_per_patch = pipeline->ia.primtype - DI_PT_PATCHES0;
+
+   /* verts_per_patch is the divisor below; primtype == DI_PT_PATCHES0 would
+    * make it zero. */
+   assert(verts_per_patch > 0);
+   uint64_t num_patches = draw_info->count / verts_per_patch;
+
+   /* Widen to 64 bits before multiplying: the operands are 32-bit, so the
+    * products could otherwise wrap before being converted to the 64-bit
+    * return type. */
+   return (uint64_t) draw_info->count * pipeline->tess.per_vertex_output_size +
+          (uint64_t) pipeline->tess.per_patch_output_size * num_patches;
+}
+
+/* Computes the size of the tess factor BO required by one draw.
+ *
+ * For a direct draw the result is (number of whole patches) multiplied by a
+ * per-patch stride selected from the pipeline's tessellation patch type.
+ * For an indirect draw a fixed fallback size is returned (see TODO below).
+ */
+static uint64_t
+get_tess_factor_bo_size(const struct tu_pipeline *pipeline,
+                        const struct tu_draw_info *draw_info)
+{
+   /* TODO: For indirect draws, we can't compute the BO size ahead of time.
+    * Still not sure what to do here, so just allocate a reasonably large
+    * BO and hope for the best for now.
+    * (quad factor stride * 512 patches) */
+   if (draw_info->indirect) {
+      return (28 * 512) * 4;
+   }
+
+   /* Each distinct patch gets its own tess factor output. */
+   uint32_t verts_per_patch = pipeline->ia.primtype - DI_PT_PATCHES0;
+
+   /* verts_per_patch is the divisor below; primtype == DI_PT_PATCHES0 would
+    * make it zero. */
+   assert(verts_per_patch > 0);
+   uint64_t num_patches = draw_info->count / verts_per_patch;
+
+   /* Per-patch stride of the factor data for each patch type. */
+   uint32_t factor_stride;
+   switch (pipeline->tess.patch_type) {
+   case IR3_TESS_ISOLINES:
+      factor_stride = 12;
+      break;
+   case IR3_TESS_TRIANGLES:
+      factor_stride = 20;
+      break;
+   case IR3_TESS_QUADS:
+      factor_stride = 28;
+      break;
+   default:
+      unreachable("bad tessmode");
+   }
+
+   /* num_patches is 64-bit, so this product cannot wrap at 32 bits before
+    * being returned. */
+   return factor_stride * num_patches;
+}
+
+/* Builds the per-draw tessellation constants as a sub-stream of cmd->sub_cs.
+ *
+ * When the combined tess factor + tess param size is non-zero, a scratch BO
+ * of that size is obtained and added to the command buffer's BO list. The
+ * factor data is placed at the start of the BO and the param data directly
+ * after it. The (param iova, factor iova) pair is loaded as one constant
+ * unit into the HS at tess.hs_bo_regid and into the DS at tess.ds_bo_regid,
+ * and the factor iova is written to REG_A6XX_PC_TESSFACTOR_ADDR.
+ *
+ * cmd:      command buffer whose sub_cs and bo_list are used
+ * draw:     draw parameters used to size the buffers
+ * pipeline: bound pipeline providing the tess layout and const offsets
+ * entry:    receives the finished sub-stream on success
+ *
+ * Returns VK_SUCCESS, or the error reported by tu_get_scratch_bo() or
+ * tu_cs_begin_sub_stream(); *entry is not written on failure.
+ */
+static VkResult
+tu6_emit_tess_consts(struct tu_cmd_buffer *cmd,
+                     const struct tu_draw_info *draw,
+                     const struct tu_pipeline *pipeline,
+                     struct tu_cs_entry *entry)
+{
+   VkResult result;
+
+   uint64_t tess_factor_size = get_tess_factor_bo_size(pipeline, draw);
+   uint64_t tess_param_size = get_tess_param_bo_size(pipeline, draw);
+   uint64_t tess_bo_size = tess_factor_size + tess_param_size;
+
+   /* Get the scratch BO before opening the sub-stream, so that a failed
+    * allocation cannot return with a sub-stream that was begun but never
+    * ended. */
+   struct tu_bo *tess_bo = NULL;
+   if (tess_bo_size > 0) {
+      result = tu_get_scratch_bo(cmd->device, tess_bo_size, &tess_bo);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   /* 20 is the space reserved for this sub-stream; presumably sized for the
+    * packets emitted below -- keep in sync when adding packets. */
+   struct tu_cs cs;
+   result = tu_cs_begin_sub_stream(&cmd->sub_cs, 20, &cs);
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (tess_bo_size > 0) {
+      tu_bo_list_add(&cmd->bo_list, tess_bo,
+                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
+
+      /* Factor data first, param data immediately after it. */
+      uint64_t tess_factor_iova = tess_bo->iova;
+      uint64_t tess_param_iova = tess_factor_iova + tess_factor_size;
+
+      /* HS: one constant unit holding both buffer addresses. */
+      tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_GEOM, 3 + 4);
+      tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(pipeline->tess.hs_bo_regid) |
+                      CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+                      CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+                      CP_LOAD_STATE6_0_STATE_BLOCK(SB6_HS_SHADER) |
+                      CP_LOAD_STATE6_0_NUM_UNIT(1));
+      tu_cs_emit(&cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+      tu_cs_emit(&cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+      tu_cs_emit_qw(&cs, tess_param_iova);
+      tu_cs_emit_qw(&cs, tess_factor_iova);
+
+      /* DS: same two addresses at the DS const offset. */
+      tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_GEOM, 3 + 4);
+      tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(pipeline->tess.ds_bo_regid) |
+                      CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+                      CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+                      CP_LOAD_STATE6_0_STATE_BLOCK(SB6_DS_SHADER) |
+                      CP_LOAD_STATE6_0_NUM_UNIT(1));
+      tu_cs_emit(&cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+      tu_cs_emit(&cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+      tu_cs_emit_qw(&cs, tess_param_iova);
+      tu_cs_emit_qw(&cs, tess_factor_iova);
+
+      tu_cs_emit_pkt4(&cs, REG_A6XX_PC_TESSFACTOR_ADDR_LO, 2);
+      tu_cs_emit_qw(&cs, tess_factor_iova);
+
+      /* TODO: Without this WFI here, the hardware seems unable to read these
+       * addresses we just emitted. Freedreno emits these consts as part of
+       * IB1 instead of in a draw state which might make this WFI unnecessary,
+       * but it requires a bit more indirection (SS6_INDIRECT for consts). */
+      tu_cs_emit_wfi(&cs);
+   }
+
+   *entry = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
+   return VK_SUCCESS;
+}
+
static VkResult
tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
@@ -3092,6 +3207,15 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
if (result != VK_SUCCESS)
return result;
+ bool has_tess =
+ pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+ struct tu_cs_entry tess_consts = {};
+ if (has_tess) {
+ result = tu6_emit_tess_consts(cmd, draw, pipeline, &tess_consts);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
/* for the first draw in a renderpass, re-emit all the draw states
*
* and if a draw-state disabling path (CmdClearAttachments 3D fallback) was
@@ -3107,6 +3231,7 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state_ib);
+ tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_TESS, tess_consts);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI, pipeline->vi.state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state_ib);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_RAST, pipeline->rast.state_ib);
@@ -3132,6 +3257,7 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
* note we eventually don't want to have to emit anything here
*/
uint32_t draw_state_count =
+ has_tess +
((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 3 : 0) +
((cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) ? 1 : 0) +
((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
@@ -3139,6 +3265,10 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_count);
+ /* We may need to re-emit tess consts if the current draw call is
+ * sufficiently larger than the last draw call. */
+ if (has_tess)
+ tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_TESS, tess_consts);
if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) {
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_CONST, cmd->state.shader_const_ib[MESA_SHADER_VERTEX]);
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index e6442575af5..5e21b6031fa 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -663,8 +663,8 @@ tu6_emit_link_map(struct tu_cs *cs,
if (size <= 0)
return;
- tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, SB6_GS_SHADER, 0, size,
- patch_locs);
+ tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, SB6_GS_SHADER, 0,
+ size, patch_locs);
}
static uint16_t
@@ -1129,24 +1129,65 @@ tu6_emit_fs_outputs(struct tu_cs *cs,
}
static void
-tu6_emit_geometry_consts(struct tu_cs *cs,
- const struct ir3_shader_variant *vs,
- const struct ir3_shader_variant *gs) {
- unsigned num_vertices = gs->shader->nir->info.gs.vertices_in;
-
- uint32_t params[4] = {
- vs->output_size * num_vertices * 4, /* primitive stride */
- vs->output_size * 4, /* vertex stride */
+tu6_emit_geom_tess_consts(struct tu_cs *cs,
+ const struct ir3_shader_variant *vs,
+ const struct ir3_shader_variant *hs,
+ const struct ir3_shader_variant *ds,
+ const struct ir3_shader_variant *gs,
+ uint32_t cps_per_patch)
+{
+ uint32_t num_vertices =
+ hs ? cps_per_patch : gs->shader->nir->info.gs.vertices_in;
+
+ uint32_t vs_params[4] = {
+ vs->output_size * num_vertices * 4, /* vs primitive stride */
+ vs->output_size * 4, /* vs vertex stride */
0,
0,
};
uint32_t vs_base = ir3_const_state(vs)->offsets.primitive_param;
tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, vs_base, SB6_VS_SHADER, 0,
- ARRAY_SIZE(params), params);
+ ARRAY_SIZE(vs_params), vs_params);
+
+ if (hs) {
+ assert(ds->type != MESA_SHADER_NONE);
+ uint32_t hs_params[4] = {
+ vs->output_size * num_vertices * 4, /* hs primitive stride */
+ vs->output_size * 4, /* hs vertex stride */
+ hs->output_size,
+ cps_per_patch,
+ };
+
+ uint32_t hs_base = hs->const_state->offsets.primitive_param;
+ tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, hs_base, SB6_HS_SHADER, 0,
+ ARRAY_SIZE(hs_params), hs_params);
+ if (gs)
+ num_vertices = gs->shader->nir->info.gs.vertices_in;
+
+ uint32_t ds_params[4] = {
+ ds->output_size * num_vertices * 4, /* ds primitive stride */
+ ds->output_size * 4, /* ds vertex stride */
+ hs->output_size, /* hs vertex stride (dwords) */
+ hs->shader->nir->info.tess.tcs_vertices_out
+ };
+
+ uint32_t ds_base = ds->const_state->offsets.primitive_param;
+ tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, ds_base, SB6_DS_SHADER, 0,
+ ARRAY_SIZE(ds_params), ds_params);
+ }
- uint32_t gs_base = ir3_const_state(gs)->offsets.primitive_param;
- tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, gs_base, SB6_GS_SHADER, 0,
- ARRAY_SIZE(params), params);
+ if (gs) {
+ const struct ir3_shader_variant *prev = ds ? ds : vs;
+ uint32_t gs_params[4] = {
+ prev->output_size * num_vertices * 4, /* gs primitive stride */
+ prev->output_size * 4, /* gs vertex stride */
+ 0,
+ 0,
+ };
+ uint32_t gs_base = gs->const_state->offsets.primitive_param;
+ tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, gs_base, SB6_GS_SHADER, 0,
+ ARRAY_SIZE(gs_params), gs_params);
+ }
}
static void
@@ -1158,6 +1199,8 @@ tu6_emit_program(struct tu_cs *cs,
{
const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX];
const struct ir3_shader_variant *bs = builder->binning_variant;
+ const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL];
+ const struct ir3_shader_variant *ds = builder->variants[MESA_SHADER_TESS_EVAL];
const struct ir3_shader_variant *gs = builder->variants[MESA_SHADER_GEOMETRY];
const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT];
gl_shader_stage stage = MESA_SHADER_VERTEX;
@@ -1207,8 +1250,11 @@ tu6_emit_program(struct tu_cs *cs,
builder->render_components);
}
- if (gs)
- tu6_emit_geometry_consts(cs, vs, gs);
+ if (gs || hs) {
+ uint32_t cps_per_patch = builder->create_info->pTessellationState ?
+ builder->create_info->pTessellationState->patchControlPoints : 0;
+ tu6_emit_geom_tess_consts(cs, vs, hs, ds, gs, cps_per_patch);
+ }
}
static void
@@ -1695,7 +1741,8 @@ tu6_get_tessmode(struct tu_shader* shader)
}
static VkResult
-tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder)
+tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
+ struct tu_pipeline *pipeline)
{
const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
NULL
@@ -1732,6 +1779,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder)
builder->shaders[stage] = shader;
}
+ pipeline->tess.patch_type = key.tessellation;
+
for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
stage > MESA_SHADER_NONE; stage--) {
if (!builder->shaders[stage])
@@ -1767,6 +1816,30 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder)
sizeof(uint32_t) * variant->info.sizedwords;
builder->binning_variant = variant;
+ if (builder->shaders[MESA_SHADER_TESS_CTRL]) {
+ struct ir3_shader *hs =
+ builder->shaders[MESA_SHADER_TESS_CTRL]->ir3_shader;
+ assert(hs->type != MESA_SHADER_NONE);
+
+ /* Calculate and store the per-vertex and per-patch HS-output sizes. */
+ uint32_t per_vertex_output_size = 0;
+ uint32_t per_patch_output_size = 0;
+ nir_foreach_variable (output, &hs->nir->outputs) {
+ switch (output->data.location) {
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ continue;
+ }
+ uint32_t size = glsl_count_attribute_slots(output->type, false) * 4;
+ if (output->data.patch)
+ per_patch_output_size += size;
+ else
+ per_vertex_output_size += size;
+ }
+ pipeline->tess.per_vertex_output_size = per_vertex_output_size;
+ pipeline->tess.per_patch_output_size = per_patch_output_size;
+ }
+
return VK_SUCCESS;
}
@@ -1942,6 +2015,10 @@ tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder,
assert(pipeline->ia.primtype == DI_PT_PATCHES0);
assert(tess_info->patchControlPoints <= 32);
pipeline->ia.primtype += tess_info->patchControlPoints;
+ const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL];
+ const struct ir3_shader_variant *ds = builder->variants[MESA_SHADER_TESS_EVAL];
+ pipeline->tess.hs_bo_regid = hs->const_state->offsets.primitive_param + 1;
+ pipeline->tess.ds_bo_regid = ds->const_state->offsets.primitive_param + 1;
}
static void
@@ -2151,7 +2228,7 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
(*pipeline)->layout = builder->layout;
/* compile and upload shaders */
- result = tu_pipeline_builder_compile_shaders(builder);
+ result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
if (result == VK_SUCCESS)
result = tu_pipeline_builder_upload_shaders(builder, *pipeline);
if (result != VK_SUCCESS) {
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 39f303ee7d7..153184f5999 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -427,6 +427,7 @@ enum tu_draw_state_group_id
{
TU_DRAW_STATE_PROGRAM,
TU_DRAW_STATE_PROGRAM_BINNING,
+ TU_DRAW_STATE_TESS,
TU_DRAW_STATE_VB,
TU_DRAW_STATE_VI,
TU_DRAW_STATE_VI_BINNING,
@@ -1102,6 +1103,15 @@ struct tu_pipeline
struct
{
+ uint32_t patch_type;
+ uint32_t per_vertex_output_size;
+ uint32_t per_patch_output_size;
+ uint32_t hs_bo_regid;
+ uint32_t ds_bo_regid;
+ } tess;
+
+ struct
+ {
struct tu_cs_entry state_ib;
} rast;