aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/intel/vulkan/anv_cmd_buffer.c6
-rw-r--r--src/intel/vulkan/anv_pipeline.c25
-rw-r--r--src/intel/vulkan/anv_private.h11
-rw-r--r--src/intel/vulkan/genX_cmd_buffer.c12
-rw-r--r--src/intel/vulkan/genX_pipeline.c18
5 files changed, 40 insertions, 32 deletions
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index ea5ec415340..1ca33f206aa 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -838,9 +838,9 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
- const uint32_t threads = anv_cs_threads(pipeline);
+ const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
const unsigned total_push_constants_size =
- brw_cs_push_const_total_size(cs_prog_data, threads);
+ brw_cs_push_const_total_size(cs_prog_data, cs_params.threads);
if (total_push_constants_size == 0)
return (struct anv_state) { .offset = 0 };
@@ -863,7 +863,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
}
if (cs_prog_data->push.per_thread.size > 0) {
- for (unsigned t = 0; t < threads; t++) {
+ for (unsigned t = 0; t < cs_params.threads; t++) {
memcpy(dst, src, cs_prog_data->push.per_thread.size);
uint32_t *subgroup_id = dst +
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 88bc58f3771..bd2d1884d7a 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -1728,21 +1728,22 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
return VK_SUCCESS;
}
-uint32_t
-anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline)
+struct anv_cs_parameters
+anv_cs_parameters(const struct anv_compute_pipeline *pipeline)
{
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
- return cs_prog_data->local_size[0] *
- cs_prog_data->local_size[1] *
- cs_prog_data->local_size[2];
-}
-uint32_t
-anv_cs_threads(const struct anv_compute_pipeline *pipeline)
-{
- const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
- return DIV_ROUND_UP(anv_cs_workgroup_size(pipeline),
- cs_prog_data->simd_size);
+ struct anv_cs_parameters cs_params = {};
+
+ cs_params.group_size = cs_prog_data->local_size[0] *
+ cs_prog_data->local_size[1] *
+ cs_prog_data->local_size[2];
+ cs_params.simd_size =
+ brw_cs_simd_size_for_group_size(&pipeline->base.device->info,
+ cs_prog_data, cs_params.group_size);
+ cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size);
+
+ return cs_params;
}
/**
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 4bee161227d..8f5dcd37fbc 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -3413,11 +3413,14 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
const char *entrypoint,
const VkSpecializationInfo *spec_info);
-uint32_t
-anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline);
+struct anv_cs_parameters {
+ uint32_t group_size;
+ uint32_t simd_size;
+ uint32_t threads;
+};
-uint32_t
-anv_cs_threads(const struct anv_compute_pipeline *pipeline);
+struct anv_cs_parameters
+anv_cs_parameters(const struct anv_compute_pipeline *pipeline);
struct anv_format_plane {
enum isl_format isl_format:16;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 91ea16a0105..bf2a5a6dc75 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -4360,12 +4360,14 @@ void genX(CmdDispatchBase)(
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
+ const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
- ggw.SIMDSize = prog_data->simd_size / 16;
+ ggw.SIMDSize = cs_params.simd_size / 16;
ggw.ThreadDepthCounterMaximum = 0;
ggw.ThreadHeightCounterMaximum = 0;
- ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
+ ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
ggw.ThreadGroupIDXDimension = groupCountX;
ggw.ThreadGroupIDYDimension = groupCountY;
ggw.ThreadGroupIDZDimension = groupCountZ;
@@ -4474,14 +4476,16 @@ void genX(CmdDispatchIndirect)(
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
#endif
+ const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+
anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {
ggw.IndirectParameterEnable = true;
ggw.PredicateEnable = GEN_GEN <= 7 ||
cmd_buffer->state.conditional_render_enabled;
- ggw.SIMDSize = prog_data->simd_size / 16;
+ ggw.SIMDSize = cs_params.simd_size / 16;
ggw.ThreadDepthCounterMaximum = 0;
ggw.ThreadHeightCounterMaximum = 0;
- ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
+ ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
ggw.RightExecutionMask = pipeline->cs_right_mask;
ggw.BottomExecutionMask = 0xffffffff;
}
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 43078ce2b84..6753b1419ef 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -2325,19 +2325,16 @@ compute_pipeline_create(
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
- uint32_t group_size = cs_prog_data->local_size[0] *
- cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
- uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
+ const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+ uint32_t remainder = cs_params.group_size & (cs_params.simd_size - 1);
if (remainder > 0)
pipeline->cs_right_mask = ~0u >> (32 - remainder);
else
- pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);
-
- const uint32_t threads = anv_cs_threads(pipeline);
+ pipeline->cs_right_mask = ~0u >> (32 - cs_params.simd_size);
const uint32_t vfe_curbe_allocation =
- ALIGN(cs_prog_data->push.per_thread.regs * threads +
+ ALIGN(cs_prog_data->push.per_thread.regs * cs_params.threads +
cs_prog_data->push.cross_thread.regs, 2);
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
@@ -2388,7 +2385,10 @@ compute_pipeline_create(
}
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
- .KernelStartPointer = cs_bin->kernel.offset,
+ .KernelStartPointer =
+ cs_bin->kernel.offset +
+ brw_cs_prog_data_prog_offset(cs_prog_data, cs_params.simd_size),
+
/* WA_1606682166 */
.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(cs_bin),
/* We add 1 because the CS indirect parameters buffer isn't accounted
@@ -2420,7 +2420,7 @@ compute_pipeline_create(
.ThreadPreemptionDisable = true,
#endif
- .NumberofThreadsinGPGPUThreadGroup = threads,
+ .NumberofThreadsinGPGPUThreadGroup = cs_params.threads,
};
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
pipeline->interface_descriptor_data,