aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/iris/iris_context.h1
-rw-r--r--src/gallium/drivers/iris/iris_program.c5
-rw-r--r--src/gallium/drivers/iris/iris_state.c14
3 files changed, 12 insertions, 8 deletions
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index e6a7af1732c..d1952ed27e4 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -847,6 +847,7 @@ void iris_init_perfquery_functions(struct pipe_context *ctx);
void iris_update_compiled_shaders(struct iris_context *ice);
void iris_update_compiled_compute_shader(struct iris_context *ice);
void iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
+ unsigned threads,
uint32_t *dst);
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index 3e8abcd428d..c876540cbbb 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -2020,13 +2020,14 @@ iris_update_compiled_compute_shader(struct iris_context *ice)
void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
+ unsigned threads,
uint32_t *dst)
{
- assert(brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads) > 0);
+ assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
assert(cs_prog_data->push.cross_thread.size == 0);
assert(cs_prog_data->push.per_thread.dwords == 1);
assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
- for (unsigned t = 0; t < cs_prog_data->threads; t++)
+ for (unsigned t = 0; t < threads; t++)
dst[8 * t] = t;
}
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 5ea1cd8a2f1..f572c0213dd 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -4365,7 +4365,6 @@ iris_store_cs_state(struct iris_context *ice,
iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), map, desc) {
desc.KernelStartPointer = KSP(shader);
desc.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
- desc.NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads;
desc.SharedLocalMemorySize =
encode_slm_size(GEN_GEN, prog_data->total_shared);
desc.BarrierEnable = cs_prog_data->uses_barrier;
@@ -6471,6 +6470,9 @@ iris_upload_compute_state(struct iris_context *ice,
struct brw_stage_prog_data *prog_data = shader->prog_data;
struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
+ const uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
+ const unsigned threads = DIV_ROUND_UP(group_size, cs_prog_data->simd_size);
+
/* Always pin the binder. If we're emitting new binding table pointers,
* we need it. If not, we're probably inheriting old tables via the
* context, and need it anyway. Since true zero-bindings cases are
@@ -6532,7 +6534,7 @@ iris_upload_compute_state(struct iris_context *ice,
vfe.URBEntryAllocationSize = 2;
vfe.CURBEAllocationSize =
- ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
+ ALIGN(cs_prog_data->push.per_thread.regs * threads +
cs_prog_data->push.cross_thread.regs, 2);
}
}
@@ -6544,7 +6546,7 @@ iris_upload_compute_state(struct iris_context *ice,
cs_prog_data->push.per_thread.dwords == 1 &&
cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
const unsigned push_const_size =
- brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads);
+ brw_cs_push_const_total_size(cs_prog_data, threads);
uint32_t *curbe_data_map =
stream_state(batch, ice->state.dynamic_uploader,
&ice->state.last_res.cs_thread_ids,
@@ -6552,7 +6554,7 @@ iris_upload_compute_state(struct iris_context *ice,
&curbe_data_offset);
assert(curbe_data_map);
memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
- iris_fill_cs_push_const_buffer(cs_prog_data, curbe_data_map);
+ iris_fill_cs_push_const_buffer(cs_prog_data, threads, curbe_data_map);
iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
@@ -6569,6 +6571,7 @@ iris_upload_compute_state(struct iris_context *ice,
iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
idd.SamplerStatePointer = shs->sampler_table.offset;
idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
+ idd.NumberofThreadsinGPGPUThreadGroup = threads;
}
for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++)
@@ -6583,7 +6586,6 @@ iris_upload_compute_state(struct iris_context *ice,
}
}
- uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
uint32_t right_mask;
@@ -6618,7 +6620,7 @@ iris_upload_compute_state(struct iris_context *ice,
ggw.SIMDSize = cs_prog_data->simd_size / 16;
ggw.ThreadDepthCounterMaximum = 0;
ggw.ThreadHeightCounterMaximum = 0;
- ggw.ThreadWidthCounterMaximum = cs_prog_data->threads - 1;
+ ggw.ThreadWidthCounterMaximum = threads - 1;
ggw.ThreadGroupIDXDimension = grid->grid[0];
ggw.ThreadGroupIDYDimension = grid->grid[1];
ggw.ThreadGroupIDZDimension = grid->grid[2];