about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Caio Marcelo de Oliveira Filho <[email protected]> 2020-04-09 16:54:25 -0700
committer Caio Marcelo de Oliveira Filho <[email protected]> 2020-04-09 19:23:12 -0700
commit 9ff55621d9c9d299ac8e4eb2fcfe6db8a7a1b2a1 (patch)
tree 7e435aeb30c276110fef276073faed02886163eb
parent 928f5f54349902c497e9293adeae2580123afbd9 (diff)
iris: Stop using cs_prog_data->threads
This is preparation for dropping this field, since the value is now expected to be calculated by the drivers for the variable group size case. The field would also get in the way of brw_compile_cs producing multiple SIMD variants (like FS).

Reviewed-by: Jordan Justen <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4504>
-rw-r--r-- src/gallium/drivers/iris/iris_context.h 1
-rw-r--r-- src/gallium/drivers/iris/iris_program.c 5
-rw-r--r-- src/gallium/drivers/iris/iris_state.c 14
3 files changed, 12 insertions, 8 deletions
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index e6a7af1732c..d1952ed27e4 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -847,6 +847,7 @@ void iris_init_perfquery_functions(struct pipe_context *ctx);
void iris_update_compiled_shaders(struct iris_context *ice);
void iris_update_compiled_compute_shader(struct iris_context *ice);
void iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
+ unsigned threads,
uint32_t *dst);
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index 3e8abcd428d..c876540cbbb 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -2020,13 +2020,14 @@ iris_update_compiled_compute_shader(struct iris_context *ice)
void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
+ unsigned threads,
uint32_t *dst)
{
- assert(brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads) > 0);
+ assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
assert(cs_prog_data->push.cross_thread.size == 0);
assert(cs_prog_data->push.per_thread.dwords == 1);
assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
- for (unsigned t = 0; t < cs_prog_data->threads; t++)
+ for (unsigned t = 0; t < threads; t++)
dst[8 * t] = t;
}
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 5ea1cd8a2f1..f572c0213dd 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -4365,7 +4365,6 @@ iris_store_cs_state(struct iris_context *ice,
iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), map, desc) {
desc.KernelStartPointer = KSP(shader);
desc.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
- desc.NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads;
desc.SharedLocalMemorySize =
encode_slm_size(GEN_GEN, prog_data->total_shared);
desc.BarrierEnable = cs_prog_data->uses_barrier;
@@ -6471,6 +6470,9 @@ iris_upload_compute_state(struct iris_context *ice,
struct brw_stage_prog_data *prog_data = shader->prog_data;
struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
+ const uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
+ const unsigned threads = DIV_ROUND_UP(group_size, cs_prog_data->simd_size);
+
/* Always pin the binder. If we're emitting new binding table pointers,
* we need it. If not, we're probably inheriting old tables via the
* context, and need it anyway. Since true zero-bindings cases are
@@ -6532,7 +6534,7 @@ iris_upload_compute_state(struct iris_context *ice,
vfe.URBEntryAllocationSize = 2;
vfe.CURBEAllocationSize =
- ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
+ ALIGN(cs_prog_data->push.per_thread.regs * threads +
cs_prog_data->push.cross_thread.regs, 2);
}
}
@@ -6544,7 +6546,7 @@ iris_upload_compute_state(struct iris_context *ice,
cs_prog_data->push.per_thread.dwords == 1 &&
cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
const unsigned push_const_size =
- brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads);
+ brw_cs_push_const_total_size(cs_prog_data, threads);
uint32_t *curbe_data_map =
stream_state(batch, ice->state.dynamic_uploader,
&ice->state.last_res.cs_thread_ids,
@@ -6552,7 +6554,7 @@ iris_upload_compute_state(struct iris_context *ice,
&curbe_data_offset);
assert(curbe_data_map);
memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
- iris_fill_cs_push_const_buffer(cs_prog_data, curbe_data_map);
+ iris_fill_cs_push_const_buffer(cs_prog_data, threads, curbe_data_map);
iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
@@ -6569,6 +6571,7 @@ iris_upload_compute_state(struct iris_context *ice,
iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
idd.SamplerStatePointer = shs->sampler_table.offset;
idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
+ idd.NumberofThreadsinGPGPUThreadGroup = threads;
}
for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++)
@@ -6583,7 +6586,6 @@ iris_upload_compute_state(struct iris_context *ice,
}
}
- uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
uint32_t right_mask;
@@ -6618,7 +6620,7 @@ iris_upload_compute_state(struct iris_context *ice,
ggw.SIMDSize = cs_prog_data->simd_size / 16;
ggw.ThreadDepthCounterMaximum = 0;
ggw.ThreadHeightCounterMaximum = 0;
- ggw.ThreadWidthCounterMaximum = cs_prog_data->threads - 1;
+ ggw.ThreadWidthCounterMaximum = threads - 1;
ggw.ThreadGroupIDXDimension = grid->grid[0];
ggw.ThreadGroupIDYDimension = grid->grid[1];
ggw.ThreadGroupIDZDimension = grid->grid[2];