diff options
-rw-r--r-- | src/gallium/drivers/iris/iris_context.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_program.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_state.c | 14 |
3 files changed, 12 insertions, 8 deletions
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index e6a7af1732c..d1952ed27e4 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -847,6 +847,7 @@ void iris_init_perfquery_functions(struct pipe_context *ctx); void iris_update_compiled_shaders(struct iris_context *ice); void iris_update_compiled_compute_shader(struct iris_context *ice); void iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data, + unsigned threads, uint32_t *dst); diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 3e8abcd428d..c876540cbbb 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -2020,13 +2020,14 @@ iris_update_compiled_compute_shader(struct iris_context *ice) void iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data, + unsigned threads, uint32_t *dst) { - assert(brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads) > 0); + assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0); assert(cs_prog_data->push.cross_thread.size == 0); assert(cs_prog_data->push.per_thread.dwords == 1); assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID); - for (unsigned t = 0; t < cs_prog_data->threads; t++) + for (unsigned t = 0; t < threads; t++) dst[8 * t] = t; } diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 5ea1cd8a2f1..f572c0213dd 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -4365,7 +4365,6 @@ iris_store_cs_state(struct iris_context *ice, iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), map, desc) { desc.KernelStartPointer = KSP(shader); desc.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs; - desc.NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads; desc.SharedLocalMemorySize = encode_slm_size(GEN_GEN, prog_data->total_shared); desc.BarrierEnable = cs_prog_data->uses_barrier; @@ -6471,6 +6470,9 @@ iris_upload_compute_state(struct iris_context *ice, struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_cs_prog_data *cs_prog_data = (void *) prog_data; + const uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2]; + const unsigned threads = DIV_ROUND_UP(group_size, cs_prog_data->simd_size); + /* Always pin the binder. If we're emitting new binding table pointers, * we need it. If not, we're probably inheriting old tables via the * context, and need it anyway. Since true zero-bindings cases are @@ -6532,7 +6534,7 @@ iris_upload_compute_state(struct iris_context *ice, vfe.URBEntryAllocationSize = 2; vfe.CURBEAllocationSize = - ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads + + ALIGN(cs_prog_data->push.per_thread.regs * threads + cs_prog_data->push.cross_thread.regs, 2); } } @@ -6544,7 +6546,7 @@ iris_upload_compute_state(struct iris_context *ice, cs_prog_data->push.per_thread.dwords == 1 && cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID); const unsigned push_const_size = - brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads); + brw_cs_push_const_total_size(cs_prog_data, threads); uint32_t *curbe_data_map = stream_state(batch, ice->state.dynamic_uploader, &ice->state.last_res.cs_thread_ids, @@ -6552,7 +6554,7 @@ iris_upload_compute_state(struct iris_context *ice, &curbe_data_offset); assert(curbe_data_map); memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64)); - iris_fill_cs_push_const_buffer(cs_prog_data, curbe_data_map); + iris_fill_cs_push_const_buffer(cs_prog_data, threads, curbe_data_map); iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) { curbe.CURBETotalDataLength = ALIGN(push_const_size, 64); @@ -6569,6 +6571,7 @@ iris_upload_compute_state(struct iris_context *ice, iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) { idd.SamplerStatePointer = shs->sampler_table.offset; idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE]; + idd.NumberofThreadsinGPGPUThreadGroup = threads; } for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++) @@ -6583,7 +6586,6 @@ iris_upload_compute_state(struct iris_context *ice, } } - uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2]; uint32_t remainder = group_size & (cs_prog_data->simd_size - 1); uint32_t right_mask; @@ -6618,7 +6620,7 @@ iris_upload_compute_state(struct iris_context *ice, ggw.SIMDSize = cs_prog_data->simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; - ggw.ThreadWidthCounterMaximum = cs_prog_data->threads - 1; + ggw.ThreadWidthCounterMaximum = threads - 1; ggw.ThreadGroupIDXDimension = grid->grid[0]; ggw.ThreadGroupIDYDimension = grid->grid[1]; ggw.ThreadGroupIDZDimension = grid->grid[2]; |