diff options
author | Jordan Justen <[email protected]> | 2018-11-11 18:01:56 -0800 |
---|---|---|
committer | Jordan Justen <[email protected]> | 2018-12-12 22:28:06 -0800 |
commit | c506eae53de5d7cd3b14f862b42fc490fa997e58 (patch) | |
tree | 8c383636a5d17018715fdbd932e79f93b8a9e728 /src/mesa/drivers/dri/i965/genX_state_upload.c | |
parent | 1b85c605a60a80975460a04c8a15af88866ade1b (diff) |
i965/compute: Emit GPGPU_WALKER in genX_state_upload
Signed-off-by: Jordan Justen <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/genX_state_upload.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/genX_state_upload.c | 102 |
1 files changed, 102 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 6ccf0530342..bad0aff9c67 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -4579,6 +4579,107 @@ static const struct brw_tracked_state genX(cs_state) = {
    .emit = genX(upload_cs_state)
 };
 
+#define GPGPU_DISPATCHDIMX 0x2500 /* receives the value at indirect_offset + 0 */
+#define GPGPU_DISPATCHDIMY 0x2504 /* receives the value at indirect_offset + 4 */
+#define GPGPU_DISPATCHDIMZ 0x2508 /* receives the value at indirect_offset + 8 */
+/* MI_PREDICATE_SRC0/SRC1 are each written as two 32-bit halves (base and base + 4) below. */
+#define MI_PREDICATE_SRC0 0x2400
+#define MI_PREDICATE_SRC1 0x2408
+/* Load the indirect dispatch dimensions from brw->compute.num_work_groups_bo into GPGPU_DISPATCHDIM{X,Y,Z}. */
+static void
+prepare_indirect_gpgpu_walker(struct brw_context *brw)
+{
+   GLintptr indirect_offset = brw->compute.num_work_groups_offset;
+   struct brw_bo *bo = brw->compute.num_work_groups_bo;
+   /* X, Y and Z are read from the buffer at offsets +0, +4 and +8 from indirect_offset. */
+   emit_lrm(brw, GPGPU_DISPATCHDIMX, ro_bo(bo, indirect_offset + 0));
+   emit_lrm(brw, GPGPU_DISPATCHDIMY, ro_bo(bo, indirect_offset + 4));
+   emit_lrm(brw, GPGPU_DISPATCHDIMZ, ro_bo(bo, indirect_offset + 8));
+   /* GEN_GEN <= 7 only: build a predicate that ends up true iff X, Y and Z are all non-zero. */
+#if GEN_GEN <= 7
+   /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */
+   emit_lri(brw, MI_PREDICATE_SRC0 + 4, 0);
+   emit_lri(brw, MI_PREDICATE_SRC1 , 0);
+   emit_lri(brw, MI_PREDICATE_SRC1 + 4, 0);
+
+   /* Load compute_dispatch_indirect_x_size into SRC0 */
+   emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 0));
+
+   /* predicate = (compute_dispatch_indirect_x_size == 0); */
+   brw_batch_emit(brw, GENX(MI_PREDICATE), mip) {
+      mip.LoadOperation = LOAD_LOAD;
+      mip.CombineOperation = COMBINE_SET;
+      mip.CompareOperation = COMPARE_SRCS_EQUAL; /* SRC1 was zeroed above, so this tests SRC0 == 0 */
+   }
+
+   /* Load compute_dispatch_indirect_y_size into SRC0 */
+   emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 4));
+
+   /* predicate |= (compute_dispatch_indirect_y_size == 0); */
+   brw_batch_emit(brw, GENX(MI_PREDICATE), mip) {
+      mip.LoadOperation = LOAD_LOAD;
+      mip.CombineOperation = COMBINE_OR;
+      mip.CompareOperation = COMPARE_SRCS_EQUAL;
+   }
+
+   /* Load compute_dispatch_indirect_z_size into SRC0 */
+   emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 8));
+
+   /* predicate |= (compute_dispatch_indirect_z_size == 0); */
+   brw_batch_emit(brw, GENX(MI_PREDICATE), mip) {
+      mip.LoadOperation = LOAD_LOAD;
+      mip.CombineOperation = COMBINE_OR;
+      mip.CompareOperation = COMPARE_SRCS_EQUAL;
+   }
+
+   /* predicate = !predicate; */
+#define COMPARE_FALSE 1 /* NOTE(review): defined mid-function and not #undef'd in this hunk; value presumably matches the MI_PREDICATE encoding - confirm */
+   brw_batch_emit(brw, GENX(MI_PREDICATE), mip) {
+      mip.LoadOperation = LOAD_LOADINV;
+      mip.CombineOperation = COMBINE_OR;
+      mip.CompareOperation = COMPARE_FALSE;
+   }
+#endif
+}
+/* Emit GPGPU_WALKER for the current compute dispatch (direct or indirect), then MEDIA_STATE_FLUSH. */
+static void
+genX(emit_gpgpu_walker)(struct brw_context *brw)
+{
+   const struct brw_cs_prog_data *prog_data =
+      brw_cs_prog_data(brw->cs.base.prog_data);
+
+   const GLuint *num_groups = brw->compute.num_work_groups;
+   /* A non-NULL num_work_groups_bo selects indirect dispatch. */
+   bool indirect = brw->compute.num_work_groups_bo != NULL;
+   if (indirect)
+      prepare_indirect_gpgpu_walker(brw);
+
+   const unsigned simd_size = prog_data->simd_size;
+   unsigned group_size = prog_data->local_size[0] *
+      prog_data->local_size[1] * prog_data->local_size[2];
+   /* right_mask starts with the low simd_size bits set and is trimmed when group_size is not a multiple of simd_size. */
+   uint32_t right_mask = 0xffffffffu >> (32 - simd_size); /* assumes 1 <= simd_size <= 32 - TODO confirm */
+   const unsigned right_non_aligned = group_size & (simd_size - 1); /* remainder, assuming simd_size is a power of two */
+   if (right_non_aligned != 0)
+      right_mask >>= (simd_size - right_non_aligned); /* leaves only the low right_non_aligned bits set */
+
+   brw_batch_emit(brw, GENX(GPGPU_WALKER), ggw) {
+      ggw.IndirectParameterEnable = indirect;
+      ggw.PredicateEnable = GEN_GEN <= 7 && indirect; /* same condition under which prepare_indirect_gpgpu_walker() emits MI_PREDICATE */
+      ggw.SIMDSize = prog_data->simd_size / 16; /* e.g. 8 -> 0, 16 -> 1, 32 -> 2 */
+      ggw.ThreadDepthCounterMaximum = 0;
+      ggw.ThreadHeightCounterMaximum = 0;
+      ggw.ThreadWidthCounterMaximum = prog_data->threads - 1;
+      ggw.ThreadGroupIDXDimension = num_groups[0];
+      ggw.ThreadGroupIDYDimension = num_groups[1];
+      ggw.ThreadGroupIDZDimension = num_groups[2];
+      ggw.RightExecutionMask = right_mask;
+      ggw.BottomExecutionMask = 0xffffffff;
+   }
+
+   brw_batch_emit(brw, GENX(MEDIA_STATE_FLUSH), msf);
+}
+
 #endif
 
 /* ---------------------------------------------------------------------- */
@@ -5972,5 +6073,6 @@ genX(init_atoms)(struct brw_context *brw)
                       compute_atoms, ARRAY_SIZE(compute_atoms));
 
    brw->vtbl.emit_mi_report_perf_count = genX(emit_mi_report_perf_count);
+   brw->vtbl.emit_compute_walker = genX(emit_gpgpu_walker);
 #endif
 } |