diff options
author | Jordan Justen <[email protected]> | 2015-09-24 01:29:32 -0700 |
---|---|---|
committer | Jordan Justen <[email protected]> | 2015-09-29 08:23:47 -0700 |
commit | 63d7b33f516815ce3f9b3bb1b26a39f1545a4446 (patch) | |
tree | bb26f70747e55d3223b10a41433914af3aff5a1c /src/mesa/drivers/dri/i965 | |
parent | d1be9d21265cf4e344a5d78b17cea7ee2c8408a1 (diff) |
i965/cs: Set up surface binding for gl_NumWorkGroups
This will only be set up when the prog_data uses_num_work_groups
boolean is set.
At this point nothing will set uses_num_work_groups, but soon code
will set it when emitting code for the intrinsic that loads
gl_NumWorkGroups.
We can't emit this surface information earlier at the start of the
DispatchCompute* call because we may not have generated the program
yet. Until we generate the program, we don't know if the
gl_NumWorkGroups variable is accessed.
We also can't emit the surface as part of the brw_cs_state atom,
because we might not need the surface if gl_NumWorkGroups is not used
by the program.
Lastly, we cannot emit the surface later (after state upload) in the
DispatchCompute* call, because it needs to be run before the
brw_cs_state atom is emitted, since it changes the surface state.
Signed-off-by: Jordan Justen <[email protected]>
Reviewed-by: Kristian Høgsberg <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_compute.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 43 |
5 files changed, 53 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index 505023d2851..fe991a46153 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -184,6 +184,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) { brw->compute.num_work_groups_bo = NULL; brw->compute.num_work_groups = num_groups; + ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; brw_dispatch_compute_common(ctx); } @@ -202,6 +203,7 @@ brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect) brw->compute.num_work_groups_bo = bo; brw->compute.num_work_groups_offset = indirect; brw->compute.num_work_groups = indirect_group_counts; + ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; brw_dispatch_compute_common(ctx); } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index e407dec8eab..8b790fe0bca 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -213,6 +213,7 @@ enum brw_state_id { BRW_STATE_SAMPLER_STATE_TABLE, BRW_STATE_VS_ATTRIB_WORKAROUNDS, BRW_STATE_COMPUTE_PROGRAM, + BRW_STATE_CS_WORK_GROUPS, BRW_NUM_STATE_BITS }; @@ -294,6 +295,7 @@ enum brw_state_id { #define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE) #define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS) #define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM) +#define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -497,6 +499,7 @@ struct brw_cs_prog_data { unsigned local_size[3]; unsigned simd_size; bool uses_barrier; + bool uses_num_work_groups; struct { /** @{ @@ -1545,7 +1548,7 @@ struct brw_context int num_atoms[BRW_NUM_PIPELINES]; const struct brw_tracked_state render_atoms[60]; - const struct brw_tracked_state compute_atoms[6]; + const struct brw_tracked_state compute_atoms[7]; 
/* If (INTEL_DEBUG & DEBUG_BATCH) */ struct { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index afce8ad3b27..3b7a4330c7a 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -158,6 +158,7 @@ extern const struct brw_tracked_state gen8_sf_clip_viewport; extern const struct brw_tracked_state gen8_vertices; extern const struct brw_tracked_state gen8_vf_topology; extern const struct brw_tracked_state gen8_vs_state; +extern const struct brw_tracked_state brw_cs_work_groups_surface; static inline bool brw_state_dirty(struct brw_context *brw, GLuint mesa_flags, uint64_t brw_flags) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 2e8a0b3de9b..46687e342d3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -261,6 +261,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] = &gen7_cs_push_constants, &brw_cs_abo_surfaces, &brw_texture_surfaces, + &brw_cs_work_groups_surface, &brw_cs_state, }; @@ -353,6 +354,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] = &gen7_cs_push_constants, &brw_cs_abo_surfaces, &brw_texture_surfaces, + &brw_cs_work_groups_surface, &brw_cs_state, }; @@ -613,6 +615,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE), DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS), DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM), + DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index f2aaa0b178e..c9316963840 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1336,3 +1336,46 @@ gen4_init_vtable_surface_functions(struct brw_context *brw) brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state; brw->vtbl.emit_buffer_surface_state 
= gen4_emit_buffer_surface_state; } + +static void +brw_upload_cs_work_groups_surface(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + /* _NEW_PROGRAM */ + struct gl_shader_program *prog = + ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + if (prog && brw->cs.prog_data->uses_num_work_groups) { + const unsigned surf_idx = + brw->cs.prog_data->binding_table.work_groups_start; + uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx]; + drm_intel_bo *bo; + uint32_t bo_offset; + + if (brw->compute.num_work_groups_bo == NULL) { + bo = NULL; + intel_upload_data(brw, + (void *)brw->compute.num_work_groups, + 3 * sizeof(GLuint), + sizeof(GLuint), + &bo, + &bo_offset); + } else { + bo = brw->compute.num_work_groups_bo; + bo_offset = brw->compute.num_work_groups_offset; + } + + brw->vtbl.emit_buffer_surface_state(brw, surf_offset, + bo, bo_offset, + BRW_SURFACEFORMAT_RAW, + 3 * sizeof(GLuint), 1, true); + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; + } +} + +const struct brw_tracked_state brw_cs_work_groups_surface = { + .dirty = { + .brw = BRW_NEW_CS_WORK_GROUPS + }, + .emit = brw_upload_cs_work_groups_surface, +}; |