diff options
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_compiler.h | 49 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 8 |
3 files changed, 65 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 84d3dde8c99..445c166082e 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -868,6 +868,55 @@ encode_slm_size(unsigned gen, uint32_t bytes) return slm_size; } +/** + * Return true if the given shader stage is dispatched contiguously by the + * relevant fixed function starting from channel 0 of the SIMD thread, which + * implies that the dispatch mask of a thread can be assumed to have the form + * '2^n - 1' for some n. + */ +static inline bool +brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo, + gl_shader_stage stage, + const struct brw_stage_prog_data *prog_data) +{ + /* The code below makes assumptions about the hardware's thread dispatch + * behavior that could be proven wrong in future generations -- Make sure + * to do a full test run with brw_fs_test_dispatch_packing() hooked up to + * the NIR front-end before changing this assertion. + */ + assert(devinfo->gen <= 9); + + switch (stage) { + case MESA_SHADER_FRAGMENT: { + /* The PSD discards subspans coming in with no lit samples, which in the + * per-pixel shading case implies that each subspan will either be fully + * lit (due to the VMask being used to allow derivative computations), + * or not dispatched at all. In per-sample dispatch mode individual + * samples from the same subspan have a fixed relative location within + * the SIMD thread, so dispatch of unlit samples cannot be avoided in + * general and we should return false. + */ + const struct brw_wm_prog_data *wm_prog_data = + (const struct brw_wm_prog_data *)prog_data; + return !wm_prog_data->persample_dispatch; + } + case MESA_SHADER_COMPUTE: + /* Compute shaders will be spawned with either a fully enabled dispatch + * mask or with whatever bottom/right execution mask was given to the + * GPGPU walker command to be used along the workgroup edges -- In both + * cases the dispatch mask is required to be tightly packed for our + * invocation index calculations to work. + */ + return true; + default: + /* Most remaining fixed functions are limited to use a packed dispatch + * mask due to the hardware representation of the dispatch mask as a + * single counter representing the number of enabled channels. + */ + return true; + } +} + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5c44007ba88..b60ec71888a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2835,6 +2835,14 @@ fs_visitor::eliminate_find_live_channel() bool progress = false; unsigned depth = 0; + if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) { + /* The optimization below assumes that channel zero is live on thread + * dispatch, which may not be the case if the fixed function dispatches + * threads sparsely. + */ + return false; + } + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { switch (inst->opcode) { case BRW_OPCODE_IF: diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 58c8a8a5bdb..6aa91028fb7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1291,6 +1291,14 @@ vec4_visitor::eliminate_find_live_channel() bool progress = false; unsigned depth = 0; + if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) { + /* The optimization below assumes that channel zero is live on thread + * dispatch, which may not be the case if the fixed function dispatches + * threads sparsely. + */ + return false; + } + foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { switch (inst->opcode) { case BRW_OPCODE_IF: |