diff options
author | Jason Ekstrand <[email protected]> | 2016-04-28 15:37:39 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2016-05-14 13:34:25 -0700 |
commit | bee160b31be9e09eeab83f62d26ac331f08955fa (patch) | |
tree | e0446c57d900f30d17419758c3ea3b37c24ded4a /src/mesa/drivers/dri/i965/brw_fs.cpp | |
parent | 7be100ac9af52b1ab5e2c34b45aba0d66304d55a (diff) |
i965/fs: Organize prog_data by ksp number rather than SIMD width
The hardware packets organize kernel pointers and GRF start by slots that
don't map directly to dispatch width. This means that all of the state
setup code has to re-arrange the data from prog_data into these slots.
This logic has been duplicated 4 times in the GL driver and one more time
in the Vulkan driver. Let's just put it all in brw_fs.cpp.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 52 |
1 files changed, 39 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f66ba473411..1e84b101a8e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5800,11 +5800,6 @@ fs_visitor::run_fs(bool do_rep_send) return false; } - if (dispatch_width == 8) - wm_prog_data->reg_blocks = brw_register_blocks(grf_used); - else - wm_prog_data->reg_blocks_16 = brw_register_blocks(grf_used); - return !failed; } @@ -6004,6 +5999,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, shader); cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL; + uint8_t simd8_grf_start, simd16_grf_start; + unsigned simd8_grf_used, simd16_grf_used; fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base, prog, shader, 8, @@ -6015,7 +6012,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, return NULL; } else if (likely(!(INTEL_DEBUG & DEBUG_NO8))) { simd8_cfg = v8.cfg; - prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs; + simd8_grf_start = v8.payload.num_regs; + simd8_grf_used = v8.grf_used; } if (!v8.simd16_unsupported && @@ -6031,7 +6029,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, v16.fail_msg); } else { simd16_cfg = v16.cfg; - prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs; + simd16_grf_start = v16.payload.num_regs; + simd16_grf_used = v16.grf_used; } } @@ -6047,6 +6046,24 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, if (compiler->devinfo->gen < 5 && simd16_cfg) simd8_cfg = NULL; + if (prog_data->persample_dispatch) { + /* Starting with SandyBridge (where we first get MSAA), the different + * pixel dispatch combinations are grouped into classifications A + * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1). On all hardware + * generations, the only configurations supporting persample dispatch + * are are this in which only one dispatch width is enabled. + * + * If computed depth is enabled, SNB only allows SIMD8 while IVB+ + * allow SIMD8 or SIMD16 so we choose SIMD16 if available. + */ + if (compiler->devinfo->gen == 6 && + prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF) { + simd16_cfg = NULL; + } else if (simd16_cfg) { + simd8_cfg = NULL; + } + } + /* We have to compute the flat inputs after the visitor is finished running * because it relies on prog_data->urb_setup which is computed in * fs_visitor::calculate_urb_setup(). @@ -6065,15 +6082,24 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, } if (simd8_cfg) { + prog_data->dispatch_8 = true; g.generate_code(simd8_cfg, 8); - prog_data->no_8 = false; - } else { - prog_data->no_8 = true; + prog_data->base.dispatch_grf_start_reg = simd8_grf_start; + prog_data->reg_blocks_0 = brw_register_blocks(simd8_grf_used); + + if (simd16_cfg) { + prog_data->dispatch_16 = true; + prog_data->prog_offset_2 = g.generate_code(simd16_cfg, 16); + prog_data->dispatch_grf_start_reg_2 = simd16_grf_start; + prog_data->reg_blocks_2 = brw_register_blocks(simd16_grf_used); + } + } else if (simd16_cfg) { + prog_data->dispatch_16 = true; + g.generate_code(simd16_cfg, 16); + prog_data->base.dispatch_grf_start_reg = simd16_grf_start; + prog_data->reg_blocks_0 = brw_register_blocks(simd16_grf_used); } - if (simd16_cfg) - prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16); - return g.get_assembly(final_assembly_size); } |