diff options
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 54 |
1 files changed, 43 insertions, 11 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index b7c1f6312b4..c8062de4ffb 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7117,7 +7117,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, prog_data->barycentric_interp_modes = brw_compute_barycentric_interp_modes(compiler->devinfo, shader); - cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL; + cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL; fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base, prog, shader, 8, @@ -7151,6 +7151,26 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, } } + /* Currently, the compiler only supports SIMD32 on SNB+ */ + if (v8.max_dispatch_width >= 32 && !use_rep_send && + compiler->devinfo->gen >= 6 && + unlikely(INTEL_DEBUG & DEBUG_DO32)) { + /* Try a SIMD32 compile */ + fs_visitor v32(compiler, log_data, mem_ctx, key, + &prog_data->base, prog, shader, 32, + shader_time_index32); + v32.import_uniforms(&v8); + if (!v32.run_fs(allow_spilling, false)) { + compiler->shader_perf_log(log_data, + "SIMD32 shader failed to compile: %s", + v32.fail_msg); + } else { + simd32_cfg = v32.cfg; + prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs; + prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used); + } + } + /* When the caller requests a repclear shader, they want SIMD16-only */ if (use_rep_send) simd8_cfg = NULL; @@ -7160,8 +7180,17 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, * Instead, we just give them exactly one shader and we pick the widest one * available. */ - if (compiler->devinfo->gen < 5 && simd16_cfg) - simd8_cfg = NULL; + if (compiler->devinfo->gen < 5) { + if (simd32_cfg || simd16_cfg) + simd8_cfg = NULL; + if (simd32_cfg) + simd16_cfg = NULL; + } + + /* If computed depth is enabled SNB only allows SIMD8. 
*/ + if (compiler->devinfo->gen == 6 && + prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF) + assert(simd16_cfg == NULL && simd32_cfg == NULL); if (compiler->devinfo->gen <= 5 && !simd8_cfg) { /* Iron lake and earlier only have one Dispatch GRF start field. Make @@ -7170,6 +7199,9 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, if (simd16_cfg) { prog_data->base.dispatch_grf_start_reg = prog_data->dispatch_grf_start_reg_16; + } else if (simd32_cfg) { + prog_data->base.dispatch_grf_start_reg = + prog_data->dispatch_grf_start_reg_32; } } @@ -7179,16 +7211,11 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1). On all hardware * generations, the only configurations supporting persample dispatch * are those in which only one dispatch width is enabled. - * - * If computed depth is enabled, SNB only allows SIMD8 while IVB+ - * allow SIMD8 or SIMD16 so we choose SIMD16 if available. */ - if (compiler->devinfo->gen == 6 && - prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF) { - simd16_cfg = NULL; - } else if (simd16_cfg) { + if (simd32_cfg || simd16_cfg) simd8_cfg = NULL; - } + if (simd32_cfg) + simd16_cfg = NULL; } /* We have to compute the flat inputs after the visitor is finished running @@ -7218,6 +7245,11 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16); } + if (simd32_cfg) { + prog_data->dispatch_32 = true; + prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32); + } + return g.get_assembly(); } |