about | summary | refs | log | tree | commit | diff | stats
path: root/src/intel/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/intel/compiler')
-rw-r--r-- src/intel/compiler/brw_fs.cpp 24
1 files changed, 17 insertions, 7 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 010519ae565..c65bb204242 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -8646,8 +8646,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
char **error_str)
{
const struct gen_device_info *devinfo = compiler->devinfo;
-
- unsigned max_subgroup_size = unlikely(INTEL_DEBUG & DEBUG_DO32) ? 32 : 16;
+ const unsigned max_subgroup_size = compiler->devinfo->gen >= 6 ? 32 : 16;
brw_nir_apply_key(shader, compiler, &key->base, max_subgroup_size, true);
brw_nir_lower_fs_inputs(shader, devinfo, key);
@@ -8707,6 +8706,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
fs_visitor *v8 = NULL, *v16 = NULL, *v32 = NULL;
cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL;
+ float throughput = 0;
v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
&prog_data->base, shader, 8, shader_time_index8);
@@ -8720,6 +8720,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
simd8_cfg = v8->cfg;
prog_data->base.dispatch_grf_start_reg = v8->payload.num_regs;
prog_data->reg_blocks_8 = brw_register_blocks(v8->grf_used);
+ const performance &perf = v8->performance_analysis.require();
+ throughput = MAX2(throughput, perf.throughput);
}
/* Limit dispatch width to simd8 with dual source blending on gen8.
@@ -8746,13 +8748,14 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
simd16_cfg = v16->cfg;
prog_data->dispatch_grf_start_reg_16 = v16->payload.num_regs;
prog_data->reg_blocks_16 = brw_register_blocks(v16->grf_used);
+ const performance &perf = v16->performance_analysis.require();
+ throughput = MAX2(throughput, perf.throughput);
}
}
/* Currently, the compiler only supports SIMD32 on SNB+ */
if (v8->max_dispatch_width >= 32 && !use_rep_send &&
- compiler->devinfo->gen >= 6 &&
- unlikely(INTEL_DEBUG & DEBUG_DO32)) {
+ devinfo->gen >= 6 && simd16_cfg) {
/* Try a SIMD32 compile */
v32 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
&prog_data->base, shader, 32, shader_time_index32);
@@ -8762,9 +8765,16 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
"SIMD32 shader failed to compile: %s",
v32->fail_msg);
} else {
- simd32_cfg = v32->cfg;
- prog_data->dispatch_grf_start_reg_32 = v32->payload.num_regs;
- prog_data->reg_blocks_32 = brw_register_blocks(v32->grf_used);
+ const performance &perf = v32->performance_analysis.require();
+
+ if (!(INTEL_DEBUG & DEBUG_DO32) && throughput >= perf.throughput) {
+ compiler->shader_perf_log(log_data, "SIMD32 shader inefficient\n");
+ } else {
+ simd32_cfg = v32->cfg;
+ prog_data->dispatch_grf_start_reg_32 = v32->payload.num_regs;
+ prog_data->reg_blocks_32 = brw_register_blocks(v32->grf_used);
+ throughput = MAX2(throughput, perf.throughput);
+ }
}
}