diff options
author | Kenneth Graunke <[email protected]> | 2016-06-09 16:56:31 -0700 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2016-06-12 00:40:14 -0700 |
commit | 147a90d82a5de637f968e0d5f383cabcb792f1ce (patch) | |
tree | 274f226d15f40f96c413b767e79f40d532096f41 /src/mesa/drivers | |
parent | a7d029d3dfac1da2701be75ff4d1589ac562e916 (diff) |
i965: Fix Haswell CS per-thread scratch space encoding.
Most stages encode their per-thread scratch space as a power-of-two size
in kilobytes, where 0 means 1kB. But compute shaders on Haswell have a
minimum of 2kB, and use a representation where 0 = 2kB.
This meant that we were effectively telling the hardware to allocate
each thread twice as much space as we meant to, while simultaneously
not allocating that much space in the buffer, leading to overflows.
Note that the existing code is completely wrong for Ivybridge,
but that will take additional work to sort out, so I've left it
as is for now. A subsequent commit will take care of that.
Together with the previous patches, this fixes rendering corruption
on Synmark's Gl43CSDof on Haswell.
Cc: "12.0" <[email protected]>
Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Francisco Jerez <[email protected]>
Reviewed-by: Jordan Justen <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 12 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_cs_state.c | 16 |
2 files changed, 25 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 4b29ee52883..8c0ec4ed27f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5985,8 +5985,18 @@ fs_visitor::allocate_registers(bool allow_spilling) schedule_instructions(SCHEDULE_POST); - if (last_scratch > 0) + if (last_scratch > 0) { prog_data->total_scratch = brw_get_scratch_size(last_scratch); + + if (devinfo->is_haswell && stage == MESA_SHADER_COMPUTE) { + /* According to the MEDIA_VFE_STATE's "Per Thread Scratch Space" + * field documentation, Haswell supports a minimum of 2kB of + * scratch space for compute shaders, unlike every other stage + * and platform. + */ + prog_data->total_scratch = MAX2(prog_data->total_scratch, 2048); + } + } } bool diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c index a71a5957191..42cd61fefef 100644 --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c @@ -64,14 +64,26 @@ brw_upload_cs_state(struct brw_context *brw) OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2)); if (prog_data->total_scratch) { - if (brw->gen >= 8) + if (brw->gen >= 8) { + /* Broadwell's Per Thread Scratch Space is in the range [0, 11] + * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M. + */ OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(prog_data->total_scratch) - 11); - else + } else if (brw->is_haswell) { + /* Haswell's Per Thread Scratch Space is in the range [0, 10] + * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M. + */ + OUT_RELOC(stage_state->scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(prog_data->total_scratch) - 12); + } else { + /* This is wrong but we'll fix it later */ OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(prog_data->total_scratch) - 11); + } } else { OUT_BATCH(0); if (brw->gen >= 8) |