summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJordan Justen <[email protected]>2018-03-06 08:35:50 -0800
committerJordan Justen <[email protected]>2018-03-09 16:15:34 -0800
commit06e3bd02c01e499332a9c02b40f506df9695bced (patch)
treefdc73c4709dfc1366a6b9ea40897a5d7245916bf /src
parentdb495b8962909f74e90b9eb0463fb37f37ac5f62 (diff)
i965: Hard code CS scratch_ids_per_subslice for Cherryview
Ken suggested that we might be underallocating scratch space on HD 400. Allocating scratch space as though there was actually 8 EUs seems to help with a GPU hang seen on synmark CSDof. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104636 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105290 Cc: <[email protected]> Signed-off-by: Jordan Justen <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Tested-by: Eero Tamminen <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c44
1 files changed, 27 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 527f003977b..4ba46a3c826 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -402,23 +402,33 @@ brw_alloc_stage_scratch(struct brw_context *brw,
if (devinfo->gen >= 9)
subslices = 4 * brw->screen->devinfo.num_slices;
- /* WaCSScratchSize:hsw
- *
- * Haswell's scratch space address calculation appears to be sparse
- * rather than tightly packed. The Thread ID has bits indicating
- * which subslice, EU within a subslice, and thread within an EU
- * it is. There's a maximum of two slices and two subslices, so these
- * can be stored with a single bit. Even though there are only 10 EUs
- * per subslice, this is stored in 4 bits, so there's an effective
- * maximum value of 16 EUs. Similarly, although there are only 7
- * threads per EU, this is stored in a 3 bit number, giving an effective
- * maximum value of 8 threads per EU.
- *
- * This means that we need to use 16 * 8 instead of 10 * 7 for the
- * number of threads per subslice.
- */
- const unsigned scratch_ids_per_subslice =
- devinfo->is_haswell ? 16 * 8 : devinfo->max_cs_threads;
+ unsigned scratch_ids_per_subslice;
+ if (devinfo->is_haswell) {
+ /* WaCSScratchSize:hsw
+ *
+ * Haswell's scratch space address calculation appears to be sparse
+ * rather than tightly packed. The Thread ID has bits indicating
+ * which subslice, EU within a subslice, and thread within an EU it
+ * is. There's a maximum of two slices and two subslices, so these
+ * can be stored with a single bit. Even though there are only 10 EUs
+ * per subslice, this is stored in 4 bits, so there's an effective
+ * maximum value of 16 EUs. Similarly, although there are only 7
+ * threads per EU, this is stored in a 3 bit number, giving an
+ * effective maximum value of 8 threads per EU.
+ *
+ * This means that we need to use 16 * 8 instead of 10 * 7 for the
+ * number of threads per subslice.
+ */
+ scratch_ids_per_subslice = 16 * 8;
+ } else if (devinfo->is_cherryview) {
+ /* Cherryview devices have either 6 or 8 EUs per subslice, and each
+ * EU has 7 threads. The 6 EU devices appear to calculate thread IDs
+ * as if it had 8 EUs.
+ */
+ scratch_ids_per_subslice = 8 * 7;
+ } else {
+ scratch_ids_per_subslice = devinfo->max_cs_threads;
+ }
thread_count = scratch_ids_per_subslice * subslices;
break;