summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Francisco Jerez <[email protected]> 2016-06-13 14:56:22 -0700
committer Emil Velikov <[email protected]> 2016-06-15 09:29:12 +0100
commit 2cf78b485137a74ed3a7184f48928f3990883928 (patch)
tree 54ad3a3195eb1e1e2411d8f34ab94c0abce6bdc4
parent b9f69df93dc6680b2f25477d26f462aec2a76241 (diff)
i965: Keep track of the per-thread scratch allocation in brw_stage_state.
This will be used to find out what per-thread slot size a previously allocated scratch BO was used with in order to fix a hardware race condition without introducing additional stalls or memory allocations.

Instead of calling brw_get_scratch_bo() manually from the various codegen functions, call a new helper function that keeps track of the per-thread scratch size and conditionally allocates a larger scratch BO.

v2: Handle BO allocation manually instead of relying on brw_get_scratch_bo (Ken).

Cc: <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
(cherry picked from commit d960284e447df9b1563deef0ce950617decfba63)
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h10
-rw-r--r--src/mesa/drivers/dri/i965/brw_cs.c48
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c22
-rw-r--r--src/mesa/drivers/dri/i965/brw_tcs.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_tes.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c7
8 files changed, 70 insertions, 49 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index daa9ed2c7e1..9618b4a8182 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -677,6 +677,12 @@ struct brw_stage_state
*/
drm_intel_bo *scratch_bo;
+ /**
+ * Scratch slot size allocated for each thread in the buffer object given
+ * by \c scratch_bo.
+ */
+ uint32_t per_thread_scratch;
+
/** Offset in the program cache to the program */
uint32_t prog_offset;
@@ -1481,6 +1487,10 @@ brw_get_scratch_size(int size)
}
void brw_get_scratch_bo(struct brw_context *brw,
drm_intel_bo **scratch_bo, int size);
+void brw_alloc_stage_scratch(struct brw_context *brw,
+ struct brw_stage_state *stage_state,
+ unsigned per_thread_size,
+ unsigned thread_count);
void brw_init_shader_time(struct brw_context *brw);
int brw_get_shader_time_index(struct brw_context *brw,
struct gl_shader_program *shader_prog,
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
index 22856b64179..45128bc439a 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -148,31 +148,29 @@ brw_codegen_cs_prog(struct brw_context *brw,
}
}
- if (prog_data.base.total_scratch) {
- const unsigned subslices = MAX2(brw->intelScreen->subslice_total, 1);
-
- /* WaCSScratchSize:hsw
- *
- * Haswell's scratch space address calculation appears to be sparse
- * rather than tightly packed. The Thread ID has bits indicating
- * which subslice, EU within a subslice, and thread within an EU
- * it is. There's a maximum of two slices and two subslices, so these
- * can be stored with a single bit. Even though there are only 10 EUs
- * per subslice, this is stored in 4 bits, so there's an effective
- * maximum value of 16 EUs. Similarly, although there are only 7
- * threads per EU, this is stored in a 3 bit number, giving an effective
- * maximum value of 8 threads per EU.
- *
- * This means that we need to use 16 * 8 instead of 10 * 7 for the
- * number of threads per subslice.
- */
- const unsigned scratch_ids_per_subslice =
- brw->is_haswell ? 16 * 8 : brw->max_cs_threads;
-
- brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo,
- prog_data.base.total_scratch *
- scratch_ids_per_subslice * subslices);
- }
+ const unsigned subslices = MAX2(brw->intelScreen->subslice_total, 1);
+
+ /* WaCSScratchSize:hsw
+ *
+ * Haswell's scratch space address calculation appears to be sparse
+ * rather than tightly packed. The Thread ID has bits indicating
+ * which subslice, EU within a subslice, and thread within an EU
+ * it is. There's a maximum of two slices and two subslices, so these
+ * can be stored with a single bit. Even though there are only 10 EUs
+ * per subslice, this is stored in 4 bits, so there's an effective
+ * maximum value of 16 EUs. Similarly, although there are only 7
+ * threads per EU, this is stored in a 3 bit number, giving an effective
+ * maximum value of 8 threads per EU.
+ *
+ * This means that we need to use 16 * 8 instead of 10 * 7 for the
+ * number of threads per subslice.
+ */
+ const unsigned scratch_ids_per_subslice =
+ brw->is_haswell ? 16 * 8 : brw->max_cs_threads;
+
+ brw_alloc_stage_scratch(brw, &brw->cs.base,
+ prog_data.base.total_scratch,
+ scratch_ids_per_subslice * subslices);
if (unlikely(INTEL_DEBUG & DEBUG_CS))
fprintf(stderr, "\n");
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 7ead182da6c..4ac1009447c 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -180,11 +180,9 @@ brw_codegen_gs_prog(struct brw_context *brw,
}
/* Scratch space is used for register spilling */
- if (prog_data.base.base.total_scratch) {
- brw_get_scratch_bo(brw, &stage_state->scratch_bo,
- prog_data.base.base.total_scratch *
- brw->max_gs_threads);
- }
+ brw_alloc_stage_scratch(brw, stage_state,
+ prog_data.base.base.total_scratch,
+ brw->max_gs_threads);
brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
key, sizeof(*key),
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 792f81b80aa..a1a81165ab1 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -345,6 +345,28 @@ brw_get_scratch_bo(struct brw_context *brw,
}
}
+/**
+ * Reallocate the stage's scratch BO (\p per_thread_size * \p thread_count
+ * bytes) only when \p per_thread_size exceeds the recorded per-thread size.
+ */
+void
+brw_alloc_stage_scratch(struct brw_context *brw,
+ struct brw_stage_state *stage_state,
+ unsigned per_thread_size,
+ unsigned thread_count)
+{
+ if (stage_state->per_thread_scratch < per_thread_size) {
+ stage_state->per_thread_scratch = per_thread_size;
+
+ if (stage_state->scratch_bo)
+ drm_intel_bo_unreference(stage_state->scratch_bo);
+
+ stage_state->scratch_bo =
+ drm_intel_bo_alloc(brw->bufmgr, "shader scratch space",
+ per_thread_size * thread_count, 4096); /* NOTE(review): result is not NULL-checked here */
+ }
+}
+
void brwInitFragProgFuncs( struct dd_function_table *functions )
{
assert(functions->ProgramStringNotify == _tnl_program_string);
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c
index 83fc15754e1..7fc8eb40726 100644
--- a/src/mesa/drivers/dri/i965/brw_tcs.c
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -294,11 +294,9 @@ brw_codegen_tcs_prog(struct brw_context *brw,
}
/* Scratch space is used for register spilling */
- if (prog_data.base.base.total_scratch) {
- brw_get_scratch_bo(brw, &stage_state->scratch_bo,
- prog_data.base.base.total_scratch *
- brw->max_hs_threads);
- }
+ brw_alloc_stage_scratch(brw, stage_state,
+ prog_data.base.base.total_scratch,
+ brw->max_hs_threads);
brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG,
key, sizeof(*key),
diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c
index a4cd4daadde..d7b3e69a269 100644
--- a/src/mesa/drivers/dri/i965/brw_tes.c
+++ b/src/mesa/drivers/dri/i965/brw_tes.c
@@ -214,11 +214,9 @@ brw_codegen_tes_prog(struct brw_context *brw,
}
/* Scratch space is used for register spilling */
- if (prog_data.base.base.total_scratch) {
- brw_get_scratch_bo(brw, &stage_state->scratch_bo,
- prog_data.base.base.total_scratch *
- brw->max_ds_threads);
- }
+ brw_alloc_stage_scratch(brw, stage_state,
+ prog_data.base.base.total_scratch,
+ brw->max_ds_threads);
brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
key, sizeof(*key),
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index abf03b1fb7a..d929f9b403b 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -208,11 +208,9 @@ brw_codegen_vs_prog(struct brw_context *brw,
}
/* Scratch space is used for register spilling */
- if (prog_data.base.base.total_scratch) {
- brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo,
- prog_data.base.base.total_scratch *
- brw->max_vs_threads);
- }
+ brw_alloc_stage_scratch(brw, &brw->vs.base,
+ prog_data.base.base.total_scratch,
+ brw->max_vs_threads);
brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
key, sizeof(struct brw_vs_prog_key),
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index c9c5d5e443e..46839bc526e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -163,10 +163,9 @@ brw_codegen_wm_prog(struct brw_context *brw,
}
}
- if (prog_data.base.total_scratch) {
- brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo,
- prog_data.base.total_scratch * brw->max_wm_threads);
- }
+ brw_alloc_stage_scratch(brw, &brw->wm.base,
+ prog_data.base.total_scratch,
+ brw->max_wm_threads);
if (unlikely(INTEL_DEBUG & DEBUG_WM))
fprintf(stderr, "\n");