summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_cs.c 24
-rw-r--r-- src/mesa/drivers/dri/i965/brw_gs.c 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_program.c 64
-rw-r--r-- src/mesa/drivers/dri/i965/brw_tcs.c 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_tes.c 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs.c 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm.c 4
8 files changed, 62 insertions, 45 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0102f154248..3bee3e99ed2 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1345,8 +1345,7 @@ void brw_get_scratch_bo(struct brw_context *brw,
struct brw_bo **scratch_bo, int size);
void brw_alloc_stage_scratch(struct brw_context *brw,
struct brw_stage_state *stage_state,
- unsigned per_thread_size,
- unsigned thread_count);
+ unsigned per_thread_size);
void brw_init_shader_time(struct brw_context *brw);
int brw_get_shader_time_index(struct brw_context *brw,
struct gl_program *prog,
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
index 1a0e9f62c63..1d34a8a79d1 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -114,29 +114,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
}
}
- const unsigned subslices = MAX2(brw->screen->subslice_total, 1);
-
- /* WaCSScratchSize:hsw
- *
- * Haswell's scratch space address calculation appears to be sparse
- * rather than tightly packed. The Thread ID has bits indicating
- * which subslice, EU within a subslice, and thread within an EU
- * it is. There's a maximum of two slices and two subslices, so these
- * can be stored with a single bit. Even though there are only 10 EUs
- * per subslice, this is stored in 4 bits, so there's an effective
- * maximum value of 16 EUs. Similarly, although there are only 7
- * threads per EU, this is stored in a 3 bit number, giving an effective
- * maximum value of 8 threads per EU.
- *
- * This means that we need to use 16 * 8 instead of 10 * 7 for the
- * number of threads per subslice.
- */
- const unsigned scratch_ids_per_subslice =
- devinfo->is_haswell ? 16 * 8 : devinfo->max_cs_threads;
-
- brw_alloc_stage_scratch(brw, &brw->cs.base,
- prog_data.base.total_scratch,
- scratch_ids_per_subslice * subslices);
+ brw_alloc_stage_scratch(brw, &brw->cs.base, prog_data.base.total_scratch);
/* The param and pull_param arrays will be freed by the shader cache. */
ralloc_steal(NULL, prog_data.base.param);
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index e6e757ce686..19eab2f5332 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -138,8 +138,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, stage_state,
- prog_data.base.base.total_scratch,
- devinfo->max_gs_threads);
+ prog_data.base.base.total_scratch);
/* The param and pull_param arrays will be freed by the shader cache. */
ralloc_steal(NULL, prog_data.base.base.param);
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 02d095b06f6..7607bc38840 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -328,19 +328,65 @@ brw_get_scratch_bo(struct brw_context *brw,
void
brw_alloc_stage_scratch(struct brw_context *brw,
struct brw_stage_state *stage_state,
- unsigned per_thread_size,
- unsigned thread_count)
+ unsigned per_thread_size)
{
- if (stage_state->per_thread_scratch < per_thread_size) {
- stage_state->per_thread_scratch = per_thread_size;
+ if (stage_state->per_thread_scratch >= per_thread_size)
+ return;
+
+ stage_state->per_thread_scratch = per_thread_size;
- if (stage_state->scratch_bo)
- brw_bo_unreference(stage_state->scratch_bo);
+ if (stage_state->scratch_bo)
+ brw_bo_unreference(stage_state->scratch_bo);
+
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ unsigned thread_count;
+ switch(stage_state->stage) {
+ case MESA_SHADER_VERTEX:
+ thread_count = devinfo->max_vs_threads;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ thread_count = devinfo->max_tcs_threads;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ thread_count = devinfo->max_tes_threads;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ thread_count = devinfo->max_gs_threads;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ thread_count = devinfo->max_wm_threads;
+ break;
+ case MESA_SHADER_COMPUTE: {
+ const unsigned subslices = MAX2(brw->screen->subslice_total, 1);
+
+ /* WaCSScratchSize:hsw
+ *
+ * Haswell's scratch space address calculation appears to be sparse
+ * rather than tightly packed. The Thread ID has bits indicating
+ * which subslice, EU within a subslice, and thread within an EU
+ * it is. There's a maximum of two slices and two subslices, so these
+ * can be stored with a single bit. Even though there are only 10 EUs
+ * per subslice, this is stored in 4 bits, so there's an effective
+ * maximum value of 16 EUs. Similarly, although there are only 7
+ * threads per EU, this is stored in a 3 bit number, giving an effective
+ * maximum value of 8 threads per EU.
+ *
+ * This means that we need to use 16 * 8 instead of 10 * 7 for the
+ * number of threads per subslice.
+ */
+ const unsigned scratch_ids_per_subslice =
+ devinfo->is_haswell ? 16 * 8 : devinfo->max_cs_threads;
- stage_state->scratch_bo =
- brw_bo_alloc(brw->bufmgr, "shader scratch space",
- per_thread_size * thread_count, 4096);
+ thread_count = scratch_ids_per_subslice * subslices;
+ break;
}
+ default:
+ unreachable("Unsupported stage!");
+ }
+
+ stage_state->scratch_bo =
+ brw_bo_alloc(brw->bufmgr, "shader scratch space",
+ per_thread_size * thread_count, 4096);
}
void brwInitFragProgFuncs( struct dd_function_table *functions )
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c
index 5ac728ee68f..685336eb973 100644
--- a/src/mesa/drivers/dri/i965/brw_tcs.c
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -259,8 +259,7 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, stage_state,
- prog_data.base.base.total_scratch,
- devinfo->max_tcs_threads);
+ prog_data.base.base.total_scratch);
/* The param and pull_param arrays will be freed by the shader cache. */
ralloc_steal(NULL, prog_data.base.base.param);
diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c
index 21f44ae74af..225d2401c7e 100644
--- a/src/mesa/drivers/dri/i965/brw_tes.c
+++ b/src/mesa/drivers/dri/i965/brw_tes.c
@@ -129,8 +129,7 @@ brw_codegen_tes_prog(struct brw_context *brw,
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, stage_state,
- prog_data.base.base.total_scratch,
- devinfo->max_tes_threads);
+ prog_data.base.base.total_scratch);
/* The param and pull_param arrays will be freed by the shader cache. */
ralloc_steal(NULL, prog_data.base.base.param);
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index d308bb83332..de0ba79f357 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -248,8 +248,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, &brw->vs.base,
- prog_data.base.base.total_scratch,
- devinfo->max_vs_threads);
+ prog_data.base.base.total_scratch);
/* The param and pull_param arrays will be freed by the shader cache. */
ralloc_steal(NULL, prog_data.base.base.param);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 4144cd11ee4..34a3a1e5c16 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -209,9 +209,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
}
}
- brw_alloc_stage_scratch(brw, &brw->wm.base,
- prog_data.base.total_scratch,
- devinfo->max_wm_threads);
+ brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);
if (unlikely((INTEL_DEBUG & DEBUG_WM) && fp->program.is_arb_asm))
fprintf(stderr, "\n");