summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2013-11-01 17:43:43 -0700
committer: Eric Anholt <[email protected]> 2013-11-05 15:39:45 -0800
commit: ff337bc80069c74c6ad5d4ce84cd2029282d9e93 (patch)
tree: 81874fe9931bd534dfcfe3548eaa3adfc3140ab2
parent: 3f319eef76a31776085accb38c06851bc04f64b8 (diff)
i965: Tell the unit states how many binding table entries we have.
Before the series with 3c9dc2d31b80fc73bffa1f40a91443a53229c8e2 to dynamically assign our binding table indices, we didn't really track our binding table count per shader, so we never filled in these fields.

Affects cairo-gl trace runtime by -2.47953% +/- 1.07281% (n=20)

Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_state.c | 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_state.c | 3
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_vs_state.c | 4
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_wm_state.c | 5
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_gs_state.c | 4
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_vs_state.c | 4
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_wm_state.c | 4
7 files changed, 22 insertions, 5 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index cdffac3b385..216b3dd77af 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -79,7 +79,8 @@ brw_upload_vs_unit(struct brw_context *brw)
*/
vs->thread1.single_program_flow = (brw->gen == 5);
- vs->thread1.binding_table_entry_count = 0;
+ vs->thread1.binding_table_entry_count =
+ brw->vs.prog_data->base.base.binding_table.size_bytes / 4;
if (brw->vs.prog_data->base.total_scratch != 0) {
vs->thread2.scratch_space_base_pointer =
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 9aa32c08ec9..406dbbe3338 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -120,7 +120,8 @@ brw_upload_wm_unit(struct brw_context *brw)
else
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
- wm->thread1.binding_table_entry_count = 0;
+ wm->thread1.binding_table_entry_count =
+ brw->wm.prog_data->base.binding_table.size_bytes / 4;
if (brw->wm.prog_data->total_scratch != 0) {
wm->thread2.scratch_space_base_pointer =
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 569ec8cdc89..80129cdf168 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -165,7 +165,9 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(stage_state->prog_offset);
OUT_BATCH(floating_point_mode |
- ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
+ ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+ ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->vs.prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 42d8789ed67..57732465114 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -146,6 +146,11 @@ upload_wm_state(struct brw_context *brw)
/* CACHE_NEW_SAMPLER */
dw2 |= (ALIGN(brw->wm.base.sampler_count, 4) / 4) <<
GEN6_WM_SAMPLER_COUNT_SHIFT;
+
+ /* CACHE_NEW_WM_PROG */
+ dw2 |= ((brw->wm.prog_data->base.binding_table.size_bytes / 4) <<
+ GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
+
dw4 |= (brw->wm.prog_data->first_curbe_grf <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index 2602200eb68..584f2db8f8e 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -85,7 +85,9 @@ upload_gs_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(stage_state->prog_offset);
OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
- GEN6_GS_SAMPLER_COUNT_SHIFT));
+ GEN6_GS_SAMPLER_COUNT_SHIFT) |
+ ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->gs.prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 4fd19139e8b..1e76eb1ee2e 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -100,7 +100,9 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(stage_state->prog_offset);
OUT_BATCH(floating_point_mode |
((ALIGN(stage_state->sampler_count, 4)/4) <<
- GEN6_VS_SAMPLER_COUNT_SHIFT));
+ GEN6_VS_SAMPLER_COUNT_SHIFT) |
+ ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->vs.prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 58a6438e79e..531b1a4c9cd 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -160,6 +160,10 @@ upload_ps_state(struct brw_context *brw)
dw2 |=
(ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
+ /* CACHE_NEW_WM_PROG */
+ dw2 |= ((brw->wm.prog_data->base.binding_table.size_bytes / 4) <<
+ GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
+
/* Use ALT floating point mode for ARB fragment programs, because they
* require 0^0 == 1. Even though _CurrentFragmentProgram is used for
* rendering, CurrentFragmentProgram is used for this check to