summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2012-02-15 14:15:14 -0800
committer Eric Anholt <[email protected]> 2012-02-21 11:54:14 -0800
commit f9c3ea32cd9b243050ee16f10d6eb9d9c8b3a8ea (patch)
tree bc8d85b12d70377ce22fa3444e22e2cffec64120 /src/mesa/drivers
parent 07e00b3040d6da381595c65db5afe597f20d99fc (diff)
i965: Split the gen6 GS binding table to a separate table.
Improves VS state change microbenchmark performance by 7.08729% +/- 1.22289% (n=10) on gen7, because we don't upload the 64 dwords of unused binding table any more. Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 23
-rw-r--r-- src/mesa/drivers/dri/i965/brw_misc_state.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_upload.c 1
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_sol.c 58
5 files changed, 75 insertions, 10 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 44a01e69ba5..9c89617e66d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -484,11 +484,6 @@ struct brw_vs_ouput_sizes {
* | . | . |
* | : | : |
* | 24 | Texture 15 |
- * +-----|-------------------------+
- * | 25 | SOL Binding 0 |
- * | . | . |
- * | : | : |
- * | 88 | SOL Binding 63 |
* +-------------------------------+
*
* Our VS binding tables are programmed as follows:
@@ -502,6 +497,15 @@ struct brw_vs_ouput_sizes {
* | 16 | Texture 15 |
* +-------------------------------+
*
+ * Our (gen6) GS binding tables are programmed as follows:
+ *
+ * +-----+-------------------------+
+ * | 0 | SOL Binding 0 |
+ * | . | . |
+ * | : | : |
+ * | 63 | SOL Binding 63 |
+ * +-----+-------------------------+
+ *
* Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
* the identity function or things will break. We do want to keep draw buffers
* first so we can use headerless render target writes for RT 0.
@@ -509,15 +513,17 @@ struct brw_vs_ouput_sizes {
#define SURF_INDEX_DRAW(d) (d)
#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t))
-#define SURF_INDEX_SOL_BINDING(t) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t))
/** Maximum size of the binding table. */
-#define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
+#define BRW_MAX_SURFACES SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT)
#define SURF_INDEX_VERT_CONST_BUFFER (0)
#define SURF_INDEX_VS_TEXTURE(t) (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
#define BRW_MAX_VS_SURFACES SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT)
+#define SURF_INDEX_SOL_BINDING(t) ((t))
+#define BRW_MAX_GS_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
+
enum brw_cache_id {
BRW_BLEND_STATE,
BRW_DEPTH_STENCIL_STATE,
@@ -868,6 +874,9 @@ struct brw_context
/** Offset in the program cache to the CLIP program pre-gen6 */
uint32_t prog_offset;
uint32_t state_offset;
+
+      /** Batch offset of the gen6 GS binding table; reset to 0 when no
+       *  table is uploaded.
+       */
+      uint32_t bind_bo_offset;
+      /** Surface state offsets, one per SOL binding.  Sized for the GS
+       *  table (BRW_MAX_GS_SURFACES), since it is indexed by SOL binding
+       *  index and copied wholesale into the GS binding table.
+       */
+      uint32_t surf_offset[BRW_MAX_GS_SURFACES];
} gs;
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 7bc7e1c1025..c86755de659 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -116,7 +116,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
- OUT_BATCH(brw->bind.bo_offset); /* gs */
+ OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 59a2bb32501..a58b4b3c0b8 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -90,6 +90,7 @@ extern const struct brw_tracked_state gen6_clip_vp;
extern const struct brw_tracked_state gen6_color_calc_state;
extern const struct brw_tracked_state gen6_depth_stencil_state;
extern const struct brw_tracked_state gen6_gs_state;
+extern const struct brw_tracked_state gen6_gs_binding_table;
extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
extern const struct brw_tracked_state gen6_sampler_state;
extern const struct brw_tracked_state gen6_scissor_state;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 28e4d26209e..3f5c03d8f90 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -148,6 +148,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&brw_texture_surfaces,
&gen6_sol_surface,
&brw_vs_binding_table,
+ &gen6_gs_binding_table,
&brw_binding_table,
&brw_samplers,
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
index 41923b7f527..fbd8e71631f 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -30,6 +30,7 @@
#include "brw_context.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
+#include "brw_state.h"
static void
gen6_update_sol_surfaces(struct brw_context *brw)
@@ -54,11 +55,11 @@ gen6_update_sol_surfaces(struct brw_context *brw)
xfb_obj->Offset[buffer] / 4 +
linked_xfb_info->Outputs[i].DstOffset;
brw_update_sol_surface(
- brw, xfb_obj->Buffers[buffer], &brw->bind.surf_offset[surf_index],
+ brw, xfb_obj->Buffers[buffer], &brw->gs.surf_offset[surf_index],
linked_xfb_info->Outputs[i].NumComponents,
linked_xfb_info->BufferStride[buffer], buffer_offset);
} else {
- brw->bind.surf_offset[surf_index] = 0;
+ brw->gs.surf_offset[surf_index] = 0;
}
}
@@ -75,6 +76,59 @@ const struct brw_tracked_state gen6_sol_surface = {
.emit = gen6_update_sol_surfaces,
};
+/**
+ * Constructs the gen6 GS binding table, which maps SOL binding indices to
+ * the surface state offsets stored in brw->gs.surf_offset.
+ *
+ * If the current vertex shader program has no transform feedback outputs
+ * (or there is no such program), no table is uploaded and
+ * brw->gs.bind_bo_offset is reset to 0.  BRW_NEW_GS_BINDING_TABLE is flagged
+ * whenever a new table is uploaded or an existing one is dropped.
+ */
+static void
+brw_gs_upload_binding_table(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   /* BRW_NEW_VERTEX_PROGRAM */
+   const struct gl_shader_program *shaderprog =
+      ctx->Shader.CurrentVertexProgram;
+   bool has_surfaces = false;
+   uint32_t *bind;
+
+   /* Currently we only ever upload surfaces for SOL.
+    *
+    * NOTE(review): CurrentVertexProgram is presumably NULL when no GLSL
+    * vertex shader is bound -- confirm.  Guard the dereference either way.
+    */
+   if (shaderprog) {
+      const struct gl_transform_feedback_info *linked_xfb_info =
+         &shaderprog->LinkedTransformFeedback;
+
+      has_surfaces = linked_xfb_info->NumOutputs != 0;
+   }
+
+   /* Skip making a binding table if there are no SOL surfaces to bind. */
+   if (!has_surfaces) {
+      if (brw->gs.bind_bo_offset != 0) {
+         brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
+         brw->gs.bind_bo_offset = 0;
+      }
+      return;
+   }
+
+   /* Might want to calculate nr_surfaces first, to avoid taking up so much
+    * space for the binding table.
+    *
+    * The allocation must be sized with BRW_MAX_GS_SURFACES (not the WM
+    * table's BRW_MAX_SURFACES) so that it matches the copy below.
+    */
+   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+                          sizeof(uint32_t) * BRW_MAX_GS_SURFACES,
+                          32, &brw->gs.bind_bo_offset);
+
+   /* BRW_NEW_SURFACES
+    *
+    * NOTE(review): brw->gs.surf_offset must hold at least
+    * BRW_MAX_GS_SURFACES entries for this copy to stay in bounds.
+    */
+   memcpy(bind, brw->gs.surf_offset, BRW_MAX_GS_SURFACES * sizeof(uint32_t));
+
+   brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
+}
+
+/**
+ * State atom for the gen6 GS binding table: names the dirty bits it depends
+ * on (new batch, vertex program, surfaces) and the function that uploads it.
+ */
+const struct brw_tracked_state gen6_gs_binding_table = {
+   .emit = brw_gs_upload_binding_table,
+   .dirty = {
+      .cache = 0,
+      .brw = (BRW_NEW_BATCH | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_SURFACES),
+      .mesa = 0,
+   },
+};
+
static void
gen6_update_sol_indices(struct brw_context *brw)
{