summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2011-04-22 09:55:25 -0700
committerEric Anholt <[email protected]>2011-04-29 15:25:37 -0700
commit32cc0c9d8de343f699e80e7e416ea0d7e3121a42 (patch)
tree2381472893ecd003140ee9f26ffcaea9c1bd39da
parent90c70123b830bead0ac622df94f2809ac056af95 (diff)
i965/gen6: Stream the VS push constants.
Improves 3DMMES taiji demo performance by 10.1% +/- 0.9% (n=15). Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c1
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c91
5 files changed, 62 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 6a244984e91..d9b755ba19b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -640,6 +640,9 @@ struct brw_context
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_VS_MAX_SURF];
GLuint nr_surfaces;
+
+ uint32_t push_const_offset; /* Offset in the batchbuffer */
+ int push_const_size; /* in 256-bit register increments */
} vs;
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 321dbc19ede..0c93a03151b 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -367,6 +367,7 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
int estimated_max_prim_size;
estimated_max_prim_size = 512; /* batchbuffer commands */
+ estimated_max_prim_size += 1024; /* gen6 VS push constants */
estimated_max_prim_size += 1024; /* gen6 WM push constants */
estimated_max_prim_size += 512; /* misc. pad */
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 86b0caa4a4e..4c9ac1ed90e 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -107,6 +107,7 @@ extern const struct brw_tracked_state gen6_sf_state;
extern const struct brw_tracked_state gen6_sf_vp;
extern const struct brw_tracked_state gen6_urb;
extern const struct brw_tracked_state gen6_viewport_state;
+extern const struct brw_tracked_state gen6_vs_constants;
extern const struct brw_tracked_state gen6_vs_state;
extern const struct brw_tracked_state gen6_wm_constants;
extern const struct brw_tracked_state gen6_wm_state;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 6f521be6599..a397460feca 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -129,6 +129,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&brw_vs_constants, /* Before vs_surfaces and constant_buffer */
&brw_wm_constants, /* Before wm_surfaces and constant_buffer */
+ &gen6_vs_constants, /* Before vs_state */
&gen6_wm_constants, /* Before wm_state */
&brw_vs_surfaces, /* must do before unit */
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index a10cec318d6..00c28ed8d22 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -34,43 +34,36 @@
#include "intel_batchbuffer.h"
static void
-upload_vs_state(struct brw_context *brw)
+gen6_prepare_vs_push_constants(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
+ /* _BRW_NEW_VERTEX_PROGRAM */
const struct brw_vertex_program *vp =
brw_vertex_program_const(brw->vertex_program);
unsigned int nr_params = brw->vs.prog_data->nr_params / 4;
- drm_intel_bo *constant_bo;
- int i;
+ if (brw->vertex_program->IsNVProgram)
+ _mesa_load_tracked_matrices(ctx);
+
+ /* Updates the ParamaterValues[i] pointers for all parameters of the
+ * basic type of PROGRAM_STATE_VAR.
+ */
+ /* XXX: Should this happen somewhere before to get our state flag set? */
+ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+
+ /* CACHE_NEW_VS_PROG | _NEW_TRANSFORM */
if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) {
- /* Disable the push constant buffers. */
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ brw->vs.push_const_size = 0;
} else {
- int params_uploaded = 0, param_regs;
+ int params_uploaded = 0;
float *param;
+ int i;
- if (brw->vertex_program->IsNVProgram)
- _mesa_load_tracked_matrices(ctx);
-
- /* Updates the ParamaterValues[i] pointers for all parameters of the
- * basic type of PROGRAM_STATE_VAR.
- */
- _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
-
- constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
- (MAX_CLIP_PLANES + nr_params) *
- 4 * sizeof(float),
- 4096);
- drm_intel_gem_bo_map_gtt(constant_bo);
- param = constant_bo->virtual;
+ param = brw_state_batch(brw,
+ (MAX_CLIP_PLANES + nr_params) *
+ 4 * sizeof(float),
+ 32, &brw->vs.push_const_offset);
/* This should be loaded like any other param, but it's ad-hoc
* until we redo the VS backend.
@@ -100,30 +93,56 @@ upload_vs_state(struct brw_context *brw)
if (0) {
printf("VS constant buffer:\n");
for (i = 0; i < params_uploaded; i++) {
- float *buf = (float *)constant_bo->virtual + i * 4;
+ float *buf = param + i * 4;
printf("%d: %f %f %f %f\n",
i, buf[0], buf[1], buf[2], buf[3]);
}
}
- drm_intel_gem_bo_unmap_gtt(constant_bo);
+ brw->vs.push_const_size = (params_uploaded + 1) / 2;
+ /* We can only push 32 registers of constants at a time. */
+ assert(brw->vs.push_const_size <= 32);
+ }
+}
- param_regs = (params_uploaded + 1) / 2;
- assert(param_regs <= 32);
+const struct brw_tracked_state gen6_vs_constants = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_VERTEX_PROGRAM),
+ .cache = 0,
+ },
+ .prepare = gen6_prepare_vs_push_constants,
+};
+static void
+upload_vs_state(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ if (brw->vs.push_const_size == 0) {
+ /* Disable the push constant buffers. */
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else {
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
- OUT_RELOC(constant_bo,
+ /* This is also the set of state flags from gen6_prepare_vs_constants */
+ OUT_RELOC(intel->batch.bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
- param_regs - 1);
+ brw->vs.push_const_offset +
+ brw->vs.push_const_size - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
-
- drm_intel_bo_unreference(constant_bo);
}
BEGIN_BATCH(6);
@@ -149,7 +168,9 @@ const struct brw_tracked_state gen6_vs_state = {
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE |
- BRW_NEW_CONTEXT),
+ BRW_NEW_CONTEXT |
+ BRW_NEW_VERTEX_PROGRAM |
+ BRW_NEW_BATCH),
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_vs_state,