summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri
diff options
context:
space:
mode:
author: Paul Berry <[email protected]> 2011-12-14 10:44:49 -0800
committer: Paul Berry <[email protected]> 2011-12-20 15:22:08 -0800
commit: 7d2ff0bf0b7422c34676c2f47dbe754f57edb51e (patch)
tree: a7b297b3b237d4b60b82301ef092c97b0e4a5059 /src/mesa/drivers/dri
parent: 21504b462acda4977c5fdfffc192e73273b8fb26 (diff)
i965 gen6: Implement rasterizer discard.
This patch enables rasterizer discard functionality (a part of transform feedback) in Gen6, by generating an alternate GS program when rasterizer discard is active. Instead of forwarding vertices down the pipeline, the alternate GS program uses a URB Write message to deallocate the URB entry that was allocated by FF sync and terminate the thread.

Note: parts of the Sandy Bridge PRM seem to imply that we could do this more efficiently, by clearing the GEN6_GS_RENDERING_ENABLE bit, and not allocating a URB entry at all. However, it's not clear how we are supposed to terminate the thread if we do that. Volume 2 part 1, section 4.5.4, says "GS threads must terminate by sending a URB_WRITE message with the EOT and Complete bits set.", and my experiments so far confirm that.

Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r--  src/mesa/drivers/dri/i965/brw_gs.c  6
-rw-r--r--  src/mesa/drivers/dri/i965/brw_gs.h  1
-rw-r--r--  src/mesa/drivers/dri/i965/brw_gs_emit.c  30
3 files changed, 37 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 1e605efd6e4..ee3f94ca613 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -208,6 +208,12 @@ static void populate_key( struct brw_context *brw,
linked_xfb_info->Outputs[i].OutputRegister;
}
}
+ /* On Gen6, GS is also used for rasterizer discard. */
+ /* _NEW_TRANSFORM_FEEDBACK */
+ if (ctx->TransformFeedback.RasterDiscard) {
+ key->need_gs_prog = true;
+ key->rasterizer_discard = true;
+ }
} else {
/* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
* into simpler primitives.
diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h
index 33d8d7ab5a7..7bf2248a5ed 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.h
+++ b/src/mesa/drivers/dri/i965/brw_gs.h
@@ -50,6 +50,7 @@ struct brw_gs_prog_key {
GLuint pv_first:1;
GLuint need_gs_prog:1;
GLuint userclip_active:1;
+ GLuint rasterizer_discard:1;
/**
* Number of varyings that are output to transform feedback.
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index 269a49559a5..1f96a164637 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -193,6 +193,28 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c,
}
/**
+ * De-allocate the URB entry that was previously allocated to this thread
+ * (without writing any vertex data to it), and terminate the thread. This is
+ * used to implement RASTERIZER_DISCARD functionality.
+ */
+static void brw_gs_terminate(struct brw_gs_compile *c)
+{
+ struct brw_compile *p = &c->func; /* compile context handed to brw_urb_WRITE */
+ brw_urb_WRITE(p, /* emit one URB write message that ends the thread */
+ retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), /* dest: null register; nothing is read back (response_length is 0) */
+ 0, /* msg_reg_nr */
+ c->reg.header, /* src0: the message header register */
+ false, /* allocate: no new URB entry is requested */
+ false, /* used: presumably marks the entry as unused so it gets released -- TODO confirm against the PRM */
+ 1, /* msg_length: header only, no vertex data is written */
+ 0, /* response_length: no reply expected */
+ true, /* eot: end of thread; this message terminates the GS thread */
+ true, /* writes_complete: the Complete bit that must accompany EOT */
+ 0, /* offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+/**
* Send an FF_SYNC message to ensure that all previously spawned GS threads
* have finished sending primitives down the pipeline, and to allocate a URB
* entry for the first output vertex. Only needed when intel->needs_ff_sync
@@ -409,6 +431,14 @@ gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
brw_gs_ff_sync(c, 1);
+ /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so
+ * release the URB that was just allocated, and terminate the thread.
+ */
+ if (key->rasterizer_discard) {
+ brw_gs_terminate(c);
+ return;
+ }
+
brw_gs_overwrite_header_dw2_from_r0(c);
switch (num_verts) {
case 1: