author     Chris Wilson <[email protected]>  2015-03-11 12:21:29 +0000
committer  Chris Wilson <[email protected]>  2015-03-18 09:33:33 +0000
commit     eeb504e0ae7796e7ba475f6e9d6c26daa6b06608 (patch)
tree       9acd17ccf04e0e6266cc0cde1cb185293faa6730
parent     64788b2e8dc2ddedc2712ed02b7e9096638b7bae (diff)
i965: Defer the throttle until we submit new commands
Currently, we throttle before the user begins preparing commands for the
next frame when we acquire the draw/read buffers. However, construction
of the command buffer can itself take significant time relative to the
frame time. If we move the throttle from the buffer acquire to the
command submit phase we can allow the user to improve concurrency
between the CPU and GPU (i.e. reduce the amount of time we waste inside
the throttle).

v2: Whitespace + delay throttling until after the next submission for
greater parallelism

Signed-off-by: Chris Wilson <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Kenneth Graunke <[email protected]>
Cc: Ben Widawsky <[email protected]>
Cc: Kristian Høgsberg <[email protected]>
Cc: Chad Versace <[email protected]>
Cc: Ian Romanick <[email protected]>
Reviewed-by: Chad Versace <[email protected]> [v1]
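To make the claimed gain concrete, here is a toy timeline model of the two throttle placements. It is a sketch only: gpu_free_at, throttle_wait(), submit() and the two cost constants are hypothetical stand-ins (the real driver waits on brw->throttle_batch[1], a buffer object from two swaps back, not a timestamp), and it collapses the two-frame-deep swap throttle into a one-frame wait.

/* Toy model: compare throttling at buffer acquire vs. at batch
 * submission.  All names here are illustrative, not i965 API.
 */
#include <stdio.h>

#define CPU_BUILD_COST 3        /* time the CPU spends building a batch */
#define GPU_FRAME_COST 5        /* time the GPU needs to retire a frame */

static int gpu_free_at;         /* absolute time the GPU goes idle */

static int throttle_wait(int now)       /* block until the GPU catches up */
{
   return now < gpu_free_at ? gpu_free_at : now;
}

static int submit(int now)              /* queue one frame on the GPU */
{
   gpu_free_at = now + GPU_FRAME_COST;
   return now;
}

int main(void)
{
   int now;

   /* Old order: throttle when the buffers are acquired, then build.
    * The CPU build time is added on top of the stall. */
   gpu_free_at = 0;
   now = 0;
   for (int frame = 0; frame < 3; frame++) {
      now = throttle_wait(now);         /* at intel_prepare_render() */
      now += CPU_BUILD_COST;            /* user builds the next frame */
      now = submit(now);
   }
   printf("throttle at acquire: done at t=%d\n", now);

   /* New order: build first, throttle only at submission, so batch
    * construction overlaps the GPU retiring the previous frame. */
   gpu_free_at = 0;
   now = 0;
   for (int frame = 0; frame < 3; frame++) {
      now += CPU_BUILD_COST;            /* build while the GPU is busy */
      now = throttle_wait(now);         /* throttle() at submission */
      now = submit(now);
   }
   printf("throttle at submit:  done at t=%d\n", now);
   return 0;
}

With these numbers the submit-side loop finishes at t=13 against t=19 for the acquire-side loop, because each frame's CPU work overlaps the GPU retiring the previous frame, which is exactly the concurrency gain the message above describes.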
-rw-r--r--  src/mesa/drivers/dri/i965/brw_context.c         34
-rw-r--r--  src/mesa/drivers/dri/i965/intel_batchbuffer.c   44
2 files changed, 44 insertions, 34 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 8257fb60043..88685cd8b6a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1231,40 +1231,6 @@ intel_prepare_render(struct brw_context *brw)
     */
    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
       brw->front_buffer_dirty = true;
-
-   /* Wait for the swapbuffers before the one we just emitted, so we
-    * don't get too many swaps outstanding for apps that are GPU-heavy
-    * but not CPU-heavy.
-    *
-    * We're using intelDRI2Flush (called from the loader before
-    * swapbuffer) and glFlush (for front buffer rendering) as the
-    * indicator that a frame is done and then throttle when we get
-    * here as we prepare to render the next frame. At this point for
-    * round trips for swap/copy and getting new buffers are done and
-    * we'll spend less time waiting on the GPU.
-    *
-    * Unfortunately, we don't have a handle to the batch containing
-    * the swap, and getting our hands on that doesn't seem worth it,
-    * so we just us the first batch we emitted after the last swap.
-    */
-   if (brw->need_swap_throttle && brw->throttle_batch[0]) {
-      if (brw->throttle_batch[1]) {
-         if (!brw->disable_throttling)
-            drm_intel_bo_wait_rendering(brw->throttle_batch[1]);
-         drm_intel_bo_unreference(brw->throttle_batch[1]);
-      }
-      brw->throttle_batch[1] = brw->throttle_batch[0];
-      brw->throttle_batch[0] = NULL;
-      brw->need_swap_throttle = false;
-      /* Throttling here is more precise than the throttle ioctl, so skip it */
-      brw->need_flush_throttle = false;
-   }
-
-   if (brw->need_flush_throttle) {
-      __DRIscreen *psp = brw->intelScreen->driScrnPriv;
-      drmCommandNone(psp->fd, DRM_I915_GEM_THROTTLE);
-      brw->need_flush_throttle = false;
-   }
 }

 /**
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 87862cd492c..3cf44adf585 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -33,6 +33,9 @@
 #include "intel_fbo.h"
 #include "brw_context.h"

+#include <xf86drm.h>
+#include <i915_drm.h>
+
 static void
 intel_batchbuffer_reset(struct brw_context *brw);
@@ -226,6 +229,44 @@ brw_finish_batch(struct brw_context *brw)
    brw->cache.bo_used_by_gpu = true;
 }

+static void
+throttle(struct brw_context *brw)
+{
+   /* Wait for the swapbuffers before the one we just emitted, so we
+    * don't get too many swaps outstanding for apps that are GPU-heavy
+    * but not CPU-heavy.
+    *
+    * We're using intelDRI2Flush (called from the loader before
+    * swapbuffer) and glFlush (for front buffer rendering) as the
+    * indicator that a frame is done and then throttle when we get
+    * here as we prepare to render the next frame. At this point the
+    * round trips for swap/copy and getting new buffers are done and
+    * we'll spend less time waiting on the GPU.
+    *
+    * Unfortunately, we don't have a handle to the batch containing
+    * the swap, and getting our hands on that doesn't seem worth it,
+    * so we just use the first batch we emitted after the last swap.
+    */
+   if (brw->need_swap_throttle && brw->throttle_batch[0]) {
+      if (brw->throttle_batch[1]) {
+         if (!brw->disable_throttling)
+            drm_intel_bo_wait_rendering(brw->throttle_batch[1]);
+         drm_intel_bo_unreference(brw->throttle_batch[1]);
+      }
+      brw->throttle_batch[1] = brw->throttle_batch[0];
+      brw->throttle_batch[0] = NULL;
+      brw->need_swap_throttle = false;
+      /* Throttling here is more precise than the throttle ioctl, so skip it */
+      brw->need_flush_throttle = false;
+   }
+
+   if (brw->need_flush_throttle) {
+      __DRIscreen *psp = brw->intelScreen->driScrnPriv;
+      drmCommandNone(psp->fd, DRM_I915_GEM_THROTTLE);
+      brw->need_flush_throttle = false;
+   }
+}
+
 /* TODO: Push this whole function into bufmgr.
  */
 static int
@@ -260,6 +301,7 @@ do_flush_locked(struct brw_context *brw)
       if (ret == 0) {
          if (unlikely(INTEL_DEBUG & DEBUG_AUB))
             brw_annotate_aub(brw);
+
          if (brw->hw_ctx == NULL || batch->ring != RENDER_RING) {
            ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0,
                                        flags);
@@ -268,6 +310,8 @@ do_flush_locked(struct brw_context *brw)
                                                4 * batch->used, flags);
          }
       }
+
+      throttle(brw);
    }

    if (unlikely(INTEL_DEBUG & DEBUG_BATCH))