about | summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/drivers/dri/i965/brw_context.h
diff options
context:
space:
mode:
author: Kenneth Graunke <[email protected]> 2016-09-26 10:30:30 -0700
committer: Kenneth Graunke <[email protected]> 2016-10-03 18:41:10 -0700
commit: 9d6ca7c3d091e1ab71ce2f75bf4f13dc8844d801 (patch)
tree: 1d23319263fc859fd759a2ccf9514b4f2bd634fe /src/mesa/drivers/dri/i965/brw_context.h
parent: 7eb7684818ead4ec7444ee309e22a9db731dd234 (diff)
i965: Only emit 1 viewport when possible.
In core profile, we support up to 16 viewports. However, in the majority of cases, only 1 of them is actually used - we only need the others if the last shader stage prior to the rasterizer writes gl_ViewportIndex.

Processing all 16 viewports adds additional CPU overhead, which hurts CPU-intensive workloads such as Glamor. This meant that switching to core profile actually penalized Glamor to an extent, which is unfortunate.

This patch tracks the number of relevant viewports: ctx->Const.MaxViewports if gl_ViewportIndex is written, and 1 otherwise. A new BRW_NEW_VIEWPORT_COUNT flag tracks this. This could mean re-emitting viewport state when switching, but hopefully this is offset by doing 1/16th of the work in the common case. The new flag is also lighter weight than BRW_NEW_VUE_MAP_GEOM_OUT, which we were using in one case.

According to Eric Anholt, x11perf -copypixwin10 performance improves by 11.5094% +/- 3.10841% (n=10) on his Skylake.

Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Ian Romanick <[email protected]>
Acked-by: Anuj Phogat <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_context.h')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h | 9
1 files changed, 9 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 00f0adca4d5..b27fe51e706 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -226,6 +226,7 @@ enum brw_state_id {
BRW_STATE_URB_SIZE,
BRW_STATE_CC_STATE,
BRW_STATE_BLORP,
+ BRW_STATE_VIEWPORT_COUNT,
BRW_NUM_STATE_BITS
};
@@ -294,6 +295,7 @@ enum brw_state_id {
#define BRW_NEW_PROGRAM_CACHE (1ull << BRW_STATE_PROGRAM_CACHE)
#define BRW_NEW_STATE_BASE_ADDRESS (1ull << BRW_STATE_STATE_BASE_ADDRESS)
#define BRW_NEW_VUE_MAP_GEOM_OUT (1ull << BRW_STATE_VUE_MAP_GEOM_OUT)
+#define BRW_NEW_VIEWPORT_COUNT (1ull << BRW_STATE_VIEWPORT_COUNT)
#define BRW_NEW_TRANSFORM_FEEDBACK (1ull << BRW_STATE_TRANSFORM_FEEDBACK)
#define BRW_NEW_RASTERIZER_DISCARD (1ull << BRW_STATE_RASTERIZER_DISCARD)
#define BRW_NEW_STATS_WM (1ull << BRW_STATE_STATS_WM)
@@ -1160,6 +1162,13 @@ struct brw_context
* instead of vp_bo.
*/
uint32_t vp_offset;
+
+ /**
+ * The number of viewports to use. If gl_ViewportIndex is written,
+ * we can have up to ctx->Const.MaxViewports viewports. If not,
+ * the viewport index is always 0, so we only need to emit one.
+ */
+ uint8_t viewport_count;
} clip;