summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Kenneth Graunke <[email protected]> 2017-05-30 14:29:08 -0700
committer Kenneth Graunke <[email protected]> 2017-08-02 13:31:56 -0700
commit ebd2fd6ef3b50dc85190b407d0921c91b6c4f1ca (patch)
tree 114d9c1edc7e1f9f043ee15b175e28d37fecc781
parent a60c584575ac7e6e5f21282f9883da1c3839c334 (diff)
i965: Set "Subslice Hashing Mode" to 16x16 on Apollolake.
As of 4.11, the kernel isn't bothering to set the subslice hashing mode on Apollolake, leaving it at the default of 8x8. (It initializes it to 16x4 on most platforms.)

Performance data for GPUTest Triangle on Apollolake at 1024x640:

X-tiled RT:
-----------
8x8 -> 16x4: 2.4325% +/- 0.383683% (n=107)
8x8 -> 8x4: -3.75105% +/- 0.592491% (n=40)
8x8 -> 16x16: 6.17238% +/- 0.67157% (n=30)

Y-tiled RT:
-----------
8x8 -> 16x4: 1.30307% +/- 0.297292% (n=205)
8x8 -> 8x4: -0.769282% +/- 0.729557% (n=35)
8x8 -> 16x16: 3.00254% +/- 0.715503% (n=40)

8x MSAA RT (INTEL_FORCE_MSAA=8):
--------------------------------
8x8 -> 16x4: 1.38889% +/- 0.93729% (n=7)
8x8 -> 8x4: -2.10643% +/- 1.15153% (n=3)
8x8 -> 16x16: 3.87183% +/- 1.08851% (n=5)

Based on this, we choose 16x16 for Apollolake.

Skylake GT2 with X-tiled buffers appears to be a toss-up between 16x4 and 16x16, and with Y-tiled buffers it doesn't seem to really matter. So we'll leave Skylake alone for now.

The hashing mode doesn't seem to make a measurable impact on more complex benchmarks.

Acked-by: Matt Turner <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state_upload.c 9
2 files changed, 16 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 2a8dbf8cb9a..4abb790612d 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1617,6 +1617,13 @@ enum brw_pixel_shader_coverage_mask_mode {
# define GEN8_HIZ_PMA_MASK_BITS \
REG_MASK(GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE)
+#define GEN7_GT_MODE 0x7008
+# define GEN9_SUBSLICE_HASHING_8x8 (0 << 8)
+# define GEN9_SUBSLICE_HASHING_16x4 (1 << 8)
+# define GEN9_SUBSLICE_HASHING_8x4 (2 << 8)
+# define GEN9_SUBSLICE_HASHING_16x16 (3 << 8)
+# define GEN9_SUBSLICE_HASHING_MASK_BITS REG_MASK(3 << 8)
+
/* Predicate registers */
#define MI_PREDICATE_SRC0 0x2400
#define MI_PREDICATE_SRC1 0x2408
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index acaa97ee7d4..f38c1946df6 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -72,6 +72,15 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
ADVANCE_BATCH();
+
+ if (brw->is_broxton) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+ OUT_BATCH(GEN7_GT_MODE);
+ OUT_BATCH(GEN9_SUBSLICE_HASHING_MASK_BITS |
+ GEN9_SUBSLICE_HASHING_16x16);
+ ADVANCE_BATCH();
+ }
}
if (brw->gen >= 8) {