aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_state_upload.c
diff options
context:
space:
mode:
authorRafael Antognolli <[email protected]>2019-07-22 15:49:24 -0700
committerRafael Antognolli <[email protected]>2019-08-12 16:19:08 -0700
commitf0d29238df33d66ca4ee1e5f25a1edd9034f632e (patch)
tree63f3d47fd44441426478c31899afc61a30fae118 /src/mesa/drivers/dri/i965/brw_state_upload.c
parent7bc022b4bbcd501be3e7d7d036f7382f6c0542b0 (diff)
i965/gen11: Emit SLICE_HASH_TABLE when pipes are unbalanced.
If the pixel pipes have a different number of subslices, emit a slice hashing table that will ensure proper workload distribution. v2: Set Mask field to 0xffff for workaround (Ken).
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_state_upload.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c84
1 files changed, 84 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 7a2daf4a533..c095f2e59c1 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -43,6 +43,7 @@
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
+#include "genX_bits.h"
#include "main/framebuffer.h"
void
@@ -68,6 +69,86 @@ brw_enable_obj_preemption(struct brw_context *brw, bool enable)
}
static void
+brw_upload_gen11_slice_hashing_state(struct brw_context *brw)
+{
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ int subslices_delta =
+ devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1];
+ if (subslices_delta == 0)
+ return;
+
+ unsigned size = GEN11_SLICE_HASH_TABLE_length * 4;
+ uint32_t hash_address;
+
+ uint32_t *map = brw_state_batch(brw, size, 64, &hash_address);
+
+ unsigned idx = 0;
+
+ unsigned sl_small = 0;
+ unsigned sl_big = 1;
+ if (subslices_delta > 0) {
+ sl_small = 1;
+ sl_big = 0;
+ }
+
+ /**
+ * Create a 16x16 slice hashing table like the following one:
+ *
+ * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+ * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
+ * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
+ * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+ * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
+ * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
+ * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+ * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
+ * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
+ * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+ * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
+ * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
+ * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+ * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
+ * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
+ * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
+ *
+ * The table above is used when the pixel pipe 0 has less subslices than
+ * pixel pipe 1. When pixel pipe 0 has more subslices, then a similar table
+ * with 0's and 1's inverted is used.
+ */
+ for (int i = 0; i < GEN11_SLICE_HASH_TABLE_length; i++) {
+ uint32_t dw = 0;
+
+ for (int j = 0; j < 8; j++) {
+ unsigned slice = idx++ % 3 ? sl_big : sl_small;
+ dw |= slice << (j * 4);
+ }
+ map[i] = dw;
+ }
+
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2));
+ OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1);
+ ADVANCE_BATCH();
+
+ /* From gen10/gen11 workaround table in h/w specs:
+ *
+ * "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
+ * a value of 0xFFFF"
+ *
+ * This means that whenever we update a field with this instruction, we need
+ * to update all the others.
+ *
+ * Since this is the first time we emit this
+ * instruction, we are only setting the fSLICE_HASHING_TABLE_ENABLE flag,
+ * and leaving everything else at their default state (0).
+ */
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
+ OUT_BATCH(0xffff | SLICE_HASHING_TABLE_ENABLE);
+ ADVANCE_BATCH();
+}
+
+static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
@@ -179,6 +260,9 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
if (devinfo->gen >= 10)
brw_enable_obj_preemption(brw, true);
+
+ if (devinfo->gen == 11)
+ brw_upload_gen11_slice_hashing_state(brw);
}
static inline const struct brw_tracked_state *