aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c90
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c9
-rw-r--r--src/mesa/drivers/dri/i965/genX_blorp_exec.c6
5 files changed, 109 insertions, 6 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 2ac443bf032..17639bf5995 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1219,6 +1219,9 @@ struct brw_context
enum gen9_astc5x5_wa_tex_type gen9_astc5x5_wa_tex_mask;
+ /** Last rendering scale argument provided to brw_emit_hashing_mode(). */
+ unsigned current_hash_scale;
+
__DRIcontext *driContext;
struct intel_screen *screen;
};
@@ -1265,6 +1268,8 @@ GLboolean brwCreateContext(gl_api api,
*/
void brw_workaround_depthstencil_alignment(struct brw_context *brw,
GLbitfield clear_mask);
+void brw_emit_hashing_mode(struct brw_context *brw, unsigned width,
+ unsigned height, unsigned scale);
/* brw_object_purgeable.c */
void brw_init_object_purgeable_functions(struct dd_function_table *functions);
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 425f5534110..33d042be869 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1570,6 +1570,11 @@ enum brw_pixel_shader_coverage_mask_mode {
# define GEN9_SUBSLICE_HASHING_8x4 (2 << 8)
# define GEN9_SUBSLICE_HASHING_16x16 (3 << 8)
# define GEN9_SUBSLICE_HASHING_MASK_BITS REG_MASK(3 << 8)
+# define GEN9_SLICE_HASHING_NORMAL (0 << 11)
+# define GEN9_SLICE_HASHING_DISABLED (1 << 11)
+# define GEN9_SLICE_HASHING_32x16 (2 << 11)
+# define GEN9_SLICE_HASHING_32x32 (3 << 11)
+# define GEN9_SLICE_HASHING_MASK_BITS REG_MASK(3 << 11)
/* Predicate registers */
#define MI_PREDICATE_SRC0 0x2400
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index e73cadc5d3e..1291470d479 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -602,6 +602,96 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
}
/**
+ * Update the pixel hashing modes that determine the balancing of PS threads
+ * across subslices and slices.
+ *
+ * \param width Width bound of the rendering area (already scaled down if \p
+ * scale is greater than 1).
+ * \param height Height bound of the rendering area (already scaled down if \p
+ * scale is greater than 1).
+ * \param scale The number of framebuffer samples that could potentially be
+ * affected by an individual channel of the PS thread. This is
+ * typically one for single-sampled rendering, but for operations
+ * like CCS resolves and fast clears a single PS invocation may
+ * update a huge number of pixels, in which case a finer
+ * balancing is desirable in order to maximally utilize the
+ * bandwidth available. UINT_MAX can be used as shorthand for
+ * "finest hashing mode available".
+ *
+ * Only devices with devinfo->gen == 9 are handled; for any other generation
+ * this function emits nothing and leaves brw->current_hash_scale alone.
+ *
+ * Any \p scale greater than 1 selects the finest hashing modes; otherwise
+ * the default modes are selected. The slice hashing field is only
+ * programmed when devinfo->num_slices is not 1.
+ *
+ * When neither \p width nor \p height exceeds the smallest hashing block of
+ * the selected mode, the register write is skipped and
+ * brw->current_hash_scale is left untouched. Otherwise a pipe control with
+ * PIPE_CONTROL_STALL_AT_SCOREBOARD | PIPE_CONTROL_CS_STALL is emitted,
+ * GEN7_GT_MODE is loaded with the new hashing bits, and \p scale is recorded
+ * in brw->current_hash_scale.
+ */
+void
+brw_emit_hashing_mode(struct brw_context *brw, unsigned width,
+ unsigned height, unsigned scale)
+{
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+ if (devinfo->gen == 9) {
+ const uint32_t slice_hashing[] = {
+ /* Because all Gen9 platforms with more than one slice require
+ * three-way subslice hashing, a single "normal" 16x16 slice hashing
+ * block is guaranteed to suffer from substantial imbalance, with one
+ * subslice receiving twice as much work as the other two in the
+ * slice.
+ *
+ * The performance impact of that would be particularly severe when
+ * three-way hashing is also in use for slice balancing (which is the
+ * case for all Gen9 GT4 platforms), because one of the slices
+ * receives one out of every three 16x16 blocks in either direction,
+ * which is roughly the periodicity of the underlying subslice imbalance
+ * pattern ("roughly" because in reality the hardware's
+ * implementation of three-way hashing doesn't do exact modulo 3
+ * arithmetic, which somewhat decreases the magnitude of this effect
+ * in practice). This leads to a systematic subslice imbalance
+ * within that slice regardless of the size of the primitive. The
+ * 32x32 hashing mode guarantees that the subslice imbalance within a
+ * single slice hashing block is minimal, largely eliminating this
+ * effect.
+ */
+ GEN9_SLICE_HASHING_32x32,
+ /* Finest slice hashing mode available (selected when scale > 1). */
+ GEN9_SLICE_HASHING_NORMAL
+ };
+ const uint32_t subslice_hashing[] = {
+ /* The 16x16 subslice hashing mode is used on non-LLC platforms to
+ * match the performance of previous Mesa versions. 16x16 has a
+ * slight cache locality benefit especially visible in the sampler L1
+ * cache efficiency of low-bandwidth platforms, but it comes at the
+ * cost of greater subslice imbalance for primitives of dimensions
+ * approximately intermediate between 16x4 and 16x16.
+ */
+ (devinfo->has_llc ? GEN9_SUBSLICE_HASHING_16x4 :
+ GEN9_SUBSLICE_HASHING_16x16),
+ /* Finest subslice hashing mode available (selected when scale > 1). */
+ GEN9_SUBSLICE_HASHING_8x4
+ };
+ /* Dimensions of the smallest hashing block of a given hashing mode. If
+ * the rendering area is smaller than this there can't possibly be any
+ * benefit from switching to this mode, so we optimize out the
+ * transition.
+ *
+ * Each entry is { width, height } and the table is indexed the same way
+ * as slice_hashing and subslice_hashing above.
+ */
+ const unsigned min_size[][2] = {
+ { 16, 4 },
+ { 8, 4 }
+ };
+ const unsigned idx = scale > 1; /* 0: default modes, 1: finest modes */
+
+ if (width > min_size[idx][0] || height > min_size[idx][1]) {
+ const uint32_t gt_mode =
+ (devinfo->num_slices == 1 ? 0 :
+ GEN9_SLICE_HASHING_MASK_BITS | slice_hashing[idx]) |
+ GEN9_SUBSLICE_HASHING_MASK_BITS | subslice_hashing[idx];
+
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_STALL_AT_SCOREBOARD |
+ PIPE_CONTROL_CS_STALL);
+
+ brw_load_register_imm32(brw, GEN7_GT_MODE, gt_mode);
+
+ brw->current_hash_scale = scale; /* only recorded when the write happened */
+ }
+ }
+}
+
+/**
* Misc invariant state packets
*/
void
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index ac9ee2dabf1..7a2daf4a533 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -133,12 +133,6 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
-
- if (gen_device_info_is_9lp(devinfo)) {
- brw_load_register_imm32(brw, GEN7_GT_MODE,
- GEN9_SUBSLICE_HASHING_MASK_BITS |
- GEN9_SUBSLICE_HASHING_16x16);
- }
}
if (devinfo->gen >= 8) {
@@ -543,6 +537,9 @@ brw_upload_pipeline_state(struct brw_context *brw,
brw_select_pipeline(brw, pipeline);
+ if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
+ brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);
+
if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
/* Always re-emit all state. */
brw->NewGLState = ~0;
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 8f06f5e9ef2..62a8310f68b 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -343,6 +343,12 @@ retry:
gen8_write_pma_stall_bits(brw, 0);
#endif
+ const unsigned scale = params->fast_clear_op ? UINT_MAX : 1;
+ if (brw->current_hash_scale != scale) {
+ brw_emit_hashing_mode(brw, params->x1 - params->x0,
+ params->y1 - params->y0, scale);
+ }
+
blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;