diff options
author | Paul Berry <[email protected]> | 2013-05-01 08:04:12 -0700 |
---|---|---|
committer | Paul Berry <[email protected]> | 2013-06-12 11:10:06 -0700 |
commit | 5e5d4e021f7dde12fb0f4dfaf40fbbd4d119f4ab (patch) | |
tree | e57a14e96f071bdefc593d1402b0c18ad4c25807 /src/mesa/drivers | |
parent | dd3f950115218c69c9118436a5110a1ee6a2dda5 (diff) |
i965/gen7+: Implement fast color clear operation in BLORP.
Since we defer allocation of the MCS miptree until the time of the
fast clear operation, this patch also implements creation of the MCS
miptree.
In addition, this patch adds the field
intel_mipmap_tree::fast_clear_color_value, which holds the most recent
fast color clear value, if any. We use it to set the SURFACE_STATE's
clear color for render targets.
v2: Flag BRW_NEW_SURFACES when allocating the MCS miptree. Generate a
perf_debug message if clearing to a color that isn't compatible with
fast color clear. Fix "control reaches end of non-void function"
build warning.
Reviewed-by: Eric Anholt <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_blorp.cpp | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_blorp.h | 11 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_blorp_clear.cpp | 149 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_clear.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_blorp.cpp | 18 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 48 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_mipmap_tree.h | 13 |
9 files changed, 240 insertions, 14 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index a2d02bfc5e0..9c9a4a7b38d 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -145,6 +145,7 @@ brw_blorp_params::brw_blorp_params() y1(0), depth_format(0), hiz_op(GEN6_HIZ_OP_NONE), + fast_clear_op(GEN7_FAST_CLEAR_OP_NONE), num_samples(0), use_wm_prog(false) { diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 51b23dbea12..08082060b7d 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -47,7 +47,8 @@ brw_blorp_blit_miptrees(struct intel_context *intel, bool mirror_x, bool mirror_y); bool -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb); +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb, + bool partial_clear); #ifdef __cplusplus } /* end extern "C" */ @@ -192,6 +193,13 @@ struct brw_blorp_prog_data bool persample_msaa_dispatch; }; + +enum gen7_fast_clear_op { + GEN7_FAST_CLEAR_OP_NONE, + GEN7_FAST_CLEAR_OP_FAST_CLEAR, +}; + + class brw_blorp_params { public: @@ -209,6 +217,7 @@ public: brw_blorp_surface_info src; brw_blorp_surface_info dst; enum gen6_hiz_op hiz_op; + enum gen7_fast_clear_op fast_clear_op; unsigned num_samples; bool use_wm_prog; brw_blorp_wm_push_constants wm_push_consts; diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp index b626659f97e..1f98360b51f 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp @@ -49,7 +49,8 @@ public: brw_blorp_clear_params(struct brw_context *brw, struct gl_framebuffer *fb, struct gl_renderbuffer *rb, - GLubyte *color_mask); + GLubyte *color_mask, + bool partial_clear); virtual uint32_t get_wm_prog(struct brw_context *brw, brw_blorp_prog_data **prog_data) const; @@ -105,10 +106,53 @@ 
brw_blorp_clear_program::~brw_blorp_clear_program() ralloc_free(mem_ctx); } + +/** + * Determine if fast color clear supports the given clear color. + * + * Fast color clear can only clear to color values of 1.0 or 0.0. At the + * moment we only support floating point, unorm, and snorm buffers. + */ +static bool +is_color_fast_clear_compatible(struct intel_context *intel, + gl_format format, + const union gl_color_union *color) +{ + if (_mesa_is_format_integer_color(format)) + return false; + + for (int i = 0; i < 4; i++) { + if (color->f[i] != 0.0 && color->f[i] != 1.0) { + perf_debug("Clear color unsupported by fast color clear. " + "Falling back to slow clear."); + return false; + } + } + return true; +} + + +/** + * Convert the given color to a bitfield suitable for ORing into DWORD 7 of + * SURFACE_STATE. + */ +static uint32_t +compute_fast_clear_color_bits(const union gl_color_union *color) +{ + uint32_t bits = 0; + for (int i = 0; i < 4; i++) { + if (color->f[i] != 0.0) + bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i)); + } + return bits; +} + + brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw, struct gl_framebuffer *fb, struct gl_renderbuffer *rb, - GLubyte *color_mask) + GLubyte *color_mask, + bool partial_clear) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; @@ -161,6 +205,56 @@ brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw, wm_prog_key.use_simd16_replicated_data = false; } } + + /* If we can do this as a fast color clear, do so. 
*/ + if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear && + wm_prog_key.use_simd16_replicated_data && + is_color_fast_clear_compatible(intel, format, &ctx->Color.ClearColor)) { + memset(push_consts, 0xff, 4*sizeof(float)); + fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR; + + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render + * Target(s)", beneath the "Fast Color Clear" bullet (p327): + * + * Clear pass must have a clear rectangle that must follow alignment + * rules in terms of pixels and lines as shown in the table + * below. Further, the clear-rectangle height and width must be + * multiple of the following dimensions. If the height and width of + * the render target being cleared do not meet these requirements, + * an MCS buffer can be created such that it follows the requirement + * and covers the RT. + * + * The alignment size in the table that follows is related to the + * alignment size returned by intel_get_non_msrt_mcs_alignment(), but + * with X alignment multiplied by 16 and Y alignment multiplied by 32. + */ + unsigned x_align, y_align; + intel_get_non_msrt_mcs_alignment(intel, irb->mt, &x_align, &y_align); + x_align *= 16; + y_align *= 32; + x0 = ROUND_DOWN_TO(x0, x_align); + y0 = ROUND_DOWN_TO(y0, y_align); + x1 = ALIGN(x1, x_align); + y1 = ALIGN(y1, y_align); + + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render + * Target(s)", beneath the "Fast Color Clear" bullet (p327): + * + * In order to optimize the performance MCS buffer (when bound to 1X + * RT) clear similarly to MCS buffer clear for MSRT case, clear rect + * is required to be scaled by the following factors in the + * horizontal and vertical directions: + * + * The X and Y scale down factors in the table that follows are each + * equal to half the alignment value computed above. 
+ */ + unsigned x_scaledown = x_align / 2; + unsigned y_scaledown = y_align / 2; + x0 /= x_scaledown; + y0 /= y_scaledown; + x1 /= x_scaledown; + y1 /= y_scaledown; + } } uint32_t @@ -264,7 +358,8 @@ brw_blorp_clear_program::compile(struct brw_context *brw, extern "C" { bool -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb) +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb, + bool partial_clear) { struct gl_context *ctx = &intel->ctx; struct brw_context *brw = brw_context(ctx); @@ -286,6 +381,7 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb) for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) { struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported, * the framebuffer can be complete with some attachments missing. In @@ -294,8 +390,53 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb) if (rb == NULL) continue; - brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf]); + brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf], + partial_clear); + + bool is_fast_clear = + (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR); + if (is_fast_clear) { + /* Record the clear color in the miptree so that it will be + * programmed in SURFACE_STATE by later rendering and resolve + * operations. + */ + uint32_t new_color_value = + compute_fast_clear_color_bits(&ctx->Color.ClearColor); + if (irb->mt->fast_clear_color_value != new_color_value) { + irb->mt->fast_clear_color_value = new_color_value; + brw->state.dirty.brw |= BRW_NEW_SURFACES; + } + + /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear is + * redundant and can be skipped. 
+ */ + if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR) + continue; + + /* If the MCS buffer hasn't been allocated yet, we need to allocate + * it now. + */ + if (!irb->mt->mcs_mt) { + if (!intel_miptree_alloc_non_msrt_mcs(intel, irb->mt)) { + /* MCS allocation failed--probably this will only happen in + * out-of-memory conditions. But in any case, try to recover + * by falling back to a non-blorp clear technique. + */ + return false; + } + brw->state.dirty.brw |= BRW_NEW_SURFACES; + } + } + brw_blorp_exec(intel, &params); + + if (is_fast_clear) { + /* Now that the fast clear has occurred, put the buffer in + * INTEL_MCS_STATE_CLEAR so that we won't waste time doing redundant + * clears. + */ + irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR; + } } return true; diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index 2b999bfb5b9..80b7a0c0751 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -234,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask) /* BLORP is currently only supported on Gen6+.
*/ if (intel->gen >= 6) { if (mask & BUFFER_BITS_COLOR) { - if (brw_blorp_clear_color(intel, fb)) { + if (brw_blorp_clear_color(intel, fb, partial_clear)) { debug_mask("blorp color", mask & BUFFER_BITS_COLOR); mask &= ~BUFFER_BITS_COLOR; } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index d61151f6506..ce1f71db9e7 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -557,6 +557,7 @@ #define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3) /* Surface state DW7 */ +#define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28 #define GEN7_SURFACE_SCS_R_SHIFT 25 #define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25) #define GEN7_SURFACE_SCS_G_SHIFT 22 @@ -1615,6 +1616,7 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11) # define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10) # define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8) # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) # define GEN7_PS_POSOFFSET_NONE (0 << 3) # define GEN7_PS_POSOFFSET_CENTROID (2 << 3) diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 208c66a28f5..1b2d3099491 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -199,11 +199,13 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, is_render_target); } + surf[7] = surface->mt->fast_clear_color_value; + if (intel->is_haswell) { - surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); + surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A)); } /* Emit relocation to surface contents */ @@ -584,6 +586,14 
@@ gen7_blorp_emit_ps_config(struct brw_context *brw, dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; } + switch (params->fast_clear_op) { + case GEN7_FAST_CLEAR_OP_FAST_CLEAR: + dw4 |= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE; + break; + default: + break; + } + BEGIN_BATCH(8); OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); OUT_BATCH(params->use_wm_prog ? prog_offset : 0); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 3164f994dd4..1a4e416d777 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -614,11 +614,13 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, irb->mt->mcs_mt, true /* is RT */); } + surf[7] = irb->mt->fast_clear_color_value; + if (intel->is_haswell) { - surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); + surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A)); } drm_intel_bo_emit_reloc(brw->intel.batch.bo, diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index a75ac81994b..ba941c099f2 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -1201,6 +1201,54 @@ intel_miptree_alloc_mcs(struct intel_context *intel, #endif } + +bool +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ +#ifdef I915 + assert(!"MCS not supported on i915"); + return false; +#else + assert(mt->mcs_mt == NULL); + + /* The format of the MCS buffer is opaque to the driver; all that matters + * is that we get its size and pitch right. 
We'll pretend that the format + * is R32. Since an MCS tile covers 128 blocks horizontally, and a Y-tiled + * R32 buffer is 32 pixels across, we'll need to scale the width down by + * the block width and then a further factor of 4. Since an MCS tile + * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high, + * we'll need to scale the height down by the block height and then a + * further factor of 8. + */ + const gl_format format = MESA_FORMAT_R_UINT32; + unsigned block_width_px; + unsigned block_height; + intel_get_non_msrt_mcs_alignment(intel, mt, &block_width_px, &block_height); + unsigned width_divisor = block_width_px * 4; + unsigned height_divisor = block_height * 8; + unsigned mcs_width = + ALIGN(mt->logical_width0, width_divisor) / width_divisor; + unsigned mcs_height = + ALIGN(mt->logical_height0, height_divisor) / height_divisor; + assert(mt->logical_depth0 == 1); + mt->mcs_mt = intel_miptree_create(intel, + mt->target, + format, + mt->first_level, + mt->last_level, + mcs_width, + mcs_height, + mt->logical_depth0, + true, + 0 /* num_samples */, + INTEL_MIPTREE_TILING_Y); + + return mt->mcs_mt; +#endif +} + + /** * Helper for intel_miptree_alloc_hiz() that sets * \c mt->level[level].slice[layer].has_hiz. Return true if and only if diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index e11d0d63a27..c44c8eaf4a9 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -459,6 +459,15 @@ struct intel_mipmap_tree enum intel_mcs_state mcs_state; #endif + /** + * The SURFACE_STATE bits associated with the last fast color clear to this + * color mipmap tree, if any. + * + * This value will only ever contain ones in bits 28-31, so it is safe to + * OR into dword 7 of SURFACE_STATE. 
+ */ + uint32_t fast_clear_color_value; + /* These are also refcounted: */ GLuint refcount; @@ -479,6 +488,10 @@ intel_get_non_msrt_mcs_alignment(struct intel_context *intel, struct intel_mipmap_tree *mt, unsigned *width_px, unsigned *height); +bool +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel, GLenum target, gl_format format, |