diff options
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_blorp.cpp | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_blorp.h | 11 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_blorp_clear.cpp | 149 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_clear.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_blorp.cpp | 18 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 48 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_mipmap_tree.h | 13 |
9 files changed, 240 insertions, 14 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index a2d02bfc5e0..9c9a4a7b38d 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -145,6 +145,7 @@ brw_blorp_params::brw_blorp_params() y1(0), depth_format(0), hiz_op(GEN6_HIZ_OP_NONE), + fast_clear_op(GEN7_FAST_CLEAR_OP_NONE), num_samples(0), use_wm_prog(false) { diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 51b23dbea12..08082060b7d 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -47,7 +47,8 @@ brw_blorp_blit_miptrees(struct intel_context *intel, bool mirror_x, bool mirror_y); bool -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb); +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb, + bool partial_clear); #ifdef __cplusplus } /* end extern "C" */ @@ -192,6 +193,13 @@ struct brw_blorp_prog_data bool persample_msaa_dispatch; }; + +enum gen7_fast_clear_op { + GEN7_FAST_CLEAR_OP_NONE, + GEN7_FAST_CLEAR_OP_FAST_CLEAR, +}; + + class brw_blorp_params { public: @@ -209,6 +217,7 @@ public: brw_blorp_surface_info src; brw_blorp_surface_info dst; enum gen6_hiz_op hiz_op; + enum gen7_fast_clear_op fast_clear_op; unsigned num_samples; bool use_wm_prog; brw_blorp_wm_push_constants wm_push_consts; diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp index b626659f97e..1f98360b51f 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp @@ -49,7 +49,8 @@ public: brw_blorp_clear_params(struct brw_context *brw, struct gl_framebuffer *fb, struct gl_renderbuffer *rb, - GLubyte *color_mask); + GLubyte *color_mask, + bool partial_clear); virtual uint32_t get_wm_prog(struct brw_context *brw, brw_blorp_prog_data **prog_data) const; @@ -105,10 +106,53 @@ brw_blorp_clear_program::~brw_blorp_clear_program() ralloc_free(mem_ctx); } + +/** + * Determine if fast color clear supports the given clear color. + * + * Fast color clear can only clear to color values of 1.0 or 0.0. At the + * moment we only support floating point, unorm, and snorm buffers. + */ +static bool +is_color_fast_clear_compatible(struct intel_context *intel, + gl_format format, + const union gl_color_union *color) +{ + if (_mesa_is_format_integer_color(format)) + return false; + + for (int i = 0; i < 4; i++) { + if (color->f[i] != 0.0 && color->f[i] != 1.0) { + perf_debug("Clear color unsupported by fast color clear. " + "Falling back to slow clear."); + return false; + } + } + return true; +} + + +/** + * Convert the given color to a bitfield suitable for ORing into DWORD 7 of + * SURFACE_STATE. + */ +static uint32_t +compute_fast_clear_color_bits(const union gl_color_union *color) +{ + uint32_t bits = 0; + for (int i = 0; i < 4; i++) { + if (color->f[i] != 0.0) + bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i)); + } + return bits; +} + + brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw, struct gl_framebuffer *fb, struct gl_renderbuffer *rb, - GLubyte *color_mask) + GLubyte *color_mask, + bool partial_clear) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; @@ -161,6 +205,56 @@ brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw, wm_prog_key.use_simd16_replicated_data = false; } } + + /* If we can do this as a fast color clear, do so. */ + if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear && + wm_prog_key.use_simd16_replicated_data && + is_color_fast_clear_compatible(intel, format, &ctx->Color.ClearColor)) { + memset(push_consts, 0xff, 4*sizeof(float)); + fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR; + + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render + * Target(s)", beneath the "Fast Color Clear" bullet (p327): + * + * Clear pass must have a clear rectangle that must follow alignment + * rules in terms of pixels and lines as shown in the table + * below. Further, the clear-rectangle height and width must be + * multiple of the following dimensions. If the height and width of + * the render target being cleared do not meet these requirements, + * an MCS buffer can be created such that it follows the requirement + * and covers the RT. + * + * The alignment size in the table that follows is related to the + * alignment size returned by intel_get_non_msrt_mcs_alignment(), but + * with X alignment multiplied by 16 and Y alignment multiplied by 32. + */ + unsigned x_align, y_align; + intel_get_non_msrt_mcs_alignment(intel, irb->mt, &x_align, &y_align); + x_align *= 16; + y_align *= 32; + x0 = ROUND_DOWN_TO(x0, x_align); + y0 = ROUND_DOWN_TO(y0, y_align); + x1 = ALIGN(x1, x_align); + y1 = ALIGN(y1, y_align); + + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render + * Target(s)", beneath the "Fast Color Clear" bullet (p327): + * + * In order to optimize the performance MCS buffer (when bound to 1X + * RT) clear similarly to MCS buffer clear for MSRT case, clear rect + * is required to be scaled by the following factors in the + * horizontal and vertical directions: + * + * The X and Y scale down factors in the table that follows are each + * equal to half the alignment value computed above. + */ + unsigned x_scaledown = x_align / 2; + unsigned y_scaledown = y_align / 2; + x0 /= x_scaledown; + y0 /= y_scaledown; + x1 /= x_scaledown; + y1 /= y_scaledown; + } } uint32_t @@ -264,7 +358,8 @@ brw_blorp_clear_program::compile(struct brw_context *brw, extern "C" { bool -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb) +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb, + bool partial_clear) { struct gl_context *ctx = &intel->ctx; struct brw_context *brw = brw_context(ctx); @@ -286,6 +381,7 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb) for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) { struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported, * the framebuffer can be complete with some attachments missing. In @@ -294,8 +390,53 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb) if (rb == NULL) continue; - brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf]); + brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf], + partial_clear); + + bool is_fast_clear = + (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR); + if (is_fast_clear) { + /* Record the clear color in the miptree so that it will be + * programmed in SURFACE_STATE by later rendering and resolve + * operations. + */ + uint32_t new_color_value = + compute_fast_clear_color_bits(&ctx->Color.ClearColor); + if (irb->mt->fast_clear_color_value != new_color_value) { + irb->mt->fast_clear_color_value = new_color_value; + brw->state.dirty.brw |= BRW_NEW_SURFACES; + } + + /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear is + * redundant and can be skipped. + */ + if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR) + continue; + + /* If the MCS buffer hasn't been allocated yet, we need to allocate + * it now. + */ + if (!irb->mt->mcs_mt) { + if (!intel_miptree_alloc_non_msrt_mcs(intel, irb->mt)) { + /* MCS allocation failed--probably this will only happen in + * out-of-memory conditions. But in any case, try to recover + * by falling back to a non-blorp clear technique. + */ + return false; + } + brw->state.dirty.brw |= BRW_NEW_SURFACES; + } + } + brw_blorp_exec(intel, ¶ms); + + if (is_fast_clear) { + /* Now that the fast clear has occurred, put the buffer in + * INTEL_MCS_STATE_CLEAR so that we won't waste time doing redundant + * clears. + */ + irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR; + } } return true; diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index 2b999bfb5b9..80b7a0c0751 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -234,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask) /* BLORP is currently only supported on Gen6+. */ if (intel->gen >= 6) { if (mask & BUFFER_BITS_COLOR) { - if (brw_blorp_clear_color(intel, fb)) { + if (brw_blorp_clear_color(intel, fb, partial_clear)) { debug_mask("blorp color", mask & BUFFER_BITS_COLOR); mask &= ~BUFFER_BITS_COLOR; } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index d61151f6506..ce1f71db9e7 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -557,6 +557,7 @@ #define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3) /* Surface state DW7 */ +#define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28 #define GEN7_SURFACE_SCS_R_SHIFT 25 #define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25) #define GEN7_SURFACE_SCS_G_SHIFT 22 @@ -1615,6 +1616,7 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11) # define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10) # define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8) # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) # define GEN7_PS_POSOFFSET_NONE (0 << 3) # define GEN7_PS_POSOFFSET_CENTROID (2 << 3) diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 208c66a28f5..1b2d3099491 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -199,11 +199,13 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, is_render_target); } + surf[7] = surface->mt->fast_clear_color_value; + if (intel->is_haswell) { - surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); + surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A)); } /* Emit relocation to surface contents */ @@ -584,6 +586,14 @@ gen7_blorp_emit_ps_config(struct brw_context *brw, dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; } + switch (params->fast_clear_op) { + case GEN7_FAST_CLEAR_OP_FAST_CLEAR: + dw4 |= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE; + break; + default: + break; + } + BEGIN_BATCH(8); OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); OUT_BATCH(params->use_wm_prog ? prog_offset : 0); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 3164f994dd4..1a4e416d777 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -614,11 +614,13 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, irb->mt->mcs_mt, true /* is RT */); } + surf[7] = irb->mt->fast_clear_color_value; + if (intel->is_haswell) { - surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); + surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A)); } drm_intel_bo_emit_reloc(brw->intel.batch.bo, diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index a75ac81994b..ba941c099f2 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -1201,6 +1201,54 @@ intel_miptree_alloc_mcs(struct intel_context *intel, #endif } + +bool +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ +#ifdef I915 + assert(!"MCS not supported on i915"); + return false; +#else + assert(mt->mcs_mt == NULL); + + /* The format of the MCS buffer is opaque to the driver; all that matters + * is that we get its size and pitch right. We'll pretend that the format + * is R32. Since an MCS tile covers 128 blocks horizontally, and a Y-tiled + * R32 buffer is 32 pixels across, we'll need to scale the width down by + * the block width and then a further factor of 4. Since an MCS tile + * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high, + * we'll need to scale the height down by the block height and then a + * further factor of 8. + */ + const gl_format format = MESA_FORMAT_R_UINT32; + unsigned block_width_px; + unsigned block_height; + intel_get_non_msrt_mcs_alignment(intel, mt, &block_width_px, &block_height); + unsigned width_divisor = block_width_px * 4; + unsigned height_divisor = block_height * 8; + unsigned mcs_width = + ALIGN(mt->logical_width0, width_divisor) / width_divisor; + unsigned mcs_height = + ALIGN(mt->logical_height0, height_divisor) / height_divisor; + assert(mt->logical_depth0 == 1); + mt->mcs_mt = intel_miptree_create(intel, + mt->target, + format, + mt->first_level, + mt->last_level, + mcs_width, + mcs_height, + mt->logical_depth0, + true, + 0 /* num_samples */, + INTEL_MIPTREE_TILING_Y); + + return mt->mcs_mt; +#endif +} + + /** * Helper for intel_miptree_alloc_hiz() that sets * \c mt->level[level].slice[layer].has_hiz. Return true if and only if diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index e11d0d63a27..c44c8eaf4a9 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -459,6 +459,15 @@ struct intel_mipmap_tree enum intel_mcs_state mcs_state; #endif + /** + * The SURFACE_STATE bits associated with the last fast color clear to this + * color mipmap tree, if any. + * + * This value will only ever contain ones in bits 28-31, so it is safe to + * OR into dword 7 of SURFACE_STATE. + */ + uint32_t fast_clear_color_value; + /* These are also refcounted: */ GLuint refcount; @@ -479,6 +488,10 @@ intel_get_non_msrt_mcs_alignment(struct intel_context *intel, struct intel_mipmap_tree *mt, unsigned *width_px, unsigned *height); +bool +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel, GLenum target, gl_format format, |