summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Berry <[email protected]>2012-04-29 21:41:42 -0700
committerPaul Berry <[email protected]>2012-05-15 15:09:23 -0700
commit19e9b24626c2b9d7abef054d57bb2a52106c545b (patch)
tree400b049b32a91ad064f94dadd38929f6a65b6768
parent506d70be21cd3469118de89297cba0c0f709c1ae (diff)
i965/gen6: Initial implementation of MSAA.
This patch enables MSAA for Gen6, by modifying intel_mipmap_tree to understand multisampled buffers, adapting the rendering pipeline setup to enable multisampled rendering, and adding multisample resolve operations to brw_blorp_blit.cpp. Some preparation work is also included for Gen7, but it is not yet enabled. MSAA support is still fairly preliminary. In particular, the following are not yet supported: - Fully general blits between MSAA and non-MSAA buffers. - Formats other than RGBA8, DEPTH24, and STENCIL8. - Centroid interpolation. - Coverage parameters (glSampleCoverage, GL_SAMPLE_ALPHA_TO_COVERAGE, GL_SAMPLE_ALPHA_TO_ONE, GL_SAMPLE_COVERAGE, GL_SAMPLE_COVERAGE_VALUE, GL_SAMPLE_COVERAGE_INVERT). Fixes piglit tests "EXT_framebuffer_multisample/accuracy" on i965/Gen6. v2: - In intel_alloc_renderbuffer_storage(), quantize the requested number of samples to the next higher sample count supported by the hardware. This ensures that a query of GL_SAMPLES will return the correct value. It also ensures that MSAA is fully disabled on Gen7 for now (since Gen7 MSAA support doesn't work yet). - When reading from a non-MSAA surface, ensure that s_is_zero is true so that we won't try to read from a nonexistent sample.
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp.cpp10
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp.h30
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp_blit.cpp332
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h7
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c33
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c16
-rw-r--r--src/mesa/drivers/dri/i965/gen6_blorp.cpp44
-rw-r--r--src/mesa/drivers/dri/i965/gen6_multisample_state.c102
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sf_state.c15
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c12
-rw-r--r--src/mesa/drivers/dri/i965/gen7_blorp.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/gen7_sf_state.c14
-rw-r--r--src/mesa/drivers/dri/i965/gen7_wm_state.c18
-rw-r--r--src/mesa/drivers/dri/i965/gen7_wm_surface_state.c16
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.c31
-rw-r--r--src/mesa/drivers/dri/intel/intel_mipmap_tree.c52
-rw-r--r--src/mesa/drivers/dri/intel/intel_mipmap_tree.h10
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_image.c3
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_validate.c3
23 files changed, 662 insertions, 121 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 182b432ed45..3847f9187d2 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -88,6 +88,7 @@ i965_C_FILES = \
gen6_clip_state.c \
gen6_depthstencil.c \
gen6_gs_state.c \
+ gen6_multisample_state.c \
gen6_sampler_state.c \
gen6_scissor_state.c \
gen6_sf_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index 762d7350632..8e225117fab 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -36,7 +36,8 @@ brw_blorp_mip_info::brw_blorp_mip_info()
}
brw_blorp_surface_info::brw_blorp_surface_info()
- : map_stencil_as_y_tiled(false)
+ : map_stencil_as_y_tiled(false),
+ num_samples(0)
{
}
@@ -60,11 +61,15 @@ brw_blorp_surface_info::set(struct intel_mipmap_tree *mt,
if (mt->format == MESA_FORMAT_S8) {
/* The miptree is a W-tiled stencil buffer. Surface states can't be set
* up for W tiling, so we'll need to use Y tiling and have the WM
- * program swizzle the coordinates.
+ * program swizzle the coordinates. Furthermore, we need to set up the
+ * surface state as single-sampled, because the memory layout of related
+ * samples doesn't match between W and Y tiling.
*/
this->map_stencil_as_y_tiled = true;
+ this->num_samples = 0;
} else {
this->map_stencil_as_y_tiled = false;
+ this->num_samples = mt->num_samples;
}
}
@@ -88,6 +93,7 @@ brw_blorp_params::brw_blorp_params()
y1(0),
depth_format(0),
hiz_op(GEN6_HIZ_OP_NONE),
+ num_samples(0),
use_wm_prog(false)
{
}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index b6b659dbdae..f14a5c7aae0 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -97,6 +97,8 @@ public:
* width and height of the buffer.
*/
bool map_stencil_as_y_tiled;
+
+ unsigned num_samples;
};
@@ -151,6 +153,7 @@ public:
brw_blorp_surface_info src;
brw_blorp_surface_info dst;
enum gen6_hiz_op hiz_op;
+ unsigned num_samples;
bool use_wm_prog;
brw_blorp_wm_push_constants wm_push_consts;
};
@@ -177,16 +180,39 @@ public:
struct brw_blorp_blit_prog_key
{
+ /* Number of samples per pixel that have been configured in the surface
+ * state for texturing from.
+ */
+ unsigned tex_samples;
+
+ /* Actual number of samples per pixel in the source image. */
+ unsigned src_samples;
+
+ /* Number of samples per pixel that have been configured in the render
+ * target.
+ */
+ unsigned rt_samples;
+
+ /* Actual number of samples per pixel in the destination image. */
+ unsigned dst_samples;
+
/* True if the source image is W tiled. If true, the surface state for the
- * source image must be configured as Y tiled.
+ * source image must be configured as Y tiled, and tex_samples must be 0.
*/
bool src_tiled_w;
/* True if the destination image is W tiled. If true, the surface state
- * for the render target must be configured as Y tiled.
+ * for the render target must be configured as Y tiled, and rt_samples must
+ * be 0.
*/
bool dst_tiled_w;
+ /* True if all source samples should be blended together to produce each
+ * destination pixel. If true, src_tiled_w must be false, tex_samples must
+ * equal src_samples, and tex_samples must be nonzero.
+ */
+ bool blend;
+
/* True if the rectangle being sent through the rendering pipeline might be
* larger than the destination rectangle, so the WM program should kill any
* pixels that are outside the destination rectangle.
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index cce5d1b560e..1f0c3185394 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -215,11 +215,29 @@ brw_blorp_framebuffer(struct intel_context *intel,
*
* The bulk of the work done by the WM program is to wrap and unwrap the
* coordinate transformations used by the hardware to store surfaces in
- * memory. The hardware transforms a pixel location (X, Y) to a memory offset
- * by the following formulas:
+ * memory. The hardware transforms a pixel location (X, Y, S) (where S is the
+ * sample index for a multisampled surface) to a memory offset by the
+ * following formulas:
*
- * offset = tile(tiling_format, X, Y)
- * (X, Y) = detile(tiling_format, offset)
+ * offset = tile(tiling_format, encode_msaa(num_samples, X, Y, S))
+ * (X, Y, S) = decode_msaa(num_samples, detile(tiling_format, offset))
+ *
+ * For a single-sampled surface, encode_msaa() and decode_msaa() are the
+ * identity function:
+ *
+ * encode_msaa(1, X, Y, 0) = (X, Y)
+ * decode_msaa(1, X, Y) = (X, Y, 0)
+ *
+ * For a 4x multisampled surface, encode_msaa() embeds the sample number into
+ * bit 1 of the X and Y coordinates:
+ *
+ * encode_msaa(4, X, Y, S) = (X', Y')
+ * where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1)
+ * Y' = (Y & ~0b1 ) << 1 | (S & 0b10) | (Y & 0b1)
+ * decode_msaa(4, X, Y) = (X', Y', S)
+ * where X' = (X & ~0b11) >> 1 | (X & 0b1)
+ * Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
+ * S = (Y & 0b10) | (X & 0b10) >> 1
*
* For X tiling, tile() combines together the low-order bits of the X and Y
* coordinates in the pattern 0byyyxxxxxxxxx, creating 4k tiles that are 512
@@ -239,7 +257,7 @@ brw_blorp_framebuffer(struct intel_context *intel,
* | (A & 0b111111111)
*
* (In all tiling formulas, cpp is the number of bytes occupied by a single
- * pixel ("chars per pixel"), and tile_pitch is the number of 4k tiles
+ * sample ("chars per pixel"), and tile_pitch is the number of 4k tiles
* required to fill the width of the surface).
*
* For Y tiling, tile() combines together the low-order bits of the X and Y
@@ -301,7 +319,7 @@ brw_blorp_framebuffer(struct intel_context *intel,
* X' = A % pitch
*
* (In these formulas, pitch is the number of bytes occupied by a single row
- * of pixels).
+ * of samples).
*/
class brw_blorp_blit_program
{
@@ -319,8 +337,12 @@ private:
void alloc_push_const_regs(int base_reg);
void compute_frag_coords();
void translate_tiling(bool old_tiled_w, bool new_tiled_w);
+ void encode_msaa(unsigned num_samples);
+ void decode_msaa(unsigned num_samples);
void kill_if_outside_dst_rect();
void translate_dst_to_src();
+ void single_to_blend();
+ void sample();
void texel_fetch();
void texture_lookup(GLuint msg_type,
struct brw_reg mrf_u, struct brw_reg mrf_v);
@@ -364,6 +386,14 @@ private:
*/
int xy_coord_index;
+ /* True if, at the point in the program currently being compiled, the
+ * sample index is known to be zero.
+ */
+ bool s_is_zero;
+
+ /* Register storing the sample index when s_is_zero is false. */
+ struct brw_reg sample_index;
+
/* Temporaries */
struct brw_reg t1;
struct brw_reg t2;
@@ -395,6 +425,37 @@ const GLuint *
brw_blorp_blit_program::compile(struct brw_context *brw,
GLuint *program_size)
{
+ /* Sanity checks */
+ if (key->src_tiled_w) {
+ /* If the source image is W tiled, then tex_samples must be 0.
+ * Otherwise, after conversion between W and Y tiling, there's no
+ * guarantee that the sample index will be 0.
+ */
+ assert(key->tex_samples == 0);
+ }
+
+ if (key->dst_tiled_w) {
+ /* If the destination image is W tiled, then rt_samples must be 0.
+ * Otherwise, after conversion between W and Y tiling, there's no
+ * guarantee that all samples corresponding to a single pixel will still
+ * be together.
+ */
+ assert(key->rt_samples == 0);
+ }
+
+ if (key->blend) {
+ /* We are blending, which means we'll be using a SAMPLE message, which
+ * causes the hardware to pick up all of the samples corresponding
+ * to this pixel and average them together. Since we'll be relying on
+ * the hardware to find all of the samples and combine them together,
+ * the surface state for the texture must be configured with the correct
+ * tiling and sample count.
+ */
+ assert(!key->src_tiled_w);
+ assert(key->tex_samples == key->src_samples);
+ assert(key->tex_samples > 0);
+ }
+
brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
alloc_regs();
@@ -405,22 +466,29 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
const bool tex_tiled_w = false;
/* The address that data will be written to is determined by the
- * coordinates supplied to the WM thread and the tiling of the render
- * target, according to the formula:
+ * coordinates supplied to the WM thread and the tiling and sample count of
+ * the render target, according to the formula:
*
- * (X, Y) = detile(rt_tiling, offset)
+ * (X, Y, S) = decode_msaa(rt_samples, detile(rt_tiling, offset))
*
- * If the actual tiling of the destination surface is not the same as the
- * configuration of the render target, then these coordinates are wrong and
- * we have to adjust them to compensate for the difference.
+ * If the actual tiling and sample count of the destination surface are not
+ * the same as the configuration of the render target, then these
+ * coordinates are wrong and we have to adjust them to compensate for the
+ * difference.
*/
- if (rt_tiled_w != key->dst_tiled_w)
+ if (rt_tiled_w != key->dst_tiled_w ||
+ key->rt_samples != key->dst_samples) {
+ encode_msaa(key->rt_samples);
+ /* Now (X, Y) = detile(rt_tiling, offset) */
translate_tiling(rt_tiled_w, key->dst_tiled_w);
+ /* Now (X, Y) = detile(dst_tiling, offset) */
+ decode_msaa(key->dst_samples);
+ }
- /* Now (X, Y) = detile(dst_tiling, offset).
+ /* Now (X, Y, S) = decode_msaa(dst_samples, detile(dst_tiling, offset)).
*
- * That is: X and Y now contain the true coordinates of the data that the
- * WM thread should output.
+ * That is: X, Y and S now contain the true coordinates and sample index of
+ * the data that the WM thread should output.
*
* If we need to kill pixels that are outside the destination rectangle,
* now is the time to do it.
@@ -432,31 +500,50 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
/* Next, apply a translation to obtain coordinates in the source image. */
translate_dst_to_src();
- /* X and Y are now the coordinates of the pixel in the source image that we
- * want to texture from.
- *
- * The address that we want to fetch from is
- * related to the X and Y values according to the formula:
- *
- * (X, Y) = detile(src_tiling, offset).
- *
- * If the actual tiling of the source surface is not the same as the
- * configuration of the texture, then we need to adjust the coordinates to
- * compensate for the difference.
+ /* If the source image is not multisampled, then we want to fetch sample
+ * number 0, because that's the only sample there is.
*/
- if (tex_tiled_w != key->src_tiled_w)
- translate_tiling(key->src_tiled_w, tex_tiled_w);
+ if (key->src_samples == 0)
+ s_is_zero = true;
- /* Now (X, Y) = detile(tex_tiling, offset).
- *
- * In other words: X and Y now contain values which, when passed to
- * the texturing unit, will cause data to be read from the correct
- * memory location. So we can fetch the texel now.
+ /* X, Y, and S are now the coordinates of the pixel in the source image
+ * that we want to texture from. Exception: if we are blending, then S is
+ * irrelevant, because we are going to fetch all samples.
*/
- texel_fetch();
+ if (key->blend) {
+ single_to_blend();
+ sample();
+ } else {
+ /* We aren't blending, which means we just want to fetch a single sample
+ * from the source surface. The address that we want to fetch from is
+ * related to the X, Y and S values according to the formula:
+ *
+ * (X, Y, S) = decode_msaa(src_samples, detile(src_tiling, offset)).
+ *
+ * If the actual tiling and sample count of the source surface are not
+ * the same as the configuration of the texture, then we need to adjust
+ * the coordinates to compensate for the difference.
+ */
+ if (tex_tiled_w != key->src_tiled_w ||
+ key->tex_samples != key->src_samples) {
+ encode_msaa(key->src_samples);
+ /* Now (X, Y) = detile(src_tiling, offset) */
+ translate_tiling(key->src_tiled_w, tex_tiled_w);
+ /* Now (X, Y) = detile(tex_tiling, offset) */
+ decode_msaa(key->tex_samples);
+ }
- /* Finally, write the fetched value to the render target and terminate the
- * thread.
+ /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)).
+ *
+ * In other words: X, Y, and S now contain values which, when passed to
+ * the texturing unit, will cause data to be read from the correct
+ * memory location. So we can fetch the texel now.
+ */
+ texel_fetch();
+ }
+
+ /* Finally, write the fetched (or blended) value to the render target and
+ * terminate the thread.
*/
render_target_write();
return brw_get_program(&func, program_size);
@@ -499,6 +586,8 @@ brw_blorp_blit_program::alloc_regs()
= vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
}
this->xy_coord_index = 0;
+ this->sample_index
+ = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
this->t1 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
this->t2 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
@@ -511,11 +600,14 @@ brw_blorp_blit_program::alloc_regs()
/* In the code that follows, X and Y can be used to quickly refer to the
* active elements of x_coords and y_coords, and Xp and Yp ("X prime" and "Y
* prime") to the inactive elements.
+ *
+ * S can be used to quickly refer to sample_index.
*/
#define X x_coords[xy_coord_index]
#define Y y_coords[xy_coord_index]
#define Xp x_coords[!xy_coord_index]
#define Yp y_coords[!xy_coord_index]
+#define S sample_index
/* Quickly swap the roles of (X, Y) and (Xp, Yp). Saves us from having to do
* MOVs to transfer (Xp, Yp) to (X, Y) after a coordinate transformation.
@@ -564,6 +656,12 @@ brw_blorp_blit_program::compute_frag_coords()
* pixels n+2 and n+3 are in the bottom half of the subspan.
*/
brw_ADD(&func, Y, stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
+
+ /* Since we always run the WM in a mode that causes a single fragment
+ * dispatch per pixel, it's not meaningful to compute a sample value. Just
+ * set it to 0.
+ */
+ s_is_zero = true;
}
/**
@@ -656,6 +754,86 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w)
}
/**
+ * Emit code to compensate for the difference between MSAA and non-MSAA
+ * surfaces.
+ *
+ * This code modifies the X and Y coordinates according to the formula:
+ *
+ * (X', Y') = encode_msaa(num_samples, X, Y, S)
+ *
+ * (See brw_blorp_blit_program).
+ */
+void
+brw_blorp_blit_program::encode_msaa(unsigned num_samples)
+{
+ if (num_samples == 0) {
+ /* No translation necessary. */
+ } else {
+ /* encode_msaa_4x(X, Y, S) = (X', Y')
+ * where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1)
+ * Y' = (Y & ~0b1 ) << 1 | (S & 0b10) | (Y & 0b1)
+ */
+ brw_AND(&func, t1, X, brw_imm_uw(0xfffe)); /* X & ~0b1 */
+ if (!s_is_zero) {
+ brw_AND(&func, t2, S, brw_imm_uw(1)); /* S & 0b1 */
+ brw_OR(&func, t1, t1, t2); /* (X & ~0b1) | (S & 0b1) */
+ }
+ brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b1) << 1
+ | (S & 0b1) << 1 */
+ brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
+ brw_OR(&func, Xp, t1, t2);
+ brw_AND(&func, t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */
+ brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
+ if (!s_is_zero) {
+ brw_AND(&func, t2, S, brw_imm_uw(2)); /* S & 0b10 */
+ brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (S & 0b10) */
+ }
+ brw_AND(&func, t2, Y, brw_imm_uw(1));
+ brw_OR(&func, Yp, t1, t2);
+ SWAP_XY_AND_XPYP();
+ }
+}
+
+/**
+ * Emit code to compensate for the difference between MSAA and non-MSAA
+ * surfaces.
+ *
+ * This code modifies the X and Y coordinates according to the formula:
+ *
+ * (X', Y', S) = decode_msaa(num_samples, X, Y)
+ *
+ * (See brw_blorp_blit_program).
+ */
+void
+brw_blorp_blit_program::decode_msaa(unsigned num_samples)
+{
+ if (num_samples == 0) {
+ /* No translation necessary. */
+ s_is_zero = true;
+ } else {
+ /* decode_msaa_4x(X, Y) = (X', Y', S)
+ * where X' = (X & ~0b11) >> 1 | (X & 0b1)
+ * Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
+ * S = (Y & 0b10) | (X & 0b10) >> 1
+ */
+ brw_AND(&func, t1, X, brw_imm_uw(0xfffc)); /* X & ~0b11 */
+ brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b11) >> 1 */
+ brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
+ brw_OR(&func, Xp, t1, t2);
+ brw_AND(&func, t1, Y, brw_imm_uw(0xfffc)); /* Y & ~0b11 */
+ brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b11) >> 1 */
+ brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
+ brw_OR(&func, Yp, t1, t2);
+ brw_AND(&func, t1, Y, brw_imm_uw(2)); /* Y & 0b10 */
+ brw_AND(&func, t2, X, brw_imm_uw(2)); /* X & 0b10 */
+ brw_SHR(&func, t2, t2, brw_imm_uw(1)); /* (X & 0b10) >> 1 */
+ brw_OR(&func, S, t1, t2);
+ s_is_zero = false;
+ SWAP_XY_AND_XPYP();
+ }
+}
+
+/**
* Emit code that kills pixels whose X and Y coordinates are outside the
* boundary of the rectangle defined by the push constants (dst_x0, dst_y0,
* dst_x1, dst_y1).
@@ -694,12 +872,43 @@ brw_blorp_blit_program::translate_dst_to_src()
}
/**
+ * Emit code to transform the X and Y coordinates as needed for blending
+ * together the different samples in an MSAA texture.
+ */
+void
+brw_blorp_blit_program::single_to_blend()
+{
+ /* When looking up samples in an MSAA texture using the SAMPLE message,
+ * Gen6 requires the texture coordinates to be odd integers (so that they
+ * correspond to the center of a 2x2 block representing the four samples
+ * that make up a pixel). So we need to multiply our X and Y coordinates
+ * each by 2 and then add 1.
+ */
+ brw_SHL(&func, t1, X, brw_imm_w(1));
+ brw_SHL(&func, t2, Y, brw_imm_w(1));
+ brw_ADD(&func, Xp, t1, brw_imm_w(1));
+ brw_ADD(&func, Yp, t2, brw_imm_w(1));
+ SWAP_XY_AND_XPYP();
+}
+
+/**
+ * Emit code to look up a value in the texture using the SAMPLE message (which
+ * does blending of MSAA surfaces).
+ */
+void
+brw_blorp_blit_program::sample()
+{
+ texture_lookup(GEN5_SAMPLER_MESSAGE_SAMPLE, mrf_u_float, mrf_v_float);
+}
+
+/**
* Emit code to look up a value in the texture using the SAMPLE_LD message
* (which does a simple texel fetch).
*/
void
brw_blorp_blit_program::texel_fetch()
{
+ assert(s_is_zero);
texture_lookup(GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
retype(mrf_u_float, BRW_REGISTER_TYPE_UD),
retype(mrf_v_float, BRW_REGISTER_TYPE_UD));
@@ -816,6 +1025,39 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct intel_mipmap_tree *src_mt,
use_wm_prog = true;
memset(&wm_prog_key, 0, sizeof(wm_prog_key));
+ if (src_mt->num_samples > 0 && dst_mt->num_samples > 0) {
+ /* We are blitting from a multisample buffer to a multisample buffer, so
+ * we must preserve samples within a pixel. This means we have to
+ * configure the render target and texture surface states as
+ * single-sampled, so that the WM program can access each sample
+ * individually.
+ */
+ src.num_samples = dst.num_samples = 0;
+ }
+
+ /* The render path must be configured to use the same number of samples as
+ * the destination buffer.
+ */
+ num_samples = dst.num_samples;
+
+ GLenum base_format = _mesa_get_format_base_format(src_mt->format);
+ if (base_format != GL_DEPTH_COMPONENT && /* TODO: what about depth/stencil? */
+ base_format != GL_STENCIL_INDEX &&
+ src_mt->num_samples > 0 && dst_mt->num_samples == 0) {
+ /* We are downsampling a color buffer, so blend. */
+ wm_prog_key.blend = true;
+ }
+
+ /* src_samples and dst_samples are the true sample counts */
+ wm_prog_key.src_samples = src_mt->num_samples;
+ wm_prog_key.dst_samples = dst_mt->num_samples;
+
+ /* tex_samples and rt_samples are the sample counts that are set up in
+ * SURFACE_STATE.
+ */
+ wm_prog_key.tex_samples = src.num_samples;
+ wm_prog_key.rt_samples = dst.num_samples;
+
wm_prog_key.src_tiled_w = src.map_stencil_as_y_tiled;
wm_prog_key.dst_tiled_w = dst.map_stencil_as_y_tiled;
x0 = wm_push_consts.dst_x0 = dst_x0;
@@ -825,6 +1067,22 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct intel_mipmap_tree *src_mt,
wm_push_consts.x_transform.setup(src_x0, dst_x0, dst_x1, mirror_x);
wm_push_consts.y_transform.setup(src_y0, dst_y0, dst_y1, mirror_y);
+ if (dst.num_samples == 0 && dst_mt->num_samples > 0) {
+ /* We must expand the rectangle we send through the rendering pipeline,
+ * to account for the fact that we are mapping the destination region as
+ * single-sampled when it is in fact multisampled. We must also align
+ * it to a multiple of the multisampling pattern, because the
+ * differences between multisampled and single-sampled surface formats
+ * will mean that pixels are scrambled within the multisampling pattern.
+ * TODO: what if this makes the coordinates too large?
+ */
+ x0 = (x0 * 2) & ~3;
+ y0 = (y0 * 2) & ~3;
+ x1 = ALIGN(x1 * 2, 4);
+ y1 = ALIGN(y1 * 2, 4);
+ wm_prog_key.use_kill = true;
+ }
+
if (dst.map_stencil_as_y_tiled) {
/* We must modify the rectangle we send through the rendering pipeline,
* to account for the fact that we are mapping it as Y-tiled when it is
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 8ffd208ef64..a7684166949 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1099,6 +1099,14 @@ brw_blorp_framebuffer(struct intel_context *intel,
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
GLbitfield mask, GLenum filter);
+/* gen6_multisample_state.c */
+void
+gen6_emit_3dstate_multisample(struct brw_context *brw,
+ unsigned num_samples);
+void
+gen6_emit_3dstate_sample_mask(struct brw_context *brw,
+ unsigned num_samples);
+
/*======================================================================
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 01bad5c1892..aaab5a2158f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -456,6 +456,11 @@
/* Surface state DW4 */
#define BRW_SURFACE_MIN_LOD_SHIFT 28
#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28)
+#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4)
+#define BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4)
+#define GEN7_SURFACE_MULTISAMPLECOUNT_1 0
+#define GEN7_SURFACE_MULTISAMPLECOUNT_4 2
+#define GEN7_SURFACE_MULTISAMPLECOUNT_8 3
/* Surface state DW5 */
#define BRW_SURFACE_X_OFFSET_SHIFT 25
@@ -1305,6 +1310,7 @@ enum brw_wm_barycentric_interp_mode {
# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1)
# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1)
# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1)
+# define GEN6_WM_MSDISPMODE_PERSAMPLE (0 << 0)
# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0)
/* DW7: kernel 1 pointer */
/* DW8: kernel 2 pointer */
@@ -1388,6 +1394,7 @@ enum brw_wm_barycentric_interp_mode {
# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0)
# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0)
/* DW2 */
+# define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31)
# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31)
#define _3DSTATE_PS 0x7820 /* GEN7+ */
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 0c0389f8bdf..b00278a233d 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -782,33 +782,16 @@ static void upload_invariant_state( struct brw_context *brw )
ADVANCE_BATCH();
}
- if (intel->gen >= 6) {
+ if (intel->gen == 6) {
int i;
- int len = intel->gen >= 7 ? 4 : 3;
-
- BEGIN_BATCH(len);
- OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
- OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
- MS_NUMSAMPLES_1);
- OUT_BATCH(0); /* positions for 4/8-sample */
- if (intel->gen >= 7)
- OUT_BATCH(0);
- ADVANCE_BATCH();
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
- OUT_BATCH(1);
- ADVANCE_BATCH();
-
- if (intel->gen < 7) {
- for (i = 0; i < 4; i++) {
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
- OUT_BATCH(i << SVB_INDEX_SHIFT);
- OUT_BATCH(0);
- OUT_BATCH(0xffffffff);
- ADVANCE_BATCH();
- }
+ for (i = 0; i < 4; i++) {
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
+ OUT_BATCH(i << SVB_INDEX_SHIFT);
+ OUT_BATCH(0);
+ OUT_BATCH(0xffffffff);
+ ADVANCE_BATCH();
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 9e3736170c7..89d09637081 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -95,6 +95,7 @@ extern const struct brw_tracked_state gen6_color_calc_state;
extern const struct brw_tracked_state gen6_depth_stencil_state;
extern const struct brw_tracked_state gen6_gs_state;
extern const struct brw_tracked_state gen6_gs_binding_table;
+extern const struct brw_tracked_state gen6_multisample_state;
extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
extern const struct brw_tracked_state gen6_sampler_state;
extern const struct brw_tracked_state gen6_scissor_state;
@@ -181,6 +182,7 @@ void *brw_state_batch(struct brw_context *brw,
/* brw_wm_surface_state.c */
void gen4_init_vtable_surface_functions(struct brw_context *brw);
uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
+uint32_t brw_get_surface_num_multisamples(unsigned num_samples);
void brw_create_constant_surface(struct brw_context *brw,
drm_intel_bo *bo,
int width,
@@ -197,6 +199,8 @@ GLuint translate_tex_format(gl_format mesa_format,
/* gen7_wm_surface_state.c */
void gen7_set_surface_tiling(struct gen7_surface_state *surf, uint32_t tiling);
+void gen7_set_surface_num_multisamples(struct gen7_surface_state *surf,
+ unsigned num_samples);
void gen7_init_vtable_surface_functions(struct brw_context *brw);
void gen7_create_constant_surface(struct brw_context *brw,
drm_intel_bo *bo,
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index b02e1600d62..551fa6a5df1 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -153,6 +153,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&brw_samplers,
&gen6_sampler_state,
+ &gen6_multisample_state, /* TODO: is this the right spot? */
&gen6_vs_state,
&gen6_gs_state,
@@ -221,6 +222,7 @@ const struct brw_tracked_state *gen7_atoms[] =
&brw_wm_binding_table,
&gen7_samplers,
+ &gen6_multisample_state, /* TODO: is this the right spot? */
&gen7_disable_stages,
&gen7_vs_state,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 104d475f3f9..849da852277 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -633,6 +633,17 @@ brw_get_surface_tiling_bits(uint32_t tiling)
}
}
+
+uint32_t
+brw_get_surface_num_multisamples(unsigned num_samples)
+{
+ if (num_samples > 0)
+ return BRW_SURFACE_MULTISAMPLECOUNT_4;
+ else
+ return BRW_SURFACE_MULTISAMPLECOUNT_1;
+}
+
+
static void
brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
{
@@ -943,7 +954,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
intel_image->base.Base.Level,
intel_image->base.Base.Level,
width, height, depth,
- true);
+ true,
+ 0 /* num_samples */);
intel_miptree_copy_teximage(intel, intel_image, new_mt);
intel_miptree_reference(&irb->mt, intel_image->mt);
@@ -993,7 +1005,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
((region->pitch * region->cpp) - 1) << BRW_SURFACE_PITCH_SHIFT);
- surf[4] = 0;
+ surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
/* Note that the low bits of these fields are missing, so
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 00aeda63e60..6db8f40c33b 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -100,28 +100,8 @@ gen6_blorp_emit_batch_head(struct brw_context *brw,
ADVANCE_BATCH();
}
- /* 3DSTATE_MULTISAMPLE */
- {
- int length = intel->gen == 7 ? 4 : 3;
-
- BEGIN_BATCH(length);
- OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (length - 2));
- OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
- MS_NUMSAMPLES_1);
- OUT_BATCH(0);
- if (length >= 4)
- OUT_BATCH(0);
- ADVANCE_BATCH();
-
- }
-
- /* 3DSTATE_SAMPLE_MASK */
- {
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
- OUT_BATCH(1);
- ADVANCE_BATCH();
- }
+ gen6_emit_3dstate_multisample(brw, params->num_samples);
+ gen6_emit_3dstate_sample_mask(brw, params->num_samples);
/* CMD_STATE_BASE_ADDRESS
*
@@ -426,6 +406,10 @@ gen6_blorp_emit_surface_state(struct brw_context *brw,
uint32_t wm_surf_offset;
uint32_t width, height;
surface->get_miplevel_dims(&width, &height);
+ if (surface->num_samples > 0) { /* TODO: seems clumsy */
+ width /= 2;
+ height /= 2;
+ }
if (surface->map_stencil_as_y_tiled) {
width *= 2;
height /= 2;
@@ -462,7 +446,7 @@ gen6_blorp_emit_surface_state(struct brw_context *brw,
0 << BRW_SURFACE_DEPTH_SHIFT |
(pitch_bytes - 1) << BRW_SURFACE_PITCH_SHIFT);
- surf[4] = 0;
+ surf[4] = brw_get_surface_num_multisamples(surface->num_samples);
surf[5] = (0 << BRW_SURFACE_X_OFFSET_SHIFT |
0 << BRW_SURFACE_Y_OFFSET_SHIFT |
@@ -695,7 +679,9 @@ gen6_blorp_emit_sf_config(struct brw_context *brw,
OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */
1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
- for (int i = 0; i < 18; ++i)
+ OUT_BATCH(0); /* dw2 */
+ OUT_BATCH(params->num_samples > 0 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
+ for (int i = 0; i < 16; ++i)
OUT_BATCH(0);
ADVANCE_BATCH();
}
@@ -754,6 +740,14 @@ gen6_blorp_emit_wm_config(struct brw_context *brw,
dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */
}
+ if (params->num_samples > 0) {
+ dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
+ dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
+ } else {
+ dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
+ dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
+ }
+
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
@@ -761,7 +755,7 @@ gen6_blorp_emit_wm_config(struct brw_context *brw,
OUT_BATCH(0); /* No scratch needed */
OUT_BATCH(dw4);
OUT_BATCH(dw5);
- OUT_BATCH(dw6); /* only position */
+ OUT_BATCH(dw6);
OUT_BATCH(0); /* No other programs */
OUT_BATCH(0); /* No other programs */
ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
new file mode 100644
index 00000000000..e01ead10522
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "intel_batchbuffer.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+
+/**
+ * Emit 3DSTATE_MULTISAMPLE: MS_NUMSAMPLES_4 when num_samples > 0, else MS_NUMSAMPLES_1.
+ */
+void
+gen6_emit_3dstate_multisample(struct brw_context *brw,
+ unsigned num_samples)
+{
+ struct intel_context *intel = &brw->intel;
+
+ /* TODO: MSAA only implemented on Gen6; other gens must pass num_samples == 0 */
+ if (intel->gen != 6) {
+ assert(num_samples == 0);
+ }
+
+ int len = intel->gen >= 7 ? 4 : 3; /* packet is one dword longer on Gen7+ */
+ BEGIN_BATCH(len);
+ OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
+ OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
+ (num_samples > 0 ? MS_NUMSAMPLES_4 : MS_NUMSAMPLES_1));
+ OUT_BATCH(num_samples > 0 ? 0xae2ae662 : 0); /* sample-position dword for 4/8-sample; 0 when not multisampling */
+ if (intel->gen >= 7)
+ OUT_BATCH(0); /* extra Gen7+ dword, left zero */
+ ADVANCE_BATCH();
+}
+
+
+/**
+ * Emit 3DSTATE_SAMPLE_MASK: mask 15 (0xf) when num_samples > 0, else mask 1.
+ */
+void
+gen6_emit_3dstate_sample_mask(struct brw_context *brw,
+ unsigned num_samples)
+{
+ struct intel_context *intel = &brw->intel;
+
+ /* TODO: MSAA only implemented on Gen6; other gens must pass num_samples == 0 */
+ if (intel->gen != 6) {
+ assert(num_samples == 0);
+ }
+
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
+ OUT_BATCH(num_samples > 0 ? 15 : 1); /* 15 = low four bits set; presumably one bit per sample at 4x -- confirm */
+ ADVANCE_BATCH();
+}
+
+/* Emit 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK for the current draw buffer. */
+static void upload_multisample_state(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
+ unsigned num_samples = 0;
+
+ /* _NEW_BUFFERS: take the sample count from color draw buffer 0, if present */
+ if (ctx->DrawBuffer->_ColorDrawBuffers[0])
+ num_samples = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples;
+
+ /* 3DSTATE_MULTISAMPLE is nonpipelined, so flush before emitting it. */
+ intel_emit_post_sync_nonzero_flush(intel);
+
+ gen6_emit_3dstate_multisample(brw, num_samples);
+ gen6_emit_3dstate_sample_mask(brw, num_samples);
+}
+
+/* Tracked-state entry pairing upload_multisample_state with the _NEW_BUFFERS and BRW_NEW_CONTEXT dirty bits. */
+const struct brw_tracked_state gen6_multisample_state = {
+ .dirty = {
+ .mesa = _NEW_BUFFERS,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .emit = upload_multisample_state
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 5c4293ca91e..e0aaa9074f1 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -122,6 +122,10 @@ upload_sf_state(struct brw_context *brw)
int i;
/* _NEW_BUFFER */
bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);
+ bool multisampled = false;
+ if (ctx->DrawBuffer->_ColorDrawBuffers[0])
+ multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0;
+
int attr = 0, input_index = 0;
int urb_entry_read_offset = 1;
float point_size;
@@ -226,13 +230,20 @@ upload_sf_state(struct brw_context *brw)
}
/* _NEW_LINE */
- dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
- GEN6_SF_LINE_WIDTH_SHIFT;
+ {
+ uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7);
+ /* TODO: line width of 0 is not allowed when MSAA enabled */
+ if (line_width_u3_7 == 0)
+ line_width_u3_7 = 1;
+ dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
+ }
if (ctx->Line.SmoothFlag) {
dw3 |= GEN6_SF_LINE_AA_ENABLE;
dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
}
+ if (multisampled)
+ dw3 |= GEN6_SF_MSRAST_ON_PATTERN;
/* _NEW_PROGRAM | _NEW_POINT */
if (!(ctx->VertexProgram.PointSizeEnabled ||
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index fd1eca45049..28b3c2989c3 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -98,6 +98,11 @@ upload_wm_state(struct brw_context *brw)
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
uint32_t dw2, dw4, dw5, dw6;
+ bool multisampled = false;
+
+ /* _NEW_BUFFERS */
+ if (ctx->DrawBuffer->_ColorDrawBuffers[0])
+ multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0;
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->nr_params == 0) {
@@ -185,6 +190,13 @@ upload_wm_state(struct brw_context *brw)
dw6 |= _mesa_bitcount_64(brw->fragment_program->Base.InputsRead) <<
GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
+ if (multisampled) {
+ dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
+ dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
+ } else {
+ dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
+ dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
+ }
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index f10d0aab2f8..fbb94dfed56 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -143,6 +143,10 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
uint32_t wm_surf_offset;
uint32_t width, height;
surface->get_miplevel_dims(&width, &height);
+ if (surface->num_samples > 0) { /* TODO: wrong for 8x */
+ width /= 2;
+ height /= 2;
+ }
if (surface->map_stencil_as_y_tiled) {
width *= 2;
height /= 2;
@@ -181,6 +185,8 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
pitch_bytes *= 2;
surf->ss3.pitch = pitch_bytes - 1;
+ gen7_set_surface_num_multisamples(surf, surface->num_samples);
+
if (intel->is_haswell) {
surf->ss7.shader_chanel_select_r = HSW_SCS_RED;
surf->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
@@ -366,7 +372,7 @@ gen7_blorp_emit_sf_config(struct brw_context *brw,
OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
OUT_BATCH(params->depth_format <<
GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
- OUT_BATCH(0);
+ OUT_BATCH(params->num_samples > 0 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -397,7 +403,7 @@ gen7_blorp_emit_wm_config(struct brw_context *brw,
{
struct intel_context *intel = &brw->intel;
- uint32_t dw1 = 0;
+ uint32_t dw1 = 0, dw2 = 0;
switch (params->hiz_op) {
case GEN6_HIZ_OP_DEPTH_CLEAR:
@@ -423,10 +429,18 @@ gen7_blorp_emit_wm_config(struct brw_context *brw,
dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */
}
+ if (params->num_samples > 0) {
+ dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
+ dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
+ } else {
+ dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
+ dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
+ }
+
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
OUT_BATCH(dw1);
- OUT_BATCH(0);
+ OUT_BATCH(dw2);
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c
index 5c6fcedcedf..8a6c09bf8ac 100644
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -161,6 +161,9 @@ upload_sf_state(struct brw_context *brw)
float point_size;
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);
+ bool multisampled = false;
+ if (ctx->DrawBuffer->_ColorDrawBuffers[0])
+ multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0;
dw1 = GEN6_SF_STATISTICS_ENABLE |
GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
@@ -243,8 +246,13 @@ upload_sf_state(struct brw_context *brw)
dw2 |= GEN6_SF_SCISSOR_ENABLE;
/* _NEW_LINE */
- dw2 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
- GEN6_SF_LINE_WIDTH_SHIFT;
+ {
+ uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7);
+ /* TODO: line width of 0 is not allowed when MSAA enabled */
+ if (line_width_u3_7 == 0)
+ line_width_u3_7 = 1;
+ dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
+ }
if (ctx->Line.SmoothFlag) {
dw2 |= GEN6_SF_LINE_AA_ENABLE;
dw2 |= GEN6_SF_LINE_AA_MODE_TRUE;
@@ -253,6 +261,8 @@ upload_sf_state(struct brw_context *brw)
if (ctx->Line.StippleFlag && intel->is_haswell) {
dw2 |= HSW_SF_LINE_STIPPLE_ENABLE;
}
+ if (multisampled)
+ dw2 |= GEN6_SF_MSRAST_ON_PATTERN;
/* FINISHME: Last Pixel Enable? Vertex Sub Pixel Precision Select?
* FINISHME: AA Line Distance Mode?
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 024c855ab12..2a0462f3fc9 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -39,9 +39,14 @@ upload_wm_state(struct brw_context *brw)
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
bool writes_depth = false;
- uint32_t dw1;
+ bool multisampled = false;
+ uint32_t dw1, dw2;
- dw1 = 0;
+ /* _NEW_BUFFERS */
+ if (ctx->DrawBuffer->_ColorDrawBuffers[0])
+ multisampled = ctx->DrawBuffer->_ColorDrawBuffers[0]->NumSamples > 0;
+
+ dw1 = dw2 = 0;
dw1 |= GEN7_WM_STATISTICS_ENABLE;
dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
@@ -74,11 +79,18 @@ upload_wm_state(struct brw_context *brw)
dw1 & GEN7_WM_KILL_ENABLE) {
dw1 |= GEN7_WM_DISPATCH_ENABLE;
}
+ if (multisampled) {
+ dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
+ dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
+ } else {
+ dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
+ dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
+ }
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
OUT_BATCH(dw1);
- OUT_BATCH(0);
+ OUT_BATCH(dw2);
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index d84e07582a0..5aa62bddb1a 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -54,6 +54,20 @@ gen7_set_surface_tiling(struct gen7_surface_state *surf, uint32_t tiling)
}
}
+/* Set surf->ss4.num_multisamples: 8x if num_samples > 4, 4x if num_samples > 0, otherwise 1x. */
+void
+gen7_set_surface_num_multisamples(struct gen7_surface_state *surf,
+ unsigned num_samples)
+{
+ if (num_samples > 4) /* anything above 4 selects the 8x encoding */
+ surf->ss4.num_multisamples = GEN7_SURFACE_MULTISAMPLECOUNT_8;
+ else if (num_samples > 0) /* 1..4 selects the 4x encoding */
+ surf->ss4.num_multisamples = GEN7_SURFACE_MULTISAMPLECOUNT_4;
+ else
+ surf->ss4.num_multisamples = GEN7_SURFACE_MULTISAMPLECOUNT_1;
+}
+
+
static void
gen7_update_buffer_texture_surface(struct gl_context *ctx, GLuint unit)
{
@@ -328,6 +342,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
gen7_set_surface_tiling(surf, region->tiling);
surf->ss3.pitch = (region->pitch * region->cpp) - 1;
+ gen7_set_surface_num_multisamples(surf, irb->mt->num_samples);
+
if (intel->is_haswell) {
surf->ss7.shader_chanel_select_r = HSW_SCS_RED;
surf->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index f92d78f378a..bbd5f6652e9 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -189,6 +189,29 @@ intel_unmap_renderbuffer(struct gl_context *ctx,
/**
+ * Round up the requested sample count to a supported one (Gen6: nonzero -> 4; every other gen: 0).
+ */
+static unsigned
+quantize_num_samples(struct intel_context *intel, unsigned num_samples)
+{
+ switch (intel->gen) {
+ case 6:
+ /* Gen6 supports only 4x multisampling, so any nonzero request becomes 4. */
+ if (num_samples > 0)
+ return 4;
+ else
+ return 0;
+ case 7:
+ /* TODO: MSAA only implemented on Gen6, so Gen7 reports 0 samples for now */
+ return 0;
+ default:
+ /* MSAA unsupported */
+ return 0;
+ }
+}
+
+
+/**
* Called via glRenderbufferStorageEXT() to set the format and allocate
* storage for a user-created renderbuffer.
*/
@@ -199,6 +222,7 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
{
struct intel_context *intel = intel_context(ctx);
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ rb->NumSamples = quantize_num_samples(intel, rb->NumSamples);
ASSERT(rb->Name != 0);
@@ -241,12 +265,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
return true;
irb->mt = intel_miptree_create_for_renderbuffer(intel, rb->Format,
- width, height);
+ width, height,
+ rb->NumSamples);
if (!irb->mt)
return false;
if (intel->vtbl.is_hiz_depth_format(intel, rb->Format)) {
- bool ok = intel_miptree_alloc_hiz(intel, irb->mt);
+ bool ok = intel_miptree_alloc_hiz(intel, irb->mt, rb->NumSamples);
if (!ok) {
intel_miptree_release(&irb->mt);
return false;
@@ -495,7 +520,7 @@ intel_renderbuffer_update_wrapper(struct intel_context *intel,
if (mt->hiz_mt == NULL &&
intel->vtbl.is_hiz_depth_format(intel, rb->Format)) {
- intel_miptree_alloc_hiz(intel, mt);
+ intel_miptree_alloc_hiz(intel, mt, 0 /* num_samples */);
if (!mt->hiz_mt)
return false;
}
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 91ebc8d4e4d..99f42303006 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -72,7 +72,8 @@ intel_miptree_create_internal(struct intel_context *intel,
GLuint width0,
GLuint height0,
GLuint depth0,
- bool for_region)
+ bool for_region,
+ GLuint num_samples)
{
struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
int compress_byte = 0;
@@ -92,6 +93,7 @@ intel_miptree_create_internal(struct intel_context *intel,
mt->width0 = width0;
mt->height0 = height0;
mt->cpp = compress_byte ? compress_byte : _mesa_get_format_bytes(mt->format);
+ mt->num_samples = num_samples;
mt->compressed = compress_byte ? 1 : 0;
mt->refcount = 1;
@@ -115,7 +117,8 @@ intel_miptree_create_internal(struct intel_context *intel,
mt->width0,
mt->height0,
mt->depth0,
- true);
+ true,
+ num_samples);
if (!mt->stencil_mt) {
intel_miptree_release(&mt);
return NULL;
@@ -161,7 +164,8 @@ intel_miptree_create(struct intel_context *intel,
GLuint width0,
GLuint height0,
GLuint depth0,
- bool expect_accelerated_upload)
+ bool expect_accelerated_upload,
+ GLuint num_samples)
{
struct intel_mipmap_tree *mt;
uint32_t tiling = I915_TILING_NONE;
@@ -172,7 +176,21 @@ intel_miptree_create(struct intel_context *intel,
(base_format == GL_DEPTH_COMPONENT ||
base_format == GL_DEPTH_STENCIL_EXT))
tiling = I915_TILING_Y;
- else if (width0 >= 64)
+ else if (num_samples > 0) {
+ /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
+ * Surface"):
+ *
+ * [DevSNB+]: For multi-sample render targets, this field must be
+ * 1. MSRTs can only be tiled.
+ *
+ * Our usual reason for preferring X tiling (fast blits using the
+ * blitting engine) doesn't apply to MSAA, since we'll generally be
+ * downsampling or upsampling when blitting between the MSAA buffer
+ * and another buffer, and the blitting engine doesn't support that.
+ * So use Y tiling, since it makes better use of the cache.
+ */
+ tiling = I915_TILING_Y;
+ } else if (width0 >= 64)
tiling = I915_TILING_X;
}
@@ -189,7 +207,7 @@ intel_miptree_create(struct intel_context *intel,
mt = intel_miptree_create_internal(intel, target, format,
first_level, last_level, width0,
height0, depth0,
- false);
+ false, num_samples);
/*
* pitch == 0 || height == 0 indicates the null texture
*/
@@ -225,7 +243,7 @@ intel_miptree_create_for_region(struct intel_context *intel,
mt = intel_miptree_create_internal(intel, target, format,
0, 0,
region->width, region->height, 1,
- true);
+ true, 0 /* num_samples */);
if (!mt)
return mt;
@@ -238,12 +256,24 @@ struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct intel_context *intel,
gl_format format,
uint32_t width,
- uint32_t height)
+ uint32_t height,
+ uint32_t num_samples)
{
struct intel_mipmap_tree *mt;
+ /* Adjust width/height for MSAA: more than 4 samples -> 8x (width*4, height*2); 1..4 -> 4x (width*2, height*2) */
+ if (num_samples > 4) {
+ num_samples = 8;
+ width *= 4;
+ height *= 2;
+ } else if (num_samples > 0) {
+ num_samples = 4;
+ width *= 2;
+ height *= 2;
+ }
+
mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0,
- width, height, 1, true);
+ width, height, 1, true, num_samples);
return mt;
}
@@ -513,7 +543,8 @@ intel_miptree_copy_teximage(struct intel_context *intel,
bool
intel_miptree_alloc_hiz(struct intel_context *intel,
- struct intel_mipmap_tree *mt)
+ struct intel_mipmap_tree *mt,
+ GLuint num_samples)
{
assert(mt->hiz_mt == NULL);
mt->hiz_mt = intel_miptree_create(intel,
@@ -524,7 +555,8 @@ intel_miptree_alloc_hiz(struct intel_context *intel,
mt->width0,
mt->height0,
mt->depth0,
- true);
+ true,
+ num_samples);
if (!mt->hiz_mt)
return false;
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
index 0886c95f234..ca1666da2f5 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -169,6 +169,7 @@ struct intel_mipmap_tree
GLuint width0, height0, depth0; /**< Level zero image dimensions */
GLuint cpp;
+ GLuint num_samples;
bool compressed;
/* Derived from the above:
@@ -231,7 +232,8 @@ struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
GLuint width0,
GLuint height0,
GLuint depth0,
- bool expect_accelerated_upload);
+ bool expect_accelerated_upload,
+ GLuint num_samples);
struct intel_mipmap_tree *
intel_miptree_create_for_region(struct intel_context *intel,
@@ -250,7 +252,8 @@ struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct intel_context *intel,
gl_format format,
uint32_t width,
- uint32_t height);
+ uint32_t height,
+ uint32_t num_samples);
/** \brief Assert that the level and layer are valid for the miptree. */
static inline void
@@ -341,7 +344,8 @@ intel_miptree_s8z24_gather(struct intel_context *intel,
bool
intel_miptree_alloc_hiz(struct intel_context *intel,
- struct intel_mipmap_tree *mt);
+ struct intel_mipmap_tree *mt,
+ GLuint num_samples);
void
intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 651095a27de..68f4ff4c9ed 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -99,7 +99,8 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
width,
height,
depth,
- expect_accelerated_upload);
+ expect_accelerated_upload,
+ 0 /* num_samples */);
}
/* There are actually quite a few combinations this will work for,
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index 256c21ec826..cadba29ff41 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -86,7 +86,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
width,
height,
depth,
- true);
+ true,
+ 0 /* num_samples */);
if (!intelObj->mt)
return false;
}