aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/gen6_blorp.cpp
diff options
context:
space:
mode:
authorPaul Berry <[email protected]>2012-04-29 21:50:22 -0700
committerPaul Berry <[email protected]>2012-05-10 10:30:01 -0700
commitea1274c9a6ab20a4a05371cf392953eb1d1c0f8b (patch)
tree1ac723263a1ca81a0b6cae0e3a6f7f39c2e5a3d9 /src/mesa/drivers/dri/i965/gen6_blorp.cpp
parent20b153b9ca5e57aa0e20e992347ece56388e71e4 (diff)
i965/hiz: Convert gen{6,7}_hiz.c to C++
This patch converts the files gen6_hiz.c and gen7_hiz.c to C++, in preparation for expanding the HiZ code to support arbitrary blits. The new files are called gen6_blorp.cpp and gen7_blorp.cpp to reflect the expanded role that this code will serve--"blorp" stands for "BLit Or Resolve Pass". Reviewed-by: Chad Versace <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/gen6_blorp.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/gen6_blorp.cpp662
1 files changed, 662 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
new file mode 100644
index 00000000000..536bd7ed626
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -0,0 +1,662 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+
+#include "gen6_hiz.h"
+
+/**
+ * \name Constants for HiZ VBO
+ * \{
+ *
+ * Geometry of the vertex data uploaded for the HiZ rectangle primitive:
+ * GEN6_HIZ_NUM_VERTICES vertices of GEN6_HIZ_NUM_VUE_ELEMS floats each.
+ * Note that GEN6_HIZ_VBO_SIZE is a size in BYTES, not a count of floats.
+ *
+ * \see brw_context::hiz::vertex_bo
+ */
+#define GEN6_HIZ_NUM_VERTICES 3
+#define GEN6_HIZ_NUM_VUE_ELEMS 8
+#define GEN6_HIZ_VBO_SIZE (GEN6_HIZ_NUM_VERTICES * GEN6_HIZ_NUM_VUE_ELEMS * sizeof(float))
+/** \} */
+
+/**
+ * \brief Emit the fixed preamble of a HiZ op batch.
+ *
+ * Flushes whatever batch is pending and then emits, in order,
+ * CMD_PIPELINE_SELECT, 3DSTATE_MULTISAMPLE, 3DSTATE_SAMPLE_MASK and
+ * CMD_STATE_BASE_ADDRESS.
+ *
+ * \param brw  context whose batchbuffer receives the packets
+ */
+void
+gen6_hiz_emit_batch_head(struct brw_context *brw)
+{
+   /* NOTE(review): the batch macros used below appear to pick up a local
+    * named "intel" implicitly, so this variable must keep its name.
+    */
+   struct intel_context *intel = &brw->intel;
+
+   /* The resolve is fenced with flushes (one here, one after the packets
+    * have been emitted) so that the batch contains nothing but the resolve.
+    *
+    * A flush should not really be necessary. However, it is suspected to be
+    * unsafe for CMD_PIPELINE_SELECT to appear more than once in a single
+    * batch, and fencing with flushes is the only safe way to guarantee that.
+    * Detecting an in-progress batch and reacting to it would be preferable,
+    * but that needs a *safe* way to read brw_context::state::dirty::brw
+    * outside of the brw_upload_state() codepath.
+    */
+   intel_flush(&intel->ctx);
+
+   /* CMD_PIPELINE_SELECT: choose the 3D pipeline rather than the media
+    * pipeline.
+    */
+   {
+      BEGIN_BATCH(1);
+      OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_MULTISAMPLE: single sample, pixel-center location. The packet
+    * is one dword longer when intel->gen is 7.
+    */
+   {
+      int multisample_len;
+
+      if (intel->gen == 7)
+         multisample_len = 4;
+      else
+         multisample_len = 3;
+
+      BEGIN_BATCH(multisample_len);
+      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (multisample_len - 2));
+      OUT_BATCH(MS_PIXEL_LOCATION_CENTER | MS_NUMSAMPLES_1);
+      OUT_BATCH(0);
+      if (multisample_len > 3)
+         OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_SAMPLE_MASK: enable sample 0 only. */
+   {
+      const int sample_mask_len = 2;
+
+      BEGIN_BATCH(sample_mask_len);
+      OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (sample_mask_len - 2));
+      OUT_BATCH(1);
+      ADVANCE_BATCH();
+   }
+
+   /* CMD_STATE_BASE_ADDRESS
+    *
+    * Both the surface state base and the dynamic state base point at the
+    * batch buffer itself; every other base/bound only has its low bit set.
+    *
+    * From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS:
+    *    The following commands must be reissued following any change to the
+    *    base addresses:
+    *       3DSTATE_CC_POINTERS
+    *       3DSTATE_BINDING_TABLE_POINTERS
+    *       3DSTATE_SAMPLER_STATE_POINTERS
+    *       3DSTATE_VIEWPORT_STATE_POINTERS
+    *       MEDIA_STATE_POINTERS
+    */
+   {
+      const int sba_len = 10;
+
+      BEGIN_BATCH(sba_len);
+      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (sba_len - 2));
+      /* GeneralStateBaseAddressModifyEnable */
+      OUT_BATCH(1);
+      /* SurfaceStateBaseAddress */
+      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
+      /* DynamicStateBaseAddress */
+      OUT_RELOC(intel->batch.bo,
+                (I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION),
+                0, 1);
+      /* IndirectObjectBaseAddress */
+      OUT_BATCH(1);
+      /* InstructionBaseAddress */
+      OUT_BATCH(1);
+      /* GeneralStateUpperBound */
+      OUT_BATCH(1);
+      /* DynamicStateUpperBound */
+      OUT_BATCH(1);
+      /* IndirectObjectUpperBound */
+      OUT_BATCH(1);
+      /* InstructionAccessUpperBound */
+      OUT_BATCH(1);
+      ADVANCE_BATCH();
+   }
+}
+
+/**
+ * \brief Upload the HiZ rectangle's vertices and emit the vertex fetch state.
+ *
+ * Copies three VUE-formatted vertices, spanning the full width and height of
+ * miptree level \c level, into space obtained from brw_state_batch(), then
+ * emits 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS referring to them.
+ *
+ * \param brw    context whose batchbuffer receives the data and packets
+ * \param mt     miptree supplying the rectangle dimensions
+ * \param level  miplevel of \c mt whose width/height are used
+ * \param layer  not used by this function
+ */
+void
+gen6_hiz_emit_vertices(struct brw_context *brw,
+                       struct intel_mipmap_tree *mt,
+                       unsigned int level,
+                       unsigned int layer)
+{
+   struct intel_context *intel = &brw->intel;
+   uint32_t vertex_offset;
+
+   (void) layer;
+
+   /* Setup VBO for the rectangle primitive.
+    *
+    * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
+    * vertices. The vertices reside in screen space with DirectX coordinates
+    * (that is, (0, 0) is the upper left corner).
+    *
+    *   v2 ------ implied
+    *    |        |
+    *    |        |
+    *   v0 ----- v1
+    *
+    * Since the VS is disabled, the clipper loads each VUE directly from
+    * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
+    * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
+    *   dw0: Reserved, MBZ.
+    *   dw1: Render Target Array Index. The HiZ op does not use indexed
+    *        vertices, so set the dword to 0.
+    *   dw2: Viewport Index. The HiZ op disables viewport mapping and
+    *        scissoring, so set the dword to 0.
+    *   dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
+    *        set the dword to 0.
+    *   dw4: Vertex Position X.
+    *   dw5: Vertex Position Y.
+    *   dw6: Vertex Position Z.
+    *   dw7: Vertex Position W.
+    *
+    * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
+    * "Vertex URB Entry (VUE) Formats".
+    */
+   {
+      /* Convert explicitly: a non-constant integer inside a braced
+       * initializer for a float array is a narrowing conversion in C++11.
+       */
+      const float width = (float) mt->level[level].width;
+      const float height = (float) mt->level[level].height;
+      float *vertex_data;
+
+      /* The array is dimensioned in floats. GEN6_HIZ_VBO_SIZE is a byte
+       * count and would make the array sizeof(float) times too large.
+       */
+      const float vertices[GEN6_HIZ_NUM_VERTICES * GEN6_HIZ_NUM_VUE_ELEMS] = {
+         /* v0 */ 0, 0, 0, 0,     0, height, 0, 1,
+         /* v1 */ 0, 0, 0, 0, width, height, 0, 1,
+         /* v2 */ 0, 0, 0, 0,     0,      0, 0, 1,
+      };
+
+      /* The packets below describe the buffer using GEN6_HIZ_VBO_SIZE. */
+      assert(sizeof(vertices) == GEN6_HIZ_VBO_SIZE);
+
+      vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_NO_TYPE,
+                                              GEN6_HIZ_VBO_SIZE, 32,
+                                              &vertex_offset);
+      memcpy(vertex_data, vertices, sizeof(vertices));
+   }
+
+   /* 3DSTATE_VERTEX_BUFFERS */
+   {
+      const int num_buffers = 1;
+      const int batch_length = 1 + 4 * num_buffers;
+
+      /* Buffer pitch is one whole VUE (GEN6_HIZ_NUM_VUE_ELEMS floats). */
+      uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
+                     (GEN6_HIZ_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT;
+
+      if (intel->gen >= 7)
+         dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
+
+      BEGIN_BATCH(batch_length);
+      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
+      OUT_BATCH(dw0);
+      /* start address */
+      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
+                vertex_offset);
+      /* end address (inclusive, hence the -1) */
+      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
+                vertex_offset + GEN6_HIZ_VBO_SIZE - 1);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_VERTEX_ELEMENTS
+    *
+    * Fetch dwords 0 - 7 from each VUE, as two four-float elements at byte
+    * offsets 0 and 16. See the VUE layout described above, where the vertex
+    * data is uploaded.
+    */
+   {
+      const int num_elements = 2;
+      const int batch_length = 1 + 2 * num_elements;
+
+      BEGIN_BATCH(batch_length);
+      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2));
+      /* Element 0 */
+      OUT_BATCH(GEN6_VE0_VALID |
+                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+                0 << BRW_VE0_SRC_OFFSET_SHIFT);
+      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
+      /* Element 1 */
+      OUT_BATCH(GEN6_VE0_VALID |
+                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+                16 << BRW_VE0_SRC_OFFSET_SHIFT);
+      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
+      ADVANCE_BATCH();
+   }
+}
+
+/**
+ * \brief Execute a HiZ op on a miptree slice.
+ *
+ * To execute the HiZ op, this function manually constructs and emits a batch
+ * to "draw" the HiZ op's rectangle primitive. The batchbuffer is flushed
+ * before constructing and after emitting the batch.
+ *
+ * This function alters no GL state.
+ *
+ * If \c mt->format is not one of the depth formats translated below, the
+ * function asserts and, in builds where asserts are compiled out, returns
+ * without emitting anything.
+ *
+ * For an overview of HiZ ops, see the following sections of the Sandy Bridge
+ * PRM, Volume 1, Part 2:
+ *   - 7.5.3.1 Depth Buffer Clear
+ *   - 7.5.3.2 Depth Buffer Resolve
+ *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
+ *
+ * \param intel  context to emit into
+ * \param mt     depth miptree; must have a HiZ miptree (\c mt->hiz_mt)
+ * \param level  miplevel of the slice
+ * \param layer  layer of the slice
+ * \param op     which HiZ op to run; GEN6_HIZ_OP_DEPTH_CLEAR is not
+ *               implemented
+ */
+static void
+gen6_hiz_exec(struct intel_context *intel,
+              struct intel_mipmap_tree *mt,
+              unsigned int level,
+              unsigned int layer,
+              enum gen6_hiz_op op)
+{
+   struct gl_context *ctx = &intel->ctx;
+   struct brw_context *brw = brw_context(ctx);
+   uint32_t draw_x, draw_y;
+   uint32_t tile_mask_x, tile_mask_y;
+   uint32_t depth_format;
+
+   assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
+   assert(mt->hiz_mt != NULL);
+   intel_miptree_check_level_layer(mt, level, layer);
+
+   /* Translate the miptree format before touching the batch. Doing it here,
+    * rather than while building 3DSTATE_DEPTH_BUFFER, means an unsupported
+    * format can be rejected before anything is emitted, and the packet never
+    * receives an uninitialized format value when asserts are compiled out.
+    */
+   switch (mt->format) {
+   case MESA_FORMAT_Z16:
+      depth_format = BRW_DEPTHFORMAT_D16_UNORM;
+      break;
+   case MESA_FORMAT_Z32_FLOAT:
+      depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
+      break;
+   case MESA_FORMAT_X8_Z24:
+      depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+   {
+      /* Construct a dummy renderbuffer just to extract tile offsets. */
+      struct intel_renderbuffer rb;
+      rb.mt = mt;
+      rb.mt_level = level;
+      rb.mt_layer = layer;
+      intel_renderbuffer_set_draw_offset(&rb);
+      draw_x = rb.draw_x;
+      draw_y = rb.draw_y;
+   }
+
+   /* Compute masks to determine how much of draw_x and draw_y should be
+    * performed using the fine adjustment of "depth coordinate offset X/Y"
+    * (dw5 of 3DSTATE_DEPTH_BUFFER). See the emit_depthbuffer() function for
+    * details.
+    */
+   {
+      uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y;
+      intel_region_get_tile_masks(mt->region, &depth_mask_x, &depth_mask_y);
+      intel_region_get_tile_masks(mt->hiz_mt->region,
+                                  &hiz_mask_x, &hiz_mask_y);
+
+      /* Each HiZ row represents 2 rows of pixels */
+      hiz_mask_y = hiz_mask_y << 1 | 1;
+
+      tile_mask_x = depth_mask_x | hiz_mask_x;
+      tile_mask_y = depth_mask_y | hiz_mask_y;
+   }
+
+   gen6_hiz_emit_batch_head(brw);
+   gen6_hiz_emit_vertices(brw, mt, level, layer);
+
+   /* 3DSTATE_URB
+    *
+    * Assign the entire URB to the VS. Even though the VS is disabled, URB
+    * space is still needed because the clipper loads the VUE's from the URB.
+    * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
+    * Dword 1.15:0 "VS Number of URB Entries":
+    *     This field is always used (even if VS Function Enable is DISABLED).
+    *
+    * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
+    * safely ignore it because this batch contains only one draw call.
+    *     Because of URB corruption caused by allocating a previous GS unit
+    *     URB entry to the VS unit, software is required to send a “GS NULL
+    *     Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
+    *     plus a dummy DRAW call before any case where VS will be taking over
+    *     GS URB space.
+    */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
+      OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CC_STATE_POINTERS
+    *
+    * The pointer offsets are relative to
+    * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
+    *
+    * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
+    */
+   {
+      uint32_t depthstencil_offset;
+      gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset);
+
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
+      OUT_BATCH(1); /* BLEND_STATE offset */
+      OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */
+      OUT_BATCH(1); /* COLOR_CALC_STATE offset */
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_VS
+    *
+    * Disable vertex shader.
+    */
+   {
+      /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section
+       * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
+       *   [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
+       *   command that causes the VS Function Enable to toggle. Pipeline
+       *   flush can be executed by sending a PIPE_CONTROL command with CS
+       *   stall bit set and a post sync operation.
+       */
+      intel_emit_post_sync_nonzero_flush(intel);
+
+      BEGIN_BATCH(6);
+      OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_GS
+    *
+    * Disable the geometry shader.
+    */
+   {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CLIP
+    *
+    * Disable the clipper.
+    *
+    * The HiZ op emits a rectangle primitive, which requires clipping to
+    * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
+    * Section 1.3 "3D Primitives Overview":
+    *    RECTLIST:
+    *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
+    *    Mode should be set to a value other than CLIPMODE_NORMAL.
+    *
+    * Also disable perspective divide. This doesn't change the clipper's
+    * output, but does spare a few electrons.
+    */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_SF
+    *
+    * Disable ViewportTransformEnable (dw2.1)
+    *
+    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
+    * Primitives Overview":
+    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
+    *     use of screen-space coordinates).
+    *
+    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
+    * and BackFaceFillMode (dw2.6:5) to SOLID(0).
+    *
+    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
+    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
+    *     SOLID: Any triangle or rectangle object found to be front-facing
+    *     is rendered as a solid object. This setting is required when
+    *     rendering rectangle (RECTLIST) objects.
+    */
+   {
+      BEGIN_BATCH(20);
+      OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
+      OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */
+                1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+                0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
+      /* The remaining 18 dwords of the packet are all zero. */
+      for (int i = 0; i < 18; ++i)
+         OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_WM
+    *
+    * Disable thread dispatch (dw5.19) and enable the HiZ op.
+    *
+    * Even though thread dispatch is disabled, max threads (dw5.31:25) must be
+    * nonzero to prevent the GPU from hanging. See the valid ranges in the
+    * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.31:25
+    * "Maximum Number Of Threads".
+    */
+   {
+      uint32_t dw4 = 0;
+
+      switch (op) {
+      case GEN6_HIZ_OP_DEPTH_CLEAR:
+         assert(!"not implemented");
+         dw4 |= GEN6_WM_DEPTH_CLEAR;
+         break;
+      case GEN6_HIZ_OP_DEPTH_RESOLVE:
+         dw4 |= GEN6_WM_DEPTH_RESOLVE;
+         break;
+      case GEN6_HIZ_OP_HIZ_RESOLVE:
+         dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
+         break;
+      default:
+         assert(0);
+         break;
+      }
+
+      BEGIN_BATCH(9);
+      OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(dw4);
+      OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
+      OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DEPTH_BUFFER */
+   {
+      uint32_t width = mt->level[level].width;
+      uint32_t height = mt->level[level].height;
+
+      uint32_t tile_x = draw_x & tile_mask_x;
+      uint32_t tile_y = draw_y & tile_mask_y;
+      uint32_t offset = intel_region_get_aligned_offset(mt->region,
+                                                        draw_x & ~tile_mask_x,
+                                                        draw_y & ~tile_mask_y);
+
+      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+       * Coordinate Offset X/Y":
+       *
+       *   "The 3 LSBs of both offsets must be zero to ensure correct
+       *   alignment"
+       *
+       * We have no guarantee that tile_x and tile_y are correctly aligned,
+       * since they are determined by the mipmap layout, which is only aligned
+       * to multiples of 4.
+       *
+       * So, to avoid hanging the GPU, just smash the low order 3 bits of
+       * tile_x and tile_y to 0. This is a temporary workaround until we come
+       * up with a better solution.
+       */
+      tile_x &= ~7;
+      tile_y &= ~7;
+
+      intel_emit_post_sync_nonzero_flush(intel);
+      intel_emit_depth_stall_flushes(intel);
+
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+      OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) |
+                depth_format << 18 |
+                1 << 21 | /* separate stencil enable */
+                1 << 22 | /* hiz enable */
+                BRW_TILEWALK_YMAJOR << 26 |
+                1 << 27 | /* y-tiled */
+                BRW_SURFACE_2D << 29);
+      OUT_RELOC(mt->region->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                offset);
+      /* The surface is widened by the intra-tile offset so the rectangle,
+       * shifted by (tile_x, tile_y), still fits inside it.
+       */
+      OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
+                (width + tile_x - 1) << 6 |
+                (height + tile_y - 1) << 19);
+      OUT_BATCH(0);
+      OUT_BATCH(tile_x |
+                tile_y << 16);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_HIER_DEPTH_BUFFER */
+   {
+      struct intel_region *hiz_region = mt->hiz_mt->region;
+      /* Y is halved because each HiZ row represents 2 rows of pixels. */
+      uint32_t hiz_offset =
+         intel_region_get_aligned_offset(hiz_region,
+                                         draw_x & ~tile_mask_x,
+                                         (draw_y & ~tile_mask_y) / 2);
+
+      BEGIN_BATCH(3);
+      OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
+      OUT_RELOC(hiz_region->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                hiz_offset);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_STENCIL_BUFFER */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CLEAR_PARAMS
+    *
+    * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
+    *   [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
+    *   packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
+    */
+   {
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DRAWING_RECTANGLE */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(((mt->level[level].width - 1) & 0xffff) |
+                ((mt->level[level].height - 1) << 16));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DPRIMITIVE */
+   {
+      BEGIN_BATCH(6);
+      OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
+                _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
+                GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
+      OUT_BATCH(3); /* vertex count per instance */
+      OUT_BATCH(0);
+      OUT_BATCH(1); /* instance count */
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* See comments above at first invocation of intel_flush() in
+    * gen6_hiz_emit_batch_head().
+    */
+   intel_flush(ctx);
+
+   /* Be safe: mark all brw and cache state dirty. */
+   brw->state.dirty.brw = ~0;
+   brw->state.dirty.cache = ~0;
+}
+
+/**
+ * \brief Allocate and fill the DEPTH_STENCIL_STATE used by a HiZ op.
+ *
+ * The state is obtained from brw_state_batch() with 64-byte alignment,
+ * zeroed, and then given depth writes enabled. For a depth resolve the depth
+ * test is additionally enabled with a compare function of NEVER.
+ *
+ * \param brw         context whose batch receives the state
+ * \param op          HiZ op the state is being built for
+ * \param out_offset  receives the state's offset, which is relative to
+ *                    CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
+ */
+void
+gen6_hiz_emit_depth_stencil_state(struct brw_context *brw,
+                                  enum gen6_hiz_op op,
+                                  uint32_t *out_offset)
+{
+   struct gen6_depth_stencil_state *ds_state =
+      (struct gen6_depth_stencil_state *)
+      brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
+                      sizeof(*ds_state), 64, out_offset);
+
+   /* Start from an all-zero state; only the fields below are non-zero. */
+   memset(ds_state, 0, sizeof(*ds_state));
+
+   /* The required settings are described in the following sections of the
+    * Sandy Bridge PRM, Volume 1, Part 2:
+    *   - 7.5.3.1 Depth Buffer Clear
+    *   - 7.5.3.2 Depth Buffer Resolve
+    *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
+    */
+   ds_state->ds2.depth_write_enable = 1;
+
+   if (op != GEN6_HIZ_OP_DEPTH_RESOLVE)
+      return;
+
+   ds_state->ds2.depth_test_enable = 1;
+   ds_state->ds2.depth_test_func = COMPAREFUNC_NEVER;
+}
+
+/**
+ * \brief Run a hierarchical depth buffer resolve on one miptree slice.
+ *
+ * Forwards to gen6_hiz_exec() with GEN6_HIZ_OP_HIZ_RESOLVE.
+ *
+ * \see intel_context::vtbl::resolve_hiz_slice
+ */
+void
+gen6_resolve_hiz_slice(struct intel_context *intel,
+                       struct intel_mipmap_tree *mt,
+                       uint32_t level,
+                       uint32_t layer)
+{
+   const enum gen6_hiz_op resolve_op = GEN6_HIZ_OP_HIZ_RESOLVE;
+
+   gen6_hiz_exec(intel, mt, level, layer, resolve_op);
+}
+
+/**
+ * \brief Run a depth buffer resolve on one miptree slice.
+ *
+ * Forwards to gen6_hiz_exec() with GEN6_HIZ_OP_DEPTH_RESOLVE.
+ *
+ * \see intel_context::vtbl::resolve_depth_slice
+ */
+void
+gen6_resolve_depth_slice(struct intel_context *intel,
+                         struct intel_mipmap_tree *mt,
+                         uint32_t level,
+                         uint32_t layer)
+{
+   const enum gen6_hiz_op resolve_op = GEN6_HIZ_OP_DEPTH_RESOLVE;
+
+   gen6_hiz_exec(intel, mt, level, layer, resolve_op);
+}