diff options
author | Paul Berry <[email protected]> | 2012-04-29 21:50:22 -0700 |
---|---|---|
committer | Paul Berry <[email protected]> | 2012-05-10 10:30:01 -0700 |
commit | ea1274c9a6ab20a4a05371cf392953eb1d1c0f8b (patch) | |
tree | 1ac723263a1ca81a0b6cae0e3a6f7f39c2e5a3d9 /src/mesa/drivers/dri/i965/gen6_blorp.cpp | |
parent | 20b153b9ca5e57aa0e20e992347ece56388e71e4 (diff) |
i965/hiz: Convert gen{6,7}_hiz.c to C++
This patch converts the files gen6_hiz.c and gen7_hiz.c to C++, in
preparation for expanding the HiZ code to support arbitrary blits.
The new files are called gen6_blorp.cpp and gen7_blorp.cpp to reflect
the expanded role that this code will serve--"blorp" stands for "BLit
Or Resolve Pass".
Reviewed-by: Chad Versace <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/gen6_blorp.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_blorp.cpp | 662 |
1 files changed, 662 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp new file mode 100644 index 00000000000..536bd7ed626 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -0,0 +1,662 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> + +#include "intel_batchbuffer.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" + +#include "gen6_hiz.h" + +/** + * \name Constants for HiZ VBO + * \{ + * + * \see brw_context::hiz::vertex_bo + */ +#define GEN6_HIZ_NUM_VERTICES 3 +#define GEN6_HIZ_NUM_VUE_ELEMS 8 +#define GEN6_HIZ_VBO_SIZE (GEN6_HIZ_NUM_VERTICES \ + * GEN6_HIZ_NUM_VUE_ELEMS \ + * sizeof(float)) +/** \} */ + +void +gen6_hiz_emit_batch_head(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + + /* To ensure that the batch contains only the resolve, flush the batch + * before beginning and after finishing emitting the resolve packets. + * + * Ideally, we would not need to flush for the resolve op. But, I suspect + * that it's unsafe for CMD_PIPELINE_SELECT to occur multiple times in + * a single batch, and there is no safe way to ensure that other than by + * fencing the resolve with flushes. Ideally, we would just detect if + * a batch is in progress and do the right thing, but that would require + * the ability to *safely* access brw_context::state::dirty::brw + * outside of the brw_upload_state() codepath. + */ + intel_flush(ctx); + + /* CMD_PIPELINE_SELECT + * + * Select the 3D pipeline, as opposed to the media pipeline. + */ + { + BEGIN_BATCH(1); + OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16); + ADVANCE_BATCH(); + } + + /* 3DSTATE_MULTISAMPLE */ + { + int length = intel->gen == 7 ? 4 : 3; + + BEGIN_BATCH(length); + OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (length - 2)); + OUT_BATCH(MS_PIXEL_LOCATION_CENTER | + MS_NUMSAMPLES_1); + OUT_BATCH(0); + if (length >= 4) + OUT_BATCH(0); + ADVANCE_BATCH(); + + } + + /* 3DSTATE_SAMPLE_MASK */ + { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2)); + OUT_BATCH(1); + ADVANCE_BATCH(); + } + + /* CMD_STATE_BASE_ADDRESS + * + * From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS: + * The following commands must be reissued following any change to the + * base addresses: + * 3DSTATE_CC_POINTERS + * 3DSTATE_BINDING_TABLE_POINTERS + * 3DSTATE_SAMPLER_STATE_POINTERS + * 3DSTATE_VIEWPORT_STATE_POINTERS + * MEDIA_STATE_POINTERS + */ + { + BEGIN_BATCH(10); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); + OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */ + /* SurfaceStateBaseAddress */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); + /* DynamicStateBaseAddress */ + OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER | + I915_GEM_DOMAIN_INSTRUCTION), 0, 1); + OUT_BATCH(1); /* IndirectObjectBaseAddress */ + OUT_BATCH(1); /* InstructionBaseAddress */ + OUT_BATCH(1); /* GeneralStateUpperBound */ + OUT_BATCH(1); /* DynamicStateUpperBound */ + OUT_BATCH(1); /* IndirectObjectUpperBound*/ + OUT_BATCH(1); /* InstructionAccessUpperBound */ + ADVANCE_BATCH(); + } +} + +void +gen6_hiz_emit_vertices(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer) +{ + struct intel_context *intel = &brw->intel; + uint32_t vertex_offset; + + /* Setup VBO for the rectangle primitive.. + * + * A rectangle primitive (3DPRIM_RECTLIST) consists of only three + * vertices. The vertices reside in screen space with DirectX coordinates + * (that is, (0, 0) is the upper left corner). + * + * v2 ------ implied + * | | + * | | + * v0 ----- v1 + * + * Since the VS is disabled, the clipper loads each VUE directly from + * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and + * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: + * dw0: Reserved, MBZ. + * dw1: Render Target Array Index. The HiZ op does not use indexed + * vertices, so set the dword to 0. + * dw2: Viewport Index. The HiZ op disables viewport mapping and + * scissoring, so set the dword to 0. + * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so + * set the dword to 0. + * dw4: Vertex Position X. + * dw5: Vertex Position Y. + * dw6: Vertex Position Z. + * dw7: Vertex Position W. + * + * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 + * "Vertex URB Entry (VUE) Formats". + */ + { + const int width = mt->level[level].width; + const int height = mt->level[level].height; + float *vertex_data; + + const float vertices[GEN6_HIZ_VBO_SIZE] = { + /* v0 */ 0, 0, 0, 0, 0, height, 0, 1, + /* v1 */ 0, 0, 0, 0, width, height, 0, 1, + /* v2 */ 0, 0, 0, 0, 0, 0, 0, 1, + }; + + vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_NO_TYPE, + GEN6_HIZ_VBO_SIZE, 32, + &vertex_offset); + memcpy(vertex_data, vertices, GEN6_HIZ_VBO_SIZE); + } + + /* 3DSTATE_VERTEX_BUFFERS */ + { + const int num_buffers = 1; + const int batch_length = 1 + 4 * num_buffers; + + uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA | + (GEN6_HIZ_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT; + + if (intel->gen >= 7) + dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; + + BEGIN_BATCH(batch_length); + OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); + OUT_BATCH(dw0); + /* start address */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, + vertex_offset); + /* end address */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, + vertex_offset + GEN6_HIZ_VBO_SIZE - 1); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_VERTEX_ELEMENTS + * + * Fetch dwords 0 - 7 from each VUE. See the comments above where + * hiz->vertex_bo is filled with data. + */ + { + const int num_elements = 2; + const int batch_length = 1 + 2 * num_elements; + + BEGIN_BATCH(batch_length); + OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2)); + /* Element 0 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 0 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); + /* Element 1 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 16 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); + ADVANCE_BATCH(); + } +} + +/** + * \brief Execute a HiZ op on a miptree slice. + * + * To execute the HiZ op, this function manually constructs and emits a batch + * to "draw" the HiZ op's rectangle primitive. The batchbuffer is flushed + * before constructing and after emitting the batch. + * + * This function alters no GL state. + * + * For an overview of HiZ ops, see the following sections of the Sandy Bridge + * PRM, Volume 1, Part 2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ +static void +gen6_hiz_exec(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer, + enum gen6_hiz_op op) +{ + struct gl_context *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + uint32_t draw_x, draw_y; + uint32_t tile_mask_x, tile_mask_y; + + assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */ + assert(mt->hiz_mt != NULL); + intel_miptree_check_level_layer(mt, level, layer); + + { + /* Construct a dummy renderbuffer just to extract tile offsets. */ + struct intel_renderbuffer rb; + rb.mt = mt; + rb.mt_level = level; + rb.mt_layer = layer; + intel_renderbuffer_set_draw_offset(&rb); + draw_x = rb.draw_x; + draw_y = rb.draw_y; + } + + /* Compute masks to determine how much of draw_x and draw_y should be + * performed using the fine adjustment of "depth coordinate offset X/Y" + * (dw5 of 3DSTATE_DEPTH_BUFFER). See the emit_depthbuffer() function for + * details. + */ + { + uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y; + intel_region_get_tile_masks(mt->region, &depth_mask_x, &depth_mask_y); + intel_region_get_tile_masks(mt->hiz_mt->region, + &hiz_mask_x, &hiz_mask_y); + + /* Each HiZ row represents 2 rows of pixels */ + hiz_mask_y = hiz_mask_y << 1 | 1; + + tile_mask_x = depth_mask_x | hiz_mask_x; + tile_mask_y = depth_mask_y | hiz_mask_y; + } + + gen6_hiz_emit_batch_head(brw); + gen6_hiz_emit_vertices(brw, mt, level, layer); + + /* 3DSTATE_URB + * + * Assign the entire URB to the VS. Even though the VS disabled, URB space + * is still needed because the clipper loads the VUE's from the URB. From + * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, + * Dword 1.15:0 "VS Number of URB Entries": + * This field is always used (even if VS Function Enable is DISABLED). + * + * The warning below appears in the PRM (Section 3DSTATE_URB), but we can + * safely ignore it because this batch contains only one draw call. + * Because of URB corruption caused by allocating a previous GS unit + * URB entry to the VS unit, software is required to send a “GS NULL + * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) + * plus a dummy DRAW call before any case where VS will be taking over + * GS URB space. + */ + { + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); + OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CC_STATE_POINTERS + * + * The pointer offsets are relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + * + * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. + */ + { + uint32_t depthstencil_offset; + gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); + OUT_BATCH(1); /* BLEND_STATE offset */ + OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */ + OUT_BATCH(1); /* COLOR_CALC_STATE offset */ + ADVANCE_BATCH(); + } + + /* 3DSTATE_VS + * + * Disable vertex shader. + */ + { + /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section + * 3DSTATE_VS, Dword 5.0 "VS Function Enable": + * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS + * command that causes the VS Function Enable to toggle. Pipeline + * flush can be executed by sending a PIPE_CONTROL command with CS + * stall bit set and a post sync operation. + */ + intel_emit_post_sync_nonzero_flush(intel); + + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_GS + * + * Disable the geometry shader. + */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLIP + * + * Disable the clipper. + * + * The HiZ op emits a rectangle primitive, which requires clipping to + * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 "3D Primitives Overview": + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + * + * Also disable perspective divide. This doesn't change the clipper's + * output, but does spare a few electrons. + */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_SF + * + * Disable ViewportTransformEnable (dw2.1) + * + * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D + * Primitives Overview": + * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the + * use of screen- space coordinates). + * + * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) + * and BackFaceFillMode (dw2.5:6) to SOLID(0). + * + * From the Sandy Bridge PRM, Volume 2, Part 1, Section + * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: + * SOLID: Any triangle or rectangle object found to be front-facing + * is rendered as a solid object. This setting is required when + * (rendering rectangle (RECTLIST) objects. + */ + { + BEGIN_BATCH(20); + OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); + OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */ + 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); + for (int i = 0; i < 18; ++i) + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_WM + * + * Disable thread dispatch (dw5.19) and enable the HiZ op. + * + * Even though thread dispatch is disabled, max threads (dw5.25:31) must be + * nonzero to prevent the GPU from hanging. See the valid ranges in the + * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31 + * "Maximum Number Of Threads". + */ + { + uint32_t dw4 = 0; + + switch (op) { + case GEN6_HIZ_OP_DEPTH_CLEAR: + assert(!"not implemented"); + dw4 |= GEN6_WM_DEPTH_CLEAR; + break; + case GEN6_HIZ_OP_DEPTH_RESOLVE: + dw4 |= GEN6_WM_DEPTH_RESOLVE; + break; + case GEN6_HIZ_OP_HIZ_RESOLVE: + dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + default: + assert(0); + break; + } + + BEGIN_BATCH(9); + OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(dw4); + OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT); + OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DEPTH_BUFFER */ + { + uint32_t width = mt->level[level].width; + uint32_t height = mt->level[level].height; + + uint32_t tile_x = draw_x & tile_mask_x; + uint32_t tile_y = draw_y & tile_mask_y; + uint32_t offset = intel_region_get_aligned_offset(mt->region, + draw_x & ~tile_mask_x, + draw_y & ~tile_mask_y); + + /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 + * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth + * Coordinate Offset X/Y": + * + * "The 3 LSBs of both offsets must be zero to ensure correct + * alignment" + * + * We have no guarantee that tile_x and tile_y are correctly aligned, + * since they are determined by the mipmap layout, which is only aligned + * to multiples of 4. + * + * So, to avoid hanging the GPU, just smash the low order 3 bits of + * tile_x and tile_y to 0. This is a temporary workaround until we come + * up with a better solution. + */ + tile_x &= ~7; + tile_y &= ~7; + + uint32_t format; + switch (mt->format) { + case MESA_FORMAT_Z16: format = BRW_DEPTHFORMAT_D16_UNORM; break; + case MESA_FORMAT_Z32_FLOAT: format = BRW_DEPTHFORMAT_D32_FLOAT; break; + case MESA_FORMAT_X8_Z24: format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break; + default: assert(0); break; + } + + intel_emit_post_sync_nonzero_flush(intel); + intel_emit_depth_stall_flushes(intel); + + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); + OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) | + format << 18 | + 1 << 21 | /* separate stencil enable */ + 1 << 22 | /* hiz enable */ + BRW_TILEWALK_YMAJOR << 26 | + 1 << 27 | /* y-tiled */ + BRW_SURFACE_2D << 29); + OUT_RELOC(mt->region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + offset); + OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 | + (width + tile_x - 1) << 6 | + (height + tile_y - 1) << 19); + OUT_BATCH(0); + OUT_BATCH(tile_x | + tile_y << 16); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_HIER_DEPTH_BUFFER */ + { + struct intel_region *hiz_region = mt->hiz_mt->region; + uint32_t hiz_offset = + intel_region_get_aligned_offset(hiz_region, + draw_x & ~tile_mask_x, + (draw_y & ~tile_mask_y) / 2); + + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); + OUT_RELOC(hiz_region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + hiz_offset); + ADVANCE_BATCH(); + } + + /* 3DSTATE_STENCIL_BUFFER */ + { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLEAR_PARAMS + * + * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: + * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE + * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. + */ + { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DRAWING_RECTANGLE */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(((mt->level[level].width - 1) & 0xffff) | + ((mt->level[level].height - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DPRIMITIVE */ + { + BEGIN_BATCH(6); + OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | + _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | + GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(1); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* See comments above at first invocation of intel_flush() in + * gen6_hiz_emit_batch_head(). + */ + intel_flush(ctx); + + /* Be safe. */ + brw->state.dirty.brw = ~0; + brw->state.dirty.cache = ~0; +} + +/** + * \param out_offset is relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + */ +void +gen6_hiz_emit_depth_stencil_state(struct brw_context *brw, + enum gen6_hiz_op op, + uint32_t *out_offset) +{ + struct gen6_depth_stencil_state *state; + state = (struct gen6_depth_stencil_state *) + brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*state), 64, + out_offset); + memset(state, 0, sizeof(*state)); + + /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ + state->ds2.depth_write_enable = 1; + if (op == GEN6_HIZ_OP_DEPTH_RESOLVE) { + state->ds2.depth_test_enable = 1; + state->ds2.depth_test_func = COMPAREFUNC_NEVER; + } +} + +/** \see intel_context::vtbl::resolve_hiz_slice */ +void +gen6_resolve_hiz_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); +} + +/** \see intel_context::vtbl::resolve_depth_slice */ +void +gen6_resolve_depth_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); +} |