From ea1274c9a6ab20a4a05371cf392953eb1d1c0f8b Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Sun, 29 Apr 2012 21:50:22 -0700 Subject: i965/hiz: Convert gen{6,7}_hiz.c to C++ This patch converts the files gen6_hiz.c and gen7_hiz.c to C++, in preparation for expanding the HiZ code to support arbitrary blits. The new files are called gen6_blorp.cpp and gen7_blorp.cpp to reflect the expanded role that this code will serve--"blorp" stands for "BLit Or Resolve Pass". Reviewed-by: Chad Versace --- src/mesa/drivers/dri/i965/Makefile.sources | 6 +- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 662 +++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/gen6_hiz.c | 662 ----------------------------- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 501 ++++++++++++++++++++++ src/mesa/drivers/dri/i965/gen7_hiz.c | 501 ---------------------- 5 files changed, 1166 insertions(+), 1166 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/gen6_blorp.cpp delete mode 100644 src/mesa/drivers/dri/i965/gen6_hiz.c create mode 100644 src/mesa/drivers/dri/i965/gen7_blorp.cpp delete mode 100644 src/mesa/drivers/dri/i965/gen7_hiz.c (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index c564d95d44b..c99a034a462 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -88,7 +88,6 @@ i965_C_FILES = \ gen6_clip_state.c \ gen6_depthstencil.c \ gen6_gs_state.c \ - gen6_hiz.c \ gen6_sampler_state.c \ gen6_scissor_state.c \ gen6_sf_state.c \ @@ -100,7 +99,6 @@ i965_C_FILES = \ gen7_cc_state.c \ gen7_clip_state.c \ gen7_disable.c \ - gen7_hiz.c \ gen7_misc_state.c \ gen7_sampler_state.c \ gen7_sf_state.c \ @@ -127,6 +125,8 @@ i965_CXX_FILES = \ brw_vec4_emit.cpp \ brw_vec4_copy_propagation.cpp \ brw_vec4_reg_allocate.cpp \ - brw_vec4_visitor.cpp + brw_vec4_visitor.cpp \ + gen6_blorp.cpp \ + gen7_blorp.cpp i965_ASM_FILES = diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp new file mode 100644 index 00000000000..536bd7ed626 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -0,0 +1,662 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "intel_batchbuffer.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" + +#include "gen6_hiz.h" + +/** + * \name Constants for HiZ VBO + * \{ + * + * \see brw_context::hiz::vertex_bo + */ +#define GEN6_HIZ_NUM_VERTICES 3 +#define GEN6_HIZ_NUM_VUE_ELEMS 8 +#define GEN6_HIZ_VBO_SIZE (GEN6_HIZ_NUM_VERTICES \ + * GEN6_HIZ_NUM_VUE_ELEMS \ + * sizeof(float)) +/** \} */ + +void +gen6_hiz_emit_batch_head(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + + /* To ensure that the batch contains only the resolve, flush the batch + * before beginning and after finishing emitting the resolve packets. + * + * Ideally, we would not need to flush for the resolve op. But, I suspect + * that it's unsafe for CMD_PIPELINE_SELECT to occur multiple times in + * a single batch, and there is no safe way to ensure that other than by + * fencing the resolve with flushes. Ideally, we would just detect if + * a batch is in progress and do the right thing, but that would require + * the ability to *safely* access brw_context::state::dirty::brw + * outside of the brw_upload_state() codepath. + */ + intel_flush(ctx); + + /* CMD_PIPELINE_SELECT + * + * Select the 3D pipeline, as opposed to the media pipeline. + */ + { + BEGIN_BATCH(1); + OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16); + ADVANCE_BATCH(); + } + + /* 3DSTATE_MULTISAMPLE */ + { + int length = intel->gen == 7 ? 4 : 3; + + BEGIN_BATCH(length); + OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (length - 2)); + OUT_BATCH(MS_PIXEL_LOCATION_CENTER | + MS_NUMSAMPLES_1); + OUT_BATCH(0); + if (length >= 4) + OUT_BATCH(0); + ADVANCE_BATCH(); + + } + + /* 3DSTATE_SAMPLE_MASK */ + { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2)); + OUT_BATCH(1); + ADVANCE_BATCH(); + } + + /* CMD_STATE_BASE_ADDRESS + * + * From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS: + * The following commands must be reissued following any change to the + * base addresses: + * 3DSTATE_CC_POINTERS + * 3DSTATE_BINDING_TABLE_POINTERS + * 3DSTATE_SAMPLER_STATE_POINTERS + * 3DSTATE_VIEWPORT_STATE_POINTERS + * MEDIA_STATE_POINTERS + */ + { + BEGIN_BATCH(10); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); + OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */ + /* SurfaceStateBaseAddress */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); + /* DynamicStateBaseAddress */ + OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER | + I915_GEM_DOMAIN_INSTRUCTION), 0, 1); + OUT_BATCH(1); /* IndirectObjectBaseAddress */ + OUT_BATCH(1); /* InstructionBaseAddress */ + OUT_BATCH(1); /* GeneralStateUpperBound */ + OUT_BATCH(1); /* DynamicStateUpperBound */ + OUT_BATCH(1); /* IndirectObjectUpperBound*/ + OUT_BATCH(1); /* InstructionAccessUpperBound */ + ADVANCE_BATCH(); + } +} + +void +gen6_hiz_emit_vertices(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer) +{ + struct intel_context *intel = &brw->intel; + uint32_t vertex_offset; + + /* Setup VBO for the rectangle primitive.. + * + * A rectangle primitive (3DPRIM_RECTLIST) consists of only three + * vertices. The vertices reside in screen space with DirectX coordinates + * (that is, (0, 0) is the upper left corner). + * + * v2 ------ implied + * | | + * | | + * v0 ----- v1 + * + * Since the VS is disabled, the clipper loads each VUE directly from + * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and + * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: + * dw0: Reserved, MBZ. + * dw1: Render Target Array Index. The HiZ op does not use indexed + * vertices, so set the dword to 0. + * dw2: Viewport Index. The HiZ op disables viewport mapping and + * scissoring, so set the dword to 0. + * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so + * set the dword to 0. + * dw4: Vertex Position X. + * dw5: Vertex Position Y. + * dw6: Vertex Position Z. + * dw7: Vertex Position W. + * + * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 + * "Vertex URB Entry (VUE) Formats". + */ + { + const int width = mt->level[level].width; + const int height = mt->level[level].height; + float *vertex_data; + + const float vertices[GEN6_HIZ_VBO_SIZE] = { + /* v0 */ 0, 0, 0, 0, 0, height, 0, 1, + /* v1 */ 0, 0, 0, 0, width, height, 0, 1, + /* v2 */ 0, 0, 0, 0, 0, 0, 0, 1, + }; + + vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_NO_TYPE, + GEN6_HIZ_VBO_SIZE, 32, + &vertex_offset); + memcpy(vertex_data, vertices, GEN6_HIZ_VBO_SIZE); + } + + /* 3DSTATE_VERTEX_BUFFERS */ + { + const int num_buffers = 1; + const int batch_length = 1 + 4 * num_buffers; + + uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA | + (GEN6_HIZ_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT; + + if (intel->gen >= 7) + dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; + + BEGIN_BATCH(batch_length); + OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); + OUT_BATCH(dw0); + /* start address */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, + vertex_offset); + /* end address */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, + vertex_offset + GEN6_HIZ_VBO_SIZE - 1); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_VERTEX_ELEMENTS + * + * Fetch dwords 0 - 7 from each VUE. See the comments above where + * hiz->vertex_bo is filled with data. + */ + { + const int num_elements = 2; + const int batch_length = 1 + 2 * num_elements; + + BEGIN_BATCH(batch_length); + OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2)); + /* Element 0 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 0 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); + /* Element 1 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 16 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); + ADVANCE_BATCH(); + } +} + +/** + * \brief Execute a HiZ op on a miptree slice. + * + * To execute the HiZ op, this function manually constructs and emits a batch + * to "draw" the HiZ op's rectangle primitive. The batchbuffer is flushed + * before constructing and after emitting the batch. + * + * This function alters no GL state. + * + * For an overview of HiZ ops, see the following sections of the Sandy Bridge + * PRM, Volume 1, Part 2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ +static void +gen6_hiz_exec(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer, + enum gen6_hiz_op op) +{ + struct gl_context *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + uint32_t draw_x, draw_y; + uint32_t tile_mask_x, tile_mask_y; + + assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */ + assert(mt->hiz_mt != NULL); + intel_miptree_check_level_layer(mt, level, layer); + + { + /* Construct a dummy renderbuffer just to extract tile offsets. */ + struct intel_renderbuffer rb; + rb.mt = mt; + rb.mt_level = level; + rb.mt_layer = layer; + intel_renderbuffer_set_draw_offset(&rb); + draw_x = rb.draw_x; + draw_y = rb.draw_y; + } + + /* Compute masks to determine how much of draw_x and draw_y should be + * performed using the fine adjustment of "depth coordinate offset X/Y" + * (dw5 of 3DSTATE_DEPTH_BUFFER). See the emit_depthbuffer() function for + * details. + */ + { + uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y; + intel_region_get_tile_masks(mt->region, &depth_mask_x, &depth_mask_y); + intel_region_get_tile_masks(mt->hiz_mt->region, + &hiz_mask_x, &hiz_mask_y); + + /* Each HiZ row represents 2 rows of pixels */ + hiz_mask_y = hiz_mask_y << 1 | 1; + + tile_mask_x = depth_mask_x | hiz_mask_x; + tile_mask_y = depth_mask_y | hiz_mask_y; + } + + gen6_hiz_emit_batch_head(brw); + gen6_hiz_emit_vertices(brw, mt, level, layer); + + /* 3DSTATE_URB + * + * Assign the entire URB to the VS. Even though the VS disabled, URB space + * is still needed because the clipper loads the VUE's from the URB. From + * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, + * Dword 1.15:0 "VS Number of URB Entries": + * This field is always used (even if VS Function Enable is DISABLED). + * + * The warning below appears in the PRM (Section 3DSTATE_URB), but we can + * safely ignore it because this batch contains only one draw call. + * Because of URB corruption caused by allocating a previous GS unit + * URB entry to the VS unit, software is required to send a “GS NULL + * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) + * plus a dummy DRAW call before any case where VS will be taking over + * GS URB space. + */ + { + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); + OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CC_STATE_POINTERS + * + * The pointer offsets are relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + * + * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. + */ + { + uint32_t depthstencil_offset; + gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); + OUT_BATCH(1); /* BLEND_STATE offset */ + OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */ + OUT_BATCH(1); /* COLOR_CALC_STATE offset */ + ADVANCE_BATCH(); + } + + /* 3DSTATE_VS + * + * Disable vertex shader. + */ + { + /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section + * 3DSTATE_VS, Dword 5.0 "VS Function Enable": + * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS + * command that causes the VS Function Enable to toggle. Pipeline + * flush can be executed by sending a PIPE_CONTROL command with CS + * stall bit set and a post sync operation. + */ + intel_emit_post_sync_nonzero_flush(intel); + + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_GS + * + * Disable the geometry shader. + */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLIP + * + * Disable the clipper. + * + * The HiZ op emits a rectangle primitive, which requires clipping to + * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 "3D Primitives Overview": + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + * + * Also disable perspective divide. This doesn't change the clipper's + * output, but does spare a few electrons. + */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_SF + * + * Disable ViewportTransformEnable (dw2.1) + * + * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D + * Primitives Overview": + * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the + * use of screen- space coordinates). + * + * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) + * and BackFaceFillMode (dw2.5:6) to SOLID(0). + * + * From the Sandy Bridge PRM, Volume 2, Part 1, Section + * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: + * SOLID: Any triangle or rectangle object found to be front-facing + * is rendered as a solid object. This setting is required when + * (rendering rectangle (RECTLIST) objects. + */ + { + BEGIN_BATCH(20); + OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); + OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */ + 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); + for (int i = 0; i < 18; ++i) + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_WM + * + * Disable thread dispatch (dw5.19) and enable the HiZ op. + * + * Even though thread dispatch is disabled, max threads (dw5.25:31) must be + * nonzero to prevent the GPU from hanging. See the valid ranges in the + * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31 + * "Maximum Number Of Threads". + */ + { + uint32_t dw4 = 0; + + switch (op) { + case GEN6_HIZ_OP_DEPTH_CLEAR: + assert(!"not implemented"); + dw4 |= GEN6_WM_DEPTH_CLEAR; + break; + case GEN6_HIZ_OP_DEPTH_RESOLVE: + dw4 |= GEN6_WM_DEPTH_RESOLVE; + break; + case GEN6_HIZ_OP_HIZ_RESOLVE: + dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + default: + assert(0); + break; + } + + BEGIN_BATCH(9); + OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(dw4); + OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT); + OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DEPTH_BUFFER */ + { + uint32_t width = mt->level[level].width; + uint32_t height = mt->level[level].height; + + uint32_t tile_x = draw_x & tile_mask_x; + uint32_t tile_y = draw_y & tile_mask_y; + uint32_t offset = intel_region_get_aligned_offset(mt->region, + draw_x & ~tile_mask_x, + draw_y & ~tile_mask_y); + + /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 + * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth + * Coordinate Offset X/Y": + * + * "The 3 LSBs of both offsets must be zero to ensure correct + * alignment" + * + * We have no guarantee that tile_x and tile_y are correctly aligned, + * since they are determined by the mipmap layout, which is only aligned + * to multiples of 4. + * + * So, to avoid hanging the GPU, just smash the low order 3 bits of + * tile_x and tile_y to 0. This is a temporary workaround until we come + * up with a better solution. + */ + tile_x &= ~7; + tile_y &= ~7; + + uint32_t format; + switch (mt->format) { + case MESA_FORMAT_Z16: format = BRW_DEPTHFORMAT_D16_UNORM; break; + case MESA_FORMAT_Z32_FLOAT: format = BRW_DEPTHFORMAT_D32_FLOAT; break; + case MESA_FORMAT_X8_Z24: format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break; + default: assert(0); break; + } + + intel_emit_post_sync_nonzero_flush(intel); + intel_emit_depth_stall_flushes(intel); + + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); + OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) | + format << 18 | + 1 << 21 | /* separate stencil enable */ + 1 << 22 | /* hiz enable */ + BRW_TILEWALK_YMAJOR << 26 | + 1 << 27 | /* y-tiled */ + BRW_SURFACE_2D << 29); + OUT_RELOC(mt->region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + offset); + OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 | + (width + tile_x - 1) << 6 | + (height + tile_y - 1) << 19); + OUT_BATCH(0); + OUT_BATCH(tile_x | + tile_y << 16); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_HIER_DEPTH_BUFFER */ + { + struct intel_region *hiz_region = mt->hiz_mt->region; + uint32_t hiz_offset = + intel_region_get_aligned_offset(hiz_region, + draw_x & ~tile_mask_x, + (draw_y & ~tile_mask_y) / 2); + + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); + OUT_RELOC(hiz_region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + hiz_offset); + ADVANCE_BATCH(); + } + + /* 3DSTATE_STENCIL_BUFFER */ + { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLEAR_PARAMS + * + * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: + * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE + * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. + */ + { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DRAWING_RECTANGLE */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(((mt->level[level].width - 1) & 0xffff) | + ((mt->level[level].height - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DPRIMITIVE */ + { + BEGIN_BATCH(6); + OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | + _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | + GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(1); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* See comments above at first invocation of intel_flush() in + * gen6_hiz_emit_batch_head(). + */ + intel_flush(ctx); + + /* Be safe. */ + brw->state.dirty.brw = ~0; + brw->state.dirty.cache = ~0; +} + +/** + * \param out_offset is relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + */ +void +gen6_hiz_emit_depth_stencil_state(struct brw_context *brw, + enum gen6_hiz_op op, + uint32_t *out_offset) +{ + struct gen6_depth_stencil_state *state; + state = (struct gen6_depth_stencil_state *) + brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*state), 64, + out_offset); + memset(state, 0, sizeof(*state)); + + /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ + state->ds2.depth_write_enable = 1; + if (op == GEN6_HIZ_OP_DEPTH_RESOLVE) { + state->ds2.depth_test_enable = 1; + state->ds2.depth_test_func = COMPAREFUNC_NEVER; + } +} + +/** \see intel_context::vtbl::resolve_hiz_slice */ +void +gen6_resolve_hiz_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); +} + +/** \see intel_context::vtbl::resolve_depth_slice */ +void +gen6_resolve_depth_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); +} diff --git a/src/mesa/drivers/dri/i965/gen6_hiz.c b/src/mesa/drivers/dri/i965/gen6_hiz.c deleted file mode 100644 index 536bd7ed626..00000000000 --- a/src/mesa/drivers/dri/i965/gen6_hiz.c +++ /dev/null @@ -1,662 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "intel_batchbuffer.h" -#include "intel_fbo.h" -#include "intel_mipmap_tree.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" - -#include "gen6_hiz.h" - -/** - * \name Constants for HiZ VBO - * \{ - * - * \see brw_context::hiz::vertex_bo - */ -#define GEN6_HIZ_NUM_VERTICES 3 -#define GEN6_HIZ_NUM_VUE_ELEMS 8 -#define GEN6_HIZ_VBO_SIZE (GEN6_HIZ_NUM_VERTICES \ - * GEN6_HIZ_NUM_VUE_ELEMS \ - * sizeof(float)) -/** \} */ - -void -gen6_hiz_emit_batch_head(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->intel.ctx; - struct intel_context *intel = &brw->intel; - - /* To ensure that the batch contains only the resolve, flush the batch - * before beginning and after finishing emitting the resolve packets. - * - * Ideally, we would not need to flush for the resolve op. But, I suspect - * that it's unsafe for CMD_PIPELINE_SELECT to occur multiple times in - * a single batch, and there is no safe way to ensure that other than by - * fencing the resolve with flushes. Ideally, we would just detect if - * a batch is in progress and do the right thing, but that would require - * the ability to *safely* access brw_context::state::dirty::brw - * outside of the brw_upload_state() codepath. - */ - intel_flush(ctx); - - /* CMD_PIPELINE_SELECT - * - * Select the 3D pipeline, as opposed to the media pipeline. - */ - { - BEGIN_BATCH(1); - OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16); - ADVANCE_BATCH(); - } - - /* 3DSTATE_MULTISAMPLE */ - { - int length = intel->gen == 7 ? 4 : 3; - - BEGIN_BATCH(length); - OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (length - 2)); - OUT_BATCH(MS_PIXEL_LOCATION_CENTER | - MS_NUMSAMPLES_1); - OUT_BATCH(0); - if (length >= 4) - OUT_BATCH(0); - ADVANCE_BATCH(); - - } - - /* 3DSTATE_SAMPLE_MASK */ - { - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2)); - OUT_BATCH(1); - ADVANCE_BATCH(); - } - - /* CMD_STATE_BASE_ADDRESS - * - * From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS: - * The following commands must be reissued following any change to the - * base addresses: - * 3DSTATE_CC_POINTERS - * 3DSTATE_BINDING_TABLE_POINTERS - * 3DSTATE_SAMPLER_STATE_POINTERS - * 3DSTATE_VIEWPORT_STATE_POINTERS - * MEDIA_STATE_POINTERS - */ - { - BEGIN_BATCH(10); - OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); - OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */ - /* SurfaceStateBaseAddress */ - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); - /* DynamicStateBaseAddress */ - OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER | - I915_GEM_DOMAIN_INSTRUCTION), 0, 1); - OUT_BATCH(1); /* IndirectObjectBaseAddress */ - OUT_BATCH(1); /* InstructionBaseAddress */ - OUT_BATCH(1); /* GeneralStateUpperBound */ - OUT_BATCH(1); /* DynamicStateUpperBound */ - OUT_BATCH(1); /* IndirectObjectUpperBound*/ - OUT_BATCH(1); /* InstructionAccessUpperBound */ - ADVANCE_BATCH(); - } -} - -void -gen6_hiz_emit_vertices(struct brw_context *brw, - struct intel_mipmap_tree *mt, - unsigned int level, - unsigned int layer) -{ - struct intel_context *intel = &brw->intel; - uint32_t vertex_offset; - - /* Setup VBO for the rectangle primitive.. - * - * A rectangle primitive (3DPRIM_RECTLIST) consists of only three - * vertices. The vertices reside in screen space with DirectX coordinates - * (that is, (0, 0) is the upper left corner). - * - * v2 ------ implied - * | | - * | | - * v0 ----- v1 - * - * Since the VS is disabled, the clipper loads each VUE directly from - * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and - * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: - * dw0: Reserved, MBZ. - * dw1: Render Target Array Index. The HiZ op does not use indexed - * vertices, so set the dword to 0. - * dw2: Viewport Index. The HiZ op disables viewport mapping and - * scissoring, so set the dword to 0. - * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so - * set the dword to 0. - * dw4: Vertex Position X. - * dw5: Vertex Position Y. - * dw6: Vertex Position Z. - * dw7: Vertex Position W. - * - * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 - * "Vertex URB Entry (VUE) Formats". - */ - { - const int width = mt->level[level].width; - const int height = mt->level[level].height; - float *vertex_data; - - const float vertices[GEN6_HIZ_VBO_SIZE] = { - /* v0 */ 0, 0, 0, 0, 0, height, 0, 1, - /* v1 */ 0, 0, 0, 0, width, height, 0, 1, - /* v2 */ 0, 0, 0, 0, 0, 0, 0, 1, - }; - - vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_NO_TYPE, - GEN6_HIZ_VBO_SIZE, 32, - &vertex_offset); - memcpy(vertex_data, vertices, GEN6_HIZ_VBO_SIZE); - } - - /* 3DSTATE_VERTEX_BUFFERS */ - { - const int num_buffers = 1; - const int batch_length = 1 + 4 * num_buffers; - - uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA | - (GEN6_HIZ_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT; - - if (intel->gen >= 7) - dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; - - BEGIN_BATCH(batch_length); - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); - OUT_BATCH(dw0); - /* start address */ - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, - vertex_offset); - /* end address */ - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, - vertex_offset + GEN6_HIZ_VBO_SIZE - 1); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_VERTEX_ELEMENTS - * - * Fetch dwords 0 - 7 from each VUE. See the comments above where - * hiz->vertex_bo is filled with data. - */ - { - const int num_elements = 2; - const int batch_length = 1 + 2 * num_elements; - - BEGIN_BATCH(batch_length); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2)); - /* Element 0 */ - OUT_BATCH(GEN6_VE0_VALID | - BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | - 0 << BRW_VE0_SRC_OFFSET_SHIFT); - OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); - /* Element 1 */ - OUT_BATCH(GEN6_VE0_VALID | - BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | - 16 << BRW_VE0_SRC_OFFSET_SHIFT); - OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); - ADVANCE_BATCH(); - } -} - -/** - * \brief Execute a HiZ op on a miptree slice. - * - * To execute the HiZ op, this function manually constructs and emits a batch - * to "draw" the HiZ op's rectangle primitive. The batchbuffer is flushed - * before constructing and after emitting the batch. - * - * This function alters no GL state. - * - * For an overview of HiZ ops, see the following sections of the Sandy Bridge - * PRM, Volume 1, Part 2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve - */ -static void -gen6_hiz_exec(struct intel_context *intel, - struct intel_mipmap_tree *mt, - unsigned int level, - unsigned int layer, - enum gen6_hiz_op op) -{ - struct gl_context *ctx = &intel->ctx; - struct brw_context *brw = brw_context(ctx); - uint32_t draw_x, draw_y; - uint32_t tile_mask_x, tile_mask_y; - - assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */ - assert(mt->hiz_mt != NULL); - intel_miptree_check_level_layer(mt, level, layer); - - { - /* Construct a dummy renderbuffer just to extract tile offsets. */ - struct intel_renderbuffer rb; - rb.mt = mt; - rb.mt_level = level; - rb.mt_layer = layer; - intel_renderbuffer_set_draw_offset(&rb); - draw_x = rb.draw_x; - draw_y = rb.draw_y; - } - - /* Compute masks to determine how much of draw_x and draw_y should be - * performed using the fine adjustment of "depth coordinate offset X/Y" - * (dw5 of 3DSTATE_DEPTH_BUFFER). See the emit_depthbuffer() function for - * details. - */ - { - uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y; - intel_region_get_tile_masks(mt->region, &depth_mask_x, &depth_mask_y); - intel_region_get_tile_masks(mt->hiz_mt->region, - &hiz_mask_x, &hiz_mask_y); - - /* Each HiZ row represents 2 rows of pixels */ - hiz_mask_y = hiz_mask_y << 1 | 1; - - tile_mask_x = depth_mask_x | hiz_mask_x; - tile_mask_y = depth_mask_y | hiz_mask_y; - } - - gen6_hiz_emit_batch_head(brw); - gen6_hiz_emit_vertices(brw, mt, level, layer); - - /* 3DSTATE_URB - * - * Assign the entire URB to the VS. Even though the VS disabled, URB space - * is still needed because the clipper loads the VUE's from the URB. From - * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, - * Dword 1.15:0 "VS Number of URB Entries": - * This field is always used (even if VS Function Enable is DISABLED). - * - * The warning below appears in the PRM (Section 3DSTATE_URB), but we can - * safely ignore it because this batch contains only one draw call. - * Because of URB corruption caused by allocating a previous GS unit - * URB entry to the VS unit, software is required to send a “GS NULL - * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) - * plus a dummy DRAW call before any case where VS will be taking over - * GS URB space. - */ - { - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); - OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_CC_STATE_POINTERS - * - * The pointer offsets are relative to - * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - * - * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. - */ - { - uint32_t depthstencil_offset; - gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset); - - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); - OUT_BATCH(1); /* BLEND_STATE offset */ - OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */ - OUT_BATCH(1); /* COLOR_CALC_STATE offset */ - ADVANCE_BATCH(); - } - - /* 3DSTATE_VS - * - * Disable vertex shader. - */ - { - /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section - * 3DSTATE_VS, Dword 5.0 "VS Function Enable": - * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS - * command that causes the VS Function Enable to toggle. Pipeline - * flush can be executed by sending a PIPE_CONTROL command with CS - * stall bit set and a post sync operation. - */ - intel_emit_post_sync_nonzero_flush(intel); - - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_GS - * - * Disable the geometry shader. - */ - { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_CLIP - * - * Disable the clipper. - * - * The HiZ op emits a rectangle primitive, which requires clipping to - * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 - * Section 1.3 "3D Primitives Overview": - * RECTLIST: - * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. - * - * Also disable perspective divide. This doesn't change the clipper's - * output, but does spare a few electrons. - */ - { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_SF - * - * Disable ViewportTransformEnable (dw2.1) - * - * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). - * - * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) - * and BackFaceFillMode (dw2.5:6) to SOLID(0). - * - * From the Sandy Bridge PRM, Volume 2, Part 1, Section - * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: - * SOLID: Any triangle or rectangle object found to be front-facing - * is rendered as a solid object. This setting is required when - * (rendering rectangle (RECTLIST) objects. - */ - { - BEGIN_BATCH(20); - OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); - OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */ - 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | - 0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); - for (int i = 0; i < 18; ++i) - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_WM - * - * Disable thread dispatch (dw5.19) and enable the HiZ op. - * - * Even though thread dispatch is disabled, max threads (dw5.25:31) must be - * nonzero to prevent the GPU from hanging. See the valid ranges in the - * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31 - * "Maximum Number Of Threads". - */ - { - uint32_t dw4 = 0; - - switch (op) { - case GEN6_HIZ_OP_DEPTH_CLEAR: - assert(!"not implemented"); - dw4 |= GEN6_WM_DEPTH_CLEAR; - break; - case GEN6_HIZ_OP_DEPTH_RESOLVE: - dw4 |= GEN6_WM_DEPTH_RESOLVE; - break; - case GEN6_HIZ_OP_HIZ_RESOLVE: - dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; - break; - default: - assert(0); - break; - } - - BEGIN_BATCH(9); - OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(dw4); - OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT); - OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_DEPTH_BUFFER */ - { - uint32_t width = mt->level[level].width; - uint32_t height = mt->level[level].height; - - uint32_t tile_x = draw_x & tile_mask_x; - uint32_t tile_y = draw_y & tile_mask_y; - uint32_t offset = intel_region_get_aligned_offset(mt->region, - draw_x & ~tile_mask_x, - draw_y & ~tile_mask_y); - - /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 - * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth - * Coordinate Offset X/Y": - * - * "The 3 LSBs of both offsets must be zero to ensure correct - * alignment" - * - * We have no guarantee that tile_x and tile_y are correctly aligned, - * since they are determined by the mipmap layout, which is only aligned - * to multiples of 4. - * - * So, to avoid hanging the GPU, just smash the low order 3 bits of - * tile_x and tile_y to 0. This is a temporary workaround until we come - * up with a better solution. - */ - tile_x &= ~7; - tile_y &= ~7; - - uint32_t format; - switch (mt->format) { - case MESA_FORMAT_Z16: format = BRW_DEPTHFORMAT_D16_UNORM; break; - case MESA_FORMAT_Z32_FLOAT: format = BRW_DEPTHFORMAT_D32_FLOAT; break; - case MESA_FORMAT_X8_Z24: format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break; - default: assert(0); break; - } - - intel_emit_post_sync_nonzero_flush(intel); - intel_emit_depth_stall_flushes(intel); - - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); - OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) | - format << 18 | - 1 << 21 | /* separate stencil enable */ - 1 << 22 | /* hiz enable */ - BRW_TILEWALK_YMAJOR << 26 | - 1 << 27 | /* y-tiled */ - BRW_SURFACE_2D << 29); - OUT_RELOC(mt->region->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - offset); - OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 | - (width + tile_x - 1) << 6 | - (height + tile_y - 1) << 19); - OUT_BATCH(0); - OUT_BATCH(tile_x | - tile_y << 16); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_HIER_DEPTH_BUFFER */ - { - struct intel_region *hiz_region = mt->hiz_mt->region; - uint32_t hiz_offset = - intel_region_get_aligned_offset(hiz_region, - draw_x & ~tile_mask_x, - (draw_y & ~tile_mask_y) / 2); - - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); - OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); - OUT_RELOC(hiz_region->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - hiz_offset); - ADVANCE_BATCH(); - } - - /* 3DSTATE_STENCIL_BUFFER */ - { - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_CLEAR_PARAMS - * - * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: - * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE - * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. - */ - { - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_DRAWING_RECTANGLE */ - { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(((mt->level[level].width - 1) & 0xffff) | - ((mt->level[level].height - 1) << 16)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DPRIMITIVE */ - { - BEGIN_BATCH(6); - OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | - _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | - GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); - OUT_BATCH(3); /* vertex count per instance */ - OUT_BATCH(0); - OUT_BATCH(1); /* instance count */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* See comments above at first invocation of intel_flush() in - * gen6_hiz_emit_batch_head(). - */ - intel_flush(ctx); - - /* Be safe. */ - brw->state.dirty.brw = ~0; - brw->state.dirty.cache = ~0; -} - -/** - * \param out_offset is relative to - * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - */ -void -gen6_hiz_emit_depth_stencil_state(struct brw_context *brw, - enum gen6_hiz_op op, - uint32_t *out_offset) -{ - struct gen6_depth_stencil_state *state; - state = (struct gen6_depth_stencil_state *) - brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, - sizeof(*state), 64, - out_offset); - memset(state, 0, sizeof(*state)); - - /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve - */ - state->ds2.depth_write_enable = 1; - if (op == GEN6_HIZ_OP_DEPTH_RESOLVE) { - state->ds2.depth_test_enable = 1; - state->ds2.depth_test_func = COMPAREFUNC_NEVER; - } -} - -/** \see intel_context::vtbl::resolve_hiz_slice */ -void -gen6_resolve_hiz_slice(struct intel_context *intel, - struct intel_mipmap_tree *mt, - uint32_t level, - uint32_t layer) -{ - gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); -} - -/** \see intel_context::vtbl::resolve_depth_slice */ -void -gen6_resolve_depth_slice(struct intel_context *intel, - struct intel_mipmap_tree *mt, - uint32_t level, - uint32_t layer) -{ - gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); -} diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp new file mode 100644 index 00000000000..18272491be8 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -0,0 +1,501 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "intel_batchbuffer.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" + +#include "gen6_hiz.h" +#include "gen7_hiz.h" + +/** + * \copydoc gen6_hiz_exec() + */ +static void +gen7_hiz_exec(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer, + enum gen6_hiz_op op) +{ + struct gl_context *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + uint32_t draw_x, draw_y; + uint32_t tile_mask_x, tile_mask_y; + + assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */ + assert(mt->hiz_mt != NULL); + intel_miptree_check_level_layer(mt, level, layer); + + uint32_t depth_format; + switch (mt->format) { + case MESA_FORMAT_Z16: depth_format = BRW_DEPTHFORMAT_D16_UNORM; break; + case MESA_FORMAT_Z32_FLOAT: depth_format = BRW_DEPTHFORMAT_D32_FLOAT; break; + case MESA_FORMAT_X8_Z24: depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break; + default: assert(0); break; + } + + { + /* Construct a dummy renderbuffer just to extract tile offsets. */ + struct intel_renderbuffer rb; + rb.mt = mt; + rb.mt_level = level; + rb.mt_layer = layer; + intel_renderbuffer_set_draw_offset(&rb); + draw_x = rb.draw_x; + draw_y = rb.draw_y; + } + + /* Compute masks to determine how much of draw_x and draw_y should be + * performed using the fine adjustment of "depth coordinate offset X/Y" + * (dw5 of 3DSTATE_DEPTH_BUFFER). See the emit_depthbuffer() function for + * details. + */ + { + uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y; + intel_region_get_tile_masks(mt->region, &depth_mask_x, &depth_mask_y); + intel_region_get_tile_masks(mt->hiz_mt->region, + &hiz_mask_x, &hiz_mask_y); + + /* Each HiZ row represents 2 rows of pixels */ + hiz_mask_y = hiz_mask_y << 1 | 1; + + tile_mask_x = depth_mask_x | hiz_mask_x; + tile_mask_y = depth_mask_y | hiz_mask_y; + } + + gen6_hiz_emit_batch_head(brw); + gen6_hiz_emit_vertices(brw, mt, level, layer); + + /* 3DSTATE_URB_VS + * 3DSTATE_URB_HS + * 3DSTATE_URB_DS + * 3DSTATE_URB_GS + * + * If the 3DSTATE_URB_VS is emitted, than the others must be also. From the + * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS: + * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be + * programmed in order for the programming of this state to be + * valid. + */ + { + /* The minimum valid value is 32. See 3DSTATE_URB_VS, + * Dword 1.15:0 "VS Number of URB Entries". + */ + int num_vs_entries = 32; + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2)); + OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT | + 0 << GEN7_URB_STARTING_ADDRESS_SHIFT | + num_vs_entries); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS + * + * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + */ + { + uint32_t depthstencil_offset; + gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(depthstencil_offset | 1); + ADVANCE_BATCH(); + } + + /* 3DSTATE_VS + * + * Disable vertex shader. + */ + { + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_HS + * + * Disable the hull shader. + */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_TE + * + * Disable the tesselation engine. + */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DS + * + * Disable the domain shader. + */ + { + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_GS + * + * Disable the geometry shader. + */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_STREAMOUT + * + * Disable streamout. + */ + { + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLIP + * + * Disable the clipper. + * + * The HiZ op emits a rectangle primitive, which requires clipping to + * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 "3D Primitives Overview": + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + * + * Also disable perspective divide. This doesn't change the clipper's + * output, but does spare a few electrons. + */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_SF + * + * Disable ViewportTransformEnable (dw1.1) + * + * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D + * Primitives Overview": + * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the + * use of screen- space coordinates). + * + * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5) + * and BackFaceFillMode (dw1.4:3) to SOLID(0). + * + * From the Sandy Bridge PRM, Volume 2, Part 1, Section + * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: + * SOLID: Any triangle or rectangle object found to be front-facing + * is rendered as a solid object. This setting is required when + * (rendering rectangle (RECTLIST) objects. + */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); + OUT_BATCH(depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_SBE */ + { + BEGIN_BATCH(14); + OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); + OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */ + 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | + 0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); + for (int i = 0; i < 12; ++i) + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_WM + * + * Disable PS thread dispatch (dw1.29) and enable the HiZ op. + */ + { + uint32_t dw1 = 0; + + switch (op) { + case GEN6_HIZ_OP_DEPTH_CLEAR: + assert(!"not implemented"); + dw1 |= GEN7_WM_DEPTH_CLEAR; + break; + case GEN6_HIZ_OP_DEPTH_RESOLVE: + dw1 |= GEN7_WM_DEPTH_RESOLVE; + break; + case GEN6_HIZ_OP_HIZ_RESOLVE: + dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + default: + assert(0); + break; + } + + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); + OUT_BATCH(dw1); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_PS + * + * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite + * that, thread dispatch info must still be specified. + * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec + * states that the valid range for this field is [0x3, 0x2f]. + * - A dispatch mode must be given; that is, at least one of the + * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was + * discovered through simulator error messages. + */ + { + BEGIN_BATCH(8); + OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(((brw->max_wm_threads - 1) << IVB_PS_MAX_THREADS_SHIFT) | + GEN7_PS_32_DISPATCH_ENABLE); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DEPTH_BUFFER */ + { + uint32_t width = mt->level[level].width; + uint32_t height = mt->level[level].height; + + uint32_t tile_x = draw_x & tile_mask_x; + uint32_t tile_y = draw_y & tile_mask_y; + uint32_t offset = intel_region_get_aligned_offset(mt->region, + draw_x & ~tile_mask_x, + draw_y & ~tile_mask_y); + + /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 + * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth + * Coordinate Offset X/Y": + * + * "The 3 LSBs of both offsets must be zero to ensure correct + * alignment" + * + * We have no guarantee that tile_x and tile_y are correctly aligned, + * since they are determined by the mipmap layout, which is only aligned + * to multiples of 4. + * + * So, to avoid hanging the GPU, just smash the low order 3 bits of + * tile_x and tile_y to 0. This is a temporary workaround until we come + * up with a better solution. + */ + tile_x &= ~7; + tile_y &= ~7; + + intel_emit_depth_stall_flushes(intel); + + BEGIN_BATCH(7); + OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); + OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) | + depth_format << 18 | + 1 << 22 | /* hiz enable */ + 1 << 28 | /* depth write */ + BRW_SURFACE_2D << 29); + OUT_RELOC(mt->region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + offset); + OUT_BATCH((width + tile_x - 1) << 4 | + (height + tile_y - 1) << 18); + OUT_BATCH(0); + OUT_BATCH(tile_x | + tile_y << 16); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_HIER_DEPTH_BUFFER */ + { + struct intel_region *hiz_region = mt->hiz_mt->region; + uint32_t hiz_offset = + intel_region_get_aligned_offset(hiz_region, + draw_x & ~tile_mask_x, + (draw_y & ~tile_mask_y) / 2); + + BEGIN_BATCH(3); + OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); + OUT_RELOC(hiz_region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + hiz_offset); + ADVANCE_BATCH(); + } + + /* 3DSTATE_STENCIL_BUFFER */ + { + BEGIN_BATCH(3); + OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLEAR_PARAMS + * + * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2 + * 3DSTATE_CLEAR_PARAMS: + * [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed in the along + * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, + * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). + */ + { + BEGIN_BATCH(3); + OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DRAWING_RECTANGLE */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(((mt->level[level].width - 1) & 0xffff) | + ((mt->level[level].height - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DPRIMITIVE */ + { + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); + OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | + _3DPRIM_RECTLIST); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(1); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* See comments above at first invocation of intel_flush() in + * gen6_hiz_emit_batch_head(). + */ + intel_flush(ctx); + + /* Be safe. */ + brw->state.dirty.brw = ~0; + brw->state.dirty.cache = ~0; +} + +/** \copydoc gen6_resolve_hiz_slice() */ +void +gen7_resolve_hiz_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); +} + +/** \copydoc gen6_resolve_depth_slice() */ +void +gen7_resolve_depth_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); +} diff --git a/src/mesa/drivers/dri/i965/gen7_hiz.c b/src/mesa/drivers/dri/i965/gen7_hiz.c deleted file mode 100644 index 18272491be8..00000000000 --- a/src/mesa/drivers/dri/i965/gen7_hiz.c +++ /dev/null @@ -1,501 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "intel_batchbuffer.h" -#include "intel_fbo.h" -#include "intel_mipmap_tree.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" - -#include "gen6_hiz.h" -#include "gen7_hiz.h" - -/** - * \copydoc gen6_hiz_exec() - */ -static void -gen7_hiz_exec(struct intel_context *intel, - struct intel_mipmap_tree *mt, - unsigned int level, - unsigned int layer, - enum gen6_hiz_op op) -{ - struct gl_context *ctx = &intel->ctx; - struct brw_context *brw = brw_context(ctx); - uint32_t draw_x, draw_y; - uint32_t tile_mask_x, tile_mask_y; - - assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */ - assert(mt->hiz_mt != NULL); - intel_miptree_check_level_layer(mt, level, layer); - - uint32_t depth_format; - switch (mt->format) { - case MESA_FORMAT_Z16: depth_format = BRW_DEPTHFORMAT_D16_UNORM; break; - case MESA_FORMAT_Z32_FLOAT: depth_format = BRW_DEPTHFORMAT_D32_FLOAT; break; - case MESA_FORMAT_X8_Z24: depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break; - default: assert(0); break; - } - - { - /* Construct a dummy renderbuffer just to extract tile offsets. */ - struct intel_renderbuffer rb; - rb.mt = mt; - rb.mt_level = level; - rb.mt_layer = layer; - intel_renderbuffer_set_draw_offset(&rb); - draw_x = rb.draw_x; - draw_y = rb.draw_y; - } - - /* Compute masks to determine how much of draw_x and draw_y should be - * performed using the fine adjustment of "depth coordinate offset X/Y" - * (dw5 of 3DSTATE_DEPTH_BUFFER). See the emit_depthbuffer() function for - * details. - */ - { - uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y; - intel_region_get_tile_masks(mt->region, &depth_mask_x, &depth_mask_y); - intel_region_get_tile_masks(mt->hiz_mt->region, - &hiz_mask_x, &hiz_mask_y); - - /* Each HiZ row represents 2 rows of pixels */ - hiz_mask_y = hiz_mask_y << 1 | 1; - - tile_mask_x = depth_mask_x | hiz_mask_x; - tile_mask_y = depth_mask_y | hiz_mask_y; - } - - gen6_hiz_emit_batch_head(brw); - gen6_hiz_emit_vertices(brw, mt, level, layer); - - /* 3DSTATE_URB_VS - * 3DSTATE_URB_HS - * 3DSTATE_URB_DS - * 3DSTATE_URB_GS - * - * If the 3DSTATE_URB_VS is emitted, than the others must be also. From the - * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS: - * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be - * programmed in order for the programming of this state to be - * valid. - */ - { - /* The minimum valid value is 32. See 3DSTATE_URB_VS, - * Dword 1.15:0 "VS Number of URB Entries". - */ - int num_vs_entries = 32; - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2)); - OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT | - 0 << GEN7_URB_STARTING_ADDRESS_SHIFT | - num_vs_entries); - ADVANCE_BATCH(); - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS - * - * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - */ - { - uint32_t depthstencil_offset; - gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset); - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(depthstencil_offset | 1); - ADVANCE_BATCH(); - } - - /* 3DSTATE_VS - * - * Disable vertex shader. - */ - { - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_HS - * - * Disable the hull shader. - */ - { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_TE - * - * Disable the tesselation engine. - */ - { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_DS - * - * Disable the domain shader. - */ - { - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_GS - * - * Disable the geometry shader. - */ - { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_STREAMOUT - * - * Disable streamout. - */ - { - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_CLIP - * - * Disable the clipper. - * - * The HiZ op emits a rectangle primitive, which requires clipping to - * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 - * Section 1.3 "3D Primitives Overview": - * RECTLIST: - * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. - * - * Also disable perspective divide. This doesn't change the clipper's - * output, but does spare a few electrons. - */ - { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_SF - * - * Disable ViewportTransformEnable (dw1.1) - * - * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). - * - * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5) - * and BackFaceFillMode (dw1.4:3) to SOLID(0). - * - * From the Sandy Bridge PRM, Volume 2, Part 1, Section - * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: - * SOLID: Any triangle or rectangle object found to be front-facing - * is rendered as a solid object. This setting is required when - * (rendering rectangle (RECTLIST) objects. - */ - { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); - OUT_BATCH(depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_SBE */ - { - BEGIN_BATCH(14); - OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); - OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */ - 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | - 0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); - for (int i = 0; i < 12; ++i) - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_WM - * - * Disable PS thread dispatch (dw1.29) and enable the HiZ op. - */ - { - uint32_t dw1 = 0; - - switch (op) { - case GEN6_HIZ_OP_DEPTH_CLEAR: - assert(!"not implemented"); - dw1 |= GEN7_WM_DEPTH_CLEAR; - break; - case GEN6_HIZ_OP_DEPTH_RESOLVE: - dw1 |= GEN7_WM_DEPTH_RESOLVE; - break; - case GEN6_HIZ_OP_HIZ_RESOLVE: - dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; - break; - default: - assert(0); - break; - } - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); - OUT_BATCH(dw1); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_PS - * - * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite - * that, thread dispatch info must still be specified. - * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec - * states that the valid range for this field is [0x3, 0x2f]. - * - A dispatch mode must be given; that is, at least one of the - * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was - * discovered through simulator error messages. - */ - { - BEGIN_BATCH(8); - OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(((brw->max_wm_threads - 1) << IVB_PS_MAX_THREADS_SHIFT) | - GEN7_PS_32_DISPATCH_ENABLE); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_DEPTH_BUFFER */ - { - uint32_t width = mt->level[level].width; - uint32_t height = mt->level[level].height; - - uint32_t tile_x = draw_x & tile_mask_x; - uint32_t tile_y = draw_y & tile_mask_y; - uint32_t offset = intel_region_get_aligned_offset(mt->region, - draw_x & ~tile_mask_x, - draw_y & ~tile_mask_y); - - /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 - * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth - * Coordinate Offset X/Y": - * - * "The 3 LSBs of both offsets must be zero to ensure correct - * alignment" - * - * We have no guarantee that tile_x and tile_y are correctly aligned, - * since they are determined by the mipmap layout, which is only aligned - * to multiples of 4. - * - * So, to avoid hanging the GPU, just smash the low order 3 bits of - * tile_x and tile_y to 0. This is a temporary workaround until we come - * up with a better solution. - */ - tile_x &= ~7; - tile_y &= ~7; - - intel_emit_depth_stall_flushes(intel); - - BEGIN_BATCH(7); - OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); - OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) | - depth_format << 18 | - 1 << 22 | /* hiz enable */ - 1 << 28 | /* depth write */ - BRW_SURFACE_2D << 29); - OUT_RELOC(mt->region->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - offset); - OUT_BATCH((width + tile_x - 1) << 4 | - (height + tile_y - 1) << 18); - OUT_BATCH(0); - OUT_BATCH(tile_x | - tile_y << 16); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_HIER_DEPTH_BUFFER */ - { - struct intel_region *hiz_region = mt->hiz_mt->region; - uint32_t hiz_offset = - intel_region_get_aligned_offset(hiz_region, - draw_x & ~tile_mask_x, - (draw_y & ~tile_mask_y) / 2); - - BEGIN_BATCH(3); - OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); - OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); - OUT_RELOC(hiz_region->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - hiz_offset); - ADVANCE_BATCH(); - } - - /* 3DSTATE_STENCIL_BUFFER */ - { - BEGIN_BATCH(3); - OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_CLEAR_PARAMS - * - * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2 - * 3DSTATE_CLEAR_PARAMS: - * [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed in the along - * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, - * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). - */ - { - BEGIN_BATCH(3); - OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_DRAWING_RECTANGLE */ - { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(((mt->level[level].width - 1) & 0xffff) | - ((mt->level[level].height - 1) << 16)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DPRIMITIVE */ - { - BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); - OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | - _3DPRIM_RECTLIST); - OUT_BATCH(3); /* vertex count per instance */ - OUT_BATCH(0); - OUT_BATCH(1); /* instance count */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* See comments above at first invocation of intel_flush() in - * gen6_hiz_emit_batch_head(). - */ - intel_flush(ctx); - - /* Be safe. */ - brw->state.dirty.brw = ~0; - brw->state.dirty.cache = ~0; -} - -/** \copydoc gen6_resolve_hiz_slice() */ -void -gen7_resolve_hiz_slice(struct intel_context *intel, - struct intel_mipmap_tree *mt, - uint32_t level, - uint32_t layer) -{ - gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); -} - -/** \copydoc gen6_resolve_depth_slice() */ -void -gen7_resolve_depth_slice(struct intel_context *intel, - struct intel_mipmap_tree *mt, - uint32_t level, - uint32_t layer) -{ - gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); -} -- cgit v1.2.3