diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_blorp_emit.c | 577 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_blorp.c | 375 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_blorp.c | 166 |
4 files changed, 578 insertions, 541 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index eea9479c8e7..735a312e9a5 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -103,6 +103,7 @@ i965_FILES = \ brw_binding_tables.c \ brw_blorp.c \ brw_blorp.h \ + brw_blorp_emit.c \ brw_cc.c \ brw_clear.c \ brw_clip.c \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_emit.c b/src/mesa/drivers/dri/i965/brw_blorp_emit.c new file mode 100644 index 00000000000..c0a7e55195c --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_blorp_emit.c @@ -0,0 +1,577 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> + +#include "intel_batchbuffer.h" +#include "intel_mipmap_tree.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" + +#include "blorp_priv.h" +#include "vbo/vbo.h" +#include "brw_draw.h" + +static void +gen6_blorp_emit_input_varying_data(struct brw_context *brw, + const struct brw_blorp_params *params, + unsigned *offset, + unsigned *size) +{ + const unsigned vec4_size_in_bytes = 4 * sizeof(float); + const unsigned max_num_varyings = + DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes); + const unsigned num_varyings = params->wm_prog_data->num_varying_inputs; + + *size = num_varyings * vec4_size_in_bytes; + + const float *const inputs_src = (const float *)¶ms->wm_inputs; + float *inputs = (float *)brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER, + *size, 32, offset); + + /* Walk over the attribute slots, determine if the attribute is used by + * the program and when necessary copy the values from the input storage to + * the vertex data buffer. + */ + for (unsigned i = 0; i < max_num_varyings; i++) { + const gl_varying_slot attr = VARYING_SLOT_VAR0 + i; + + if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr))) + continue; + + memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes); + + inputs += 4; + } +} + +static void +gen6_blorp_emit_vertex_data(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + uint32_t vertex_offset; + uint32_t const_data_offset = 0; + unsigned const_data_size = 0; + + /* Setup VBO for the rectangle primitive.. + * + * A rectangle primitive (3DPRIM_RECTLIST) consists of only three + * vertices. The vertices reside in screen space with DirectX coordinates + * (that is, (0, 0) is the upper left corner). + * + * v2 ------ implied + * | | + * | | + * v0 ----- v1 + * + * Since the VS is disabled, the clipper loads each VUE directly from + * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and + * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: + * dw0: Reserved, MBZ. + * dw1: Render Target Array Index. The HiZ op does not use indexed + * vertices, so set the dword to 0. + * dw2: Viewport Index. The HiZ op disables viewport mapping and + * scissoring, so set the dword to 0. + * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so + * set the dword to 0. + * dw4: Vertex Position X. + * dw5: Vertex Position Y. + * dw6: Vertex Position Z. + * dw7: Vertex Position W. + * + * dw8: Flat vertex input 0 + * dw9: Flat vertex input 1 + * ... + * dwn: Flat vertex input n - 8 + * + * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 + * "Vertex URB Entry (VUE) Formats". + * + * Only vertex position X and Y are going to be variable, Z is fixed to + * zero and W to one. Header words dw0-3 are all zero. There is no need to + * include the fixed values in the vertex buffer. Vertex fetcher can be + * instructed to fill vertex elements with constant values of one and zero + * instead of reading them from the buffer. + * Flat inputs are program constants that are not interpolated. Moreover + * their values will be the same between vertices. + * + * See the vertex element setup below. + */ + const float vertices[] = { + /* v0 */ (float)params->x0, (float)params->y1, + /* v1 */ (float)params->x1, (float)params->y1, + /* v2 */ (float)params->x0, (float)params->y0, + }; + + float *const vertex_data = (float *)brw_state_batch( + brw, AUB_TRACE_VERTEX_BUFFER, + sizeof(vertices), 32, + &vertex_offset); + memcpy(vertex_data, vertices, sizeof(vertices)); + + if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) + gen6_blorp_emit_input_varying_data(brw, params, + &const_data_offset, + &const_data_size); + + /* 3DSTATE_VERTEX_BUFFERS */ + const int num_buffers = 1 + (const_data_size > 0); + const int batch_length = 1 + 4 * num_buffers; + + BEGIN_BATCH(batch_length); + OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); + + const unsigned blorp_num_vue_elems = 2; + const unsigned stride = blorp_num_vue_elems * sizeof(float); + EMIT_VERTEX_BUFFER_STATE(brw, 0 /* buffer_nr */, brw->batch.bo, + vertex_offset, vertex_offset + sizeof(vertices), + stride, 0 /* steprate */); + + if (const_data_size) { + /* Tell vertex fetcher not to advance the pointer in the buffer when + * moving to the next vertex. This will effectively provide the same + * data for all the vertices. For flat inputs only the data provided + * for the first provoking vertex actually matters. + */ + const unsigned stride_zero = 0; + EMIT_VERTEX_BUFFER_STATE(brw, 1 /* buffer_nr */, brw->batch.bo, + const_data_offset, + const_data_offset + const_data_size, + stride_zero, 0 /* step_rate */); + } + + ADVANCE_BATCH(); +} + +void +gen6_blorp_emit_vertices(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + gen6_blorp_emit_vertex_data(brw, params); + + const unsigned num_varyings = + params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; + const unsigned num_elements = 2 + num_varyings; + const int batch_length = 1 + 2 * num_elements; + + BEGIN_BATCH(batch_length); + + /* 3DSTATE_VERTEX_ELEMENTS + * + * Fetch dwords 0 - 7 from each VUE. See the comments above where + * the vertex_bo is filled with data. First element contains dwords + * for the VUE header, second the actual position values and the + * remaining contain the flat inputs. + */ + { + OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2)); + /* Element 0 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 0 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT); + /* Element 1 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 0 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT); + } + + for (unsigned i = 0; i < num_varyings; ++i) { + /* Element 2 + i */ + OUT_BATCH(1 << GEN6_VE0_INDEX_SHIFT | + GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + (i * 4 * sizeof(float)) << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); + } + + ADVANCE_BATCH(); +} + + +/* BLEND_STATE */ +uint32_t +gen6_blorp_emit_blend_state(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + uint32_t cc_blend_state_offset; + + assume(params->num_draw_buffers); + + const unsigned size = params->num_draw_buffers * + sizeof(struct gen6_blend_state); + struct gen6_blend_state *blend = (struct gen6_blend_state *) + brw_state_batch(brw, AUB_TRACE_BLEND_STATE, size, 64, + &cc_blend_state_offset); + + memset(blend, 0, size); + + for (unsigned i = 0; i < params->num_draw_buffers; ++i) { + blend[i].blend1.pre_blend_clamp_enable = 1; + blend[i].blend1.post_blend_clamp_enable = 1; + blend[i].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT; + + blend[i].blend1.write_disable_r = params->color_write_disable[0]; + blend[i].blend1.write_disable_g = params->color_write_disable[1]; + blend[i].blend1.write_disable_b = params->color_write_disable[2]; + blend[i].blend1.write_disable_a = params->color_write_disable[3]; + } + + return cc_blend_state_offset; +} + + +/* CC_STATE */ +uint32_t +gen6_blorp_emit_cc_state(struct brw_context *brw) +{ + uint32_t cc_state_offset; + + struct gen6_color_calc_state *cc = (struct gen6_color_calc_state *) + brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(gen6_color_calc_state), 64, + &cc_state_offset); + memset(cc, 0, sizeof(*cc)); + + return cc_state_offset; +} + + +/** + * \param out_offset is relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + */ +uint32_t +gen6_blorp_emit_depth_stencil_state(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + uint32_t depthstencil_offset; + + struct gen6_depth_stencil_state *state; + state = (struct gen6_depth_stencil_state *) + brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*state), 64, + &depthstencil_offset); + memset(state, 0, sizeof(*state)); + + /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ + state->ds2.depth_write_enable = 1; + if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { + state->ds2.depth_test_enable = 1; + state->ds2.depth_test_func = BRW_COMPAREFUNCTION_NEVER; + } + + return depthstencil_offset; +} + + +/* BINDING_TABLE. See brw_wm_binding_table(). */ +uint32_t +gen6_blorp_emit_binding_table(struct brw_context *brw, + uint32_t wm_surf_offset_renderbuffer, + uint32_t wm_surf_offset_texture) +{ + uint32_t wm_bind_bo_offset; + uint32_t *bind = (uint32_t *) + brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, + sizeof(uint32_t) * + BRW_BLORP_NUM_BINDING_TABLE_ENTRIES, + 32, /* alignment */ + &wm_bind_bo_offset); + bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] = + wm_surf_offset_renderbuffer; + bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = wm_surf_offset_texture; + + return wm_bind_bo_offset; +} + + +/** + * SAMPLER_STATE. See brw_update_sampler_state(). + */ +uint32_t +gen6_blorp_emit_sampler_state(struct brw_context *brw, + unsigned tex_filter, unsigned max_lod, + bool non_normalized_coords) +{ + uint32_t sampler_offset; + uint32_t *sampler_state = (uint32_t *) + brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 16, 32, &sampler_offset); + + unsigned address_rounding = BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_R_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_R_MAG; + + /* XXX: I don't think that using firstLevel, lastLevel works, + * because we always setup the surface state as if firstLevel == + * level zero. Probably have to subtract firstLevel from each of + * these: + */ + brw_emit_sampler_state(brw, + sampler_state, + sampler_offset, + tex_filter, /* min filter */ + tex_filter, /* mag filter */ + BRW_MIPFILTER_NONE, + BRW_ANISORATIO_2, + address_rounding, + BRW_TEXCOORDMODE_CLAMP, + BRW_TEXCOORDMODE_CLAMP, + BRW_TEXCOORDMODE_CLAMP, + 0, /* min LOD */ + max_lod, + 0, /* LOD bias */ + 0, /* shadow function */ + non_normalized_coords, + 0); /* border color offset - unused */ + + return sampler_offset; +} + + +/* 3DSTATE_CLIP + * + * Disable the clipper. + * + * The BLORP op emits a rectangle primitive, which requires clipping to + * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 "3D Primitives Overview": + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + * + * Also disable perspective divide. This doesn't change the clipper's + * output, but does spare a few electrons. + */ +void +gen6_blorp_emit_clip_disable(struct brw_context *brw) +{ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_DRAWING_RECTANGLE */ +void +gen6_blorp_emit_drawing_rectangle(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(((MAX2(params->x1, params->x0) - 1) & 0xffff) | + ((MAX2(params->y1, params->y0) - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* Once vertex fetcher has written full VUE entries with complete + * header the space requirement is as follows per vertex (in bytes): + * + * Header Position Program constants + * +--------+------------+-------------------+ + * | 16 | 16 | n x 16 | + * +--------+------------+-------------------+ + * + * where 'n' stands for number of varying inputs expressed as vec4s. + * + * The URB size is in turn expressed in 64 bytes (512 bits). + */ +static unsigned +gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params) +{ + const unsigned num_varyings = + params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; + const unsigned total_needed = 16 + 16 + num_varyings * 16; + + return DIV_ROUND_UP(total_needed, 64); +} + +/* 3DSTATE_URB_VS + * 3DSTATE_URB_HS + * 3DSTATE_URB_DS + * 3DSTATE_URB_GS + * + * If the 3DSTATE_URB_VS is emitted, than the others must be also. + * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: + * + * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be + * programmed in order for the programming of this state to be + * valid. + */ +void +gen7_blorp_emit_urb_config(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params); + + if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) && + brw->urb.vsize >= vs_entry_size) + return; + + brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; + + gen7_upload_urb(brw, vs_entry_size, false, false); +} + + +/* 3DSTATE_BLEND_STATE_POINTERS */ +void +gen7_blorp_emit_blend_state_pointer(struct brw_context *brw, + uint32_t cc_blend_state_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(cc_blend_state_offset | 1); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_CC_STATE_POINTERS */ +void +gen7_blorp_emit_cc_state_pointer(struct brw_context *brw, + uint32_t cc_state_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(cc_state_offset | 1); + ADVANCE_BATCH(); +} + +void +gen7_blorp_emit_cc_viewport(struct brw_context *brw) +{ + struct brw_cc_viewport *ccv; + uint32_t cc_vp_offset; + + ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, + sizeof(*ccv), 32, + &cc_vp_offset); + ccv->min_depth = 0.0; + ccv->max_depth = 1.0; + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2)); + OUT_BATCH(cc_vp_offset); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_TE + * + * Disable the tesselation engine. + */ +void +gen7_blorp_emit_te_disable(struct brw_context *brw) +{ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +void +gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw, + uint32_t wm_bind_bo_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2)); + OUT_BATCH(wm_bind_bo_offset); + ADVANCE_BATCH(); +} + + +void +gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw, + uint32_t sampler_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); + OUT_BATCH(sampler_offset); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_CLEAR_PARAMS + * + * From the Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4 + * 3DSTATE_CLEAR_PARAMS: + * 3DSTATE_CLEAR_PARAMS must always be programmed in the along + * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, + * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). + */ +void +gen7_blorp_emit_clear_params(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + BEGIN_BATCH(3); + OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); + OUT_BATCH(params->depth.clear_color.u32[0]); + OUT_BATCH(GEN7_DEPTH_CLEAR_VALID); + ADVANCE_BATCH(); +} + + +/* 3DPRIMITIVE */ +void +gen7_blorp_emit_primitive(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); + OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | + _3DPRIM_RECTLIST); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(params->num_layers); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.c b/src/mesa/drivers/dri/i965/gen6_blorp.c index 93ff8d1897b..d27c550be5f 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.c +++ b/src/mesa/drivers/dri/i965/gen6_blorp.c @@ -34,193 +34,6 @@ #include "vbo/vbo.h" #include "brw_draw.h" -static void -gen6_blorp_emit_input_varying_data(struct brw_context *brw, - const struct brw_blorp_params *params, - unsigned *offset, - unsigned *size) -{ - const unsigned vec4_size_in_bytes = 4 * sizeof(float); - const unsigned max_num_varyings = - DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes); - const unsigned num_varyings = params->wm_prog_data->num_varying_inputs; - - *size = num_varyings * vec4_size_in_bytes; - - const float *const inputs_src = (const float *)¶ms->wm_inputs; - float *inputs = (float *)brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER, - *size, 32, offset); - - /* Walk over the attribute slots, determine if the attribute is used by - * the program and when necessary copy the values from the input storage to - * the vertex data buffer. - */ - for (unsigned i = 0; i < max_num_varyings; i++) { - const gl_varying_slot attr = VARYING_SLOT_VAR0 + i; - - if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr))) - continue; - - memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes); - - inputs += 4; - } -} - -static void -gen6_blorp_emit_vertex_data(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t vertex_offset; - uint32_t const_data_offset = 0; - unsigned const_data_size = 0; - - /* Setup VBO for the rectangle primitive.. - * - * A rectangle primitive (3DPRIM_RECTLIST) consists of only three - * vertices. The vertices reside in screen space with DirectX coordinates - * (that is, (0, 0) is the upper left corner). - * - * v2 ------ implied - * | | - * | | - * v0 ----- v1 - * - * Since the VS is disabled, the clipper loads each VUE directly from - * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and - * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: - * dw0: Reserved, MBZ. - * dw1: Render Target Array Index. The HiZ op does not use indexed - * vertices, so set the dword to 0. - * dw2: Viewport Index. The HiZ op disables viewport mapping and - * scissoring, so set the dword to 0. - * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so - * set the dword to 0. - * dw4: Vertex Position X. - * dw5: Vertex Position Y. - * dw6: Vertex Position Z. - * dw7: Vertex Position W. - * - * dw8: Flat vertex input 0 - * dw9: Flat vertex input 1 - * ... - * dwn: Flat vertex input n - 8 - * - * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 - * "Vertex URB Entry (VUE) Formats". - * - * Only vertex position X and Y are going to be variable, Z is fixed to - * zero and W to one. Header words dw0-3 are all zero. There is no need to - * include the fixed values in the vertex buffer. Vertex fetcher can be - * instructed to fill vertex elements with constant values of one and zero - * instead of reading them from the buffer. - * Flat inputs are program constants that are not interpolated. Moreover - * their values will be the same between vertices. - * - * See the vertex element setup below. - */ - const float vertices[] = { - /* v0 */ (float)params->x0, (float)params->y1, - /* v1 */ (float)params->x1, (float)params->y1, - /* v2 */ (float)params->x0, (float)params->y0, - }; - - float *const vertex_data = (float *)brw_state_batch( - brw, AUB_TRACE_VERTEX_BUFFER, - sizeof(vertices), 32, - &vertex_offset); - memcpy(vertex_data, vertices, sizeof(vertices)); - - if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) - gen6_blorp_emit_input_varying_data(brw, params, - &const_data_offset, - &const_data_size); - - /* 3DSTATE_VERTEX_BUFFERS */ - const int num_buffers = 1 + (const_data_size > 0); - const int batch_length = 1 + 4 * num_buffers; - - BEGIN_BATCH(batch_length); - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); - - const unsigned blorp_num_vue_elems = 2; - const unsigned stride = blorp_num_vue_elems * sizeof(float); - EMIT_VERTEX_BUFFER_STATE(brw, 0 /* buffer_nr */, brw->batch.bo, - vertex_offset, vertex_offset + sizeof(vertices), - stride, 0 /* steprate */); - - if (const_data_size) { - /* Tell vertex fetcher not to advance the pointer in the buffer when - * moving to the next vertex. This will effectively provide the same - * data for all the vertices. For flat inputs only the data provided - * for the first provoking vertex actually matters. - */ - const unsigned stride_zero = 0; - EMIT_VERTEX_BUFFER_STATE(brw, 1 /* buffer_nr */, brw->batch.bo, - const_data_offset, - const_data_offset + const_data_size, - stride_zero, 0 /* step_rate */); - } - - ADVANCE_BATCH(); -} - -void -gen6_blorp_emit_vertices(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - gen6_blorp_emit_vertex_data(brw, params); - - const unsigned num_varyings = - params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; - const unsigned num_elements = 2 + num_varyings; - const int batch_length = 1 + 2 * num_elements; - - BEGIN_BATCH(batch_length); - - /* 3DSTATE_VERTEX_ELEMENTS - * - * Fetch dwords 0 - 7 from each VUE. See the comments above where - * the vertex_bo is filled with data. First element contains dwords - * for the VUE header, second the actual position values and the - * remaining contain the flat inputs. - */ - { - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2)); - /* Element 0 */ - OUT_BATCH(GEN6_VE0_VALID | - BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | - 0 << BRW_VE0_SRC_OFFSET_SHIFT); - OUT_BATCH(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT); - /* Element 1 */ - OUT_BATCH(GEN6_VE0_VALID | - BRW_SURFACEFORMAT_R32G32_FLOAT << BRW_VE0_FORMAT_SHIFT | - 0 << BRW_VE0_SRC_OFFSET_SHIFT); - OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT); - } - - for (unsigned i = 0; i < num_varyings; ++i) { - /* Element 2 + i */ - OUT_BATCH(1 << GEN6_VE0_INDEX_SHIFT | - GEN6_VE0_VALID | - BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | - (i * 4 * sizeof(float)) << BRW_VE0_SRC_OFFSET_SHIFT); - OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); - } - - ADVANCE_BATCH(); -} - - /* 3DSTATE_URB * * Assign the entire URB to the VS. Even though the VS disabled, URB space @@ -249,86 +62,6 @@ gen6_blorp_emit_urb_config(struct brw_context *brw, } -/* BLEND_STATE */ -uint32_t -gen6_blorp_emit_blend_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t cc_blend_state_offset; - - assume(params->num_draw_buffers); - - const unsigned size = params->num_draw_buffers * - sizeof(struct gen6_blend_state); - struct gen6_blend_state *blend = (struct gen6_blend_state *) - brw_state_batch(brw, AUB_TRACE_BLEND_STATE, size, 64, - &cc_blend_state_offset); - - memset(blend, 0, size); - - for (unsigned i = 0; i < params->num_draw_buffers; ++i) { - blend[i].blend1.pre_blend_clamp_enable = 1; - blend[i].blend1.post_blend_clamp_enable = 1; - blend[i].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT; - - blend[i].blend1.write_disable_r = params->color_write_disable[0]; - blend[i].blend1.write_disable_g = params->color_write_disable[1]; - blend[i].blend1.write_disable_b = params->color_write_disable[2]; - blend[i].blend1.write_disable_a = params->color_write_disable[3]; - } - - return cc_blend_state_offset; -} - - -/* CC_STATE */ -uint32_t -gen6_blorp_emit_cc_state(struct brw_context *brw) -{ - uint32_t cc_state_offset; - - struct gen6_color_calc_state *cc = (struct gen6_color_calc_state *) - brw_state_batch(brw, AUB_TRACE_CC_STATE, - sizeof(gen6_color_calc_state), 64, - &cc_state_offset); - memset(cc, 0, sizeof(*cc)); - - return cc_state_offset; -} - - -/** - * \param out_offset is relative to - * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - */ -uint32_t -gen6_blorp_emit_depth_stencil_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t depthstencil_offset; - - struct gen6_depth_stencil_state *state; - state = (struct gen6_depth_stencil_state *) - brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, - sizeof(*state), 64, - &depthstencil_offset); - memset(state, 0, sizeof(*state)); - - /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve - */ - state->ds2.depth_write_enable = 1; - if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { - state->ds2.depth_test_enable = 1; - state->ds2.depth_test_func = BRW_COMPAREFUNCTION_NEVER; - } - - return depthstencil_offset; -} - - /* 3DSTATE_CC_STATE_POINTERS * * The pointer offsets are relative to @@ -352,73 +85,6 @@ gen6_blorp_emit_cc_state_pointers(struct brw_context *brw, } -/* BINDING_TABLE. See brw_wm_binding_table(). */ -uint32_t -gen6_blorp_emit_binding_table(struct brw_context *brw, - uint32_t wm_surf_offset_renderbuffer, - uint32_t wm_surf_offset_texture) -{ - uint32_t wm_bind_bo_offset; - uint32_t *bind = (uint32_t *) - brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, - sizeof(uint32_t) * - BRW_BLORP_NUM_BINDING_TABLE_ENTRIES, - 32, /* alignment */ - &wm_bind_bo_offset); - bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] = - wm_surf_offset_renderbuffer; - bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = wm_surf_offset_texture; - - return wm_bind_bo_offset; -} - - -/** - * SAMPLER_STATE. See brw_update_sampler_state(). - */ -uint32_t -gen6_blorp_emit_sampler_state(struct brw_context *brw, - unsigned tex_filter, unsigned max_lod, - bool non_normalized_coords) -{ - uint32_t sampler_offset; - uint32_t *sampler_state = (uint32_t *) - brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 16, 32, &sampler_offset); - - unsigned address_rounding = BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_R_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | - BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | - BRW_ADDRESS_ROUNDING_ENABLE_R_MAG; - - /* XXX: I don't think that using firstLevel, lastLevel works, - * because we always setup the surface state as if firstLevel == - * level zero. Probably have to subtract firstLevel from each of - * these: - */ - brw_emit_sampler_state(brw, - sampler_state, - sampler_offset, - tex_filter, /* min filter */ - tex_filter, /* mag filter */ - BRW_MIPFILTER_NONE, - BRW_ANISORATIO_2, - address_rounding, - BRW_TEXCOORDMODE_CLAMP, - BRW_TEXCOORDMODE_CLAMP, - BRW_TEXCOORDMODE_CLAMP, - 0, /* min LOD */ - max_lod, - 0, /* LOD bias */ - 0, /* shadow function */ - non_normalized_coords, - 0); /* border color offset - unused */ - - return sampler_offset; -} - - /** * 3DSTATE_SAMPLER_STATE_POINTERS. See upload_sampler_state_pointers(). */ @@ -508,32 +174,6 @@ gen6_blorp_emit_gs_disable(struct brw_context *brw, } -/* 3DSTATE_CLIP - * - * Disable the clipper. - * - * The BLORP op emits a rectangle primitive, which requires clipping to - * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 - * Section 1.3 "3D Primitives Overview": - * RECTLIST: - * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. - * - * Also disable perspective divide. This doesn't change the clipper's - * output, but does spare a few electrons. - */ -void -gen6_blorp_emit_clip_disable(struct brw_context *brw) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - /* 3DSTATE_SF * * Disable ViewportTransformEnable (dw2.1) @@ -829,21 +469,6 @@ gen6_blorp_emit_clear_params(struct brw_context *brw, ADVANCE_BATCH(); } - -/* 3DSTATE_DRAWING_RECTANGLE */ -void -gen6_blorp_emit_drawing_rectangle(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(((MAX2(params->x1, params->x0) - 1) & 0xffff) | - ((MAX2(params->y1, params->y0) - 1) << 16)); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - /* 3DSTATE_VIEWPORT_STATE_POINTERS */ static void gen6_blorp_emit_viewport_state(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c b/src/mesa/drivers/dri/i965/gen7_blorp.c index 8fe293dc051..ce7047b7ccd 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.c +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c @@ -32,97 +32,6 @@ #include "blorp_priv.h" -/* Once vertex fetcher has written full VUE entries with complete - * header the space requirement is as follows per vertex (in bytes): - * - * Header Position Program constants - * +--------+------------+-------------------+ - * | 16 | 16 | n x 16 | - * +--------+------------+-------------------+ - * - * where 'n' stands for number of varying inputs expressed as vec4s. - * - * The URB size is in turn expressed in 64 bytes (512 bits). - */ -static unsigned -gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params) -{ - const unsigned num_varyings = - params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; - const unsigned total_needed = 16 + 16 + num_varyings * 16; - - return DIV_ROUND_UP(total_needed, 64); -} - -/* 3DSTATE_URB_VS - * 3DSTATE_URB_HS - * 3DSTATE_URB_DS - * 3DSTATE_URB_GS - * - * If the 3DSTATE_URB_VS is emitted, than the others must be also. - * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: - * - * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be - * programmed in order for the programming of this state to be - * valid. - */ -void -gen7_blorp_emit_urb_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params); - - if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) && - brw->urb.vsize >= vs_entry_size) - return; - - brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; - - gen7_upload_urb(brw, vs_entry_size, false, false); -} - - -/* 3DSTATE_BLEND_STATE_POINTERS */ -void -gen7_blorp_emit_blend_state_pointer(struct brw_context *brw, - uint32_t cc_blend_state_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(cc_blend_state_offset | 1); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_CC_STATE_POINTERS */ -void -gen7_blorp_emit_cc_state_pointer(struct brw_context *brw, - uint32_t cc_state_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(cc_state_offset | 1); - ADVANCE_BATCH(); -} - -void -gen7_blorp_emit_cc_viewport(struct brw_context *brw) -{ - struct brw_cc_viewport *ccv; - uint32_t cc_vp_offset; - - ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, - sizeof(*ccv), 32, - &cc_vp_offset); - ccv->min_depth = 0.0; - ccv->max_depth = 1.0; - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2)); - OUT_BATCH(cc_vp_offset); - ADVANCE_BATCH(); -} - /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS * @@ -195,22 +104,6 @@ gen7_blorp_emit_hs_disable(struct brw_context *brw) } -/* 3DSTATE_TE - * - * Disable the tesselation engine. - */ -void -gen7_blorp_emit_te_disable(struct brw_context *brw) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - /* 3DSTATE_DS * * Disable the domain shader. @@ -460,27 +353,6 @@ gen7_blorp_emit_ps_config(struct brw_context *brw, } -void -gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw, - uint32_t wm_bind_bo_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(wm_bind_bo_offset); - ADVANCE_BATCH(); -} - - -void -gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw, - uint32_t sampler_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(sampler_offset); - ADVANCE_BATCH(); -} - static void gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, const struct brw_blorp_params *params) @@ -580,44 +452,6 @@ gen7_blorp_emit_depth_disable(struct brw_context *brw) } -/* 3DSTATE_CLEAR_PARAMS - * - * From the Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4 - * 3DSTATE_CLEAR_PARAMS: - * 3DSTATE_CLEAR_PARAMS must always be programmed in the along - * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, - * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). - */ -void -gen7_blorp_emit_clear_params(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(3); - OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); - OUT_BATCH(params->depth.clear_color.u32[0]); - OUT_BATCH(GEN7_DEPTH_CLEAR_VALID); - ADVANCE_BATCH(); -} - - -/* 3DPRIMITIVE */ -void -gen7_blorp_emit_primitive(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); - OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | - _3DPRIM_RECTLIST); - OUT_BATCH(3); /* vertex count per instance */ - OUT_BATCH(0); - OUT_BATCH(params->num_layers); /* instance count */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - /** * \copydoc gen6_blorp_exec() */ |