diff options
author | Rafael Antognolli <[email protected]> | 2017-04-06 16:43:49 -0700 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2017-05-03 20:39:48 -0700 |
commit | 158dcd86590ad5c513bc4db283b32ea2d8559493 (patch) | |
tree | 39719a9ff9e458006ef16dcee0e34c71fb694320 | |
parent | 46d8f9454f8bdd469f389b78ce3cb8ef35b7fd68 (diff) |
i965: Port gen4+ emit vertices code to genxml.
Some code that was placed in brw_draw_upload.c and exported to be used
by gen8+ was also moved to genX_state_upload.c, and the respective symbols
are not exported anymore.
v2:
- Remove code from brw_draw_upload too
- Emit vertices for gen4-5 too.
- Use helper to setup brw_address (Kristian)
- Use macros for MOCS values.
- Do not use #ifndef NDEBUG on code that is actually used (Ken)
v3:
- Style and code cleanup (Ken)
- Keep some of the common code inside brw_draw_upload.c (Ken)
Signed-off-by: Rafael Antognolli <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_draw_upload.c | 456 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen8_draw_upload.c | 330 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/genX_state_upload.c | 560 |
4 files changed, 556 insertions, 792 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 002e863a649..8b30151b816 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -242,86 +242,6 @@ double_types(struct brw_context *brw, : double_types_float[size]); } -static bool -is_passthru_format(uint32_t format) -{ - switch (format) { - case ISL_FORMAT_R64_PASSTHRU: - case ISL_FORMAT_R64G64_PASSTHRU: - case ISL_FORMAT_R64G64B64_PASSTHRU: - case ISL_FORMAT_R64G64B64A64_PASSTHRU: - return true; - default: - return false; - } -} - -static int -uploads_needed(uint32_t format) -{ - if (!is_passthru_format(format)) - return 1; - - switch (format) { - case ISL_FORMAT_R64_PASSTHRU: - case ISL_FORMAT_R64G64_PASSTHRU: - return 1; - case ISL_FORMAT_R64G64B64_PASSTHRU: - case ISL_FORMAT_R64G64B64A64_PASSTHRU: - return 2; - default: - unreachable("not reached"); - } -} - -/* - * Returns the number of componentes associated with a format that is used on - * a 64 to 32 format split. See downsize_format() - */ -static int -upload_format_size(uint32_t upload_format) -{ - switch (upload_format) { - case ISL_FORMAT_R32G32_FLOAT: - return 2; - case ISL_FORMAT_R32G32B32A32_FLOAT: - return 4; - default: - unreachable("not reached"); - } -} - -/* - * Returns the format that we are finally going to use when upload a vertex - * element. It will only change if we are using *64*PASSTHRU formats, as for - * gen < 8 they need to be splitted on two *32*FLOAT formats. - * - * @upload points in which upload we are. Valid values are [0,1] - */ -static uint32_t -downsize_format_if_needed(uint32_t format, - int upload) -{ - assert(upload == 0 || upload == 1); - - if (!is_passthru_format(format)) - return format; - - switch (format) { - case ISL_FORMAT_R64_PASSTHRU: - return ISL_FORMAT_R32G32_FLOAT; - case ISL_FORMAT_R64G64_PASSTHRU: - return ISL_FORMAT_R32G32B32A32_FLOAT; - case ISL_FORMAT_R64G64B64_PASSTHRU: - return !upload ? 
ISL_FORMAT_R32G32B32A32_FLOAT - : ISL_FORMAT_R32G32_FLOAT; - case ISL_FORMAT_R64G64B64A64_PASSTHRU: - return ISL_FORMAT_R32G32B32A32_FLOAT; - default: - unreachable("not reached"); - } -} - /** * Given vertex array type/size/format/normalized info, return * the appopriate hardware surface type. @@ -786,382 +706,6 @@ brw_prepare_shader_draw_parameters(struct brw_context *brw) } } -/** - * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS). - */ -uint32_t * -brw_emit_vertex_buffer_state(struct brw_context *brw, - unsigned buffer_nr, - struct brw_bo *bo, - unsigned start_offset, - unsigned end_offset, - unsigned stride, - unsigned step_rate, - uint32_t *__map) -{ - struct gl_context *ctx = &brw->ctx; - uint32_t dw0; - - if (brw->gen >= 8) { - dw0 = buffer_nr << GEN6_VB0_INDEX_SHIFT; - } else if (brw->gen >= 6) { - dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) | - (step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA - : GEN6_VB0_ACCESS_VERTEXDATA); - } else { - dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) | - (step_rate ? BRW_VB0_ACCESS_INSTANCEDATA - : BRW_VB0_ACCESS_VERTEXDATA); - } - - if (brw->gen >= 7) - dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; - - switch (brw->gen) { - case 7: - dw0 |= GEN7_MOCS_L3 << 16; - break; - case 8: - dw0 |= BDW_MOCS_WB << 16; - break; - case 9: - dw0 |= SKL_MOCS_WB << 16; - break; - } - - WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047), - "VBO stride %d too large, bad rendering may occur\n", - stride); - OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT)); - if (brw->gen >= 8) { - OUT_RELOC64(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset); - /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry - - * Vertex Fetch (VF) Stage - State - * - * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x - * VBState.BufferPitch", the address of the byte immediately beyond the - * last valid byte of the buffer is determined by - * "VBState.StartingBufferAddress + VBState.BufferSize". 
- */ - OUT_BATCH(end_offset - start_offset); - } else if (brw->gen >= 5) { - OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset); - /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry - - * Vertex Fetch (VF) Stage - State - * - * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x - * VBState.BufferPitch", the address of the byte immediately beyond the - * last valid byte of the buffer is determined by - * "VBState.EndAddress + 1". - */ - OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, end_offset - 1); - OUT_BATCH(step_rate); - } else { - OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset); - OUT_BATCH(0); - OUT_BATCH(step_rate); - } - - return __map; -} - -static void -brw_emit_vertices(struct brw_context *brw) -{ - GLuint i; - - brw_prepare_vertices(brw); - brw_prepare_shader_draw_parameters(brw); - - brw_emit_query_begin(brw); - - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - - unsigned nr_elements = brw->vb.nr_enabled; - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid || - vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) - ++nr_elements; - if (vs_prog_data->uses_drawid) - nr_elements++; - - /* If any of the formats of vb.enabled needs more that one upload, we need - * to add it to nr_elements */ - unsigned extra_uploads = 0; - for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); - - if (uploads_needed(format) > 1) - extra_uploads++; - } - nr_elements += extra_uploads; - - /* If the VS doesn't read any inputs (calculating vertex position from - * a state variable for some reason, for example), emit a single pad - * VERTEX_ELEMENT struct and bail. - * - * The stale VB state stays in place, but they don't do anything unless - * a VE loads from them. 
- */ - if (nr_elements == 0) { - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1); - if (brw->gen >= 6) { - OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - (ISL_FORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); - } else { - OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (ISL_FORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); - } - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); - ADVANCE_BATCH(); - return; - } - - /* Now emit VB and VEP state packets. - */ - - const bool uses_draw_params = - vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance; - const unsigned nr_buffers = brw->vb.nr_buffers + - uses_draw_params + vs_prog_data->uses_drawid; - - if (nr_buffers) { - if (brw->gen >= 6) { - assert(nr_buffers <= 33); - } else { - assert(nr_buffers <= 17); - } - - BEGIN_BATCH(1 + 4 * nr_buffers); - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); - for (i = 0; i < brw->vb.nr_buffers; i++) { - struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; - /* Prior to Haswell and Bay Trail we have to use 4-component formats - * to fake 3-component ones. In particular, we do this for - * half-float and 8 and 16-bit integer formats. This means that the - * vertex element may poke over the end of the buffer by 2 bytes. 
- */ - unsigned padding = - (brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2; - EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->offset, - buffer->offset + buffer->size + padding, - buffer->stride, buffer->step_rate); - - } - - if (uses_draw_params) { - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers, - brw->draw.draw_params_bo, - brw->draw.draw_params_offset, - brw->draw.draw_params_bo->size, - 0, /* stride */ - 0); /* step rate */ - } - - if (vs_prog_data->uses_drawid) { - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1, - brw->draw.draw_id_bo, - brw->draw.draw_id_offset, - brw->draw.draw_id_bo->size, - 0, /* stride */ - 0); /* step rate */ - } - - ADVANCE_BATCH(); - } - - /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably - * for VertexID/InstanceID. - */ - if (brw->gen >= 6) { - assert(nr_elements <= 34); - } else { - assert(nr_elements <= 18); - } - - struct brw_vertex_element *gen6_edgeflag_input = NULL; - - BEGIN_BATCH(1 + nr_elements * 2); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); - for (i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); - uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; - unsigned num_uploads = 1; - unsigned c; - - num_uploads = uploads_needed(format); - - if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { - /* Gen6+ passes edgeflag as sideband along with the vertex, instead - * of in the VUE. We have to upload it sideband as the last vertex - * element according to the B-Spec. 
- */ - if (brw->gen >= 6) { - gen6_edgeflag_input = input; - continue; - } - } - - for (c = 0; c < num_uploads; c++) { - uint32_t upload_format = downsize_format_if_needed(format, c); - /* If we need more that one upload, the offset stride would be 128 - * bits (16 bytes), as for previous uploads we are using the full - * entry. */ - unsigned int offset = input->offset + c * 16; - int size = input->glarray->Size; - - if (is_passthru_format(format)) - size = upload_format_size(upload_format); - - switch (size) { - case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; - case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; - case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; - case 3: comp3 = input->glarray->Integer - ? BRW_VE1_COMPONENT_STORE_1_INT - : BRW_VE1_COMPONENT_STORE_1_FLT; - break; - } - - if (brw->gen >= 6) { - OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - (upload_format << BRW_VE0_FORMAT_SHIFT) | - (offset << BRW_VE0_SRC_OFFSET_SHIFT)); - } else { - OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (upload_format << BRW_VE0_FORMAT_SHIFT) | - (offset << BRW_VE0_SRC_OFFSET_SHIFT)); - } - - if (brw->gen >= 5) - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); - else - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | - ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); - } - } - - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid || - vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) { - uint32_t dw0 = 0, dw1 = 0; - uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0; - uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0; - uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0; - uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0; - - if (vs_prog_data->uses_basevertex) - comp0 = BRW_VE1_COMPONENT_STORE_SRC; - - if 
(vs_prog_data->uses_baseinstance) - comp1 = BRW_VE1_COMPONENT_STORE_SRC; - - if (vs_prog_data->uses_vertexid) - comp2 = BRW_VE1_COMPONENT_STORE_VID; - - if (vs_prog_data->uses_instanceid) - comp3 = BRW_VE1_COMPONENT_STORE_IID; - - dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT); - - if (brw->gen >= 6) { - dw0 |= GEN6_VE0_VALID | - brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | - ISL_FORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT; - } else { - dw0 |= BRW_VE0_VALID | - brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT | - ISL_FORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT; - if (brw->gen == 4) - dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; - } - - /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values, - * the format is ignored and the value is always int. - */ - - OUT_BATCH(dw0); - OUT_BATCH(dw1); - } - - if (vs_prog_data->uses_drawid) { - uint32_t dw0 = 0, dw1 = 0; - - dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT); - - if (brw->gen >= 6) { - dw0 |= GEN6_VE0_VALID | - ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | - (ISL_FORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); - } else { - dw0 |= BRW_VE0_VALID | - ((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) | - (ISL_FORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); - - if (brw->gen == 4) - dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; - } - - OUT_BATCH(dw0); - OUT_BATCH(dw1); - } - - if (brw->gen >= 6 && gen6_edgeflag_input) { - uint32_t format = - brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); - - OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - GEN6_VE0_EDGE_FLAG_ENABLE | - (format << BRW_VE0_FORMAT_SHIFT) | - (gen6_edgeflag_input->offset << 
BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - - ADVANCE_BATCH(); -} - -const struct brw_tracked_state brw_vertices = { - .dirty = { - .mesa = _NEW_POLYGON, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VERTICES | - BRW_NEW_VS_PROG_DATA, - }, - .emit = brw_emit_vertices, -}; - static void brw_upload_indices(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 084f97fa1c4..acb7334109e 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -103,7 +103,6 @@ extern const struct brw_tracked_state brw_psp_urb_cbs; extern const struct brw_tracked_state brw_drawing_rect; extern const struct brw_tracked_state brw_indices; -extern const struct brw_tracked_state brw_vertices; extern const struct brw_tracked_state brw_index_buffer; extern const struct brw_tracked_state brw_cs_state; extern const struct brw_tracked_state gen7_cs_push_constants; @@ -125,7 +124,6 @@ extern const struct brw_tracked_state haswell_cut_index; extern const struct brw_tracked_state gen8_index_buffer; extern const struct brw_tracked_state gen8_multisample_state; extern const struct brw_tracked_state gen8_pma_fix; -extern const struct brw_tracked_state gen8_vertices; extern const struct brw_tracked_state gen8_vf_topology; extern const struct brw_tracked_state brw_cs_work_groups_surface; diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c index e81cca96749..8db160b5eca 100644 --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c @@ -34,336 +34,6 @@ #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" -#ifndef NDEBUG -static bool 
-is_passthru_format(uint32_t format) -{ - switch (format) { - case ISL_FORMAT_R64_PASSTHRU: - case ISL_FORMAT_R64G64_PASSTHRU: - case ISL_FORMAT_R64G64B64_PASSTHRU: - case ISL_FORMAT_R64G64B64A64_PASSTHRU: - return true; - default: - return false; - } -} -#endif - -static void -gen8_emit_vertices(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - bool uses_edge_flag; - - brw_prepare_vertices(brw); - brw_prepare_shader_draw_parameters(brw); - - uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); - - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { - unsigned vue = brw->vb.nr_enabled; - - /* The element for the edge flags must always be last, so we have to - * insert the SGVS before it in that case. - */ - if (uses_edge_flag) { - assert(vue > 0); - vue--; - } - - WARN_ONCE(vue >= 33, - "Trying to insert VID/IID past 33rd vertex element, " - "need to reorder the vertex attrbutes."); - - unsigned dw1 = 0; - if (vs_prog_data->uses_vertexid) { - dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID | - (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) | /* .z channel */ - (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT); - } - - if (vs_prog_data->uses_instanceid) { - dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID | - (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */ - (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT); - } - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); - OUT_BATCH(dw1); - ADVANCE_BATCH(); - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(vue | GEN8_VF_INSTANCING_ENABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* Normally we don't need an element for the SGVS attribute because the - * 3DSTATE_VF_SGVS instruction lets you store the generated 
attribute in an - * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if - * we're using draw parameters then we need an element for the those - * values. Additionally if there is an edge flag element then the SGVS - * can't be inserted past that so we need a dummy element to ensure that - * the edge flag is the last one. - */ - const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance || - ((vs_prog_data->uses_instanceid || - vs_prog_data->uses_vertexid) && - uses_edge_flag)); - const unsigned nr_elements = - brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid; - - /* If the VS doesn't read any inputs (calculating vertex position from - * a state variable for some reason, for example), emit a single pad - * VERTEX_ELEMENT struct and bail. - * - * The stale VB state stays in place, but they don't do anything unless - * a VE loads from them. - */ - if (nr_elements == 0) { - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (3 - 2)); - OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - (ISL_FORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); - ADVANCE_BATCH(); - return; - } - - /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. 
*/ - const bool uses_draw_params = - vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance; - const unsigned nr_buffers = brw->vb.nr_buffers + - uses_draw_params + vs_prog_data->uses_drawid; - - if (nr_buffers) { - assert(nr_buffers <= 33); - - BEGIN_BATCH(1 + 4 * nr_buffers); - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); - for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { - const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; - EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, - buffer->offset, - buffer->offset + buffer->size, - buffer->stride, 0 /* unused */); - } - - if (uses_draw_params) { - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers, - brw->draw.draw_params_bo, - brw->draw.draw_params_offset, - brw->draw.draw_params_bo->size, - 0 /* stride */, - 0 /* unused */); - } - - if (vs_prog_data->uses_drawid) { - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1, - brw->draw.draw_id_bo, - brw->draw.draw_id_offset, - brw->draw.draw_id_bo->size, - 0 /* stride */, - 0 /* unused */); - } - ADVANCE_BATCH(); - } - - /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, - * presumably for VertexID/InstanceID. - */ - assert(nr_elements <= 34); - - struct brw_vertex_element *gen6_edgeflag_input = NULL; - - BEGIN_BATCH(1 + nr_elements * 2); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); - for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); - uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; - - /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): - * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an - * element which has edge flag enabled." 
- */ - assert(!(is_passthru_format(format) && uses_edge_flag)); - - /* The gen4 driver expects edgeflag to come in as a float, and passes - * that float on to the tests in the clipper. Mesa's current vertex - * attribute value for EdgeFlag is stored as a float, which works out. - * glEdgeFlagPointer, on the other hand, gives us an unnormalized - * integer ubyte. Just rewrite that to convert to a float. - */ - if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { - /* Gen6+ passes edgeflag as sideband along with the vertex, instead - * of in the VUE. We have to upload it sideband as the last vertex - * element according to the B-Spec. - */ - gen6_edgeflag_input = input; - continue; - } - - switch (input->glarray->Size) { - case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; - case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; - case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; - case 3: - if (input->glarray->Doubles) { - comp3 = BRW_VE1_COMPONENT_STORE_0; - } else if (input->glarray->Integer) { - comp3 = BRW_VE1_COMPONENT_STORE_1_INT; - } else { - comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; - } - - break; - } - - /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE): - * - * "When SourceElementFormat is set to one of the *64*_PASSTHRU - * formats, 64-bit components are stored in the URB without any - * conversion. In this case, vertex elements must be written as 128 - * or 256 bits, with VFCOMP_STORE_0 being used to pad the output - * as required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red - * component into the URB, Component 1 must be specified as - * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) - * in order to output a 128-bit vertex element, or Components 1-3 must - * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex - * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3 - * to be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex - * element." 
- */ - if (input->glarray->Doubles && !input->is_dual_slot) { - /* Store vertex elements which correspond to double and dvec2 vertex - * shader inputs as 128-bit vertex elements, instead of 256-bits. - */ - comp2 = BRW_VE1_COMPONENT_NOSTORE; - comp3 = BRW_VE1_COMPONENT_NOSTORE; - } - - OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - (format << BRW_VE0_FORMAT_SHIFT) | - (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); - - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); - } - - if (needs_sgvs_element) { - if (vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance) { - OUT_BATCH(GEN6_VE0_VALID | - brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | - ISL_FORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } else { - OUT_BATCH(GEN6_VE0_VALID); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - } - - if (vs_prog_data->uses_drawid) { - OUT_BATCH(GEN6_VE0_VALID | - ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | - (ISL_FORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - - if (gen6_edgeflag_input) { - uint32_t format = - brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); - - 
OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - GEN6_VE0_EDGE_FLAG_ENABLE | - (format << BRW_VE0_FORMAT_SHIFT) | - (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - ADVANCE_BATCH(); - - for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { - const struct brw_vertex_element *input = brw->vb.enabled[i]; - const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer]; - unsigned element_index; - - /* The edge flag element is reordered to be the last one in the code - * above so we need to compensate for that in the element indices used - * below. - */ - if (input == gen6_edgeflag_input) - element_index = nr_elements - 1; - else - element_index = j++; - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(element_index | - (buffer->step_rate ? 
GEN8_VF_INSTANCING_ENABLE : 0)); - OUT_BATCH(buffer->step_rate); - ADVANCE_BATCH(); - } - - if (vs_prog_data->uses_drawid) { - const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(element); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - -const struct brw_tracked_state gen8_vertices = { - .dirty = { - .mesa = _NEW_POLYGON, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VERTICES | - BRW_NEW_VS_PROG_DATA, - }, - .emit = gen8_emit_vertices, -}; - static void gen8_emit_index_buffer(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 8d5bfea6320..bc7cbcc63e7 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -26,10 +26,16 @@ #include "common/gen_device_info.h" #include "genxml/gen_macros.h" +#include "main/bufferobj.h" +#include "main/context.h" +#include "main/enums.h" +#include "main/macros.h" + #include "brw_context.h" #if GEN_GEN == 6 #include "brw_defines.h" #endif +#include "brw_draw.h" #include "brw_state.h" #include "brw_wm.h" #include "brw_util.h" @@ -123,6 +129,17 @@ instruction_bo(struct brw_bo *bo, uint32_t offset) }; } +static inline struct brw_address +vertex_bo(struct brw_bo *bo, uint32_t offset) +{ + return (struct brw_address) { + .bo = bo, + .offset = offset, + .read_domains = I915_GEM_DOMAIN_VERTEX, + .write_domain = 0, + }; +} + #include "genxml/genX_pack.h" #define _brw_cmd_length(cmd) cmd ## _length @@ -156,6 +173,541 @@ instruction_bo(struct brw_bo *bo, uint32_t offset) _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \ _dst = NULL) +static uint32_t * +genX(emit_vertex_buffer_state)(struct brw_context *brw, + uint32_t *dw, + unsigned buffer_nr, + struct brw_bo *bo, + unsigned start_offset, + unsigned end_offset, + unsigned stride, + unsigned step_rate) +{ + struct GENX(VERTEX_BUFFER_STATE) buf_state = { + 
.VertexBufferIndex = buffer_nr, + .BufferPitch = stride, + .BufferStartingAddress = vertex_bo(bo, start_offset), +#if GEN_GEN >= 8 + .BufferSize = end_offset - start_offset, +#endif + +#if GEN_GEN >= 7 + .AddressModifyEnable = true, +#endif + +#if GEN_GEN < 8 + .BufferAccessType = step_rate ? INSTANCEDATA : VERTEXDATA, + .InstanceDataStepRate = step_rate, +#if GEN_GEN >= 5 + .EndAddress = vertex_bo(bo, end_offset - 1), +#endif +#endif + +#if GEN_GEN == 9 + .VertexBufferMOCS = SKL_MOCS_WB, +#elif GEN_GEN == 8 + .VertexBufferMOCS = BDW_MOCS_WB, +#elif GEN_GEN == 7 + .VertexBufferMOCS = GEN7_MOCS_L3, +#endif + }; + + GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &buf_state); + return dw + GENX(VERTEX_BUFFER_STATE_length); +} + +UNUSED static bool +is_passthru_format(uint32_t format) +{ + switch (format) { + case ISL_FORMAT_R64_PASSTHRU: + case ISL_FORMAT_R64G64_PASSTHRU: + case ISL_FORMAT_R64G64B64_PASSTHRU: + case ISL_FORMAT_R64G64B64A64_PASSTHRU: + return true; + default: + return false; + } +} + +UNUSED static int +genX(uploads_needed)(uint32_t format) +{ + if (!is_passthru_format(format)) + return 1; + + switch (format) { + case ISL_FORMAT_R64_PASSTHRU: + case ISL_FORMAT_R64G64_PASSTHRU: + return 1; + case ISL_FORMAT_R64G64B64_PASSTHRU: + case ISL_FORMAT_R64G64B64A64_PASSTHRU: + return 2; + default: + unreachable("not reached"); + } +} + +/* + * Returns the format that we are finally going to use when upload a vertex + * element. It will only change if we are using *64*PASSTHRU formats, as for + * gen < 8 they need to be splitted on two *32*FLOAT formats. + * + * @upload points in which upload we are. 
Valid values are [0,1] + */ +static uint32_t +downsize_format_if_needed(uint32_t format, + int upload) +{ + assert(upload == 0 || upload == 1); + + if (!is_passthru_format(format)) + return format; + + switch (format) { + case ISL_FORMAT_R64_PASSTHRU: + return ISL_FORMAT_R32G32_FLOAT; + case ISL_FORMAT_R64G64_PASSTHRU: + return ISL_FORMAT_R32G32B32A32_FLOAT; + case ISL_FORMAT_R64G64B64_PASSTHRU: + return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT + : ISL_FORMAT_R32G32_FLOAT; + case ISL_FORMAT_R64G64B64A64_PASSTHRU: + return ISL_FORMAT_R32G32B32A32_FLOAT; + default: + unreachable("not reached"); + } +} + +/* + * Returns the number of componentes associated with a format that is used on + * a 64 to 32 format split. See downsize_format() + */ +static int +upload_format_size(uint32_t upload_format) +{ + switch (upload_format) { + case ISL_FORMAT_R32G32_FLOAT: + return 2; + case ISL_FORMAT_R32G32B32A32_FLOAT: + return 4; + default: + unreachable("not reached"); + } +} + +static void +genX(emit_vertices)(struct brw_context *brw) +{ + uint32_t *dw; + + brw_prepare_vertices(brw); + brw_prepare_shader_draw_parameters(brw); + +#if GEN_GEN < 6 + brw_emit_query_begin(brw); +#endif + + const struct brw_vs_prog_data *vs_prog_data = + brw_vs_prog_data(brw->vs.base.prog_data); + +#if GEN_GEN >= 8 + struct gl_context *ctx = &brw->ctx; + bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); + + if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { + unsigned vue = brw->vb.nr_enabled; + + /* The element for the edge flags must always be last, so we have to + * insert the SGVS before it in that case. 
+ */ + if (uses_edge_flag) { + assert(vue > 0); + vue--; + } + + WARN_ONCE(vue >= 33, + "Trying to insert VID/IID past 33rd vertex element, " + "need to reorder the vertex attrbutes."); + + brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs) { + if (vs_prog_data->uses_vertexid) { + vfs.VertexIDEnable = true; + vfs.VertexIDComponentNumber = 2; + vfs.VertexIDElementOffset = vue; + } + + if (vs_prog_data->uses_instanceid) { + vfs.InstanceIDEnable = true; + vfs.InstanceIDComponentNumber = 3; + vfs.InstanceIDElementOffset = vue; + } + } + + brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { + vfi.InstancingEnable = true; + vfi.VertexElementIndex = vue; + } + } else { + brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs); + } + + /* Normally we don't need an element for the SGVS attribute because the + * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an + * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if + * we're using draw parameters then we need an element for the those + * values. Additionally if there is an edge flag element then the SGVS + * can't be inserted past that so we need a dummy element to ensure that + * the edge flag is the last one. 
+ */ + const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance || + ((vs_prog_data->uses_instanceid || + vs_prog_data->uses_vertexid) + && uses_edge_flag)); +#else + const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance || + vs_prog_data->uses_instanceid || + vs_prog_data->uses_vertexid); +#endif + unsigned nr_elements = + brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid; + +#if GEN_GEN < 8 + /* If any of the formats of vb.enabled needs more that one upload, we need + * to add it to nr_elements + */ + for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; + uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); + + if (genX(uploads_needed(format)) > 1) + nr_elements++; + } +#endif + + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (nr_elements == 0) { + dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), 1 + GENX(VERTEX_ELEMENT_STATE_length)); + struct GENX(VERTEX_ELEMENT_STATE) elem = { + .Valid = true, + .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT, + .Component0Control = VFCOMP_STORE_0, + .Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_1_FP, + }; + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem); + return; + } + + /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. 
*/ + const bool uses_draw_params = + vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance; + const unsigned nr_buffers = brw->vb.nr_buffers + + uses_draw_params + vs_prog_data->uses_drawid; + + if (nr_buffers) { +#if GEN_GEN >= 6 + assert(nr_buffers <= 33); +#else + assert(nr_buffers <= 17); +#endif + assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17)); + + dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS), + 1 + GENX(VERTEX_BUFFER_STATE_length) * nr_buffers); + + for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { + const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; + /* Prior to Haswell and Bay Trail we have to use 4-component formats + * to fake 3-component ones. In particular, we do this for + * half-float and 8 and 16-bit integer formats. This means that the + * vertex element may poke over the end of the buffer by 2 bytes. + */ + unsigned padding = + (GEN_GEN <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2; + dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo, + buffer->offset, + buffer->offset + buffer->size + padding, + buffer->stride, + buffer->step_rate); + } + + if (uses_draw_params) { + dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers, + brw->draw.draw_params_bo, + brw->draw.draw_params_offset, + brw->draw.draw_params_bo->size, + 0 /* stride */, + 0 /* step rate */); + } + + if (vs_prog_data->uses_drawid) { + dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1, + brw->draw.draw_id_bo, + brw->draw.draw_id_offset, + brw->draw.draw_id_bo->size, + 0 /* stride */, + 0 /* step rate */); + } + } + + /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, + * presumably for VertexID/InstanceID. 
+ */ +#if GEN_GEN >= 6 + assert(nr_elements <= 34); + struct brw_vertex_element *gen6_edgeflag_input = NULL; +#else + assert(nr_elements <= 18); +#endif + + dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), + 1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements); + unsigned i; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; + uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); + uint32_t comp0 = VFCOMP_STORE_SRC; + uint32_t comp1 = VFCOMP_STORE_SRC; + uint32_t comp2 = VFCOMP_STORE_SRC; + uint32_t comp3 = VFCOMP_STORE_SRC; + unsigned num_uploads = 1; + +#if GEN_GEN >= 8 + /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): + * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an + * element which has edge flag enabled." + */ + assert(!(is_passthru_format(format) && uses_edge_flag)); +#endif + + /* The gen4 driver expects edgeflag to come in as a float, and passes + * that float on to the tests in the clipper. Mesa's current vertex + * attribute value for EdgeFlag is stored as a float, which works out. + * glEdgeFlagPointer, on the other hand, gives us an unnormalized + * integer ubyte. Just rewrite that to convert to a float. + * + * Gen6+ passes edgeflag as sideband along with the vertex, instead + * of in the VUE. We have to upload it sideband as the last vertex + * element according to the B-Spec. + */ +#if GEN_GEN >= 6 + if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { + gen6_edgeflag_input = input; + continue; + } +#endif + +#if GEN_GEN < 8 + num_uploads = genX(uploads_needed(format)); +#endif + + for (unsigned c = 0; c < num_uploads; c++) { + uint32_t upload_format = GEN_GEN >= 8 ? format : + downsize_format_if_needed(format, c); + /* If we need more that one upload, the offset stride would be 128 + * bits (16 bytes), as for previous uploads we are using the full + * entry. 
*/ + unsigned int offset = input->offset + c * 16; + int size = input->glarray->Size; + + if (GEN_GEN < 8 && is_passthru_format(format)) + size = upload_format_size(upload_format); + + switch (size) { + case 0: comp0 = VFCOMP_STORE_0; + case 1: comp1 = VFCOMP_STORE_0; + case 2: comp2 = VFCOMP_STORE_0; + case 3: + if (GEN_GEN >= 8 && input->glarray->Doubles) { + comp3 = VFCOMP_STORE_0; + } else if (input->glarray->Integer) { + comp3 = VFCOMP_STORE_1_INT; + } else { + comp3 = VFCOMP_STORE_1_FP; + } + + break; + } + +#if GEN_GEN >= 8 + /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE): + * + * "When SourceElementFormat is set to one of the *64*_PASSTHRU + * formats, 64-bit components are stored in the URB without any + * conversion. In this case, vertex elements must be written as 128 + * or 256 bits, with VFCOMP_STORE_0 being used to pad the output as + * required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red + * component into the URB, Component 1 must be specified as + * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) in + * order to output a 128-bit vertex element, or Components 1-3 must + * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex + * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3 + * to be specified as VFCOMP_STORE_0 in order to output a 256-bit + * vertex element." + */ + if (input->glarray->Doubles && !input->is_dual_slot) { + /* Store vertex elements which correspond to double and dvec2 vertex + * shader inputs as 128-bit vertex elements, instead of 256-bits. 
+ */ + comp2 = VFCOMP_NOSTORE; + comp3 = VFCOMP_NOSTORE; + } +#endif + + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { + .VertexBufferIndex = input->buffer, + .Valid = true, + .SourceElementFormat = upload_format, + .SourceElementOffset = offset, + .Component0Control = comp0, + .Component1Control = comp1, + .Component2Control = comp2, + .Component3Control = comp3, +#if GEN_GEN < 5 + .DestinationElementOffset = i * 4, +#endif + }; + + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); + dw += GENX(VERTEX_ELEMENT_STATE_length); + } + } + + if (needs_sgvs_element) { + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { + .Valid = true, + .Component0Control = VFCOMP_STORE_0, + .Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, +#if GEN_GEN < 5 + .DestinationElementOffset = i * 4, +#endif + }; + +#if GEN_GEN >= 8 + if (vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance) { + elem_state.VertexBufferIndex = brw->vb.nr_buffers; + elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT; + elem_state.Component0Control = VFCOMP_STORE_SRC; + elem_state.Component1Control = VFCOMP_STORE_SRC; + } +#else + elem_state.VertexBufferIndex = brw->vb.nr_buffers; + elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT; + if (vs_prog_data->uses_basevertex) + elem_state.Component0Control = VFCOMP_STORE_SRC; + + if (vs_prog_data->uses_baseinstance) + elem_state.Component1Control = VFCOMP_STORE_SRC; + + if (vs_prog_data->uses_vertexid) + elem_state.Component2Control = VFCOMP_STORE_VID; + + if (vs_prog_data->uses_instanceid) + elem_state.Component3Control = VFCOMP_STORE_IID; +#endif + + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); + dw += GENX(VERTEX_ELEMENT_STATE_length); + } + + if (vs_prog_data->uses_drawid) { + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { + .Valid = true, + .VertexBufferIndex = brw->vb.nr_buffers + 1, + .SourceElementFormat = ISL_FORMAT_R32_UINT, + .Component0Control = 
VFCOMP_STORE_SRC, + .Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, +#if GEN_GEN < 5 + .DestinationElementOffset = i * 4, +#endif + }; + + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); + dw += GENX(VERTEX_ELEMENT_STATE_length); + } + +#if GEN_GEN >= 6 + if (gen6_edgeflag_input) { + uint32_t format = + brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); + + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { + .Valid = true, + .VertexBufferIndex = gen6_edgeflag_input->buffer, + .EdgeFlagEnable = true, + .SourceElementFormat = format, + .SourceElementOffset = gen6_edgeflag_input->offset, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, + }; + + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); + dw += GENX(VERTEX_ELEMENT_STATE_length); + } +#endif + +#if GEN_GEN >= 8 + for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { + const struct brw_vertex_element *input = brw->vb.enabled[i]; + const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer]; + unsigned element_index; + + /* The edge flag element is reordered to be the last one in the code + * above so we need to compensate for that in the element indices used + * below. 
+ */ + if (input == gen6_edgeflag_input) + element_index = nr_elements - 1; + else + element_index = j++; + + brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { + vfi.VertexElementIndex = element_index; + vfi.InstancingEnable = buffer->step_rate != 0; + vfi.InstanceDataStepRate = buffer->step_rate; + } + } + + if (vs_prog_data->uses_drawid) { + const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; + + brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { + vfi.VertexElementIndex = element; + } + } +#endif +} + +static const struct brw_tracked_state genX(vertices) = { + .dirty = { + .mesa = _NEW_POLYGON, + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_VERTICES | + BRW_NEW_VS_PROG_DATA, + }, + .emit = genX(emit_vertices), +}; + #if GEN_GEN >= 6 /** * Determine the appropriate attribute override value to store into the @@ -3000,7 +3552,7 @@ genX(init_atoms)(struct brw_context *brw) &brw_drawing_rect, &brw_indices, /* must come before brw_vertices */ &brw_index_buffer, - &brw_vertices, + &genX(vertices), &brw_constant_buffer }; @@ -3067,7 +3619,7 @@ genX(init_atoms)(struct brw_context *brw) &brw_indices, /* must come before brw_vertices */ &brw_index_buffer, - &brw_vertices, + &genX(vertices), }; #elif GEN_GEN == 7 static const struct brw_tracked_state *render_atoms[] = @@ -3155,7 +3707,7 @@ genX(init_atoms)(struct brw_context *brw) &brw_indices, /* must come before brw_vertices */ &brw_index_buffer, - &brw_vertices, + &genX(vertices), &haswell_cut_index, }; @@ -3248,7 +3800,7 @@ genX(init_atoms)(struct brw_context *brw) &brw_indices, &gen8_index_buffer, - &gen8_vertices, + &genX(vertices), &haswell_cut_index, &gen8_pma_fix, |