From 02f8f90cc2261167ea025cbb69f8856c33444007 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 28 Oct 2014 17:18:55 -0700 Subject: i965: Rename brw_gs{,_emit}.[ch] to brw_ff_gs{,_emit}.[ch]. The brw_gs.[ch] and brw_gs_emit.c source files contain code for emulating fixed-function unit functionality (VF primitive decomposition or SOL) using the GS unit. They do not contain code to support proper geometry shaders. We've taken to calling that code "ff_gs" (see brw_ff_gs_prog_key, brw_ff_gs_prog_data, brw_context::ff_gs, brw_ff_gs_compile, brw_ff_gs_prog). So it makes sense to make the filenames match. Signed-off-by: Kenneth Graunke Acked-by: Matt Turner Acked-by: Jason Ekstrand Acked-by: Iago Toral Quiroga --- src/mesa/drivers/dri/i965/Makefile.sources | 4 +- src/mesa/drivers/dri/i965/brw_ff_gs.c | 259 ++++++++++++++ src/mesa/drivers/dri/i965/brw_ff_gs.h | 115 +++++++ src/mesa/drivers/dri/i965/brw_ff_gs_emit.c | 529 +++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_gs.c | 259 -------------- src/mesa/drivers/dri/i965/brw_gs.h | 115 ------- src/mesa/drivers/dri/i965/brw_gs_emit.c | 529 ----------------------------- src/mesa/drivers/dri/i965/brw_vec4_gs.c | 2 +- 8 files changed, 906 insertions(+), 906 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_ff_gs.c create mode 100644 src/mesa/drivers/dri/i965/brw_ff_gs.h create mode 100644 src/mesa/drivers/dri/i965/brw_ff_gs_emit.c delete mode 100644 src/mesa/drivers/dri/i965/brw_gs.c delete mode 100644 src/mesa/drivers/dri/i965/brw_gs.h delete mode 100644 src/mesa/drivers/dri/i965/brw_gs_emit.c (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 9c006daa0e3..9f29551256c 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -50,6 +50,8 @@ i965_FILES = \ brw_eu_compact.c \ brw_eu_emit.c \ brw_eu_util.c \ + brw_ff_gs.c \ + brw_ff_gs_emit.c \ brw_fs.cpp \ 
brw_fs_channel_expressions.cpp \ brw_fs_copy_propagation.cpp \ @@ -65,8 +67,6 @@ i965_FILES = \ brw_fs_sel_peephole.cpp \ brw_fs_vector_splitting.cpp \ brw_fs_visitor.cpp \ - brw_gs.c \ - brw_gs_emit.c \ brw_gs_state.c \ brw_gs_surface_state.c \ brw_interpolation_map.c \ diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c new file mode 100644 index 00000000000..6ca9e7f51e9 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.c @@ -0,0 +1,259 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "main/transformfeedback.h" + +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_ff_gs.h" + +#include "util/ralloc.h" + +static void compile_ff_gs_prog(struct brw_context *brw, + struct brw_ff_gs_prog_key *key) +{ + struct brw_ff_gs_compile c; + const GLuint *program; + void *mem_ctx; + GLuint program_size; + + memset(&c, 0, sizeof(c)); + + c.key = *key; + c.vue_map = brw->vs.prog_data->base.vue_map; + c.nr_regs = (c.vue_map.num_slots + 1)/2; + + mem_ctx = ralloc_context(NULL); + + /* Begin the compilation: + */ + brw_init_compile(brw, &c.func, mem_ctx); + + c.func.single_program_flow = 1; + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE); + + if (brw->gen >= 6) { + unsigned num_verts; + bool check_edge_flag; + /* On Sandybridge, we use the GS for implementing transform feedback + * (called "Stream Out" in the PRM). + */ + switch (key->primitive) { + case _3DPRIM_POINTLIST: + num_verts = 1; + check_edge_flag = false; + break; + case _3DPRIM_LINELIST: + case _3DPRIM_LINESTRIP: + case _3DPRIM_LINELOOP: + num_verts = 2; + check_edge_flag = false; + break; + case _3DPRIM_TRILIST: + case _3DPRIM_TRIFAN: + case _3DPRIM_TRISTRIP: + case _3DPRIM_RECTLIST: + num_verts = 3; + check_edge_flag = false; + break; + case _3DPRIM_QUADLIST: + case _3DPRIM_QUADSTRIP: + case _3DPRIM_POLYGON: + num_verts = 3; + check_edge_flag = true; + break; + default: + unreachable("Unexpected primitive type in Gen6 SOL program."); + } + gen6_sol_program(&c, key, num_verts, check_edge_flag); + } else { + /* On Gen4-5, we use the GS to decompose certain types of primitives. 
+ * Note that primitives which don't require a GS program have already + * been weeded out by now. + */ + switch (key->primitive) { + case _3DPRIM_QUADLIST: + brw_ff_gs_quads( &c, key ); + break; + case _3DPRIM_QUADSTRIP: + brw_ff_gs_quad_strip( &c, key ); + break; + case _3DPRIM_LINELOOP: + brw_ff_gs_lines( &c ); + break; + default: + ralloc_free(mem_ctx); + return; + } + } + + brw_compact_instructions(&c.func, 0, 0, NULL); + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + if (unlikely(INTEL_DEBUG & DEBUG_GS)) { + fprintf(stderr, "gs:\n"); + brw_disassemble(brw, c.func.store, 0, program_size, stderr); + fprintf(stderr, "\n"); + } + + brw_upload_cache(&brw->cache, BRW_FF_GS_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data); + ralloc_free(mem_ctx); +} + +static void populate_key(struct brw_context *brw, + struct brw_ff_gs_prog_key *key) +{ + static const unsigned swizzle_for_offset[4] = { + BRW_SWIZZLE4(0, 1, 2, 3), + BRW_SWIZZLE4(1, 2, 3, 3), + BRW_SWIZZLE4(2, 3, 3, 3), + BRW_SWIZZLE4(3, 3, 3, 3) + }; + + struct gl_context *ctx = &brw->ctx; + + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_VS_PROG (part of VUE map) */ + key->attrs = brw->vs.prog_data->base.vue_map.slots_valid; + + /* BRW_NEW_PRIMITIVE */ + key->primitive = brw->primitive; + + /* _NEW_LIGHT */ + key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); + if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) { + /* Provide consistent primitive order with brw_set_prim's + * optimization of single quads to trifans. + */ + key->pv_first = true; + } + + if (brw->gen >= 7) { + /* On Gen7 and later, we don't use GS (yet). */ + key->need_gs_prog = false; + } else if (brw->gen == 6) { + /* On Gen6, GS is used for transform feedback. 
*/ + /* BRW_NEW_TRANSFORM_FEEDBACK */ + if (_mesa_is_xfb_active_and_unpaused(ctx)) { + const struct gl_shader_program *shaderprog = + ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; + const struct gl_transform_feedback_info *linked_xfb_info = + &shaderprog->LinkedTransformFeedback; + int i; + + /* Make sure that the VUE slots won't overflow the unsigned chars in + * key->transform_feedback_bindings[]. + */ + STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256); + + /* Make sure that we don't need more binding table entries than we've + * set aside for use in transform feedback. (We shouldn't, since we + * set aside enough binding table entries to have one per component). + */ + assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); + + key->need_gs_prog = true; + key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; + for (i = 0; i < key->num_transform_feedback_bindings; ++i) { + key->transform_feedback_bindings[i] = + linked_xfb_info->Outputs[i].OutputRegister; + key->transform_feedback_swizzles[i] = + swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; + } + } + } else { + /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP + * into simpler primitives. + */ + key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST || + brw->primitive == _3DPRIM_QUADSTRIP || + brw->primitive == _3DPRIM_LINELOOP); + } +} + +/* Calculate interpolants for triangle and line rasterization. 
+ */ +static void +brw_upload_ff_gs_prog(struct brw_context *brw) +{ + struct brw_ff_gs_prog_key key; + /* Populate the key: + */ + populate_key(brw, &key); + + if (brw->ff_gs.prog_active != key.need_gs_prog) { + brw->state.dirty.cache |= CACHE_NEW_FF_GS_PROG; + brw->ff_gs.prog_active = key.need_gs_prog; + } + + if (brw->ff_gs.prog_active) { + if (!brw_search_cache(&brw->cache, BRW_FF_GS_PROG, + &key, sizeof(key), + &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data)) { + compile_ff_gs_prog( brw, &key ); + } + } +} + +void gen6_brw_upload_ff_gs_prog(struct brw_context *brw) +{ + brw_upload_ff_gs_prog(brw); +} + +const struct brw_tracked_state brw_ff_gs_prog = { + .dirty = { + .mesa = (_NEW_LIGHT), + .brw = (BRW_NEW_PRIMITIVE | + BRW_NEW_TRANSFORM_FEEDBACK), + .cache = CACHE_NEW_VS_PROG + }, + .emit = brw_upload_ff_gs_prog +}; diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.h b/src/mesa/drivers/dri/i965/brw_ff_gs.h new file mode 100644 index 00000000000..a538948e9ac --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.h @@ -0,0 +1,115 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_GS_H +#define BRW_GS_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_GS_VERTS (4) + +struct brw_ff_gs_prog_key { + GLbitfield64 attrs; + + /** + * Hardware primitive type being drawn, e.g. _3DPRIM_TRILIST. + */ + GLuint primitive:8; + + GLuint pv_first:1; + GLuint need_gs_prog:1; + + /** + * Number of varyings that are output to transform feedback. + */ + GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */ + + /** + * Map from the index of a transform feedback binding table entry to the + * gl_varying_slot that should be streamed out through that binding table + * entry. + */ + unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS]; + + /** + * Map from the index of a transform feedback binding table entry to the + * swizzles that should be used when streaming out data through that + * binding table entry. + */ + unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS]; +}; + +struct brw_ff_gs_compile { + struct brw_compile func; + struct brw_ff_gs_prog_key key; + struct brw_ff_gs_prog_data prog_data; + + struct { + struct brw_reg R0; + + /** + * Register holding streamed vertex buffer pointers -- see the Sandy + * Bridge PRM, volume 2 part 1, section 4.4.2 (GS Thread Payload + * [DevSNB]). These pointers are delivered in GRF 1. + */ + struct brw_reg SVBI; + + struct brw_reg vertex[MAX_GS_VERTS]; + struct brw_reg header; + struct brw_reg temp; + + /** + * Register holding destination indices for streamed buffer writes. + * Only used for SOL programs. 
+ */ + struct brw_reg destination_indices; + } reg; + + /* Number of registers used to store vertex data */ + GLuint nr_regs; + + struct brw_vue_map vue_map; +}; + +void brw_ff_gs_quads(struct brw_ff_gs_compile *c, + struct brw_ff_gs_prog_key *key); +void brw_ff_gs_quad_strip(struct brw_ff_gs_compile *c, + struct brw_ff_gs_prog_key *key); +void brw_ff_gs_lines(struct brw_ff_gs_compile *c); +void gen6_sol_program(struct brw_ff_gs_compile *c, + struct brw_ff_gs_prog_key *key, + unsigned num_verts, bool check_edge_flag); +void gen6_brw_upload_ff_gs_prog(struct brw_context *brw); + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c b/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c new file mode 100644 index 00000000000..3f315972289 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c @@ -0,0 +1,529 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" + +#include "program/program.h" +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_ff_gs.h" + +/** + * Allocate registers for GS. + * + * If sol_program is true, then: + * + * - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF + * 1 needs to be set aside to hold the streamed vertex buffer indices. + * + * - The thread will need to use the destination_indices register. + */ +static void brw_ff_gs_alloc_regs(struct brw_ff_gs_compile *c, + GLuint nr_verts, + bool sol_program) +{ + GLuint i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + /* Streamed vertex buffer indices */ + if (sol_program) + c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); + c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); + + if (sol_program) { + c->reg.destination_indices = + retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD); + } + + c->prog_data.urb_read_length = c->nr_regs; + c->prog_data.total_grf = i; +} + + +/** + * Set up the initial value of c->reg.header register based on c->reg.R0. 
+ * + * The following information is passed to the GS thread in R0, and needs to be + * included in the first URB_WRITE or FF_SYNC message sent by the GS: + * + * - DWORD 0 [31:0] handle info (Gen4 only) + * - DWORD 5 [7:0] FFTID + * - DWORD 6 [31:0] Debug info + * - DWORD 7 [31:0] Debug info + * + * This function sets up the above data by copying the contents of + * R0 to the header register. + */ +static void brw_ff_gs_initialize_header(struct brw_ff_gs_compile *c) +{ + struct brw_compile *p = &c->func; + brw_MOV(p, c->reg.header, c->reg.R0); +} + +/** + * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value. + * + * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart, + * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we + * need to be able to update on a per-vertex basis. + */ +static void brw_ff_gs_overwrite_header_dw2(struct brw_ff_gs_compile *c, + unsigned dw2) +{ + struct brw_compile *p = &c->func; + brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2)); +} + +/** + * Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0. + * + * When the thread is spawned, GRF 0 contains the primitive type in bits 4:0 + * of DWORD 2. URB_WRITE messages need the primitive type in bits 6:2 of + * DWORD 2. So this function extracts the primitive type field, bitshifts it + * appropriately, and stores it in c->reg.header. + */ +static void brw_ff_gs_overwrite_header_dw2_from_r0(struct brw_ff_gs_compile *c) +{ + struct brw_compile *p = &c->func; + brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2), + brw_imm_ud(0x1f)); + brw_SHL(p, get_element_ud(c->reg.header, 2), + get_element_ud(c->reg.header, 2), brw_imm_ud(2)); +} + +/** + * Apply an additive offset to DWORD 2 of c->reg.header. + * + * This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately + * for each vertex. 
+ */ +static void brw_ff_gs_offset_header_dw2(struct brw_ff_gs_compile *c, + int offset) +{ + struct brw_compile *p = &c->func; + brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2), + brw_imm_d(offset)); +} + + +/** + * Emit a vertex using the URB_WRITE message. Use the contents of + * c->reg.header for the message header, and the registers starting at \c vert + * for the vertex data. + * + * If \c last is true, then this is the last vertex, so no further URB space + * should be allocated, and this message should end the thread. + * + * If \c last is false, then a new URB entry will be allocated, and its handle + * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE + * message. + */ +static void brw_ff_gs_emit_vue(struct brw_ff_gs_compile *c, + struct brw_reg vert, + bool last) +{ + struct brw_compile *p = &c->func; + int write_offset = 0; + bool complete = false; + + do { + /* We can't write more than 14 registers at a time to the URB */ + int write_len = MIN2(c->nr_regs - write_offset, 14); + if (write_len == c->nr_regs - write_offset) + complete = true; + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy8(p, brw_message_reg(1), offset(vert, write_offset), write_len); + + /* Send the vertex data to the URB. If this is the last write for this + * vertex, then we mark it as complete, and either end the thread or + * allocate another vertex URB entry (depending whether this is the last + * vertex). + */ + enum brw_urb_write_flags flags; + if (!complete) + flags = BRW_URB_WRITE_NO_FLAGS; + else if (last) + flags = BRW_URB_WRITE_EOT_COMPLETE; + else + flags = BRW_URB_WRITE_ALLOCATE_COMPLETE; + brw_urb_WRITE(p, + (flags & BRW_URB_WRITE_ALLOCATE) ? c->reg.temp + : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.header, + flags, + write_len + 1, /* msg length */ + (flags & BRW_URB_WRITE_ALLOCATE) ? 
1 + : 0, /* response length */ + write_offset, /* urb offset */ + BRW_URB_SWIZZLE_NONE); + write_offset += write_len; + } while (!complete); + + if (!last) { + brw_MOV(p, get_element_ud(c->reg.header, 0), + get_element_ud(c->reg.temp, 0)); + } +} + +/** + * Send an FF_SYNC message to ensure that all previously spawned GS threads + * have finished sending primitives down the pipeline, and to allocate a URB + * entry for the first output vertex. Only needed on Ironlake+. + * + * This function modifies c->reg.header: in DWORD 1, it stores num_prim (which + * is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to + * the allocated URB entry (which will be needed by the URB_WRITE message that + * follows). + */ +static void brw_ff_gs_ff_sync(struct brw_ff_gs_compile *c, int num_prim) +{ + struct brw_compile *p = &c->func; + + brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim)); + brw_ff_sync(p, + c->reg.temp, + 0, + c->reg.header, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); + brw_MOV(p, get_element_ud(c->reg.header, 0), + get_element_ud(c->reg.temp, 0)); +} + + +void +brw_ff_gs_quads(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key) +{ + struct brw_context *brw = c->func.brw; + + brw_ff_gs_alloc_regs(c, 4, false); + brw_ff_gs_initialize_header(c); + /* Use polygons for correct edgeflag behaviour. 
Note that vertex 3 + * is the PV for quads, but vertex 0 for polygons: + */ + if (brw->gen == 5) + brw_ff_gs_ff_sync(c, 1); + brw_ff_gs_overwrite_header_dw2( + c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_START)); + if (key->pv_first) { + brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); + brw_ff_gs_overwrite_header_dw2( + c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); + brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0); + brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0); + brw_ff_gs_overwrite_header_dw2( + c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_END)); + brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1); + } + else { + brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0); + brw_ff_gs_overwrite_header_dw2( + c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); + brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); + brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0); + brw_ff_gs_overwrite_header_dw2( + c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_END)); + brw_ff_gs_emit_vue(c, c->reg.vertex[2], 1); + } +} + +void +brw_ff_gs_quad_strip(struct brw_ff_gs_compile *c, + struct brw_ff_gs_prog_key *key) +{ + struct brw_context *brw = c->func.brw; + + brw_ff_gs_alloc_regs(c, 4, false); + brw_ff_gs_initialize_header(c); + + if (brw->gen == 5) + brw_ff_gs_ff_sync(c, 1); + brw_ff_gs_overwrite_header_dw2( + c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_START)); + if (key->pv_first) { + brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); + brw_ff_gs_overwrite_header_dw2( + c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); + brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0); + brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0); + brw_ff_gs_overwrite_header_dw2( + c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_END)); + brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1); + } + else { + brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0); + brw_ff_gs_overwrite_header_dw2( + c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); + brw_ff_gs_emit_vue(c, 
c->reg.vertex[3], 0); + brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); + brw_ff_gs_overwrite_header_dw2( + c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_END)); + brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1); + } +} + +void brw_ff_gs_lines(struct brw_ff_gs_compile *c) +{ + struct brw_context *brw = c->func.brw; + + brw_ff_gs_alloc_regs(c, 2, false); + brw_ff_gs_initialize_header(c); + + if (brw->gen == 5) + brw_ff_gs_ff_sync(c, 1); + brw_ff_gs_overwrite_header_dw2( + c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_START)); + brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); + brw_ff_gs_overwrite_header_dw2( + c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_END)); + brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1); +} + +/** + * Generate the geometry shader program used on Gen6 to perform stream output + * (transform feedback). + */ +void +gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key, + unsigned num_verts, bool check_edge_flags) +{ + struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + brw_inst *inst; + c->prog_data.svbi_postincrement_value = num_verts; + + brw_ff_gs_alloc_regs(c, num_verts, true); + brw_ff_gs_initialize_header(c); + + if (key->num_transform_feedback_bindings > 0) { + unsigned vertex, binding; + struct brw_reg destination_indices_uw = + vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW)); + + /* Note: since we use the binding table to keep track of buffer offsets + * and stride, the GS doesn't need to keep track of a separate pointer + * into each buffer; it uses a single pointer which increments by 1 for + * each vertex. So we use SVBI0 for this pointer, regardless of whether + * transform feedback is in interleaved or separate attribs mode. + * + * Make sure that the buffers have enough room for all the vertices. 
+ */ + brw_ADD(p, get_element_ud(c->reg.temp, 0), + get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts)); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, + get_element_ud(c->reg.temp, 0), + get_element_ud(c->reg.SVBI, 4)); + brw_IF(p, BRW_EXECUTE_1); + + /* Compute the destination indices to write to. Usually we use SVBI[0] + * + (0, 1, 2). However, for odd-numbered triangles in tristrips, the + * vertices come down the pipeline in reversed winding order, so we need + * to flip the order when writing to the transform feedback buffer. To + * ensure that flatshading accuracy is preserved, we need to write them + * in order SVBI[0] + (0, 2, 1) if we're using the first provoking + * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using + * the last provoking vertex convention. + * + * Note: since brw_imm_v can only be used in instructions in + * packed-word execution mode, and SVBI is a double-word, we need to + * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1), + * or (1, 0, 2)) to the destination_indices register, and then add SVBI + * using a separate instruction. Also, since the immediate constant is + * expressed as packed words, and we need to load double-words into + * destination_indices, we need to intersperse zeros to fill the upper + * halves of each double-word. + */ + brw_MOV(p, destination_indices_uw, + brw_imm_v(0x00020100)); /* (0, 1, 2) */ + if (num_verts == 3) { + /* Get primitive type into temp register. */ + brw_AND(p, get_element_ud(c->reg.temp, 0), + get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f)); + + /* Test if primitive type is TRISTRIP_REVERSE. We need to do this as + * an 8-wide comparison so that the conditional MOV that follows + * moves all 8 words correctly. + */ + brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ, + get_element_ud(c->reg.temp, 0), + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); + + /* If so, then overwrite destination_indices_uw with the appropriate + * reordering. 
+ */ + inst = brw_MOV(p, destination_indices_uw, + brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */ + : 0x00020001)); /* (1, 0, 2) */ + brw_inst_set_pred_control(brw, inst, BRW_PREDICATE_NORMAL); + } + brw_ADD(p, c->reg.destination_indices, + c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0)); + + /* For each vertex, generate code to output each varying using the + * appropriate binding table entry. + */ + for (vertex = 0; vertex < num_verts; ++vertex) { + /* Set up the correct destination index for this vertex */ + brw_MOV(p, get_element_ud(c->reg.header, 5), + get_element_ud(c->reg.destination_indices, vertex)); + + for (binding = 0; binding < key->num_transform_feedback_bindings; + ++binding) { + unsigned char varying = + key->transform_feedback_bindings[binding]; + unsigned char slot = c->vue_map.varying_to_slot[varying]; + /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1: + * + * "Prior to End of Thread with a URB_WRITE, the kernel must + * ensure that all writes are complete by sending the final + * write as a committed write." + */ + bool final_write = + binding == key->num_transform_feedback_bindings - 1 && + vertex == num_verts - 1; + struct brw_reg vertex_slot = c->reg.vertex[vertex]; + vertex_slot.nr += slot / 2; + vertex_slot.subnr = (slot % 2) * 16; + /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */ + vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ + ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding]; + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_MOV(p, stride(c->reg.header, 4, 4, 1), + retype(vertex_slot, BRW_REGISTER_TYPE_UD)); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_svb_write(p, + final_write ? 
c->reg.temp : brw_null_reg(), /* dest */ + 1, /* msg_reg_nr */ + c->reg.header, /* src0 */ + SURF_INDEX_GEN6_SOL_BINDING(binding), /* binding_table_index */ + final_write); /* send_commit_msg */ + } + } + brw_ENDIF(p); + + /* Now, reinitialize the header register from R0 to restore the parts of + * the register that we overwrote while streaming out transform feedback + * data. + */ + brw_ff_gs_initialize_header(c); + + /* Finally, wait for the write commit to occur so that we can proceed to + * other things safely. + * + * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3: + * + * The write commit does not modify the destination register, but + * merely clears the dependency associated with the destination + * register. Thus, a simple “mov” instruction using the register as a + * source is sufficient to wait for the write commit to occur. + */ + brw_MOV(p, c->reg.temp, c->reg.temp); + } + + brw_ff_gs_ff_sync(c, 1); + + brw_ff_gs_overwrite_header_dw2_from_r0(c); + switch (num_verts) { + case 1: + brw_ff_gs_offset_header_dw2(c, + URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); + brw_ff_gs_emit_vue(c, c->reg.vertex[0], true); + break; + case 2: + brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); + brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); + brw_ff_gs_offset_header_dw2(c, + URB_WRITE_PRIM_END - URB_WRITE_PRIM_START); + brw_ff_gs_emit_vue(c, c->reg.vertex[1], true); + break; + case 3: + if (check_edge_flags) { + /* Only emit vertices 0 and 1 if this is the first triangle of the + * polygon. Otherwise they are redundant. 
+ */ + brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + get_element_ud(c->reg.R0, 2), + brw_imm_ud(BRW_GS_EDGE_INDICATOR_0)); + brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_NZ); + brw_IF(p, BRW_EXECUTE_1); + } + brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); + brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); + brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START); + brw_ff_gs_emit_vue(c, c->reg.vertex[1], false); + if (check_edge_flags) { + brw_ENDIF(p); + /* Only emit vertex 2 in PRIM_END mode if this is the last triangle + * of the polygon. Otherwise leave the primitive incomplete because + * there are more polygon vertices coming. + */ + brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + get_element_ud(c->reg.R0, 2), + brw_imm_ud(BRW_GS_EDGE_INDICATOR_1)); + brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_NZ); + brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); + } + brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_ff_gs_emit_vue(c, c->reg.vertex[2], true); + break; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c deleted file mode 100644 index c0c4c13f13c..00000000000 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "main/transformfeedback.h" - -#include "intel_batchbuffer.h" - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_state.h" -#include "brw_gs.h" - -#include "util/ralloc.h" - -static void compile_ff_gs_prog(struct brw_context *brw, - struct brw_ff_gs_prog_key *key) -{ - struct brw_ff_gs_compile c; - const GLuint *program; - void *mem_ctx; - GLuint program_size; - - memset(&c, 0, sizeof(c)); - - c.key = *key; - c.vue_map = brw->vs.prog_data->base.vue_map; - c.nr_regs = (c.vue_map.num_slots + 1)/2; - - mem_ctx = ralloc_context(NULL); - - /* Begin the compilation: - */ - brw_init_compile(brw, &c.func, mem_ctx); - - c.func.single_program_flow = 1; - - /* For some reason the thread is spawned with only 4 channels - * unmasked. - */ - brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE); - - if (brw->gen >= 6) { - unsigned num_verts; - bool check_edge_flag; - /* On Sandybridge, we use the GS for implementing transform feedback - * (called "Stream Out" in the PRM). - */ - switch (key->primitive) { - case _3DPRIM_POINTLIST: - num_verts = 1; - check_edge_flag = false; - break; - case _3DPRIM_LINELIST: - case _3DPRIM_LINESTRIP: - case _3DPRIM_LINELOOP: - num_verts = 2; - check_edge_flag = false; - break; - case _3DPRIM_TRILIST: - case _3DPRIM_TRIFAN: - case _3DPRIM_TRISTRIP: - case _3DPRIM_RECTLIST: - num_verts = 3; - check_edge_flag = false; - break; - case _3DPRIM_QUADLIST: - case _3DPRIM_QUADSTRIP: - case _3DPRIM_POLYGON: - num_verts = 3; - check_edge_flag = true; - break; - default: - unreachable("Unexpected primitive type in Gen6 SOL program."); - } - gen6_sol_program(&c, key, num_verts, check_edge_flag); - } else { - /* On Gen4-5, we use the GS to decompose certain types of primitives. 
- * Note that primitives which don't require a GS program have already - * been weeded out by now. - */ - switch (key->primitive) { - case _3DPRIM_QUADLIST: - brw_ff_gs_quads( &c, key ); - break; - case _3DPRIM_QUADSTRIP: - brw_ff_gs_quad_strip( &c, key ); - break; - case _3DPRIM_LINELOOP: - brw_ff_gs_lines( &c ); - break; - default: - ralloc_free(mem_ctx); - return; - } - } - - brw_compact_instructions(&c.func, 0, 0, NULL); - - /* get the program - */ - program = brw_get_program(&c.func, &program_size); - - if (unlikely(INTEL_DEBUG & DEBUG_GS)) { - fprintf(stderr, "gs:\n"); - brw_disassemble(brw, c.func.store, 0, program_size, stderr); - fprintf(stderr, "\n"); - } - - brw_upload_cache(&brw->cache, BRW_FF_GS_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, sizeof(c.prog_data), - &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data); - ralloc_free(mem_ctx); -} - -static void populate_key(struct brw_context *brw, - struct brw_ff_gs_prog_key *key) -{ - static const unsigned swizzle_for_offset[4] = { - BRW_SWIZZLE4(0, 1, 2, 3), - BRW_SWIZZLE4(1, 2, 3, 3), - BRW_SWIZZLE4(2, 3, 3, 3), - BRW_SWIZZLE4(3, 3, 3, 3) - }; - - struct gl_context *ctx = &brw->ctx; - - memset(key, 0, sizeof(*key)); - - /* CACHE_NEW_VS_PROG (part of VUE map) */ - key->attrs = brw->vs.prog_data->base.vue_map.slots_valid; - - /* BRW_NEW_PRIMITIVE */ - key->primitive = brw->primitive; - - /* _NEW_LIGHT */ - key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); - if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) { - /* Provide consistent primitive order with brw_set_prim's - * optimization of single quads to trifans. - */ - key->pv_first = true; - } - - if (brw->gen >= 7) { - /* On Gen7 and later, we don't use GS (yet). */ - key->need_gs_prog = false; - } else if (brw->gen == 6) { - /* On Gen6, GS is used for transform feedback. 
*/ - /* BRW_NEW_TRANSFORM_FEEDBACK */ - if (_mesa_is_xfb_active_and_unpaused(ctx)) { - const struct gl_shader_program *shaderprog = - ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; - const struct gl_transform_feedback_info *linked_xfb_info = - &shaderprog->LinkedTransformFeedback; - int i; - - /* Make sure that the VUE slots won't overflow the unsigned chars in - * key->transform_feedback_bindings[]. - */ - STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256); - - /* Make sure that we don't need more binding table entries than we've - * set aside for use in transform feedback. (We shouldn't, since we - * set aside enough binding table entries to have one per component). - */ - assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); - - key->need_gs_prog = true; - key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; - for (i = 0; i < key->num_transform_feedback_bindings; ++i) { - key->transform_feedback_bindings[i] = - linked_xfb_info->Outputs[i].OutputRegister; - key->transform_feedback_swizzles[i] = - swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; - } - } - } else { - /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP - * into simpler primitives. - */ - key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST || - brw->primitive == _3DPRIM_QUADSTRIP || - brw->primitive == _3DPRIM_LINELOOP); - } -} - -/* Calculate interpolants for triangle and line rasterization. 
- */ -static void -brw_upload_ff_gs_prog(struct brw_context *brw) -{ - struct brw_ff_gs_prog_key key; - /* Populate the key: - */ - populate_key(brw, &key); - - if (brw->ff_gs.prog_active != key.need_gs_prog) { - brw->state.dirty.cache |= CACHE_NEW_FF_GS_PROG; - brw->ff_gs.prog_active = key.need_gs_prog; - } - - if (brw->ff_gs.prog_active) { - if (!brw_search_cache(&brw->cache, BRW_FF_GS_PROG, - &key, sizeof(key), - &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data)) { - compile_ff_gs_prog( brw, &key ); - } - } -} - -void gen6_brw_upload_ff_gs_prog(struct brw_context *brw) -{ - brw_upload_ff_gs_prog(brw); -} - -const struct brw_tracked_state brw_ff_gs_prog = { - .dirty = { - .mesa = (_NEW_LIGHT), - .brw = (BRW_NEW_PRIMITIVE | - BRW_NEW_TRANSFORM_FEEDBACK), - .cache = CACHE_NEW_VS_PROG - }, - .emit = brw_upload_ff_gs_prog -}; diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h deleted file mode 100644 index a538948e9ac..00000000000 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_GS_H -#define BRW_GS_H - - -#include "brw_context.h" -#include "brw_eu.h" - -#define MAX_GS_VERTS (4) - -struct brw_ff_gs_prog_key { - GLbitfield64 attrs; - - /** - * Hardware primitive type being drawn, e.g. _3DPRIM_TRILIST. - */ - GLuint primitive:8; - - GLuint pv_first:1; - GLuint need_gs_prog:1; - - /** - * Number of varyings that are output to transform feedback. - */ - GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */ - - /** - * Map from the index of a transform feedback binding table entry to the - * gl_varying_slot that should be streamed out through that binding table - * entry. - */ - unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS]; - - /** - * Map from the index of a transform feedback binding table entry to the - * swizzles that should be used when streaming out data through that - * binding table entry. - */ - unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS]; -}; - -struct brw_ff_gs_compile { - struct brw_compile func; - struct brw_ff_gs_prog_key key; - struct brw_ff_gs_prog_data prog_data; - - struct { - struct brw_reg R0; - - /** - * Register holding streamed vertex buffer pointers -- see the Sandy - * Bridge PRM, volume 2 part 1, section 4.4.2 (GS Thread Payload - * [DevSNB]). These pointers are delivered in GRF 1. - */ - struct brw_reg SVBI; - - struct brw_reg vertex[MAX_GS_VERTS]; - struct brw_reg header; - struct brw_reg temp; - - /** - * Register holding destination indices for streamed buffer writes. - * Only used for SOL programs. 
- */ - struct brw_reg destination_indices; - } reg; - - /* Number of registers used to store vertex data */ - GLuint nr_regs; - - struct brw_vue_map vue_map; -}; - -void brw_ff_gs_quads(struct brw_ff_gs_compile *c, - struct brw_ff_gs_prog_key *key); -void brw_ff_gs_quad_strip(struct brw_ff_gs_compile *c, - struct brw_ff_gs_prog_key *key); -void brw_ff_gs_lines(struct brw_ff_gs_compile *c); -void gen6_sol_program(struct brw_ff_gs_compile *c, - struct brw_ff_gs_prog_key *key, - unsigned num_verts, bool check_edge_flag); -void gen6_brw_upload_ff_gs_prog(struct brw_context *brw); - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c deleted file mode 100644 index 91986c3a6cc..00000000000 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ /dev/null @@ -1,529 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" - -#include "program/program.h" -#include "intel_batchbuffer.h" - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_gs.h" - -/** - * Allocate registers for GS. - * - * If sol_program is true, then: - * - * - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF - * 1 needs to be set aside to hold the streamed vertex buffer indices. - * - * - The thread will need to use the destination_indices register. - */ -static void brw_ff_gs_alloc_regs(struct brw_ff_gs_compile *c, - GLuint nr_verts, - bool sol_program) -{ - GLuint i = 0,j; - - /* Register usage is static, precompute here: - */ - c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; - - /* Streamed vertex buffer indices */ - if (sol_program) - c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); - - /* Payload vertices plus space for more generated vertices: - */ - for (j = 0; j < nr_verts; j++) { - c->reg.vertex[j] = brw_vec4_grf(i, 0); - i += c->nr_regs; - } - - c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); - c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); - - if (sol_program) { - c->reg.destination_indices = - retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD); - } - - c->prog_data.urb_read_length = c->nr_regs; - c->prog_data.total_grf = i; -} - - -/** - * Set up the initial value of c->reg.header register based on c->reg.R0. 
- * - * The following information is passed to the GS thread in R0, and needs to be - * included in the first URB_WRITE or FF_SYNC message sent by the GS: - * - * - DWORD 0 [31:0] handle info (Gen4 only) - * - DWORD 5 [7:0] FFTID - * - DWORD 6 [31:0] Debug info - * - DWORD 7 [31:0] Debug info - * - * This function sets up the above data by copying by copying the contents of - * R0 to the header register. - */ -static void brw_ff_gs_initialize_header(struct brw_ff_gs_compile *c) -{ - struct brw_compile *p = &c->func; - brw_MOV(p, c->reg.header, c->reg.R0); -} - -/** - * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value. - * - * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart, - * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we - * need to be able to update on a per-vertex basis. - */ -static void brw_ff_gs_overwrite_header_dw2(struct brw_ff_gs_compile *c, - unsigned dw2) -{ - struct brw_compile *p = &c->func; - brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2)); -} - -/** - * Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0. - * - * When the thread is spawned, GRF 0 contains the primitive type in bits 4:0 - * of DWORD 2. URB_WRITE messages need the primitive type in bits 6:2 of - * DWORD 2. So this function extracts the primitive type field, bitshifts it - * appropriately, and stores it in c->reg.header. - */ -static void brw_ff_gs_overwrite_header_dw2_from_r0(struct brw_ff_gs_compile *c) -{ - struct brw_compile *p = &c->func; - brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2), - brw_imm_ud(0x1f)); - brw_SHL(p, get_element_ud(c->reg.header, 2), - get_element_ud(c->reg.header, 2), brw_imm_ud(2)); -} - -/** - * Apply an additive offset to DWORD 2 of c->reg.header. - * - * This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately - * for each vertex. 
- */ -static void brw_ff_gs_offset_header_dw2(struct brw_ff_gs_compile *c, - int offset) -{ - struct brw_compile *p = &c->func; - brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2), - brw_imm_d(offset)); -} - - -/** - * Emit a vertex using the URB_WRITE message. Use the contents of - * c->reg.header for the message header, and the registers starting at \c vert - * for the vertex data. - * - * If \c last is true, then this is the last vertex, so no further URB space - * should be allocated, and this message should end the thread. - * - * If \c last is false, then a new URB entry will be allocated, and its handle - * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE - * message. - */ -static void brw_ff_gs_emit_vue(struct brw_ff_gs_compile *c, - struct brw_reg vert, - bool last) -{ - struct brw_compile *p = &c->func; - int write_offset = 0; - bool complete = false; - - do { - /* We can't write more than 14 registers at a time to the URB */ - int write_len = MIN2(c->nr_regs - write_offset, 14); - if (write_len == c->nr_regs - write_offset) - complete = true; - - /* Copy the vertex from vertn into m1..mN+1: - */ - brw_copy8(p, brw_message_reg(1), offset(vert, write_offset), write_len); - - /* Send the vertex data to the URB. If this is the last write for this - * vertex, then we mark it as complete, and either end the thread or - * allocate another vertex URB entry (depending whether this is the last - * vertex). - */ - enum brw_urb_write_flags flags; - if (!complete) - flags = BRW_URB_WRITE_NO_FLAGS; - else if (last) - flags = BRW_URB_WRITE_EOT_COMPLETE; - else - flags = BRW_URB_WRITE_ALLOCATE_COMPLETE; - brw_urb_WRITE(p, - (flags & BRW_URB_WRITE_ALLOCATE) ? c->reg.temp - : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), - 0, - c->reg.header, - flags, - write_len + 1, /* msg length */ - (flags & BRW_URB_WRITE_ALLOCATE) ? 
1 - : 0, /* response length */ - write_offset, /* urb offset */ - BRW_URB_SWIZZLE_NONE); - write_offset += write_len; - } while (!complete); - - if (!last) { - brw_MOV(p, get_element_ud(c->reg.header, 0), - get_element_ud(c->reg.temp, 0)); - } -} - -/** - * Send an FF_SYNC message to ensure that all previously spawned GS threads - * have finished sending primitives down the pipeline, and to allocate a URB - * entry for the first output vertex. Only needed on Ironlake+. - * - * This function modifies c->reg.header: in DWORD 1, it stores num_prim (which - * is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to - * the allocated URB entry (which will be needed by the URB_WRITE meesage that - * follows). - */ -static void brw_ff_gs_ff_sync(struct brw_ff_gs_compile *c, int num_prim) -{ - struct brw_compile *p = &c->func; - - brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim)); - brw_ff_sync(p, - c->reg.temp, - 0, - c->reg.header, - 1, /* allocate */ - 1, /* response length */ - 0 /* eot */); - brw_MOV(p, get_element_ud(c->reg.header, 0), - get_element_ud(c->reg.temp, 0)); -} - - -void -brw_ff_gs_quads(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key) -{ - struct brw_context *brw = c->func.brw; - - brw_ff_gs_alloc_regs(c, 4, false); - brw_ff_gs_initialize_header(c); - /* Use polygons for correct edgeflag behaviour. 
Note that vertex 3 - * is the PV for quads, but vertex 0 for polygons: - */ - if (brw->gen == 5) - brw_ff_gs_ff_sync(c, 1); - brw_ff_gs_overwrite_header_dw2( - c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) - | URB_WRITE_PRIM_START)); - if (key->pv_first) { - brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); - brw_ff_gs_overwrite_header_dw2( - c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); - brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0); - brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0); - brw_ff_gs_overwrite_header_dw2( - c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) - | URB_WRITE_PRIM_END)); - brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1); - } - else { - brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0); - brw_ff_gs_overwrite_header_dw2( - c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); - brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); - brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0); - brw_ff_gs_overwrite_header_dw2( - c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) - | URB_WRITE_PRIM_END)); - brw_ff_gs_emit_vue(c, c->reg.vertex[2], 1); - } -} - -void -brw_ff_gs_quad_strip(struct brw_ff_gs_compile *c, - struct brw_ff_gs_prog_key *key) -{ - struct brw_context *brw = c->func.brw; - - brw_ff_gs_alloc_regs(c, 4, false); - brw_ff_gs_initialize_header(c); - - if (brw->gen == 5) - brw_ff_gs_ff_sync(c, 1); - brw_ff_gs_overwrite_header_dw2( - c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) - | URB_WRITE_PRIM_START)); - if (key->pv_first) { - brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); - brw_ff_gs_overwrite_header_dw2( - c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); - brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0); - brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0); - brw_ff_gs_overwrite_header_dw2( - c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) - | URB_WRITE_PRIM_END)); - brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1); - } - else { - brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0); - brw_ff_gs_overwrite_header_dw2( - c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); - brw_ff_gs_emit_vue(c, 
c->reg.vertex[3], 0); - brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); - brw_ff_gs_overwrite_header_dw2( - c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) - | URB_WRITE_PRIM_END)); - brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1); - } -} - -void brw_ff_gs_lines(struct brw_ff_gs_compile *c) -{ - struct brw_context *brw = c->func.brw; - - brw_ff_gs_alloc_regs(c, 2, false); - brw_ff_gs_initialize_header(c); - - if (brw->gen == 5) - brw_ff_gs_ff_sync(c, 1); - brw_ff_gs_overwrite_header_dw2( - c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) - | URB_WRITE_PRIM_START)); - brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); - brw_ff_gs_overwrite_header_dw2( - c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) - | URB_WRITE_PRIM_END)); - brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1); -} - -/** - * Generate the geometry shader program used on Gen6 to perform stream output - * (transform feedback). - */ -void -gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key, - unsigned num_verts, bool check_edge_flags) -{ - struct brw_compile *p = &c->func; - struct brw_context *brw = p->brw; - brw_inst *inst; - c->prog_data.svbi_postincrement_value = num_verts; - - brw_ff_gs_alloc_regs(c, num_verts, true); - brw_ff_gs_initialize_header(c); - - if (key->num_transform_feedback_bindings > 0) { - unsigned vertex, binding; - struct brw_reg destination_indices_uw = - vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW)); - - /* Note: since we use the binding table to keep track of buffer offsets - * and stride, the GS doesn't need to keep track of a separate pointer - * into each buffer; it uses a single pointer which increments by 1 for - * each vertex. So we use SVBI0 for this pointer, regardless of whether - * transform feedback is in interleaved or separate attribs mode. - * - * Make sure that the buffers have enough room for all the vertices. 
- */ - brw_ADD(p, get_element_ud(c->reg.temp, 0), - get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts)); - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, - get_element_ud(c->reg.temp, 0), - get_element_ud(c->reg.SVBI, 4)); - brw_IF(p, BRW_EXECUTE_1); - - /* Compute the destination indices to write to. Usually we use SVBI[0] - * + (0, 1, 2). However, for odd-numbered triangles in tristrips, the - * vertices come down the pipeline in reversed winding order, so we need - * to flip the order when writing to the transform feedback buffer. To - * ensure that flatshading accuracy is preserved, we need to write them - * in order SVBI[0] + (0, 2, 1) if we're using the first provoking - * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using - * the last provoking vertex convention. - * - * Note: since brw_imm_v can only be used in instructions in - * packed-word execution mode, and SVBI is a double-word, we need to - * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1), - * or (1, 0, 2)) to the destination_indices register, and then add SVBI - * using a separate instruction. Also, since the immediate constant is - * expressed as packed words, and we need to load double-words into - * destination_indices, we need to intersperse zeros to fill the upper - * halves of each double-word. - */ - brw_MOV(p, destination_indices_uw, - brw_imm_v(0x00020100)); /* (0, 1, 2) */ - if (num_verts == 3) { - /* Get primitive type into temp register. */ - brw_AND(p, get_element_ud(c->reg.temp, 0), - get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f)); - - /* Test if primitive type is TRISTRIP_REVERSE. We need to do this as - * an 8-wide comparison so that the conditional MOV that follows - * moves all 8 words correctly. - */ - brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ, - get_element_ud(c->reg.temp, 0), - brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); - - /* If so, then overwrite destination_indices_uw with the appropriate - * reordering. 
- */ - inst = brw_MOV(p, destination_indices_uw, - brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */ - : 0x00020001)); /* (1, 0, 2) */ - brw_inst_set_pred_control(brw, inst, BRW_PREDICATE_NORMAL); - } - brw_ADD(p, c->reg.destination_indices, - c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0)); - - /* For each vertex, generate code to output each varying using the - * appropriate binding table entry. - */ - for (vertex = 0; vertex < num_verts; ++vertex) { - /* Set up the correct destination index for this vertex */ - brw_MOV(p, get_element_ud(c->reg.header, 5), - get_element_ud(c->reg.destination_indices, vertex)); - - for (binding = 0; binding < key->num_transform_feedback_bindings; - ++binding) { - unsigned char varying = - key->transform_feedback_bindings[binding]; - unsigned char slot = c->vue_map.varying_to_slot[varying]; - /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1: - * - * "Prior to End of Thread with a URB_WRITE, the kernel must - * ensure that all writes are complete by sending the final - * write as a committed write." - */ - bool final_write = - binding == key->num_transform_feedback_bindings - 1 && - vertex == num_verts - 1; - struct brw_reg vertex_slot = c->reg.vertex[vertex]; - vertex_slot.nr += slot / 2; - vertex_slot.subnr = (slot % 2) * 16; - /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */ - vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ - ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding]; - brw_set_default_access_mode(p, BRW_ALIGN_16); - brw_MOV(p, stride(c->reg.header, 4, 4, 1), - retype(vertex_slot, BRW_REGISTER_TYPE_UD)); - brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_svb_write(p, - final_write ? 
c->reg.temp : brw_null_reg(), /* dest */ - 1, /* msg_reg_nr */ - c->reg.header, /* src0 */ - SURF_INDEX_GEN6_SOL_BINDING(binding), /* binding_table_index */ - final_write); /* send_commit_msg */ - } - } - brw_ENDIF(p); - - /* Now, reinitialize the header register from R0 to restore the parts of - * the register that we overwrote while streaming out transform feedback - * data. - */ - brw_ff_gs_initialize_header(c); - - /* Finally, wait for the write commit to occur so that we can proceed to - * other things safely. - * - * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3: - * - * The write commit does not modify the destination register, but - * merely clears the dependency associated with the destination - * register. Thus, a simple “mov” instruction using the register as a - * source is sufficient to wait for the write commit to occur. - */ - brw_MOV(p, c->reg.temp, c->reg.temp); - } - - brw_ff_gs_ff_sync(c, 1); - - brw_ff_gs_overwrite_header_dw2_from_r0(c); - switch (num_verts) { - case 1: - brw_ff_gs_offset_header_dw2(c, - URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); - brw_ff_gs_emit_vue(c, c->reg.vertex[0], true); - break; - case 2: - brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); - brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); - brw_ff_gs_offset_header_dw2(c, - URB_WRITE_PRIM_END - URB_WRITE_PRIM_START); - brw_ff_gs_emit_vue(c, c->reg.vertex[1], true); - break; - case 3: - if (check_edge_flags) { - /* Only emit vertices 0 and 1 if this is the first triangle of the - * polygon. Otherwise they are redundant. 
- */ - brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), - get_element_ud(c->reg.R0, 2), - brw_imm_ud(BRW_GS_EDGE_INDICATOR_0)); - brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_NZ); - brw_IF(p, BRW_EXECUTE_1); - } - brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); - brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); - brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START); - brw_ff_gs_emit_vue(c, c->reg.vertex[1], false); - if (check_edge_flags) { - brw_ENDIF(p); - /* Only emit vertex 2 in PRIM_END mode if this is the last triangle - * of the polygon. Otherwise leave the primitive incomplete because - * there are more polygon vertices coming. - */ - brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), - get_element_ud(c->reg.R0, 2), - brw_imm_ud(BRW_GS_EDGE_INDICATOR_1)); - brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_NZ); - brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); - } - brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END); - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - brw_ff_gs_emit_vue(c, c->reg.vertex[2], true); - break; - } -} diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c index c8814fe70cc..93c9d37cc59 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c @@ -31,7 +31,7 @@ #include "brw_context.h" #include "brw_vec4_gs_visitor.h" #include "brw_state.h" -#include "brw_gs.h" +#include "brw_ff_gs.h" static bool -- cgit v1.2.3