diff options
author | Samuel Iglesias Gonsalvez <[email protected]> | 2014-07-31 13:27:30 +0200 |
---|---|---|
committer | Iago Toral Quiroga <[email protected]> | 2014-09-19 15:01:16 +0200 |
commit | 21204434845398de86fb707c78dd5bdd1fb5826f (patch) | |
tree | f6ce807f012388c7d3223f9fbbe596729f7d30e7 | |
parent | 28a7da612b4a3f822df293e50bf043782eca1cb8 (diff) |
i965/gen6/gs: Buffer PSIZ/flags vertex data in gen6_gs_visitor
Since geometry shaders can alter the value of varyings packed in the first
output VUE slot (PSIZ), we need to buffer that slot together with all the other
vertex data so we can emit the right value for each vertex when we do the
URB writes.
This fixes the following piglit test in gen6:
tests/spec/glsl-1.50/execution/redeclare-pervertex-out-subset-gs.shader_test
Signed-off-by: Samuel Iglesias Gonsalvez <[email protected]>
Acked-by: Kenneth Graunke <[email protected]>
Reviewed-by: Jordan Justen <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 79 |
1 files changed, 41 insertions, 38 deletions
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index c9e8e66e62d..ccc197d02f7 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -176,16 +176,33 @@ gen6_gs_visitor::visit(ir_emit_vertex *) /* Buffer all output slots for this vertex in vertex_output */ for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) { - /* We will handle PSIZ for each vertex at thread end time since it - * is not computed by the GS algorithm and requires specific handling. - */ int varying = prog_data->vue_map.slot_to_varying[slot]; if (varying != VARYING_SLOT_PSIZ) { dst_reg dst(this->vertex_output); dst.reladdr = ralloc(mem_ctx, src_reg); memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg)); emit_urb_slot(dst, varying); + } else { + /* The PSIZ slot can pack multiple varyings in different channels + * and emit_urb_slot() will produce a MOV instruction for each of + * them. Since we are writing to an array, that will translate to + * possibly multiple MOV instructions with an array destination and + * each will generate a scratch write with the same offset into + * scratch space (thus, each one overwriting the previous). This is + * not what we want. What we will do instead is emit PSIZ to a + * regular temporary register, then move that register into the + * array. This way we only have one instruction with an array + * destination and we only produce a single scratch write. 
+ */ + dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type)); + emit_urb_slot(tmp, varying); + dst_reg dst(this->vertex_output); + dst.reladdr = ralloc(mem_ctx, src_reg); + memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg)); + vec4_instruction *inst = emit(MOV(dst, src_reg(tmp))); + inst->force_writemask_all = true; } + emit(ADD(dst_reg(this->vertex_output_offset), this->vertex_output_offset, 1u)); } @@ -426,17 +443,12 @@ gen6_gs_visitor::emit_thread_end() memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg)); - if (varying == VARYING_SLOT_PSIZ) { - /* We did not buffer PSIZ, emit it directly here */ - emit_urb_slot(dst_reg(MRF, mrf), varying); - } else { - /* Copy this slot to the appropriate message register */ - dst_reg reg = dst_reg(MRF, mrf); - reg.type = output_reg[varying].type; - data.type = reg.type; - vec4_instruction *inst = emit(MOV(reg, data)); - inst->force_writemask_all = true; - } + /* Copy this slot to the appropriate message register */ + dst_reg reg = dst_reg(MRF, mrf); + reg.type = output_reg[varying].type; + data.type = reg.type; + vec4_instruction *inst = emit(MOV(reg, data)); + inst->force_writemask_all = true; mrf++; emit(ADD(dst_reg(this->vertex_output_offset), @@ -584,22 +596,19 @@ gen6_gs_visitor::xfb_buffer_output() /* Buffer all TF outputs for this vertex in xfb_output */ for (int binding = 0; binding < prog_data->num_transform_feedback_bindings; binding++) { - /* We will handle PSIZ for each vertex at thread end time since it - * is not computed by the GS algorithm and requires specific handling. 
- */ unsigned varying = prog_data->transform_feedback_bindings[binding]; - if (varying != VARYING_SLOT_PSIZ) { - dst_reg dst(this->xfb_output); - dst.reladdr = ralloc(mem_ctx, src_reg); - memcpy(dst.reladdr, &this->xfb_output_offset, sizeof(src_reg)); - dst.type = output_reg[varying].type; + dst_reg dst(this->xfb_output); + dst.reladdr = ralloc(mem_ctx, src_reg); + memcpy(dst.reladdr, &this->xfb_output_offset, sizeof(src_reg)); + dst.type = output_reg[varying].type; + + this->current_annotation = output_reg_annotation[varying]; + src_reg out_reg = src_reg(output_reg[varying]); + out_reg.swizzle = varying == VARYING_SLOT_PSIZ + ? BRW_SWIZZLE_WWWW : prog_data->transform_feedback_swizzles[binding]; + emit(MOV(dst, out_reg)); - this->current_annotation = output_reg_annotation[varying]; - src_reg out_reg = src_reg(output_reg[varying]); - out_reg.swizzle = prog_data->transform_feedback_swizzles[binding]; - emit(MOV(dst, out_reg)); - } emit(ADD(dst_reg(this->xfb_output_offset), this->xfb_output_offset, 1u)); } } @@ -743,18 +752,12 @@ gen6_gs_visitor::xfb_program(unsigned num_verts) src_reg out_reg; this->current_annotation = output_reg_annotation[varying]; - if (varying == VARYING_SLOT_PSIZ) { - /* We did not buffer PSIZ, emit it directly here */ - out_reg = src_reg(output_reg[varying]); - out_reg.swizzle = BRW_SWIZZLE_WWWW; - } else { - /* Copy this varying to the appropriate message register */ - out_reg = src_reg(this, glsl_type::uvec4_type); - out_reg.type = output_reg[varying].type; + /* Copy this varying to the appropriate message register */ + out_reg = src_reg(this, glsl_type::uvec4_type); + out_reg.type = output_reg[varying].type; - data.type = output_reg[varying].type; - emit(MOV(dst_reg(out_reg), data)); - } + data.type = output_reg[varying].type; + emit(MOV(dst_reg(out_reg), data)); /* Write data and send SVB Write */ inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, out_reg, sol_temp); |