diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/auxiliary/draw/draw_gs.c | 50 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_gs.h | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.c | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 1 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 34 |
5 files changed, 81 insertions, 11 deletions
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 987c01a7919..2f94eaeda4f 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -280,6 +280,7 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader *shader, int max_prims_per_invocation = 0; char *output_ptr = (char*)shader->gs_output; int i, j, prim_idx; + unsigned next_prim_boundary = shader->primitive_boundary; for (i = 0; i < shader->vector_length; ++i) { int prims = shader->llvm_emitted_primitives[i]; @@ -290,19 +291,42 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader *shader, total_verts += shader->llvm_emitted_vertices[i]; } - output_ptr += shader->emitted_vertices * shader->vertex_size; for (i = 0; i < shader->vector_length - 1; ++i) { int current_verts = shader->llvm_emitted_vertices[i]; - - if (current_verts != shader->max_output_vertices) { - memcpy(output_ptr + (vertex_count + current_verts) * shader->vertex_size, - output_ptr + (vertex_count + shader->max_output_vertices) * shader->vertex_size, - shader->vertex_size * (total_verts - vertex_count)); + int next_verts = shader->llvm_emitted_vertices[i + 1]; +#if 0 + int j; + for (j = 0; j < current_verts; ++j) { + struct vertex_header *vh = (struct vertex_header *) + (output_ptr + shader->vertex_size * (i * next_prim_boundary + j)); + debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count, + vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]); + + } +#endif + debug_assert(current_verts <= shader->max_output_vertices); + debug_assert(next_verts <= shader->max_output_vertices); + if (next_verts) { + memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size, + output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size, + shader->vertex_size * next_verts); } vertex_count += current_verts; } +#if 0 + { + int i; + for (i = 0; i < total_verts; ++i) { + struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i); + debug_printf("%d) [%f, %f, %f, %f]\n", i, + vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]); + + } + } +#endif + prim_idx = 0; for (i = 0; i < shader->vector_length; ++i) { int num_prims = shader->llvm_emitted_primitives[i]; @@ -513,10 +537,12 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, output_verts->vertex_size = vertex_size; output_verts->stride = output_verts->vertex_size; + /* we allocate exactly one extra vertex per primitive to allow the GS to emit + * overflown vertices into some area where they won't harm anyone */ output_verts->verts = (struct vertex_header *)MALLOC(output_verts->vertex_size * max_out_prims * - shader->max_output_vertices); + shader->primitive_boundary); #if 0 debug_printf("%s count = %d (in prims # = %d)\n", @@ -724,6 +750,16 @@ draw_create_geometry_shader(struct draw_context *draw, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) gs->max_output_vertices = gs->info.properties[i].data[0]; } + /* Primitive boundary is bigger than max_output_vertices by one, because + * the specification says that the geometry shader should exit if the + * number of emitted vertices is bigger or equal to max_output_vertices and + * we can't do that because we're running in the SoA mode, which means that + * our storing routines will keep getting called on channels that have + * overflown. + * So we need some scratch area where we can keep writing the overflown + * vertices without overwriting anything important or crashing. + */ + gs->primitive_boundary = gs->max_output_vertices + 1; for (i = 0; i < gs->info.num_outputs; i++) { if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 7c841396aa2..ca744cebfc9 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -68,6 +68,7 @@ struct draw_geometry_shader { unsigned position_output; unsigned max_output_vertices; + unsigned primitive_boundary; unsigned input_primitive; unsigned output_primitive; diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 0e349783198..33fe40d5e7f 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1287,8 +1287,8 @@ draw_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base, LLVMValueRef clipmask = lp_build_const_int_vec(gallivm, lp_int_type(gs_type), 0); LLVMValueRef indices[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef max_output_vertices = - lp_build_const_int32(gallivm, variant->shader->base.max_output_vertices); + LLVMValueRef next_prim_offset = + lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary); LLVMValueRef io = variant->io_ptr; unsigned i; const struct tgsi_shader_info *gs_info = &variant->shader->base.info; @@ -1297,7 +1297,7 @@ draw_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base, LLVMValueRef ind = lp_build_const_int32(gallivm, i); LLVMValueRef currently_emitted = LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, ""); - indices[i] = LLVMBuildMul(builder, ind, max_output_vertices, ""); + indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, ""); indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index f1b1d79eab7..0fbb8aabbb0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -396,6 +396,7 @@ struct lp_build_tgsi_soa_context LLVMValueRef emitted_prims_vec_ptr; LLVMValueRef total_emitted_vertices_vec_ptr; LLVMValueRef emitted_vertices_vec_ptr; + LLVMValueRef max_output_vertices_vec; LLVMValueRef consts_ptr; const LLVMValueRef *pos; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 9822f72c6a3..28eb57b4b51 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -828,7 +828,6 @@ emit_fetch_gs_input( vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); } - res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base, vertex_index, attrib_index, swizzle_index); @@ -2257,6 +2256,20 @@ clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base, LLVMBuildStore(builder, current_vec, ptr); } +static LLVMValueRef +clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld, + LLVMValueRef current_mask_vec, + LLVMValueRef total_emitted_vertices_vec) +{ + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; + LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, + total_emitted_vertices_vec, + bld->max_output_vertices_vec); + + return LLVMBuildAnd(builder, current_mask_vec, max_mask, ""); +} + static void emit_vertex( const struct lp_build_tgsi_action * action, @@ -2270,6 +2283,8 @@ emit_vertex( LLVMValueRef masked_ones = mask_to_one_vec(bld_base); LLVMValueRef total_emitted_vertices_vec = LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); + masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones, + total_emitted_vertices_vec); gather_outputs(bld); bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base, bld->outputs, @@ -2812,12 +2827,29 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit; if (gs_iface) { + /* There's no specific value for this because it should always + * be set, but apps using ext_geometry_shader4 quite often + * were forgetting so we're using MAX_VERTEX_VARYING from + * that spec even though we could debug_assert if it's not + * set, but that's a lot uglier. */ + uint max_output_vertices = 32; + uint i = 0; /* inputs are always indirect with gs */ bld.indirect_files |= (1 << TGSI_FILE_INPUT); bld.gs_iface = gs_iface; bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input; bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex; bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive; + + for (i = 0; i < info->num_properties; ++i) { + if (info->properties[i].name == + TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { + max_output_vertices = info->properties[i].data[0]; + } + } + bld.max_output_vertices_vec = + lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type, + max_output_vertices); } lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base); |