 src/gallium/auxiliary/draw/draw_llvm.c | 111
 1 file changed, 46 insertions(+), 65 deletions(-)
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 414f2dc022d..c5485728e42 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -669,18 +669,17 @@ fetch_instanced(struct gallivm_state *gallivm,
    LLVMValueRef zero = LLVMConstNull(i32_t);
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef stride, buffer_overflowed, aos, index_valid;
-   LLVMValueRef ofbit = NULL;
    unsigned i;
 
    aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
    aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
 
-   stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
+   /* This mul can overflow. Wraparound is ok. */
+   stride = LLVMBuildMul(builder, vb_stride, index, "");
 
    buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
                                      stride, buffer_size_adj,
                                      "buffer_overflowed");
-   buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, "");
 
    if (0) {
       lp_build_print_value(gallivm, "   instance index = ", index);
@@ -759,7 +758,7 @@ fetch_vector(struct gallivm_state *gallivm,
    LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_build_context blduivec;
-   LLVMValueRef offset, tmp, valid_mask;
+   LLVMValueRef offset, valid_mask;
    LLVMValueRef aos_fetch[LP_MAX_VECTOR_WIDTH / 32];
    unsigned i;
 
@@ -768,24 +767,11 @@ fetch_vector(struct gallivm_state *gallivm,
    vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride);
    buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj);
 
-   /*
-    * Sort of interestingly, with interleaved attribs, llvm 3.7+ will
-    * recognize these calculations to be constant with different attribs
-    * (the different offset has been added to map_ptr).
-    * llvm 3.3, however, will not (I can't get llvm 3.4-3.6 to link...)
-    *
-    * XXX: could actually avoid this altogether (replacing by simple
-    * non-widening mul) by precalculating the max index instead outside
-    * the loop (at the cost of one scalar udiv per vertex element).
-    */
-   offset = lp_build_mul_32_lohi_cpu(&blduivec, vb_stride, indices, &tmp);
+   /* This mul can overflow. Wraparound is ok. */
+   offset = lp_build_mul(&blduivec, vb_stride, indices);
 
    valid_mask = lp_build_compare(gallivm, blduivec.type,
-                                 PIPE_FUNC_EQUAL, tmp, blduivec.zero);
-
-   tmp = lp_build_compare(gallivm, blduivec.type,
-                          PIPE_FUNC_LESS, offset, buffer_size_adj);
-   valid_mask = LLVMBuildAnd(builder, tmp, valid_mask, "");
+                                 PIPE_FUNC_LESS, offset, buffer_size_adj);
 
    /* not valid elements use offset 0 */
    offset = LLVMBuildAnd(builder, offset, valid_mask, "");
@@ -1566,10 +1552,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    LLVMBuilderRef builder;
    char func_name[64];
    struct lp_type vs_type;
-   LLVMValueRef count, fetch_elts, start_or_maxelt, start;
+   LLVMValueRef count, fetch_elts, start_or_maxelt;
    LLVMValueRef vertex_id_offset, start_instance;
    LLVMValueRef stride, step, io_itr;
-   LLVMValueRef ind_vec, ind_vec_store, have_elts, fetch_max, tmp;
+   LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;
    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
    LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
    LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
@@ -1580,7 +1566,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    struct draw_context *draw = llvm->draw;
    const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
    unsigned i, j;
-   struct lp_build_context bld, bldivec, blduivec;
+   struct lp_build_context bld, blduivec;
    struct lp_build_loop_state lp_loop;
    struct lp_build_if_state if_ctx;
    const int vector_length = lp_native_vector_width / 32;
@@ -1640,6 +1626,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    io_ptr = LLVMGetParam(variant_func, 1);
    vbuffers_ptr = LLVMGetParam(variant_func, 2);
    count = LLVMGetParam(variant_func, 3);
+   /*
+    * XXX: the maxelt part is unused. Not really useful, since we cannot
+    * get index buffer overflows due to vsplit (which provides its own
+    * elts buffer, with a different size than what's passed in here).
+    */
    start_or_maxelt = LLVMGetParam(variant_func, 4);
    /*
     * XXX: stride is actually unused. The stride we use is strictly calculated
@@ -1682,7 +1673,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    vs_type.length = vector_length;
 
    lp_build_context_init(&bld, gallivm, lp_type_uint(32));
-   lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));
    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
 
    /* hold temporary "bool" clipmask */
@@ -1706,29 +1696,16 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    }
 
    fetch_max = lp_build_alloca(gallivm, int32_type, "fetch_max");
-   ind_vec_store = lp_build_alloca(gallivm, bldivec.vec_type, "ind_vec");
 
    have_elts = LLVMBuildICmp(builder, LLVMIntNE,
                              LLVMConstPointerNull(arg_types[10]), fetch_elts, "");
 
-   lp_build_if(&if_ctx, gallivm, have_elts);
-   {
-      LLVMBuildStore(builder, ind_vec, ind_vec_store);
-      LLVMBuildStore(builder, count, fetch_max);
-   }
-   lp_build_else(&if_ctx);
-   {
-      tmp = lp_build_add(&bld, count, start_or_maxelt);
-      LLVMBuildStore(builder, tmp, fetch_max);
-      start = lp_build_broadcast_scalar(&bldivec, start_or_maxelt);
-      tmp = lp_build_add(&bldivec, start, ind_vec);
-      LLVMBuildStore(builder, tmp, ind_vec_store);
-   }
-   lp_build_endif(&if_ctx);
-   fetch_max = LLVMBuildLoad(builder, fetch_max, "");
-   fetch_max = LLVMBuildSub(builder, fetch_max, bld.one, "fetch_max");
-   fetch_max = lp_build_broadcast_scalar(&bldivec, fetch_max);
-   ind_vec = LLVMBuildLoad(builder, ind_vec_store, "");
+   fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max");
+   fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);
+   /*
+    * Only needed for non-indexed path.
+    */
+   start_vec = lp_build_broadcast_scalar(&blduivec, start_or_maxelt);
 
    /*
    * Pre-calculate everything which is constant per shader invocation.
@@ -1757,9 +1734,12 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
       /*
       * We'll set buffer_size_adj to zero if we have of, so it will
      * always overflow later automatically without having to keep ofbit.
+      * Overflows (with normal wraparound) in the actual offset
+      * calculation should be ok, just not for the buffer size calc.
+      * It would also be possible to detect such overflows and return
+      * zeros if that happens, but this would be more complex.
       */
-      buf_offset = lp_build_uadd_overflow(gallivm, vb_buffer_offset,
-                                          src_offset, &ofbit);
+      buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset);
       tmp = lp_build_sub(&bld, bsize, bld.one);
       buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
                                                   &ofbit);
@@ -1843,21 +1823,17 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
       true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");
 
       /*
-       * XXX: This code is really fishy. We are required to use a int min
-       * here, not uint. The reason is that for some non-indexed draws, we
-       * might get something like MAX_UINT - 3 as start value (due to start
-       * vertex). So, the first 3 elements in the vector are huge, and
-       * limiting them to fetch_max is incorrect. By using int min, we'll
-       * pick that huge value - we rely on this creating an overflow (which
-       * is guaranteed) in the stride mul later (using (signed) cmp and
-       * incorporating the result into ofmask would also work).
-       * For the later elements, this just wraps around the indices, which
-       * is apparently ok...
+       * Limit indices to fetch_max, otherwise might try to access indices
+       * beyond index buffer (or rather vsplit elt buffer) size.
+       * Could probably safely (?) skip this for non-indexed draws and
+       * simplify things minimally (by removing it one could combine the
+       * ind_vec and start_vec adds). I think the only effect for non-indexed
+       * draws will be that the invalid elements will all be fetched from
+       * the same location as the last valid one, but no one should care.
        */
-      true_index_array = lp_build_min(&bldivec, true_index_array, fetch_max);
+      true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);
 
-      index_store = lp_build_alloca_undef(gallivm, bldivec.vec_type, "index_store");
-      LLVMBuildStore(builder, true_index_array, index_store);
+      index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");
 
       lp_build_if(&if_ctx, gallivm, have_elts);
       {
@@ -1875,22 +1851,27 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
          * not being zero will get a different fetch index than the valid
          * index 0. So, just rely on vsplit code preventing out-of-bounds
          * fetches. This is also why it's safe to do elts fetch even if there
-         * was no index buffer bound - the real buffer is never seen here.
+         * was no index buffer bound - the real buffer is never seen here, at
+         * least not if there are index buffer overflows...
          */
 
         /*
          * XXX should not have to do this, as scale can be handled
          * natively by loads (hits asserts though).
          */
-        true_index_array = lp_build_shl_imm(&blduivec, true_index_array, 2);
+        tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);
         fetch_elts = LLVMBuildBitCast(builder, fetch_elts,
                                       LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
-        true_index_array = lp_build_gather(gallivm, vs_type.length,
-                                           32, 32, TRUE,
-                                           fetch_elts, true_index_array,
-                                           FALSE);
-        LLVMBuildStore(builder, true_index_array, index_store);
+        tmp = lp_build_gather(gallivm, vs_type.length,
+                              32, 32, TRUE,
+                              fetch_elts, tmp, FALSE);
+        LLVMBuildStore(builder, tmp, index_store);
+      }
+      lp_build_else(&if_ctx);
+      {
+         tmp = LLVMBuildAdd(builder, true_index_array, start_vec, "");
+         LLVMBuildStore(builder, tmp, index_store);
       }
       lp_build_endif(&if_ctx);
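
The overflow handling this patch switches to is easier to follow in scalar C than in LLVM builder calls. Below is a minimal sketch of one lane of the new fetch path, assuming a made-up struct vb and fetch_one() helper (neither exists in draw_llvm.c) and omitting the buf_offset term for brevity:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Hypothetical stand-in for the per-attrib state used in draw_llvm.c. */
struct vb {
   const uint8_t *map;   /* mapped vertex buffer (map_ptr) */
   uint32_t buffer_size; /* size of the mapping in bytes */
   uint32_t stride;      /* vb_stride */
   uint32_t elem_size;   /* bsize: size of one attribute element */
};

/*
 * Scalar model of one lane of the new fetch logic: the stride * index
 * mul uses plain wrapping arithmetic, and out-of-bounds safety comes
 * solely from comparing the (possibly wrapped) offset against
 * buffer_size_adj. A wrapped offset may pass the compare and fetch a
 * wrong-but-in-bounds element; it can never read outside the buffer.
 */
static void
fetch_one(const struct vb *vb, uint32_t index, uint8_t *out)
{
   /*
    * buffer_size_adj = buffer_size - (elem_size - 1), computed once per
    * invocation with an underflow check (lp_build_usub_overflow in the
    * patch) and forced to 0 on underflow so that every index fails the
    * compare below - this is what replaces carrying ofbit around.
    */
   uint32_t tmp = vb->elem_size - 1;
   uint32_t buffer_size_adj = vb->buffer_size >= tmp ?
                              vb->buffer_size - tmp : 0;

   /* This mul can overflow. Wraparound is ok. */
   uint32_t offset = vb->stride * index;

   if (offset >= buffer_size_adj) {
      memset(out, 0, vb->elem_size);   /* invalid elements fetch zeros */
   } else {
      memcpy(out, vb->map + offset, vb->elem_size);
   }
}

int
main(void)
{
   uint8_t buf[64] = { 1, 2, 3, 4 };
   struct vb vb = { buf, sizeof(buf), 16, 4 };
   uint8_t out[4];

   fetch_one(&vb, 0x40000000u, out);   /* 0x40000000 * 16 wraps to 0 */
   printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]);
   return 0;
}

Running this prints "1 2 3 4": the wrapped offset fetches a wrong but in-bounds element, which is exactly the behavior the new comments declare acceptable (garbage in, garbage out, but never an out-of-bounds access).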
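
Likewise, the restructured index setup in draw_llvm_generate() (clamp to fetch_max first, then either gather through the vsplit elts buffer or add the start vertex) reduces, per lane, to roughly the following sketch; true_index() and its signature are illustrative only, not part of the patch:

#include <stdint.h>
#include <stddef.h>

/*
 * Scalar model of the new true-index computation: the loop index is
 * clamped with an unsigned min to fetch_max = count - 1 before use, so
 * it can never run past the vsplit-provided elts buffer (count is
 * assumed non-zero, as in the generated code). The start vertex is
 * added only on the non-indexed path (start_vec in the patch), which
 * is what allows dropping the signed bldivec context and the old
 * "fishy" signed-min trick.
 */
uint32_t
true_index(uint32_t i, uint32_t start, uint32_t count,
           const uint32_t *fetch_elts) /* NULL for non-indexed draws */
{
   uint32_t fetch_max = count - 1;
   uint32_t idx = i < fetch_max ? i : fetch_max;   /* unsigned min */

   if (fetch_elts)
      return fetch_elts[idx];   /* indexed: gather from the elts buffer */
   return idx + start;          /* non-indexed: bias by start vertex */
}

For lanes past count this means every fetch lands on the same location as the last valid element, matching the updated comment in the diff.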