From 5ec3a7333fd77698610755d51e42094376e11d01 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Sat, 12 Nov 2016 22:46:58 +0100 Subject: draw: finally optimize bool clip mask generation lp_build_any_true_range is just what we need, though it will only produce optimal code with sse41 (ptest + set) - but even without it on 64bit x86 the code is still better (1 unpack, 2 movq + or + set), on 32bit x86 it's going to be roughly the same as before. While here also make it a "real" 8bit boolean - cuts one instruction but more importantly similar to ordinary booleans. Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_llvm.c | 43 ++++++++++++---------- src/gallium/auxiliary/draw/draw_llvm.h | 4 +- .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 2 +- 3 files changed, 26 insertions(+), 23 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 115cb79197c..8aa2d11946e 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1383,20 +1383,21 @@ generate_clipmask(struct draw_llvm *llvm, /** * Returns boolean if any clipping has occurred - * Used zero/non-zero i32 value to represent boolean + * Used zero/one i8 value to represent boolean */ static LLVMValueRef -clipmask_booli32(struct gallivm_state *gallivm, - const struct lp_type vs_type, - LLVMValueRef clipmask_bool_ptr, - boolean edgeflag_in_clipmask) +clipmask_booli8(struct gallivm_state *gallivm, + const struct lp_type vs_type, + LLVMValueRef clipmask_bool_ptr, + boolean edgeflag_in_clipmask) { LLVMBuilderRef builder = gallivm->builder; - LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context); LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, ""); - LLVMValueRef ret = LLVMConstNull(int32_type); - LLVMValueRef temp; - int i; + LLVMValueRef ret; + struct lp_build_context bldivec; + + lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type)); /* * We need to invert the edgeflag bit from the clipmask here @@ -1404,19 +1405,20 @@ clipmask_booli32(struct gallivm_state *gallivm, * and we (may) need it if edgeflag was 0). */ if (edgeflag_in_clipmask) { - struct lp_type i32_type = lp_int_type(vs_type); - LLVMValueRef edge = lp_build_const_int_vec(gallivm, i32_type, + LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type, 1LL << DRAW_TOTAL_CLIP_PLANES); clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, ""); } + /* - * Could do much better with just cmp/movmskps. + * XXX: probably should mask off bits from the mask which come from + * vertices which were beyond the count (i.e. indices_valid for + * linear fetches, for elts ones we don't have the correct mask + * right now). Otherwise might run the pipeline for nothing, + * though everything should still work. */ - for (i=0; i < vs_type.length; i++) { - temp = LLVMBuildExtractElement(builder, clipmask_bool, - lp_build_const_int32(gallivm, i) , ""); - ret = LLVMBuildOr(builder, ret, temp, ""); - } + ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool); + ret = LLVMBuildZExt(builder, ret, int8_type, ""); return ret; } @@ -1641,7 +1643,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, arg_types[i++] = int32_type; /* vertex_id_offset */ arg_types[i++] = int32_type; /* start_instance */ - func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0); + func_type = LLVMFunctionType(LLVMInt8TypeInContext(context), + arg_types, num_arg_types, 0); variant_func = LLVMAddFunction(gallivm->module, func_name, func_type); @@ -2005,8 +2008,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, sampler->destroy(sampler); /* return clipping boolean value for function */ - ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr, - enable_cliptest && key->need_edgeflags); + ret = clipmask_booli8(gallivm, vs_type, clipmask_bool_ptr, + enable_cliptest && key->need_edgeflags); LLVMBuildRet(builder, ret); diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 271433c8ecb..9ea100f9e43 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -261,7 +261,7 @@ enum { -typedef int +typedef boolean (*draw_jit_vert_func)(struct draw_jit_context *context, struct vertex_header *io, const struct draw_vertex_buffer vbuffers[PIPE_MAX_ATTRIBS], @@ -274,7 +274,7 @@ typedef int unsigned start_instance); -typedef int +typedef boolean (*draw_jit_vert_func_elts)(struct draw_jit_context *context, struct vertex_header *io, const struct draw_vertex_buffer vbuffers[PIPE_MAX_ATTRIBS], diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 4033eddca6e..582b5978464 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -353,7 +353,7 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle, const struct draw_prim_info *prim_info = in_prim_info; boolean free_prim_info = FALSE; unsigned opt = fpme->opt; - unsigned clipped = 0; + boolean clipped = 0; llvm_vert_info.count = fetch_info->count; llvm_vert_info.vertex_size = fpme->vertex_size; -- cgit v1.2.3